NCBI C++ ToolKit
wgsread.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef SRA__READER__SRA__WGSREAD__HPP
2 #define SRA__READER__SRA__WGSREAD__HPP
3 /* $Id: wgsread.hpp 100317 2023-07-20 14:23:42Z vasilche $
4  * ===========================================================================
5  *
6  * PUBLIC DOMAIN NOTICE
7  * National Center for Biotechnology Information
8  *
9  * This software/database is a "United States Government Work" under the
10  * terms of the United States Copyright Act. It was written as part of
11  * the author's official duties as a United States Government employee and
12  * thus cannot be copyrighted. This software/database is freely available
13  * to the public for use. The National Library of Medicine and the U.S.
14  * Government have not placed any restriction on its use or reproduction.
15  *
16  * Although all reasonable efforts have been taken to ensure the accuracy
17  * and reliability of the software and data, the NLM and the U.S.
18  * Government do not and cannot warrant the performance or results that
19  * may be obtained by using this software or data. The NLM and the U.S.
20  * Government disclaim all warranties, express or implied, including
21  * warranties of performance, merchantability or fitness for any particular
22  * purpose.
23  *
24  * Please cite the author in any work or product based on this material.
25  *
26  * ===========================================================================
27  *
28  * Authors: Eugene Vasilchenko
29  *
30  * File Description:
31  * Access to WGS files
32  *
33  */
34 
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbimtx.hpp>
37 #include <util/range.hpp>
38 #include <util/rangemap.hpp>
39 #include <util/simple_buffer.hpp>
40 #include <serial/serialbase.hpp>
43 #include <objects/seq/Bioseq.hpp>
45 #include <objects/seq/Seq_inst.hpp>
46 #include <objects/seq/Seq_data.hpp>
47 #include <sra/readers/sra/snpread.hpp> // for CSafeFlags
49 #include <map>
50 #include <list>
51 
52 //#include <ncbi/ncbi.h> // for NCBI_gb_state
54 //#include <insdc/insdc.h> // for INSDC_coord_*, INSDC_quality_phred
59 //#include <ncbi/wgs-contig.h> // for NCBI_WGS_component_props, NCBI_WGS_gap_linkage
62 
63 // missing wgs-contig.h definitions
65 enum
66 {
71 };
72 
74 
76 enum
77 {
82 };
83 
84 
85 enum {
93 };
94 
96 
98 
100 
102 
103 class CSeq_entry;
104 class CSeq_annot;
105 class CSeq_align;
106 class CSeq_graph;
107 class CSeq_feat;
108 class CBioseq;
109 class CSeq_literal;
110 class CUser_object;
111 class CUser_field;
112 class CID2S_Split_Info;
113 class CID2S_Chunk;
114 
115 class CWGSSeqIterator;
117 class CWGSGiIterator;
118 class CWGSProteinIterator;
119 class CWGSFeatureIterator;
120 
121 struct SWGSCreateInfo;
122 struct SWGSFeatChunkInfo;
123 
125 {
126 public:
127  explicit CAsnBinData(CSerialObject& obj);
128  virtual ~CAsnBinData(void);
129 
// Returns a const reference to the serial object obtained by dereferencing
// m_MainObject (the member's declaration is not visible in this listing).
130  const CSerialObject& GetMainObject(void) const {
131  return *m_MainObject;
132  }
133  virtual void Serialize(CObjectOStreamAsnBinary& out) const;
134 
135 private:
137 };
138 
139 
141 {
142  enum EFlags {
143  fIds_gi = 1<<0,
144  fIds_acc = 1<<1,
145  fIds_gnl = 1<<2,
148 
150  fInst_delta = 1<<3,
153 
154  fSeqDescr = 1<<4,
155  fNucProtDescr = 1<<12,
156  fMasterDescr = 1<<5,
158  fSeqDescrObj = 1<<14,
161 
162  fSeqAnnot = 1<<6,
166 
169  fSplitProducts = 1<<10,
170  fSplitFeatures = 1<<11,
174 
176  };
178 
184  };
185 
186  typedef int TSplitVersion;
188 
189  typedef int TChunkId;
190 };
192 
193 
195 {
196 public:
197  CWGSDb_Impl(CVDBMgr& mgr,
198  CTempString path_or_acc,
199  CTempString vol_path = CTempString());
200  virtual ~CWGSDb_Impl(void);
201 
// Accessor: returns a const reference to the stored m_IdPrefix string.
202  const string& GetIdPrefix(void) const {
203  return m_IdPrefix;
204  }
// Accessor: returns a const reference to the stored m_IdPrefixWithVersion
// string.
205  const string& GetIdPrefixWithVersion(void) const {
206  return m_IdPrefixWithVersion;
207  }
// Accessor: returns a const reference to the stored m_WGSPath string.
208  const string& GetWGSPath(void) const {
209  return m_WGSPath;
210  }
211 
212  // normalize accession to the form acceptable by SRA SDK
213  // 0. if the argument looks like path - do not change it
214  // 1. exclude contig/scaffold id
215  // 2. add default version (1)
216  static string NormalizePathOrAccession(CTempString path_or_acc,
217  CTempString vol_path = CTempString());
218 
// Kind of row an accession refers to.
// Contig is encoded as 0; scaffold and protein are encoded as the
// character codes 'S' and 'P' respectively.
219  enum ERowType {
220  eRowType_contig = 0,
221  eRowType_scaffold = 'S',
222  eRowType_protein = 'P'
223  };
224  typedef char TRowType;
226  fAllowRowType_contig = 1<<0,
227  fAllowRowType_scaffold = 1<<1,
228  fAllowRowType_protein = 1<<2
229  };
231  // parse row id from accession
232  // returns (row, accession_type) pair
233  // row will be 0 if accession is in wrong format
234  static
235  pair<TVDBRowId, ERowType> ParseRowType(CTempString acc,
236  TAllowRowType allow);
237  // parse row id from accession
238  // returns 0 if accession is in wrong format
239  // if is_scaffold flag pointer is not null, then scaffold ids are also
240  // accepted and the flag is set appropriately
241  TVDBRowId ParseRow(CTempString acc, bool* is_scaffold) const;
242  // parse contig row id from accession
243  // returns 0 if accession is in wrong format
244  static
246  return ParseRowType(acc, fAllowRowType_contig).first;
247  }
248  // parse scaffold row id from accession
249  // returns 0 if accession is in wrong format
250  static
252  return ParseRowType(acc, fAllowRowType_scaffold).first;
253  }
254  // parse protein row id from accession
255  // returns 0 if accession is in wrong format
256  static
258  return ParseRowType(acc, fAllowRowType_protein).first;
259  }
// Accessor: returns the stored m_IdRowDigits value.
// NOTE(review): presumably the number of digits used for the row part of
// an accession - confirm against x_InitIdParams().
260  Uint4 GetIdRowDigits(void) const {
261  return m_IdRowDigits;
262  }
263 
264  bool IsTSA(void) const;
265 
// Bit flags passed as gnl_id_flags to the general Seq-id builders.
// The two NoWGS* values occupy separate bits so they can be combined;
// fGnlId_Default is 0, i.e. neither flag set.
266  enum EGnlIdFlags {
267  fGnlId_NoWGSVersion = 1<<0,
268  fGnlId_NoWGSId = 1<<1,
269  fGnlId_Default = 0
270  };
272 
273  CRef<CSeq_id> GetGeneralSeq_id(CTempString prefix,
274  CTempString tag) const;
275  CRef<CSeq_id> GetGeneralSeq_id(CTempString tag,
276  TGnlIdFlags gnl_id_flags = fGnlId_Default) const;
277  CRef<CSeq_id> GetPatentSeq_id(int id) const;
278 
279  CRef<CSeq_id> GetGeneralOrPatentSeq_id(CTempString str, TVDBRowId row,
280  TGnlIdFlags gnl_id_flags = fGnlId_Default) const;
281  CRef<CSeq_id> GetAccSeq_id(CTempString acc,
282  int version) const;
283  CRef<CSeq_id> GetAccSeq_id(ERowType type,
284  TVDBRowId row_id,
285  int version) const;
286  CRef<CSeq_id> GetMasterSeq_id(void) const;
287  TGi GetMasterGi(void) const;
288  CRef<CSeq_id> GetContigSeq_id(TVDBRowId row_id) const;
289  CRef<CSeq_id> GetScaffoldSeq_id(TVDBRowId row_id) const;
290  CRef<CSeq_id> GetProteinSeq_id(TVDBRowId row_id) const;
291 
292  CSeq_inst::TMol GetContigMolType(void) const;
293  CSeq_inst::TMol GetScaffoldMolType(void) const;
294  CSeq_inst::TMol GetProteinMolType(void) const;
295 
296  CRef<CSeq_entry> GetMasterSeq_entry(void) const;
297 
298  typedef list< CRef<CSeqdesc> > TMasterDescr;
299 
// Returns the m_IsSetMasterDescr flag.
300  bool IsSetMasterDescr(void) const {
301  return m_IsSetMasterDescr;
302  }
// Returns a const reference to the stored master descriptor list
// (m_MasterDescr, a list of CRef<CSeqdesc>). No check of
// IsSetMasterDescr() is performed here.
303  const TMasterDescr& GetMasterDescr(void) const {
304  return m_MasterDescr;
305  }
306 
308  // return size of the master Seq-entry data (ASN.1 binary)
309  // the size is also available as buffer.size()
310  size_t GetMasterDescrBytes(TMasterDescrBytes& buffer);
311  // return entry or null if absent
312  CRef<CSeq_entry> GetMasterDescrEntry(void);
313  void AddMasterDescr(CSeq_descr& descr, const CBioseq* main_seq = 0, TFlags flags = fDefaultFlags) const;
314 
315  void ResetMasterDescr(void);
316  void SetMasterDescr(const TMasterDescr& descr, int filter);
317  bool LoadMasterDescr(int filter);
318 
319  void SetPatentId(CRef<CSeq_id> id);
// Returns m_PatentId converted to bool.
// NOTE(review): presumably true when the reference is non-null - confirm
// against the CRef boolean conversion.
320  bool HasPatentId() const
321  {
322  return m_PatentId;
323  }
// Returns a const reference to the stored m_PatentId reference itself;
// no null check is done here.
324  const CRef<CSeq_id>& GetPatentId(void) const
325  {
326  return m_PatentId;
327  }
328 
329  // get GI range of nucleotide sequences
330  pair<TGi, TGi> GetNucGiRange(void);
331  // get GI range of protein sequences
332  pair<TGi, TGi> GetProtGiRange(void);
333  // get row_id for a given GI or 0 if there is no GI
334  // the second value in returned value is true if the sequence is protein
335  pair<TVDBRowId, bool> GetGiRowId(TGi gi);
336  // get nucleotide row_id (SEQUENCE) for a given GI or 0 if there is no GI
337  TVDBRowId GetNucGiRowId(TGi gi);
338  // get protein row_id (PROTEIN) for a given GI or 0 if there is no GI
339  TVDBRowId GetProtGiRowId(TGi gi);
340 
341  // get contig row_id (SEQUENCE) for contig name or 0 if there is none
342  TVDBRowId GetContigNameRowId(const string& name);
343  // get scaffold row_id (SCAFFOLD) for scaffold name or 0 if there is none
344  TVDBRowId GetScaffoldNameRowId(const string& name);
345  // get protein row_id (PROTEIN) for protein name or 0 if there is none
346  TVDBRowId GetProteinNameRowId(const string& name);
347  // get protein row_id (PROTEIN) for product name or 0 if there is none
348  TVDBRowId GetProductNameRowId(const string& name);
349  // get protein row_id (PROTEIN) for GB accession or 0 if there is no acc.version
350  // if version == -1 return latest protein version
351  TVDBRowId GetProtAccRowId(const string& acc, int version = -1);
352 
354  typedef vector<TGiRange> TGiRanges;
355  // return sorted non-overlapping ranges of nucleotide GIs in the VDB
356  TGiRanges GetNucGiRanges(void);
357  // return sorted non-overlapping ranges of protein GIs in the VDB
358  TGiRanges GetProtGiRanges(void);
359 
361  {
363  : m_IdLength(0)
364  {
365  }
366  SProtAccInfo(CTempString acc, Uint4& id);
367 
368  string GetAcc(Uint4 id) const;
369 
370  DECLARE_OPERATOR_BOOL(m_IdLength != 0);
371 
// Ordering: compares m_IdLength first; m_AccPrefix is compared only
// when the id lengths are equal.
372  bool operator<(const SProtAccInfo& b) const {
373  if ( m_IdLength != b.m_IdLength ) {
374  return m_IdLength < b.m_IdLength;
375  }
376  return m_AccPrefix < b.m_AccPrefix;
377  }
378 
// Equality: both m_IdLength and m_AccPrefix must match.
379  bool operator==(const SProtAccInfo& b) const {
380  return m_IdLength == b.m_IdLength &&
381  m_AccPrefix == b.m_AccPrefix;
382  }
// Inequality: defined as the negation of operator==.
383  bool operator!=(const SProtAccInfo& b) const {
384  return !(*this == b);
385  }
386 
387  string m_AccPrefix;
389  };
390 
393  // return map of 3+5 accession ranges
394  // Key of each element is accession prefix/length pair
395  TProtAccRanges GetProtAccRanges(void);
396 
397  EFeatLocIdType GetFeatLocIdType();
398  EFeatLocIdType DetermineFeatLocIdType();
399  bool HasStandardFeatLocIdType();
400 
402  return m_ProjectGBState;
403  }
// Accessor: returns a const reference to the stored m_ReplacedBy string
// (empty when IsReplaced() is false).
404  const string& GetReplacedBy() const {
405  return m_ReplacedBy;
406  }
// True when m_ReplacedBy is a non-empty string.
407  bool IsReplaced() const {
408  return !m_ReplacedBy.empty();
409  }
410 
// Returns the m_HasCommonTaxId flag.
411  bool HasCommonTaxId(void) const
412  {
413  return m_HasCommonTaxId;
414  }
// Returns the stored m_CommonTaxId value; HasCommonTaxId() is not
// checked here.
415  TTaxId GetCommonTaxId(void) const
416  {
417  return m_CommonTaxId;
418  }
419  bool CanHaveGis();
420  TVDBRowCount GetTotalFeatureCount();
421  bool HasFeatures();
422 
423  struct SAmbiguityInfo;
424 
425 protected:
426  friend class CWGSSeqIterator;
427  friend class CWGSScaffoldIterator;
428  friend class CWGSGiIterator;
429  friend class CWGSProteinIterator;
430  friend class CWGSFeatureIterator;
431 
432  // SSeq0TableCursor is helper accessor structure for SEQUENCE table, minimal columns
433  struct SSeq0TableCursor;
434  // SSeq4naTableCursor is helper accessor structure for SEQUENCE table, 4na columns
435  struct SSeq4naTableCursor;
436  // SSeqTableCursor is helper accessor structure for SEQUENCE table, remaining columns
437  struct SSeqTableCursor;
438  // SScfTableCursor is helper accessor structure for SCAFFOLD table
439  struct SScfTableCursor;
440  // SProt0TableCursor is helper accessor structure for optional PROTEIN table, minimal columns
441  struct SProt0TableCursor;
442  // SProtTableCursor is helper accessor structure for optional PROTEIN table, remaining columns
443  struct SProtTableCursor;
444  // SFeatTableCursor is helper accessor structure for optional FEATURE table
445  struct SFeatTableCursor;
446  // SGiIdxTableCursor is helper accessor structure for GI_IDX table
447  struct SGiIdxTableCursor;
448  // SProtIdxTableCursor is helper accessor structure for PROT_ACC_IDX table
449  struct SProtIdxTableCursor;
450 
// Returns m_SeqTable directly; unlike the other table accessors there is
// no "is opened" check and no open call on this path.
451  const CVDBTable& SeqTable(void) {
452  return m_SeqTable;
453  }
// Returns m_ScfTable, calling OpenScfTable() first when the
// m_ScfTableIsOpened flag (read with acquire ordering) is not yet set.
454  const CVDBTable& ScfTable(void) {
455  if ( !m_ScfTableIsOpened.load(memory_order_acquire) ) {
456  OpenScfTable();
457  }
458  return m_ScfTable;
459  }
// Returns m_ProtTable, calling OpenProtTable() first when the
// m_ProtTableIsOpened flag (read with acquire ordering) is not yet set.
460  const CVDBTable& ProtTable(void) {
461  if ( !m_ProtTableIsOpened.load(memory_order_acquire) ) {
462  OpenProtTable();
463  }
464  return m_ProtTable;
465  }
// Returns m_FeatTable, calling OpenFeatTable() first when the
// m_FeatTableIsOpened flag (read with acquire ordering) is not yet set.
466  const CVDBTable& FeatTable(void) {
467  if ( !m_FeatTableIsOpened.load(memory_order_acquire) ) {
468  OpenFeatTable();
469  }
470  return m_FeatTable;
471  }
// Returns m_GiIdxTable, calling OpenGiIdxTable() first when the
// m_GiIdxTableIsOpened flag (read with acquire ordering) is not yet set.
472  const CVDBTable& GiIdxTable(void) {
473  if ( !m_GiIdxTableIsOpened.load(memory_order_acquire) ) {
474  OpenGiIdxTable();
475  }
476  return m_GiIdxTable;
477  }
// Returns m_ProtIdxTable, calling OpenProtIdxTable() first when the
// m_ProtIdxTableIsOpened flag (read with acquire ordering) is not yet set.
478  const CVDBTable& ProtIdxTable(void) {
479  if ( !m_ProtIdxTableIsOpened.load(memory_order_acquire) ) {
480  OpenProtIdxTable();
481  }
482  return m_ProtIdxTable;
483  }
485  if ( !m_ProtAccIndexIsOpened.load(memory_order_acquire) ) {
486  OpenProtAccIndex();
487  }
488  return m_ProtAccIndex;
489  }
491  if ( !m_ContigNameIndexIsOpened.load(memory_order_acquire) ) {
492  OpenContigNameIndex();
493  }
494  return m_ContigNameIndex;
495  }
497  if ( !m_ScaffoldNameIndexIsOpened.load(memory_order_acquire) ) {
498  OpenScaffoldNameIndex();
499  }
500  return m_ScaffoldNameIndex;
501  }
503  if ( !m_ProteinNameIndexIsOpened.load(memory_order_acquire) ) {
504  OpenProteinNameIndex();
505  }
506  return m_ProteinNameIndex;
507  }
509  if ( !m_ProductNameIndexIsOpened.load(memory_order_acquire) ) {
510  OpenProductNameIndex();
511  }
512  return m_ProductNameIndex;
513  }
514 
515  // get table accessor object for exclusive access
525  // return table accessor object for reuse
526  void Put(CRef<SSeq0TableCursor>& curs, TVDBRowId row = 0);
527  void Put(CRef<SSeqTableCursor>& curs, TVDBRowId row = 0);
528  void Put(CRef<SSeq4naTableCursor>& curs, TVDBRowId row = 0);
529  void Put(CRef<SScfTableCursor>& curs, TVDBRowId row = 0);
530  void Put(CRef<SProt0TableCursor>& curs, TVDBRowId row = 0);
531  void Put(CRef<SProtTableCursor>& curs, TVDBRowId row = 0);
532  void Put(CRef<SFeatTableCursor>& curs, TVDBRowId row = 0);
533  void Put(CRef<SGiIdxTableCursor>& curs, TVDBRowId row = 0);
534  void Put(CRef<SProtIdxTableCursor>& curs, TVDBRowId row = 0);
535 
536  // structure to hold and cache 4na data for ambiguous blocks of sequence
537  CRef<SAmbiguityInfo> GetAmbiguityInfo(TVDBRowId row);
538  void PutAmbiguityInfo(CRef<SAmbiguityInfo>& ambiguity);
539 
540  CRef<CSeq_id> GetGeneralOrPatentSeq_id(CTempString str,
541  const SSeq0TableCursor& cur,
542  TVDBRowId row) const;
543  CRef<CSeq_id> GetGeneralOrPatentSeq_id(CTempString str,
544  const SScfTableCursor& cur,
545  TVDBRowId row) const;
546  CRef<CSeq_id> GetGeneralOrPatentSeq_id(CTempString str,
547  const SProt0TableCursor& cur,
548  TVDBRowId row) const;
549 
550 protected:
551  // open tables
552  void OpenTable(CVDBTable& table,
553  atomic<bool>& table_is_opened,
554  const char* table_name);
555  void OpenIndex(const CVDBTable& table,
556  CVDBTableIndex& index,
557  atomic<Int1>& index_is_opened,
558  const char* index_name,
559  const char* backup_index_name = 0);
560 
561  void OpenScfTable(void);
562  void OpenProtTable(void);
563  void OpenFeatTable(void);
564  void OpenGiIdxTable(void);
565  void OpenProtIdxTable(void);
566  void OpenProtAccIndex(void);
567  void OpenContigNameIndex(void);
568  void OpenScaffoldNameIndex(void);
569  void OpenProteinNameIndex(void);
570  void OpenProductNameIndex(void);
571 
572  TVDBRowId Lookup(const string& name,
573  const CVDBTableIndex& index,
574  bool upcase);
575 
576  void x_InitIdParams(void);
577  void x_LoadMasterDescr(int filter);
578 
579  void x_SortGiRanges(TGiRanges& ranges);
580 
581 private:
583  string m_WGSPath;
587  string m_IdPrefix;
589  string m_IdPrefixDb;
592 
594  atomic<bool> m_ScfTableIsOpened;
595  atomic<bool> m_ProtTableIsOpened;
596  atomic<bool> m_FeatTableIsOpened;
597  atomic<bool> m_GiIdxTableIsOpened;
609 
623 
627 
632  atomic<EFeatLocIdType> m_FeatLocIdType;
638  string m_ReplacedBy;
640 };
643 
644 
645 class CWGSDb : public CRef<CWGSDb_Impl>
646 {
647 public:
648  CWGSDb(void)
649  {
650  }
652  : CRef<CWGSDb_Impl>(impl)
653  {
654  }
656  CTempString path_or_acc,
657  CTempString vol_path = CTempString())
658  : CRef<CWGSDb_Impl>(new CWGSDb_Impl(mgr, path_or_acc, vol_path))
659  {
660  }
661 
// Forwards to CWGSDb_Impl::GetWGSPath() on the referenced implementation
// object (accessed through GetObject()).
662  const string& GetWGSPath(void) const {
663  return GetObject().GetWGSPath();
664  }
665 
666  // parse row id from accession
667  // returns 0 if accession is in wrong format
668  // if is_scaffold flag pointer is not null, then scaffold ids are also
669  // accepted and the flag is set appropriately
// Forwards both arguments unchanged to CWGSDb_Impl::ParseRow() on the
// referenced implementation object. is_scaffold defaults to NULL here,
// whereas the implementation method declares no default.
671  TVDBRowId ParseRow(CTempString acc, bool* is_scaffold = NULL) const {
672  return GetObject().ParseRow(acc, is_scaffold);
673  }
674  // parse contig row id from accession
675  // returns 0 if accession is in wrong format
676  static
678  return CWGSDb_Impl::ParseContigRow(acc);
679  }
680  // parse scaffold row id from accession
681  // returns 0 if accession is in wrong format
682  static
684  return CWGSDb_Impl::ParseScaffoldRow(acc);
685  }
686  // parse protein row id from accession
687  // returns 0 if accession is in wrong format
688  static
690  return CWGSDb_Impl::ParseProteinRow(acc);
691  }
692 
693  // get GI range of nucleotide sequences
// Forwards to CWGSDb_Impl::GetNucGiRange(). GetNCObject() is used because
// the implementation method is declared non-const while this wrapper is
// const.
694  pair<TGi, TGi> GetNucGiRange(void) const {
695  return GetNCObject().GetNucGiRange();
696  }
697  // get GI range of protein sequences
// Forwards to CWGSDb_Impl::GetProtGiRange() through GetNCObject(), since
// the implementation method is declared non-const.
698  pair<TGi, TGi> GetProtGiRange(void) const {
699  return GetNCObject().GetProtGiRange();
700  }
703  // return sorted non-overlapping ranges of nucleotide GIs in the VDB
// Forwards to CWGSDb_Impl::GetNucGiRanges() through GetNCObject() and
// returns its result by value.
704  TGiRanges GetNucGiRanges(void) const {
705  return GetNCObject().GetNucGiRanges();
706  }
707  // return sorted non-overlapping ranges of protein GIs in the VDB
709  return GetNCObject().GetProtGiRanges();
710  }
711 
715  // return map of 3+5 accession ranges
716  // Key of each element is accession pattern, digital part zeroed.
718  return GetNCObject().GetProtAccRanges();
719  }
720 
721  // get row_id for a given GI or 0 if there is no GI
722  // the second value in returned value is true if the sequence is protein
// Forwards gi to CWGSDb_Impl::GetGiRowId() through GetNCObject() and
// returns the resulting (row id, is-protein) pair unchanged.
723  pair<TVDBRowId, bool> GetGiRowId(TGi gi) const {
724  return GetNCObject().GetGiRowId(gi);
725  }
726  // get nucleotide row_id (SEQUENCE) for a given GI or 0 if there is no GI
728  return GetNCObject().GetNucGiRowId(gi);
729  }
730  // get protein row_id (PROTEIN) for a given GI or 0 if there is no GI
732  return GetNCObject().GetProtGiRowId(gi);
733  }
734 
735  // get nucleotide row_id (SEQUENCE) for a given contig name or 0 if
736  // name not found.
// Forwards name to CWGSDb_Impl::GetContigNameRowId() through
// GetNCObject(), since the implementation method is non-const.
737  TVDBRowId GetContigNameRowId(const string& name) const {
738  return GetNCObject().GetContigNameRowId(name);
739  }
740 
741  // get scaffold row_id (SCAFFOLD) for a given scaffold name or 0 if
742  // name not found.
// Forwards name to CWGSDb_Impl::GetScaffoldNameRowId() through
// GetNCObject(), since the implementation method is non-const.
743  TVDBRowId GetScaffoldNameRowId(const string& name) const {
744  return GetNCObject().GetScaffoldNameRowId(name);
745  }
746 
747  // get protein row_id (PROTEIN) for a protein name or 0 if
748  // name not found.
// Forwards name to CWGSDb_Impl::GetProteinNameRowId() through
// GetNCObject(), since the implementation method is non-const.
749  TVDBRowId GetProteinNameRowId(const string& name) const {
750  return GetNCObject().GetProteinNameRowId(name);
751  }
752 
753  // get protein row_id (PROTEIN) for a product name or 0 if
754  // name not found.
// Forwards name to CWGSDb_Impl::GetProductNameRowId() through
// GetNCObject(), since the implementation method is non-const.
755  TVDBRowId GetProductNameRowId(const string& name) const {
756  return GetNCObject().GetProductNameRowId(name);
757  }
758 
759  // get protein row_id (PROTEIN) for GB accession or 0 if there is no acc.version
760  // if version == -1 return latest protein version
// Forwards acc and version to CWGSDb_Impl::GetProtAccRowId() through
// GetNCObject(); the default version of -1 matches the default declared
// on the implementation method.
761  TVDBRowId GetProtAccRowId(const string& acc, int version = -1) const {
762  return GetNCObject().GetProtAccRowId(acc, version);
763  }
764 
768  };
769  // load master descriptors from VDB metadata (if any)
770  // doesn't try to load if master descriptors already set
771  // returns true if descriptors are set at the end, or false if not
773  return GetNCObject().LoadMasterDescr(filter);
774  }
775  // set master descriptors
776  typedef list< CRef<CSeqdesc> > TMasterDescr;
// Forwards descr and filter to CWGSDb_Impl::SetMasterDescr() through
// GetNCObject(). This wrapper is const but modifies the shared
// implementation object; the EDescrFilter value is passed to the
// implementation's int filter parameter.
777  void SetMasterDescr(const TMasterDescr& descr,
778  EDescrFilter filter = eDescrDefaultFilter) const {
779  GetNCObject().SetMasterDescr(descr, filter);
780  }
781  enum EDescrType {
785  };
786  // return type of master descriptor propagation
787  static EDescrType GetMasterDescrType(const CSeqdesc& desc);
788 };
789 
790 
792 {
793 public:
794  enum EWithdrawn {
796  eIncludeWithdrawn
797  };
798 
// Selects how quality clipping is decided: use the iterator's configured
// default, force it off, or force it on.
799  enum EClipType {
800  eDefaultClip, // as defined by config
801  eNoClip, // force no clipping
802  eClipByQuality // force clipping
803  };
805  fIncludeLive = 1 << NCBI_gb_state_eWGSGenBankLive,
806  fIncludeSuppressed = 1 << NCBI_gb_state_eWGSGenBankSuppressed,
807  fIncludeReplaced = 1 << NCBI_gb_state_eWGSGenBankReplaced,
808  fIncludeWithdrawn = 1 << NCBI_gb_state_eWGSGenBankWithdrawn,
809  fIncludeUnverified = 1 << NCBI_gb_state_eWGSGenBankUnverified,
810  fIncludeAll = 0xff,
811  fIncludeDefault = fIncludeLive | fIncludeUnverified
812  };
814 
815  CWGSSeqIterator(void);
816  // TIncludeFlags versions
817  explicit
818  CWGSSeqIterator(const CWGSDb& wgs_db,
819  EIncludeFlags flags = fIncludeDefault,
820  EClipType clip_type = eDefaultClip);
821  CWGSSeqIterator(const CWGSDb& wgs_db,
822  TVDBRowId row,
823  EIncludeFlags flags = fIncludeDefault,
824  EClipType clip_type = eDefaultClip);
825  CWGSSeqIterator(const CWGSDb& wgs_db,
826  TVDBRowId first_row, TVDBRowId last_row,
827  EIncludeFlags flags = fIncludeDefault,
828  EClipType clip_type = eDefaultClip);
829  CWGSSeqIterator(const CWGSDb& wgs_db,
830  CTempString acc,
831  EIncludeFlags flags = fIncludeDefault,
832  EClipType clip_type = eDefaultClip);
833  CWGSSeqIterator(const CWGSDb& wgs_db,
834  TIncludeFlags flags,
835  EClipType clip_type = eDefaultClip);
836  CWGSSeqIterator(const CWGSDb& wgs_db,
837  TVDBRowId row,
838  TIncludeFlags flags,
839  EClipType clip_type = eDefaultClip);
840  CWGSSeqIterator(const CWGSDb& wgs_db,
841  TVDBRowId first_row, TVDBRowId last_row,
842  TIncludeFlags flags,
843  EClipType clip_type = eDefaultClip);
844  CWGSSeqIterator(const CWGSDb& wgs_db,
845  CTempString acc,
846  TIncludeFlags flags,
847  EClipType clip_type = eDefaultClip);
848  // EWithdrawn versions (deprecated)
850  CWGSSeqIterator(const CWGSDb& wgs_db,
851  EWithdrawn withdrawn,
852  EClipType clip_type = eDefaultClip);
854  CWGSSeqIterator(const CWGSDb& wgs_db,
855  TVDBRowId row,
856  EWithdrawn withdrawn,
857  EClipType clip_type = eDefaultClip);
859  CWGSSeqIterator(const CWGSDb& wgs_db,
860  TVDBRowId first_row, TVDBRowId last_row,
861  EWithdrawn withdrawn,
862  EClipType clip_type = eDefaultClip);
864  CWGSSeqIterator(const CWGSDb& wgs_db,
865  CTempString acc,
866  EWithdrawn withdrawn,
867  EClipType clip_type = eDefaultClip);
868  ~CWGSSeqIterator(void);
869 
870  void Reset(void);
871  CWGSSeqIterator(const CWGSSeqIterator& iter);
873 
874  CWGSSeqIterator& SelectRow(TVDBRowId row);
875 
876  DECLARE_OPERATOR_BOOL(m_CurrId < m_FirstBadId);
877 
878  CWGSSeqIterator& operator++(void);
879 
881  return m_CurrId;
882  }
884  return m_FirstGoodId;
885  }
887  return m_FirstBadId;
888  }
// Last row id of the iteration range, computed as one less than
// GetFirstBadRowId().
889  TVDBRowId GetLastRowId(void) const {
890  return GetFirstBadRowId() - 1;
891  }
893  return GetFirstBadRowId() - GetCurrentRowId();
894  }
// Number of row ids in the iteration range: first bad row id minus
// first good row id.
895  TVDBRowId GetSize(void) const {
896  return GetFirstBadRowId() - GetFirstGoodRowId();
897  }
898 
899  bool HasGi(void) const;
900  CSeq_id::TGi GetGi(void) const;
901  CTempString GetAccession(void) const;
902 
903  int GetLatestAccVersion(void) const;
904  unsigned GetAccVersionCount(void) const;
905  bool HasAccVersion(int version) const;
906 
// Currently selected accession version: the latest version plus the
// selector offset m_AccVersion.m_Offset (0 selects the latest).
907  int GetAccVersion(void) const {
908  return GetLatestAccVersion() + m_AccVersion.m_Offset;
909  }
910 
911  // default SVersion object means latest version
912  // offset is negative, -1 means previous version, -2 is second previous...
// Tag type with a single value; the SVersionSelector(ELatest) constructor
// takes it and sets the offset to 0.
913  enum ELatest {
914  eLatest
915  };
917  SVersionSelector(ELatest) : m_Offset(0) {}
918  int m_Offset;
919  };
920 
921  // if version == -1 select latest version
922  void SelectAccVersion(int version);
923 
924  bool HasTitle(void) const;
925  CTempString GetTitle(void) const;
926 
927  // return raw trim/clip values
928  TSeqPos GetClipQualityLeft(void) const;
929  TSeqPos GetClipQualityLength(void) const;
931  {
932  // inclusive
933  return GetClipQualityLeft() + GetClipQualityLength() - 1;
934  }
935 
936  // Returns true if current read has clipping info that can or does
937  // reduce sequence length.
938  bool HasClippingInfo(void) const;
939  // Returns true if current read is actually clipped by quality.
940  // It can be true only if clipping by quality is on.
// True only when m_ClipByQuality is set and HasClippingInfo() is true;
// HasClippingInfo() is not evaluated when clipping is off.
941  bool IsClippedByQuality(void) const {
942  return m_ClipByQuality && HasClippingInfo();
943  }
944  // Returns true if current read has actual clipping info that is not
945  // applied because clipping by quality is off.
// True only when m_ClipByQuality is NOT set and HasClippingInfo() is
// true; HasClippingInfo() is not evaluated when clipping is on.
946  bool ShouldBeClippedByQuality(void) const {
947  return !m_ClipByQuality && HasClippingInfo();
948  }
949 
950  // return clip type
// Resolves a clip type to a boolean "clip by quality" decision:
//  - eDefaultClip: the iterator's own m_ClipByQuality setting;
//  - any other value: true only for eClipByQuality.
951  bool GetClipByQualityFlag(EClipType clip_type = eDefaultClip) const
952  {
953  return (clip_type == eDefaultClip?
954  m_ClipByQuality:
955  clip_type == eClipByQuality);
956  }
957 
958  // return raw unclipped sequence length
959  TSeqPos GetRawSeqLength(void) const;
960  // return clipping start position within raw unclipped sequence
961  TSeqPos GetSeqOffset(EClipType clip_type = eDefaultClip) const;
962  // return effective sequence length, depending on clip type
963  TSeqPos GetSeqLength(EClipType clip_type = eDefaultClip) const;
964 
965  // return corresponding kind of Seq-id if exists
966  // return null if there is no such Seq-id
967  CRef<CSeq_id> GetAccSeq_id(void) const;
968  CRef<CSeq_id> GetGiSeq_id(void) const;
969  NCBI_DEPRECATED // use GetGeneralOrPatentSeq_id() instead
970  CRef<CSeq_id> GetGeneralSeq_id(void) const;
971  CRef<CSeq_id> GetGeneralOrPatentSeq_id(void) const;
972 
973  //CTempString GetGeneralId(void) const;
974  CTempString GetContigName(void) const;
975 
976  bool HasTaxId(void) const;
977  TTaxId GetTaxId(void) const;
978 
980  bool HasSeqHash(void) const;
981  THash GetSeqHash(void) const;
982 
983  TVDBRowIdRange GetLocFeatRowIdRange(void) const;
984 
985  typedef struct SWGSContigGapInfo {
986  size_t gaps_count;
992  : gaps_count(0),
993  gaps_start(0),
994  gaps_len(0),
995  gaps_props(0),
996  gaps_linkage(0)
997  {
998  }
999 
1000  DECLARE_OPERATOR_BOOL(gaps_count > 0);
// Advances to the next gap. Asserts (debug check) that at least one gap
// remains, then decrements gaps_count and steps gaps_start, gaps_len and
// gaps_props by one element each.
1001  void operator++(void) {
1002  _ASSERT(*this);
1003  --gaps_count;
1004  ++gaps_start;
1005  ++gaps_len;
1006  ++gaps_props;
// gaps_linkage is advanced only when it is non-zero (it is initialised
// to 0 by the default constructor).
1007  if ( gaps_linkage ) {
1008  ++gaps_linkage;
1009  }
1010  }
// Length of the current gap, read through gaps_len; no check that a gap
// remains.
1011  TSeqPos GetLength(void) const { return *gaps_len; }
// Start position of the current gap, read through gaps_start; no check
// that a gap remains.
1012  TSeqPos GetFrom(void) const { return *gaps_start; }
// Open (exclusive) end of the current gap: start plus length.
1013  TSeqPos GetToOpen(void) const { return GetFrom()+GetLength(); }
// Inclusive end of the current gap: one less than the open end.
1014  TSeqPos GetTo(void) const { return GetToOpen()-1; }
1015 
1016  // prepare iteration starting with pos
1017  void SetPos(TSeqPos pos);
1018  // check if pos is in current gap, ++ after use
// True when a gap remains and pos is at or after the current gap's start.
// Only the start is compared; the gap's end is not checked here.
1019  bool IsInGap(TSeqPos pos) const { return *this && pos >= GetFrom(); }
1020  // return intersecting length of current gap
1022  return min(len, GetToOpen()-pos);
1023  }
1024  // return intersecting length of data before current gap
1026  return !*this? len: min(len, GetFrom()-pos);
1027  }
1028  } TWGSContigGapInfo;
1029 
1030  bool HasGapInfo(void) const;
1031  void GetGapInfo(TWGSContigGapInfo& gap_info) const;
1032  vector<Uint1> GetAmbiguityBytes() const;
1033 
1034  CRef<CSeq_id> GetId(TFlags flags = fDefaultFlags) const;
1035  void GetIds(CBioseq::TId& ids, TFlags flags = fDefaultFlags) const;
1036 
1037  // Return descr binary byte sequence as is
1038  bool HasSeqDescrBytes(void) const;
1039  CTempString GetSeqDescrBytes(void) const;
1040  // Return descr binary byte sequence for nuc-prot set as is
1041  bool HasNucProtDescrBytes(void) const;
1042  CTempString GetNucProtDescrBytes(void) const;
1043  // return effective descr
1044  bool HasSeq_descr(TFlags flags = fDefaultFlags) const;
1045  // Parse the binary byte sequence and instantiate ASN.1 object
1046  CRef<CSeq_descr> GetSeq_descr(TFlags flags = fDefaultFlags) const;
1047 
1048  bool HasAnnotSet(void) const;
1049  // Return annot binary byte sequence as is
1050  CTempString GetAnnotBytes(void) const;
1052  void GetAnnotSet(TAnnotSet& annot_set, TFlags flags = fDefaultFlags) const;
1053 
1054  // check if the VDB structure allows quality graph in data
1055  bool CanHaveQualityGraph(void) const;
1056  // check if this sequence has quality graph
1057  bool HasQualityGraph(void) const;
1058  void GetQualityVec(vector<INSDC_quality_phred>& quality_vec) const;
1059  void GetQualityAnnot(TAnnotSet& annot_set,
1060  TFlags flags = fDefaultFlags) const;
1061  string GetQualityAnnotName(void) const;
1062 
1063  NCBI_gb_state GetGBState(void) const;
1064 
1065  bool HasPublicComment(void) const;
1066  CTempString GetPublicComment(void) const;
1067 
1068  bool IsCircular(void) const;
1069 
1070  CRef<CSeq_inst> GetSeq_inst(TFlags flags = fDefaultFlags) const;
1073  TWGSContigGapInfo gap_info,
1074  vector< COpenRange<TSeqPos> >* split = 0) const;
1075  CRef<CSeq_data> Get2na(TSeqPos pos, TSeqPos len) const;
1076  CRef<CSeq_data> Get4na(TSeqPos pos, TSeqPos len) const;
1077 
1080  struct SAmbiguityAccess;
1081  SAmbiguityAccess GetAmbiguity() const;
1083  SAmbiguityAccess& ambiguity) const;
1085  SAmbiguityAccess& ambiguity) const;
1086 
1087  CRef<CBioseq> GetBioseq(TFlags flags = fDefaultFlags) const;
1088  // GetSeq_entry may create nuc-prot set if the sequence has products
1089  CRef<CSeq_entry> GetSeq_entry(TFlags flags = fDefaultFlags) const;
1090  CRef<CAsnBinData> GetSeq_entryData(TFlags flags = fDefaultFlags) const;
1091  // GetSplitInfo may create Seq-entry as a skeleton w/o actual splitting
1093  CRef<CID2S_Split_Info> GetSplitInfo(TFlags flags = fDefaultFlags) const;
1095  CRef<CAsnBinData> GetSplitInfoData(TFlags flags = fDefaultFlags) const;
1096  pair<CRef<CID2S_Split_Info>, TSplitVersion>
1097  GetSplitInfoAndVersion(TFlags flags = fDefaultFlags) const;
1098  pair<CRef<CAsnBinData>, TSplitVersion>
1099  GetSplitInfoDataAndVersion(TFlags flags = fDefaultFlags) const;
1100  // Make chunk for the above split-info, flags must be the same
1102  CRef<CID2S_Chunk> GetChunk(TChunkId chunk_id,
1103  TFlags flags = fDefaultFlags) const;
1105  CRef<CAsnBinData> GetChunkData(TChunkId chunk_id,
1106  TFlags flags = fDefaultFlags) const;
1107  CRef<CID2S_Chunk> GetChunkForVersion(TChunkId chunk_id,
1108  TSplitVersion split_version) const;
1109  CRef<CAsnBinData> GetChunkDataForVersion(TChunkId chunk_id,
1110  TSplitVersion split_version) const;
1111 
1113  fInst_MakeData = 1<<0, // generate Seq-data in data segments
1114  fInst_MakeGaps = 1<<1, // generate gap segments
1115  fInst_Split = 1<<2, // split data by chunk boundaries
1116  fInst_Minimal = 1<<3 // minimize number of data segments
1117  };
1119 
1120 protected:
1121  void x_Select(const CWGSDb& wgs_db,
1122  TIncludeFlags include_flags,
1123  EClipType clip_type);
1124  void x_Select(const CWGSDb& wgs_db,
1125  TIncludeFlags include_flags,
1126  EClipType clip_type,
1127  TVDBRowId get_row);
1128  void x_Select(const CWGSDb& wgs_db,
1129  TIncludeFlags include_flags,
1130  EClipType clip_type,
1131  TVDBRowId first_row,
1132  TVDBRowId last_row);
1133  void x_Select(const CWGSDb& wgs_db,
1134  TIncludeFlags include_flags,
1135  EClipType clip_type,
1136  CTempString acc);
1137 
1138  void x_Init(const CWGSDb& wgs_db,
1139  TIncludeFlags include_flags,
1140  EClipType clip_type,
1141  TVDBRowId get_row);
1142 
// Returns a non-const reference to the implementation object held by
// m_Db (via GetNCObject()), even though this accessor itself is const.
1143  CWGSDb_Impl& GetDb(void) const {
1144  return m_Db.GetNCObject();
1145  }
1146 
1147  void x_Settle(void);
1148  bool x_Excluded(void) const;
1149 
// Validity guard. x_CheckValid() does nothing while the iterator tests true;
// when `!*this` holds it forwards the caller-supplied method name to
// x_ReportInvalid().
// NOTE(review): x_ReportInvalid() is defined out of line; presumably it
// throws - confirm in wgsread.cpp.
1150  void x_ReportInvalid(const char* method) const;
1151  void x_CheckValid(const char* method) const {
1152  if ( !*this ) {
1153  x_ReportInvalid(method);
1154  }
1155  }
1156 
// Builds the SVersionSelector for the requested accession version.
1157  // if version == -1 return latest version
1158  SVersionSelector x_GetAccVersionSelector(int version) const;
1159 
// Object-construction helpers. Each one works on the SWGSCreateInfo passed
// by non-const reference; x_CreateChunk additionally takes the chunk id.
// NOTE(review): the meaning of x_InitSplit()'s bool result is not visible
// here (presumably "split is applicable") - confirm in wgsread.cpp.
1160  void x_CreateEntry(SWGSCreateInfo& info) const;
1161  void x_CreateBioseq(SWGSCreateInfo& info) const;
1162  bool x_InitSplit(SWGSCreateInfo& info) const;
1163  void x_CreateSplit(SWGSCreateInfo& info) const;
1164  void x_CreateChunk(SWGSCreateInfo& info,
1165  TChunkId chunk_id) const;
1166 
// Per-kind chunk builders (data, quality, products, features). Each takes
// the SWGSCreateInfo to work on plus an unsigned index.
// NOTE(review): presumably x_CreateChunk() dispatches to these and `index`
// is the chunk's ordinal within its kind - confirm in wgsread.cpp.
1167  void x_CreateDataChunk(SWGSCreateInfo& info,
1168  unsigned index) const;
1169  void x_CreateQualityChunk(SWGSCreateInfo& info,
1170  unsigned index) const;
1171  void x_CreateProductsChunk(SWGSCreateInfo& info,
1172  unsigned index) const;
1173  void x_CreateFeaturesChunk(SWGSCreateInfo& info,
1174  unsigned index) const;
1175 
// Quality helpers: the first returns a size as TSeqPos, the second works on
// the SWGSCreateInfo passed by reference.
// NOTE(review): exact semantics are defined in wgsread.cpp - confirm there.
1176  TSeqPos x_GetQualityArraySize(void) const;
1177  void x_AddQualityChunkInfo(SWGSCreateInfo& info) const;
1178  void x_GetQualityAnnot(TAnnotSet& annot_set,
1180  TSeqPos pos = 0,
1181  TSeqPos len = kInvalidSeqPos) const;
1182 
1184  SAmbiguityAccess& ambiguity) const;
1185 
1186  // methods to be used if no gap information exist
1188  SAmbiguityAccess& ambiguity) const;
1190  SAmbiguityAccess& ambiguity) const;
1192  TSeqPos stop_2na_len,
1193  TSeqPos stop_gap_len,
1194  SAmbiguityAccess& ambiguity) const;
1195 
1196  // methods to be used with gap information
1197  bool x_AmbiguousBlock(TSeqPos block_index,
1198  SAmbiguityAccess& ambiguity) const;
1200  SAmbiguityAccess& ambiguity) const;
1202  SAmbiguityAccess& ambiguity) const;
1203 
// One element of the segment list (TSegments) taken by the gap/delta
// helpers; is_gap flags a gap segment.
// NOTE(review): this listing omits source lines 1205 and 1207; the page's
// cross-reference index shows them as `COpenRange<TSeqPos> range` and
// `CRef<CSeq_literal> literal`.
1204  struct SSegment {
1206  bool is_gap;
1208  };
1209  typedef vector<SSegment> TSegments;
1210 
// Range / segment helpers:
//  - x_NormalizeSeqRange takes a range by value and returns a range
//    (NOTE(review): presumably clamped to the sequence - confirm).
//  - x_AddGap receives the segment list by non-const reference together with
//    a position, a length and the contig gap info.
//  - x_SetDelta / x_SetDeltaOrData receive the CSeq_inst to modify and a
//    read-only segment list.
1211  COpenRange<TSeqPos> x_NormalizeSeqRange(COpenRange<TSeqPos> range) const;
1212  void x_AddGap(TSegments& segments,
1213  TSeqPos pos, TSeqPos len,
1214  const TWGSContigGapInfo& gap_info) const;
1215  void x_SetDelta(CSeq_inst& inst, const TSegments& segments) const;
1216  void x_SetDeltaOrData(CSeq_inst& inst, const TSegments& segments) const;
1217 
1218  void x_GetSegmentsWithExplicitGaps(TSegments& data,
1220  TWGSContigGapInfo gap_info,
1221  TInstSegmentFlags flags) const;
// x_GetSegmentsWithRecoveredGaps writes into `segments` for the given range.
// NOTE(review): presumably the variant used when no explicit gap information
// is stored - confirm in wgsread.cpp.
// x_GetSeq_inst returns a CSeq_inst built from the SWGSCreateInfo.
1222  void x_GetSegmentsWithRecoveredGaps(TSegments& segments,
1223  COpenRange<TSeqPos> range) const;
1224 
1225  CRef<CSeq_inst> x_GetSeq_inst(SWGSCreateInfo& info) const;
1226 
1227 private:
// Iterator state (this listing omits some member lines).
// m_CurrId is the current row id; m_FirstGoodId / m_FirstBadId bound the row
// range. NOTE(review): presumably half-open [m_FirstGoodId, m_FirstBadId),
// matching the `m_CurrId < m_FirstBadId` validity test - confirm.
1229  CRef<CWGSDb_Impl::SSeq0TableCursor> m_Cur0; // VDB seq table accessor
1230  CRef<CWGSDb_Impl::SSeqTableCursor> m_Cur; // VDB seq table accessor
1232  TVDBRowId m_CurrId, m_FirstGoodId, m_FirstBadId;
1234  TIncludeFlags m_IncludeFlags;
1236 };
1239 
1240 
1242 {
1243 public:
// Construction: default; over a database (explicit, so a CWGSDb does not
// convert implicitly); over a database plus a row id; over a database plus
// an accession string.
// SelectRow() returns a reference to the iterator type, so calls can be
// chained.
1244  CWGSScaffoldIterator(void);
1245  explicit
1246  CWGSScaffoldIterator(const CWGSDb& wgs_db);
1247  CWGSScaffoldIterator(const CWGSDb& wgs_db, TVDBRowId row);
1248  CWGSScaffoldIterator(const CWGSDb& wgs_db, CTempString acc);
1249  ~CWGSScaffoldIterator(void);
1250 
1251  void Reset(void);
1254 
1255  CWGSScaffoldIterator& SelectRow(TVDBRowId row);
1256 
// Boolean test of the iterator: the expression handed to the macro is
// `m_CurrId < m_FirstBadId`, i.e. true while the current row id is below the
// first bad row id.
1257  DECLARE_OPERATOR_BOOL(m_CurrId < m_FirstBadId);
1258 
1260  ++m_CurrId;
1261  return *this;
1262  }
1263 
1265  return m_CurrId;
1266  }
1268  return m_FirstGoodId;
1269  }
1271  return m_FirstBadId;
1272  }
1274  return GetFirstBadRowId() - GetCurrentRowId();
1275  }
// Number of rows in the iterator's range: first bad row id minus first good
// row id.
1276  TVDBRowCount GetSize(void) const {
1277  return GetFirstBadRowId() - GetFirstGoodRowId();
1278  }
1279 
// Accession text and version, followed by Seq-id getters (accession, gi,
// general). GetGeneralSeq_id() is deprecated in favour of
// GetGeneralOrPatentSeq_id().
1280  CTempString GetAccession(void) const;
1281  int GetAccVersion(void) const;
1282 
1283  CRef<CSeq_id> GetAccSeq_id(void) const;
1284  CRef<CSeq_id> GetGiSeq_id(void) const;
1285  NCBI_DEPRECATED // use GetGeneralOrPatentSeq_id() instead
1286  CRef<CSeq_id> GetGeneralSeq_id(void) const;
1287  CRef<CSeq_id> GetGeneralOrPatentSeq_id(void) const;
1288 
// Scaffold name, feature row range and GenBank state.
// TVDBRowIdRange is a pair of (TVDBRowId, TVDBRowCount) per the vdbread.hpp
// typedef shown in this page's index.
1289  CTempString GetScaffoldName(void) const;
1290 
1291  TVDBRowIdRange GetLocFeatRowIdRange(void) const;
1292 
1293  NCBI_gb_state GetGBState(void) const;
1294 
// GetId() returns one Seq-id; GetIds() fills the caller's id list instead.
// HasSeq_descr()/GetSeq_descr() are a test/fetch pair taking the same flags.
// GetSeqLength() and IsCircular() take no flags.
1295  CRef<CSeq_id> GetId(TFlags flags = fDefaultFlags) const;
1296  void GetIds(CBioseq::TId& ids, TFlags flags = fDefaultFlags) const;
1297 
1298  bool HasSeq_descr(TFlags flags = fDefaultFlags) const;
1299  CRef<CSeq_descr> GetSeq_descr(TFlags flags = fDefaultFlags) const;
1300 
1301  TSeqPos GetSeqLength(void) const;
1302 
1303  bool IsCircular(void) const;
1304 
// Object builders for the current scaffold: Seq-inst, Bioseq and Seq-entry,
// each controlled by the optional flags.
1305  CRef<CSeq_inst> GetSeq_inst(TFlags flags = fDefaultFlags) const;
1306 
1307  CRef<CBioseq> GetBioseq(TFlags flags = fDefaultFlags) const;
1308  // GetSeq_entry may create nuc-prot set if the sequence has products
1309  CRef<CSeq_entry> GetSeq_entry(TFlags flags = fDefaultFlags) const;
1310 
1311 protected:
// Initialization helper taking the database.
// NOTE(review): presumably shared by the constructors - confirm in
// wgsread.cpp.
1312  void x_Init(const CWGSDb& wgs_db);
1313 
// Returns the database implementation object held in m_Db as a non-const
// reference (via GetNCObject()), even though this accessor itself is const.
1314  CWGSDb_Impl& GetDb(void) const {
1315  return m_Db.GetNCObject();
1316  }
1317 
// Validity guard. x_CheckValid() does nothing while the iterator tests true;
// when `!*this` holds it forwards the caller-supplied method name to
// x_ReportInvalid().
// NOTE(review): x_ReportInvalid() is defined out of line; presumably it
// throws - confirm in wgsread.cpp.
1318  void x_ReportInvalid(const char* method) const;
1319  void x_CheckValid(const char* method) const {
1320  if ( !*this ) {
1321  x_ReportInvalid(method);
1322  }
1323  }
1324 
// Construction helpers operating on the SWGSCreateInfo passed by reference.
// NOTE(review): presumably the back ends of GetBioseq()/GetSeq_entry() -
// confirm in wgsread.cpp.
1325  void x_CreateBioseq(SWGSCreateInfo& info) const;
1326  void x_CreateEntry(SWGSCreateInfo& info) const;
1327 
1328 private:
// Iterator state: table cursor, current row id, and the row range bounds.
// The range is half-open [m_FirstGoodId, m_FirstBadId): the bool test is
// `m_CurrId < m_FirstBadId` and GetSize() is the difference of the bounds.
1330  CRef<CWGSDb_Impl::SScfTableCursor> m_Cur; // VDB scaffold table accessor
1331  TVDBRowId m_CurrId, m_FirstGoodId, m_FirstBadId;
1332 };
1333 
1334 
1336 {
1337 public:
// Sequence type bit mask: eNuc is bit 0, eProt is bit 1, and eAll is the
// bitwise OR of both.
1338  enum ESeqType {
1339  eNuc = 1 << 0,
1340  eProt = 1 << 1,
1341  eAll = eNuc | eProt
1342  };
// Construction: default; over a database with an optional sequence-type
// filter (defaults to eAll); over a database plus a gi, with the same
// optional filter. Copy construction and copy assignment are declared
// explicitly.
1343  CWGSGiIterator(void);
1344  explicit
1345  CWGSGiIterator(const CWGSDb& wgs_db, ESeqType seq_type = eAll);
1346  CWGSGiIterator(const CWGSDb& wgs_db, TGi gi, ESeqType seq_type = eAll);
1347  ~CWGSGiIterator(void);
1348 
1349  void Reset(void);
1350  CWGSGiIterator(const CWGSGiIterator& iter);
1351  CWGSGiIterator& operator=(const CWGSGiIterator& iter);
1352 
// Boolean test of the iterator: the expression handed to the macro is
// `m_CurrGi < m_FirstBadGi`, i.e. true while the current gi is below the
// first bad gi.
1353  DECLARE_OPERATOR_BOOL(m_CurrGi < m_FirstBadGi);
1354 
1356  ++m_CurrGi;
1357  x_Settle();
1358  return *this;
1359  }
1360 
1361  // get currently selected gi
// Returns m_CurrGi unchanged.
1362  TGi GetGi(void) const {
1363  return m_CurrGi;
1364  }
1365 
1366  // get currently selected gi type, eNuc or eProt
// Returns m_CurrSeqType unchanged.
1367  ESeqType GetSeqType(void) const {
1368  return m_CurrSeqType;
1369  }
1370 
1371  // get currently selected gi row id in corresponding nuc or prot table
// Returns m_CurrRowId unchanged; GetSeqType() tells which table the row id
// belongs to.
1372  TVDBRowId GetRowId(void) const {
1373  return m_CurrRowId;
1374  }
1375 
1376 protected:
// Initialization helper taking the database and the sequence-type filter.
// NOTE(review): presumably shared by the constructors - confirm in
// wgsread.cpp.
1377  void x_Init(const CWGSDb& wgs_db, ESeqType seq_type);
1378 
// Returns the database implementation object held in m_Db as a non-const
// reference (via GetNCObject()), even though this accessor itself is const.
1379  CWGSDb_Impl& GetDb(void) const {
1380  return m_Db.GetNCObject();
1381  }
1382 
// x_Settle() is what operator++ calls right after incrementing m_CurrGi.
// NOTE(review): presumably it skips gis for which x_Excluded() is true -
// confirm in wgsread.cpp. Unlike its CWGSSeqIterator counterpart, this
// x_Excluded() is declared non-const.
1383  void x_Settle(void);
1384  bool x_Excluded(void);
1385 
1386 private:
// Iterator state (this listing omits source line 1390, which the page's
// index shows as `TVDBRowId m_CurrRowId`).
// m_CurrGi / m_FirstBadGi: current gi and the bound used by the bool test.
// m_CurrSeqType is what GetSeqType() returns. NOTE(review): m_FilterSeqType
// presumably stores the seq_type constructor argument - confirm.
1388  CRef<CWGSDb_Impl::SGiIdxTableCursor> m_Cur; // VDB GI index table accessor
1389  TGi m_CurrGi, m_FirstBadGi;
1391  ESeqType m_CurrSeqType, m_FilterSeqType;
1392 };
1393 
1394 
1396 {
1397 public:
// Construction: default; over a database (explicit, so a CWGSDb does not
// convert implicitly); over a database plus a row id; over a database plus
// an accession string.
1398  CWGSProteinIterator(void);
1399  explicit
1400  CWGSProteinIterator(const CWGSDb& wgs_db);
1401  CWGSProteinIterator(const CWGSDb& wgs_db, TVDBRowId row);
1402  CWGSProteinIterator(const CWGSDb& wgs_db, CTempString acc);
1403  ~CWGSProteinIterator(void);
1404 
1405  void Reset(void);
1408 
// Boolean test of the iterator: the expression handed to the macro is
// `m_CurrId < m_FirstBadId`, i.e. true while the current row id is below the
// first bad row id.
1409  DECLARE_OPERATOR_BOOL(m_CurrId < m_FirstBadId);
1410 
1412  ++m_CurrId;
1413  return *this;
1414  }
1415 
1417  return m_CurrId;
1418  }
1420  return m_FirstGoodId;
1421  }
1423  return m_FirstBadId;
1424  }
1426  return GetFirstBadRowId() - GetCurrentRowId();
1427  }
// Number of rows in the iterator's range: first bad row id minus first good
// row id.
1428  TVDBRowCount GetSize(void) const {
1429  return GetFirstBadRowId() - GetFirstGoodRowId();
1430  }
1431 
// Takes a row id and returns a reference to the iterator type, so calls can
// be chained. NOTE(review): presumably repositions on `row` - confirm.
1432  CWGSProteinIterator& SelectRow(TVDBRowId row);
1433 
// Gi test/fetch pair, accession text and version, then Seq-id getters
// (accession, gi, general). GetGeneralSeq_id() is deprecated in favour of
// GetGeneralOrPatentSeq_id().
// NOTE(review): presumably GetGi() is meaningful only when HasGi() is true -
// confirm.
1434  bool HasGi(void) const;
1435  CSeq_id::TGi GetGi(void) const;
1436 
1437  CTempString GetAccession(void) const;
1438  int GetAccVersion(void) const;
1439 
1440  CRef<CSeq_id> GetAccSeq_id(void) const;
1441  CRef<CSeq_id> GetGiSeq_id(void) const;
1442  NCBI_DEPRECATED // use GetGeneralOrPatentSeq_id() instead
1443  CRef<CSeq_id> GetGeneralSeq_id(void) const;
1444  CRef<CSeq_id> GetGeneralOrPatentSeq_id(void) const;
1445 
// Name getters, followed by test/fetch pairs for the tax id and the sequence
// hash. THash is NCBI_WGS_hash per the typedef on the omitted source line
// 1452 (see this page's index).
1446  CTempString GetProteinName(void) const;
1447  CTempString GetProductName(void) const;
1448 
1449  bool HasTaxId(void) const;
1450  TTaxId GetTaxId(void) const;
1451 
1453  bool HasSeqHash(void) const;
1454  THash GetSeqHash(void) const;
1455 
// Feature-table links of the current protein. Product features can be
// enumerated with GetProductFeatCount() + GetProductFeatRowId(index), or
// fetched through the "best"/"single" convenience getters, which return 0
// when there is none (see the existing notes). The last two getters return
// row ids linking replaced and replacing records.
// NOTE(review): source line 1461 is missing from this listing; it may carry
// an attribute for the no-argument GetProductFeatRowId() - check the file.
1456  TVDBRowIdRange GetLocFeatRowIdRange(void) const;
1457  size_t GetProductFeatCount(void) const;
1458  TVDBRowId GetProductFeatRowId(size_t index) const;
1459  // return best product feature or 0 if none
1460  TVDBRowId GetBestProductFeatRowId(void) const;
1462  TVDBRowId GetProductFeatRowId(void) const; // single or 0 if none
1463 
1464  TVDBRowId GetReplacedByRowId(void) const;
1465  TVDBRowId GetReplacesRowId(void) const;
1466 
// GetId() returns one Seq-id; GetIds() fills the caller's id list instead.
// Then: test/fetch pairs for the reference accession and the public comment,
// the GenBank state, and the sequence length.
1467  CRef<CSeq_id> GetId(TFlags flags = fDefaultFlags) const;
1468  void GetIds(CBioseq::TId& ids, TFlags flags = fDefaultFlags) const;
1469 
1470  // reference protein accession WP_
1471  bool HasRefAcc(void) const;
1472  CTempString GetRefAcc(void) const;
1473 
1474  NCBI_gb_state GetGBState(void) const;
1475 
1476  bool HasPublicComment(void) const;
1477  CTempString GetPublicComment(void) const;
1478 
1479  TSeqPos GetSeqLength(void) const;
1480 
// Test/fetch pairs for the descriptor set, the title and the annotation set.
// GetAnnotSet() writes into the caller's TAnnotSet (CBioseq::TAnnot per the
// typedef on the omitted source line 1488) instead of returning it.
1481  bool HasSeq_descr(TFlags flags = fDefaultFlags) const;
1482  CRef<CSeq_descr> GetSeq_descr(TFlags flags = fDefaultFlags) const;
1483 
1484  bool HasTitle(void) const;
1485  CTempString GetTitle(void) const;
1486 
1487  bool HasAnnotSet(void) const;
1489  void GetAnnotSet(TAnnotSet& annot_set, TFlags flags = fDefaultFlags) const;
1490 
// Object builders for the current protein: Seq-inst, Bioseq and Seq-entry,
// each controlled by the optional flags.
1491  CRef<CSeq_inst> GetSeq_inst(TFlags flags = fDefaultFlags) const;
1492 
1493  CRef<CBioseq> GetBioseq(TFlags flags = fDefaultFlags) const;
1494  // GetSeq_entry will always return seq entry
1495  CRef<CSeq_entry> GetSeq_entry(TFlags flags = fDefaultFlags) const;
1496 
1497 protected:
// SWGSCreateInfo and SWGSFeatChunkInfo are granted access to the
// non-public members of this iterator.
// NOTE(review): x_Cur() is const yet returns nothing; presumably it lazily
// sets up the m_Cur cursor - confirm in wgsread.cpp.
1498  friend struct SWGSCreateInfo;
1499  friend struct SWGSFeatChunkInfo;
1500 
1501  void x_Init(const CWGSDb& wgs_db);
1502  void x_Cur() const;
1503 
// Returns the database implementation object held in m_Db as a non-const
// reference (via GetNCObject()), even though this accessor itself is const.
1504  CWGSDb_Impl& GetDb(void) const {
1505  return m_Db.GetNCObject();
1506  }
1507 
// Validity guard. x_CheckValid() does nothing while the iterator tests true;
// when `!*this` holds it forwards the caller-supplied method name to
// x_ReportInvalid().
// NOTE(review): x_ReportInvalid() is defined out of line; presumably it
// throws - confirm in wgsread.cpp.
1508  void x_ReportInvalid(const char* method) const;
1509  void x_CheckValid(const char* method) const {
1510  if ( !*this ) {
1511  x_ReportInvalid(method);
1512  }
1513  }
1514 
// Construction helpers operating on the SWGSCreateInfo passed by reference.
// NOTE(review): presumably the back ends of GetBioseq()/GetSeq_entry() -
// confirm in wgsread.cpp.
1515  void x_CreateBioseq(SWGSCreateInfo& info) const;
1516  void x_CreateEntry(SWGSCreateInfo& info) const;
1517 
1518 private:
// Iterator state: two protein table cursors of different cursor types,
// the current row id, and the row range bounds. The range is half-open
// [m_FirstGoodId, m_FirstBadId): the bool test is `m_CurrId < m_FirstBadId`
// and GetSize() is the difference of the bounds.
1520  CRef<CWGSDb_Impl::SProt0TableCursor> m_Cur0; // VDB protein table accessor
1521  CRef<CWGSDb_Impl::SProtTableCursor> m_Cur; // VDB protein table accessor
1522  TVDBRowId m_CurrId, m_FirstGoodId, m_FirstBadId;
1523 };
1524 
1525 
1527 {
1528 public:
// Construction: default; over a database (explicit, so a CWGSDb does not
// convert implicitly); over a database plus a single row id; over a database
// plus a row range. TVDBRowIdRange is a (TVDBRowId, TVDBRowCount) pair per
// the vdbread.hpp typedef shown in this page's index.
1529  CWGSFeatureIterator(void);
1530  explicit
1531  CWGSFeatureIterator(const CWGSDb& wgs);
1532  CWGSFeatureIterator(const CWGSDb& wgs, TVDBRowId row);
1533  CWGSFeatureIterator(const CWGSDb& wgs, TVDBRowIdRange row_range);
1534  ~CWGSFeatureIterator(void);
1535 
1536  void Reset(void);
1539 
// Boolean test of the iterator: the expression handed to the macro is
// `m_CurrId < m_FirstBadId`, i.e. true while the current row id is below the
// first bad row id.
1540  DECLARE_OPERATOR_BOOL(m_CurrId < m_FirstBadId);
1541 
1543  ++m_CurrId;
1544  return *this;
1545  }
1546 
1548  return m_CurrId;
1549  }
1551  return m_FirstGoodId;
1552  }
1554  return m_FirstBadId;
1555  }
1557  return GetFirstBadRowId() - GetCurrentRowId();
1558  }
// Number of rows in the iterator's range: first bad row id minus first good
// row id.
1559  TVDBRowCount GetSize(void) const {
1560  return GetFirstBadRowId() - GetFirstGoodRowId();
1561  }
1562 
// Both selectors return a reference to the iterator type, so calls can be
// chained. NOTE(review): presumably they reposition on the given row or
// (first row, count) range - confirm in wgsread.cpp.
1563  CWGSFeatureIterator& SelectRow(TVDBRowId row);
1564  CWGSFeatureIterator& SelectRowRange(TVDBRowIdRange row_range);
1565 
// Getters for the current feature row: feature type; sequence type and row
// id for both the location and the product; location start, length and
// range; and the feature itself, either as raw bytes (CTempString) or as a
// CSeq_feat object.
1566  NCBI_WGS_feattype GetFeatType(void) const;
1567 
1568  NCBI_WGS_seqtype GetLocSeqType(void) const;
1569  NCBI_WGS_seqtype GetProductSeqType(void) const;
1570 
1571  TVDBRowId GetLocRowId(void) const;
1572  TVDBRowId GetProductRowId(void) const;
1573 
1574  TSeqPos GetLocStart(void) const;
1575  TSeqPos GetLocLength(void) const;
1576  CRange<TSeqPos> GetLocRange(void) const;
1577 
1578  CTempString GetSeq_featBytes(void) const;
1579  CRef<CSeq_feat> GetSeq_feat() const;
1580 
1581 protected:
// Returns the database implementation object held in m_Db as a non-const
// reference (via GetNCObject()), even though this accessor itself is const.
1582  CWGSDb_Impl& GetDb(void) const {
1583  return m_Db.GetNCObject();
1584  }
1585 
// Initialization helper taking the database.
// NOTE(review): presumably shared by the constructors - confirm in
// wgsread.cpp.
1586  void x_Init(const CWGSDb& wgs_db);
1587 
// Validity guard. x_CheckValid() does nothing while the iterator tests true;
// when `!*this` holds it forwards the caller-supplied method name to
// x_ReportInvalid().
// NOTE(review): x_ReportInvalid() is defined out of line; presumably it
// throws - confirm in wgsread.cpp.
1588  void x_ReportInvalid(const char* method) const;
1589  void x_CheckValid(const char* method) const {
1590  if ( !*this ) {
1591  x_ReportInvalid(method);
1592  }
1593  }
1594 
1595 private:
// Iterator state: table cursor, current row id, and the row range bounds.
// The range is half-open [m_FirstGoodId, m_FirstBadId): the bool test is
// `m_CurrId < m_FirstBadId` and GetSize() is the difference of the bounds.
1597  CRef<CWGSDb_Impl::SFeatTableCursor> m_Cur; // VDB feature table accessor
1598  TVDBRowId m_CurrId, m_FirstGoodId, m_FirstBadId;
1599 };
1600 
1601 
1604 
1605 #endif // SRA__READER__SRA__WGSREAD__HPP
CRef< CSerialObject > m_MainObject
Definition: wgsread.hpp:136
const CSerialObject & GetMainObject(void) const
Definition: wgsread.hpp:130
CFastMutex –.
Definition: ncbimtx.hpp:667
CID2S_Chunk –.
Definition: ID2S_Chunk.hpp:66
CID2S_Split_Info –.
CObjectOStreamAsnBinary –.
Definition: objostrasnb.hpp:58
CObject –.
Definition: ncbiobj.hpp:180
@Seq_descr.hpp User-defined methods of the data storage class.
Definition: Seq_descr.hpp:55
Definition: Seq_entry.hpp:56
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
Base class for all serializable objects.
Definition: serialbase.hpp:150
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
CVDBTableIndex m_ProteinNameIndex
Definition: wgsread.hpp:621
DECLARE_SAFE_FLAGS_TYPE(EGnlIdFlags, TGnlIdFlags)
const string & GetIdPrefixWithVersion(void) const
Definition: wgsread.hpp:205
string m_IdPrefix
Definition: wgsread.hpp:587
CVDBObjectCache< SProtIdxTableCursor > m_ProtIdx
Definition: wgsread.hpp:617
map< SProtAccInfo, TIdRange > TProtAccRanges
Definition: wgsread.hpp:392
CVDBObjectCache< SProt0TableCursor > m_Prot0
Definition: wgsread.hpp:613
atomic< bool > m_GiIdxTableIsOpened
Definition: wgsread.hpp:597
NCBI_gb_state m_ProjectGBState
Definition: wgsread.hpp:636
Uint4 m_IdRowDigits
Definition: wgsread.hpp:591
const CVDBTableIndex & ContigNameIndex(void)
Definition: wgsread.hpp:490
const CVDBTableIndex & ProductNameIndex(void)
Definition: wgsread.hpp:508
pair< TGi, TGi > GetNucGiRange(void)
Definition: wgsread.cpp:3310
bool LoadMasterDescr(int filter)
Definition: wgsread.cpp:3012
const CVDBTable & GiIdxTable(void)
Definition: wgsread.hpp:472
COpenRange< TIntId > TGiRange
Definition: wgsread.hpp:353
static TVDBRowId ParseContigRow(CTempString acc)
Definition: wgsread.hpp:245
DECLARE_SAFE_FLAGS_TYPE(EAllowRowType, TAllowRowType)
atomic< bool > m_FeatTableIsOpened
Definition: wgsread.hpp:596
CVDBTableIndex m_ScaffoldNameIndex
Definition: wgsread.hpp:620
atomic< bool > m_ScfTableIsOpened
Definition: wgsread.hpp:594
string m_IdPrefixDbWithVersion
Definition: wgsread.hpp:588
bool HasCommonTaxId(void) const
Definition: wgsread.hpp:411
CFastMutex m_TableMutex
Definition: wgsread.hpp:593
const string & GetReplacedBy() const
Definition: wgsread.hpp:404
CVDBTable m_ProtIdxTable
Definition: wgsread.hpp:608
TAmbiguityCache m_AmbiguityCache
Definition: wgsread.hpp:626
TVDBRowId GetNucGiRowId(TGi gi)
Definition: wgsread.cpp:3507
const CVDBTableIndex & ScaffoldNameIndex(void)
Definition: wgsread.hpp:496
void SetMasterDescr(const TMasterDescr &descr, int filter)
Definition: wgsread.cpp:3140
TTaxId GetCommonTaxId(void) const
Definition: wgsread.hpp:415
pair< TVDBRowId, bool > GetGiRowId(TGi gi)
Definition: wgsread.cpp:3482
CVDBMgr m_Mgr
Definition: wgsread.hpp:582
int m_IdVersion
Definition: wgsread.hpp:590
CVDBTable m_FeatTable
Definition: wgsread.hpp:606
const CVDBTable & ScfTable(void)
Definition: wgsread.hpp:454
const CRef< CSeq_id > & GetPatentId(void) const
Definition: wgsread.hpp:324
CVDBTableIndex m_ProtAccIndex
Definition: wgsread.hpp:618
TMasterDescr m_MasterDescr
Definition: wgsread.hpp:634
CRef< CSeq_entry > m_MasterEntry
Definition: wgsread.hpp:633
string m_ReplacedBy
Definition: wgsread.hpp:638
TVDBRowId GetScaffoldNameRowId(const string &name)
Definition: wgsread.cpp:3586
TVDBRowId GetContigNameRowId(const string &name)
Definition: wgsread.cpp:3564
Uint4 GetIdRowDigits(void) const
Definition: wgsread.hpp:260
CVDBObjectCache< SSeqTableCursor > m_Seq
Definition: wgsread.hpp:611
TTaxId m_CommonTaxId
Definition: wgsread.hpp:639
COpenRange< Uint4 > TIdRange
Definition: wgsread.hpp:391
atomic< Int1 > m_ScaffoldNameIndexIsOpened
Definition: wgsread.hpp:601
atomic< bool > m_ProtIdxTableIsOpened
Definition: wgsread.hpp:598
NCBI_gb_state GetProjectGBState() const
Definition: wgsread.hpp:401
TVDBRowId GetProductNameRowId(const string &name)
Definition: wgsread.cpp:3600
CVDBTable m_SeqTable
Definition: wgsread.hpp:585
TVDBRowId GetProtAccRowId(const string &acc, int version=-1)
Definition: wgsread.cpp:3607
CVDBObjectCache< SProtTableCursor > m_Prot
Definition: wgsread.hpp:614
TVDBRowId GetProteinNameRowId(const string &name)
Definition: wgsread.cpp:3593
atomic< Int1 > m_ProductNameIndexIsOpened
Definition: wgsread.hpp:603
vector< TGiRange > TGiRanges
Definition: wgsread.hpp:354
string m_WGSPath
Definition: wgsread.hpp:583
list< CRef< CSeqdesc > > TMasterDescr
Definition: wgsread.hpp:298
CSimpleBufferT< char > TMasterDescrBytes
Definition: wgsread.hpp:307
TProtAccRanges GetProtAccRanges(void)
Definition: wgsread.cpp:3445
CFastMutex m_AmbiguityCacheMutex
Definition: wgsread.hpp:624
const CVDBTableIndex & ProteinNameIndex(void)
Definition: wgsread.hpp:502
TGiRanges GetProtGiRanges(void)
Definition: wgsread.cpp:3401
TVDBRowId ParseRow(CTempString acc, bool *is_scaffold) const
Definition: wgsread.cpp:2523
TGiRanges GetNucGiRanges(void)
Definition: wgsread.cpp:3369
limited_resource_map< TVDBRowId, CRef< SAmbiguityInfo >, size_t > TAmbiguityCache
Definition: wgsread.hpp:625
CVDBObjectCache< SScfTableCursor > m_Scf
Definition: wgsread.hpp:612
static TVDBRowId ParseScaffoldRow(CTempString acc)
Definition: wgsread.hpp:251
bool IsSetMasterDescr(void) const
Definition: wgsread.hpp:300
CVDBTable m_GiIdxTable
Definition: wgsread.hpp:607
CVDBTableIndex m_ContigNameIndex
Definition: wgsread.hpp:619
bool m_HasNoDefaultGnlId
Definition: wgsread.hpp:630
CVDBObjectCache< SSeq0TableCursor > m_Seq0
Definition: wgsread.hpp:610
bool IsReplaced() const
Definition: wgsread.hpp:407
CRef< CSeq_id > m_PatentId
Definition: wgsread.hpp:635
CVDBTable m_ScfTable
Definition: wgsread.hpp:604
atomic< bool > m_ProtTableIsOpened
Definition: wgsread.hpp:595
bool HasPatentId() const
Definition: wgsread.hpp:320
CVDBTable m_ProtTable
Definition: wgsread.hpp:605
const CVDBTable & FeatTable(void)
Definition: wgsread.hpp:466
const CVDBTableIndex & ProtAccIndex(void)
Definition: wgsread.hpp:484
const CVDBTable & SeqTable(void)
Definition: wgsread.hpp:451
pair< TGi, TGi > GetProtGiRange(void)
Definition: wgsread.cpp:3330
CSeq_inst::TMol m_ContigMolType
Definition: wgsread.hpp:628
string m_IdPrefixWithVersion
Definition: wgsread.hpp:586
atomic< Int1 > m_ProtAccIndexIsOpened
Definition: wgsread.hpp:599
CSeq_id::E_Choice m_SeqIdType
Definition: wgsread.hpp:637
string m_IdPrefixDb
Definition: wgsread.hpp:589
const TMasterDescr & GetMasterDescr(void) const
Definition: wgsread.hpp:303
atomic< EFeatLocIdType > m_FeatLocIdType
Definition: wgsread.hpp:632
CVDBTableIndex m_ProductNameIndex
Definition: wgsread.hpp:622
static TVDBRowId ParseProteinRow(CTempString acc)
Definition: wgsread.hpp:257
CVDBObjectCache< SFeatTableCursor > m_Feat
Definition: wgsread.hpp:615
const CVDBTable & ProtIdxTable(void)
Definition: wgsread.hpp:478
const CVDBTable & ProtTable(void)
Definition: wgsread.hpp:460
char TRowType
Definition: wgsread.hpp:224
bool m_HasCommonTaxId
Definition: wgsread.hpp:631
CVDBObjectCache< SGiIdxTableCursor > m_GiIdx
Definition: wgsread.hpp:616
const string & GetIdPrefix(void) const
Definition: wgsread.hpp:202
atomic< Int1 > m_ProteinNameIndexIsOpened
Definition: wgsread.hpp:602
bool m_IsSetMasterDescr
Definition: wgsread.hpp:629
TVDBRowId GetProtGiRowId(TGi gi)
Definition: wgsread.cpp:3525
const string & GetWGSPath(void) const
Definition: wgsread.hpp:208
atomic< Int1 > m_ContigNameIndexIsOpened
Definition: wgsread.hpp:600
CWGSDb(CVDBMgr &mgr, CTempString path_or_acc, CTempString vol_path=CTempString())
Definition: wgsread.hpp:655
EDescrFilter
Definition: wgsread.hpp:765
@ eDescrDefaultFilter
Definition: wgsread.hpp:767
@ eDescrNoFilter
Definition: wgsread.hpp:766
TGiRanges GetProtGiRanges(void)
Definition: wgsread.hpp:708
pair< TVDBRowId, bool > GetGiRowId(TGi gi) const
Definition: wgsread.hpp:723
TVDBRowId GetProductNameRowId(const string &name) const
Definition: wgsread.hpp:755
CWGSDb(CWGSDb_Impl *impl)
Definition: wgsread.hpp:651
TGiRanges GetNucGiRanges(void) const
Definition: wgsread.hpp:704
CWGSDb_Impl::SProtAccInfo SProtAccInfo
Definition: wgsread.hpp:712
TProtAccRanges GetProtAccRanges(void)
Definition: wgsread.hpp:717
static TVDBRowId ParseProteinRow(CTempString acc)
Definition: wgsread.hpp:689
EDescrType
Definition: wgsread.hpp:781
@ eDescr_default
Definition: wgsread.hpp:783
@ eDescr_skip
Definition: wgsread.hpp:782
@ eDescr_force
Definition: wgsread.hpp:784
const string & GetWGSPath(void) const
Definition: wgsread.hpp:662
static TVDBRowId ParseScaffoldRow(CTempString acc)
Definition: wgsread.hpp:683
TVDBRowId GetContigNameRowId(const string &name) const
Definition: wgsread.hpp:737
TVDBRowId GetNucGiRowId(TGi gi) const
Definition: wgsread.hpp:727
void SetMasterDescr(const TMasterDescr &descr, EDescrFilter filter=eDescrDefaultFilter) const
Definition: wgsread.hpp:777
CWGSDb_Impl::TIdRange TIdRange
Definition: wgsread.hpp:713
pair< TGi, TGi > GetProtGiRange(void) const
Definition: wgsread.hpp:698
pair< TGi, TGi > GetNucGiRange(void) const
Definition: wgsread.hpp:694
CWGSDb_Impl::TGiRange TGiRange
Definition: wgsread.hpp:701
CWGSDb_Impl::TProtAccRanges TProtAccRanges
Definition: wgsread.hpp:714
TVDBRowId GetProteinNameRowId(const string &name) const
Definition: wgsread.hpp:749
TVDBRowId GetProtAccRowId(const string &acc, int version=-1) const
Definition: wgsread.hpp:761
list< CRef< CSeqdesc > > TMasterDescr
Definition: wgsread.hpp:776
TVDBRowId ParseRow(CTempString acc, bool *is_scaffold=NULL) const
Definition: wgsread.hpp:671
TVDBRowId GetProtGiRowId(TGi gi) const
Definition: wgsread.hpp:731
bool LoadMasterDescr(EDescrFilter filter=eDescrDefaultFilter) const
Definition: wgsread.hpp:772
static EDescrType GetMasterDescrType(const CSeqdesc &desc)
Definition: wgsread.cpp:3108
CWGSDb_Impl::TGiRanges TGiRanges
Definition: wgsread.hpp:702
static TVDBRowId ParseContigRow(CTempString acc)
Definition: wgsread.hpp:677
TVDBRowId GetScaffoldNameRowId(const string &name) const
Definition: wgsread.hpp:743
CWGSDb(void)
Definition: wgsread.hpp:648
TVDBRowId m_FirstGoodId
Definition: wgsread.hpp:1598
CWGSDb_Impl & GetDb(void) const
Definition: wgsread.hpp:1582
TVDBRowCount GetSize(void) const
Definition: wgsread.hpp:1559
CWGSFeatureIterator & operator++(void)
Definition: wgsread.hpp:1542
TVDBRowId GetFirstBadRowId(void) const
Definition: wgsread.hpp:1553
DECLARE_OPERATOR_BOOL(m_CurrId< m_FirstBadId)
TVDBRowCount GetRemainingCount(void) const
Definition: wgsread.hpp:1556
TVDBRowId GetFirstGoodRowId(void) const
Definition: wgsread.hpp:1550
TVDBRowId GetCurrentRowId(void) const
Definition: wgsread.hpp:1547
CRef< CWGSDb_Impl::SFeatTableCursor > m_Cur
Definition: wgsread.hpp:1597
void x_CheckValid(const char *method) const
Definition: wgsread.hpp:1589
CWGSGiIterator & operator++(void)
Definition: wgsread.hpp:1355
ESeqType GetSeqType(void) const
Definition: wgsread.hpp:1367
TGi GetGi(void) const
Definition: wgsread.hpp:1362
TVDBRowId GetRowId(void) const
Definition: wgsread.hpp:1372
TVDBRowId m_CurrRowId
Definition: wgsread.hpp:1390
CRef< CWGSDb_Impl::SGiIdxTableCursor > m_Cur
Definition: wgsread.hpp:1388
CWGSDb_Impl & GetDb(void) const
Definition: wgsread.hpp:1379
DECLARE_OPERATOR_BOOL(m_CurrGi< m_FirstBadGi)
ESeqType m_FilterSeqType
Definition: wgsread.hpp:1391
TVDBRowId GetFirstBadRowId(void) const
Definition: wgsread.hpp:1422
CBioseq::TAnnot TAnnotSet
Definition: wgsread.hpp:1488
TVDBRowId GetCurrentRowId(void) const
Definition: wgsread.hpp:1416
TVDBRowId GetFirstGoodRowId(void) const
Definition: wgsread.hpp:1419
DECLARE_OPERATOR_BOOL(m_CurrId< m_FirstBadId)
CWGSDb_Impl & GetDb(void) const
Definition: wgsread.hpp:1504
TVDBRowCount GetRemainingCount(void) const
Definition: wgsread.hpp:1425
void x_CheckValid(const char *method) const
Definition: wgsread.hpp:1509
CRef< CWGSDb_Impl::SProtTableCursor > m_Cur
Definition: wgsread.hpp:1521
TVDBRowId m_FirstGoodId
Definition: wgsread.hpp:1522
NCBI_WGS_hash THash
Definition: wgsread.hpp:1452
TVDBRowCount GetSize(void) const
Definition: wgsread.hpp:1428
CWGSProteinIterator & operator++(void)
Definition: wgsread.hpp:1411
CRef< CWGSDb_Impl::SProt0TableCursor > m_Cur0
Definition: wgsread.hpp:1520
TVDBRowCount GetSize(void) const
Definition: wgsread.hpp:1276
TVDBRowId m_FirstGoodId
Definition: wgsread.hpp:1331
TVDBRowId GetFirstBadRowId(void) const
Definition: wgsread.hpp:1270
TVDBRowCount GetRemainingCount(void) const
Definition: wgsread.hpp:1273
TVDBRowId GetFirstGoodRowId(void) const
Definition: wgsread.hpp:1267
CRef< CWGSDb_Impl::SScfTableCursor > m_Cur
Definition: wgsread.hpp:1330
void x_CheckValid(const char *method) const
Definition: wgsread.hpp:1319
CWGSDb_Impl & GetDb(void) const
Definition: wgsread.hpp:1314
CWGSScaffoldIterator & operator++(void)
Definition: wgsread.hpp:1259
TVDBRowId GetCurrentRowId(void) const
Definition: wgsread.hpp:1264
DECLARE_OPERATOR_BOOL(m_CurrId< m_FirstBadId)
TVDBRowId GetRemainingCount(void) const
Definition: wgsread.hpp:892
TIncludeFlags m_IncludeFlags
Definition: wgsread.hpp:1234
TSeqPos x_Get4naLength(TSeqPos pos, TSeqPos len, SAmbiguityAccess &ambiguity) const
TSeqPos x_Get2naLength(TSeqPos pos, TSeqPos len, SAmbiguityAccess &ambiguity) const
DECLARE_SAFE_FLAGS_TYPE(EIncludeFlags, TIncludeFlags)
TSeqPos x_Get4naLengthExact(TSeqPos pos, TSeqPos len, TSeqPos stop_2na_len, TSeqPos stop_gap_len, SAmbiguityAccess &ambiguity) const
TVDBRowId m_FirstGoodId
Definition: wgsread.hpp:1232
NCBI_WGS_hash THash
Definition: wgsread.hpp:979
bool x_AmbiguousBlock(TSeqPos block_index, SAmbiguityAccess &ambiguity) const
CWGSDb_Impl::SAmbiguityInfo SAmbiguityInfo
Definition: wgsread.hpp:1079
TVDBRowId GetCurrentRowId(void) const
Definition: wgsread.hpp:880
CRef< CWGSDb_Impl::SSeq0TableCursor > m_Cur0
Definition: wgsread.hpp:1229
bool IsClippedByQuality(void) const
Definition: wgsread.hpp:941
int GetAccVersion(void) const
Definition: wgsread.hpp:907
CRef< CDelta_ext > GetDelta(TSeqPos pos, TSeqPos len) const
TSeqPos x_GetGapLengthExact(TSeqPos pos, TSeqPos len, SAmbiguityAccess &ambiguity) const
CRef< CSeq_data > Get4na(TSeqPos pos, TSeqPos len, SAmbiguityAccess &ambiguity) const
TVDBRowId GetFirstBadRowId(void) const
Definition: wgsread.hpp:886
CWGSDb_Impl::SSeq4naTableCursor SSeq4naTableCursor
Definition: wgsread.hpp:1078
CBioseq::TAnnot TAnnotSet
Definition: wgsread.hpp:1051
CRef< CWGSDb_Impl::SAmbiguityInfo > m_AmbiguityInfo
Definition: wgsread.hpp:1231
SVersionSelector m_AccVersion
Definition: wgsread.hpp:1233
CRef< CWGSDb_Impl::SSeqTableCursor > m_Cur
Definition: wgsread.hpp:1230
TVDBRowId GetSize(void) const
Definition: wgsread.hpp:895
DECLARE_SAFE_FLAGS_TYPE(EInstSegmentFlags, TInstSegmentFlags)
TSeqPos x_Get2naLengthExact(TSeqPos pos, TSeqPos len, SAmbiguityAccess &ambiguity) const
CRef< CSeq_data > Get2na(TSeqPos pos, TSeqPos len, SAmbiguityAccess &ambiguity) const
vector< SSegment > TSegments
Definition: wgsread.hpp:1209
DECLARE_OPERATOR_BOOL(m_CurrId< m_FirstBadId)
bool ShouldBeClippedByQuality(void) const
Definition: wgsread.hpp:946
TVDBRowId GetLastRowId(void) const
Definition: wgsread.hpp:889
const Uint1 * x_GetUnpacked4na(TSeqPos pos, TSeqPos len, SAmbiguityAccess &ambiguity) const
bool m_ClipByQuality
Definition: wgsread.hpp:1235
bool GetClipByQualityFlag(EClipType clip_type=eDefaultClip) const
Definition: wgsread.hpp:951
void x_CheckValid(const char *method) const
Definition: wgsread.hpp:1151
CRef< CDelta_ext > GetDelta(TSeqPos pos, TSeqPos len, TWGSContigGapInfo gap_info, vector< COpenRange< TSeqPos > > *split=0) const
CWGSDb_Impl & GetDb(void) const
Definition: wgsread.hpp:1143
TVDBRowId GetFirstGoodRowId(void) const
Definition: wgsread.hpp:883
TSeqPos GetClipQualityRight(void) const
Definition: wgsread.hpp:930
Definition: map.hpp:338
Include a standard set of the NCBI C++ Toolkit most basic headers.
uint32_t INSDC_coord_len
Definition: csraread.hpp:49
static uch flags
int GetSeqLength(const CBioseq &bioseq)
Definition: cuSequence.cpp:216
std::ofstream out("events_result.xml")
main entry point for tests
static const char table_name[]
Definition: bcp.c:249
static const char * str(char *buf, int n)
Definition: stats.c:84
char data[12]
Definition: iconv.c:80
Int4 int32_t
Int2 int16_t
unsigned char uint8_t
Uint4 uint32_t
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
#define NULL
Definition: ncbistd.hpp:225
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
string GetProteinName(const CBioseq_Handle &seq)
Return protein name from corresponding Prot-ref feature.
Definition: sequence.cpp:356
NCBI_XOBJUTIL_EXPORT string GetTitle(const CBioseq_Handle &hnd, TGetTitleFlags flags=0)
Definition: seqtitle.cpp:106
TTaxId GetTaxId(const CBioseq_Handle &handle)
return the tax-id associated with a given sequence.
Definition: sequence.cpp:274
CObject & operator=(const CObject &src) THROWS_NONE
Assignment operator.
Definition: ncbiobj.hpp:482
TObjectType & GetObject(void)
Get object.
Definition: ncbiobj.hpp:1011
TObjectType & GetNCObject(void) const
Get object.
Definition: ncbiobj.hpp:1187
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
#define NCBI_DEPRECATED
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int8_t Int1
1-byte (8-bit) signed integer
Definition: ncbitype.h:98
#define NCBI_SRAREAD_EXPORT
Definition: ncbi_export.h:1227
NCBI_NS_NCBI::TGi TGi
Definition: Seq_id_.hpp:180
E_Choice
Choice variants.
Definition: Seq_id_.hpp:93
list< CRef< CSeq_id > > TId
Definition: Bioseq_.hpp:94
EMol
molecule class in living organism
Definition: Seq_inst_.hpp:108
list< CRef< CSeq_annot > > TAnnot
Definition: Bioseq_.hpp:97
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
int len
Generic map with additional resource limited by some value.
CSeq_entry_Info & GetSeq_entry(CTSE_Info &tse, const CBioObjectId &id)
CBioseq_Info & GetBioseq(CTSE_Info &tse, const CBioObjectId &id)
static MDB_envinfo info
Definition: mdb_load.c:37
static int version
Definition: mdb_load.c:29
static string GetProductName(const CProt_ref &prot)
Definition: utils.cpp:62
range(_Ty, _Ty) -> range< _Ty >
const char * tag
Multi-threading – mutexes; rw-locks; semaphore.
T min(T x_, T y_)
void split(std::vector< std::string > *strVec, const std::string &str_, const std::string &split_)
const CConstRef< CSeq_id > GetAccession(const CSeq_id_Handle &id_handle)
int TChunkId
Definition: blob_id.hpp:145
static const char * prefix[]
Definition: pcregrep.c:405
static pcre_uint8 * buffer
Definition: pcretest.c:1051
static bool GetIds(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
#define row(bind, expected)
Definition: string_bind.c:73
bool operator<(const SProtAccInfo &b) const
Definition: wgsread.hpp:372
bool operator==(const SProtAccInfo &b) const
Definition: wgsread.hpp:379
bool operator!=(const SProtAccInfo &b) const
Definition: wgsread.hpp:383
DECLARE_OPERATOR_BOOL(m_IdLength !=0)
CRef< CSeq_literal > literal
Definition: wgsread.hpp:1207
COpenRange< TSeqPos > range
Definition: wgsread.hpp:1205
const INSDC_coord_len * gaps_len
Definition: wgsread.hpp:988
bool IsInGap(TSeqPos pos) const
Definition: wgsread.hpp:1019
TSeqPos GetGapLength(TSeqPos pos, TSeqPos len) const
Definition: wgsread.hpp:1021
TSeqPos GetLength(void) const
Definition: wgsread.hpp:1011
TSeqPos GetToOpen(void) const
Definition: wgsread.hpp:1013
const NCBI_WGS_component_props * gaps_props
Definition: wgsread.hpp:989
const INSDC_coord_zero * gaps_start
Definition: wgsread.hpp:987
TSeqPos GetDataLength(TSeqPos pos, TSeqPos len) const
Definition: wgsread.hpp:1025
const NCBI_WGS_gap_linkage * gaps_linkage
Definition: wgsread.hpp:990
DECLARE_SAFE_FLAGS_TYPE(EFlags, TFlags)
int TSplitVersion
Definition: wgsread.hpp:186
@ eFeatLocIdAccVer
Definition: wgsread.hpp:182
@ eFeatLocIdAccNoVer
Definition: wgsread.hpp:183
@ eFeatLocIdUninitialized
Definition: wgsread.hpp:180
@ fSplitProducts
Definition: wgsread.hpp:169
@ fMasterDescrMark
Definition: wgsread.hpp:157
@ fSplitQualityGraph
Definition: wgsread.hpp:167
@ fSplitFeatures
Definition: wgsread.hpp:170
static const TSplitVersion kDefaultSplitVersion
Definition: wgsread.hpp:187
int TChunkId
Definition: wgsread.hpp:189
Definition: type.c:6
#define _ASSERT
uint64_t TVDBRowCount
Definition: vdbread.hpp:82
pair< TVDBRowId, TVDBRowCount > TVDBRowIdRange
Definition: vdbread.hpp:83
int64_t TVDBRowId
Definition: vdbread.hpp:79
void Serialize(CNcbiOstream &, const CRawScoreVector< Key, Score > &)
Generics These throw an exception; we must implement serialization for each type.
int32_t NCBI_WGS_gap_linkage
Definition: wgs-contig.h:111
int16_t NCBI_WGS_component_props
Definition: wgs-contig.h:54
EDescrType
Definition: wgsmaster.cpp:266
void AddMasterDescr(CBioseq_Info &seq, const CSeq_descr &src, EDescrType type)
Definition: wgsmaster.cpp:297
BEGIN_NAMESPACE(objects)
uint8_t NCBI_WGS_seqtype
Definition: wgsread.hpp:64
int32_t NCBI_WGS_gap_linkage
Definition: wgsread.hpp:61
@ NCBI_WGS_loc_strand_both
Definition: wgsread.hpp:81
@ NCBI_WGS_loc_strand_minus
Definition: wgsread.hpp:80
@ NCBI_WGS_loc_strand_plus
Definition: wgsread.hpp:79
@ NCBI_WGS_loc_strand_unknown
Definition: wgsread.hpp:78
int32_t INSDC_coord_one
Definition: wgsread.hpp:56
DECLARE_SAFE_FLAGS(SWGSDb_Defs::EFlags)
uint32_t INSDC_coord_len
Definition: wgsread.hpp:57
uint8_t INSDC_quality_phred
Definition: wgsread.hpp:58
int16_t NCBI_WGS_component_props
Definition: wgsread.hpp:60
@ NCBI_gb_state_eWGSGenBankUnverified
Definition: wgsread.hpp:90
@ NCBI_gb_state_eWGSGenBankReplaced
Definition: wgsread.hpp:88
@ NCBI_gb_state_eWGSGenBankSuppressed
Definition: wgsread.hpp:87
@ NCBI_gb_state_eWGSGenBankMissing
Definition: wgsread.hpp:91
@ NCBI_gb_state_eWGSGenBankMigrated
Definition: wgsread.hpp:92
@ NCBI_gb_state_eWGSGenBankLive
Definition: wgsread.hpp:86
@ NCBI_gb_state_eWGSGenBankWithdrawn
Definition: wgsread.hpp:89
int32_t NCBI_WGS_hash
Definition: wgsread.hpp:95
END_NCBI_NAMESPACE
Definition: wgsread.hpp:1603
END_NAMESPACE(objects)
BEGIN_NCBI_NAMESPACE
Definition: wgsread.hpp:97
@ NCBI_WGS_seqtype_mrna
Definition: wgsread.hpp:70
@ NCBI_WGS_seqtype_scaffold
Definition: wgsread.hpp:68
@ NCBI_WGS_seqtype_protein
Definition: wgsread.hpp:69
@ NCBI_WGS_seqtype_contig
Definition: wgsread.hpp:67
uint8_t NCBI_WGS_loc_strand
Definition: wgsread.hpp:75
int32_t INSDC_coord_zero
Definition: wgsread.hpp:55
uint8_t NCBI_WGS_feattype
Definition: wgsread.hpp:73
uint32_t NCBI_gb_state
Definition: wgsread.hpp:53
Modified on Sun Apr 21 03:45:25 2024 by modify_doxy.py rev. 669887