NCBI C++ ToolKit
bamread.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef SRA__READER__BAM__BAMREAD__HPP
2 #define SRA__READER__BAM__BAMREAD__HPP
3 /* $Id: bamread.hpp 101436 2023-12-13 17:06:32Z vasilche $
4  * ===========================================================================
5  *
6  * PUBLIC DOMAIN NOTICE
7  * National Center for Biotechnology Information
8  *
9  * This software/database is a "United States Government Work" under the
10  * terms of the United States Copyright Act. It was written as part of
11  * the author's official duties as a United States Government employee and
12  * thus cannot be copyrighted. This software/database is freely available
13  * to the public for use. The National Library of Medicine and the U.S.
14  * Government have not placed any restriction on its use or reproduction.
15  *
16  * Although all reasonable efforts have been taken to ensure the accuracy
17  * and reliability of the software and data, the NLM and the U.S.
18  * Government do not and cannot warrant the performance or results that
19  * may be obtained by using this software or data. The NLM and the U.S.
20  * Government disclaim all warranties, express or implied, including
21  * warranties of performance, merchantability or fitness for any particular
22  * purpose.
23  *
24  * Please cite the author in any work or product based on this material.
25  *
26  * ===========================================================================
27  *
28  * Authors: Eugene Vasilchenko
29  *
30  * File Description:
31  * Access to BAM files
32  *
33  */
34 
35 #include <corelib/ncbistd.hpp>
36 #include <corelib/ncbiexpt.hpp>
37 #include <corelib/ncbiobj.hpp>
38 #include <corelib/ncbi_fast.hpp>
39 
43 #include <objects/seq/Bioseq.hpp>
47 
49 #include <unordered_map>
50 
51 struct VFSManager;
52 
53 //#include <align/bam.h>
54 struct BAMFile;
55 struct BAMAlignment;
56 
57 //#include <align/align-access.h>
58 struct AlignAccessMgr;
59 struct AlignAccessDB;
60 struct AlignAccessRefSeqEnumerator;
61 struct AlignAccessAlignmentEnumerator;
62 
64 
67 
68 class CSeq_entry;
69 class CBioseq;
70 class CSeq_align;
71 class CSeq_id;
72 class CBamFileAlign;
73 class CBamMgr;
74 class CBamDb;
75 class CBGZFPos;
76 
77 
78 SPECIALIZE_BAM_REF_TRAITS(AlignAccessMgr, const);
79 SPECIALIZE_BAM_REF_TRAITS(AlignAccessDB, const);
80 SPECIALIZE_BAM_REF_TRAITS(AlignAccessRefSeqEnumerator, );
81 SPECIALIZE_BAM_REF_TRAITS(AlignAccessAlignmentEnumerator, );
82 SPECIALIZE_BAM_REF_TRAITS(BAMFile, const);
83 SPECIALIZE_BAM_REF_TRAITS(BAMAlignment, const);
85 
86 
87 /////////////////////////////////////////////////////////////////////////////
88 // CSrzException
89 /////////////////////////////////////////////////////////////////////////////
90 
92 {
93 public:
94  enum EErrCode {
96  eBadFormat, ///< Invalid SRZ accession format
97  eNotFound ///< Accession not found
98  };
99  virtual const char* GetErrCodeString(void) const override;
101 };
102 
103 
104 /////////////////////////////////////////////////////////////////////////////
105 // CSrzPath
106 /////////////////////////////////////////////////////////////////////////////
107 
108 #define SRZ_CONFIG_NAME "analysis.bam.cfg"
109 
111 {
112 public:
113  CSrzPath(void);
114  CSrzPath(const string& rep_path, const string& vol_path);
115 
116  static string GetDefaultRepPath(void);
117  static string GetDefaultVolPath(void);
118 
119  void AddRepPath(const string& rep_path);
120  void AddVolPath(const string& vol_path);
121 
122  enum EMissing {
124  eMissing_Empty
125  };
126  string FindAccPath(const string& acc, EMissing mising);
127  string FindAccPath(const string& acc)
128  {
     // Convenience overload: forwards to FindAccPath(acc, EMissing) with
     // the eMissing_Throw mode.
     // NOTE(review): presumably this makes a missing accession raise an
     // exception - confirm against the two-argument implementation.
129  return FindAccPath(acc, eMissing_Throw);
130  }
131  string FindAccPathNoThrow(const string& acc)
132  {
     // Convenience wrapper: forwards to FindAccPath(acc, EMissing) with
     // the eMissing_Empty mode.
     // NOTE(review): presumably a missing accession yields an empty
     // string instead of an exception - confirm against the implementation.
133  return FindAccPath(acc, eMissing_Empty);
134  }
135 
136 
137 protected:
138  void x_Init(void);
139 
140 private:
141  vector<string> m_RepPath;
142  vector<string> m_VolPath;
143 };
144 
145 
146 class CBamMgr;
147 class CBamDb;
148 class CBamRefSeqIterator;
149 class CBamAlignIterator;
150 
151 
153  : public CBamRef<VFSManager>
154 {
156 public:
158  {
159  x_Init();
160  }
161 
162 private:
163  void x_Init();
164 };
165 
166 
168 {
169 public:
170  CBamMgr(void);
171 
173  {
174  return m_VFSMgr;
175  }
177  {
178  return m_AlignAccessMgr;
179  }
180 
181 private:
184 };
185 
187 {
188 public:
189  enum EUseAPI {
     // Selects the underlying access API. Accepted by the CBamDb
     // constructors taking a manager and a database name, where the
     // default argument is eUseDefaultAPI.
190  eUseDefaultAPI, // use underlying API determined by config
191  eUseAlignAccess, // use VDB AlignAccess module
192  eUseRawIndex // use raw index and BAM file access
193  };
194  CBamDb(void)
195  {
     // Empty body and no initializer list: every member is
     // default-initialized, so no database is attached by this constructor.
196  }
197  CBamDb(const CBamMgr& mgr,
198  const string& db_name,
199  EUseAPI use_api = eUseDefaultAPI);
200  // If idx_name is empty string or equal to db_name then CBamDb constructor
201  // will try to deduce the index file name using configurable naming conventions.
202  CBamDb(const CBamMgr& mgr,
203  const string& db_name,
204  const string& idx_name,
205  EUseAPI use_api = eUseDefaultAPI);
206 
207  DECLARE_OPERATOR_BOOL(m_AADB || m_RawDB);
208 
209  static bool UseRawIndex(EUseAPI use_api);
210  static int GetDebugLevel();
211 
212  bool UsesAlignAccessDB() const
213  {
     // Returns m_AADB converted to bool, i.e. whether the AlignAccess
     // implementation reference is set.
214  return m_AADB;
215  }
216  bool UsesRawIndex() const
217  {
     // Returns m_RawDB converted to bool, i.e. whether the raw-index
     // database reference is set.
218  return m_RawDB;
219  }
221  {
222  return *m_RawDB;
223  }
224 
225  const string& GetDbName(void) const
226  {
     // Returns a reference to the stored m_DbName member (no copy); the
     // reference is only valid while this CBamDb object exists.
227  return m_DbName;
228  }
229  const string& GetIndexName(void) const
230  {
     // Returns a reference to the stored m_IndexName member (no copy); the
     // reference is only valid while this CBamDb object exists.
231  return m_IndexName;
232  }
233 
234  TSeqPos GetPageSize() const;
235 
236  void SetIdMapper(IIdMapper* idmapper, EOwnership ownership)
237  {
     // Replaces the current id mapper by calling m_IdMapper.reset();
     // both the pointer and the 'ownership' flag are forwarded unchanged.
     // NOTE(review): whether the mapper is deleted by this object
     // presumably depends on 'ownership' - confirm with AutoPtr::reset().
238  m_IdMapper.reset(idmapper, ownership);
239  }
240  IIdMapper* GetIdMapper(void) const
241  {
     // Returns the raw pointer currently held by m_IdMapper; ownership
     // is not transferred to the caller by this call.
242  return m_IdMapper.get();
243  }
244 
245  struct STagInfo {
247  {
248  return n.size() == 2 && n[0] == name[0] && n[1] == name[1];
249  }
250 
251  char name[2];
253  };
254  typedef vector<STagInfo> TTagList;
256  {
257  return m_IncludedAlignTags;
258  }
259  // return true if tag was included in this call, false if it was included before
260  bool IncludeAlignTag(CTempString tag);
261  // return true if tag was excluded in this call, false if it wasn't included before
262  bool ExcludeAlignTag(CTempString tag);
263 
264  CRef<CSeq_id> GetRefSeq_id(const string& label) const;
265  CRef<CSeq_id> GetShortSeq_id(const string& str, bool external = false) const;
266 
267  TSeqPos GetRefSeqLength(const string& str) const;
268 
269  string GetHeaderText(void) const;
270 
271 #define HAVE_NEW_PILEUP_COLLECTOR
272 
273 #ifdef HAVE_NEW_PILEUP_COLLECTOR
274 
275  struct SPileupValues;
276 
278  {
279  public:
281 
282  // check if this alignment needs to be processed
283  virtual bool AcceptAlign(const CBamAlignIterator& ait);
284 
285  // count and the number of previously added values or zeros are multiples of 16
286  virtual void AddZerosBy16(TSeqPos count) = 0;
287  // count and the number of previously added values or zeros are multiples of 16
288  virtual void AddValuesBy16(TSeqPos count, const SPileupValues& values) = 0;
289  // final add of fewer than 16 values
290  virtual void AddValuesTail(TSeqPos count, const SPileupValues& values) = 0;
291  };
292 
294  {
295  typedef Uint4 TCount;
296 
297  enum {
298  kStat_A = 0,
299  kStat_C = 1,
300  kStat_G = 2,
301  kStat_T = 3,
302  kStat_Gap = 4,
304  kStat_Intron = 6,
305  kNumStat_ACGT = 4,
306  kNumStat = 7
307  };
308 
309  enum EIntronMode {
311  eCountIntron
312  };
313 
314  TSeqPos m_RefFrom; // current values array start on ref sequence
315  TSeqPos m_RefToOpen; // current values array end on ref sequence
316  TSeqPos m_RefStop; // limit of pileup collection on ref sequence
319 
320  bool count_introns() const
321  {
     // True for every m_IntronMode value other than eNoCountIntron.
322  return m_IntronMode != eNoCountIntron;
323  }
324 
325  struct SCountACGT {
     // One group of kNumStat_ACGT counters.
     // NOTE(review): presumably indexed by kStat_A..kStat_T - confirm.
326  TCount cc[kNumStat_ACGT];
327  };
329  CSimpleBufferT<TCount> cc_split_acgt[kNumStat_ACGT];
333  TCount max_count[kNumStat];
334 
336  explicit SPileupValues(CRange<TSeqPos> ref_range,
337  EIntronMode intron_mode = eNoCountIntron);
338 
339  void initialize(CRange<TSeqPos> ref_range,
340  EIntronMode intron_mode = eNoCountIntron);
342 
343  const TCount* get_acgt_counts() const
344  {
     // Returns a pointer to the 'cc' array of the first SCountACGT
     // element stored in cc_acgt.
     // NOTE(review): callers presumably read it as a flat sequence of
     // kNumStat_ACGT counters per position - confirm.
345  return cc_acgt.data()->cc;
346  }
349  {
350  if ( m_SplitACGTLen < len ) {
351  const_cast<SPileupValues*>(this)->make_split_acgt(len);
352  }
353  return cc_split_acgt[k].data();
354  }
355  const TCount* get_match_counts() const
356  {
     // Read-only pointer to the start of the cc_match buffer.
357  return cc_match.data();
358  }
359  const TCount* get_gap_counts() const
360  {
     // Read-only pointer to the start of the cc_gap buffer.
361  return cc_gap.data();
362  }
363  const TCount* get_intron_counts() const
364  {
     // Read-only pointer to the start of the cc_intron buffer.
365  return cc_intron.data();
366  }
367 
369  {
370  cc_match[pos] += 1;
371  }
373  {
374  for ( ; pos < end; ++pos ) {
375  cc_match[pos] += 1;
376  }
377  }
379  {
380  _ASSERT(pos < end);
381  cc_gap[pos] += 1;
382  cc_gap[end] -= 1;
383  }
385  {
386  _ASSERT(pos < end);
387  cc_intron[pos] += 1;
388  cc_intron[end] -= 1;
389  }
391  CTempString read, TSeqPos read_pos);
393  CTempString read, TSeqPos read_pos);
394 
395  static const TSeqPos FLUSH_SIZE = 512;
396 
401  // update pileup collection start, the alignments should be coming sorted by start
403  {
404  if ( callback && ref_pos >= m_RefFrom+FLUSH_SIZE && 2*ref_pos >= m_RefFrom+m_RefToOpen ) {
405  advance_current_beg(ref_pos, callback);
406  }
407  }
408  bool trim_ref_range(TSeqPos& ref_pos, TSeqPos& ref_end)
409  {
     // Clips the reference range [ref_pos, ref_end) in place to the
     // collected window and reports whether anything is left to process.
     // ref_end is treated as exclusive: a range with ref_end <= m_RefFrom
     // counts as lying entirely before the window.
     // Returns false (arguments untouched) in that case, true otherwise.
     // Precondition checked by _ASSERT: ref_pos < m_RefStop.
410  _ASSERT(ref_pos < m_RefStop);
411  if ( ref_end <= m_RefFrom ) {
412  // completely before
413  return false;
414  }
415  if ( ref_pos < m_RefFrom ) {
     // range starts before the window: move its start up to m_RefFrom
416  ref_pos = m_RefFrom;
417  }
418  if ( ref_end > m_RefStop ) {
     // range runs past the collection limit: cut it at m_RefStop
419  ref_end = m_RefStop;
420  }
421  if ( ref_end > m_RefToOpen ) {
     // range reaches beyond the current values array end, so
     // advance_current_end() is called with the (clipped) end first
422  advance_current_end(ref_end);
423  }
424  return true;
425  }
426  bool trim_ref_range(TSeqPos& ref_pos, TSeqPos& ref_end, TSeqPos& read_pos)
427  {
     // Same clipping as the two-argument trim_ref_range(), but also keeps
     // read_pos in step: when the start of the range is moved up to
     // m_RefFrom, read_pos is advanced by the same number of positions.
     // Returns false (arguments untouched) when ref_end <= m_RefFrom,
     // true otherwise.
     // Precondition checked by _ASSERT: ref_pos < m_RefStop.
428  _ASSERT(ref_pos < m_RefStop);
429  if ( ref_end <= m_RefFrom ) {
430  // completely before
431  return false;
432  }
433  if ( ref_pos < m_RefFrom ) {
434  // skip read
435  read_pos += m_RefFrom - ref_pos;
436  ref_pos = m_RefFrom;
437  }
438  if ( ref_end > m_RefStop ) {
     // range runs past the collection limit: cut it at m_RefStop
439  ref_end = m_RefStop;
440  }
441  if ( ref_end > m_RefToOpen ) {
     // range reaches beyond the current values array end, so
     // advance_current_end() is called with the (clipped) end first
442  advance_current_end(ref_end);
443  }
444  return true;
445  }
446  void add_match_ref_range(TSeqPos ref_pos, TSeqPos ref_end)
447  {
     // Clips the reference range with trim_ref_range(); if something
     // remains, converts both ends to offsets relative to m_RefFrom and
     // passes them to add_match_graph_range(). The arguments are taken
     // by value, so the caller's variables are not modified.
448  if ( trim_ref_range(ref_pos, ref_end) ) {
449  add_match_graph_range(ref_pos - m_RefFrom,
450  ref_end - m_RefFrom);
451  }
452  }
453  void add_gap_ref_range(TSeqPos ref_pos, TSeqPos ref_end)
454  {
     // Clips the reference range with trim_ref_range(); if something
     // remains, converts both ends to offsets relative to m_RefFrom and
     // passes them to add_gap_graph_range().
455  if ( trim_ref_range(ref_pos, ref_end) ) {
456  add_gap_graph_range(ref_pos - m_RefFrom,
457  ref_end - m_RefFrom);
458  }
459  }
460  void add_intron_ref_range(TSeqPos ref_pos, TSeqPos ref_end)
461  {
     // Does nothing unless count_introns() is true. count_introns() is
     // tested first, so trim_ref_range() (which may call
     // advance_current_end()) is not invoked when introns are not counted.
     // Otherwise the clipped range is converted to offsets relative to
     // m_RefFrom and passed to add_intron_graph_range().
462  if ( count_introns() && trim_ref_range(ref_pos, ref_end) ) {
463  add_intron_graph_range(ref_pos - m_RefFrom,
464  ref_end - m_RefFrom);
465  }
466  }
467  void add_bases_ref_range(TSeqPos ref_pos, TSeqPos ref_end,
468  const CTempString& read, TSeqPos read_pos)
469  {
     // Clips the reference range with the three-argument trim_ref_range(),
     // which also advances read_pos when the range start is moved. If
     // something remains, the ends are converted to offsets relative to
     // m_RefFrom and forwarded, together with 'read' and the adjusted
     // read_pos, to add_bases_graph_range().
470  if ( trim_ref_range(ref_pos, ref_end, read_pos) ) {
471  add_bases_graph_range(ref_pos - m_RefFrom,
472  ref_end - m_RefFrom,
473  read, read_pos);
474  }
475  }
476  void add_bases_ref_range_raw(TSeqPos ref_pos, TSeqPos ref_end,
477  const CTempString& read, TSeqPos read_pos)
478  {
     // Same flow as add_bases_ref_range(), but the clipped range is
     // forwarded to add_bases_graph_range_raw().
     // NOTE(review): "raw" presumably refers to the encoding of 'read' -
     // confirm against add_bases_graph_range_raw().
479  if ( trim_ref_range(ref_pos, ref_end, read_pos) ) {
480  add_bases_graph_range_raw(ref_pos - m_RefFrom,
481  ref_end - m_RefFrom,
482  read, read_pos);
483  }
484  }
485 
487 
489  {
490  return max_count[type];
491  }
492  };
493 
494  size_t CollectPileup(SPileupValues& values,
495  const string& ref_id,
496  CRange<TSeqPos> graph_range,
497  ICollectPileupCallback* callback = 0,
498  SPileupValues::EIntronMode intron_mode = SPileupValues::eNoCountIntron,
499  TSeqPos gap_to_intron_threshold = kInvalidSeqPos) const;
500 #endif //HAVE_NEW_PILEUP_COLLECTOR
501 
502 private:
503  friend class CBamRefSeqIterator;
504  friend class CBamAlignIterator;
505 
506  struct SAADBImpl : public CObject
507  {
508  SAADBImpl(const CBamMgr& mgr, const string& db_name);
509  SAADBImpl(const CBamMgr& mgr, const string& db_name, string& idx_name);
510 
511  mutable CMutex m_Mutex;
513  };
514 
515  string m_DbName;
516  string m_IndexName;
519  typedef unordered_map<string, TSeqPos> TRefSeqLengths;
521  typedef unordered_map<string, CRef<CSeq_id> > TRefSeqIds;
525 };
526 
527 
529 {
530 public:
532  : m_Size(0), m_Capacity(0)
533  {
534  }
535  explicit CBamString(size_t cap)
536  : m_Size(0), m_Capacity(0)
537  {
     // Creates an empty string and pre-allocates storage through
     // reserve(cap).
     // m_Capacity has to be zero-initialized before reserve() runs:
     // reserve() compares capacity() (which returns m_Capacity) with cap,
     // so an uninitialized member would make that comparison read an
     // indeterminate value and could skip the allocation.
538  reserve(cap);
539  }
540 
541  void clear()
542  {
     // Resets the size to zero; m_Capacity and the buffer itself are not
     // touched by this function.
543  m_Size = 0;
     // If the buffer pointer is non-null, terminate it at its first byte.
544  if ( char* p = m_Buffer.get() ) {
545  *p = '\0';
546  }
547  }
548  size_t capacity() const
549  {
     // Returns the stored m_Capacity value. resize() asserts
     // sz+1 <= capacity(), so one byte of it is used for the trailing '\0'.
550  return m_Capacity;
551  }
552  void reserve(size_t min_capacity)
553  {
     // Calls x_reserve(min_capacity) only when the current capacity is
     // smaller than min_capacity; otherwise does nothing.
554  if ( capacity() < min_capacity ) {
555  x_reserve(min_capacity);
556  }
557  }
558 
559  size_t size() const
560  {
     // Returns m_Size, the value last set by resize() or clear(); the
     // '\0' that resize() writes at data()[sz] is not included.
561  return m_Size;
562  }
563  bool empty(void) const
564  {
     // True when m_Size is zero, regardless of the allocated capacity.
565  return m_Size == 0;
566  }
567  const char* data() const
568  {
     // Read-only pointer to the start of the buffer held by m_Buffer.
     // NOTE(review): presumably null until storage has been reserved -
     // confirm with AutoArray's default state.
569  return m_Buffer.get();
570  }
571  char operator[](size_t pos) const
572  {
     // Returns the byte at index 'pos' by value; this function performs
     // no comparison of 'pos' against m_Size or m_Capacity.
573  return m_Buffer[pos];
574  }
575  operator string() const
576  {
     // Implicit conversion: builds a new string from the size() bytes
     // starting at data().
577  return string(data(), size());
578  }
579  operator CTempString() const
580  {
     // Implicit conversion: builds a CTempString from data() and size().
     // NOTE(review): CTempString presumably references this object's
     // buffer rather than copying it, so the result must not outlive or
     // straddle a modification of this CBamString - confirm.
581  return CTempString(data(), size());
582  }
583 
584  char* data()
585  {
     // Writable pointer to the start of the buffer held by m_Buffer;
     // resize() uses it to place the terminating '\0'.
586  return m_Buffer.get();
587  }
588  void resize(size_t sz)
589  {
     // Sets the size to 'sz' and writes a terminating '\0' at data()[sz].
     // This function never allocates: the _ASSERT states the requirement
     // that the buffer already has room for sz characters plus the
     // terminator, so callers have to reserve() beforehand.
590  _ASSERT(sz+1 <= capacity());
591  m_Size = sz;
592  data()[sz] = '\0';
593  }
594 
595 private:
596  size_t m_Size;
597  size_t m_Capacity;
599 
600  void x_reserve(size_t min_capacity);
601 
602 private:
604  void operator=(const CBamString&);
605 };
606 
607 
608 inline
610 {
611  return out.write(str.data(), str.size());
612 }
613 
614 
616 {
617 public:
619  explicit CBamRefSeqIterator(const CBamDb& bam_db);
620 
623 
624  DECLARE_OPERATOR_BOOL(m_AADBImpl || m_RawDB);
625 
626  IIdMapper* GetIdMapper(void) const
627  {
     // Forwards to the id mapper of the CBamDb this iterator points to.
     // NOTE(review): m_DB is a raw pointer and is dereferenced without a
     // null check - confirm it is always set before this can be called.
628  return m_DB->GetIdMapper();
629  }
630 
631  CBamRefSeqIterator& operator++(void);
632 
633  CTempString GetRefSeqId(void) const;
634  CRef<CSeq_id> GetRefSeq_id(void) const;
635 
636  TSeqPos GetLength(void) const;
637 
638 private:
639  typedef rc_t (*TGetString)(const AlignAccessRefSeqEnumerator *self,
640  char *buffer, size_t bsize, size_t *size);
641 
642  void x_AllocBuffers(void);
643  void x_InvalidateBuffers(void);
644 
645  void x_CheckValid(void) const;
646  bool x_CheckRC(CBamString& buf,
647  rc_t rc, size_t size, const char* msg) const;
648  void x_GetString(CBamString& buf,
649  const char* msg, TGetString func) const;
650 
651  struct SAADBImpl : public CObject
652  {
655  };
656 
657  const CBamDb* m_DB;
660  size_t m_RefIndex;
662 };
663 
664 
666  : public CBamRef<const BAMAlignment>
667 {
668 public:
669  explicit CBamFileAlign(const CBamAlignIterator& iter);
670 
671  Int4 GetRefSeqIndex(void) const;
672 
673  Uint2 GetFlags(void) const;
674  // returns false if BAM flags are not available
675  bool TryGetFlags(Uint2& flags) const;
676 };
677 
678 
680 {
681 public:
682  enum ESearchMode {
684  eSearchByStart
685  };
686 
687  CBamAlignIterator(void);
688  explicit
689  CBamAlignIterator(const CBamDb& bam_db,
690  const CBGZFPos* file_pos = nullptr);
691  CBamAlignIterator(const CBamDb& bam_db,
692  const string& ref_id,
693  TSeqPos ref_pos,
694  TSeqPos window = 0,
695  ESearchMode search_mode = eSearchByOverlap,
696  const CBGZFPos* file_pos = nullptr);
697  CBamAlignIterator(const CBamDb& bam_db,
698  const string& ref_id,
699  TSeqPos ref_pos,
700  TSeqPos window,
701  CBamIndex::EIndexLevel min_level,
702  CBamIndex::EIndexLevel max_level,
703  ESearchMode search_mode = eSearchByOverlap,
704  const CBGZFPos* file_pos = nullptr);
706 
709 
710  DECLARE_OPERATOR_BOOL(m_AADBImpl || m_RawImpl);
711 
712  IIdMapper* GetIdMapper(void) const
713  {
     // Forwards to the id mapper of the CBamDb this iterator points to.
     // m_DB is a raw pointer and the class declares a default constructor
     // that receives no database, so the pointer is checked first and a
     // null mapper is returned instead of dereferencing a null m_DB.
714  return m_DB ? m_DB->GetIdMapper() : nullptr;
715  }
716 
717  /// ISpotIdDetector interface is used to detect spot id in case
718  /// of incorrect flag combination.
719  /// The actual type should be CObject derived.
721  public:
722  virtual ~ISpotIdDetector(void);
723 
724  // The AddSpotId() should append spot id to the short_id argument.
725  virtual void AddSpotId(string& short_id,
726  const CBamAlignIterator* iter) = 0;
727  };
728 
729  void SetSpotIdDetector(ISpotIdDetector* spot_id_detector)
730  {
     // Stores the detector by assigning it to the m_SpotIdDetector member,
     // replacing whatever was assigned before.
     // NOTE(review): the member is presumably a reference-counting holder,
     // which is why the detector type should be CObject derived - confirm.
731  m_SpotIdDetector = spot_id_detector;
732  }
734  {
735  return m_SpotIdDetector.GetNCPointerOrNull();
736  }
737 
738  CBamAlignIterator& operator++(void);
739 
740  bool UsesAlignAccessDB() const
741  {
     // Returns m_AADBImpl converted to bool, i.e. whether this iterator
     // holds an AlignAccess implementation object.
742  return m_AADBImpl;
743  }
744  bool UsesRawIndex() const
745  {
     // Returns m_RawImpl converted to bool, i.e. whether this iterator
     // holds a raw-index implementation object.
746  return m_RawImpl;
747  }
749  {
750  return m_RawImpl? &m_RawImpl.GetNCObject().m_Iter: 0;
751  }
752 
753  Int4 GetRefSeqIndex(void) const;
754  CTempString GetRefSeqId(void) const;
755  TSeqPos GetRefSeqPos(void) const;
756 
757  // next segment in template (mate)
758  Int4 GetNextRefSeqIndex() const; // -1 if no next segment
759  CTempString GetNextRefSeqId(void) const; // "" if no next segment
760  TSeqPos GetNextRefSeqPos() const; // kInvalidSeqPos if no next segment
761 
762  CTempString GetShortSeqId(void) const;
763  CTempString GetShortSeqAcc(void) const;
764  CTempString GetShortSequence(void) const;
765  TSeqPos GetShortSequenceLength(void) const;
766 
767  pair< COpenRange<TSeqPos>, COpenRange<TSeqPos> > GetCIGARAlignment(void) const;
768  TSeqPos GetCIGARPos(void) const;
769  CTempString GetCIGAR(void) const;
770  TSeqPos GetCIGARRefSize(void) const;
771  TSeqPos GetCIGARShortSize(void) const;
772 
773  // raw CIGAR access
774  Uint2 GetRawCIGAROpsCount() const;
775  Uint4 GetRawCIGAROp(Uint2 index) const;
776  void GetRawCIGAR(vector<Uint4>& raw_cigar) const;
777 
778  CRef<CSeq_id> GetRefSeq_id(void) const;
779  CRef<CSeq_id> GetShortSeq_id(void) const;
780  CRef<CSeq_id> GetShortSeq_id(const string& str) const;
781  void SetRefSeq_id(CRef<CSeq_id> seq_id);
782  void SetShortSeq_id(CRef<CSeq_id> seq_id);
783 
784  CRef<CBioseq> GetShortBioseq(void) const;
785  CRef<CSeq_align> GetMatchAlign(void) const;
786  CRef<CSeq_entry> GetMatchEntry(void) const;
787  CRef<CSeq_entry> GetMatchEntry(const string& annot_name) const;
788  CRef<CSeq_annot> GetSeq_annot(void) const;
789  CRef<CSeq_annot> GetSeq_annot(const string& annot_name) const;
790 
791  bool IsSetStrand(void) const;
792  ENa_strand GetStrand(void) const;
793 
794  Uint1 GetMapQuality(void) const;
795 
796  bool IsPaired(void) const;
797  bool IsFirstInPair(void) const;
798  bool IsSecondInPair(void) const;
799  bool IsSecondary(void) const;
800 
801  Uint2 GetFlags(void) const;
802  // returns false if BAM flags are not available
803  bool TryGetFlags(Uint2& flags) const;
804 
805  CBamAuxIterator GetAuxIterator() const;
806 
807 private:
808  friend class CBamFileAlign;
809 
810  typedef rc_t (*TGetString)(const AlignAccessAlignmentEnumerator *self,
811  char *buffer, size_t bsize, size_t *size);
812  typedef rc_t (*TGetString2)(const AlignAccessAlignmentEnumerator *self,
813  uint64_t *start_pos,
814  char *buffer, size_t bsize, size_t *size);
815 
816  void x_CheckValid(void) const;
817  bool x_CheckRC(CBamString& buf,
818  rc_t rc, size_t size, const char* msg) const;
819  void x_GetString(CBamString& buf,
820  const char* msg, TGetString func) const;
821  void x_GetString(CBamString& buf, uint64_t& pos,
822  const char* msg, TGetString2 func) const;
823  void x_GetCIGAR(void) const;
824  bool x_HasAmbiguousMatch() const;
825  void x_GetStrand(void) const;
826 
827  void x_MapId(CSeq_id& id) const;
828 
829  CRef<CSeq_entry> x_GetMatchEntry(const string* annot_name) const;
830  CRef<CSeq_annot> x_GetSeq_annot(const string* annot_name) const;
831 
832  struct SAADBImpl : public CObject {
842  mutable int m_Strand;
843 
844  SAADBImpl(const CBamDb::SAADBImpl& db,
845  AlignAccessAlignmentEnumerator* ptr);
846 
847  TSeqPos GetRefSeqPos() const;
848 
849  void x_InvalidateBuffers();
850  bool x_HasAmbiguousMatch() const;
851  };
852  struct SRawImpl : public CObject {
857 
858  explicit
860  const CBGZFPos* file_pos = nullptr);
862  const string& ref_label,
863  TSeqPos ref_pos,
864  TSeqPos window,
865  ESearchMode search_mode,
866  const CBGZFPos* file_pos = nullptr);
868  const string& ref_label,
869  TSeqPos ref_pos,
870  TSeqPos window,
871  CBamIndex::EIndexLevel min_level,
872  CBamIndex::EIndexLevel max_level,
873  ESearchMode search_mode,
874  const CBGZFPos* file_pos = nullptr);
875 
876  void x_InvalidateBuffers();
877  };
878 
879  const CBamDb* m_DB;
882 
885  eStrand_not_read = -2,
886  eStrand_not_set = -1
887  };
891  eBamFlags_Available
892  };
896 
898  struct SCreateCache;
900 
901  SCreateCache& x_GetCreateCache(void) const;
902 };
903 
904 
905 inline
907 CBamAlignIterator::GetMatchEntry(const string& annot_name) const
908 {
909  return x_GetMatchEntry(&annot_name);
910 }
911 
912 
913 inline
915 {
916  return x_GetMatchEntry(0);
917 }
918 
919 
920 inline
922 CBamAlignIterator::GetSeq_annot(const string& annot_name) const
923 {
924  return x_GetSeq_annot(&annot_name);
925 }
926 
927 
928 inline
930 {
931  return x_GetSeq_annot(0);
932 }
933 
934 
935 inline
937 {
938  return m_RawImpl->m_Iter.GetCIGAROpsCount();
939 }
940 
941 
942 inline
944 {
945  return m_RawImpl->m_Iter.GetCIGAROp(index);
946 }
947 
948 
951 
952 #endif // SRA__READER__BAM__BAMREAD__HPP
BEGIN_NAMESPACE(objects)
CNcbiOstream & operator<<(CNcbiOstream &out, const CBamString &str)
Definition: bamread.hpp:609
END_NCBI_NAMESPACE
Definition: bamread.hpp:950
END_NAMESPACE(objects)
BEGIN_NCBI_NAMESPACE
Definition: bamread.hpp:61
SPECIALIZE_BAM_REF_TRAITS(AlignAccessMgr, const)
uint32_t rc_t
ISpotIdDetector interface is used to detect spot id in case of incorrect flag combination.
Definition: bamread.hpp:720
virtual void AddSpotId(string &short_id, const CBamAlignIterator *iter)=0
ISpotIdDetector * GetSpotIdDetector(void) const
Definition: bamread.hpp:733
CIRef< ISpotIdDetector > m_SpotIdDetector
Definition: bamread.hpp:883
CRef< CSeq_annot > GetSeq_annot(void) const
Definition: bamread.hpp:929
AutoPtr< SCreateCache > m_CreateCache
Definition: bamread.hpp:898
DECLARE_OPERATOR_BOOL(m_AADBImpl||m_RawImpl)
CRef< CSeq_annot > x_GetSeq_annot(const string *annot_name) const
Definition: bamread.cpp:3167
CRef< SRawImpl > m_RawImpl
Definition: bamread.hpp:881
Uint2 GetRawCIGAROpsCount() const
Definition: bamread.hpp:936
CRef< CObject_id > TObjectIdCache
Definition: bamread.hpp:897
void x_MapId(CSeq_id &id) const
CBamRawAlignIterator * GetRawIndexIteratorPtr() const
Definition: bamread.hpp:748
IIdMapper * GetIdMapper(void) const
Definition: bamread.hpp:712
void SetSpotIdDetector(ISpotIdDetector *spot_id_detector)
Definition: bamread.hpp:729
EBamFlagsAvailability m_BamFlagsAvailability
Definition: bamread.hpp:893
CRef< CSeq_id > m_ShortSeq_id
Definition: bamread.hpp:895
bool UsesRawIndex() const
Definition: bamread.hpp:744
CRef< SAADBImpl > m_AADBImpl
Definition: bamread.hpp:880
bool UsesAlignAccessDB() const
Definition: bamread.hpp:740
Uint4 GetRawCIGAROp(Uint2 index) const
Definition: bamread.hpp:943
const CBamDb * m_DB
Definition: bamread.hpp:879
CRef< CSeq_id > m_RefSeq_id
Definition: bamread.hpp:894
CRef< CSeq_entry > GetMatchEntry(void) const
Definition: bamread.hpp:914
CRef< CSeq_entry > x_GetMatchEntry(const string *annot_name) const
Definition: bamread.cpp:3185
virtual void AddValuesTail(TSeqPos count, const SPileupValues &values)=0
virtual void AddZerosBy16(TSeqPos count)=0
virtual bool AcceptAlign(const CBamAlignIterator &ait)
virtual void AddValuesBy16(TSeqPos count, const SPileupValues &values)=0
string m_IndexName
Definition: bamread.hpp:516
AutoPtr< TRefSeqLengths > m_RefSeqLengths
Definition: bamread.hpp:520
CBamDb(void)
Definition: bamread.hpp:194
@ eUseDefaultAPI
Definition: bamread.hpp:190
@ eUseAlignAccess
Definition: bamread.hpp:191
string m_DbName
Definition: bamread.hpp:515
DECLARE_OPERATOR_BOOL(m_AADB||m_RawDB)
CRef< SAADBImpl > m_AADB
Definition: bamread.hpp:523
bool UsesAlignAccessDB() const
Definition: bamread.hpp:212
size_t CollectPileup(SPileupValues &values, const string &ref_id, CRange< TSeqPos > graph_range, ICollectPileupCallback *callback=0, SPileupValues::EIntronMode intron_mode=SPileupValues::eNoCountIntron, TSeqPos gap_to_intron_threshold=kInvalidSeqPos) const
bool UsesRawIndex() const
Definition: bamread.hpp:216
TTagList m_IncludedAlignTags
Definition: bamread.hpp:518
AutoPtr< IIdMapper > m_IdMapper
Definition: bamread.hpp:517
void SetIdMapper(IIdMapper *idmapper, EOwnership ownership)
Definition: bamread.hpp:236
const string & GetDbName(void) const
Definition: bamread.hpp:225
vector< STagInfo > TTagList
Definition: bamread.hpp:254
unordered_map< string, CRef< CSeq_id > > TRefSeqIds
Definition: bamread.hpp:521
AutoPtr< TRefSeqIds > m_RefSeqIds
Definition: bamread.hpp:522
IIdMapper * GetIdMapper(void) const
Definition: bamread.hpp:240
const TTagList & GetIncludedAlignTags() const
Definition: bamread.hpp:255
const string & GetIndexName(void) const
Definition: bamread.hpp:229
CBamRawDb & GetRawDb()
Definition: bamread.hpp:220
CRef< CObjectFor< CBamRawDb > > m_RawDB
Definition: bamread.hpp:524
unordered_map< string, TSeqPos > TRefSeqLengths
Definition: bamread.hpp:519
const CBamRef< const AlignAccessMgr > & GetAlignAccessMgr() const
Definition: bamread.hpp:176
const CBamVFSManager & GetVFSManager() const
Definition: bamread.hpp:172
CBamVFSManager m_VFSMgr
Definition: bamread.hpp:183
CBamRef< const AlignAccessMgr > m_AlignAccessMgr
Definition: bamread.hpp:182
CRef< CObjectFor< CBamRawDb > > m_RawDB
Definition: bamread.hpp:659
DECLARE_OPERATOR_BOOL(m_AADBImpl||m_RawDB)
CRef< CSeq_id > m_CachedRefSeq_id
Definition: bamread.hpp:661
IIdMapper * GetIdMapper(void) const
Definition: bamread.hpp:626
const CBamDb * m_DB
Definition: bamread.hpp:657
CRef< SAADBImpl > m_AADBImpl
Definition: bamread.hpp:658
void clear()
Definition: bamread.hpp:541
size_t m_Capacity
Definition: bamread.hpp:597
size_t capacity() const
Definition: bamread.hpp:548
void reserve(size_t min_capacity)
Definition: bamread.hpp:552
char operator[](size_t pos) const
Definition: bamread.hpp:571
size_t m_Size
Definition: bamread.hpp:596
void operator=(const CBamString &)
AutoArray< char > m_Buffer
Definition: bamread.hpp:598
const char * data() const
Definition: bamread.hpp:567
CBamString(const CBamString &)
CBamString(void)
Definition: bamread.hpp:531
char * data()
Definition: bamread.hpp:584
size_t size() const
Definition: bamread.hpp:559
bool empty(void) const
Definition: bamread.hpp:563
void resize(size_t sz)
Definition: bamread.hpp:588
CBamString(size_t cap)
Definition: bamread.hpp:535
CBamVFSManager(void)
Definition: bamread.hpp:157
CBamRef< VPath > TParent
Definition: bamread.hpp:155
CMutex –.
Definition: ncbimtx.hpp:749
CObjectFor –.
Definition: ncbiobj.hpp:2335
CObject –.
Definition: ncbiobj.hpp:180
Definition: Seq_entry.hpp:56
Reallocable memory buffer (no memory copy overhead) Mimics vector<>, without the overhead of explicit...
const value_type * data() const
@ eBadFormat
Invalid SRZ accession format.
Definition: bamread.hpp:96
NCBI_EXCEPTION_DEFAULT(CSrzException, CException)
@ eMissing_Throw
Definition: bamread.hpp:123
vector< string > m_VolPath
Definition: bamread.hpp:142
vector< string > m_RepPath
Definition: bamread.hpp:141
string FindAccPathNoThrow(const string &acc)
Definition: bamread.hpp:131
string FindAccPath(const string &acc)
Definition: bamread.hpp:127
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
General IdMapper interface.
Definition: iidmapper.hpp:48
Include a standard set of the NCBI C++ Toolkit most basic headers.
@ kStat_C
@ kStat_Match
@ kStat_T
@ kStat_G
@ kNumStat
@ kStat_A
static uch flags
std::ofstream out("events_result.xml")
main entry point for tests
static int type
Definition: getdata.c:31
static const char * str(char *buf, int n)
Definition: stats.c:84
char data[12]
Definition: iconv.c:80
Uint8 uint64_t
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
string
Definition: cgiapp.hpp:687
CException & operator=(const CException &)
Private assignment operator to prohibit assignment.
virtual void x_Init(const CDiagCompileInfo &info, const string &message, const CException *prev_exception, EDiagSev severity)
Helper method for initializing exception data.
Definition: ncbiexpt.cpp:509
EErrCode
Error types that an application can generate.
Definition: ncbiexpt.hpp:884
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
ENa_strand GetStrand(const CSeq_loc &loc, CScope *scope=0)
Returns eNa_strand_unknown if multiple Bioseqs in loc Returns eNa_strand_other if multiple strands in...
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
enum ENcbiOwnership EOwnership
Ownership relations between objects.
#define NCBI_BAMREAD_EXPORT
Definition: ncbi_export.h:1235
static const char label[]
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
char * buf
yy_size_t n
int len
const struct ncbi::grid::netcache::search::fields::SIZE size
const char * tag
Defines NCBI C++ exception handling.
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
int GetDebugLevel()
static pcre_uint8 * buffer
Definition: pcretest.c:1051
@ eNotFound
Not found.
CConstRef< CBamDb::SAADBImpl > m_DB
Definition: bamread.hpp:833
CBamRef< AlignAccessAlignmentEnumerator > m_Iter
Definition: bamread.hpp:835
CRef< CObjectFor< CBamRawDb > > m_RawDB
Definition: bamread.hpp:853
CBamRawAlignIterator m_Iter
Definition: bamread.hpp:854
CBamRef< const AlignAccessDB > m_DB
Definition: bamread.hpp:512
void add_match_graph_pos(TSeqPos pos)
Definition: bamread.hpp:368
TCount get_max_count(int type) const
Definition: bamread.hpp:488
void add_gap_graph_range(TSeqPos pos, TSeqPos end)
Definition: bamread.hpp:378
void advance_current_end(TSeqPos ref_end)
void add_gap_ref_range(TSeqPos ref_pos, TSeqPos ref_end)
Definition: bamread.hpp:453
EIntronMode m_IntronMode
Definition: bamread.hpp:318
void add_bases_ref_range_raw(TSeqPos ref_pos, TSeqPos ref_end, const CTempString &read, TSeqPos read_pos)
Definition: bamread.hpp:476
void add_match_ref_range(TSeqPos ref_pos, TSeqPos ref_end)
Definition: bamread.hpp:446
void decode_intron(TSeqPos len)
void add_bases_ref_range(TSeqPos ref_pos, TSeqPos ref_end, const CTempString &read, TSeqPos read_pos)
Definition: bamread.hpp:467
bool trim_ref_range(TSeqPos &ref_pos, TSeqPos &ref_end, TSeqPos &read_pos)
Definition: bamread.hpp:426
SPileupValues(CRange< TSeqPos > ref_range, EIntronMode intron_mode=eNoCountIntron)
void add_bases_graph_range_raw(TSeqPos pos, TSeqPos end, CTempString read, TSeqPos read_pos)
const TCount * get_intron_counts() const
Definition: bamread.hpp:363
CSimpleBufferT< TCount > cc_gap
Definition: bamread.hpp:330
void add_intron_graph_range(TSeqPos pos, TSeqPos end)
Definition: bamread.hpp:384
void update_max_counts(TSeqPos len)
CSimpleBufferT< TCount > cc_intron
Definition: bamread.hpp:332
bool trim_ref_range(TSeqPos &ref_pos, TSeqPos &ref_end)
Definition: bamread.hpp:408
void add_bases_graph_range(TSeqPos pos, TSeqPos end, CTempString read, TSeqPos read_pos)
const TCount * get_match_counts() const
Definition: bamread.hpp:355
void add_intron_ref_range(TSeqPos ref_pos, TSeqPos ref_end)
Definition: bamread.hpp:460
CSimpleBufferT< TCount > cc_match
Definition: bamread.hpp:331
void finalize(ICollectPileupCallback *callback)
CSimpleBufferT< SCountACGT > cc_acgt
Definition: bamread.hpp:328
void initialize(CRange< TSeqPos > ref_range, EIntronMode intron_mode=eNoCountIntron)
const TCount * get_acgt_counts() const
Definition: bamread.hpp:343
void add_match_graph_range(TSeqPos pos, TSeqPos end)
Definition: bamread.hpp:372
void advance_current_beg(TSeqPos ref_pos, ICollectPileupCallback *callback)
const TCount * get_split_acgt_counts(int k, TSeqPos len) const
Definition: bamread.hpp:348
void decode_gap(TSeqPos len)
void update_current_ref_start(TSeqPos ref_pos, ICollectPileupCallback *callback)
Definition: bamread.hpp:402
const TCount * get_gap_counts() const
Definition: bamread.hpp:359
void make_split_acgt(TSeqPos len)
bool count_introns() const
Definition: bamread.hpp:320
bool operator==(CTempString n) const
Definition: bamread.hpp:246
CRef< CObject_id > id_cache
Definition: bamread.hpp:252
CBamRef< AlignAccessRefSeqEnumerator > m_Iter
Definition: bamread.hpp:653
Definition: type.c:6
#define _ASSERT
Modified on Mon Apr 22 04:04:56 2024 by modify_doxy.py rev. 669887