NCBI C++ ToolKit
bamindex.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef SRA__READER__BAM__BAMINDEX__HPP
2 #define SRA__READER__BAM__BAMINDEX__HPP
3 /* $Id: bamindex.hpp 96759 2022-05-06 17:12:10Z vasilche $
4  * ===========================================================================
5  *
6  * PUBLIC DOMAIN NOTICE
7  * National Center for Biotechnology Information
8  *
9  * This software/database is a "United States Government Work" under the
10  * terms of the United States Copyright Act. It was written as part of
11  * the author's official duties as a United States Government employee and
12  * thus cannot be copyrighted. This software/database is freely available
13  * to the public for use. The National Library of Medicine and the U.S.
14  * Government have not placed any restriction on its use or reproduction.
15  *
16  * Although all reasonable efforts have been taken to ensure the accuracy
17  * and reliability of the software and data, the NLM and the U.S.
18  * Government do not and cannot warrant the performance or results that
19  * may be obtained by using this software or data. The NLM and the U.S.
20  * Government disclaim all warranties, express or implied, including
21  * warranties of performance, merchantability or fitness for any particular
22  * purpose.
23  *
24  * Please cite the author in any work or product based on this material.
25  *
26  * ===========================================================================
27  *
28  * Authors: Eugene Vasilchenko
29  *
30  * File Description:
31  * Access to BAM index files
32  *
33  */
34 
35 #include <corelib/ncbistd.hpp>
36 #include <util/range.hpp>
37 #include <sra/readers/bam/bgzf.hpp>
39 
42 
43 class CSeq_annot;
44 class CBGZFStream;
45 class CBamString;
46 
48 {
49  void Read(CBGZFStream& in);
50 
51  string m_Name;
53 };
54 
55 
57 {
58 public:
59  CBamHeader();
60  explicit
61  CBamHeader(const string& bam_file_name);
62  ~CBamHeader();
63 
64  void Read(CBGZFStream& stream);
65  void Read(const string& bam_file_name);
66 
67  const string& GetText() const
68  {
69  return m_Text;
70  }
71 
73  typedef pair<string, TSBamTags> TSBamRecord;
74  typedef list<TSBamRecord> TSBamRecords;
75  // parse m_Text (SAM header), return number of records
76  size_t GetSBamRecords(TSBamRecords& records) const;
77 
78  typedef vector<SBamHeaderRefInfo> TRefs;
79  const TRefs& GetRefs() const
80  {
81  return m_Refs;
82  }
83 
84  size_t GetRefCount() const
85  {
86  return m_Refs.size();
87  }
88  const SBamHeaderRefInfo& GetRef(size_t ref_index) const;
89  size_t GetRefIndex(const string& name) const;
90  const string& GetRefName(size_t index) const
91  {
92  return m_Refs[index].m_Name;
93  }
94  TSeqPos GetRefLength(size_t index) const
95  {
96  return m_Refs[index].m_Length;
97  }
98 
100 
102  {
103  return m_AlignStart;
104  }
105 
106 private:
107  string m_Text;
111 };
112 
113 
115 {
116  enum ESearchMode {
119  };
120  typedef uint32_t TBin;
122  // Unfortunately, the bins and index levels are ordered in reverse:
123  // the smallest-by-size bins are at min index level (0) and have the largest bin numbers
124  // bin size = smallest, index level = smallest (=0), bin numbers = largest
125  // the largest by size bin is at max index level (variable) and has bin number = 0
126  // bin size = largest, index level = largest, bin number = smallest (=0)
127  //
128  // To avoid ambiguity we use Min and Max always with Bin and {Index}Level
129  // MinBin refers to the bins with the smallest size (unfortunately with largest bin numbers)
130  // MaxBin refers to the bin with largest size (unfortunately with smallest bin number = 0)
131  // Min{Index}Level refers to index level 0 (largest bin numbers, but smallest bin size)
132  // Max{Index}Level refers to max index level (smallest bin number, but largest bin size)
133  static const TBin kMaxBinNumber = 0; // single max bin (largest in size)
134  static const TIndexLevel kMinBinIndexLevel = 0; // there're multiple min bins (smallest in size)
135 
136  typedef uint8_t TShift;
137  static const TShift kLevelStepBinShift = 3;
138  static const TShift kBAI_min_shift = 14;
139  static const TIndexLevel kBAI_depth = 5;
140 
142  // number of index levels
143  kMinLevel = 0, // bins smallest in size
146  kMaxLevel = kBAI_depth // special value, to be treated as actual max level
147  };
148 };
149 
150 #define BAM_SUPPORT_CSI
151 
153 {
154 #ifdef BAM_SUPPORT_CSI
155  bool is_CSI;
158  constexpr TShift GetMinLevelBinShift() const
159  {
160  return min_shift;
161  }
162  constexpr TIndexLevel GetMaxIndexLevel() const
163  {
164  return depth;
165  }
166  constexpr TIndexLevel ToIndexLevel(EIndexLevel level) const
167  {
168  return level == kMaxLevel? GetMaxIndexLevel(): TIndexLevel(level);
169  }
170 #else
171  static const bool is_CSI = false;
172  static constexpr TShift GetMinLevelBinShift()
173  {
174  return kBAI_min_shift;
175  }
176  static constexpr TIndexLevel GetMaxIndexLevel()
177  {
178  return kBAI_depth;
179  }
180  static constexpr TIndexLevel ToIndexLevel(EIndexLevel level)
181  {
182  return TIndexLevel(level); // direct mapping
183  }
184 #endif
185 
186  // return bit shift for size of bin on a specific index level
187  constexpr TShift GetLevelBinShift(TIndexLevel level) const
188  {
189  return GetMinLevelBinShift() + kLevelStepBinShift*level;
190  }
191  constexpr TShift GetLevelBinShift(EIndexLevel level) const
192  {
193  return GetLevelBinShift(ToIndexLevel(level));
194  }
195  // return size of bin on a specific index level
196  constexpr TSeqPos GetBinSize(TIndexLevel level) const
197  {
198  return TSeqPos(1) << GetLevelBinShift(level);
199  }
200  constexpr TSeqPos GetBinSize(EIndexLevel level) const
201  {
202  return GetBinSize(ToIndexLevel(level));
203  }
204  constexpr TShift GetMinBinShift() const
205  {
207  }
208  constexpr TSeqPos GetMinBinSize() const
209  {
211  }
212  constexpr TSeqPos GetMaxBinSize() const
213  {
214  return GetBinSize(GetMaxIndexLevel());
215  }
216 
217  // Min bin size is page size
218  constexpr TSeqPos GetPageSize() const
219  {
220  return GetMinBinSize();
221  }
222  // number of bits to shift to convert between page and position
223  constexpr TShift GetPageShift() const
224  {
225  return GetMinBinShift();
226  }
227 
228  // normal TIndexLevel=0 - has smallest bin sizes and bins numbers are biggest
229  // for bin number calculation it's better to count from the maximal level with bin=0
230  constexpr TBin GetBinNumberBaseReversed(int reversed_level) const
231  {
232  // (kAllowedLevels*kLevelStepBinShift+1) bits must fit in unsigned
233  constexpr int kAllowedLevels = 10;
234  constexpr unsigned kBaseBits =
235  ((1u<<(kAllowedLevels*kLevelStepBinShift))-1)/((1<<kLevelStepBinShift)-1);
236  return kBaseBits >> ((kAllowedLevels-reversed_level)*kLevelStepBinShift);
237  }
238  // base bin number of a specific index level
239  constexpr TBin GetBinNumberBase(int level) const
240  {
242  }
243  constexpr TBin GetBinNumberBase(EIndexLevel level) const
244  {
245  return GetBinNumberBase(ToIndexLevel(level));
246  }
247  // base for bin numbers calculation
248  constexpr TBin GetMinBinNumberBase() const
249  {
250  // kBinNumberBase == 4681 == 011111 in octal for 5 levels with 3 bits per level
252  }
253  constexpr TBin GetFirstOverflowBin(TIndexLevel level = 0) const
254  {
255  return GetBinNumberBase(level-1);
256  }
257  constexpr TBin GetFirstBin(TIndexLevel level) const
258  {
259  return GetBinNumberBase(level);
260  }
261  constexpr TBin GetLastBin(TIndexLevel level) const
262  {
263  return GetBinNumberBase(level-1)-1;
264  }
265  constexpr TBin GetPseudoBin() const
266  {
267  return GetFirstOverflowBin()+1;
268  }
269  bool IsOverflowBin(TBin bin, TIndexLevel level = 0) const
270  {
271  return bin >= GetFirstOverflowBin(level);
272  }
273  bool IsOverflowPos(TSeqPos pos) const
274  {
275  return pos < GetMaxBinSize();
276  }
278  {
279  return TBin(pos >> GetLevelBinShift(level));
280  }
282  {
283  return GetBinNumberOffset(pos, ToIndexLevel(level));
284  }
286  {
287  return GetBinNumberBase(level) + GetBinNumberOffset(pos, level);
288  }
290  {
291  return GetBinNumber(pos, ToIndexLevel(level));
292  }
293  // return range of bins from an index level covering a sequence range
294  // the range may be empty (second < first) if sequence range is beyond index
295  pair<TBin, TBin> GetBinRange(COpenRange<TSeqPos> ref_range,
296  TIndexLevel index_level) const;
298  {
299  _ASSERT(bin != 0);
300  return IsOverflowBin(bin)? 0: (bin-1)>>kLevelStepBinShift;
301  }
303  {
304  TBin bin_start = GetMinBinNumberBase();
305  for ( TIndexLevel level = 0; ; ++level, bin_start >>= kLevelStepBinShift ) {
306  if ( bin >= bin_start ) {
307  return level;
308  }
309  }
310  }
312  {
313  TIndexLevel level = 0;
314  auto local_min_shift = GetMinLevelBinShift();
315  TSeqPos pos1 = range.GetFrom() >> local_min_shift;
316  TSeqPos pos2 = range.GetTo() >> local_min_shift;
317  while ( level < GetMaxIndexLevel() && pos1 != pos2 ) {
318  ++level;
319  pos1 >>= kLevelStepBinShift;
320  pos2 >>= kLevelStepBinShift;
321  }
322  return level;
323  }
324 
326  {
327  TIndexLevel level = Bin2IndexLevel(bin);
328  TSeqPos len = GetBinSize(level);
329  TSeqPos index = bin - GetBinNumberBase(level);
330  TSeqPos pos = index*len;
331  return COpenRange<TSeqPos>(pos, pos+len);
332  }
333 };
334 
335 
337 {
338  void Read(CNcbiIstream& in,
339  SBamIndexParams params);
340  const char* Read(const char* buffer_ptr, const char* buffer_end,
341  SBamIndexParams params);
342 
344  {
345  return params.GetSeqRange(m_Bin);
346  }
347 
349 #ifdef BAM_SUPPORT_CSI
351 #endif
352  vector<CBGZFRange> m_Chunks;
353 
355  {
356  return m_Chunks.front().first;
357  }
359  {
360  return m_Chunks.back().second;
361  }
362 };
363 static inline bool operator<(const SBamIndexBinInfo& b1, const SBamIndexBinInfo& b2)
364 {
365  return b1.m_Bin < b2.m_Bin;
366 }
367 static inline bool operator<(const SBamIndexBinInfo& b1, SBamIndexBinInfo::TBin b2)
368 {
369  return b1.m_Bin < b2;
370 }
371 static inline bool operator<(SBamIndexBinInfo::TBin b1, const SBamIndexBinInfo& b2)
372 {
373  return b1 < b2.m_Bin;
374 }
375 
376 
378 {
379  const char* Read(const char* buffer_ptr, const char* buffer_end,
380  SBamIndexParams params,
381  int32_t ref_index);
382  void Read(CNcbiIstream& in,
383  SBamIndexParams params,
384  int32_t ref_index);
385 
386  // return limits of data in file based on linear index
387  // also adjusts argument ref_range to be within reference sequence
388  CBGZFRange GetLimitRange(COpenRange<TSeqPos>& ref_range,
389  ESearchMode search_mode) const;
390 
391  CBGZFRange GetFileRange() const;
392  vector<uint64_t> CollectEstimatedCoverage(TIndexLevel min_index_level,
393  TIndexLevel max_index_level) const;
394  vector<uint64_t> CollectEstimatedCoverage(EIndexLevel min_index_level,
395  EIndexLevel max_index_level) const
396  {
397  return CollectEstimatedCoverage(ToIndexLevel(min_index_level),
398  ToIndexLevel(max_index_level));
399  }
400  vector<Uint8> EstimateDataSizeByAlnStartPos(TSeqPos seqlen = kInvalidSeqPos) const;
401 
402  // return array of min start position of alignments overlapping with each page
403  // may return shorter array if the remaining alignments are completely within their page
404  vector<TSeqPos> GetAlnOverStarts(void) const;
405  // return array of max end position of alignments overlapping with each page
406  // may return shorter array if the remaining alignments are completely within their page
407  vector<TSeqPos> GetAlnOverEnds(void) const;
408 
409 
410  typedef vector<SBamIndexBinInfo> TBins;
411  typedef TBins::const_iterator TBinsIter;
412  pair<TBinsIter, TBinsIter> GetLevelBins(TIndexLevel level) const;
413  pair<TBinsIter, TBinsIter> GetLevelBins(EIndexLevel level) const
414  {
415  return GetLevelBins(ToIndexLevel(level));
416  }
417  // add file ranges with alignments from specific index level
418  // return first bin in the range, and first bin iter after the range
419  // the TBinsIter range is always valid, if no bins in the range both iters are the same
420  pair<TBinsIter, TBinsIter> AddLevelFileRanges(vector<CBGZFRange>& ranges,
421  CBGZFRange limit_file_range,
422  pair<TBin, TBin> bin_range) const;
423  pair<TBinsIter, TBinsIter> GetBinsIterRange(pair<TBin, TBin> bin_range) const;
424 
425  void SetLengthFromHeader(TSeqPos length);
426  void ProcessBin(const SBamIndexBinInfo& bin);
427  bool ProcessPseudoBin(SBamIndexBinInfo& bin);
428 
433  vector<CBGZFPos> m_Overlaps;
434  // estimation of sequence length for practical use, rounded to min bin size
436 };
437 
438 
440 {
441 public:
442  CBamIndex();
443  explicit
444  CBamIndex(const string& index_file_name);
445  ~CBamIndex();
446 
447  const string& GetFileName() const
448  {
449  return m_FileName;
450  }
451 
452  void Read(const string& index_file_name);
453  void Read(const char* buffer_ptr, size_t buffer_size);
454  void Read(CNcbiIstream& in);
455 
456  typedef vector<SBamIndexRefIndex> TRefs;
457  const TRefs& GetRefs() const
458  {
459  return m_Refs;
460  }
461  size_t GetRefCount() const
462  {
463  return m_Refs.size();
464  }
465  const SBamIndexRefIndex& GetRef(size_t ref_index) const;
466  void SetLengthFromHeader(const CBamHeader& header);
467 
468  CBGZFRange GetTotalFileRange(size_t ref_index) const;
469 
471  MakeEstimatedCoverageAnnot(const CBamHeader& header,
472  const string& ref_name,
473  const string& seq_id,
474  const string& annot_name,
475  TIndexLevel min_index_level,
476  TIndexLevel max_index_level) const;
479  const string& ref_name,
480  const string& seq_id,
481  const string& annot_name,
482  EIndexLevel min_index_level,
483  EIndexLevel max_index_level) const
484  {
485  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
486  ToIndexLevel(min_index_level),
487  ToIndexLevel(max_index_level));
488  }
491  const string& ref_name,
492  const string& seq_id,
493  const string& annot_name,
494  TIndexLevel min_index_level = 0) const
495  {
496  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
497  min_index_level, GetMaxIndexLevel());
498  }
501  const string& ref_name,
502  const string& seq_id,
503  const string& annot_name,
504  EIndexLevel min_index_level) const
505  {
506  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
507  ToIndexLevel(min_index_level));
508  }
510  MakeEstimatedCoverageAnnot(const CBamHeader& header,
511  const string& ref_name,
512  const CSeq_id& seq_id,
513  const string& annot_name,
514  TIndexLevel min_index_level,
515  TIndexLevel max_index_level) const;
518  const string& ref_name,
519  const CSeq_id& seq_id,
520  const string& annot_name,
521  EIndexLevel min_index_level,
522  EIndexLevel max_index_level) const
523  {
524  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
525  ToIndexLevel(min_index_level),
526  ToIndexLevel(max_index_level));
527  }
530  const string& ref_name,
531  const CSeq_id& seq_id,
532  const string& annot_name,
533  TIndexLevel min_index_level = 0) const
534  {
535  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
536  min_index_level, GetMaxIndexLevel());
537  }
540  const string& ref_name,
541  const CSeq_id& seq_id,
542  const string& annot_name,
543  EIndexLevel min_index_level) const
544  {
545  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
546  ToIndexLevel(min_index_level));
547  }
548 
549 
551  MakeEstimatedCoverageAnnot(size_t ref_index,
552  const string& seq_id,
553  const string& annot_name,
554  TIndexLevel min_index_level,
555  TIndexLevel max_index_level) const;
557  MakeEstimatedCoverageAnnot(size_t ref_index,
558  const string& seq_id,
559  const string& annot_name,
560  EIndexLevel min_index_level,
561  EIndexLevel max_index_level) const
562  {
563  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
564  ToIndexLevel(min_index_level),
565  ToIndexLevel(max_index_level));
566  }
568  MakeEstimatedCoverageAnnot(size_t ref_index,
569  const string& seq_id,
570  const string& annot_name,
571  TIndexLevel min_index_level = 0) const
572  {
573  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
574  min_index_level, GetMaxIndexLevel());
575  }
577  MakeEstimatedCoverageAnnot(size_t ref_index,
578  const string& seq_id,
579  const string& annot_name,
580  EIndexLevel min_index_level) const
581  {
582  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
583  ToIndexLevel(min_index_level));
584  }
586  MakeEstimatedCoverageAnnot(size_t ref_index,
587  const CSeq_id& seq_id,
588  const string& annot_name,
589  TIndexLevel min_index_level,
590  TIndexLevel max_index_level) const;
592  MakeEstimatedCoverageAnnot(size_t ref_index,
593  const CSeq_id& seq_id,
594  const string& annot_name,
595  EIndexLevel min_index_level,
596  EIndexLevel max_index_level) const
597  {
598  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
599  ToIndexLevel(min_index_level),
600  ToIndexLevel(max_index_level));
601  }
603  MakeEstimatedCoverageAnnot(size_t ref_index,
604  const CSeq_id& seq_id,
605  const string& annot_name,
606  TIndexLevel min_index_level = 0) const
607  {
608  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
609  min_index_level, GetMaxIndexLevel());
610  }
612  MakeEstimatedCoverageAnnot(size_t ref_index,
613  const CSeq_id& seq_id,
614  const string& annot_name,
615  EIndexLevel min_index_level) const
616  {
617  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
618  ToIndexLevel(min_index_level));
619  }
620 
622  MakeEstimatedCoverageAnnot(size_t ref_index,
623  const string& seq_id,
624  const string& annot_name,
625  TSeqPos ref_length,
626  TIndexLevel min_index_level,
627  TIndexLevel max_index_level) const;
629  MakeEstimatedCoverageAnnot(size_t ref_index,
630  const string& seq_id,
631  const string& annot_name,
632  TSeqPos ref_length,
633  EIndexLevel min_index_level,
634  EIndexLevel max_index_level) const
635  {
636  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
637  ToIndexLevel(min_index_level),
638  ToIndexLevel(max_index_level));
639  }
641  MakeEstimatedCoverageAnnot(size_t ref_index,
642  const string& seq_id,
643  const string& annot_name,
644  TSeqPos ref_length,
645  TIndexLevel min_index_level = 0) const
646  {
647  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
648  min_index_level, GetMaxIndexLevel());
649  }
651  MakeEstimatedCoverageAnnot(size_t ref_index,
652  const string& seq_id,
653  const string& annot_name,
654  TSeqPos ref_length,
655  EIndexLevel min_index_level) const
656  {
657  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
658  ToIndexLevel(min_index_level));
659  }
661  MakeEstimatedCoverageAnnot(size_t ref_index,
662  const CSeq_id& seq_id,
663  const string& annot_name,
664  TSeqPos ref_length,
665  TIndexLevel min_index_level,
666  TIndexLevel max_index_level) const;
668  MakeEstimatedCoverageAnnot(size_t ref_index,
669  const CSeq_id& seq_id,
670  const string& annot_name,
671  TSeqPos ref_length,
672  EIndexLevel min_index_level,
673  EIndexLevel max_index_level) const
674  {
675  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
676  ToIndexLevel(min_index_level),
677  ToIndexLevel(max_index_level));
678  }
680  MakeEstimatedCoverageAnnot(size_t ref_index,
681  const CSeq_id& seq_id,
682  const string& annot_name,
683  TSeqPos ref_length,
684  TIndexLevel min_index_level = 0) const
685  {
686  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
687  min_index_level, GetMaxIndexLevel());
688  }
690  MakeEstimatedCoverageAnnot(size_t ref_index,
691  const CSeq_id& seq_id,
692  const string& annot_name,
693  TSeqPos ref_length,
694  EIndexLevel min_index_level) const
695  {
696  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
697  ToIndexLevel(min_index_level));
698  }
699 
700  // collect estimated coverage from index level range
701  // result bin size will be equal to bin size of min_index_level
702  vector<uint64_t>
703  CollectEstimatedCoverage(size_t ref_index,
704  TIndexLevel min_index_level,
705  TIndexLevel max_index_level) const;
706  vector<uint64_t>
707  CollectEstimatedCoverage(size_t ref_index,
708  EIndexLevel min_index_level,
709  EIndexLevel max_index_level) const
710  {
711  return CollectEstimatedCoverage(ref_index,
712  ToIndexLevel(min_index_level),
713  ToIndexLevel(max_index_level));
714  }
715  // collect estimated coverage from specified index level
716  // result bin size will be equal to bin size of index_level
717  vector<uint64_t>
718  CollectEstimatedCoverage(size_t ref_index,
719  TIndexLevel index_level) const
720  {
721  return CollectEstimatedCoverage(ref_index, index_level, index_level);
722  }
723  vector<uint64_t>
724  CollectEstimatedCoverage(size_t ref_index,
725  EIndexLevel index_level) const
726  {
727  return CollectEstimatedCoverage(ref_index, ToIndexLevel(index_level));
728  }
729  // collect estimated coverage from all index levels
730  // result bin size will be equal to bin size of most detailed index level
731  vector<uint64_t>
732  CollectEstimatedCoverage(size_t ref_index) const
733  {
734  return CollectEstimatedCoverage(ref_index, 0, GetMaxIndexLevel());
735  }
736  // collect estimated coverage from all index levels
737  // result bin size will be equal to bin size of most detailed index level
738  vector<uint64_t>
739  EstimateDataSizeByAlnStartPos(size_t ref_index) const
740  {
741  return GetRef(ref_index).EstimateDataSizeByAlnStartPos();
742  }
743 
744  pair<Uint8, double> GetReadStatistics() const
745  {
746  return make_pair(m_TotalReadBytes, m_TotalReadSeconds);
747  }
748 
749 private:
750  string m_FileName;
755 };
756 
757 
758 template<class Position>
760 {
761 public:
762  typedef Position position_type;
764 
765  typedef pair<position_type, position_type> TRange;
767  typedef typename TRanges::iterator iterator;
769 
770  void clear()
771  {
772  m_Ranges.clear();
773  }
774  bool empty() const
775  {
776  return m_Ranges.empty();
777  }
779  {
780  return m_Ranges.begin();
781  }
783  {
784  return m_Ranges.end();
785  }
786 
788  {
789  if ( !(range.first < range.second) ) {
790  // empty range, do nothing
791  return;
792  }
793 
794  // find insertion point
795  // iterator next points to ranges that start after new range start
797  assert(next == m_Ranges.end() || (range.first < next->first));
798 
799  // check for overlapping with previous range
800  iterator iter;
801  if ( next != m_Ranges.begin() &&
802  !((iter = prev(next))->second < range.first) ) {
803  // overlaps with previous range
804  // update it if necessary
805  if ( !(iter->second < range.second) ) {
806  // new range is completely within an old one
807  // no more work to do
808  return;
809  }
810  // need to extend previous range to include inserted range
811  // next ranges may need to be removed
812  }
813  else {
814  // new range, use found iterator as an insertion hint
815  iter = m_Ranges.insert(next, range);
816  // next ranges may need to be removed
817  }
818  assert(iter != m_Ranges.end() && next != m_Ranges.begin() &&
819  iter == prev(next) &&
820  !(range.first < iter->first) &&
821  !(range.second < iter->second));
822 
823  // erase all existing ranges that start within inserted range
824  // and extend inserted range if necessary
825  while ( next != m_Ranges.end() &&
826  !(range.second < next->first) ) {
827  if ( range.second < next->second ) {
828  // range that start within inserted range is bigger,
829  // extend inserted range
830  range.second = next->second;
831  }
832  // erase completely covered range
833  m_Ranges.erase(next++);
834  }
835  // update current range
836  iter->second = range.second;
837  }
838 
840  {
841  add_range(range);
842  return *this;
843  }
844 
845 private:
847 };
848 
849 
851 {
852 public:
854  CBamFileRangeSet(const CBamIndex& index,
855  size_t ref_index, COpenRange<TSeqPos> ref_range,
856  ESearchMode search_mode = eSearchByOverlap);
857  CBamFileRangeSet(const CBamIndex& index,
858  size_t ref_index, COpenRange<TSeqPos> ref_range,
859  TIndexLevel min_level, TIndexLevel max_level,
860  ESearchMode search_mode = eSearchByOverlap);
861  CBamFileRangeSet(const CBamIndex& index,
862  size_t ref_index, COpenRange<TSeqPos> ref_range,
863  EIndexLevel min_level, EIndexLevel max_level,
864  ESearchMode search_mode = eSearchByOverlap);
865  ~CBamFileRangeSet();
866 
867  void Clear()
868  {
869  m_Ranges.clear();
870  }
871  void SetRanges(const CBamIndex& index,
872  size_t ref_index, COpenRange<TSeqPos> ref_range,
873  ESearchMode search_mode = eSearchByOverlap);
874  void AddRanges(const CBamIndex& index,
875  size_t ref_index, COpenRange<TSeqPos> ref_range,
876  ESearchMode search_mode = eSearchByOverlap);
877  void SetRanges(const CBamIndex& index,
878  size_t ref_index, COpenRange<TSeqPos> ref_range,
879  TIndexLevel index_level,
880  ESearchMode search_mode = eSearchByOverlap);
881  void SetRanges(const CBamIndex& index,
882  size_t ref_index, COpenRange<TSeqPos> ref_range,
883  EIndexLevel index_level,
884  ESearchMode search_mode = eSearchByOverlap)
885  {
886  SetRanges(index, ref_index, ref_range, index.ToIndexLevel(index_level), search_mode);
887  }
888  void AddRanges(const CBamIndex& index,
889  size_t ref_index, COpenRange<TSeqPos> ref_range,
890  TIndexLevel index_level,
891  ESearchMode search_mode = eSearchByOverlap);
892  void AddRanges(const CBamIndex& index,
893  size_t ref_index, COpenRange<TSeqPos> ref_range,
894  EIndexLevel index_level,
895  ESearchMode search_mode = eSearchByOverlap)
896  {
897  AddRanges(index, ref_index, ref_range, index.ToIndexLevel(index_level), search_mode);
898  }
899  void SetRanges(const CBamIndex& index,
900  size_t ref_index, COpenRange<TSeqPos> ref_range,
901  TIndexLevel min_index_level, TIndexLevel max_index_level,
902  ESearchMode search_mode = eSearchByOverlap);
903  void SetRanges(const CBamIndex& index,
904  size_t ref_index, COpenRange<TSeqPos> ref_range,
905  EIndexLevel min_index_level, EIndexLevel max_index_level,
906  ESearchMode search_mode = eSearchByOverlap)
907  {
908  SetRanges(index, ref_index, ref_range,
909  index.ToIndexLevel(min_index_level),
910  index.ToIndexLevel(max_index_level),
911  search_mode);
912  }
913  void AddRanges(const CBamIndex& index,
914  size_t ref_index, COpenRange<TSeqPos> ref_range,
915  TIndexLevel min_index_level, TIndexLevel max_index_level,
916  ESearchMode search_mode = eSearchByOverlap);
917  void AddRanges(const CBamIndex& index,
918  size_t ref_index, COpenRange<TSeqPos> ref_range,
919  EIndexLevel min_index_level, EIndexLevel max_index_level,
920  ESearchMode search_mode = eSearchByOverlap)
921  {
922  AddRanges(index, ref_index, ref_range,
923  index.ToIndexLevel(min_index_level),
924  index.ToIndexLevel(max_index_level),
925  search_mode);
926  }
927  void SetWhole(const CBamHeader& header);
928  void AddWhole(const CBamHeader& header);
929 
932 
933  const TRanges& GetRanges() const
934  {
935  return m_Ranges;
936  }
938  {
939  return m_Ranges.begin();
940  }
942  {
943  return m_Ranges.end();
944  }
945 
946  static Uint8 GetFileSize(CBGZFRange range);
947  Uint8 GetFileSize() const;
948 
949 protected:
950  void AddSortedRanges(const vector<CBGZFRange>& ranges);
951 
952 private:
954 };
955 
956 
958 {
959 public:
961  {
962  }
963  explicit
964  CBamRawDb(const string& bam_path)
965  {
966  Open(bam_path);
967  }
968  CBamRawDb(const string& bam_path, const string& index_path)
969  {
970  Open(bam_path, index_path);
971  }
972  ~CBamRawDb();
973 
974 
975  void Open(const string& bam_path);
976  void Open(const string& bam_path, const string& index_path);
977 
978 
979  const CBamHeader& GetHeader() const
980  {
981  return m_Header;
982  }
983  const CBamIndex& GetIndex() const
984  {
985  return m_Index;
986  }
987  const string& GetIndexName() const
988  {
989  return m_Index.GetFileName();
990  }
991  size_t GetRefCount() const
992  {
993  return GetHeader().GetRefCount();
994  }
995  size_t GetRefIndex(const string& ref_label) const
996  {
997  return GetHeader().GetRefIndex(ref_label);
998  }
999  const string& GetRefName(size_t ref_index) const
1000  {
1001  return GetHeader().GetRefName(ref_index);
1002  }
1003  TSeqPos GetRefSeqLength(size_t ref_index) const
1004  {
1005  return GetHeader().GetRefLength(ref_index);
1006  }
1007 
1008 
1010  {
1011  return *m_File;
1012  }
1013 
1014  vector<Uint8> EstimateDataSizeByAlnStartPos(const string& ref_label) const
1015  {
1016  size_t ref_index = GetRefIndex(ref_label);
1017  return GetIndex().GetRef(ref_index).EstimateDataSizeByAlnStartPos(GetRefSeqLength(ref_index));
1018  }
1019 
1020  double GetEstimatedSecondsPerByte() const;
1021 
1022 private:
1026 };
1027 
1028 
1029 class CBamAuxIterator;
1030 
1031 
1033 {
1035  : m_Tag(),
1036  m_DataType(),
1037  m_IsArray(false),
1038  m_ElementCount(),
1039  m_DataPtr(0)
1040  {
1041  }
1042 
1044 
1045  CTempString GetTag() const { return CTempString(m_Tag, 2); }
1046  bool IsTag(char c1, char c2) const { return m_Tag[0] == c1 && m_Tag[1] == c2; }
1047 
1048  char GetDataType() const { return m_DataType; }
1049 
1050  bool IsArray() const { return m_IsArray; }
1051  size_t size() const { return m_ElementCount; }
1052 
1053  bool IsChar() const { return m_DataType == 'A'; }
1054  bool IsString() const { return m_DataType == 'Z' || m_DataType == 'H'; }
1055  bool IsFloat() const { return m_DataType == 'f'; }
1056  bool IsInt() const { return !IsString() && !IsFloat() && !IsChar(); }
1057 
1058  NCBI_BAMREAD_EXPORT char GetChar() const;
1060  NCBI_BAMREAD_EXPORT float GetFloat(size_t index = 0) const;
1061  NCBI_BAMREAD_EXPORT Int8 GetInt(size_t index = 0) const;
1062 
1063 private:
1064  friend class CBamAuxIterator;
1065 
1066  char m_Tag[2];
1069  uint32_t m_ElementCount; // either string length or array element count
1070  const char* m_DataPtr;
1071 };
1072 
1074 {
1075  public:
1077  : m_AuxPtr(0),
1078  m_AuxEnd(0)
1079  {
1080  }
1081  CBamAuxIterator(const char* aux_ptr, const char* aux_end)
1082  : m_AuxPtr(aux_ptr),
1083  m_AuxEnd(aux_end)
1084  {
1085  x_InitData();
1086  }
1087 
1089  {
1090  x_InitData();
1091  return *this;
1092  }
1093 
1095 
1097 
1098  const SBamAuxData& operator*() const { return m_AuxData; }
1099  const SBamAuxData* operator->() const { return &m_AuxData; }
1100 
1101 private:
1103 
1105  const char* m_AuxPtr;
1106  const char* m_AuxEnd;
1107 };
1108 
1110 {
1111  void Read(CBGZFStream& in);
1112 
1114  {
1115  return m_FilePos;
1116  }
1117  size_t get_record_size() const
1118  {
1119  return m_RecordSize;
1120  }
1121  const char* get_record_ptr() const
1122  {
1123  return m_RecordPtr;
1124  }
1125  const char* get_record_end() const
1126  {
1127  return get_record_ptr() + get_record_size();
1128  }
1129 
1131  {
1132  return SBamUtil::MakeUint4(get_record_ptr());
1133  }
1135  {
1136  return SBamUtil::MakeUint4(get_record_ptr()+4);
1137  }
1138 
1140  {
1141  return get_record_ptr()[8];
1142  }
1144  {
1145  return get_record_ptr()[9];
1146  }
1148  {
1149  return SBamUtil::MakeUint2(get_record_ptr()+10);
1150  }
1151  static const char kCIGARSymbols[];
1152  enum ECIGARType { // matches to kCIGARSymbols
1153  kCIGAR_M, // 0
1154  kCIGAR_I, // 1
1155  kCIGAR_D, // 2
1156  kCIGAR_N, // 3
1157  kCIGAR_S, // 4
1158  kCIGAR_H, // 5
1159  kCIGAR_P, // 6
1161  kCIGAR_X // 8
1162  };
1164  {
1165  return SBamUtil::MakeUint2(get_record_ptr()+12);
1166  }
// alignment flag bits, one bit per flag (bit 0 .. bit 11)
enum EFlag {
    fAlign_WasPaired        = 0x001,
    fAlign_IsMappedAsPair   = 0x002,
    fAlign_SelfIsUnmapped   = 0x004,
    fAlign_MateIsUnmapped   = 0x008,
    fAlign_SelfIsReverse    = 0x010,
    fAlign_MateIsReverse    = 0x020,
    fAlign_IsFirst          = 0x040,
    fAlign_IsSecond         = 0x080,
    fAlign_IsNotPrimary     = 0x100,
    fAlign_IsLowQuality     = 0x200,
    fAlign_IsDuplicate      = 0x400,
    fAlign_IsSupplementary  = 0x800
};
1182  {
1183  return SBamUtil::MakeUint2(get_record_ptr()+14);
1184  }
1186  {
1187  return SBamUtil::MakeUint4(get_record_ptr()+16);
1188  }
1190  {
1191  return SBamUtil::MakeUint4(get_record_ptr()+20);
1192  }
1194  {
1195  return SBamUtil::MakeUint4(get_record_ptr()+24);
1196  }
1198  {
1199  return SBamUtil::MakeUint4(get_record_ptr()+28);
1200  }
1201  const char* get_read_name_ptr() const // read name starts at byte 32, right after the fixed-offset fields read by the accessors above
1202  {
1203  return get_record_ptr()+32;
1204  }
1205  const char* get_read_name_end() const // the read name ends where the CIGAR data begins (m_CIGARPtr)
1206  {
1207  return m_CIGARPtr;
1208  }
1209  const char* get_cigar_ptr() const // CIGAR data immediately follows the read name
1210  {
1211  return get_read_name_end();
1212  }
1213  const char* get_cigar_end() const // the CIGAR data ends where the read data begins (m_ReadPtr)
1214  {
1215  return m_ReadPtr;
1216  }
1218  {
1219  return SBamUtil::MakeUint4(get_cigar_ptr()+index*4);
1220  }
1221  void get_cigar(vector<uint32_t>& raw_cigar) const // replaces raw_cigar's contents with all get_cigar_ops_count() CIGAR elements as 32-bit values
1222  {
1223  size_t count = get_cigar_ops_count();
1224  raw_cigar.resize(count); // any previous contents are discarded or overwritten
1225  uint32_t* dst = raw_cigar.data();
1226  memcpy(dst, get_cigar_ptr(), count*sizeof(uint32_t)); // bulk-copy the raw record bytes first
1227  for ( size_t i = 0; i < count; ++i ) {
1228  dst[i] = SBamUtil::MakeUint4(reinterpret_cast<const char*>(dst+i)); // then decode each element in place; presumably MakeUint4 normalizes byte order - confirm in SBamUtil
1229  }
1230  }
1231  void get_cigar(CBamString& dst) const;
1232  const char* get_read_ptr() const // read data immediately follows the CIGAR data
1233  {
1234  return get_cigar_end();
1235  }
1236  const char* get_read_end() const // read data occupies (read_len+1)/2 bytes, i.e. half the read length rounded up; presumably two bases per byte - confirm
1237  {
1238  return get_read_ptr() + (get_read_len()+1)/2;
1239  }
1240  const char* get_phred_quality_ptr() const // quality values immediately follow the read data
1241  {
1242  return get_read_end();
1243  }
1244  const char* get_phred_quality_end() const // quality data spans get_read_len() bytes
1245  {
1246  return get_phred_quality_ptr() + get_read_len();
1247  }
1248  const char* get_aux_data_ptr() const // aux data immediately follows the quality values
1249  {
1250  return get_phred_quality_end();
1251  }
1252  const char* get_aux_data_end() const // aux data runs to the end of the record
1253  {
1254  return get_record_end();
1255  }
1256 
1258  {
1259  return CTempString(get_read_ptr(), (get_read_len()+1)/2);
1260  }
1261  static const char kBaseSymbols[];
1262  string get_read() const;
1263  void get_read(CBamString& str) const;
1264  uint32_t get_cigar_pos() const;
1265  uint32_t get_cigar_ref_size() const;
1266  uint32_t get_cigar_read_size() const;
1267  pair< COpenRange<uint32_t>, COpenRange<uint32_t> > get_cigar_alignment(void) const;
1268  string get_cigar() const;
1269  bool has_ambiguous_match() const;
1270 
1271  SBamAuxData get_aux_data(char c1, char c2, bool allow_missing = false) const;
1272  CTempString get_short_seq_accession_id() const;
1273 
1274 private:
1276  const char* m_RecordPtr;
1277  const char* m_CIGARPtr;
1278  const char* m_ReadPtr;
1280 };
1281 
1282 
1284 {
1285 public:
1287  : m_CurrentRangeEnd(0)
1288  {
1289  }
1290  explicit
1292  : m_Reader(bam_db.GetFile())
1293  {
1294  Select(bam_db);
1295  }
1297  const string& ref_label,
1298  CRange<TSeqPos> ref_range,
1299  ESearchMode search_mode = eSearchByOverlap)
1300  : m_Reader(bam_db.GetFile())
1301  {
1302  Select(bam_db, ref_label, ref_range, search_mode);
1303  }
1305  const string& ref_label,
1306  CRange<TSeqPos> ref_range,
1307  TIndexLevel index_level,
1308  ESearchMode search_mode = eSearchByOverlap)
1309  : m_Reader(bam_db.GetFile())
1310  {
1311  Select(bam_db, ref_label, ref_range, index_level, search_mode);
1312  }
1314  const string& ref_label,
1315  CRange<TSeqPos> ref_range,
1316  EIndexLevel index_level,
1317  ESearchMode search_mode)
1318  : m_Reader(bam_db.GetFile())
1319  {
1320  Select(bam_db, ref_label, ref_range, index_level, search_mode);
1321  }
1323  const string& ref_label,
1324  CRange<TSeqPos> ref_range,
1325  TIndexLevel min_index_level,
1326  TIndexLevel max_index_level,
1327  ESearchMode search_mode = eSearchByOverlap)
1328  : m_Reader(bam_db.GetFile())
1329  {
1330  Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode);
1331  }
1333  const string& ref_label,
1334  CRange<TSeqPos> ref_range,
1335  EIndexLevel min_index_level,
1336  EIndexLevel max_index_level,
1337  ESearchMode search_mode)
1338  : m_Reader(bam_db.GetFile())
1339  {
1340  Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode);
1341  }
1343  const string& ref_label,
1344  TSeqPos ref_pos,
1345  TSeqPos window = 0,
1346  ESearchMode search_mode = eSearchByOverlap);
1348  const string& ref_label,
1349  TSeqPos ref_pos,
1350  TSeqPos window,
1351  TIndexLevel min_index_level,
1352  TIndexLevel max_index_level,
1353  ESearchMode search_mode = eSearchByOverlap);
1355  const string& ref_label,
1356  TSeqPos ref_pos,
1357  TSeqPos window,
1358  EIndexLevel min_index_level,
1359  EIndexLevel max_index_level,
1360  ESearchMode search_mode);
1362  {
1363  }
1364 
1365  DECLARE_OPERATOR_BOOL(m_CurrentRangeEnd);
1366 
1367  void Select(CBamRawDb& bam_db) // select using only the BAM header, with no reference or range argument
1368  {
1369  x_Select(bam_db.GetHeader()); // x_Select(const CBamHeader&) is defined elsewhere; presumably starts iteration over all alignments - confirm
1370  }
1371  void Select(CBamRawDb& bam_db, // select by reference name and range, without an explicit index-level argument
1372  const string& ref_label,
1373  CRange<TSeqPos> ref_range,
1374  ESearchMode search_mode = eSearchByOverlap)
1375  {
1376  x_Select(bam_db.GetIndex(),
1377  bam_db.GetRefIndex(ref_label), ref_range, search_mode); // ref_label is resolved to a reference index by the database
1378  }
1379  void Select(CBamRawDb& bam_db, // select by reference name and range with a single index-level argument (TIndexLevel)
1380  const string& ref_label,
1381  CRange<TSeqPos> ref_range,
1382  TIndexLevel index_level,
1383  ESearchMode search_mode = eSearchByOverlap)
1384  {
1385  x_Select(bam_db.GetIndex(),
1386  bam_db.GetRefIndex(ref_label), ref_range, index_level, search_mode); // ref_label is resolved to a reference index by the database
1387  }
1388  void Select(CBamRawDb& bam_db, // same as the TIndexLevel overload but takes an EIndexLevel; note search_mode has no default here
1389  const string& ref_label,
1390  CRange<TSeqPos> ref_range,
1391  EIndexLevel index_level,
1392  ESearchMode search_mode)
1393  {
1394  x_Select(bam_db.GetIndex(),
1395  bam_db.GetRefIndex(ref_label), ref_range, index_level, search_mode); // ref_label is resolved to a reference index by the database
1396  }
1397  void Select(CBamRawDb& bam_db, // select by reference name and range with min and max index-level arguments (TIndexLevel)
1398  const string& ref_label,
1399  CRange<TSeqPos> ref_range,
1400  TIndexLevel min_index_level,
1401  TIndexLevel max_index_level,
1402  ESearchMode search_mode = eSearchByOverlap)
1403  {
1404  x_Select(bam_db.GetIndex(),
1405  bam_db.GetRefIndex(ref_label), ref_range, min_index_level, max_index_level, search_mode); // ref_label is resolved to a reference index by the database
1406  }
1407  void Select(CBamRawDb& bam_db, // same as the TIndexLevel min/max overload but takes EIndexLevel values
1408  const string& ref_label,
1409  CRange<TSeqPos> ref_range,
1410  EIndexLevel min_index_level,
1411  EIndexLevel max_index_level,
1412  ESearchMode search_mode = eSearchByOverlap)
1413  {
1414  x_Select(bam_db.GetIndex(),
1415  bam_db.GetRefIndex(ref_label), ref_range, min_index_level, max_index_level, search_mode); // ref_label is resolved to a reference index by the database
1416  }
1417  void Select(const CBamIndex& index, // select against an explicit index with an already-resolved reference index
1418  size_t ref_index,
1419  CRange<TSeqPos> ref_range,
1420  ESearchMode search_mode = eSearchByOverlap)
1421  {
1422  x_Select(index, ref_index, ref_range, search_mode); // straight forward to the matching x_Select overload
1423  }
1424  void Select(const CBamIndex& index, // explicit index, resolved reference index, single index-level argument (TIndexLevel)
1425  size_t ref_index,
1426  CRange<TSeqPos> ref_range,
1427  TIndexLevel index_level,
1428  ESearchMode search_mode = eSearchByOverlap)
1429  {
1430  x_Select(index, ref_index, ref_range, index_level, search_mode); // straight forward to the matching x_Select overload
1431  }
1432  void Select(const CBamIndex& index, // explicit index, resolved reference index, single index-level argument (EIndexLevel)
1433  size_t ref_index,
1434  CRange<TSeqPos> ref_range,
1435  EIndexLevel index_level,
1436  ESearchMode search_mode = eSearchByOverlap)
1437  {
1438  x_Select(index, ref_index, ref_range, index_level, search_mode); // straight forward to the matching x_Select overload
1439  }
1440  void Next();
1441 
1443  {
1444  Next();
1445  return *this;
1446  }
1447 
1449  {
1450  return m_AlignInfo.get_file_pos();
1451  }
1452 
1454  {
1455  return m_AlignInfo.get_ref_index();
1456  }
1458  {
1459  return m_AlignRefRange.GetFrom();
1460  }
1461 
1462  // next segment in template (mate)
1464  {
1465  return m_AlignInfo.get_next_ref_index();
1466  }
1468  {
1469  return m_AlignInfo.get_next_ref_pos();
1470  }
1471 
1473  {
1474  return CTempString(m_AlignInfo.get_read_name_ptr(),
1475  m_AlignInfo.get_read_name_len()-1); // exclude trailing zero
1476  }
1478  {
1479  return m_AlignInfo.get_short_seq_accession_id();
1480  }
1482  {
1483  return m_AlignInfo.get_read_len();
1484  }
1485  string GetShortSequence() const // current alignment's read as a string, delegated to SBamAlignInfo::get_read()
1486  {
1487  return m_AlignInfo.get_read();
1488  }
1490  {
1491  return m_AlignInfo.get_read_raw();
1492  }
1494  {
1495  return m_AlignInfo.get_read(str);
1496  }
1497 
1499  {
1500  return m_AlignInfo.get_cigar_ops_count();
1501  }
1502  Uint4 GetCIGAROp(Uint2 index) const // 32-bit CIGAR element at position index, delegated to SBamAlignInfo::get_cigar_op_data(); no bounds check here
1503  {
1504  return m_AlignInfo.get_cigar_op_data(index);
1505  }
1506  void GetCIGAR(vector<Uint4>& raw_cigar) const // fills raw_cigar with the current alignment's CIGAR elements
1507  {
1508  return m_AlignInfo.get_cigar(raw_cigar); // returning a void expression from a void function; equivalent to a plain call
1509  }
1510  void GetCIGAR(CBamString& dst) const // writes the CIGAR into dst via SBamAlignInfo::get_cigar(CBamString&)
1511  {
1512  m_AlignInfo.get_cigar(dst);
1513  }
1515  {
1516  return m_AlignReadRange.GetFrom();
1517  }
1519  {
1520  return m_AlignReadRange.GetLength();
1521  }
1523  {
1524  return m_AlignRefRange.GetLength();
1525  }
1526  pair< COpenRange<TSeqPos>, COpenRange<TSeqPos> > GetCIGARAlignment(void) const // cached ranges: first = range on the reference, second = range on the read
1527  {
1528  return make_pair(m_AlignRefRange, m_AlignReadRange);
1529  }
1530  bool HasAmbiguousMatch() const // delegated to SBamAlignInfo::has_ambiguous_match(), which is defined elsewhere
1531  {
1532  return m_AlignInfo.has_ambiguous_match();
1533  }
1534 
1535  string GetCIGAR() const // CIGAR as a string, delegated to SBamAlignInfo::get_cigar()
1536  {
1537  return m_AlignInfo.get_cigar();
1538  }
1539 
1541  {
1542  return m_AlignInfo.get_bin();
1543  }
1545  {
1546  return Bin2IndexLevel(GetIndexBin());
1547  }
1548 
1549  Uint2 GetFlags() const // raw flag bits of the current alignment; test them with the SBamAlignInfo::EFlag masks
1550  {
1551  return m_AlignInfo.get_flag();
1552  }
1553  // returns false if BAM flags are not available; this implementation always stores them and returns true
1554  bool TryGetFlags(Uint2& flags) const
1555  {
1556  flags = GetFlags(); // unconditional: flags come straight from the current record
1557  return true;
1558  }
1559 
1560  bool IsSetStrand() const // always true for this iterator
1561  {
1562  return true;
1563  }
1565  {
1566  return (GetFlags() & m_AlignInfo.fAlign_SelfIsReverse)?
1568  }
1569 
1570  bool IsMapped() const // true when the fAlign_SelfIsUnmapped bit is clear
1571  {
1572  return (GetFlags() & m_AlignInfo.fAlign_SelfIsUnmapped) == 0;
1573  }
1574 
1576  {
1577  return IsMapped()? m_AlignInfo.get_map_quality(): 0;
1578  }
1579 
1580  bool IsPaired() const // true when the fAlign_IsMappedAsPair bit is set (note: this does not test fAlign_WasPaired)
1581  {
1582  return (GetFlags() & m_AlignInfo.fAlign_IsMappedAsPair) != 0;
1583  }
1584  bool IsFirstInPair() const // true when the fAlign_IsFirst bit is set
1585  {
1586  return (GetFlags() & m_AlignInfo.fAlign_IsFirst) != 0;
1587  }
1588  bool IsSecondInPair() const // true when the fAlign_IsSecond bit is set
1589  {
1590  return (GetFlags() & m_AlignInfo.fAlign_IsSecond) != 0;
1591  }
1592  bool IsSecondary() const // true when the fAlign_IsNotPrimary bit is set
1593  {
1594  return (GetFlags() & m_AlignInfo.fAlign_IsNotPrimary) != 0;
1595  }
1596 
1597  void GetSegments(vector<int>& starts, vector<TSeqPos>& lens) const;
1598 
1600  {
1601  return CBamAuxIterator(m_AlignInfo.get_aux_data_ptr(), m_AlignInfo.get_aux_data_end());
1602  }
1603  SBamAuxData GetAuxData(char c1, char c2, bool allow_missing = false) const // look up the aux field whose two-character tag is (c1, c2); allow_missing is passed through unchanged
1604  {
1605  return m_AlignInfo.get_aux_data(c1, c2, allow_missing); // behavior for an absent tag is decided in SBamAlignInfo::get_aux_data (defined elsewhere)
1606  }
1607  Int8 GetAuxInt(char c1, char c2, size_t index = 0) const // integer value (element index) of the aux field tagged (c1, c2)
1608  {
1609  return GetAuxData(c1, c2).GetInt(index); // GetAuxData is called with its default allow_missing = false
1610  }
1611 
1612 protected:
1613  void x_Select(const CBamHeader& header);
1614  void x_Select(const CBamIndex& index,
1615  size_t ref_index, CRange<TSeqPos> ref_range,
1616  TIndexLevel min_index_level, TIndexLevel max_index_level,
1617  ESearchMode search_mode);
1618  void x_Select(const CBamIndex& index, // EIndexLevel min/max variant: converts both levels and forwards to the TIndexLevel overload
1619  size_t ref_index, CRange<TSeqPos> ref_range,
1620  EIndexLevel min_index_level, EIndexLevel max_index_level,
1621  ESearchMode search_mode)
1622  {
1623  x_Select(index, ref_index, ref_range,
1624  index.ToIndexLevel(min_index_level), // conversion is done by the index itself
1625  index.ToIndexLevel(max_index_level),
1626  search_mode);
1627  }
1628  void x_Select(const CBamIndex& index, // no level given: use the full range 0 .. index.GetMaxIndexLevel()
1629  size_t ref_index, CRange<TSeqPos> ref_range,
1630  ESearchMode search_mode)
1631  {
1632  x_Select(index, ref_index, ref_range, 0, index.GetMaxIndexLevel(), search_mode);
1633  }
1634  void x_Select(const CBamIndex& index, // single TIndexLevel: the same value is passed as both min and max level
1635  size_t ref_index, CRange<TSeqPos> ref_range,
1636  TIndexLevel index_level,
1637  ESearchMode search_mode)
1638  {
1639  x_Select(index, ref_index, ref_range, index_level, index_level, search_mode);
1640  }
1641  void x_Select(const CBamIndex& index, // single EIndexLevel: the same value is passed as both min and max level
1642  size_t ref_index, CRange<TSeqPos> ref_range,
1643  EIndexLevel index_level,
1644  ESearchMode search_mode)
1645  {
1646  x_Select(index, ref_index, ref_range, index_level, index_level, search_mode); // both level arguments are EIndexLevel, so this should resolve to the EIndexLevel min/max overload
1647  }
1648  bool x_UpdateRange();
1650  {
1651  _ASSERT(*this);
1652  return m_Reader.HaveNextAvailableBytes() || x_UpdateRange();
1653  }
1654  void x_Stop() // puts the iterator into its stopped state
1655  {
1656  m_NextRange = m_Ranges.end(); // no further file ranges to visit
1657  m_CurrentRangeEnd = CBGZFPos(0); // DECLARE_OPERATOR_BOOL(m_CurrentRangeEnd) keys on this member; presumably a zero position makes the iterator test false - confirm
1658  }
1659  bool x_NeedToSkip();
1660 
1661 private:
1662  size_t m_RefIndex;
1664  TIndexLevel m_MinIndexLevel, m_MaxIndexLevel;
1673 };
1674 
1675 
1678 
1679 #endif // SRA__READER__BAM__BAMINDEX__HPP
static bool operator<(const SBamIndexBinInfo &b1, const SBamIndexBinInfo &b2)
Definition: bamindex.hpp:363
pair< CBGZFPos, CBGZFPos > CBGZFRange
Definition: bgzf.hpp:272
#define false
Definition: bool.h:36
const char * m_AuxEnd
Definition: bamindex.hpp:1106
SBamAuxData value_type
Definition: bamindex.hpp:1094
CBamAuxIterator & operator++()
Definition: bamindex.hpp:1088
SBamAuxData m_AuxData
Definition: bamindex.hpp:1104
CBamAuxIterator(const char *aux_ptr, const char *aux_end)
Definition: bamindex.hpp:1081
const SBamAuxData & operator*() const
Definition: bamindex.hpp:1098
const char * m_AuxPtr
Definition: bamindex.hpp:1105
const SBamAuxData * operator->() const
Definition: bamindex.hpp:1099
DECLARE_OPERATOR_BOOL(m_AuxData)
CRangeUnion< CBGZFPos > TRanges
Definition: bamindex.hpp:930
void AddRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:917
const_iterator end() const
Definition: bamindex.hpp:941
TRanges m_Ranges
Definition: bamindex.hpp:953
void AddRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:892
const TRanges & GetRanges() const
Definition: bamindex.hpp:933
void SetRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:881
const_iterator begin() const
Definition: bamindex.hpp:937
void SetRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:903
TRanges::const_iterator const_iterator
Definition: bamindex.hpp:931
const string & GetText() const
Definition: bamindex.hpp:67
TRefs m_Refs
Definition: bamindex.hpp:109
map< string, string > TSBamTags
Definition: bamindex.hpp:72
const TRefs & GetRefs() const
Definition: bamindex.hpp:79
vector< SBamHeaderRefInfo > TRefs
Definition: bamindex.hpp:78
size_t GetRefCount() const
Definition: bamindex.hpp:84
CBGZFPos m_AlignStart
Definition: bamindex.hpp:110
list< TSBamRecord > TSBamRecords
Definition: bamindex.hpp:74
CBGZFPos GetAlignStart() const
Definition: bamindex.hpp:101
string m_Text
Definition: bamindex.hpp:107
pair< string, TSBamTags > TSBamRecord
Definition: bamindex.hpp:73
TSeqPos GetRefLength(size_t index) const
Definition: bamindex.hpp:94
static SBamHeaderRefInfo ReadRef(CBGZFStream &in)
const string & GetRefName(size_t index) const
Definition: bamindex.hpp:90
map< string, size_t > m_RefByName
Definition: bamindex.hpp:108
Uint8 m_TotalReadBytes
Definition: bamindex.hpp:753
const TRefs & GetRefs() const
Definition: bamindex.hpp:457
double m_TotalReadSeconds
Definition: bamindex.hpp:754
TRefs m_Refs
Definition: bamindex.hpp:751
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index) const
Definition: bamindex.hpp:732
size_t GetRefCount() const
Definition: bamindex.hpp:461
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:557
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, EIndexLevel index_level) const
Definition: bamindex.hpp:724
const string & GetFileName() const
Definition: bamindex.hpp:447
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level) const
Definition: bamindex.hpp:651
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:668
Uint8 m_UnmappedCount
Definition: bamindex.hpp:752
string m_FileName
Definition: bamindex.hpp:750
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level) const
Definition: bamindex.hpp:539
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, EIndexLevel min_index_level) const
Definition: bamindex.hpp:577
pair< Uint8, double > GetReadStatistics() const
Definition: bamindex.hpp:744
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level) const
Definition: bamindex.hpp:612
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:680
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, EIndexLevel min_index_level) const
Definition: bamindex.hpp:500
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:490
vector< uint64_t > EstimateDataSizeByAlnStartPos(size_t ref_index) const
Definition: bamindex.hpp:739
vector< SBamIndexRefIndex > TRefs
Definition: bamindex.hpp:456
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level) const
Definition: bamindex.hpp:690
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:478
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:568
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:592
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:529
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:517
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:641
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, TIndexLevel index_level) const
Definition: bamindex.hpp:718
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:629
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:603
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:707
Uint2 GetCIGAROpsCount() const
Definition: bamindex.hpp:1498
TSeqPos GetRefSeqPos() const
Definition: bamindex.hpp:1457
Int8 GetAuxInt(char c1, char c2, size_t index=0) const
Definition: bamindex.hpp:1607
Uint1 GetMapQuality() const
Definition: bamindex.hpp:1575
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:1424
Uint2 GetIndexBin() const
Definition: bamindex.hpp:1540
CTempString GetShortSeqAcc() const
Definition: bamindex.hpp:1477
TSeqPos GetCIGARPos() const
Definition: bamindex.hpp:1514
TSeqPos GetNextRefSeqPos() const
Definition: bamindex.hpp:1467
SBamAlignInfo m_AlignInfo
Definition: bamindex.hpp:1666
bool TryGetFlags(Uint2 &flags) const
Definition: bamindex.hpp:1554
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel min_index_level, TIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:1397
ESearchMode m_SearchMode
Definition: bamindex.hpp:1665
int32_t GetNextRefSeqIndex() const
Definition: bamindex.hpp:1463
void GetCIGAR(CBamString &dst) const
Definition: bamindex.hpp:1510
pair< COpenRange< TSeqPos >, COpenRange< TSeqPos > > GetCIGARAlignment(void) const
Definition: bamindex.hpp:1526
CBamFileRangeSet::const_iterator m_NextRange
Definition: bamindex.hpp:1670
bool IsSecondInPair() const
Definition: bamindex.hpp:1588
string GetCIGAR() const
Definition: bamindex.hpp:1535
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:1432
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:1371
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode)
Definition: bamindex.hpp:1618
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:1296
bool IsMapped() const
Definition: bamindex.hpp:1570
TSeqPos GetShortSequenceLength(void) const
Definition: bamindex.hpp:1481
TIndexLevel GetIndexLevel() const
Definition: bamindex.hpp:1544
SBamAuxData GetAuxData(char c1, char c2, bool allow_missing=false) const
Definition: bamindex.hpp:1603
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode)
Definition: bamindex.hpp:1634
TSeqPos GetCIGARRefSize() const
Definition: bamindex.hpp:1522
TSeqPos GetCIGARShortSize() const
Definition: bamindex.hpp:1518
CTempString GetShortSeqId() const
Definition: bamindex.hpp:1472
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:1407
CTempString GetShortSequenceRaw() const
Definition: bamindex.hpp:1489
CBamAuxIterator GetAuxIterator() const
Definition: bamindex.hpp:1599
void Select(CBamRawDb &bam_db)
Definition: bamindex.hpp:1367
CBGZFPos GetFilePos() const
Definition: bamindex.hpp:1448
string GetShortSequence() const
Definition: bamindex.hpp:1485
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:1417
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode)
Definition: bamindex.hpp:1313
CBamFileRangeSet m_Ranges
Definition: bamindex.hpp:1669
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel min_index_level, TIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:1322
Uint4 GetCIGAROp(Uint2 index) const
Definition: bamindex.hpp:1502
bool HasAmbiguousMatch() const
Definition: bamindex.hpp:1530
Uint2 GetFlags() const
Definition: bamindex.hpp:1549
TIndexLevel m_MinIndexLevel
Definition: bamindex.hpp:1664
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode)
Definition: bamindex.hpp:1641
COpenRange< TSeqPos > m_QueryRefRange
Definition: bamindex.hpp:1663
bool IsFirstInPair() const
Definition: bamindex.hpp:1584
bool IsSecondary() const
Definition: bamindex.hpp:1592
bool IsPaired() const
Definition: bamindex.hpp:1580
ENa_strand GetStrand() const
Definition: bamindex.hpp:1564
void GetShortSequence(CBamString &str) const
Definition: bamindex.hpp:1493
DECLARE_OPERATOR_BOOL(m_CurrentRangeEnd)
COpenRange< TSeqPos > m_AlignRefRange
Definition: bamindex.hpp:1667
CBamRawAlignIterator & operator++()
Definition: bamindex.hpp:1442
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, ESearchMode search_mode)
Definition: bamindex.hpp:1628
bool IsSetStrand() const
Definition: bamindex.hpp:1560
CBGZFPos m_CurrentRangeEnd
Definition: bamindex.hpp:1671
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode)
Definition: bamindex.hpp:1332
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode)
Definition: bamindex.hpp:1388
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:1304
COpenRange< TSeqPos > m_AlignReadRange
Definition: bamindex.hpp:1668
int32_t GetRefSeqIndex() const
Definition: bamindex.hpp:1453
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
Definition: bamindex.hpp:1379
void GetCIGAR(vector< Uint4 > &raw_cigar) const
Definition: bamindex.hpp:1506
CBamRawAlignIterator(CBamRawDb &bam_db)
Definition: bamindex.hpp:1291
CBGZFStream m_Reader
Definition: bamindex.hpp:1672
size_t GetRefIndex(const string &ref_label) const
Definition: bamindex.hpp:995
CBamRawDb(const string &bam_path)
Definition: bamindex.hpp:964
CRef< CBGZFFile > m_File
Definition: bamindex.hpp:1023
const string & GetIndexName() const
Definition: bamindex.hpp:987
const string & GetRefName(size_t ref_index) const
Definition: bamindex.hpp:999
size_t GetRefCount() const
Definition: bamindex.hpp:991
const CBamHeader & GetHeader() const
Definition: bamindex.hpp:979
const CBamIndex & GetIndex() const
Definition: bamindex.hpp:983
TSeqPos GetRefSeqLength(size_t ref_index) const
Definition: bamindex.hpp:1003
CBGZFFile & GetFile()
Definition: bamindex.hpp:1009
vector< Uint8 > EstimateDataSizeByAlnStartPos(const string &ref_label) const
Definition: bamindex.hpp:1014
CBamIndex m_Index
Definition: bamindex.hpp:1025
CBamHeader m_Header
Definition: bamindex.hpp:1024
CBamRawDb(const string &bam_path, const string &index_path)
Definition: bamindex.hpp:968
bool empty() const
Definition: bamindex.hpp:774
Position position_type
Definition: bamindex.hpp:762
TRanges::const_iterator const_iterator
Definition: bamindex.hpp:768
TRanges m_Ranges
Definition: bamindex.hpp:846
const_iterator end() const
Definition: bamindex.hpp:782
map< position_type, position_type > TRanges
Definition: bamindex.hpp:766
CRangeUnion< position_type > TThisType
Definition: bamindex.hpp:763
TThisType & operator+=(const TRange &range)
Definition: bamindex.hpp:839
void add_range(TRange range)
Definition: bamindex.hpp:787
const_iterator begin() const
Definition: bamindex.hpp:778
pair< position_type, position_type > TRange
Definition: bamindex.hpp:765
void clear()
Definition: bamindex.hpp:770
TRanges::iterator iterator
Definition: bamindex.hpp:767
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
void erase(iterator pos)
Definition: map.hpp:167
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
bool empty() const
Definition: map.hpp:149
const_iterator upper_bound(const key_type &key) const
Definition: map.hpp:155
void clear()
Definition: map.hpp:169
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
void Read(CObjectIStream &in, TObjectPtr object, const CTypeRef &type)
Definition: serial.cpp:60
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define NCBI_BAMREAD_EXPORT
Definition: ncbi_export.h:1235
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
int i
int len
string GetHeader()
Definition: file_names.hpp:62
range(_Ty, _Ty) -> range< _Ty >
std::istream & in(std::istream &in_, double &x_)
static int buffer_size
Definition: pcretest.c:1050
#define assert(x)
Definition: srv_diag.hpp:58
static const char * str(char *buf, int n)
Definition: stats.c:84
unsigned short uint16_t
Definition: stdint.h:125
unsigned int uint32_t
Definition: stdint.h:126
signed int int32_t
Definition: stdint.h:123
unsigned char uint8_t
Definition: stdint.h:124
const char * get_phred_quality_end() const
Definition: bamindex.hpp:1244
const char * get_cigar_ptr() const
Definition: bamindex.hpp:1209
const char * m_ReadPtr
Definition: bamindex.hpp:1278
const char * m_CIGARPtr
Definition: bamindex.hpp:1277
void get_cigar(vector< uint32_t > &raw_cigar) const
Definition: bamindex.hpp:1221
uint16_t get_bin() const
Definition: bamindex.hpp:1147
int32_t get_ref_pos() const
Definition: bamindex.hpp:1134
uint8_t get_map_quality() const
Definition: bamindex.hpp:1143
int32_t get_next_ref_pos() const
Definition: bamindex.hpp:1193
uint8_t get_read_name_len() const
Definition: bamindex.hpp:1139
CBGZFPos get_file_pos() const
Definition: bamindex.hpp:1113
const char * get_read_end() const
Definition: bamindex.hpp:1236
const char * get_read_ptr() const
Definition: bamindex.hpp:1232
uint32_t get_cigar_op_data(uint16_t index) const
Definition: bamindex.hpp:1217
const char * get_cigar_end() const
Definition: bamindex.hpp:1213
const char * get_aux_data_end() const
Definition: bamindex.hpp:1252
const char * get_read_name_ptr() const
Definition: bamindex.hpp:1201
const char * get_phred_quality_ptr() const
Definition: bamindex.hpp:1240
const char * get_read_name_end() const
Definition: bamindex.hpp:1205
const char * m_RecordPtr
Definition: bamindex.hpp:1276
const char * get_record_ptr() const
Definition: bamindex.hpp:1121
uint16_t get_flag() const
Definition: bamindex.hpp:1181
size_t get_record_size() const
Definition: bamindex.hpp:1117
const char * get_aux_data_ptr() const
Definition: bamindex.hpp:1248
CBGZFPos m_FilePos
Definition: bamindex.hpp:1275
uint16_t get_cigar_ops_count() const
Definition: bamindex.hpp:1163
const char * get_record_end() const
Definition: bamindex.hpp:1125
int32_t get_next_ref_index() const
Definition: bamindex.hpp:1189
Uint4 m_RecordSize
Definition: bamindex.hpp:1279
int32_t get_tlen() const
Definition: bamindex.hpp:1197
CTempString get_read_raw() const
Definition: bamindex.hpp:1257
int32_t get_ref_index() const
Definition: bamindex.hpp:1130
uint32_t get_read_len() const
Definition: bamindex.hpp:1185
char GetChar() const
Definition: bamindex.cpp:2395
char m_DataType
Definition: bamindex.hpp:1067
uint32_t m_ElementCount
Definition: bamindex.hpp:1069
bool IsChar() const
Definition: bamindex.hpp:1053
float GetFloat(size_t index=0) const
Definition: bamindex.cpp:2449
bool IsInt() const
Definition: bamindex.hpp:1056
char m_Tag[2]
Definition: bamindex.hpp:1066
char GetDataType() const
Definition: bamindex.hpp:1048
bool IsArray() const
Definition: bamindex.hpp:1050
Int8 GetInt(size_t index=0) const
Definition: bamindex.cpp:2417
size_t size() const
Definition: bamindex.hpp:1051
bool IsTag(char c1, char c2) const
Definition: bamindex.hpp:1046
bool IsFloat() const
Definition: bamindex.hpp:1055
CTempString GetTag() const
Definition: bamindex.hpp:1045
DECLARE_OPERATOR_BOOL(m_DataPtr)
bool m_IsArray
Definition: bamindex.hpp:1068
CTempString GetString() const
Definition: bamindex.cpp:2406
const char * m_DataPtr
Definition: bamindex.hpp:1070
bool IsString() const
Definition: bamindex.hpp:1054
TSeqPos m_Length
Definition: bamindex.hpp:52
CBGZFPos GetEndFilePos() const
Definition: bamindex.hpp:358
CBGZFPos m_Overlap
Definition: bamindex.hpp:350
vector< CBGZFRange > m_Chunks
Definition: bamindex.hpp:352
CBGZFPos GetStartFilePos() const
Definition: bamindex.hpp:354
COpenRange< TSeqPos > GetSeqRange(SBamIndexParams params) const
Definition: bamindex.hpp:343
uint32_t TBin
Definition: bamindex.hpp:120
static const TShift kLevelStepBinShift
Definition: bamindex.hpp:137
uint8_t TIndexLevel
Definition: bamindex.hpp:121
static const TShift kBAI_min_shift
Definition: bamindex.hpp:138
uint8_t TShift
Definition: bamindex.hpp:136
static const TIndexLevel kMinBinIndexLevel
Definition: bamindex.hpp:134
static const TIndexLevel kBAI_depth
Definition: bamindex.hpp:139
static const TBin kMaxBinNumber
Definition: bamindex.hpp:133
constexpr TShift GetMinBinShift() const
Definition: bamindex.hpp:204
constexpr TSeqPos GetBinSize(TIndexLevel level) const
Definition: bamindex.hpp:196
constexpr TBin GetPseudoBin() const
Definition: bamindex.hpp:265
TIndexLevel depth
Definition: bamindex.hpp:157
constexpr TBin GetBinNumberBase(int level) const
Definition: bamindex.hpp:239
TIndexLevel Bin2IndexLevel(TBin bin) const
Definition: bamindex.hpp:302
constexpr TBin GetBinNumberBaseReversed(int reversed_level) const
Definition: bamindex.hpp:230
constexpr TBin GetMinBinNumberBase() const
Definition: bamindex.hpp:248
constexpr TBin GetFirstBin(TIndexLevel level) const
Definition: bamindex.hpp:257
TBin GetBinNumber(TSeqPos pos, EIndexLevel level) const
Definition: bamindex.hpp:289
TBin GetBinNumberOffset(TSeqPos pos, EIndexLevel level) const
Definition: bamindex.hpp:281
constexpr TShift GetLevelBinShift(EIndexLevel level) const
Definition: bamindex.hpp:191
constexpr TSeqPos GetPageSize() const
Definition: bamindex.hpp:218
TShift min_shift
Definition: bamindex.hpp:156
pair< TBin, TBin > GetBinRange(COpenRange< TSeqPos > ref_range, TIndexLevel index_level) const
Definition: bamindex.cpp:853
COpenRange< TSeqPos > GetSeqRange(TBin bin) const
Definition: bamindex.hpp:325
constexpr TSeqPos GetMaxBinSize() const
Definition: bamindex.hpp:212
TBin GetBinNumber(TSeqPos pos, TIndexLevel level) const
Definition: bamindex.hpp:285
bool IsOverflowPos(TSeqPos pos) const
Definition: bamindex.hpp:273
TBin GetBinNumberOffset(TSeqPos pos, TIndexLevel level) const
Definition: bamindex.hpp:277
TBin GetUpperBinNumber(TBin bin) const
Definition: bamindex.hpp:297
constexpr TIndexLevel ToIndexLevel(EIndexLevel level) const
Definition: bamindex.hpp:166
constexpr TSeqPos GetBinSize(EIndexLevel level) const
Definition: bamindex.hpp:200
constexpr TBin GetFirstOverflowBin(TIndexLevel level=0) const
Definition: bamindex.hpp:253
bool IsOverflowBin(TBin bin, TIndexLevel level=0) const
Definition: bamindex.hpp:269
constexpr TBin GetBinNumberBase(EIndexLevel level) const
Definition: bamindex.hpp:243
constexpr TShift GetPageShift() const
Definition: bamindex.hpp:223
constexpr TShift GetMinLevelBinShift() const
Definition: bamindex.hpp:158
TIndexLevel GetRangeIndexLevel(CRange< TSeqPos > range) const
Definition: bamindex.hpp:311
constexpr TSeqPos GetMinBinSize() const
Definition: bamindex.hpp:208
constexpr TShift GetLevelBinShift(TIndexLevel level) const
Definition: bamindex.hpp:187
constexpr TIndexLevel GetMaxIndexLevel() const
Definition: bamindex.hpp:162
constexpr TBin GetLastBin(TIndexLevel level) const
Definition: bamindex.hpp:261
TSeqPos m_EstimatedLength
Definition: bamindex.hpp:435
vector< uint64_t > CollectEstimatedCoverage(EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:394
pair< TBinsIter, TBinsIter > GetLevelBins(EIndexLevel level) const
Definition: bamindex.hpp:413
TBins::const_iterator TBinsIter
Definition: bamindex.hpp:411
vector< SBamIndexBinInfo > TBins
Definition: bamindex.hpp:410
CBGZFRange m_UnmappedChunk
Definition: bamindex.hpp:430
vector< CBGZFPos > m_Overlaps
Definition: bamindex.hpp:433
static Uint4 MakeUint4(const char *buf)
Definition: bgzf.hpp:159
static Uint2 MakeUint2(const char *buf)
Definition: bgzf.hpp:153
#define _ASSERT
Modified on Tue Dec 05 02:18:07 2023 by modify_doxy.py rev. 669887