NCBI C++ ToolKit
bamindex.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef SRA__READER__BAM__BAMINDEX__HPP
2 #define SRA__READER__BAM__BAMINDEX__HPP
3 /* $Id: bamindex.hpp 101450 2023-12-14 19:15:52Z vasilche $
4  * ===========================================================================
5  *
6  * PUBLIC DOMAIN NOTICE
7  * National Center for Biotechnology Information
8  *
9  * This software/database is a "United States Government Work" under the
10  * terms of the United States Copyright Act. It was written as part of
11  * the author's official duties as a United States Government employee and
12  * thus cannot be copyrighted. This software/database is freely available
13  * to the public for use. The National Library of Medicine and the U.S.
14  * Government have not placed any restriction on its use or reproduction.
15  *
16  * Although all reasonable efforts have been taken to ensure the accuracy
17  * and reliability of the software and data, the NLM and the U.S.
18  * Government do not and cannot warrant the performance or results that
19  * may be obtained by using this software or data. The NLM and the U.S.
20  * Government disclaim all warranties, express or implied, including
21  * warranties of performance, merchantability or fitness for any particular
22  * purpose.
23  *
24  * Please cite the author in any work or product based on this material.
25  *
26  * ===========================================================================
27  *
28  * Authors: Eugene Vasilchenko
29  *
30  * File Description:
31  * Access to BAM index files
32  *
33  */
34 
35 #include <corelib/ncbistd.hpp>
36 #include <util/range.hpp>
37 #include <sra/readers/bam/bgzf.hpp>
39 
42 
43 class CSeq_annot;
44 class CBGZFStream;
45 class CBamString;
46 
48 {
49  void Read(CBGZFStream& in);
50 
51  string m_Name;
53 };
54 
55 
57 {
58 public:
59  CBamHeader();
60  explicit
61  CBamHeader(const string& bam_file_name);
62  ~CBamHeader();
63 
64  void Read(CBGZFStream& stream);
65  void Read(const string& bam_file_name);
66 
67  const string& GetText() const
68  {
69  return m_Text;
70  }
71 
73  typedef pair<string, TSBamTags> TSBamRecord;
74  typedef list<TSBamRecord> TSBamRecords;
75  // parse m_Text (SAM header), return number of records
76  size_t GetSBamRecords(TSBamRecords& records) const;
77 
78  typedef vector<SBamHeaderRefInfo> TRefs;
79  const TRefs& GetRefs() const
80  {
81  return m_Refs;
82  }
83 
84  size_t GetRefCount() const
85  {
86  return m_Refs.size();
87  }
88  const SBamHeaderRefInfo& GetRef(size_t ref_index) const;
89  size_t GetRefIndex(const string& name) const;
// Name of the reference sequence stored at position `index` of m_Refs.
// The element is accessed with operator[], so `index` is not range-checked
// here; the caller has to keep it below GetRefCount().
90  const string& GetRefName(size_t index) const
91  {
92  return m_Refs[index].m_Name;
93  }
// Length (m_Length) of the reference sequence stored at position `index`
// of m_Refs. As with GetRefName(), the index is used unchecked.
94  TSeqPos GetRefLength(size_t index) const
95  {
96  return m_Refs[index].m_Length;
97  }
98 
100 
102  {
103  return m_AlignStart;
104  }
105 
106 private:
107  string m_Text;
111 };
112 
113 
115 {
116  enum ESearchMode {
119  };
120  typedef uint32_t TBin;
122  // Unfortunately, the bins and index levels are reversely ordered:
123  // the smallest-by-size bins are at min index level (0) and have the largest bin numbers
124  // bin size = smallest, index level = smallest (=0), bin numbers = largest
125  // the largest by size bin is at max index level (variable) and has bin number = 0
126  // bin size = largest, index level = largest, bin number = smallest (=0)
127  //
128  // To avoid ambiguity we use Min and Max always with Bin and {Index}Level
129  // MinBin refers to bins smallest size (unfortunately with largest bin numbers)
130  // MaxBin refers to the bin with largest size (unfortunately with smallest bin number = 0)
131  // Min{Index}Level refers to index level 0 (largest bin numbers, but smallest bin size)
132  // Max{Index}Level refers to max index level (smallest bin number, but largest bin size)
133  static const TBin kMaxBinNumber = 0; // single max bin (largest in size)
134  static const TIndexLevel kMinBinIndexLevel = 0; // there're multiple min bins (smallest in size)
135 
136  typedef uint8_t TShift;
137  static const TShift kLevelStepBinShift = 3;
138  static const TShift kBAI_min_shift = 14;
139  static const TIndexLevel kBAI_depth = 5;
140 
142  // number of index levels
143  kMinLevel = 0, // bins smallest in size
146  kMaxLevel = kBAI_depth // special value, to be treated as actual max level
147  };
148 };
149 
150 #define BAM_SUPPORT_CSI
151 
153 {
154 #ifdef BAM_SUPPORT_CSI
155  bool is_CSI;
158  constexpr TShift GetMinLevelBinShift() const
159  {
160  return min_shift;
161  }
162  constexpr TIndexLevel GetMaxIndexLevel() const
163  {
164  return depth;
165  }
// Convert a symbolic EIndexLevel into a numeric index level.
// kMaxLevel is a placeholder: it resolves to GetMaxIndexLevel(), which in
// this (CSI-enabled) branch is the `depth` of the index being described.
// Any other enumerator is converted to TIndexLevel unchanged.
166  constexpr TIndexLevel ToIndexLevel(EIndexLevel level) const
167  {
168  return level == kMaxLevel? GetMaxIndexLevel(): TIndexLevel(level);
169  }
170 #else
171  static const bool is_CSI = false;
172  static constexpr TShift GetMinLevelBinShift()
173  {
174  return kBAI_min_shift;
175  }
176  static constexpr TIndexLevel GetMaxIndexLevel()
177  {
178  return kBAI_depth;
179  }
180  static constexpr TIndexLevel ToIndexLevel(EIndexLevel level)
181  {
182  return TIndexLevel(level); // direct mapping
183  }
184 #endif
185 
186  // return bit shift for size of bin on a specific index level
// log2 of the bin size on index level `level`.
// Level 0 uses GetMinLevelBinShift(); every further level adds
// kLevelStepBinShift (3) bits, i.e. its bins are 8 times larger.
// The sum is returned as TShift (uint8_t).
187  constexpr TShift GetLevelBinShift(TIndexLevel level) const
188  {
189  return GetMinLevelBinShift() + kLevelStepBinShift*level;
190  }
191  constexpr TShift GetLevelBinShift(EIndexLevel level) const
192  {
193  return GetLevelBinShift(ToIndexLevel(level));
194  }
195  // return size of bin on a specific index level
196  constexpr TSeqPos GetBinSize(TIndexLevel level) const
197  {
198  return TSeqPos(1) << GetLevelBinShift(level);
199  }
200  constexpr TSeqPos GetBinSize(EIndexLevel level) const
201  {
202  return GetBinSize(ToIndexLevel(level));
203  }
204  constexpr TShift GetMinBinShift() const
205  {
207  }
208  constexpr TSeqPos GetMinBinSize() const
209  {
211  }
212  constexpr TSeqPos GetMaxBinSize() const
213  {
214  return GetBinSize(GetMaxIndexLevel());
215  }
216 
217  // Min bin size is page size
218  constexpr TSeqPos GetPageSize() const
219  {
220  return GetMinBinSize();
221  }
222  // number of bits to shift to convert between page and position
223  constexpr TShift GetPageShift() const
224  {
225  return GetMinBinShift();
226  }
227 
228  // The normal TIndexLevel 0 has the smallest bin sizes and the biggest bin numbers;
229  // for bin number calculation it's easier to count from the maximal level, where bin=0.
// Bin-number base of a level counted from the top of the hierarchy.
// reversed_level 0 is the single largest bin (base 0); each following level
// adds 8 times more bins, so the bases are 0, 1, 9, 73, 585, 4681, ...
// (= 1 + 8 + ... + 8^(reversed_level-1)).
// NOTE(review): reversed_level is not range-checked; values outside
// 0..kAllowedLevels produce an out-of-range shift count.
230  constexpr TBin GetBinNumberBaseReversed(int reversed_level) const
231  {
232  // (kAllowedLevels*kLevelStepBinShift+1) bits must fit in unsigned
233  constexpr int kAllowedLevels = 10;
// kBaseBits == (8^10 - 1) / 7 == 01111111111 in octal: one set bit per level
234  constexpr unsigned kBaseBits =
235  ((1u<<(kAllowedLevels*kLevelStepBinShift))-1)/((1<<kLevelStepBinShift)-1);
// dropping the low octal digits keeps exactly `reversed_level` ones
236  return kBaseBits >> ((kAllowedLevels-reversed_level)*kLevelStepBinShift);
237  }
238  // base bin number of a specific index level
239  constexpr TBin GetBinNumberBase(int level) const
240  {
242  }
243  constexpr TBin GetBinNumberBase(EIndexLevel level) const
244  {
245  return GetBinNumberBase(ToIndexLevel(level));
246  }
247  // base for bin numbers calculation
248  constexpr TBin GetMinBinNumberBase() const
249  {
250  // kBinNumberBase == 4681 == 011111 in octal for 5 levels with 3 bits per level
252  }
// First bin number that lies past the bins of index level `level`.
// It is obtained as the bin-number base of the level below (level-1),
// because smaller levels carry larger bin numbers.
// With the default level 0 the request is for level -1, which presumably
// is the first number after all regular bins of the index (confirm against
// GetBinNumberBase()).
253  constexpr TBin GetFirstOverflowBin(TIndexLevel level = 0) const
254  {
255  return GetBinNumberBase(level-1);
256  }
257  constexpr TBin GetFirstBin(TIndexLevel level) const
258  {
259  return GetBinNumberBase(level);
260  }
// Last bin number of index level `level`.
// Bin numbers of level-1 start right after those of `level`, so the last
// bin is the one immediately before the base of level-1.
261  constexpr TBin GetLastBin(TIndexLevel level) const
262  {
263  return GetBinNumberBase(level-1)-1;
264  }
// Number of the pseudo-bin: the value directly after the first overflow bin
// of level 0.
// NOTE(review): for a standard BAI index this presumably evaluates to 37450,
// the metadata pseudo-bin of the SAM/BAM specification - confirm.
265  constexpr TBin GetPseudoBin() const
266  {
267  return GetFirstOverflowBin()+1;
268  }
// True when `bin` is at or past the first overflow bin of `level`
// (level 0 by default), i.e. its number is too large to be a bin of that level.
269  bool IsOverflowBin(TBin bin, TIndexLevel level = 0) const
270  {
271  return bin >= GetFirstOverflowBin(level);
272  }
// True when the sequence position `pos` cannot be addressed by the index.
// The single largest bin (bin number 0) covers positions
// [0, GetMaxBinSize()), so a position overflows exactly when it is at or
// past GetMaxBinSize(). The comparison must therefore be '>=': testing
// 'pos < GetMaxBinSize()' would flag every addressable position instead.
273  bool IsOverflowPos(TSeqPos pos) const
274  {
275  return pos >= GetMaxBinSize();
276  }
278  {
279  return TBin(pos >> GetLevelBinShift(level));
280  }
282  {
283  return GetBinNumberOffset(pos, ToIndexLevel(level));
284  }
286  {
287  return GetBinNumberBase(level) + GetBinNumberOffset(pos, level);
288  }
290  {
291  return GetBinNumber(pos, ToIndexLevel(level));
292  }
293  // return range of bins from an index level covering a sequence range
294  // the range may be empty (second < first) if sequence range is beyond index
295  pair<TBin, TBin> GetBinRange(COpenRange<TSeqPos> ref_range,
296  TIndexLevel index_level) const;
298  {
299  _ASSERT(bin != 0);
300  return IsOverflowBin(bin)? 0: (bin-1)>>kLevelStepBinShift;
301  }
303  {
304  TBin bin_start = GetMinBinNumberBase();
305  for ( TIndexLevel level = 0; ; ++level, bin_start >>= kLevelStepBinShift ) {
306  if ( bin >= bin_start ) {
307  return level;
308  }
309  }
310  }
312  {
313  TIndexLevel level = 0;
314  auto local_min_shift = GetMinLevelBinShift();
315  TSeqPos pos1 = range.GetFrom() >> local_min_shift;
316  TSeqPos pos2 = range.GetTo() >> local_min_shift;
317  while ( level < GetMaxIndexLevel() && pos1 != pos2 ) {
318  ++level;
319  pos1 >>= kLevelStepBinShift;
320  pos2 >>= kLevelStepBinShift;
321  }
322  return level;
323  }
325  {
326  auto local_min_shift = GetMinLevelBinShift();
327  TSeqPos pos1 = range.GetFrom() >> local_min_shift;
328  TSeqPos pos2 = range.GetTo() >> local_min_shift;
329  return pos1 == pos2;
330  }
331 
333  {
334  TIndexLevel level = Bin2IndexLevel(bin);
335  TSeqPos len = GetBinSize(level);
336  TSeqPos index = bin - GetBinNumberBase(level);
337  TSeqPos pos = index*len;
338  return COpenRange<TSeqPos>(pos, pos+len);
339  }
340 };
341 
342 
344 {
345  void Read(CNcbiIstream& in,
346  SBamIndexParams params);
347  const char* Read(const char* buffer_ptr, const char* buffer_end,
348  SBamIndexParams params);
349 
351  {
352  return params.GetSeqRange(m_Bin);
353  }
354 
356 #ifdef BAM_SUPPORT_CSI
358 #endif
359  vector<CBGZFRange> m_Chunks;
360 
362  {
363  return m_Chunks.front().first;
364  }
366  {
367  return m_Chunks.back().second;
368  }
369 };
// Ordering of bin descriptors by their bin number (m_Bin).
370 static inline bool operator<(const SBamIndexBinInfo& b1, const SBamIndexBinInfo& b2)
371 {
372  return b1.m_Bin < b2.m_Bin;
373 }
// Mixed comparison: bin descriptor against a bare bin number.
// Together with the overload below this allows comparing descriptors with
// plain TBin keys in either order (presumably for searches in a bin vector
// sorted by m_Bin - confirm against callers).
374 static inline bool operator<(const SBamIndexBinInfo& b1, SBamIndexBinInfo::TBin b2)
375 {
376  return b1.m_Bin < b2;
377 }
// Mixed comparison: bare bin number against a bin descriptor.
378 static inline bool operator<(SBamIndexBinInfo::TBin b1, const SBamIndexBinInfo& b2)
379 {
380  return b1 < b2.m_Bin;
381 }
382 
383 
385 {
386  const char* Read(const char* buffer_ptr, const char* buffer_end,
387  SBamIndexParams params,
388  int32_t ref_index);
389  void Read(CNcbiIstream& in,
390  SBamIndexParams params,
391  int32_t ref_index);
392 
393  // return limits of data in file based on linear index
394  // also adjusts argument ref_range to be within reference sequence
395  CBGZFRange GetLimitRange(COpenRange<TSeqPos>& ref_range,
396  ESearchMode search_mode) const;
397 
398  CBGZFRange GetFileRange() const;
399  vector<uint64_t> CollectEstimatedCoverage(TIndexLevel min_index_level,
400  TIndexLevel max_index_level) const;
// Convenience overload taking symbolic levels: both EIndexLevel arguments
// are translated with ToIndexLevel() and passed to the TIndexLevel overload.
401  vector<uint64_t> CollectEstimatedCoverage(EIndexLevel min_index_level,
402  EIndexLevel max_index_level) const
403  {
404  return CollectEstimatedCoverage(ToIndexLevel(min_index_level),
405  ToIndexLevel(max_index_level));
406  }
407  vector<Uint8> EstimateDataSizeByAlnStartPos(TSeqPos seqlen = kInvalidSeqPos) const;
408 
409  // return array of min start position of alignments overlapping with each page
410  // may return shorter array if the remaining alignments are completely within their page
411  vector<TSeqPos> GetAlnOverStarts(void) const;
412  // return array of max end position of alignments overlapping with each page
413  // may return shorter array if the remaining alignments are completely within their page
414  vector<TSeqPos> GetAlnOverEnds(void) const;
415 
416 
417  typedef vector<SBamIndexBinInfo> TBins;
418  typedef TBins::const_iterator TBinsIter;
419  pair<TBinsIter, TBinsIter> GetLevelBins(TIndexLevel level) const;
420  pair<TBinsIter, TBinsIter> GetLevelBins(EIndexLevel level) const
421  {
422  return GetLevelBins(ToIndexLevel(level));
423  }
424  // add file ranges with alignments from specific index level
425  // return first bin in the range, and first bin iter after the range
426  // the TBinsIter range is always valid, if no bins in the range both iters are the same
427  pair<TBinsIter, TBinsIter> AddLevelFileRanges(vector<CBGZFRange>& ranges,
428  CBGZFRange limit_file_range,
429  pair<TBin, TBin> bin_range) const;
430  pair<TBinsIter, TBinsIter> GetBinsIterRange(pair<TBin, TBin> bin_range) const;
431 
432  void SetLengthFromHeader(TSeqPos length);
433  void ProcessBin(const SBamIndexBinInfo& bin);
434  bool ProcessPseudoBin(SBamIndexBinInfo& bin);
435 
440  vector<CBGZFPos> m_Overlaps;
441  // estimation of sequence length for practical use, rounded to min bin size
443 };
444 
445 
447 {
448 public:
449  CBamIndex();
450  explicit
451  CBamIndex(const string& index_file_name);
452  ~CBamIndex();
453 
454  const string& GetFileName() const
455  {
456  return m_FileName;
457  }
458 
459  void Read(const string& index_file_name);
460  void Read(const char* buffer_ptr, size_t buffer_size);
461  void Read(CNcbiIstream& in);
462 
463  typedef vector<SBamIndexRefIndex> TRefs;
464  const TRefs& GetRefs() const
465  {
466  return m_Refs;
467  }
468  size_t GetRefCount() const
469  {
470  return m_Refs.size();
471  }
472  const SBamIndexRefIndex& GetRef(size_t ref_index) const;
473  void SetLengthFromHeader(const CBamHeader& header);
474 
475  CBGZFRange GetTotalFileRange(size_t ref_index) const;
476 
478  MakeEstimatedCoverageAnnot(const CBamHeader& header,
479  const string& ref_name,
480  const string& seq_id,
481  const string& annot_name,
482  TIndexLevel min_index_level,
483  TIndexLevel max_index_level) const;
486  const string& ref_name,
487  const string& seq_id,
488  const string& annot_name,
489  EIndexLevel min_index_level,
490  EIndexLevel max_index_level) const
491  {
492  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
493  ToIndexLevel(min_index_level),
494  ToIndexLevel(max_index_level));
495  }
498  const string& ref_name,
499  const string& seq_id,
500  const string& annot_name,
501  TIndexLevel min_index_level = 0) const
502  {
503  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
504  min_index_level, GetMaxIndexLevel());
505  }
508  const string& ref_name,
509  const string& seq_id,
510  const string& annot_name,
511  EIndexLevel min_index_level) const
512  {
513  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
514  ToIndexLevel(min_index_level));
515  }
517  MakeEstimatedCoverageAnnot(const CBamHeader& header,
518  const string& ref_name,
519  const CSeq_id& seq_id,
520  const string& annot_name,
521  TIndexLevel min_index_level,
522  TIndexLevel max_index_level) const;
525  const string& ref_name,
526  const CSeq_id& seq_id,
527  const string& annot_name,
528  EIndexLevel min_index_level,
529  EIndexLevel max_index_level) const
530  {
531  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
532  ToIndexLevel(min_index_level),
533  ToIndexLevel(max_index_level));
534  }
537  const string& ref_name,
538  const CSeq_id& seq_id,
539  const string& annot_name,
540  TIndexLevel min_index_level = 0) const
541  {
542  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
543  min_index_level, GetMaxIndexLevel());
544  }
547  const string& ref_name,
548  const CSeq_id& seq_id,
549  const string& annot_name,
550  EIndexLevel min_index_level) const
551  {
552  return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
553  ToIndexLevel(min_index_level));
554  }
555 
556 
558  MakeEstimatedCoverageAnnot(size_t ref_index,
559  const string& seq_id,
560  const string& annot_name,
561  TIndexLevel min_index_level,
562  TIndexLevel max_index_level) const;
564  MakeEstimatedCoverageAnnot(size_t ref_index,
565  const string& seq_id,
566  const string& annot_name,
567  EIndexLevel min_index_level,
568  EIndexLevel max_index_level) const
569  {
570  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
571  ToIndexLevel(min_index_level),
572  ToIndexLevel(max_index_level));
573  }
575  MakeEstimatedCoverageAnnot(size_t ref_index,
576  const string& seq_id,
577  const string& annot_name,
578  TIndexLevel min_index_level = 0) const
579  {
580  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
581  min_index_level, GetMaxIndexLevel());
582  }
584  MakeEstimatedCoverageAnnot(size_t ref_index,
585  const string& seq_id,
586  const string& annot_name,
587  EIndexLevel min_index_level) const
588  {
589  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
590  ToIndexLevel(min_index_level));
591  }
593  MakeEstimatedCoverageAnnot(size_t ref_index,
594  const CSeq_id& seq_id,
595  const string& annot_name,
596  TIndexLevel min_index_level,
597  TIndexLevel max_index_level) const;
599  MakeEstimatedCoverageAnnot(size_t ref_index,
600  const CSeq_id& seq_id,
601  const string& annot_name,
602  EIndexLevel min_index_level,
603  EIndexLevel max_index_level) const
604  {
605  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
606  ToIndexLevel(min_index_level),
607  ToIndexLevel(max_index_level));
608  }
610  MakeEstimatedCoverageAnnot(size_t ref_index,
611  const CSeq_id& seq_id,
612  const string& annot_name,
613  TIndexLevel min_index_level = 0) const
614  {
615  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
616  min_index_level, GetMaxIndexLevel());
617  }
619  MakeEstimatedCoverageAnnot(size_t ref_index,
620  const CSeq_id& seq_id,
621  const string& annot_name,
622  EIndexLevel min_index_level) const
623  {
624  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
625  ToIndexLevel(min_index_level));
626  }
627 
629  MakeEstimatedCoverageAnnot(size_t ref_index,
630  const string& seq_id,
631  const string& annot_name,
632  TSeqPos ref_length,
633  TIndexLevel min_index_level,
634  TIndexLevel max_index_level) const;
636  MakeEstimatedCoverageAnnot(size_t ref_index,
637  const string& seq_id,
638  const string& annot_name,
639  TSeqPos ref_length,
640  EIndexLevel min_index_level,
641  EIndexLevel max_index_level) const
642  {
643  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
644  ToIndexLevel(min_index_level),
645  ToIndexLevel(max_index_level));
646  }
648  MakeEstimatedCoverageAnnot(size_t ref_index,
649  const string& seq_id,
650  const string& annot_name,
651  TSeqPos ref_length,
652  TIndexLevel min_index_level = 0) const
653  {
654  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
655  min_index_level, GetMaxIndexLevel());
656  }
658  MakeEstimatedCoverageAnnot(size_t ref_index,
659  const string& seq_id,
660  const string& annot_name,
661  TSeqPos ref_length,
662  EIndexLevel min_index_level) const
663  {
664  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
665  ToIndexLevel(min_index_level));
666  }
668  MakeEstimatedCoverageAnnot(size_t ref_index,
669  const CSeq_id& seq_id,
670  const string& annot_name,
671  TSeqPos ref_length,
672  TIndexLevel min_index_level,
673  TIndexLevel max_index_level) const;
675  MakeEstimatedCoverageAnnot(size_t ref_index,
676  const CSeq_id& seq_id,
677  const string& annot_name,
678  TSeqPos ref_length,
679  EIndexLevel min_index_level,
680  EIndexLevel max_index_level) const
681  {
682  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
683  ToIndexLevel(min_index_level),
684  ToIndexLevel(max_index_level));
685  }
687  MakeEstimatedCoverageAnnot(size_t ref_index,
688  const CSeq_id& seq_id,
689  const string& annot_name,
690  TSeqPos ref_length,
691  TIndexLevel min_index_level = 0) const
692  {
693  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
694  min_index_level, GetMaxIndexLevel());
695  }
697  MakeEstimatedCoverageAnnot(size_t ref_index,
698  const CSeq_id& seq_id,
699  const string& annot_name,
700  TSeqPos ref_length,
701  EIndexLevel min_index_level) const
702  {
703  return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
704  ToIndexLevel(min_index_level));
705  }
706 
707  // collect estimated coverage from index level range
708  // result bin size will be equal to bin size of min_index_level
709  vector<uint64_t>
710  CollectEstimatedCoverage(size_t ref_index,
711  TIndexLevel min_index_level,
712  TIndexLevel max_index_level) const;
713  vector<uint64_t>
714  CollectEstimatedCoverage(size_t ref_index,
715  EIndexLevel min_index_level,
716  EIndexLevel max_index_level) const
717  {
718  return CollectEstimatedCoverage(ref_index,
719  ToIndexLevel(min_index_level),
720  ToIndexLevel(max_index_level));
721  }
722  // collect estimated coverage from specified index level
723  // result bin size will be equal to bin size of index_level
724  vector<uint64_t>
725  CollectEstimatedCoverage(size_t ref_index,
726  TIndexLevel index_level) const
727  {
728  return CollectEstimatedCoverage(ref_index, index_level, index_level);
729  }
730  vector<uint64_t>
731  CollectEstimatedCoverage(size_t ref_index,
732  EIndexLevel index_level) const
733  {
734  return CollectEstimatedCoverage(ref_index, ToIndexLevel(index_level));
735  }
736  // collect estimated coverage from all index levels
737  // result bin size will be equal to bin size of most detailed index level
738  vector<uint64_t>
739  CollectEstimatedCoverage(size_t ref_index) const
740  {
741  return CollectEstimatedCoverage(ref_index, 0, GetMaxIndexLevel());
742  }
743  // estimate data size by alignment start position for one reference sequence
744  // forwards to EstimateDataSizeByAlnStartPos() of the per-reference index GetRef(ref_index)
// no sequence length is passed, so the callee's default argument applies
745  vector<uint64_t>
746  EstimateDataSizeByAlnStartPos(size_t ref_index) const
747  {
748  return GetRef(ref_index).EstimateDataSizeByAlnStartPos();
749  }
750 
// Return the pair (m_TotalReadBytes, m_TotalReadSeconds).
// NOTE(review): presumably the byte count and elapsed time accumulated while
// reading the index file - confirm where these members are updated.
751  pair<Uint8, double> GetReadStatistics() const
752  {
753  return make_pair(m_TotalReadBytes, m_TotalReadSeconds);
754  }
755 
756 private:
757  string m_FileName;
762 };
763 
764 
765 template<class Position>
767 {
768 public:
769  typedef Position position_type;
771 
772  typedef pair<position_type, position_type> TRange;
774  typedef typename TRanges::iterator iterator;
776 
777  void clear()
778  {
779  m_Ranges.clear();
780  }
781  bool empty() const
782  {
783  return m_Ranges.empty();
784  }
786  {
787  return m_Ranges.begin();
788  }
790  {
791  return m_Ranges.end();
792  }
793 
795  {
796  if ( !(range.first < range.second) ) {
797  // empty range, do nothing
798  return;
799  }
800 
801  // find insertion point
802  // iterator next points to ranges that start after new range start
804  assert(next == m_Ranges.end() || (range.first < next->first));
805 
806  // check for overlapping with previous range
807  iterator iter;
808  if ( next != m_Ranges.begin() &&
809  !((iter = prev(next))->second < range.first) ) {
810  // overlaps with previous range
811  // update it if necessary
812  if ( !(iter->second < range.second) ) {
813  // new range is completely within an old one
814  // no more work to do
815  return;
816  }
817  // need to extend previous range to include inserted range
818  // next ranges may need to be removed
819  }
820  else {
821  // new range, use found iterator as an insertion hint
822  iter = m_Ranges.insert(next, range);
823  // next ranges may need to be removed
824  }
825  assert(iter != m_Ranges.end() && next != m_Ranges.begin() &&
826  iter == prev(next) &&
827  !(range.first < iter->first) &&
828  !(range.second < iter->second));
829 
830  // erase all existing ranges that start within inserted range
831  // and extend inserted range if necessary
832  while ( next != m_Ranges.end() &&
833  !(range.second < next->first) ) {
834  if ( range.second < next->second ) {
835  // range that start within inserted range is bigger,
836  // extend inserted range
837  range.second = next->second;
838  }
839  // erase completely covered range
840  m_Ranges.erase(next++);
841  }
842  // update current range
843  iter->second = range.second;
844  }
845 
847  {
848  add_range(range);
849  return *this;
850  }
851 
852 private:
854 };
855 
856 
858 {
859 public:
861  CBamFileRangeSet(const CBamIndex& index,
862  size_t ref_index, COpenRange<TSeqPos> ref_range,
863  ESearchMode search_mode = eSearchByOverlap);
864  CBamFileRangeSet(const CBamIndex& index,
865  size_t ref_index, COpenRange<TSeqPos> ref_range,
866  TIndexLevel min_level, TIndexLevel max_level,
867  ESearchMode search_mode = eSearchByOverlap);
868  CBamFileRangeSet(const CBamIndex& index,
869  size_t ref_index, COpenRange<TSeqPos> ref_range,
870  EIndexLevel min_level, EIndexLevel max_level,
871  ESearchMode search_mode = eSearchByOverlap);
872  ~CBamFileRangeSet();
873 
874  void Clear();
875  void SetRanges(const CBamIndex& index,
876  size_t ref_index, COpenRange<TSeqPos> ref_range,
877  ESearchMode search_mode = eSearchByOverlap,
878  const CBGZFPos* file_pos = nullptr);
879  void AddRanges(const CBamIndex& index,
880  size_t ref_index, COpenRange<TSeqPos> ref_range,
881  ESearchMode search_mode = eSearchByOverlap,
882  const CBGZFPos* file_pos = nullptr);
883  void SetRanges(const CBamIndex& index,
884  size_t ref_index, COpenRange<TSeqPos> ref_range,
885  TIndexLevel index_level,
886  ESearchMode search_mode = eSearchByOverlap,
887  const CBGZFPos* file_pos = nullptr);
888  void SetRanges(const CBamIndex& index,
889  size_t ref_index, COpenRange<TSeqPos> ref_range,
890  EIndexLevel index_level,
891  ESearchMode search_mode = eSearchByOverlap,
892  const CBGZFPos* file_pos = nullptr)
893  {
894  SetRanges(index, ref_index, ref_range, index.ToIndexLevel(index_level), search_mode, file_pos);
895  }
896  void AddRanges(const CBamIndex& index,
897  size_t ref_index, COpenRange<TSeqPos> ref_range,
898  TIndexLevel index_level,
899  ESearchMode search_mode = eSearchByOverlap,
900  const CBGZFPos* file_pos = nullptr);
901  void AddRanges(const CBamIndex& index,
902  size_t ref_index, COpenRange<TSeqPos> ref_range,
903  EIndexLevel index_level,
904  ESearchMode search_mode = eSearchByOverlap,
905  const CBGZFPos* file_pos = nullptr)
906  {
907  AddRanges(index, ref_index, ref_range, index.ToIndexLevel(index_level), search_mode, file_pos);
908  }
909  void SetRanges(const CBamIndex& index,
910  size_t ref_index, COpenRange<TSeqPos> ref_range,
911  TIndexLevel min_index_level, TIndexLevel max_index_level,
912  ESearchMode search_mode = eSearchByOverlap,
913  const CBGZFPos* file_pos = nullptr);
914  void SetRanges(const CBamIndex& index,
915  size_t ref_index, COpenRange<TSeqPos> ref_range,
916  EIndexLevel min_index_level, EIndexLevel max_index_level,
917  ESearchMode search_mode = eSearchByOverlap,
918  const CBGZFPos* file_pos = nullptr)
919  {
920  SetRanges(index, ref_index, ref_range,
921  index.ToIndexLevel(min_index_level),
922  index.ToIndexLevel(max_index_level),
923  search_mode, file_pos);
924  }
925  void AddRanges(const CBamIndex& index,
926  size_t ref_index, COpenRange<TSeqPos> ref_range,
927  TIndexLevel min_index_level, TIndexLevel max_index_level,
928  ESearchMode search_mode = eSearchByOverlap,
929  const CBGZFPos* file_pos = nullptr);
930  void AddRanges(const CBamIndex& index,
931  size_t ref_index, COpenRange<TSeqPos> ref_range,
932  EIndexLevel min_index_level, EIndexLevel max_index_level,
933  ESearchMode search_mode = eSearchByOverlap,
934  const CBGZFPos* file_pos = nullptr)
935  {
936  AddRanges(index, ref_index, ref_range,
937  index.ToIndexLevel(min_index_level),
938  index.ToIndexLevel(max_index_level),
939  search_mode, file_pos);
940  }
941  void AddWhole(const CBamHeader& header);
942  void SetWhole(const CBamHeader& header)
943  {
944  Clear();
945  AddWhole(header);
946  }
947  void AddFrom(CBGZFPos file_pos);
948  void SetFrom(CBGZFPos file_pos)
949  {
950  Clear();
951  AddFrom(file_pos);
952  }
953  void AddFrom(const CBamHeader& header, const CBGZFPos* file_pos);
954  void SetFrom(const CBamHeader& header, const CBGZFPos* file_pos)
955  {
956  Clear();
957  AddFrom(header, file_pos);
958  }
959 
962 
963  const TRanges& GetRanges() const
964  {
965  return m_Ranges;
966  }
968  {
969  return m_Ranges.begin();
970  }
972  {
973  return m_Ranges.end();
974  }
975 
976  static Uint8 GetFileSize(CBGZFRange range);
977  Uint8 GetFileSize() const;
978 
979 protected:
980  void AddSortedRanges(const vector<CBGZFRange>& ranges,
981  const CBGZFPos* file_pos = nullptr);
982 
983 private:
985 };
986 
987 
989 {
990 public:
992  {
993  }
994  explicit
995  CBamRawDb(const string& bam_path)
996  {
997  Open(bam_path);
998  }
999  CBamRawDb(const string& bam_path, const string& index_path)
1000  {
1001  Open(bam_path, index_path);
1002  }
1003  ~CBamRawDb();
1004 
1005 
1006  void Open(const string& bam_path);
1007  void Open(const string& bam_path, const string& index_path);
1008 
1009 
1010  const CBamHeader& GetHeader() const
1011  {
1012  return m_Header;
1013  }
1014  const CBamIndex& GetIndex() const
1015  {
1016  return m_Index;
1017  }
1018  const string& GetIndexName() const
1019  {
1020  return m_Index.GetFileName();
1021  }
1022  size_t GetRefCount() const
1023  {
1024  return GetHeader().GetRefCount();
1025  }
1026  size_t GetRefIndex(const string& ref_label) const
1027  {
1028  return GetHeader().GetRefIndex(ref_label);
1029  }
1030  const string& GetRefName(size_t ref_index) const
1031  {
1032  return GetHeader().GetRefName(ref_index);
1033  }
1034  TSeqPos GetRefSeqLength(size_t ref_index) const
1035  {
1036  return GetHeader().GetRefLength(ref_index);
1037  }
1038 
1039 
1041  {
1042  return *m_File;
1043  }
1044 
// Estimate data size by alignment start position for the reference named
// `ref_label`.
// The label is resolved to an index through the BAM header, and the
// header's reference length is passed as the sequence length to the
// per-reference index.
// NOTE(review): behavior for an unknown label depends on
// CBamHeader::GetRefIndex(), which is not visible here.
1045  vector<Uint8> EstimateDataSizeByAlnStartPos(const string& ref_label) const
1046  {
1047  size_t ref_index = GetRefIndex(ref_label);
1048  return GetIndex().GetRef(ref_index).EstimateDataSizeByAlnStartPos(GetRefSeqLength(ref_index));
1049  }
1050 
1051  double GetEstimatedSecondsPerByte() const;
1052 
1053 private:
1057 };
1058 
1059 
1060 class CBamAuxIterator;
1061 
1062 
1064 {
1066  : m_Tag(),
1067  m_DataType(),
1068  m_IsArray(false),
1069  m_ElementCount(),
1070  m_DataPtr(0)
1071  {
1072  }
1073 
1075 
1076  CTempString GetTag() const { return CTempString(m_Tag, 2); }
1077  bool IsTag(char c1, char c2) const { return m_Tag[0] == c1 && m_Tag[1] == c2; }
1078 
1079  char GetDataType() const { return m_DataType; }
1080 
1081  bool IsArray() const { return m_IsArray; }
1082  size_t size() const { return m_ElementCount; }
1083 
// Classification of the aux field by its one-letter data type code.
1084  bool IsChar() const { return m_DataType == 'A'; } // 'A': single character
1085  bool IsString() const { return m_DataType == 'Z' || m_DataType == 'H'; } // 'Z' or 'H': both reported as string
1086  bool IsFloat() const { return m_DataType == 'f'; } // 'f': float
// every type code that is not char, string or float is treated as integer
1087  bool IsInt() const { return !IsString() && !IsFloat() && !IsChar(); }
1088 
1089  NCBI_BAMREAD_EXPORT char GetChar() const;
1091  NCBI_BAMREAD_EXPORT float GetFloat(size_t index = 0) const;
1092  NCBI_BAMREAD_EXPORT Int8 GetInt(size_t index = 0) const;
1093 
1094 private:
1095  friend class CBamAuxIterator;
1096 
1097  char m_Tag[2];
1100  uint32_t m_ElementCount; // either string length or array element count
1101  const char* m_DataPtr;
1102 };
1103 
1105 {
1106  public:
1108  : m_AuxPtr(0),
1109  m_AuxEnd(0)
1110  {
1111  }
1112  CBamAuxIterator(const char* aux_ptr, const char* aux_end)
1113  : m_AuxPtr(aux_ptr),
1114  m_AuxEnd(aux_end)
1115  {
1116  x_InitData();
1117  }
1118 
1120  {
1121  x_InitData();
1122  return *this;
1123  }
1124 
1126 
1128 
1129  const SBamAuxData& operator*() const { return m_AuxData; }
1130  const SBamAuxData* operator->() const { return &m_AuxData; }
1131 
1132 private:
1134 
1136  const char* m_AuxPtr;
1137  const char* m_AuxEnd;
1138 };
1139 
1141 {
1142  void Read(CBGZFStream& in);
1143 
1145  {
1146  return m_FilePos;
1147  }
// Returns the stored record size (m_RecordSize); get_record_end() adds it to the record pointer.
1148  size_t get_record_size() const
1149  {
1150  return m_RecordSize;
1151  }
// Returns the pointer to the start of the record data (m_RecordPtr).
// The fixed-offset accessors of this struct read relative to this pointer.
1152  const char* get_record_ptr() const
1153  {
1154  return m_RecordPtr;
1155  }
// Returns the pointer just past the record: record start plus record size.
1156  const char* get_record_end() const
1157  {
1158  return get_record_ptr() + get_record_size();
1159  }
1160 
1162  {
1163  return SBamUtil::MakeUint4(get_record_ptr());
1164  }
1166  {
1167  return SBamUtil::MakeUint4(get_record_ptr()+4);
1168  }
1169 
1171  {
1172  return get_record_ptr()[8];
1173  }
1175  {
1176  return get_record_ptr()[9];
1177  }
1179  {
1180  return SBamUtil::MakeUint2(get_record_ptr()+10);
1181  }
1182  static const char kCIGARSymbols[];
1183  enum ECIGARType { // matches to kCIGARSymbols
1184  kCIGAR_M, // 0
1185  kCIGAR_I, // 1
1186  kCIGAR_D, // 2
1187  kCIGAR_N, // 3
1188  kCIGAR_S, // 4
1189  kCIGAR_H, // 5
1190  kCIGAR_P, // 6
1192  kCIGAR_X // 8
1193  };
1195  {
1196  return SBamUtil::MakeUint2(get_record_ptr()+12);
1197  }
// Single-bit masks for the alignment flag word; CBamRawAlignIterator tests them
// against GetFlags() (e.g. IsMapped(), IsPaired(), IsSecondary()).
// NOTE(review): bits 0..11 look like the SAM FLAG field layout - confirm against the SAM spec.
1198  enum EFlag {
1199  fAlign_WasPaired = 1 << 0,
1200  fAlign_IsMappedAsPair = 1 << 1,
1201  fAlign_SelfIsUnmapped = 1 << 2,
1202  fAlign_MateIsUnmapped = 1 << 3,
1203  fAlign_SelfIsReverse = 1 << 4,
1204  fAlign_MateIsReverse = 1 << 5,
1205  fAlign_IsFirst = 1 << 6,
1206  fAlign_IsSecond = 1 << 7,
1207  fAlign_IsNotPrimary = 1 << 8,
1208  fAlign_IsLowQuality = 1 << 9,
1209  fAlign_IsDuplicate = 1 << 10,
1210  fAlign_IsSupplementary = 1 << 11
1211  };
1213  {
1214  return SBamUtil::MakeUint2(get_record_ptr()+14);
1215  }
1217  {
1218  return SBamUtil::MakeUint4(get_record_ptr()+16);
1219  }
1221  {
1222  return SBamUtil::MakeUint4(get_record_ptr()+20);
1223  }
1225  {
1226  return SBamUtil::MakeUint4(get_record_ptr()+24);
1227  }
1229  {
1230  return SBamUtil::MakeUint4(get_record_ptr()+28);
1231  }
// Returns the start of the read name, located at fixed offset 32 from the record start.
1232  const char* get_read_name_ptr() const
1233  {
1234  return get_record_ptr()+32;
1235  }
// Returns the end of the read name, which is the cached CIGAR start pointer (m_CIGARPtr).
1236  const char* get_read_name_end() const
1237  {
1238  return m_CIGARPtr;
1239  }
// Returns the start of the CIGAR data; it begins where the read name ends.
1240  const char* get_cigar_ptr() const
1241  {
1242  return get_read_name_end();
1243  }
// Returns the end of the CIGAR data, which is the cached read start pointer (m_ReadPtr).
1244  const char* get_cigar_end() const
1245  {
1246  return m_ReadPtr;
1247  }
1249  {
1250  return SBamUtil::MakeUint4(get_cigar_ptr()+index*4);
1251  }
// Decodes all CIGAR operations of this record into raw_cigar.
// raw_cigar is resized to get_cigar_ops_count(); element i is produced by
// SBamUtil::MakeUint4() from the 4-byte group at get_cigar_ptr() + i*4, the same
// way the single-operation accessor of this struct reads one value.
// The values are decoded straight from the record rather than memcpy'd first and
// converted in place: with zero operations raw_cigar.data() may be null, and
// passing a null pointer to memcpy is undefined even for a zero length.
1252  void get_cigar(vector<uint32_t>& raw_cigar) const
1253  {
1254  const size_t count = get_cigar_ops_count();
1255  raw_cigar.resize(count);
1256  const char* src = get_cigar_ptr();
1257  const size_t op_size = sizeof(uint32_t);
1258  for ( size_t i = 0; i < count; ++i ) {
1259  raw_cigar[i] = SBamUtil::MakeUint4(src + i*op_size);
1260  }
1261  }
1262  void get_cigar(CBamString& dst) const;
// Returns the start of the packed read bytes; they begin where the CIGAR data ends.
1263  const char* get_read_ptr() const
1264  {
1265  return get_cigar_end();
1266  }
// Returns the end of the packed read bytes: get_read_ptr() plus (get_read_len()+1)/2,
// i.e. half the read length rounded up.
// NOTE(review): presumably two bases are packed per byte - confirm against get_read().
1267  const char* get_read_end() const
1268  {
1269  return get_read_ptr() + (get_read_len()+1)/2;
1270  }
// Returns the start of the quality data; it begins where the packed read ends.
1271  const char* get_phred_quality_ptr() const
1272  {
1273  return get_read_end();
1274  }
// Returns the end of the quality data: get_read_len() bytes past its start.
1275  const char* get_phred_quality_end() const
1276  {
1277  return get_phred_quality_ptr() + get_read_len();
1278  }
// Returns the start of the aux data; it begins where the quality data ends.
1279  const char* get_aux_data_ptr() const
1280  {
1281  return get_phred_quality_end();
1282  }
// Returns the end of the aux data, which coincides with the end of the record.
1283  const char* get_aux_data_end() const
1284  {
1285  return get_record_end();
1286  }
1287 
1289  {
1290  return CTempString(get_read_ptr(), (get_read_len()+1)/2);
1291  }
1292  static const char kBaseSymbols[];
1293  string get_read() const;
1294  void get_read(CBamString& str) const;
1295  uint32_t get_cigar_pos() const;
1296  uint32_t get_cigar_ref_size() const;
1297  uint32_t get_cigar_read_size() const;
1298  pair< COpenRange<uint32_t>, COpenRange<uint32_t> > get_cigar_alignment(void) const;
1299  string get_cigar() const;
1300  bool has_ambiguous_match() const;
1301 
1302  SBamAuxData get_aux_data(char c1, char c2, bool allow_missing = false) const;
1303  CTempString get_short_seq_accession_id() const;
1304 
1305 private:
1307  const char* m_RecordPtr;
1308  const char* m_CIGARPtr;
1309  const char* m_ReadPtr;
1311 };
1312 
1313 
1315 {
1316 public:
1318  : m_CurrentRangeEnd(0)
1319  {
1320  }
1321  explicit
1323  const CBGZFPos* file_pos = nullptr)
1324  : m_Reader(bam_db.GetFile())
1325  {
1326  Select(bam_db, file_pos);
1327  }
1329  : m_Reader(bam_db.GetFile())
1330  {
1331  Select(bam_db, &file_pos);
1332  }
1334  const string& ref_label,
1335  CRange<TSeqPos> ref_range,
1336  ESearchMode search_mode = eSearchByOverlap,
1337  const CBGZFPos* file_pos = nullptr)
1338  : m_Reader(bam_db.GetFile())
1339  {
1340  Select(bam_db, ref_label, ref_range, search_mode, file_pos);
1341  }
1343  const string& ref_label,
1344  CRange<TSeqPos> ref_range,
1345  TIndexLevel index_level,
1346  ESearchMode search_mode = eSearchByOverlap,
1347  const CBGZFPos* file_pos = nullptr)
1348  : m_Reader(bam_db.GetFile())
1349  {
1350  Select(bam_db, ref_label, ref_range, index_level, search_mode, file_pos);
1351  }
1353  const string& ref_label,
1354  CRange<TSeqPos> ref_range,
1355  EIndexLevel index_level,
1356  ESearchMode search_mode,
1357  const CBGZFPos* file_pos = nullptr)
1358  : m_Reader(bam_db.GetFile())
1359  {
1360  Select(bam_db, ref_label, ref_range, index_level, search_mode, file_pos);
1361  }
1363  const string& ref_label,
1364  CRange<TSeqPos> ref_range,
1365  TIndexLevel min_index_level,
1366  TIndexLevel max_index_level,
1367  ESearchMode search_mode = eSearchByOverlap,
1368  const CBGZFPos* file_pos = nullptr)
1369  : m_Reader(bam_db.GetFile())
1370  {
1371  Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode, file_pos);
1372  }
1374  const string& ref_label,
1375  CRange<TSeqPos> ref_range,
1376  EIndexLevel min_index_level,
1377  EIndexLevel max_index_level,
1378  ESearchMode search_mode,
1379  const CBGZFPos* file_pos = nullptr)
1380  : m_Reader(bam_db.GetFile())
1381  {
1382  Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode, file_pos);
1383  }
1385  const string& ref_label,
1386  TSeqPos ref_pos,
1387  TSeqPos window = 0,
1388  ESearchMode search_mode = eSearchByOverlap,
1389  const CBGZFPos* file_pos = nullptr);
1391  const string& ref_label,
1392  TSeqPos ref_pos,
1393  TSeqPos window,
1394  TIndexLevel min_index_level,
1395  TIndexLevel max_index_level,
1396  ESearchMode search_mode = eSearchByOverlap,
1397  const CBGZFPos* file_pos = nullptr);
1399  const string& ref_label,
1400  TSeqPos ref_pos,
1401  TSeqPos window,
1402  EIndexLevel min_index_level,
1403  EIndexLevel max_index_level,
1404  ESearchMode search_mode,
1405  const CBGZFPos* file_pos = nullptr);
1407  {
1408  }
1409 
1410  DECLARE_OPERATOR_BOOL(m_CurrentRangeEnd);
1411 
// Selection without a reference range: forwards the database header and the
// optional file_pos (default nullptr) to the header-based x_Select().
// How a null versus non-null file_pos is treated is decided in x_Select(), not here.
1412  void Select(CBamRawDb& bam_db,
1413  const CBGZFPos* file_pos = nullptr)
1414  {
1415  x_Select(bam_db.GetHeader(), file_pos);
1416  }
// Selects on the reference named ref_label within ref_range.
// The label is resolved with bam_db.GetRefIndex() and the call is forwarded, with
// the database index, to the x_Select() overload that takes no index-level arguments.
1417  void Select(CBamRawDb& bam_db,
1418  const string& ref_label,
1419  CRange<TSeqPos> ref_range,
1420  ESearchMode search_mode = eSearchByOverlap,
1421  const CBGZFPos* file_pos = nullptr)
1422  {
1423  x_Select(bam_db.GetIndex(),
1424  bam_db.GetRefIndex(ref_label), ref_range,
1425  search_mode, file_pos);
1426  }
// Same as the label/range Select(), restricted to a single index level given as TIndexLevel.
// Resolves ref_label via bam_db.GetRefIndex() and forwards to the single-level x_Select().
1427  void Select(CBamRawDb& bam_db,
1428  const string& ref_label,
1429  CRange<TSeqPos> ref_range,
1430  TIndexLevel index_level,
1431  ESearchMode search_mode = eSearchByOverlap,
1432  const CBGZFPos* file_pos = nullptr)
1433  {
1434  x_Select(bam_db.GetIndex(),
1435  bam_db.GetRefIndex(ref_label), ref_range,
1436  index_level, search_mode, file_pos);
1437  }
// Single-level selection with the level given as EIndexLevel.
// Unlike the TIndexLevel variant, search_mode has no default here and must be passed explicitly.
// Resolves ref_label via bam_db.GetRefIndex() and forwards to the EIndexLevel x_Select().
1438  void Select(CBamRawDb& bam_db,
1439  const string& ref_label,
1440  CRange<TSeqPos> ref_range,
1441  EIndexLevel index_level,
1442  ESearchMode search_mode,
1443  const CBGZFPos* file_pos = nullptr)
1444  {
1445  x_Select(bam_db.GetIndex(),
1446  bam_db.GetRefIndex(ref_label), ref_range,
1447  index_level, search_mode, file_pos);
1448  }
// Selection limited to the index levels between min_index_level and max_index_level
// (both TIndexLevel). Resolves ref_label via bam_db.GetRefIndex() and forwards
// everything to the min/max x_Select().
1449  void Select(CBamRawDb& bam_db,
1450  const string& ref_label,
1451  CRange<TSeqPos> ref_range,
1452  TIndexLevel min_index_level,
1453  TIndexLevel max_index_level,
1454  ESearchMode search_mode = eSearchByOverlap,
1455  const CBGZFPos* file_pos = nullptr)
1456  {
1457  x_Select(bam_db.GetIndex(),
1458  bam_db.GetRefIndex(ref_label), ref_range,
1459  min_index_level, max_index_level, search_mode, file_pos);
1460  }
// Selection limited to a min/max index level range given as EIndexLevel values.
// Resolves ref_label via bam_db.GetRefIndex() and forwards to the EIndexLevel
// min/max x_Select(), which converts the levels with index.ToIndexLevel().
1461  void Select(CBamRawDb& bam_db,
1462  const string& ref_label,
1463  CRange<TSeqPos> ref_range,
1464  EIndexLevel min_index_level,
1465  EIndexLevel max_index_level,
1466  ESearchMode search_mode = eSearchByOverlap,
1467  const CBGZFPos* file_pos = nullptr)
1468  {
1469  x_Select(bam_db.GetIndex(),
1470  bam_db.GetRefIndex(ref_label), ref_range,
1471  min_index_level, max_index_level, search_mode, file_pos);
1472  }
// Selection by an explicit index object and numeric reference index (no CBamRawDb,
// no label lookup). Forwards unchanged to the x_Select() overload without level arguments.
1473  void Select(const CBamIndex& index,
1474  size_t ref_index,
1475  CRange<TSeqPos> ref_range,
1476  ESearchMode search_mode = eSearchByOverlap,
1477  const CBGZFPos* file_pos = nullptr)
1478  {
1479  x_Select(index, ref_index, ref_range,
1480  search_mode, file_pos);
1481  }
// Selection by explicit index and reference index, restricted to one TIndexLevel.
// Forwards unchanged to the single-level x_Select().
1482  void Select(const CBamIndex& index,
1483  size_t ref_index,
1484  CRange<TSeqPos> ref_range,
1485  TIndexLevel index_level,
1486  ESearchMode search_mode = eSearchByOverlap,
1487  const CBGZFPos* file_pos = nullptr)
1488  {
1489  x_Select(index, ref_index, ref_range,
1490  index_level, search_mode, file_pos);
1491  }
// Selection by explicit index and reference index, restricted to one EIndexLevel.
// Forwards unchanged to the EIndexLevel single-level x_Select().
1492  void Select(const CBamIndex& index,
1493  size_t ref_index,
1494  CRange<TSeqPos> ref_range,
1495  EIndexLevel index_level,
1496  ESearchMode search_mode = eSearchByOverlap,
1497  const CBGZFPos* file_pos = nullptr)
1498  {
1499  x_Select(index, ref_index, ref_range,
1500  index_level, search_mode, file_pos);
1501  }
1502  void Next();
1503 
1505  {
1506  Next();
1507  return *this;
1508  }
1509 
1511  {
1512  return m_AlignInfo.get_file_pos();
1513  }
1514 
1516  {
1517  return m_AlignInfo.get_ref_index();
1518  }
1520  {
1521  return m_AlignRefRange.GetFrom();
1522  }
1523 
1524  // next segment in template (mate)
1526  {
1527  return m_AlignInfo.get_next_ref_index();
1528  }
1530  {
1531  return m_AlignInfo.get_next_ref_pos();
1532  }
1533 
1535  {
1536  return CTempString(m_AlignInfo.get_read_name_ptr(),
1537  m_AlignInfo.get_read_name_len()-1); // exclude trailing zero
1538  }
1540  {
1541  return m_AlignInfo.get_short_seq_accession_id();
1542  }
1544  {
1545  return m_AlignInfo.get_read_len();
1546  }
// Returns the read of the current alignment as a string, as produced by SBamAlignInfo::get_read().
1547  string GetShortSequence() const
1548  {
1549  return m_AlignInfo.get_read();
1550  }
1552  {
1553  return m_AlignInfo.get_read_raw();
1554  }
1556  {
1557  return m_AlignInfo.get_read(str);
1558  }
1559 
1561  {
1562  return m_AlignInfo.get_cigar_ops_count();
1563  }
// Returns the raw 32-bit value of the CIGAR operation at position index,
// delegating to SBamAlignInfo::get_cigar_op_data(). No bounds check is made here.
1564  Uint4 GetCIGAROp(Uint2 index) const
1565  {
1566  return m_AlignInfo.get_cigar_op_data(index);
1567  }
// Fills raw_cigar with the raw CIGAR operation values of the current alignment
// by delegating to SBamAlignInfo::get_cigar(vector&). The `return` forwards a void expression.
1568  void GetCIGAR(vector<Uint4>& raw_cigar) const
1569  {
1570  return m_AlignInfo.get_cigar(raw_cigar);
1571  }
// Writes the CIGAR of the current alignment into dst via SBamAlignInfo::get_cigar(CBamString&).
1572  void GetCIGAR(CBamString& dst) const
1573  {
1574  m_AlignInfo.get_cigar(dst);
1575  }
1577  {
1578  return m_AlignReadRange.GetFrom();
1579  }
1581  {
1582  return m_AlignReadRange.GetLength();
1583  }
1585  {
1586  return m_AlignRefRange.GetLength();
1587  }
// Returns the cached alignment ranges as a pair: first is the range on the
// reference (m_AlignRefRange), second is the range on the read (m_AlignReadRange).
1588  pair< COpenRange<TSeqPos>, COpenRange<TSeqPos> > GetCIGARAlignment(void) const
1589  {
1590  return make_pair(m_AlignRefRange, m_AlignReadRange);
1591  }
// Delegates to SBamAlignInfo::has_ambiguous_match() for the current alignment.
1592  bool HasAmbiguousMatch() const
1593  {
1594  return m_AlignInfo.has_ambiguous_match();
1595  }
1596 
// Returns the CIGAR of the current alignment as a string via SBamAlignInfo::get_cigar().
1597  string GetCIGAR() const
1598  {
1599  return m_AlignInfo.get_cigar();
1600  }
1601 
1603  {
1604  return m_AlignInfo.get_bin();
1605  }
1607  {
1608  return GetBAIIndexBin();
1609  }
1611  {
1612  return GetRangeIndexLevel(m_AlignRefRange);
1613  }
1615  {
1616  return RangeIsOnMinBinIndexLevel(m_AlignRefRange);
1617  }
1618 
// Returns the flag word of the current alignment; the Is*() predicates of this
// class test it against the SBamAlignInfo::fAlign_* masks.
1619  Uint2 GetFlags() const
1620  {
1621  return m_AlignInfo.get_flag();
1622  }
1623  // returns false if BAM flags are not available
// In this iterator the flags are always taken from GetFlags(), so flags is always
// assigned and the result is always true.
1624  bool TryGetFlags(Uint2& flags) const
1625  {
1626  flags = GetFlags();
1627  return true;
1628  }
1629 
// Always true: this iterator reports strand information as available unconditionally.
1630  bool IsSetStrand() const
1631  {
1632  return true;
1633  }
1635  {
1636  return (GetFlags() & m_AlignInfo.fAlign_SelfIsReverse)?
1638  }
1639 
// True when the fAlign_SelfIsUnmapped bit is clear in the flag word.
1640  bool IsMapped() const
1641  {
1642  return (GetFlags() & m_AlignInfo.fAlign_SelfIsUnmapped) == 0;
1643  }
1644 
1646  {
1647  return IsMapped()? m_AlignInfo.get_map_quality(): 0;
1648  }
1649 
// True when the fAlign_IsMappedAsPair bit (bit 1) is set in the flag word.
// NOTE(review): this tests fAlign_IsMappedAsPair, not fAlign_WasPaired (bit 0) -
// confirm that this is the intended meaning of "paired" for callers.
1650  bool IsPaired() const
1651  {
1652  return (GetFlags() & m_AlignInfo.fAlign_IsMappedAsPair) != 0;
1653  }
// True when the fAlign_IsFirst bit is set in the flag word.
1654  bool IsFirstInPair() const
1655  {
1656  return (GetFlags() & m_AlignInfo.fAlign_IsFirst) != 0;
1657  }
// True when the fAlign_IsSecond bit is set in the flag word.
1658  bool IsSecondInPair() const
1659  {
1660  return (GetFlags() & m_AlignInfo.fAlign_IsSecond) != 0;
1661  }
// True when the fAlign_IsNotPrimary bit is set in the flag word.
1662  bool IsSecondary() const
1663  {
1664  return (GetFlags() & m_AlignInfo.fAlign_IsNotPrimary) != 0;
1665  }
1666 
1667  void GetSegments(vector<int>& starts, vector<TSeqPos>& lens) const;
1668 
1670  {
1671  return CBamAuxIterator(m_AlignInfo.get_aux_data_ptr(), m_AlignInfo.get_aux_data_end());
1672  }
// Looks up the aux field with tag characters (c1, c2) in the current alignment
// by delegating to SBamAlignInfo::get_aux_data().
// NOTE(review): what happens for an absent tag, with or without allow_missing,
// is decided in get_aux_data(), which is not visible here.
1673  SBamAuxData GetAuxData(char c1, char c2, bool allow_missing = false) const
1674  {
1675  return m_AlignInfo.get_aux_data(c1, c2, allow_missing);
1676  }
// Convenience accessor: fetches aux field (c1, c2) with allow_missing left at its
// default (false) and returns its integer value at position index (default 0).
1677  Int8 GetAuxInt(char c1, char c2, size_t index = 0) const
1678  {
1679  return GetAuxData(c1, c2).GetInt(index);
1680  }
1681 
1682 protected:
1683  void x_Select(const CBamHeader& header,
1684  const CBGZFPos* file_pos = nullptr);
1685  void x_Select(const CBamIndex& index,
1686  size_t ref_index, CRange<TSeqPos> ref_range,
1687  TIndexLevel min_index_level, TIndexLevel max_index_level,
1688  ESearchMode search_mode,
1689  const CBGZFPos* file_pos = nullptr);
// EIndexLevel min/max variant: converts both levels with index.ToIndexLevel()
// and forwards to the TIndexLevel min/max x_Select() declared above.
1690  void x_Select(const CBamIndex& index,
1691  size_t ref_index, CRange<TSeqPos> ref_range,
1692  EIndexLevel min_index_level, EIndexLevel max_index_level,
1693  ESearchMode search_mode,
1694  const CBGZFPos* file_pos = nullptr)
1695  {
1696  x_Select(index, ref_index, ref_range,
1697  index.ToIndexLevel(min_index_level),
1698  index.ToIndexLevel(max_index_level),
1699  search_mode, file_pos);
1700  }
// Variant without level arguments: selects across all index levels by passing
// 0 as the minimum and index.GetMaxIndexLevel() as the maximum.
1701  void x_Select(const CBamIndex& index,
1702  size_t ref_index, CRange<TSeqPos> ref_range,
1703  ESearchMode search_mode,
1704  const CBGZFPos* file_pos = nullptr)
1705  {
1706  x_Select(index, ref_index, ref_range, 0, index.GetMaxIndexLevel(), search_mode, file_pos);
1707  }
// Single-level TIndexLevel variant: passes index_level as both the minimum and the maximum level.
1708  void x_Select(const CBamIndex& index,
1709  size_t ref_index, CRange<TSeqPos> ref_range,
1710  TIndexLevel index_level,
1711  ESearchMode search_mode,
1712  const CBGZFPos* file_pos = nullptr)
1713  {
1714  x_Select(index, ref_index, ref_range, index_level, index_level, search_mode, file_pos);
1715  }
// Single-level EIndexLevel variant: passes index_level as both the minimum and the maximum level.
// NOTE(review): with two EIndexLevel arguments this should resolve to the EIndexLevel
// min/max overload, which converts via index.ToIndexLevel() - confirm overload resolution.
1716  void x_Select(const CBamIndex& index,
1717  size_t ref_index, CRange<TSeqPos> ref_range,
1718  EIndexLevel index_level,
1719  ESearchMode search_mode,
1720  const CBGZFPos* file_pos = nullptr)
1721  {
1722  x_Select(index, ref_index, ref_range, index_level, index_level, search_mode, file_pos);
1723  }
1724  bool x_UpdateRange();
1726  {
1727  _ASSERT(*this);
1728  return m_Reader.HaveNextAvailableBytes() || x_UpdateRange();
1729  }
// Ends iteration: moves m_NextRange to the end of m_Ranges and resets
// m_CurrentRangeEnd to CBGZFPos(0).
// NOTE(review): m_CurrentRangeEnd is the operand of DECLARE_OPERATOR_BOOL for this
// class, so presumably this makes the iterator test false - confirm against the macro.
1730  void x_Stop()
1731  {
1732  m_NextRange = m_Ranges.end();
1733  m_CurrentRangeEnd = CBGZFPos(0);
1734  }
1735  bool x_NeedToSkip();
1736 
1737 private:
1738  size_t m_RefIndex;
1740  TIndexLevel m_MinIndexLevel, m_MaxIndexLevel;
1749 };
1750 
1751 
1754 
1755 #endif // SRA__READER__BAM__BAMINDEX__HPP
static bool operator<(const SBamIndexBinInfo &b1, const SBamIndexBinInfo &b2)
Definition: bamindex.hpp:370
pair< CBGZFPos, CBGZFPos > CBGZFRange
Definition: bgzf.hpp:272
const char * m_AuxEnd
Definition: bamindex.hpp:1137
SBamAuxData value_type
Definition: bamindex.hpp:1125
CBamAuxIterator & operator++()
Definition: bamindex.hpp:1119
SBamAuxData m_AuxData
Definition: bamindex.hpp:1135
CBamAuxIterator(const char *aux_ptr, const char *aux_end)
Definition: bamindex.hpp:1112
const SBamAuxData & operator*() const
Definition: bamindex.hpp:1129
const char * m_AuxPtr
Definition: bamindex.hpp:1136
const SBamAuxData * operator->() const
Definition: bamindex.hpp:1130
DECLARE_OPERATOR_BOOL(m_AuxData)
void SetFrom(CBGZFPos file_pos)
Definition: bamindex.hpp:948
CRangeUnion< CBGZFPos > TRanges
Definition: bamindex.hpp:960
const_iterator end() const
Definition: bamindex.hpp:971
void AddRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:930
TRanges m_Ranges
Definition: bamindex.hpp:984
const TRanges & GetRanges() const
Definition: bamindex.hpp:963
void SetRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:888
const_iterator begin() const
Definition: bamindex.hpp:967
void SetRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:914
void SetFrom(const CBamHeader &header, const CBGZFPos *file_pos)
Definition: bamindex.hpp:954
void SetWhole(const CBamHeader &header)
Definition: bamindex.hpp:942
void AddRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:901
TRanges::const_iterator const_iterator
Definition: bamindex.hpp:961
const string & GetText() const
Definition: bamindex.hpp:67
TRefs m_Refs
Definition: bamindex.hpp:109
map< string, string > TSBamTags
Definition: bamindex.hpp:72
const TRefs & GetRefs() const
Definition: bamindex.hpp:79
vector< SBamHeaderRefInfo > TRefs
Definition: bamindex.hpp:78
size_t GetRefCount() const
Definition: bamindex.hpp:84
CBGZFPos m_AlignStart
Definition: bamindex.hpp:110
list< TSBamRecord > TSBamRecords
Definition: bamindex.hpp:74
CBGZFPos GetAlignStart() const
Definition: bamindex.hpp:101
string m_Text
Definition: bamindex.hpp:107
pair< string, TSBamTags > TSBamRecord
Definition: bamindex.hpp:73
TSeqPos GetRefLength(size_t index) const
Definition: bamindex.hpp:94
static SBamHeaderRefInfo ReadRef(CBGZFStream &in)
const string & GetRefName(size_t index) const
Definition: bamindex.hpp:90
map< string, size_t > m_RefByName
Definition: bamindex.hpp:108
Uint8 m_TotalReadBytes
Definition: bamindex.hpp:760
const TRefs & GetRefs() const
Definition: bamindex.hpp:464
double m_TotalReadSeconds
Definition: bamindex.hpp:761
TRefs m_Refs
Definition: bamindex.hpp:758
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index) const
Definition: bamindex.hpp:739
size_t GetRefCount() const
Definition: bamindex.hpp:468
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:564
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, EIndexLevel index_level) const
Definition: bamindex.hpp:731
const string & GetFileName() const
Definition: bamindex.hpp:454
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level) const
Definition: bamindex.hpp:658
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:675
Uint8 m_UnmappedCount
Definition: bamindex.hpp:759
string m_FileName
Definition: bamindex.hpp:757
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level) const
Definition: bamindex.hpp:546
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, EIndexLevel min_index_level) const
Definition: bamindex.hpp:584
pair< Uint8, double > GetReadStatistics() const
Definition: bamindex.hpp:751
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level) const
Definition: bamindex.hpp:619
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:687
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, EIndexLevel min_index_level) const
Definition: bamindex.hpp:507
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:497
vector< uint64_t > EstimateDataSizeByAlnStartPos(size_t ref_index) const
Definition: bamindex.hpp:746
vector< SBamIndexRefIndex > TRefs
Definition: bamindex.hpp:463
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level) const
Definition: bamindex.hpp:697
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:485
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:575
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:599
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:536
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:524
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:648
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, TIndexLevel index_level) const
Definition: bamindex.hpp:725
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:636
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
Definition: bamindex.hpp:610
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:714
Uint2 GetCIGAROpsCount() const
Definition: bamindex.hpp:1560
TSeqPos GetRefSeqPos() const
Definition: bamindex.hpp:1519
Int8 GetAuxInt(char c1, char c2, size_t index=0) const
Definition: bamindex.hpp:1677
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1342
Uint1 GetMapQuality() const
Definition: bamindex.hpp:1645
Uint2 GetIndexBin() const
Definition: bamindex.hpp:1606
CTempString GetShortSeqAcc() const
Definition: bamindex.hpp:1539
TSeqPos GetCIGARPos() const
Definition: bamindex.hpp:1576
TSeqPos GetNextRefSeqPos() const
Definition: bamindex.hpp:1529
Uint2 GetBAIIndexBin() const
Definition: bamindex.hpp:1602
CBamRawAlignIterator(CBamRawDb &bam_db, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1322
SBamAlignInfo m_AlignInfo
Definition: bamindex.hpp:1742
bool TryGetFlags(Uint2 &flags) const
Definition: bamindex.hpp:1624
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1417
ESearchMode m_SearchMode
Definition: bamindex.hpp:1741
int32_t GetNextRefSeqIndex() const
Definition: bamindex.hpp:1525
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1373
void GetCIGAR(CBamString &dst) const
Definition: bamindex.hpp:1572
CBamRawAlignIterator(CBamRawDb &bam_db, CBGZFPos file_pos)
Definition: bamindex.hpp:1328
pair< COpenRange< TSeqPos >, COpenRange< TSeqPos > > GetCIGARAlignment(void) const
Definition: bamindex.hpp:1588
CBamFileRangeSet::const_iterator m_NextRange
Definition: bamindex.hpp:1746
bool IsSecondInPair() const
Definition: bamindex.hpp:1658
string GetCIGAR() const
Definition: bamindex.hpp:1597
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1461
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel min_index_level, TIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1449
bool IsMapped() const
Definition: bamindex.hpp:1640
TSeqPos GetShortSequenceLength(void) const
Definition: bamindex.hpp:1543
void Select(CBamRawDb &bam_db, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1412
TIndexLevel GetIndexLevel() const
Definition: bamindex.hpp:1610
SBamAuxData GetAuxData(char c1, char c2, bool allow_missing=false) const
Definition: bamindex.hpp:1673
TSeqPos GetCIGARRefSize() const
Definition: bamindex.hpp:1584
TSeqPos GetCIGARShortSize() const
Definition: bamindex.hpp:1580
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1690
CTempString GetShortSeqId() const
Definition: bamindex.hpp:1534
CTempString GetShortSequenceRaw() const
Definition: bamindex.hpp:1551
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1482
CBamAuxIterator GetAuxIterator() const
Definition: bamindex.hpp:1669
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1427
CBGZFPos GetFilePos() const
Definition: bamindex.hpp:1510
string GetShortSequence() const
Definition: bamindex.hpp:1547
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel min_index_level, TIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1362
CBamFileRangeSet m_Ranges
Definition: bamindex.hpp:1745
Uint4 GetCIGAROp(Uint2 index) const
Definition: bamindex.hpp:1564
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1492
bool HasAmbiguousMatch() const
Definition: bamindex.hpp:1592
Uint2 GetFlags() const
Definition: bamindex.hpp:1619
TIndexLevel m_MinIndexLevel
Definition: bamindex.hpp:1740
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1716
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1352
COpenRange< TSeqPos > m_QueryRefRange
Definition: bamindex.hpp:1739
bool IsFirstInPair() const
Definition: bamindex.hpp:1654
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1473
bool IsSecondary() const
Definition: bamindex.hpp:1662
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1708
bool IsPaired() const
Definition: bamindex.hpp:1650
ENa_strand GetStrand() const
Definition: bamindex.hpp:1634
void GetShortSequence(CBamString &str) const
Definition: bamindex.hpp:1555
DECLARE_OPERATOR_BOOL(m_CurrentRangeEnd)
COpenRange< TSeqPos > m_AlignRefRange
Definition: bamindex.hpp:1743
CBamRawAlignIterator & operator++()
Definition: bamindex.hpp:1504
bool IsSetStrand() const
Definition: bamindex.hpp:1630
CBGZFPos m_CurrentRangeEnd
Definition: bamindex.hpp:1747
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1438
COpenRange< TSeqPos > m_AlignReadRange
Definition: bamindex.hpp:1744
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1701
int32_t GetRefSeqIndex() const
Definition: bamindex.hpp:1515
bool IsOnMinBinIndexLevel() const
Definition: bamindex.hpp:1614
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Definition: bamindex.hpp:1333
void GetCIGAR(vector< Uint4 > &raw_cigar) const
Definition: bamindex.hpp:1568
CBGZFStream m_Reader
Definition: bamindex.hpp:1748
size_t GetRefIndex(const string &ref_label) const
Definition: bamindex.hpp:1026
CBamRawDb(const string &bam_path)
Definition: bamindex.hpp:995
CRef< CBGZFFile > m_File
Definition: bamindex.hpp:1054
const string & GetIndexName() const
Definition: bamindex.hpp:1018
const string & GetRefName(size_t ref_index) const
Definition: bamindex.hpp:1030
size_t GetRefCount() const
Definition: bamindex.hpp:1022
const CBamHeader & GetHeader() const
Definition: bamindex.hpp:1010
const CBamIndex & GetIndex() const
Definition: bamindex.hpp:1014
TSeqPos GetRefSeqLength(size_t ref_index) const
Definition: bamindex.hpp:1034
CBGZFFile & GetFile()
Definition: bamindex.hpp:1040
vector< Uint8 > EstimateDataSizeByAlnStartPos(const string &ref_label) const
Definition: bamindex.hpp:1045
CBamIndex m_Index
Definition: bamindex.hpp:1056
CBamHeader m_Header
Definition: bamindex.hpp:1055
CBamRawDb(const string &bam_path, const string &index_path)
Definition: bamindex.hpp:999
bool empty() const
Definition: bamindex.hpp:781
Position position_type
Definition: bamindex.hpp:769
TRanges::const_iterator const_iterator
Definition: bamindex.hpp:775
TRanges m_Ranges
Definition: bamindex.hpp:853
const_iterator end() const
Definition: bamindex.hpp:789
map< position_type, position_type > TRanges
Definition: bamindex.hpp:773
CRangeUnion< position_type > TThisType
Definition: bamindex.hpp:770
TThisType & operator+=(const TRange &range)
Definition: bamindex.hpp:846
void add_range(TRange range)
Definition: bamindex.hpp:794
const_iterator begin() const
Definition: bamindex.hpp:785
pair< position_type, position_type > TRange
Definition: bamindex.hpp:772
void clear()
Definition: bamindex.hpp:777
TRanges::iterator iterator
Definition: bamindex.hpp:774
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
void erase(iterator pos)
Definition: map.hpp:167
const_iterator begin() const
Definition: map.hpp:151
const_iterator end() const
Definition: map.hpp:152
iterator_bool insert(const value_type &val)
Definition: map.hpp:165
bool empty() const
Definition: map.hpp:149
const_iterator upper_bound(const key_type &key) const
Definition: map.hpp:155
void clear()
Definition: map.hpp:169
Include a standard set of the NCBI C++ Toolkit most basic headers.
static uch flags
#define false
Definition: bool.h:36
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
static const char * str(char *buf, int n)
Definition: stats.c:84
Int4 int32_t
unsigned char uint8_t
Uint2 uint16_t
Uint4 uint32_t
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
Definition: ncbimisc.hpp:878
void Read(CObjectIStream &in, TObjectPtr object, const CTypeRef &type)
Definition: serial.cpp:60
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
#define NCBI_DEPRECATED
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define NCBI_BAMREAD_EXPORT
Definition: ncbi_export.h:1235
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
int i
int len
string GetHeader()
Definition: file_names.hpp:62
range(_Ty, _Ty) -> range< _Ty >
std::istream & in(std::istream &in_, double &x_)
static int buffer_size
Definition: pcretest.c:1050
#define assert(x)
Definition: srv_diag.hpp:58
const char * get_phred_quality_end() const
Definition: bamindex.hpp:1275
const char * get_cigar_ptr() const
Definition: bamindex.hpp:1240
const char * m_ReadPtr
Definition: bamindex.hpp:1309
const char * m_CIGARPtr
Definition: bamindex.hpp:1308
void get_cigar(vector< uint32_t > &raw_cigar) const
Definition: bamindex.hpp:1252
uint16_t get_bin() const
Definition: bamindex.hpp:1178
int32_t get_ref_pos() const
Definition: bamindex.hpp:1165
uint8_t get_map_quality() const
Definition: bamindex.hpp:1174
int32_t get_next_ref_pos() const
Definition: bamindex.hpp:1224
uint8_t get_read_name_len() const
Definition: bamindex.hpp:1170
CBGZFPos get_file_pos() const
Definition: bamindex.hpp:1144
const char * get_read_end() const
Definition: bamindex.hpp:1267
const char * get_read_ptr() const
Definition: bamindex.hpp:1263
uint32_t get_cigar_op_data(uint16_t index) const
Definition: bamindex.hpp:1248
const char * get_cigar_end() const
Definition: bamindex.hpp:1244
const char * get_aux_data_end() const
Definition: bamindex.hpp:1283
const char * get_read_name_ptr() const
Definition: bamindex.hpp:1232
const char * get_phred_quality_ptr() const
Definition: bamindex.hpp:1271
const char * get_read_name_end() const
Definition: bamindex.hpp:1236
const char * m_RecordPtr
Definition: bamindex.hpp:1307
const char * get_record_ptr() const
Definition: bamindex.hpp:1152
uint16_t get_flag() const
Definition: bamindex.hpp:1212
size_t get_record_size() const
Definition: bamindex.hpp:1148
const char * get_aux_data_ptr() const
Definition: bamindex.hpp:1279
CBGZFPos m_FilePos
Definition: bamindex.hpp:1306
uint16_t get_cigar_ops_count() const
Definition: bamindex.hpp:1194
const char * get_record_end() const
Definition: bamindex.hpp:1156
int32_t get_next_ref_index() const
Definition: bamindex.hpp:1220
Uint4 m_RecordSize
Definition: bamindex.hpp:1310
int32_t get_tlen() const
Definition: bamindex.hpp:1228
CTempString get_read_raw() const
Definition: bamindex.hpp:1288
int32_t get_ref_index() const
Definition: bamindex.hpp:1161
uint32_t get_read_len() const
Definition: bamindex.hpp:1216
char GetChar() const
Definition: bamindex.cpp:2426
char m_DataType
Definition: bamindex.hpp:1098
uint32_t m_ElementCount
Definition: bamindex.hpp:1100
bool IsChar() const
Definition: bamindex.hpp:1084
float GetFloat(size_t index=0) const
Definition: bamindex.cpp:2480
bool IsInt() const
Definition: bamindex.hpp:1087
char m_Tag[2]
Definition: bamindex.hpp:1097
char GetDataType() const
Definition: bamindex.hpp:1079
bool IsArray() const
Definition: bamindex.hpp:1081
Int8 GetInt(size_t index=0) const
Definition: bamindex.cpp:2448
size_t size() const
Definition: bamindex.hpp:1082
bool IsTag(char c1, char c2) const
Definition: bamindex.hpp:1077
bool IsFloat() const
Definition: bamindex.hpp:1086
CTempString GetTag() const
Definition: bamindex.hpp:1076
DECLARE_OPERATOR_BOOL(m_DataPtr)
bool m_IsArray
Definition: bamindex.hpp:1099
CTempString GetString() const
Definition: bamindex.cpp:2437
const char * m_DataPtr
Definition: bamindex.hpp:1101
bool IsString() const
Definition: bamindex.hpp:1085
TSeqPos m_Length
Definition: bamindex.hpp:52
CBGZFPos GetEndFilePos() const
Definition: bamindex.hpp:365
CBGZFPos m_Overlap
Definition: bamindex.hpp:357
vector< CBGZFRange > m_Chunks
Definition: bamindex.hpp:359
CBGZFPos GetStartFilePos() const
Definition: bamindex.hpp:361
COpenRange< TSeqPos > GetSeqRange(SBamIndexParams params) const
Definition: bamindex.hpp:350
uint32_t TBin
Definition: bamindex.hpp:120
static const TShift kLevelStepBinShift
Definition: bamindex.hpp:137
uint8_t TIndexLevel
Definition: bamindex.hpp:121
static const TShift kBAI_min_shift
Definition: bamindex.hpp:138
uint8_t TShift
Definition: bamindex.hpp:136
static const TIndexLevel kMinBinIndexLevel
Definition: bamindex.hpp:134
static const TIndexLevel kBAI_depth
Definition: bamindex.hpp:139
static const TBin kMaxBinNumber
Definition: bamindex.hpp:133
constexpr TShift GetMinBinShift() const
Definition: bamindex.hpp:204
constexpr TSeqPos GetBinSize(TIndexLevel level) const
Definition: bamindex.hpp:196
constexpr TBin GetPseudoBin() const
Definition: bamindex.hpp:265
TIndexLevel depth
Definition: bamindex.hpp:157
constexpr TBin GetBinNumberBase(int level) const
Definition: bamindex.hpp:239
TIndexLevel Bin2IndexLevel(TBin bin) const
Definition: bamindex.hpp:302
constexpr TBin GetBinNumberBaseReversed(int reversed_level) const
Definition: bamindex.hpp:230
constexpr TBin GetMinBinNumberBase() const
Definition: bamindex.hpp:248
bool RangeIsOnMinBinIndexLevel(CRange< TSeqPos > range) const
Definition: bamindex.hpp:324
constexpr TBin GetFirstBin(TIndexLevel level) const
Definition: bamindex.hpp:257
TBin GetBinNumber(TSeqPos pos, EIndexLevel level) const
Definition: bamindex.hpp:289
TBin GetBinNumberOffset(TSeqPos pos, EIndexLevel level) const
Definition: bamindex.hpp:281
constexpr TShift GetLevelBinShift(EIndexLevel level) const
Definition: bamindex.hpp:191
constexpr TSeqPos GetPageSize() const
Definition: bamindex.hpp:218
TShift min_shift
Definition: bamindex.hpp:156
pair< TBin, TBin > GetBinRange(COpenRange< TSeqPos > ref_range, TIndexLevel index_level) const
Definition: bamindex.cpp:851
COpenRange< TSeqPos > GetSeqRange(TBin bin) const
Definition: bamindex.hpp:332
constexpr TSeqPos GetMaxBinSize() const
Definition: bamindex.hpp:212
TBin GetBinNumber(TSeqPos pos, TIndexLevel level) const
Definition: bamindex.hpp:285
bool IsOverflowPos(TSeqPos pos) const
Definition: bamindex.hpp:273
TBin GetBinNumberOffset(TSeqPos pos, TIndexLevel level) const
Definition: bamindex.hpp:277
TBin GetUpperBinNumber(TBin bin) const
Definition: bamindex.hpp:297
constexpr TIndexLevel ToIndexLevel(EIndexLevel level) const
Definition: bamindex.hpp:166
constexpr TSeqPos GetBinSize(EIndexLevel level) const
Definition: bamindex.hpp:200
constexpr TBin GetFirstOverflowBin(TIndexLevel level=0) const
Definition: bamindex.hpp:253
bool IsOverflowBin(TBin bin, TIndexLevel level=0) const
Definition: bamindex.hpp:269
constexpr TBin GetBinNumberBase(EIndexLevel level) const
Definition: bamindex.hpp:243
constexpr TShift GetPageShift() const
Definition: bamindex.hpp:223
constexpr TShift GetMinLevelBinShift() const
Definition: bamindex.hpp:158
TIndexLevel GetRangeIndexLevel(CRange< TSeqPos > range) const
Definition: bamindex.hpp:311
constexpr TSeqPos GetMinBinSize() const
Definition: bamindex.hpp:208
constexpr TShift GetLevelBinShift(TIndexLevel level) const
Definition: bamindex.hpp:187
constexpr TIndexLevel GetMaxIndexLevel() const
Definition: bamindex.hpp:162
constexpr TBin GetLastBin(TIndexLevel level) const
Definition: bamindex.hpp:261
TSeqPos m_EstimatedLength
Definition: bamindex.hpp:442
vector< uint64_t > CollectEstimatedCoverage(EIndexLevel min_index_level, EIndexLevel max_index_level) const
Definition: bamindex.hpp:401
pair< TBinsIter, TBinsIter > GetLevelBins(EIndexLevel level) const
Definition: bamindex.hpp:420
TBins::const_iterator TBinsIter
Definition: bamindex.hpp:418
vector< SBamIndexBinInfo > TBins
Definition: bamindex.hpp:417
CBGZFRange m_UnmappedChunk
Definition: bamindex.hpp:437
vector< CBGZFPos > m_Overlaps
Definition: bamindex.hpp:440
static Uint4 MakeUint4(const char *buf)
Definition: bgzf.hpp:159
static Uint2 MakeUint2(const char *buf)
Definition: bgzf.hpp:153
#define _ASSERT
Modified on Tue Apr 30 06:41:00 2024 by modify_doxy.py rev. 669887