1 #ifndef SRA__READER__BAM__BAMINDEX__HPP
2 #define SRA__READER__BAM__BAMINDEX__HPP
65 void Read(
const string& bam_file_name);
78 typedef vector<SBamHeaderRefInfo>
TRefs;
89 size_t GetRefIndex(
const string& name)
const;
92 return m_Refs[index].m_Name;
96 return m_Refs[index].m_Length;
150 #define BAM_SUPPORT_CSI
154 #ifdef BAM_SUPPORT_CSI
171 static const bool is_CSI =
false;
233 constexpr
int kAllowedLevels = 10;
234 constexpr
unsigned kBaseBits =
306 if ( bin >= bin_start ) {
340 const char*
Read(
const char* buffer_ptr,
const char* buffer_end,
349 #ifdef BAM_SUPPORT_CSI
356 return m_Chunks.front().first;
360 return m_Chunks.back().second;
369 return b1.
m_Bin < b2;
373 return b1 < b2.
m_Bin;
379 const char*
Read(
const char* buffer_ptr,
const char* buffer_end,
392 vector<uint64_t> CollectEstimatedCoverage(
TIndexLevel min_index_level,
397 return CollectEstimatedCoverage(ToIndexLevel(min_index_level),
398 ToIndexLevel(max_index_level));
404 vector<TSeqPos> GetAlnOverStarts(
void)
const;
407 vector<TSeqPos> GetAlnOverEnds(
void)
const;
410 typedef vector<SBamIndexBinInfo>
TBins;
412 pair<TBinsIter, TBinsIter> GetLevelBins(
TIndexLevel level)
const;
415 return GetLevelBins(ToIndexLevel(level));
420 pair<TBinsIter, TBinsIter> AddLevelFileRanges(vector<CBGZFRange>& ranges,
422 pair<TBin, TBin> bin_range)
const;
423 pair<TBinsIter, TBinsIter> GetBinsIterRange(pair<TBin, TBin> bin_range)
const;
425 void SetLengthFromHeader(
TSeqPos length);
444 CBamIndex(
const string& index_file_name);
452 void Read(
const string& index_file_name);
456 typedef vector<SBamIndexRefIndex>
TRefs;
463 return m_Refs.size();
466 void SetLengthFromHeader(
const CBamHeader& header);
468 CBGZFRange GetTotalFileRange(
size_t ref_index)
const;
471 MakeEstimatedCoverageAnnot(
const CBamHeader& header,
472 const string& ref_name,
473 const string& seq_id,
474 const string& annot_name,
475 TIndexLevel min_index_level,
476 TIndexLevel max_index_level)
const;
479 const string& ref_name,
480 const string& seq_id,
481 const string& annot_name,
485 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
486 ToIndexLevel(min_index_level),
487 ToIndexLevel(max_index_level));
491 const string& ref_name,
492 const string& seq_id,
493 const string& annot_name,
496 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
497 min_index_level, GetMaxIndexLevel());
501 const string& ref_name,
502 const string& seq_id,
503 const string& annot_name,
506 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
507 ToIndexLevel(min_index_level));
510 MakeEstimatedCoverageAnnot(
const CBamHeader& header,
511 const string& ref_name,
513 const string& annot_name,
514 TIndexLevel min_index_level,
515 TIndexLevel max_index_level)
const;
518 const string& ref_name,
520 const string& annot_name,
524 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
525 ToIndexLevel(min_index_level),
526 ToIndexLevel(max_index_level));
530 const string& ref_name,
532 const string& annot_name,
535 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
536 min_index_level, GetMaxIndexLevel());
540 const string& ref_name,
542 const string& annot_name,
545 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
546 ToIndexLevel(min_index_level));
551 MakeEstimatedCoverageAnnot(
size_t ref_index,
552 const string& seq_id,
553 const string& annot_name,
554 TIndexLevel min_index_level,
555 TIndexLevel max_index_level)
const;
558 const string& seq_id,
559 const string& annot_name,
563 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
564 ToIndexLevel(min_index_level),
565 ToIndexLevel(max_index_level));
569 const string& seq_id,
570 const string& annot_name,
573 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
574 min_index_level, GetMaxIndexLevel());
578 const string& seq_id,
579 const string& annot_name,
582 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
583 ToIndexLevel(min_index_level));
586 MakeEstimatedCoverageAnnot(
size_t ref_index,
588 const string& annot_name,
589 TIndexLevel min_index_level,
590 TIndexLevel max_index_level)
const;
594 const string& annot_name,
598 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
599 ToIndexLevel(min_index_level),
600 ToIndexLevel(max_index_level));
605 const string& annot_name,
608 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
609 min_index_level, GetMaxIndexLevel());
614 const string& annot_name,
617 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
618 ToIndexLevel(min_index_level));
622 MakeEstimatedCoverageAnnot(
size_t ref_index,
623 const string& seq_id,
624 const string& annot_name,
626 TIndexLevel min_index_level,
627 TIndexLevel max_index_level)
const;
630 const string& seq_id,
631 const string& annot_name,
636 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
637 ToIndexLevel(min_index_level),
638 ToIndexLevel(max_index_level));
642 const string& seq_id,
643 const string& annot_name,
647 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
648 min_index_level, GetMaxIndexLevel());
652 const string& seq_id,
653 const string& annot_name,
657 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
658 ToIndexLevel(min_index_level));
661 MakeEstimatedCoverageAnnot(
size_t ref_index,
663 const string& annot_name,
665 TIndexLevel min_index_level,
666 TIndexLevel max_index_level)
const;
670 const string& annot_name,
675 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
676 ToIndexLevel(min_index_level),
677 ToIndexLevel(max_index_level));
682 const string& annot_name,
686 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
687 min_index_level, GetMaxIndexLevel());
692 const string& annot_name,
696 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
697 ToIndexLevel(min_index_level));
703 CollectEstimatedCoverage(
size_t ref_index,
704 TIndexLevel min_index_level,
705 TIndexLevel max_index_level)
const;
711 return CollectEstimatedCoverage(ref_index,
712 ToIndexLevel(min_index_level),
713 ToIndexLevel(max_index_level));
721 return CollectEstimatedCoverage(ref_index, index_level, index_level);
727 return CollectEstimatedCoverage(ref_index, ToIndexLevel(index_level));
734 return CollectEstimatedCoverage(ref_index, 0, GetMaxIndexLevel());
741 return GetRef(ref_index).EstimateDataSizeByAlnStartPos();
746 return make_pair(m_TotalReadBytes, m_TotalReadSeconds);
758 template<
class Position>
765 typedef pair<position_type, position_type>
TRange;
805 if ( !(iter->second <
range.second) ) {
820 !(
range.first < iter->first) &&
821 !(
range.second < iter->second));
836 iter->second =
range.second;
873 ESearchMode search_mode = eSearchByOverlap);
876 ESearchMode search_mode = eSearchByOverlap);
879 TIndexLevel index_level,
880 ESearchMode search_mode = eSearchByOverlap);
886 SetRanges(index, ref_index, ref_range, index.
ToIndexLevel(index_level), search_mode);
890 TIndexLevel index_level,
891 ESearchMode search_mode = eSearchByOverlap);
897 AddRanges(index, ref_index, ref_range, index.
ToIndexLevel(index_level), search_mode);
901 TIndexLevel min_index_level, TIndexLevel max_index_level,
902 ESearchMode search_mode = eSearchByOverlap);
908 SetRanges(index, ref_index, ref_range,
915 TIndexLevel min_index_level, TIndexLevel max_index_level,
916 ESearchMode search_mode = eSearchByOverlap);
922 AddRanges(index, ref_index, ref_range,
939 return m_Ranges.begin();
943 return m_Ranges.end();
947 Uint8 GetFileSize()
const;
950 void AddSortedRanges(
const vector<CBGZFRange>& ranges);
968 CBamRawDb(
const string& bam_path,
const string& index_path)
970 Open(bam_path, index_path);
975 void Open(
const string& bam_path);
976 void Open(
const string& bam_path,
const string& index_path);
989 return m_Index.GetFileName();
997 return GetHeader().GetRefIndex(ref_label);
1001 return GetHeader().GetRefName(ref_index);
1005 return GetHeader().GetRefLength(ref_index);
1016 size_t ref_index = GetRefIndex(ref_label);
1017 return GetIndex().GetRef(ref_index).EstimateDataSizeByAlnStartPos(GetRefSeqLength(ref_index));
1020 double GetEstimatedSecondsPerByte()
const;
1119 return m_RecordSize;
1127 return get_record_ptr() + get_record_size();
1141 return get_record_ptr()[8];
1145 return get_record_ptr()[9];
1151 static const char kCIGARSymbols[];
1168 fAlign_WasPaired = 1 << 0,
1169 fAlign_IsMappedAsPair = 1 << 1,
1170 fAlign_SelfIsUnmapped = 1 << 2,
1171 fAlign_MateIsUnmapped = 1 << 3,
1172 fAlign_SelfIsReverse = 1 << 4,
1173 fAlign_MateIsReverse = 1 << 5,
1174 fAlign_IsFirst = 1 << 6,
1175 fAlign_IsSecond = 1 << 7,
1176 fAlign_IsNotPrimary = 1 << 8,
1177 fAlign_IsLowQuality = 1 << 9,
1178 fAlign_IsDuplicate = 1 << 10,
1179 fAlign_IsSupplementary = 1 << 11
1203 return get_record_ptr()+32;
1211 return get_read_name_end();
1223 size_t count = get_cigar_ops_count();
1224 raw_cigar.resize(count);
1226 memcpy(dst, get_cigar_ptr(), count*
sizeof(
uint32_t));
1227 for (
size_t i = 0;
i < count; ++
i ) {
1234 return get_cigar_end();
1238 return get_read_ptr() + (get_read_len()+1)/2;
1242 return get_read_end();
1246 return get_phred_quality_ptr() + get_read_len();
1250 return get_phred_quality_end();
1254 return get_record_end();
1259 return CTempString(get_read_ptr(), (get_read_len()+1)/2);
1261 static const char kBaseSymbols[];
1262 string get_read()
const;
1265 uint32_t get_cigar_ref_size()
const;
1266 uint32_t get_cigar_read_size()
const;
1268 string get_cigar()
const;
1269 bool has_ambiguous_match()
const;
1271 SBamAuxData get_aux_data(
char c1,
char c2,
bool allow_missing =
false)
const;
1287 : m_CurrentRangeEnd(0)
1292 : m_Reader(bam_db.GetFile())
1297 const string& ref_label,
1300 : m_Reader(bam_db.GetFile())
1302 Select(bam_db, ref_label, ref_range, search_mode);
1305 const string& ref_label,
1309 : m_Reader(bam_db.GetFile())
1311 Select(bam_db, ref_label, ref_range, index_level, search_mode);
1314 const string& ref_label,
1318 : m_Reader(bam_db.GetFile())
1320 Select(bam_db, ref_label, ref_range, index_level, search_mode);
1323 const string& ref_label,
1328 : m_Reader(bam_db.GetFile())
1330 Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode);
1333 const string& ref_label,
1338 : m_Reader(bam_db.GetFile())
1340 Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode);
1343 const string& ref_label,
1346 ESearchMode search_mode = eSearchByOverlap);
1348 const string& ref_label,
1351 TIndexLevel min_index_level,
1352 TIndexLevel max_index_level,
1353 ESearchMode search_mode = eSearchByOverlap);
1355 const string& ref_label,
1358 EIndexLevel min_index_level,
1359 EIndexLevel max_index_level,
1360 ESearchMode search_mode);
1372 const string& ref_label,
1377 bam_db.
GetRefIndex(ref_label), ref_range, search_mode);
1380 const string& ref_label,
1386 bam_db.
GetRefIndex(ref_label), ref_range, index_level, search_mode);
1389 const string& ref_label,
1395 bam_db.
GetRefIndex(ref_label), ref_range, index_level, search_mode);
1398 const string& ref_label,
1405 bam_db.
GetRefIndex(ref_label), ref_range, min_index_level, max_index_level, search_mode);
1408 const string& ref_label,
1415 bam_db.
GetRefIndex(ref_label), ref_range, min_index_level, max_index_level, search_mode);
1422 x_Select(index, ref_index, ref_range, search_mode);
1430 x_Select(index, ref_index, ref_range, index_level, search_mode);
1438 x_Select(index, ref_index, ref_range, index_level, search_mode);
1450 return m_AlignInfo.get_file_pos();
1455 return m_AlignInfo.get_ref_index();
1459 return m_AlignRefRange.GetFrom();
1465 return m_AlignInfo.get_next_ref_index();
1469 return m_AlignInfo.get_next_ref_pos();
1474 return CTempString(m_AlignInfo.get_read_name_ptr(),
1475 m_AlignInfo.get_read_name_len()-1);
1479 return m_AlignInfo.get_short_seq_accession_id();
1483 return m_AlignInfo.get_read_len();
1487 return m_AlignInfo.get_read();
1491 return m_AlignInfo.get_read_raw();
1495 return m_AlignInfo.get_read(
str);
1500 return m_AlignInfo.get_cigar_ops_count();
1504 return m_AlignInfo.get_cigar_op_data(index);
1508 return m_AlignInfo.get_cigar(raw_cigar);
1512 m_AlignInfo.get_cigar(dst);
1516 return m_AlignReadRange.GetFrom();
1520 return m_AlignReadRange.GetLength();
1524 return m_AlignRefRange.GetLength();
1528 return make_pair(m_AlignRefRange, m_AlignReadRange);
1532 return m_AlignInfo.has_ambiguous_match();
1537 return m_AlignInfo.get_cigar();
1542 return m_AlignInfo.get_bin();
1546 return Bin2IndexLevel(GetIndexBin());
1551 return m_AlignInfo.get_flag();
1566 return (GetFlags() & m_AlignInfo.fAlign_SelfIsReverse)?
1572 return (GetFlags() & m_AlignInfo.fAlign_SelfIsUnmapped) == 0;
1577 return IsMapped()? m_AlignInfo.get_map_quality(): 0;
1582 return (GetFlags() & m_AlignInfo.fAlign_IsMappedAsPair) != 0;
1586 return (GetFlags() & m_AlignInfo.fAlign_IsFirst) != 0;
1590 return (GetFlags() & m_AlignInfo.fAlign_IsSecond) != 0;
1594 return (GetFlags() & m_AlignInfo.fAlign_IsNotPrimary) != 0;
1597 void GetSegments(vector<int>& starts, vector<TSeqPos>& lens)
const;
1601 return CBamAuxIterator(m_AlignInfo.get_aux_data_ptr(), m_AlignInfo.get_aux_data_end());
1605 return m_AlignInfo.get_aux_data(c1, c2, allow_missing);
1609 return GetAuxData(c1, c2).GetInt(index);
1616 TIndexLevel min_index_level, TIndexLevel max_index_level,
1617 ESearchMode search_mode);
1623 x_Select(index, ref_index, ref_range,
1632 x_Select(index, ref_index, ref_range, 0, index.
GetMaxIndexLevel(), search_mode);
1639 x_Select(index, ref_index, ref_range, index_level, index_level, search_mode);
1646 x_Select(index, ref_index, ref_range, index_level, index_level, search_mode);
1648 bool x_UpdateRange();
1652 return m_Reader.HaveNextAvailableBytes() || x_UpdateRange();
1656 m_NextRange = m_Ranges.end();
1659 bool x_NeedToSkip();
static bool operator<(const SBamIndexBinInfo &b1, const SBamIndexBinInfo &b2)
pair< CBGZFPos, CBGZFPos > CBGZFRange
CBamAuxIterator & operator++()
CBamAuxIterator(const char *aux_ptr, const char *aux_end)
const SBamAuxData & operator*() const
const SBamAuxData * operator->() const
DECLARE_OPERATOR_BOOL(m_AuxData)
CRangeUnion< CBGZFPos > TRanges
void AddRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap)
const_iterator end() const
void AddRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
const TRanges & GetRanges() const
void SetRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
const_iterator begin() const
void SetRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap)
TRanges::const_iterator const_iterator
const TRefs & GetRefs() const
double m_TotalReadSeconds
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index) const
size_t GetRefCount() const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, EIndexLevel index_level) const
const string & GetFileName() const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level, EIndexLevel max_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, EIndexLevel min_index_level) const
pair< Uint8, double > GetReadStatistics() const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, TIndexLevel min_index_level=0) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, EIndexLevel min_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
vector< uint64_t > EstimateDataSizeByAlnStartPos(size_t ref_index) const
vector< SBamIndexRefIndex > TRefs
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, TIndexLevel min_index_level=0) const
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, TIndexLevel index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level, EIndexLevel max_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Uint2 GetCIGAROpsCount() const
TSeqPos GetRefSeqPos() const
Int8 GetAuxInt(char c1, char c2, size_t index=0) const
Uint1 GetMapQuality() const
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
Uint2 GetIndexBin() const
CTempString GetShortSeqAcc() const
TSeqPos GetCIGARPos() const
TSeqPos GetNextRefSeqPos() const
SBamAlignInfo m_AlignInfo
bool TryGetFlags(Uint2 &flags) const
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel min_index_level, TIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap)
int32_t GetNextRefSeqIndex() const
void GetCIGAR(CBamString &dst) const
pair< COpenRange< TSeqPos >, COpenRange< TSeqPos > > GetCIGARAlignment(void) const
CBamFileRangeSet::const_iterator m_NextRange
bool IsSecondInPair() const
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap)
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode)
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap)
TSeqPos GetShortSequenceLength(void) const
TIndexLevel GetIndexLevel() const
SBamAuxData GetAuxData(char c1, char c2, bool allow_missing=false) const
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode)
TSeqPos GetCIGARRefSize() const
TSeqPos GetCIGARShortSize() const
CTempString GetShortSeqId() const
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap)
CTempString GetShortSequenceRaw() const
CBamAuxIterator GetAuxIterator() const
void Select(CBamRawDb &bam_db)
CBGZFPos GetFilePos() const
string GetShortSequence() const
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap)
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode)
CBamFileRangeSet m_Ranges
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel min_index_level, TIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap)
Uint4 GetCIGAROp(Uint2 index) const
bool HasAmbiguousMatch() const
TIndexLevel m_MinIndexLevel
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode)
COpenRange< TSeqPos > m_QueryRefRange
bool IsFirstInPair() const
ENa_strand GetStrand() const
void GetShortSequence(CBamString &str) const
DECLARE_OPERATOR_BOOL(m_CurrentRangeEnd)
COpenRange< TSeqPos > m_AlignRefRange
CBamRawAlignIterator & operator++()
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, ESearchMode search_mode)
CBGZFPos m_CurrentRangeEnd
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode)
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode)
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
COpenRange< TSeqPos > m_AlignReadRange
int32_t GetRefSeqIndex() const
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap)
void GetCIGAR(vector< Uint4 > &raw_cigar) const
CBamRawAlignIterator(CBamRawDb &bam_db)
size_t GetRefIndex(const string &ref_label) const
CBamRawDb(const string &bam_path)
const string & GetIndexName() const
const string & GetRefName(size_t ref_index) const
size_t GetRefCount() const
const CBamHeader & GetHeader() const
const CBamIndex & GetIndex() const
TSeqPos GetRefSeqLength(size_t ref_index) const
vector< Uint8 > EstimateDataSizeByAlnStartPos(const string &ref_label) const
CBamRawDb(const string &bam_path, const string &index_path)
TRanges::const_iterator const_iterator
const_iterator end() const
map< position_type, position_type > TRanges
CRangeUnion< position_type > TThisType
TThisType & operator+=(const TRange &range)
void add_range(TRange range)
const_iterator begin() const
pair< position_type, position_type > TRange
TRanges::iterator iterator
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
iterator_bool insert(const value_type &val)
const_iterator upper_bound(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
unsigned int TSeqPos
Type for sequence locations and lengths.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
void Read(CObjectIStream &in, TObjectPtr object, const CTypeRef &type)
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
#define NCBI_BAMREAD_EXPORT
ENa_strand
strand of nucleic acid
range(_Ty, _Ty) -> range< _Ty >
std::istream & in(std::istream &in_, double &x_)
static const char * str(char *buf, int n)
const char * get_phred_quality_end() const
const char * get_cigar_ptr() const
void get_cigar(vector< uint32_t > &raw_cigar) const
int32_t get_ref_pos() const
uint8_t get_map_quality() const
int32_t get_next_ref_pos() const
uint8_t get_read_name_len() const
CBGZFPos get_file_pos() const
const char * get_read_end() const
const char * get_read_ptr() const
uint32_t get_cigar_op_data(uint16_t index) const
const char * get_cigar_end() const
const char * get_aux_data_end() const
const char * get_read_name_ptr() const
const char * get_phred_quality_ptr() const
const char * get_read_name_end() const
const char * get_record_ptr() const
uint16_t get_flag() const
size_t get_record_size() const
const char * get_aux_data_ptr() const
uint16_t get_cigar_ops_count() const
const char * get_record_end() const
int32_t get_next_ref_index() const
CTempString get_read_raw() const
int32_t get_ref_index() const
uint32_t get_read_len() const
float GetFloat(size_t index=0) const
Int8 GetInt(size_t index=0) const
bool IsTag(char c1, char c2) const
CTempString GetTag() const
DECLARE_OPERATOR_BOOL(m_DataPtr)
CTempString GetString() const
CBGZFPos GetEndFilePos() const
vector< CBGZFRange > m_Chunks
CBGZFPos GetStartFilePos() const
COpenRange< TSeqPos > GetSeqRange(SBamIndexParams params) const
static const TShift kLevelStepBinShift
static const TShift kBAI_min_shift
static const TIndexLevel kMinBinIndexLevel
static const TIndexLevel kBAI_depth
static const TBin kMaxBinNumber
constexpr TShift GetMinBinShift() const
constexpr TSeqPos GetBinSize(TIndexLevel level) const
constexpr TBin GetPseudoBin() const
constexpr TBin GetBinNumberBase(int level) const
TIndexLevel Bin2IndexLevel(TBin bin) const
constexpr TBin GetBinNumberBaseReversed(int reversed_level) const
constexpr TBin GetMinBinNumberBase() const
constexpr TBin GetFirstBin(TIndexLevel level) const
TBin GetBinNumber(TSeqPos pos, EIndexLevel level) const
TBin GetBinNumberOffset(TSeqPos pos, EIndexLevel level) const
constexpr TShift GetLevelBinShift(EIndexLevel level) const
constexpr TSeqPos GetPageSize() const
pair< TBin, TBin > GetBinRange(COpenRange< TSeqPos > ref_range, TIndexLevel index_level) const
COpenRange< TSeqPos > GetSeqRange(TBin bin) const
constexpr TSeqPos GetMaxBinSize() const
TBin GetBinNumber(TSeqPos pos, TIndexLevel level) const
bool IsOverflowPos(TSeqPos pos) const
TBin GetBinNumberOffset(TSeqPos pos, TIndexLevel level) const
TBin GetUpperBinNumber(TBin bin) const
constexpr TIndexLevel ToIndexLevel(EIndexLevel level) const
constexpr TSeqPos GetBinSize(EIndexLevel level) const
constexpr TBin GetFirstOverflowBin(TIndexLevel level=0) const
bool IsOverflowBin(TBin bin, TIndexLevel level=0) const
constexpr TBin GetBinNumberBase(EIndexLevel level) const
constexpr TShift GetPageShift() const
constexpr TShift GetMinLevelBinShift() const
TIndexLevel GetRangeIndexLevel(CRange< TSeqPos > range) const
constexpr TSeqPos GetMinBinSize() const
constexpr TShift GetLevelBinShift(TIndexLevel level) const
constexpr TIndexLevel GetMaxIndexLevel() const
constexpr TBin GetLastBin(TIndexLevel level) const
TSeqPos m_EstimatedLength
vector< uint64_t > CollectEstimatedCoverage(EIndexLevel min_index_level, EIndexLevel max_index_level) const
pair< TBinsIter, TBinsIter > GetLevelBins(EIndexLevel level) const
TBins::const_iterator TBinsIter
vector< SBamIndexBinInfo > TBins
CBGZFRange m_UnmappedChunk
vector< CBGZFPos > m_Overlaps
static Uint4 MakeUint4(const char *buf)
static Uint2 MakeUint2(const char *buf)