1 #ifndef SRA__READER__BAM__BAMINDEX__HPP
2 #define SRA__READER__BAM__BAMINDEX__HPP
65 void Read(
const string& bam_file_name);
78 typedef vector<SBamHeaderRefInfo>
TRefs;
89 size_t GetRefIndex(
const string& name)
const;
92 return m_Refs[index].m_Name;
96 return m_Refs[index].m_Length;
150 #define BAM_SUPPORT_CSI
154 #ifdef BAM_SUPPORT_CSI
171 static const bool is_CSI =
false;
233 constexpr
int kAllowedLevels = 10;
234 constexpr
unsigned kBaseBits =
306 if ( bin >= bin_start ) {
347 const char*
Read(
const char* buffer_ptr,
const char* buffer_end,
356 #ifdef BAM_SUPPORT_CSI
363 return m_Chunks.front().first;
367 return m_Chunks.back().second;
376 return b1.
m_Bin < b2;
380 return b1 < b2.
m_Bin;
386 const char*
Read(
const char* buffer_ptr,
const char* buffer_end,
399 vector<uint64_t> CollectEstimatedCoverage(
TIndexLevel min_index_level,
404 return CollectEstimatedCoverage(ToIndexLevel(min_index_level),
405 ToIndexLevel(max_index_level));
411 vector<TSeqPos> GetAlnOverStarts(
void)
const;
414 vector<TSeqPos> GetAlnOverEnds(
void)
const;
417 typedef vector<SBamIndexBinInfo>
TBins;
419 pair<TBinsIter, TBinsIter> GetLevelBins(
TIndexLevel level)
const;
422 return GetLevelBins(ToIndexLevel(level));
427 pair<TBinsIter, TBinsIter> AddLevelFileRanges(vector<CBGZFRange>& ranges,
429 pair<TBin, TBin> bin_range)
const;
430 pair<TBinsIter, TBinsIter> GetBinsIterRange(pair<TBin, TBin> bin_range)
const;
432 void SetLengthFromHeader(
TSeqPos length);
451 CBamIndex(
const string& index_file_name);
459 void Read(
const string& index_file_name);
463 typedef vector<SBamIndexRefIndex>
TRefs;
470 return m_Refs.size();
473 void SetLengthFromHeader(
const CBamHeader& header);
475 CBGZFRange GetTotalFileRange(
size_t ref_index)
const;
478 MakeEstimatedCoverageAnnot(
const CBamHeader& header,
479 const string& ref_name,
480 const string& seq_id,
481 const string& annot_name,
482 TIndexLevel min_index_level,
483 TIndexLevel max_index_level)
const;
486 const string& ref_name,
487 const string& seq_id,
488 const string& annot_name,
492 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
493 ToIndexLevel(min_index_level),
494 ToIndexLevel(max_index_level));
498 const string& ref_name,
499 const string& seq_id,
500 const string& annot_name,
503 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
504 min_index_level, GetMaxIndexLevel());
508 const string& ref_name,
509 const string& seq_id,
510 const string& annot_name,
513 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
514 ToIndexLevel(min_index_level));
517 MakeEstimatedCoverageAnnot(
const CBamHeader& header,
518 const string& ref_name,
520 const string& annot_name,
521 TIndexLevel min_index_level,
522 TIndexLevel max_index_level)
const;
525 const string& ref_name,
527 const string& annot_name,
531 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
532 ToIndexLevel(min_index_level),
533 ToIndexLevel(max_index_level));
537 const string& ref_name,
539 const string& annot_name,
542 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
543 min_index_level, GetMaxIndexLevel());
547 const string& ref_name,
549 const string& annot_name,
552 return MakeEstimatedCoverageAnnot(header, ref_name, seq_id, annot_name,
553 ToIndexLevel(min_index_level));
558 MakeEstimatedCoverageAnnot(
size_t ref_index,
559 const string& seq_id,
560 const string& annot_name,
561 TIndexLevel min_index_level,
562 TIndexLevel max_index_level)
const;
565 const string& seq_id,
566 const string& annot_name,
570 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
571 ToIndexLevel(min_index_level),
572 ToIndexLevel(max_index_level));
576 const string& seq_id,
577 const string& annot_name,
580 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
581 min_index_level, GetMaxIndexLevel());
585 const string& seq_id,
586 const string& annot_name,
589 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
590 ToIndexLevel(min_index_level));
593 MakeEstimatedCoverageAnnot(
size_t ref_index,
595 const string& annot_name,
596 TIndexLevel min_index_level,
597 TIndexLevel max_index_level)
const;
601 const string& annot_name,
605 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
606 ToIndexLevel(min_index_level),
607 ToIndexLevel(max_index_level));
612 const string& annot_name,
615 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
616 min_index_level, GetMaxIndexLevel());
621 const string& annot_name,
624 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name,
625 ToIndexLevel(min_index_level));
629 MakeEstimatedCoverageAnnot(
size_t ref_index,
630 const string& seq_id,
631 const string& annot_name,
633 TIndexLevel min_index_level,
634 TIndexLevel max_index_level)
const;
637 const string& seq_id,
638 const string& annot_name,
643 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
644 ToIndexLevel(min_index_level),
645 ToIndexLevel(max_index_level));
649 const string& seq_id,
650 const string& annot_name,
654 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
655 min_index_level, GetMaxIndexLevel());
659 const string& seq_id,
660 const string& annot_name,
664 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
665 ToIndexLevel(min_index_level));
668 MakeEstimatedCoverageAnnot(
size_t ref_index,
670 const string& annot_name,
672 TIndexLevel min_index_level,
673 TIndexLevel max_index_level)
const;
677 const string& annot_name,
682 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
683 ToIndexLevel(min_index_level),
684 ToIndexLevel(max_index_level));
689 const string& annot_name,
693 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
694 min_index_level, GetMaxIndexLevel());
699 const string& annot_name,
703 return MakeEstimatedCoverageAnnot(ref_index, seq_id, annot_name, ref_length,
704 ToIndexLevel(min_index_level));
710 CollectEstimatedCoverage(
size_t ref_index,
711 TIndexLevel min_index_level,
712 TIndexLevel max_index_level)
const;
718 return CollectEstimatedCoverage(ref_index,
719 ToIndexLevel(min_index_level),
720 ToIndexLevel(max_index_level));
728 return CollectEstimatedCoverage(ref_index, index_level, index_level);
734 return CollectEstimatedCoverage(ref_index, ToIndexLevel(index_level));
741 return CollectEstimatedCoverage(ref_index, 0, GetMaxIndexLevel());
748 return GetRef(ref_index).EstimateDataSizeByAlnStartPos();
753 return make_pair(m_TotalReadBytes, m_TotalReadSeconds);
765 template<
class Position>
772 typedef pair<position_type, position_type>
TRange;
812 if ( !(iter->second <
range.second) ) {
827 !(
range.first < iter->first) &&
828 !(
range.second < iter->second));
843 iter->second =
range.second;
878 const CBGZFPos* file_pos =
nullptr);
882 const CBGZFPos* file_pos =
nullptr);
887 const CBGZFPos* file_pos =
nullptr);
894 SetRanges(index, ref_index, ref_range, index.
ToIndexLevel(index_level), search_mode, file_pos);
898 TIndexLevel index_level,
899 ESearchMode search_mode = eSearchByOverlap,
900 const CBGZFPos* file_pos =
nullptr);
907 AddRanges(index, ref_index, ref_range, index.
ToIndexLevel(index_level), search_mode, file_pos);
911 TIndexLevel min_index_level, TIndexLevel max_index_level,
912 ESearchMode search_mode = eSearchByOverlap,
913 const CBGZFPos* file_pos =
nullptr);
920 SetRanges(index, ref_index, ref_range,
923 search_mode, file_pos);
927 TIndexLevel min_index_level, TIndexLevel max_index_level,
928 ESearchMode search_mode = eSearchByOverlap,
929 const CBGZFPos* file_pos =
nullptr);
936 AddRanges(index, ref_index, ref_range,
939 search_mode, file_pos);
957 AddFrom(header, file_pos);
969 return m_Ranges.begin();
973 return m_Ranges.end();
977 Uint8 GetFileSize()
const;
980 void AddSortedRanges(
const vector<CBGZFRange>& ranges,
981 const CBGZFPos* file_pos =
nullptr);
999 CBamRawDb(
const string& bam_path,
const string& index_path)
1001 Open(bam_path, index_path);
1006 void Open(
const string& bam_path);
1007 void Open(
const string& bam_path,
const string& index_path);
1020 return m_Index.GetFileName();
1028 return GetHeader().GetRefIndex(ref_label);
1032 return GetHeader().GetRefName(ref_index);
1036 return GetHeader().GetRefLength(ref_index);
1047 size_t ref_index = GetRefIndex(ref_label);
1048 return GetIndex().GetRef(ref_index).EstimateDataSizeByAlnStartPos(GetRefSeqLength(ref_index));
1051 double GetEstimatedSecondsPerByte()
const;
1150 return m_RecordSize;
1158 return get_record_ptr() + get_record_size();
1172 return get_record_ptr()[8];
1176 return get_record_ptr()[9];
1182 static const char kCIGARSymbols[];
1199 fAlign_WasPaired = 1 << 0,
1200 fAlign_IsMappedAsPair = 1 << 1,
1201 fAlign_SelfIsUnmapped = 1 << 2,
1202 fAlign_MateIsUnmapped = 1 << 3,
1203 fAlign_SelfIsReverse = 1 << 4,
1204 fAlign_MateIsReverse = 1 << 5,
1205 fAlign_IsFirst = 1 << 6,
1206 fAlign_IsSecond = 1 << 7,
1207 fAlign_IsNotPrimary = 1 << 8,
1208 fAlign_IsLowQuality = 1 << 9,
1209 fAlign_IsDuplicate = 1 << 10,
1210 fAlign_IsSupplementary = 1 << 11
1234 return get_record_ptr()+32;
1242 return get_read_name_end();
1254 size_t count = get_cigar_ops_count();
1255 raw_cigar.resize(count);
1257 memcpy(dst, get_cigar_ptr(), count*
sizeof(
uint32_t));
1258 for (
size_t i = 0;
i < count; ++
i ) {
1265 return get_cigar_end();
1269 return get_read_ptr() + (get_read_len()+1)/2;
1273 return get_read_end();
1277 return get_phred_quality_ptr() + get_read_len();
1281 return get_phred_quality_end();
1285 return get_record_end();
1290 return CTempString(get_read_ptr(), (get_read_len()+1)/2);
1292 static const char kBaseSymbols[];
1293 string get_read()
const;
1296 uint32_t get_cigar_ref_size()
const;
1297 uint32_t get_cigar_read_size()
const;
1299 string get_cigar()
const;
1300 bool has_ambiguous_match()
const;
1302 SBamAuxData get_aux_data(
char c1,
char c2,
bool allow_missing =
false)
const;
1318 : m_CurrentRangeEnd(0)
1323 const CBGZFPos* file_pos =
nullptr)
1324 : m_Reader(bam_db.GetFile())
1326 Select(bam_db, file_pos);
1329 : m_Reader(bam_db.GetFile())
1331 Select(bam_db, &file_pos);
1334 const string& ref_label,
1337 const CBGZFPos* file_pos =
nullptr)
1338 : m_Reader(bam_db.GetFile())
1340 Select(bam_db, ref_label, ref_range, search_mode, file_pos);
1343 const string& ref_label,
1347 const CBGZFPos* file_pos =
nullptr)
1348 : m_Reader(bam_db.GetFile())
1350 Select(bam_db, ref_label, ref_range, index_level, search_mode, file_pos);
1353 const string& ref_label,
1357 const CBGZFPos* file_pos =
nullptr)
1358 : m_Reader(bam_db.GetFile())
1360 Select(bam_db, ref_label, ref_range, index_level, search_mode, file_pos);
1363 const string& ref_label,
1368 const CBGZFPos* file_pos =
nullptr)
1369 : m_Reader(bam_db.GetFile())
1371 Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode, file_pos);
1374 const string& ref_label,
1379 const CBGZFPos* file_pos =
nullptr)
1380 : m_Reader(bam_db.GetFile())
1382 Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode, file_pos);
1385 const string& ref_label,
1388 ESearchMode search_mode = eSearchByOverlap,
1389 const CBGZFPos* file_pos =
nullptr);
1391 const string& ref_label,
1394 TIndexLevel min_index_level,
1395 TIndexLevel max_index_level,
1396 ESearchMode search_mode = eSearchByOverlap,
1397 const CBGZFPos* file_pos =
nullptr);
1399 const string& ref_label,
1402 EIndexLevel min_index_level,
1403 EIndexLevel max_index_level,
1404 ESearchMode search_mode,
1405 const CBGZFPos* file_pos =
nullptr);
1413 const CBGZFPos* file_pos =
nullptr)
1418 const string& ref_label,
1421 const CBGZFPos* file_pos =
nullptr)
1425 search_mode, file_pos);
1428 const string& ref_label,
1432 const CBGZFPos* file_pos =
nullptr)
1436 index_level, search_mode, file_pos);
1439 const string& ref_label,
1443 const CBGZFPos* file_pos =
nullptr)
1447 index_level, search_mode, file_pos);
1450 const string& ref_label,
1455 const CBGZFPos* file_pos =
nullptr)
1459 min_index_level, max_index_level, search_mode, file_pos);
1462 const string& ref_label,
1467 const CBGZFPos* file_pos =
nullptr)
1471 min_index_level, max_index_level, search_mode, file_pos);
1477 const CBGZFPos* file_pos =
nullptr)
1479 x_Select(index, ref_index, ref_range,
1480 search_mode, file_pos);
1487 const CBGZFPos* file_pos =
nullptr)
1489 x_Select(index, ref_index, ref_range,
1490 index_level, search_mode, file_pos);
1497 const CBGZFPos* file_pos =
nullptr)
1499 x_Select(index, ref_index, ref_range,
1500 index_level, search_mode, file_pos);
1512 return m_AlignInfo.get_file_pos();
1517 return m_AlignInfo.get_ref_index();
1521 return m_AlignRefRange.GetFrom();
1527 return m_AlignInfo.get_next_ref_index();
1531 return m_AlignInfo.get_next_ref_pos();
1536 return CTempString(m_AlignInfo.get_read_name_ptr(),
1537 m_AlignInfo.get_read_name_len()-1);
1541 return m_AlignInfo.get_short_seq_accession_id();
1545 return m_AlignInfo.get_read_len();
1549 return m_AlignInfo.get_read();
1553 return m_AlignInfo.get_read_raw();
1557 return m_AlignInfo.get_read(
str);
1562 return m_AlignInfo.get_cigar_ops_count();
1566 return m_AlignInfo.get_cigar_op_data(index);
1570 return m_AlignInfo.get_cigar(raw_cigar);
1574 m_AlignInfo.get_cigar(dst);
1578 return m_AlignReadRange.GetFrom();
1582 return m_AlignReadRange.GetLength();
1586 return m_AlignRefRange.GetLength();
1590 return make_pair(m_AlignRefRange, m_AlignReadRange);
1594 return m_AlignInfo.has_ambiguous_match();
1599 return m_AlignInfo.get_cigar();
1604 return m_AlignInfo.get_bin();
1608 return GetBAIIndexBin();
1612 return GetRangeIndexLevel(m_AlignRefRange);
1616 return RangeIsOnMinBinIndexLevel(m_AlignRefRange);
1621 return m_AlignInfo.get_flag();
1636 return (GetFlags() & m_AlignInfo.fAlign_SelfIsReverse)?
1642 return (GetFlags() & m_AlignInfo.fAlign_SelfIsUnmapped) == 0;
1647 return IsMapped()? m_AlignInfo.get_map_quality(): 0;
1652 return (GetFlags() & m_AlignInfo.fAlign_IsMappedAsPair) != 0;
1656 return (GetFlags() & m_AlignInfo.fAlign_IsFirst) != 0;
1660 return (GetFlags() & m_AlignInfo.fAlign_IsSecond) != 0;
1664 return (GetFlags() & m_AlignInfo.fAlign_IsNotPrimary) != 0;
1667 void GetSegments(vector<int>& starts, vector<TSeqPos>& lens)
const;
1671 return CBamAuxIterator(m_AlignInfo.get_aux_data_ptr(), m_AlignInfo.get_aux_data_end());
1675 return m_AlignInfo.get_aux_data(c1, c2, allow_missing);
1679 return GetAuxData(c1, c2).GetInt(index);
1684 const CBGZFPos* file_pos =
nullptr);
1687 TIndexLevel min_index_level, TIndexLevel max_index_level,
1688 ESearchMode search_mode,
1689 const CBGZFPos* file_pos =
nullptr);
1694 const CBGZFPos* file_pos =
nullptr)
1696 x_Select(index, ref_index, ref_range,
1699 search_mode, file_pos);
1704 const CBGZFPos* file_pos =
nullptr)
1706 x_Select(index, ref_index, ref_range, 0, index.
GetMaxIndexLevel(), search_mode, file_pos);
1712 const CBGZFPos* file_pos =
nullptr)
1714 x_Select(index, ref_index, ref_range, index_level, index_level, search_mode, file_pos);
1720 const CBGZFPos* file_pos =
nullptr)
1722 x_Select(index, ref_index, ref_range, index_level, index_level, search_mode, file_pos);
1724 bool x_UpdateRange();
1728 return m_Reader.HaveNextAvailableBytes() || x_UpdateRange();
1732 m_NextRange = m_Ranges.end();
1735 bool x_NeedToSkip();
static bool operator<(const SBamIndexBinInfo &b1, const SBamIndexBinInfo &b2)
pair< CBGZFPos, CBGZFPos > CBGZFRange
CBamAuxIterator & operator++()
CBamAuxIterator(const char *aux_ptr, const char *aux_end)
const SBamAuxData & operator*() const
const SBamAuxData * operator->() const
DECLARE_OPERATOR_BOOL(m_AuxData)
void SetFrom(CBGZFPos file_pos)
CRangeUnion< CBGZFPos > TRanges
const_iterator end() const
void AddRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
const TRanges & GetRanges() const
void SetRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
const_iterator begin() const
void SetRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
void SetFrom(const CBamHeader &header, const CBGZFPos *file_pos)
void SetWhole(const CBamHeader &header)
void AddRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
TRanges::const_iterator const_iterator
const TRefs & GetRefs() const
double m_TotalReadSeconds
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index) const
size_t GetRefCount() const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, EIndexLevel index_level) const
const string & GetFileName() const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level, EIndexLevel max_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, EIndexLevel min_index_level) const
pair< Uint8, double > GetReadStatistics() const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, TIndexLevel min_index_level=0) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, EIndexLevel min_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
vector< uint64_t > EstimateDataSizeByAlnStartPos(size_t ref_index) const
vector< SBamIndexRefIndex > TRefs
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const CSeq_id &seq_id, const string &annot_name, EIndexLevel min_index_level, EIndexLevel max_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, TIndexLevel min_index_level=0) const
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, TIndexLevel index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const string &seq_id, const string &annot_name, TSeqPos ref_length, EIndexLevel min_index_level, EIndexLevel max_index_level) const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(size_t ref_index, const CSeq_id &seq_id, const string &annot_name, TIndexLevel min_index_level=0) const
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, EIndexLevel min_index_level, EIndexLevel max_index_level) const
Uint2 GetCIGAROpsCount() const
TSeqPos GetRefSeqPos() const
Int8 GetAuxInt(char c1, char c2, size_t index=0) const
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
Uint1 GetMapQuality() const
Uint2 GetIndexBin() const
CTempString GetShortSeqAcc() const
TSeqPos GetCIGARPos() const
TSeqPos GetNextRefSeqPos() const
Uint2 GetBAIIndexBin() const
CBamRawAlignIterator(CBamRawDb &bam_db, const CBGZFPos *file_pos=nullptr)
SBamAlignInfo m_AlignInfo
bool TryGetFlags(Uint2 &flags) const
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
int32_t GetNextRefSeqIndex() const
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
void GetCIGAR(CBamString &dst) const
CBamRawAlignIterator(CBamRawDb &bam_db, CBGZFPos file_pos)
pair< COpenRange< TSeqPos >, COpenRange< TSeqPos > > GetCIGARAlignment(void) const
CBamFileRangeSet::const_iterator m_NextRange
bool IsSecondInPair() const
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel min_index_level, TIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
TSeqPos GetShortSequenceLength(void) const
void Select(CBamRawDb &bam_db, const CBGZFPos *file_pos=nullptr)
TIndexLevel GetIndexLevel() const
SBamAuxData GetAuxData(char c1, char c2, bool allow_missing=false) const
TSeqPos GetCIGARRefSize() const
TSeqPos GetCIGARShortSize() const
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel min_index_level, EIndexLevel max_index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
CTempString GetShortSeqId() const
CTempString GetShortSequenceRaw() const
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
CBamAuxIterator GetAuxIterator() const
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
CBGZFPos GetFilePos() const
string GetShortSequence() const
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, TIndexLevel min_index_level, TIndexLevel max_index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
CBamFileRangeSet m_Ranges
Uint4 GetCIGAROp(Uint2 index) const
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
bool HasAmbiguousMatch() const
TIndexLevel m_MinIndexLevel
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
COpenRange< TSeqPos > m_QueryRefRange
bool IsFirstInPair() const
void Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, TIndexLevel index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
ENa_strand GetStrand() const
void GetShortSequence(CBamString &str) const
DECLARE_OPERATOR_BOOL(m_CurrentRangeEnd)
COpenRange< TSeqPos > m_AlignRefRange
CBamRawAlignIterator & operator++()
CBGZFPos m_CurrentRangeEnd
void Select(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, EIndexLevel index_level, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
COpenRange< TSeqPos > m_AlignReadRange
void x_Select(const CBamIndex &index, size_t ref_index, CRange< TSeqPos > ref_range, ESearchMode search_mode, const CBGZFPos *file_pos=nullptr)
int32_t GetRefSeqIndex() const
bool IsOnMinBinIndexLevel() const
CBamRawAlignIterator(CBamRawDb &bam_db, const string &ref_label, CRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
void GetCIGAR(vector< Uint4 > &raw_cigar) const
size_t GetRefIndex(const string &ref_label) const
CBamRawDb(const string &bam_path)
const string & GetIndexName() const
const string & GetRefName(size_t ref_index) const
size_t GetRefCount() const
const CBamHeader & GetHeader() const
const CBamIndex & GetIndex() const
TSeqPos GetRefSeqLength(size_t ref_index) const
vector< Uint8 > EstimateDataSizeByAlnStartPos(const string &ref_label) const
CBamRawDb(const string &bam_path, const string &index_path)
TRanges::const_iterator const_iterator
const_iterator end() const
map< position_type, position_type > TRanges
CRangeUnion< position_type > TThisType
TThisType & operator+=(const TRange &range)
void add_range(TRange range)
const_iterator begin() const
pair< position_type, position_type > TRange
TRanges::iterator iterator
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator begin() const
const_iterator end() const
iterator_bool insert(const value_type &val)
const_iterator upper_bound(const key_type &key) const
Include a standard set of the NCBI C++ Toolkit most basic headers.
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
unsigned int TSeqPos
Type for sequence locations and lengths.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
void Read(CObjectIStream &in, TObjectPtr object, const CTypeRef &type)
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
#define NCBI_BAMREAD_EXPORT
ENa_strand
strand of nucleic acid
range(_Ty, _Ty) -> range< _Ty >
std::istream & in(std::istream &in_, double &x_)
const char * get_phred_quality_end() const
const char * get_cigar_ptr() const
void get_cigar(vector< uint32_t > &raw_cigar) const
int32_t get_ref_pos() const
uint8_t get_map_quality() const
int32_t get_next_ref_pos() const
uint8_t get_read_name_len() const
CBGZFPos get_file_pos() const
const char * get_read_end() const
const char * get_read_ptr() const
uint32_t get_cigar_op_data(uint16_t index) const
const char * get_cigar_end() const
const char * get_aux_data_end() const
const char * get_read_name_ptr() const
const char * get_phred_quality_ptr() const
const char * get_read_name_end() const
const char * get_record_ptr() const
uint16_t get_flag() const
size_t get_record_size() const
const char * get_aux_data_ptr() const
uint16_t get_cigar_ops_count() const
const char * get_record_end() const
int32_t get_next_ref_index() const
CTempString get_read_raw() const
int32_t get_ref_index() const
uint32_t get_read_len() const
float GetFloat(size_t index=0) const
Int8 GetInt(size_t index=0) const
bool IsTag(char c1, char c2) const
CTempString GetTag() const
DECLARE_OPERATOR_BOOL(m_DataPtr)
CTempString GetString() const
CBGZFPos GetEndFilePos() const
vector< CBGZFRange > m_Chunks
CBGZFPos GetStartFilePos() const
COpenRange< TSeqPos > GetSeqRange(SBamIndexParams params) const
static const TShift kLevelStepBinShift
static const TShift kBAI_min_shift
static const TIndexLevel kMinBinIndexLevel
static const TIndexLevel kBAI_depth
static const TBin kMaxBinNumber
constexpr TShift GetMinBinShift() const
constexpr TSeqPos GetBinSize(TIndexLevel level) const
constexpr TBin GetPseudoBin() const
constexpr TBin GetBinNumberBase(int level) const
TIndexLevel Bin2IndexLevel(TBin bin) const
constexpr TBin GetBinNumberBaseReversed(int reversed_level) const
constexpr TBin GetMinBinNumberBase() const
bool RangeIsOnMinBinIndexLevel(CRange< TSeqPos > range) const
constexpr TBin GetFirstBin(TIndexLevel level) const
TBin GetBinNumber(TSeqPos pos, EIndexLevel level) const
TBin GetBinNumberOffset(TSeqPos pos, EIndexLevel level) const
constexpr TShift GetLevelBinShift(EIndexLevel level) const
constexpr TSeqPos GetPageSize() const
pair< TBin, TBin > GetBinRange(COpenRange< TSeqPos > ref_range, TIndexLevel index_level) const
COpenRange< TSeqPos > GetSeqRange(TBin bin) const
constexpr TSeqPos GetMaxBinSize() const
TBin GetBinNumber(TSeqPos pos, TIndexLevel level) const
bool IsOverflowPos(TSeqPos pos) const
TBin GetBinNumberOffset(TSeqPos pos, TIndexLevel level) const
TBin GetUpperBinNumber(TBin bin) const
constexpr TIndexLevel ToIndexLevel(EIndexLevel level) const
constexpr TSeqPos GetBinSize(EIndexLevel level) const
constexpr TBin GetFirstOverflowBin(TIndexLevel level=0) const
bool IsOverflowBin(TBin bin, TIndexLevel level=0) const
constexpr TBin GetBinNumberBase(EIndexLevel level) const
constexpr TShift GetPageShift() const
constexpr TShift GetMinLevelBinShift() const
TIndexLevel GetRangeIndexLevel(CRange< TSeqPos > range) const
constexpr TSeqPos GetMinBinSize() const
constexpr TShift GetLevelBinShift(TIndexLevel level) const
constexpr TIndexLevel GetMaxIndexLevel() const
constexpr TBin GetLastBin(TIndexLevel level) const
TSeqPos m_EstimatedLength
vector< uint64_t > CollectEstimatedCoverage(EIndexLevel min_index_level, EIndexLevel max_index_level) const
pair< TBinsIter, TBinsIter > GetLevelBins(EIndexLevel level) const
TBins::const_iterator TBinsIter
vector< SBamIndexBinInfo > TBins
CBGZFRange m_UnmappedChunk
vector< CBGZFPos > m_Overlaps
static Uint4 MakeUint4(const char *buf)
static Uint2 MakeUint2(const char *buf)