47 #ifndef NCBI_THROW2_FMT
48 # define NCBI_THROW2_FMT(exception_class, err_code, message, extra) \
49 throw NCBI_EXCEPTION2(exception_class, err_code, FORMAT(message), extra)
70 #ifdef BAM_SUPPORT_CSI
71 static const char kCsiExt[] =
".csi";
72 static const char kIndexMagicCSI[] =
"CSI\1";
86 size_t cnt =
in.gcount();
94 const char*
s_Read(
const char*& buffer_ptr,
const char* buffer_end,
size_t len)
96 const char* ret_ptr = buffer_ptr;
97 const char* ret_end = ret_ptr +
len;
98 if ( ret_end > buffer_end ) {
101 buffer_ptr = ret_end;
131 if ( memcmp(
buf, magic, 4) != 0 ) {
204 #ifdef BAM_SUPPORT_CSI
214 for (
int32_t i_chunk = 0; i_chunk < n_chunks; ++i_chunk ) {
224 #ifdef BAM_SUPPORT_CSI
226 const char* header =
s_Read(ptr, end, 16);
232 const char* header =
s_Read(ptr, end, 8);
239 const char* data =
s_Read(ptr, end, n_chunks*16);
240 for (
size_t i = 0;
i < n_chunks; ++
i ) {
254 pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>
258 pair<TBinsIter, TBinsIter> ret;
260 ret.second =
m_Bins.end();
284 return p1 < p2.second;
288 return p1.second < p2;
306 "Bad unmapped bin format");
320 "No chunks in bin "<<bin.
m_Bin);
322 for (
size_t i = 0;
i < bin.
m_Chunks.size(); ++
i ) {
326 "Empty BAM BGZF range in bin "<<bin.
m_Bin<<
331 "Overlapping BAM BGZF ranges in bin "<<bin.
m_Bin<<
351 SBamIndexParams::operator=(params);
353 size_t bin_count = 0;
357 for (
int32_t i_bin = 0; i_bin < n_bin; ++i_bin ) {
385 SBamIndexParams::operator=(params);
387 size_t bin_count = 0;
391 for (
size_t i_bin = 0; i_bin < n_bin; ++i_bin ) {
393 buffer_ptr = bin.
Read(buffer_ptr, buffer_end, *
this);
406 const char* data =
s_Read(buffer_ptr, buffer_end, n_intv*8);
407 for (
size_t i = 0;
i < n_intv; ++
i ) {
420 const pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
422 if ( iters.first == iters.second ) {
430 return iters.first->GetSeqRange(params);
437 s_GetOverlap(
const pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
439 if ( iters.first == iters.second ) {
443 return iters.first->m_Overlap;
450 s_GetFilePos(
const pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
452 auto iter = iters.first;
453 if ( iter == iters.second ) {
456 return iter->GetStartFilePos();
462 s_GetNextFilePos(
const pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
464 auto iter = iters.first;
465 if ( iter == iters.second ) {
469 if ( iter == iters.second ) {
472 return iter->GetStartFilePos();
504 vector<TSeqPos> aln_over_starts(nBins);
513 auto it = lower_bound(
m_Bins.begin(),
m_Bins.end(), bin);
514 if ( it !=
m_Bins.end() && it->m_Bin == bin ) {
515 for (
auto c : it->m_Chunks ) {
516 if ( c.first >= min_fp ) {
519 if ( c.first >= limit.second ) {
522 if ( c.second <= limit.first ) {
525 if ( c.first < limit.first ) {
526 c.first = limit.first;
528 _ASSERT(c.first >= limit.first);
529 _ASSERT(c.first < limit.second);
531 if ( c.first < min_fp ) {
540 min_aln_start = ref_range.
GetFrom();
546 auto it = lower_bound(level_bins.first, level_bins.second, min_fp,
PByStartFilePos());
547 if ( it == level_bins.first ) {
551 min_aln_start =
max(min_aln_start, it->GetSeqRange(*this).GetFrom());
552 if ( it->GetEndFilePos() > min_fp ) {
559 aln_over_starts[
i] = min_aln_start;
561 return aln_over_starts;
565 vector<TSeqPos> aln_over_starts(nBins);
567 auto bin_it_start =
GetLevelBins(0).first, next_bin_it = bin_it_start;
568 for (
size_t i = 0;
i < nBins; ++
i ) {
573 aln_over_starts[
i] = ref_pos;
577 while ( next_bin_it !=
m_Bins.end() && next_bin_it->GetStartFilePos() <= min_fp ) {
580 TSeqPos min_aln_start =
i? aln_over_starts[
i-1]: 0;
581 bool inside_min_bin =
false;
582 if ( next_bin_it != bin_it_start ) {
583 auto& bin = next_bin_it[-1];
584 _ASSERT(bin.GetStartFilePos() <= min_fp);
585 inside_min_bin = bin.GetEndFilePos() > min_fp;
588 if ( min_aln_start+
GetMinBinSize() < ref_pos && !inside_min_bin ) {
592 auto it = upper_bound(level_bins.first, level_bins.second, min_fp,
PByStartFilePos());
593 if ( it == level_bins.first ) {
597 min_aln_start =
max(min_aln_start, it->GetSeqRange(*this).GetFrom());
598 if ( it->GetEndFilePos() > min_fp ) {
605 if ( min_aln_start > ref_pos ) {
607 "Inconsistent linear index at ref pos "<<ref_pos<<
608 ": align starts after end bin start "<<min_aln_start);
610 aln_over_starts[
i] = min_aln_start;
612 return aln_over_starts;
616 vector<TSeqPos> aln_over_starts(nBins);
617 vector<pair<TBinsIter, TBinsIter>> levelBins;
618 vector<COpenRange<TSeqPos>> levelBinSeqRange;
619 vector<CBGZFPos> levelPrevOverlap;
626 levelBinSeqRange.push_back(
s_GetSeqRange(*
this, levelBins.back()));
631 return aln_over_starts;
634 for (
auto& bin :
m_Bins ) {
635 auto sp = bin.GetSeqRange(*this).GetFrom();
636 auto fp = bin.GetStartFilePos();
637 auto ins = sp2minfp.
insert(make_pair(sp,
fp));
640 auto& minfp = ins.first->second;
641 minfp =
min(minfp,
fp);
645 for (
auto p : sp2minfp ) {
646 auto ins = fp2sp.
insert(make_pair(p.second, p.first));
648 auto iter = ins.first;
650 while ( iter != fp2sp.
end() && iter->second < p.first ) {
651 iter = fp2sp.
erase(iter);
664 while ( levelBinSeqRange[level].GetToOpen() <= seqPos ) {
666 levelPrevOverlap[level] =
s_GetOverlap(levelBins[level]);
668 ++(levelBins[level].first);
669 levelBinSeqRange[level] =
s_GetSeqRange(*
this, levelBins[level]);
673 if ( seqPos >= levelBinSeqRange[level].GetFrom() ) {
677 overlap_fp = levelPrevOverlap[level];
679 prev_overlap_fp =
max(prev_overlap_fp, overlap_fp);
686 while ( levelBinSeqRange[level].GetToOpen() <= seqPos ) {
688 levelPrevOverlap[level] =
s_GetOverlap(levelBins[level]);
690 ++(levelBins[level].first);
691 levelBinSeqRange[level] =
s_GetSeqRange(*
this, levelBins[level]);
693 if ( seqPos < levelBinSeqRange[level].GetFrom() ) {
699 overlap_fp =
max(prev_overlap_fp, levelBins[level].
first->m_Overlap);
704 auto& chunks = levelBins[level].first->m_Chunks;
705 auto it = upper_bound(chunks.begin(), chunks.end(), overlap_fp,
PByEndFilePos());
706 if ( it != chunks.end() && it->first <
min(found_fp, limit_fp) ) {
708 found_fp =
max(it->first, overlap_fp);
709 if ( found_fp <= overlap_fp ) {
716 aln_over_starts[
b] = seqPos;
723 auto osp =
min(seqPos,
prev(iter)->second);
727 osp =
max(osp, aln_over_starts[
b-1]);
729 aln_over_starts[
b] = osp;
732 return aln_over_starts;
742 vector<TSeqPos> ends(count);
744 for ( ; ei < count; ++ei ) {
745 while (
si*bin_size < starts[ei] ) {
746 ends[
si++] = ei*bin_size-1;
749 while (
si < count ) {
750 ends[
si++] = ei*bin_size-1;
766 if ( file_pos1 >= file_pos2 ) {
793 if ( ref_range.
Empty() ) {
805 #ifdef BAM_SUPPORT_CSI
811 auto it = lower_bound(bins.first, bins.second, bin_num);
812 if ( it != bins.second && it->m_Bin <= bin_num_last ) {
813 if ( it->m_Overlap ) {
814 if ( !limit.first || it->m_Overlap < limit.first ) {
815 limit.first = it->m_Overlap;
818 if ( it->m_Bin == bin_num ) {
832 auto it = lower_bound(bins.first, bins.second, bin_num);
833 if ( it != bins.first ) {
834 limit.first =
max(limit.first,
prev(it)->GetEndFilePos());
843 auto it = lower_bound(bins.first, bins.second, bin_num);
844 if ( it != bins.second ) {
845 limit.second =
min(limit.second, it->GetStartFilePos());
852 pair<SBamIndexRefIndex::TBin, SBamIndexRefIndex::TBin>
856 pair<TBin, TBin> bin_range;
868 bin_range.second = bin_range.first-1;
888 pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>
891 pair<TBin, TBin> bin_range)
const
895 for ( ; it !=
m_Bins.end() && it->m_Bin <= bin_range.second; ++it ) {
896 for (
auto c : it->m_Chunks ) {
897 if ( c.first < limit_file_range.first ) {
898 c.first = limit_file_range.first;
900 if ( limit_file_range.second && limit_file_range.second < c.second ) {
901 c.second = limit_file_range.second;
903 if ( c.first < c.second ) {
908 return make_pair(
first, it);
912 pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>
917 return make_pair(
first, it);
937 size_t page_count = end_pos - beg_pos + 1;
938 Uint8 add_size = (file_size + page_count/2) / page_count;
940 for (
size_t i = beg_pos;
i <= end_pos; ++
i ) {
947 for (
size_t i = beg_pos;
i <= end_pos; ++
i ) {
981 _ASSERT(new_file_beg < new_file_end);
1008 const vector<SBamRangeBlock>&
bb,
size_t bb_beg,
size_t bb_end)
1010 for (
size_t i = bb_beg;
i <= bb_end; ++
i ) {
1012 if ( !
b.file_end ) {
1039 if ( bins.first != bins.second ) {
1040 CBGZFPos pos_beg = bins.first->GetStartFilePos();
1042 if ( pos_beg <
range.first ) {
1043 range.first = pos_beg;
1045 if ( pos_end >
range.second ) {
1046 range.second = pos_end;
1050 if (
range.first.IsInvalid() ) {
1068 vector<Uint8> vv(bin_count);
1070 vector<SBamRangeBlock>
bb(bin_count);
1071 size_t bb_end = bin_count-1;
1072 for (
size_t i = 0;
i <= bb_end; ++
i ) {
1079 for (
auto bin_it = level_bins.first; bin_it != level_bins.second; ++bin_it ) {
1080 size_t i = bin_it->m_Bin - bin_number_base;
1082 bb[
i].InitData(vv, *bin_it);
1098 for (
auto bin_it = level_bins.first; bin_it != level_bins.second; ++bin_it ) {
1099 size_t i = bin_it->m_Bin - bin_number_base;
1101 bb[
i].ExpandData(vv, *bin_it);
1113 for (
TIndexLevel level = min_index_level; level <= max_index_level; ++level ) {
1115 uint32_t vv_bin_count = 1 << vv_bin_shift;
1118 for (
auto it = level_bins.first; it != level_bins.second; ++it ) {
1120 for (
auto& c : it->m_Chunks ) {
1126 uint32_t pos = (it->m_Bin - bin_base) << vv_bin_shift;
1158 size_t fsz =
file.GetSize();
1159 data.
reset(
new char[fsz]);
1160 file.ReadExactly(0, data.
get(), fsz);
1171 : m_UnmappedCount(0),
1172 m_TotalReadBytes(0),
1173 m_TotalReadSeconds(0)
1179 : m_UnmappedCount(0),
1180 m_TotalReadBytes(0),
1181 m_TotalReadSeconds(0)
1183 Read(index_file_name);
1235 *bytes_read = count;
1254 #ifdef BAM_SUPPORT_CSI
1265 #ifdef BAM_SUPPORT_CSI
1274 size_t count =
min(l_aux,
sizeof(
buf));
1286 for (
int32_t i_ref = 0; i_ref < n_ref; ++i_ref ) {
1287 m_Refs[i_ref].Read(
in, *
this, i_ref);
1289 streampos extra_pos =
in.tellg();
1290 in.seekg(0, ios::end);
1291 streampos end_pos =
in.tellg();
1292 in.seekg(extra_pos);
1294 if ( end_pos-extra_pos >= 8 ) {
1298 if ( end_pos != extra_pos ) {
1300 "Extra "<<(end_pos-extra_pos)<<
" bytes in BAM index");
1310 unique_ptr<CNcbiIstream> data_stream =
1313 unique_ptr<CNcbiIstream>
z_stream =
1314 make_unique<CCompressionIStream>(*data_stream,
1321 const char* buffer_end = buffer_ptr +
buffer_size;
1323 #ifdef BAM_SUPPORT_CSI
1333 #ifdef BAM_SUPPORT_CSI
1337 const char* header =
s_Read(buffer_ptr, buffer_end, 12);
1341 s_Read(buffer_ptr, buffer_end, l_aux);
1348 const char* header =
s_Read(buffer_ptr, buffer_end, 4);
1352 buffer_ptr =
m_Refs[
i].Read(buffer_ptr, buffer_end, *
this,
i);
1354 if ( buffer_end - buffer_ptr >= 8 ) {
1358 if ( buffer_ptr != buffer_end ) {
1360 "Extra "<<(buffer_end-buffer_ptr)<<
" bytes in BAM index");
1369 "Bad reference sequence index");
1371 return m_Refs[ref_index];
1379 "Wrong index ref count: "<<
1391 for (
auto&
b :
GetRef(ref_index).m_Bins ) {
1392 CBGZFPos start_pos =
b.GetStartFilePos();
1393 if ( start_pos < total_range.first )
1394 total_range.first = start_pos;
1396 if ( total_range.second < end_pos )
1397 total_range.second = end_pos;
1404 string title,
string name)
1406 if ( name.empty() ) {
1407 name =
"BAM coverage";
1409 if ( title.empty() ) {
1419 const string& ref_name,
1420 const string& seq_id,
1421 const string& annot_name,
1432 const string& ref_name,
1434 const string& annot_name,
1439 if ( ref_index ==
size_t(-1) ) {
1441 "Cannot find RefSeq: "<<ref_name);
1444 header.
GetRefLength(ref_index), min_index_level, max_index_level);
1450 const string& seq_id,
1451 const string& annot_name,
1463 const string& seq_id,
1464 const string& annot_name,
1475 const string& annot_name,
1495 const string& annot_name,
1502 if ( vv.empty() ) vv.push_back(0);
1505 length = count*bin_size;
1510 annot->
SetData().SetGraph().push_back(graph);
1511 sx_SetTitle(*graph, *annot, annot_name, annot_name);
1513 graph->
SetLoc().SetInt().SetId().Assign(seq_id);
1514 graph->
SetLoc().SetInt().SetFrom(0);
1515 graph->
SetLoc().SetInt().SetTo(length-1);
1519 vector<char>& bvalues = bgraph.
SetValues();
1520 bvalues.resize(count);
1522 uint64_t max_value = *max_element(vv.begin(), vv.end());
1523 double mul =
min(1., 255./max_value);
1524 for (
size_t i = 0;
i < count; ++
i ) {
1525 if (
auto v = vv[
i] ) {
1532 bmax =
max(bmax,
b);
1557 Read(bam_file_name);
1592 for (
int32_t i_ref = 0; i_ref < n_ref; ++i_ref ) {
1593 m_Refs[i_ref].Read(stream);
1604 "Bad reference sequence index");
1606 return m_Refs[ref_index];
1616 return iter->second;
1623 bool state_changed =
true;
1624 const char *p, *p0, *pend;
1626 for (p =
m_Text.data(), pend = p +
m_Text.size(); p < pend; ++p) {
1627 if (state_changed) {
1628 state_changed =
false;
1629 for (; p < pend && iswspace(*p); ++p)
1637 else if (*p ==
':') {
1638 if (
state == eRecord) {
1641 state_changed =
true;
1645 else if ( iswspace(*p) ) {
1646 if (
state == eTag) {
1649 state_changed =
true;
1651 else if (
state == eValue) {
1652 records.back().second[record] =
string(p0, p-p0);
1654 state_changed =
true;
1658 if (
state == eValue) {
1659 records.back().second[record] =
string(p0, p-p0);
1661 return records.size();
1680 AddRanges(index, ref_index, ref_range, search_mode);
1690 AddRanges(index, ref_index, ref_range, min_level, max_level, search_mode);
1700 AddRanges(index, ref_index, ref_range, min_level, max_level, search_mode);
1712 for (
auto&
r : ranges ) {
1713 cout <<
" (" <<
r.first<<
" "<<
r.second<<
")";
1715 return cout <<
" )";
1722 for (
auto iter = ranges.begin(); iter != ranges.end(); ) {
1724 for ( ++iter; iter != ranges.end() && !(end < iter->
first); ++iter ) {
1725 if ( end < iter->second ) {
1752 vector<CBGZFRange> ranges;
1757 if ( ref_range.
Empty() ) {
1768 TSeqPos set_limit_by_overlap_at = 0;
1771 auto bin_range = index.
GetBinRange(ref_range, level);
1772 pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter> iter_range;
1773 if ( level >= min_index_level ) {
1785 auto first_bin = iter_range.first;
1786 if ( (first_bin == ref.
m_Bins.end() ||
1787 first_bin->m_Bin != bin_range.first) &&
1788 first_bin != ref.
m_Bins.begin() ) {
1791 if ( first_bin != ref.
m_Bins.end() &&
1792 first_bin->m_Bin <= bin_range.first &&
1795 TSeqPos pos = first_bin->GetSeqRange(index).GetFrom();
1796 if ( pos > set_limit_by_overlap_at ) {
1798 set_limit_by_overlap_at = pos;
1799 limit.first =
max(limit.first, first_bin->m_Overlap);
1818 auto first_bin = iter_range.first;
1819 if ( first_bin != ref.
m_Bins.begin() ) {
1820 auto prev_bin =
prev(first_bin);
1821 _ASSERT(prev_bin->m_Bin < bin_range.first);
1822 if ( prev_bin->m_Bin >= index.
GetFirstBin(level) ) {
1824 limit.first =
max(limit.first, prev_bin->GetEndFilePos());
1830 auto next_bin = iter_range.second;
1831 if ( next_bin != ref.
m_Bins.end() &&
1834 limit.second =
min(limit.second, next_bin->GetStartFilePos());
1858 AddRanges(index, ref_index, ref_range, index_level, index_level, search_mode);
1886 AddRanges(index, ref_index, ref_range, min_index_level, max_index_level, search_mode);
1905 SetRanges(index, ref_index, ref_range, index_level, index_level, search_mode);
1944 const string& base_name,
1949 dst.push_back(base_name.substr(0, base_name.size()-old_ext.
size())+new_ext);
1956 vector<string> index_name_candidates;
1957 if ( index_path.empty() || index_path == bam_path ) {
1958 #ifdef BAM_SUPPORT_CSI
1961 index_name_candidates.push_back(bam_path+kCsiExt);
1965 index_name_candidates.push_back(bam_path+
kBaiExt);
1967 #ifdef BAM_SUPPORT_CSI
1968 if ( !prefer_csi ) {
1969 index_name_candidates.push_back(bam_path+kCsiExt);
1975 index_name_candidates.push_back(index_path);
1977 for (
size_t i = 0;
i < index_name_candidates.size(); ++
i ) {
1983 if (
i < index_name_candidates.size()-1 &&
2004 const double index_read_weight = 10;
2005 const Uint8 add_read_bytes = 100000;
2006 const double add_read_bytes_per_second = 80e6;
2007 const Uint8 add_unzip_bytes = 100000;
2008 const double add_unzip_bytes_per_second = 80e6;
2014 Uint8(index_read_stat.first*index_read_weight) +
2015 data_read_stat.first +
2017 double read_seconds =
2018 index_read_stat.second*index_read_weight +
2019 data_read_stat.second +
2020 add_read_bytes/add_read_bytes_per_second;
2022 Uint8 unzip_bytes = data_unzip_stat.first + add_unzip_bytes;
2023 double unzip_seconds = data_unzip_stat.second + add_unzip_bytes/add_unzip_bytes_per_second;
2025 return read_seconds/read_bytes + unzip_seconds/unzip_bytes;
2038 char* dst = &ret[0];
2062 char* dst =
str.data();
2088 switch ( op & 0xf ) {
2111 switch ( op & 0xf ) {
2137 switch ( op & 0xf ) {
2163 switch ( op & 0xf ) {
2206 switch ( op & 0xf ) {
2226 switch ( op & 0xf ) {
2247 dst[0] =
'0'+(v/10);
2248 dst[1] =
'0'+(v%10);
2257 str.reserve(count*10+1);
2258 char* dst =
str.data();
2260 for ( ; count--; ) {
2263 switch ( op & 0xf ) {
2274 str.resize(dst-
str.data());
2334 ptr =
static_cast<const char*
>(memchr(ptr, 0, end-ptr));
2351 size_t element_size;
2370 if ( element_size == 0 ) {
2389 ERR_POST(
"BAM: Alignment aux tag parse error");
2399 "Conversion error: "
2400 "type "<<
GetDataType()<<
" cannot be converted to char");
2410 "Conversion error: "
2411 "type "<<
GetDataType()<<
" cannot be converted to string");
2421 "Conversion error: "
2422 "type "<<
GetDataType()<<
" cannot be converted to int");
2424 if ( index >=
size() ) {
2426 "Index overflow: "<<index<<
" >= "<<
size());
2453 "Conversion error: "
2454 "type "<<
GetDataType()<<
" cannot be converted to float");
2456 if ( index >=
size() ) {
2458 "Index overflow: "<<index<<
" >= "<<
size());
2468 if ( iter->IsTag(c1, c2) ) {
2472 if ( !allow_missing ) {
2474 "Tag "<<c1<<c2<<
" not found");
2483 return data.GetString();
2491 in.GetNextAvailableBytes();
2507 const string& ref_label,
2511 : m_Reader(bam_db.GetFile())
2520 Select(bam_db, ref_label, ref_range, search_mode);
2525 const string& ref_label,
2531 : m_Reader(bam_db.GetFile())
2540 Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode);
2545 const string& ref_label,
2551 : m_Reader(bam_db.GetFile())
2560 Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode);
2585 SBamIndexParams::operator=(index);
2588 m_Ranges.
SetRanges(index, ref_index, ref_range, min_index_level, max_index_level, search_mode);
2661 if ( index_level < m_MinIndexLevel || index_level >
m_MaxIndexLevel ) {
2690 int refstart, seqstart;
2691 switch ( op & 0xf ) {
2719 "Bad CIGAR segment: " << (op & 0xf) <<
" in " <<
GetCIGAR());
2721 if ( seglen == 0 ) {
2723 "Zero CIGAR segment: in " <<
GetCIGAR());
2725 starts.push_back(refstart);
2726 starts.push_back(seqstart);
2727 lens.push_back(seglen);
static void sx_SetTitle(CSeq_graph &graph, CSeq_annot &annot, string title, string name)
static char * s_format(char *dst, uint32_t v)
static const size_t kIndexMagicLength
NCBI_PARAM_DEF_EX(int, BAM, OVERLAP_MODE, 2, eParam_NoThread, BAM_OVERLAP_MODE)
static const char kBamExt[]
static const float kEstimatedCompression
static void s_ReadMagic(CBGZFStream &in, const char *magic)
static CBGZFPos s_ReadFilePos(CNcbiIstream &in)
static int s_GetRangesMode()
Uint8 s_EstimatedPos(CBGZFPos pos)
static size_t ReadVDBFile(AutoArray< char > &data, const string &path)
static int32_t s_ReadInt32(CNcbiIstream &in)
static int s_GetOverlapMode()
static CBGZFPos s_GetOverlap(const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static void s_AddReplacedExt(vector< string > &dst, const string &base_name, CTempString old_ext, CTempString new_ext)
NCBI_PARAM_DECL(int, BAM, OVERLAP_MODE)
static CBGZFPos s_GetNextFilePos(const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static CBGZFPos s_GetFilePos(const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static uint32_t s_ReadUInt32(CNcbiIstream &in)
static const char kBaiExt[]
static const size_t kGZipMagicLength
static CBGZFRange s_ReadFileRange(CNcbiIstream &in)
static COpenRange< TSeqPos > s_GetSeqRange(SBamIndexParams params, const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static const char kGZipMagic[]
static const char kIndexMagicBAI[]
static void s_ReadString(CBGZFStream &in, string &ret, size_t len)
ostream & operator<<(ostream &out, const CBamFileRangeSet &ranges)
static void s_Read(CNcbiIstream &in, char *dst, size_t len)
Uint8 s_EstimatedSize(CBGZFPos file_pos1, CBGZFPos file_pos2)
static uint64_t s_ReadUInt64(CNcbiIstream &in)
pair< CBGZFPos, CBGZFPos > CBGZFRange
void SetPreviousReadStatistics(const pair< Uint8, double > &stats)
pair< Uint8, double > GetReadStatistics() const
pair< Uint8, double > GetUncompressStatistics() const
TByteOffset GetByteOffset() const
TFileBlockPos GetFileBlockPos() const
static CBGZFPos GetInvalid()
CBGZFPos GetSeekPos() const
void Seek(CBGZFPos pos, CBGZFPos end_pos=CBGZFPos::GetInvalid())
static int GetDebugLevel()
@ eFileNotFound
File not found.
virtual TErrCode GetErrCode(void) const
void AddWhole(const CBamHeader &header)
void AddSortedRanges(const vector< CBGZFRange > &ranges)
const_iterator end() const
void SetRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap)
const_iterator begin() const
void SetWhole(const CBamHeader &header)
Uint8 GetFileSize() const
void AddRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap)
const SBamIndexRefIndex & GetRef(size_t ref_index) const
double m_TotalReadSeconds
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, TIndexLevel min_index_level, TIndexLevel max_index_level) const
void Read(const string &index_file_name)
size_t GetRefCount() const
pair< Uint8, double > GetReadStatistics() const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, TIndexLevel min_index_level, TIndexLevel max_index_level) const
CBGZFRange GetTotalFileRange(size_t ref_index) const
void SetLengthFromHeader(const CBamHeader &header)
Uint2 GetCIGAROpsCount() const
TSeqPos GetRefSeqPos() const
void x_Select(const CBamHeader &header)
SBamAlignInfo m_AlignInfo
CBamFileRangeSet::const_iterator m_NextRange
void GetSegments(vector< int > &starts, vector< TSeqPos > &lens) const
TIndexLevel GetIndexLevel() const
void Select(CBamRawDb &bam_db)
TIndexLevel m_MaxIndexLevel
CBamFileRangeSet m_Ranges
TIndexLevel m_MinIndexLevel
COpenRange< TSeqPos > m_QueryRefRange
COpenRange< TSeqPos > m_AlignRefRange
CBGZFPos m_CurrentRangeEnd
COpenRange< TSeqPos > m_AlignReadRange
double GetEstimatedSecondsPerByte() const
void Open(const string &bam_path)
CMemoryReader(const char *ptr, size_t size)
ERW_Result Read(void *buf, size_t count, size_t *bytes_read)
Read as many as "count" bytes into a buffer pointed to by the "buf" argument.
ERW_Result PendingCount(size_t *count)
Via parameter "count" (which is guaranteed to be supplied non-NULL) return the number of bytes that a...
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
@ fOwnReader
Own the underlying reader.
void SetNameDesc(const string &name)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
CZipStreamDecompressor – zlib based decompression stream processor.
A very basic data-read interface.
const_iterator begin() const
const_iterator end() const
iterator_bool insert(const value_type &val)
const_iterator upper_bound(const key_type &key) const
const_iterator find(const key_type &key) const
static const char si[8][64]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
std::ofstream out("events_result.xml")
main entry point for tests
unsigned int TSeqPos
Type for sequence locations and lengths.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
element_type * get(void) const
Get pointer.
void reset(element_type *p=0)
Reset will delete the old pointer, set content to the new value, and assume the ownership upon the ne...
@ fGZip
Set of flags for gzip file support. See each flag description above.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
@ eParam_NoThread
Do not use per-thread values.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
int8_t Int1
1-byte (8-bit) signed integer
TThisType & SetFrom(position_type from)
position_type GetTo(void) const
TThisType & SetToOpen(position_type toOpen)
position_type GetToOpen(void) const
position_type GetFrom(void) const
static TThisType GetEmpty(void)
TThisType & SetLength(position_type length)
static TThisType GetWhole(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
ERW_Result
Result codes for I/O operations.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
@ eRW_Eof
End of data, should be considered permanent.
@ eRW_Success
Everything is okay, I/O completed.
static string PrintableString(const CTempString str, TPrintableMode mode=fNewLine_Quote|fNonAscii_Passthru)
Get a printable version of the specified string.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
CTempString & assign(const char *src_str, size_type len)
Assign new values to the content of the string.
size_type size(void) const
Return the length of the represented array.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
@ eStart
Start timer immediately after creating.
void SetA(TA value)
Assign a value to A data member.
void SetMin(TMin value)
Assign a value to Min data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void SetNumval(TNumval value)
Assign a value to Numval data member.
void SetComp(TComp value)
Assign a value to Comp data member.
TValues & SetValues(void)
Assign a value to Values data member.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
void SetMax(TMax value)
Assign a value to Max data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
void SetAxis(TAxis value)
Assign a value to Axis data member.
void SetData(TData &value)
Assign a value to Data data member.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
range(_Ty, _Ty) -> range< _Ty >
void timsort(RandomAccessIterator const first, RandomAccessIterator const last)
Same as std::stable_sort(first, last).
const struct ncbi::grid::netcache::search::fields::SIZE size
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static size_t read_size(CNcbiIstream &stream, const char *name)
Reader-writer based streams.
static const char * str(char *buf, int n)
unsigned __int64 uint64_t
bool operator()(const CBGZFPos p1, const CBGZFRange &p2) const
bool operator()(const CBGZFRange &p1, const CBGZFPos p2) const
bool operator()(const CBGZFPos p1, const SBamIndexBinInfo &p2) const
bool operator()(const SBamIndexBinInfo &p1, const CBGZFPos p2) const
const char * get_cigar_ptr() const
uint32_t get_cigar_read_size() const
uint32_t get_cigar_pos() const
CTempString get_short_seq_accession_id() const
int32_t get_ref_pos() const
pair< COpenRange< uint32_t >, COpenRange< uint32_t > > get_cigar_alignment(void) const
uint8_t get_read_name_len() const
const char * get_read_ptr() const
const char * get_aux_data_end() const
bool has_ambiguous_match() const
uint32_t get_cigar_ref_size() const
const char * get_read_name_ptr() const
static const char kBaseSymbols[]
void Read(CBGZFStream &in)
const char * get_aux_data_ptr() const
uint16_t get_cigar_ops_count() const
static const char kCIGARSymbols[]
int32_t get_ref_index() const
uint32_t get_read_len() const
SBamAuxData get_aux_data(char c1, char c2, bool allow_missing=false) const
float GetFloat(size_t index=0) const
Int8 GetInt(size_t index=0) const
CTempString GetString() const
CBGZFPos GetEndFilePos() const
vector< CBGZFRange > m_Chunks
CBGZFPos GetStartFilePos() const
void Read(CNcbiIstream &in, SBamIndexParams params)
COpenRange< TSeqPos > GetSeqRange(SBamIndexParams params) const
static const TShift kLevelStepBinShift
static const TShift kBAI_min_shift
static const TIndexLevel kMinBinIndexLevel
static const TIndexLevel kBAI_depth
static const TBin kMaxBinNumber
constexpr TSeqPos GetBinSize(TIndexLevel level) const
constexpr TBin GetPseudoBin() const
constexpr TBin GetBinNumberBase(int level) const
constexpr TBin GetFirstBin(TIndexLevel level) const
pair< TBin, TBin > GetBinRange(COpenRange< TSeqPos > ref_range, TIndexLevel index_level) const
TBin GetBinNumber(TSeqPos pos, TIndexLevel level) const
TBin GetBinNumberOffset(TSeqPos pos, TIndexLevel level) const
bool IsOverflowBin(TBin bin, TIndexLevel level=0) const
constexpr TShift GetMinLevelBinShift() const
constexpr TSeqPos GetMinBinSize() const
constexpr TShift GetLevelBinShift(TIndexLevel level) const
constexpr TIndexLevel GetMaxIndexLevel() const
constexpr TBin GetLastBin(TIndexLevel level) const
vector< TSeqPos > GetAlnOverStarts(void) const
pair< TBinsIter, TBinsIter > GetBinsIterRange(pair< TBin, TBin > bin_range) const
vector< Uint8 > EstimateDataSizeByAlnStartPos(TSeqPos seqlen=kInvalidSeqPos) const
TSeqPos m_EstimatedLength
CBGZFRange GetFileRange() const
pair< TBinsIter, TBinsIter > GetLevelBins(TIndexLevel level) const
bool ProcessPseudoBin(SBamIndexBinInfo &bin)
TBins::const_iterator TBinsIter
CBGZFRange GetLimitRange(COpenRange< TSeqPos > &ref_range, ESearchMode search_mode) const
const char * Read(const char *buffer_ptr, const char *buffer_end, SBamIndexParams params, int32_t ref_index)
vector< uint64_t > CollectEstimatedCoverage(TIndexLevel min_index_level, TIndexLevel max_index_level) const
CBGZFRange m_UnmappedChunk
vector< CBGZFPos > m_Overlaps
void SetLengthFromHeader(TSeqPos length)
pair< TBinsIter, TBinsIter > AddLevelFileRanges(vector< CBGZFRange > &ranges, CBGZFRange limit_file_range, pair< TBin, TBin > bin_range) const
void ProcessBin(const SBamIndexBinInfo &bin)
vector< TSeqPos > GetAlnOverEnds(void) const
static void x_AddDataSize(vector< Uint8 > &vv, size_t beg_pos, size_t end_pos, CBGZFPos file_beg, CBGZFPos file_end)
void InitData(vector< Uint8 > &vv, const SBamIndexBinInfo &bin)
SBamRangeBlock(vector< Uint8 > &vv, const vector< SBamRangeBlock > &bb, size_t bb_beg, size_t bb_end)
void ExpandData(vector< Uint8 > &vv, const SBamIndexBinInfo &bin)
static Uint8 MakeUint8(const char *buf)
static Uint4 MakeUint4(const char *buf)
static Uint2 MakeUint2(const char *buf)
static float MakeFloat(const char *buf)