45 #ifndef NCBI_THROW2_FMT
46 # define NCBI_THROW2_FMT(exception_class, err_code, message, extra) \
47 throw NCBI_EXCEPTION2(exception_class, err_code, FORMAT(message), extra)
68 #ifdef BAM_SUPPORT_CSI
69 static const char kCsiExt[] =
".csi";
70 static const char kIndexMagicCSI[] =
"CSI\1";
84 size_t cnt =
in.gcount();
92 const char*
s_Read(
const char*& buffer_ptr,
const char* buffer_end,
size_t len)
94 const char* ret_ptr = buffer_ptr;
95 const char* ret_end = ret_ptr +
len;
96 if ( ret_end > buffer_end ) {
129 if ( memcmp(
buf, magic, 4) != 0 ) {
202 #ifdef BAM_SUPPORT_CSI
212 for (
int32_t i_chunk = 0; i_chunk < n_chunks; ++i_chunk ) {
222 #ifdef BAM_SUPPORT_CSI
224 const char* header =
s_Read(ptr, end, 16);
230 const char* header =
s_Read(ptr, end, 8);
237 const char*
data =
s_Read(ptr, end, n_chunks*16);
238 for (
size_t i = 0;
i < n_chunks; ++
i ) {
252 pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>
256 pair<TBinsIter, TBinsIter> ret;
258 ret.second =
m_Bins.end();
282 return p1 < p2.second;
286 return p1.second < p2;
304 "Bad unmapped bin format");
318 "No chunks in bin "<<bin.
m_Bin);
320 for (
size_t i = 0;
i < bin.
m_Chunks.size(); ++
i ) {
324 "Empty BAM BGZF range in bin "<<bin.
m_Bin<<
329 "Overlapping BAM BGZF ranges in bin "<<bin.
m_Bin<<
349 SBamIndexParams::operator=(params);
351 size_t bin_count = 0;
355 for (
int32_t i_bin = 0; i_bin < n_bin; ++i_bin ) {
383 SBamIndexParams::operator=(params);
385 size_t bin_count = 0;
389 for (
size_t i_bin = 0; i_bin < n_bin; ++i_bin ) {
391 buffer_ptr = bin.
Read(buffer_ptr, buffer_end, *
this);
404 const char*
data =
s_Read(buffer_ptr, buffer_end, n_intv*8);
405 for (
size_t i = 0;
i < n_intv; ++
i ) {
418 const pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
420 if ( iters.first == iters.second ) {
428 return iters.first->GetSeqRange(params);
435 s_GetOverlap(
const pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
437 if ( iters.first == iters.second ) {
441 return iters.first->m_Overlap;
448 s_GetFilePos(
const pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
450 auto iter = iters.first;
451 if ( iter == iters.second ) {
454 return iter->GetStartFilePos();
460 s_GetNextFilePos(
const pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>& iters)
462 auto iter = iters.first;
463 if ( iter == iters.second ) {
467 if ( iter == iters.second ) {
470 return iter->GetStartFilePos();
502 vector<TSeqPos> aln_over_starts(nBins);
511 auto it = lower_bound(
m_Bins.begin(),
m_Bins.end(), bin);
512 if ( it !=
m_Bins.end() && it->m_Bin == bin ) {
513 for (
auto c : it->m_Chunks ) {
514 if ( c.first >= min_fp ) {
517 if ( c.first >= limit.second ) {
520 if ( c.second <= limit.first ) {
523 if ( c.first < limit.first ) {
524 c.first = limit.first;
526 _ASSERT(c.first >= limit.first);
527 _ASSERT(c.first < limit.second);
529 if ( c.first < min_fp ) {
538 min_aln_start = ref_range.
GetFrom();
544 auto it = lower_bound(level_bins.first, level_bins.second, min_fp,
PByStartFilePos());
545 if ( it == level_bins.first ) {
549 min_aln_start =
max(min_aln_start, it->GetSeqRange(*this).GetFrom());
550 if ( it->GetEndFilePos() > min_fp ) {
557 aln_over_starts[
i] = min_aln_start;
559 return aln_over_starts;
563 vector<TSeqPos> aln_over_starts(nBins);
565 auto bin_it_start =
GetLevelBins(0).first, next_bin_it = bin_it_start;
566 for (
size_t i = 0;
i < nBins; ++
i ) {
571 aln_over_starts[
i] = ref_pos;
575 while ( next_bin_it !=
m_Bins.end() && next_bin_it->GetStartFilePos() <= min_fp ) {
578 TSeqPos min_aln_start =
i? aln_over_starts[
i-1]: 0;
579 bool inside_min_bin =
false;
580 if ( next_bin_it != bin_it_start ) {
581 auto& bin = next_bin_it[-1];
582 _ASSERT(bin.GetStartFilePos() <= min_fp);
583 inside_min_bin = bin.GetEndFilePos() > min_fp;
586 if ( min_aln_start+
GetMinBinSize() < ref_pos && !inside_min_bin ) {
590 auto it = upper_bound(level_bins.first, level_bins.second, min_fp,
PByStartFilePos());
591 if ( it == level_bins.first ) {
595 min_aln_start =
max(min_aln_start, it->GetSeqRange(*this).GetFrom());
596 if ( it->GetEndFilePos() > min_fp ) {
603 if ( min_aln_start > ref_pos ) {
605 "Inconsistent linear index at ref pos "<<ref_pos<<
606 ": align starts after end bin start "<<min_aln_start);
608 aln_over_starts[
i] = min_aln_start;
610 return aln_over_starts;
614 vector<TSeqPos> aln_over_starts(nBins);
615 vector<pair<TBinsIter, TBinsIter>> levelBins;
616 vector<COpenRange<TSeqPos>> levelBinSeqRange;
617 vector<CBGZFPos> levelPrevOverlap;
624 levelBinSeqRange.push_back(
s_GetSeqRange(*
this, levelBins.back()));
629 return aln_over_starts;
632 for (
auto& bin :
m_Bins ) {
633 auto sp = bin.GetSeqRange(*this).GetFrom();
634 auto fp = bin.GetStartFilePos();
635 auto ins = sp2minfp.
insert(make_pair(sp,
fp));
638 auto& minfp = ins.first->second;
639 minfp =
min(minfp,
fp);
643 for (
auto p : sp2minfp ) {
644 auto ins = fp2sp.
insert(make_pair(p.second, p.first));
646 auto iter = ins.first;
648 while ( iter != fp2sp.
end() && iter->second < p.first ) {
649 iter = fp2sp.
erase(iter);
662 while ( levelBinSeqRange[level].GetToOpen() <= seqPos ) {
664 levelPrevOverlap[level] =
s_GetOverlap(levelBins[level]);
666 ++(levelBins[level].first);
667 levelBinSeqRange[level] =
s_GetSeqRange(*
this, levelBins[level]);
671 if ( seqPos >= levelBinSeqRange[level].GetFrom() ) {
675 overlap_fp = levelPrevOverlap[level];
677 prev_overlap_fp =
max(prev_overlap_fp, overlap_fp);
684 while ( levelBinSeqRange[level].GetToOpen() <= seqPos ) {
686 levelPrevOverlap[level] =
s_GetOverlap(levelBins[level]);
688 ++(levelBins[level].first);
689 levelBinSeqRange[level] =
s_GetSeqRange(*
this, levelBins[level]);
691 if ( seqPos < levelBinSeqRange[level].GetFrom() ) {
697 overlap_fp =
max(prev_overlap_fp, levelBins[level].
first->m_Overlap);
702 auto& chunks = levelBins[level].first->m_Chunks;
703 auto it = upper_bound(chunks.begin(), chunks.end(), overlap_fp,
PByEndFilePos());
704 if ( it != chunks.end() && it->first <
min(found_fp, limit_fp) ) {
706 found_fp =
max(it->first, overlap_fp);
707 if ( found_fp <= overlap_fp ) {
714 aln_over_starts[
b] = seqPos;
721 auto osp =
min(seqPos,
prev(iter)->second);
725 osp =
max(osp, aln_over_starts[
b-1]);
727 aln_over_starts[
b] = osp;
730 return aln_over_starts;
740 vector<TSeqPos> ends(
count);
742 for ( ; ei <
count; ++ei ) {
743 while (
si*bin_size < starts[ei] ) {
744 ends[
si++] = ei*bin_size-1;
748 ends[
si++] = ei*bin_size-1;
764 if ( file_pos1 >= file_pos2 ) {
791 if ( ref_range.
Empty() ) {
803 #ifdef BAM_SUPPORT_CSI
809 auto it = lower_bound(bins.first, bins.second, bin_num);
810 if ( it != bins.second && it->m_Bin <= bin_num_last ) {
811 if ( it->m_Overlap ) {
812 if ( !limit.first || it->m_Overlap < limit.first ) {
813 limit.first = it->m_Overlap;
816 if ( it->m_Bin == bin_num ) {
830 auto it = lower_bound(bins.first, bins.second, bin_num);
831 if ( it != bins.first ) {
832 limit.first =
max(limit.first,
prev(it)->GetEndFilePos());
841 auto it = lower_bound(bins.first, bins.second, bin_num);
842 if ( it != bins.second ) {
843 limit.second =
min(limit.second, it->GetStartFilePos());
850 pair<SBamIndexRefIndex::TBin, SBamIndexRefIndex::TBin>
854 pair<TBin, TBin> bin_range;
866 bin_range.second = bin_range.first-1;
886 pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>
889 pair<TBin, TBin> bin_range)
const
893 for ( ; it !=
m_Bins.end() && it->m_Bin <= bin_range.second; ++it ) {
894 for (
auto c : it->m_Chunks ) {
895 if ( c.first < limit_file_range.first ) {
896 c.first = limit_file_range.first;
898 if ( limit_file_range.second && limit_file_range.second < c.second ) {
899 c.second = limit_file_range.second;
901 if ( c.first < c.second ) {
906 return make_pair(
first, it);
910 pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter>
915 return make_pair(
first, it);
935 size_t page_count = end_pos - beg_pos + 1;
936 Uint8 add_size = (file_size + page_count/2) / page_count;
938 for (
size_t i = beg_pos;
i <= end_pos; ++
i ) {
945 for (
size_t i = beg_pos;
i <= end_pos; ++
i ) {
979 _ASSERT(new_file_beg < new_file_end);
1006 const vector<SBamRangeBlock>&
bb,
size_t bb_beg,
size_t bb_end)
1008 for (
size_t i = bb_beg;
i <= bb_end; ++
i ) {
1010 if ( !
b.file_end ) {
1037 if ( bins.first != bins.second ) {
1038 CBGZFPos pos_beg = bins.first->GetStartFilePos();
1040 if ( pos_beg <
range.first ) {
1041 range.first = pos_beg;
1043 if ( pos_end >
range.second ) {
1044 range.second = pos_end;
1048 if (
range.first.IsInvalid() ) {
1066 vector<Uint8> vv(bin_count);
1068 vector<SBamRangeBlock>
bb(bin_count);
1069 size_t bb_end = bin_count-1;
1070 for (
size_t i = 0;
i <= bb_end; ++
i ) {
1077 for (
auto bin_it = level_bins.first; bin_it != level_bins.second; ++bin_it ) {
1078 size_t i = bin_it->m_Bin - bin_number_base;
1080 bb[
i].InitData(vv, *bin_it);
1096 for (
auto bin_it = level_bins.first; bin_it != level_bins.second; ++bin_it ) {
1097 size_t i = bin_it->m_Bin - bin_number_base;
1099 bb[
i].ExpandData(vv, *bin_it);
1111 for (
TIndexLevel level = min_index_level; level <= max_index_level; ++level ) {
1113 uint32_t vv_bin_count = 1 << vv_bin_shift;
1116 for (
auto it = level_bins.first; it != level_bins.second; ++it ) {
1118 for (
auto& c : it->m_Chunks ) {
1124 uint32_t pos = (it->m_Bin - bin_base) << vv_bin_shift;
1156 size_t fsz =
file.GetSize();
1157 data.reset(
new char[fsz]);
1158 file.ReadExactly(0,
data.get(), fsz);
1169 : m_UnmappedCount(0),
1170 m_TotalReadBytes(0),
1171 m_TotalReadSeconds(0)
1177 : m_UnmappedCount(0),
1178 m_TotalReadBytes(0),
1179 m_TotalReadSeconds(0)
1181 Read(index_file_name);
1233 *bytes_read =
count;
1252 #ifdef BAM_SUPPORT_CSI
1263 #ifdef BAM_SUPPORT_CSI
1284 for (
int32_t i_ref = 0; i_ref < n_ref; ++i_ref ) {
1285 m_Refs[i_ref].Read(
in, *
this, i_ref);
1287 streampos extra_pos =
in.tellg();
1288 in.seekg(0, ios::end);
1289 streampos end_pos =
in.tellg();
1290 in.seekg(extra_pos);
1292 if ( end_pos-extra_pos >= 8 ) {
1296 if ( end_pos != extra_pos ) {
1298 "Extra "<<(end_pos-extra_pos)<<
" bytes in BAM index");
1308 unique_ptr<CNcbiIstream> data_stream =
1309 make_unique<CRStream>(
new CMemoryReader(buffer_ptr, buffer_size),
1311 unique_ptr<CNcbiIstream>
z_stream =
1312 make_unique<CCompressionIStream>(*data_stream,
1319 const char* buffer_end = buffer_ptr + buffer_size;
1321 #ifdef BAM_SUPPORT_CSI
1331 #ifdef BAM_SUPPORT_CSI
1335 const char* header =
s_Read(buffer_ptr, buffer_end, 12);
1339 s_Read(buffer_ptr, buffer_end, l_aux);
1346 const char* header =
s_Read(buffer_ptr, buffer_end, 4);
1350 buffer_ptr =
m_Refs[
i].Read(buffer_ptr, buffer_end, *
this,
i);
1352 if ( buffer_end - buffer_ptr >= 8 ) {
1356 if ( buffer_ptr != buffer_end ) {
1358 "Extra "<<(buffer_end-buffer_ptr)<<
" bytes in BAM index");
1367 "Bad reference sequence index");
1369 return m_Refs[ref_index];
1377 "Wrong index ref count: "<<
1389 for (
auto&
b :
GetRef(ref_index).m_Bins ) {
1390 CBGZFPos start_pos =
b.GetStartFilePos();
1391 if ( start_pos < total_range.first )
1392 total_range.first = start_pos;
1394 if ( total_range.second < end_pos )
1395 total_range.second = end_pos;
1402 string title,
string name)
1404 if ( name.empty() ) {
1405 name =
"BAM coverage";
1407 if ( title.empty() ) {
1417 const string& ref_name,
1418 const string& seq_id,
1419 const string& annot_name,
1430 const string& ref_name,
1432 const string& annot_name,
1437 if ( ref_index ==
size_t(-1) ) {
1439 "Cannot find RefSeq: "<<ref_name);
1442 header.
GetRefLength(ref_index), min_index_level, max_index_level);
1448 const string& seq_id,
1449 const string& annot_name,
1461 const string& seq_id,
1462 const string& annot_name,
1473 const string& annot_name,
1493 const string& annot_name,
1500 if ( vv.empty() ) vv.push_back(0);
1503 length =
count*bin_size;
1508 annot->
SetData().SetGraph().push_back(graph);
1509 sx_SetTitle(*graph, *annot, annot_name, annot_name);
1511 graph->
SetLoc().SetInt().SetId().Assign(seq_id);
1512 graph->
SetLoc().SetInt().SetFrom(0);
1513 graph->
SetLoc().SetInt().SetTo(length-1);
1518 bvalues.resize(
count);
1520 uint64_t max_value = *max_element(vv.begin(), vv.end());
1521 double mul =
min(1., 255./max_value);
1522 for (
size_t i = 0;
i <
count; ++
i ) {
1523 if (
auto v = vv[
i] ) {
1555 Read(bam_file_name);
1590 for (
int32_t i_ref = 0; i_ref < n_ref; ++i_ref ) {
1591 m_Refs[i_ref].Read(stream);
1602 "Bad reference sequence index");
1604 return m_Refs[ref_index];
1614 return iter->second;
1621 bool state_changed =
true;
1622 const char *p, *p0, *pend;
1624 for (p =
m_Text.data(), pend = p +
m_Text.size(); p < pend; ++p) {
1625 if (state_changed) {
1626 state_changed =
false;
1627 for (; p < pend && iswspace(*p); ++p)
1635 else if (*p ==
':') {
1636 if (
state == eRecord) {
1639 state_changed =
true;
1643 else if ( iswspace(*p) ) {
1644 if (
state == eTag) {
1647 state_changed =
true;
1649 else if (
state == eValue) {
1650 records.back().second[record] =
string(p0, p-p0);
1652 state_changed =
true;
1656 if (
state == eValue) {
1657 records.back().second[record] =
string(p0, p-p0);
1659 return records.size();
1678 AddRanges(index, ref_index, ref_range, search_mode);
1688 AddRanges(index, ref_index, ref_range, min_level, max_level, search_mode);
1698 AddRanges(index, ref_index, ref_range, min_level, max_level, search_mode);
1710 for (
auto&
r : ranges ) {
1711 cout <<
" (" <<
r.first<<
" "<<
r.second<<
")";
1713 return cout <<
" )";
1728 for (
auto iter = ranges.begin(); iter != ranges.end(); ) {
1730 for ( ++iter; iter != ranges.end() && !(end < iter->
first); ++iter ) {
1731 if ( end < iter->second ) {
1735 if ( start < min_pos ) {
1737 if (
end <= min_pos ) {
1768 vector<CBGZFRange> ranges;
1773 if ( ref_range.
Empty() ) {
1784 TSeqPos set_limit_by_overlap_at = 0;
1787 auto bin_range = index.
GetBinRange(ref_range, level);
1788 pair<SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter> iter_range;
1789 if ( level >= min_index_level ) {
1801 auto first_bin = iter_range.first;
1802 if ( (first_bin == ref.
m_Bins.end() ||
1803 first_bin->m_Bin != bin_range.first) &&
1804 first_bin != ref.
m_Bins.begin() ) {
1807 if ( first_bin != ref.
m_Bins.end() &&
1808 first_bin->m_Bin <= bin_range.first &&
1811 TSeqPos pos = first_bin->GetSeqRange(index).GetFrom();
1812 if ( pos > set_limit_by_overlap_at ) {
1814 set_limit_by_overlap_at = pos;
1815 limit.first =
max(limit.first, first_bin->m_Overlap);
1834 auto first_bin = iter_range.first;
1835 if ( first_bin != ref.
m_Bins.begin() ) {
1836 auto prev_bin =
prev(first_bin);
1837 _ASSERT(prev_bin->m_Bin < bin_range.first);
1838 if ( prev_bin->m_Bin >= index.
GetFirstBin(level) ) {
1840 limit.first =
max(limit.first, prev_bin->GetEndFilePos());
1846 auto next_bin = iter_range.second;
1847 if ( next_bin != ref.
m_Bins.end() &&
1850 limit.second =
min(limit.second, next_bin->GetStartFilePos());
1876 AddRanges(index, ref_index, ref_range, index_level, index_level, search_mode, file_pos);
1883 whole.first = file_pos;
1897 if ( file_pos && *file_pos ) {
1915 AddRanges(index, ref_index, ref_range, min_index_level, max_index_level, search_mode, file_pos);
1936 SetRanges(index, ref_index, ref_range, index_level, index_level, search_mode, file_pos);
1975 const string& base_name,
1980 dst.push_back(base_name.substr(0, base_name.size()-old_ext.
size())+new_ext);
1987 vector<string> index_name_candidates;
1988 if ( index_path.empty() || index_path == bam_path ) {
1989 #ifdef BAM_SUPPORT_CSI
1992 index_name_candidates.push_back(bam_path+kCsiExt);
1996 index_name_candidates.push_back(bam_path+
kBaiExt);
1998 #ifdef BAM_SUPPORT_CSI
1999 if ( !prefer_csi ) {
2000 index_name_candidates.push_back(bam_path+kCsiExt);
2006 index_name_candidates.push_back(index_path);
2008 for (
size_t i = 0;
i < index_name_candidates.size(); ++
i ) {
2014 if (
i < index_name_candidates.size()-1 &&
2035 const double index_read_weight = 10;
2036 const Uint8 add_read_bytes = 100000;
2037 const double add_read_bytes_per_second = 80e6;
2038 const Uint8 add_unzip_bytes = 100000;
2039 const double add_unzip_bytes_per_second = 80e6;
2045 Uint8(index_read_stat.first*index_read_weight) +
2046 data_read_stat.first +
2048 double read_seconds =
2049 index_read_stat.second*index_read_weight +
2050 data_read_stat.second +
2051 add_read_bytes/add_read_bytes_per_second;
2053 Uint8 unzip_bytes = data_unzip_stat.first + add_unzip_bytes;
2054 double unzip_seconds = data_unzip_stat.second + add_unzip_bytes/add_unzip_bytes_per_second;
2056 return read_seconds/read_bytes + unzip_seconds/unzip_bytes;
2069 char* dst = &ret[0];
2093 char* dst =
str.data();
2119 switch ( op & 0xf ) {
2142 switch ( op & 0xf ) {
2168 switch ( op & 0xf ) {
2194 switch ( op & 0xf ) {
2237 switch ( op & 0xf ) {
2257 switch ( op & 0xf ) {
2278 dst[0] =
'0'+(v/10);
2279 dst[1] =
'0'+(v%10);
2289 char* dst =
str.data();
2291 for ( ;
count--; ) {
2294 switch ( op & 0xf ) {
2305 str.resize(dst-
str.data());
2365 ptr =
static_cast<const char*
>(memchr(ptr, 0, end-ptr));
2382 size_t element_size;
2401 if ( element_size == 0 ) {
2420 ERR_POST(
"BAM: Alignment aux tag parse error");
2430 "Conversion error: "
2431 "type "<<
GetDataType()<<
" cannot be converted to char");
2441 "Conversion error: "
2442 "type "<<
GetDataType()<<
" cannot be converted to string");
2452 "Conversion error: "
2453 "type "<<
GetDataType()<<
" cannot be converted to int");
2455 if ( index >=
size() ) {
2457 "Index overflow: "<<index<<
" >= "<<
size());
2484 "Conversion error: "
2485 "type "<<
GetDataType()<<
" cannot be converted to float");
2487 if ( index >=
size() ) {
2489 "Index overflow: "<<index<<
" >= "<<
size());
2499 if ( iter->IsTag(c1, c2) ) {
2503 if ( !allow_missing ) {
2505 "Tag "<<c1<<c2<<
" not found");
2514 return data.GetString();
2522 in.GetNextAvailableBytes();
2538 const string& ref_label,
2543 : m_Reader(bam_db.GetFile())
2552 Select(bam_db, ref_label, ref_range, search_mode, file_pos);
2557 const string& ref_label,
2564 : m_Reader(bam_db.GetFile())
2573 Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode, file_pos);
2578 const string& ref_label,
2585 : m_Reader(bam_db.GetFile())
2594 Select(bam_db, ref_label, ref_range, min_index_level, max_index_level, search_mode, file_pos);
2603 if ( file_pos && *file_pos ) {
2626 SBamIndexParams::operator=(index);
2629 m_Ranges.
SetRanges(index, ref_index, ref_range, min_index_level, max_index_level, search_mode, file_pos);
2702 if ( index_level < m_MinIndexLevel || index_level >
m_MaxIndexLevel ) {
2731 int refstart, seqstart;
2732 switch ( op & 0xf ) {
2760 "Bad CIGAR segment: " << (op & 0xf) <<
" in " <<
GetCIGAR());
2762 if ( seglen == 0 ) {
2764 "Zero CIGAR segment: in " <<
GetCIGAR());
2766 starts.push_back(refstart);
2767 starts.push_back(seqstart);
2768 lens.push_back(seglen);
static void sx_SetTitle(CSeq_graph &graph, CSeq_annot &annot, string title, string name)
static char * s_format(char *dst, uint32_t v)
static const size_t kIndexMagicLength
NCBI_PARAM_DEF_EX(int, BAM, OVERLAP_MODE, 2, eParam_NoThread, BAM_OVERLAP_MODE)
static const char kBamExt[]
static const float kEstimatedCompression
static void s_ReadMagic(CBGZFStream &in, const char *magic)
static CBGZFPos s_ReadFilePos(CNcbiIstream &in)
static int s_GetRangesMode()
Uint8 s_EstimatedPos(CBGZFPos pos)
static size_t ReadVDBFile(AutoArray< char > &data, const string &path)
static int32_t s_ReadInt32(CNcbiIstream &in)
static int s_GetOverlapMode()
static CBGZFPos s_GetOverlap(const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static void s_AddReplacedExt(vector< string > &dst, const string &base_name, CTempString old_ext, CTempString new_ext)
NCBI_PARAM_DECL(int, BAM, OVERLAP_MODE)
static CBGZFPos s_GetNextFilePos(const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static CBGZFPos s_GetFilePos(const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static uint32_t s_ReadUInt32(CNcbiIstream &in)
static const char kBaiExt[]
static const size_t kGZipMagicLength
static CBGZFRange s_ReadFileRange(CNcbiIstream &in)
static COpenRange< TSeqPos > s_GetSeqRange(SBamIndexParams params, const pair< SBamIndexRefIndex::TBinsIter, SBamIndexRefIndex::TBinsIter > &iters)
static const char kGZipMagic[]
static const char kIndexMagicBAI[]
static void s_ReadString(CBGZFStream &in, string &ret, size_t len)
ostream & operator<<(ostream &out, const CBamFileRangeSet &ranges)
static void s_Read(CNcbiIstream &in, char *dst, size_t len)
Uint8 s_EstimatedSize(CBGZFPos file_pos1, CBGZFPos file_pos2)
static uint64_t s_ReadUInt64(CNcbiIstream &in)
pair< CBGZFPos, CBGZFPos > CBGZFRange
void SetPreviousReadStatistics(const pair< Uint8, double > &stats)
pair< Uint8, double > GetReadStatistics() const
pair< Uint8, double > GetUncompressStatistics() const
TByteOffset GetByteOffset() const
TFileBlockPos GetFileBlockPos() const
static CBGZFPos GetInvalid()
CBGZFPos GetSeekPos() const
void Seek(CBGZFPos pos, CBGZFPos end_pos=CBGZFPos::GetInvalid())
static int GetDebugLevel()
@ eFileNotFound
File not found.
virtual TErrCode GetErrCode(void) const
void SetFrom(CBGZFPos file_pos)
void AddWhole(const CBamHeader &header)
const_iterator end() const
void AddFrom(CBGZFPos file_pos)
const_iterator begin() const
void AddSortedRanges(const vector< CBGZFRange > &ranges, const CBGZFPos *file_pos=nullptr)
void AddRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
void SetWhole(const CBamHeader &header)
Uint8 GetFileSize() const
void SetRanges(const CBamIndex &index, size_t ref_index, COpenRange< TSeqPos > ref_range, ESearchMode search_mode=eSearchByOverlap, const CBGZFPos *file_pos=nullptr)
const SBamIndexRefIndex & GetRef(size_t ref_index) const
double m_TotalReadSeconds
vector< uint64_t > CollectEstimatedCoverage(size_t ref_index, TIndexLevel min_index_level, TIndexLevel max_index_level) const
void Read(const string &index_file_name)
size_t GetRefCount() const
pair< Uint8, double > GetReadStatistics() const
CRef< CSeq_annot > MakeEstimatedCoverageAnnot(const CBamHeader &header, const string &ref_name, const string &seq_id, const string &annot_name, TIndexLevel min_index_level, TIndexLevel max_index_level) const
CBGZFRange GetTotalFileRange(size_t ref_index) const
void SetLengthFromHeader(const CBamHeader &header)
Uint2 GetCIGAROpsCount() const
TSeqPos GetRefSeqPos() const
SBamAlignInfo m_AlignInfo
CBamFileRangeSet::const_iterator m_NextRange
void GetSegments(vector< int > &starts, vector< TSeqPos > &lens) const
void Select(CBamRawDb &bam_db, const CBGZFPos *file_pos=nullptr)
TIndexLevel GetIndexLevel() const
void x_Select(const CBamHeader &header, const CBGZFPos *file_pos=nullptr)
TIndexLevel m_MaxIndexLevel
CBamFileRangeSet m_Ranges
TIndexLevel m_MinIndexLevel
COpenRange< TSeqPos > m_QueryRefRange
COpenRange< TSeqPos > m_AlignRefRange
CBGZFPos m_CurrentRangeEnd
COpenRange< TSeqPos > m_AlignReadRange
double GetEstimatedSecondsPerByte() const
void Open(const string &bam_path)
CMemoryReader(const char *ptr, size_t size)
ERW_Result Read(void *buf, size_t count, size_t *bytes_read)
Read as many as "count" bytes into a buffer pointed to by the "buf" argument.
ERW_Result PendingCount(size_t *count)
Via parameter "count" (which is guaranteed to be supplied non-NULL) return the number of bytes that a...
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
@ fOwnReader
Own the underlying reader.
void SetNameDesc(const string &name)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
CZipStreamDecompressor – zlib based decompression stream processor.
A very basic data-read interface.
const_iterator begin() const
const_iterator end() const
iterator_bool insert(const value_type &val)
const_iterator upper_bound(const key_type &key) const
const_iterator find(const key_type &key) const
static const char si[8][64]
std::ofstream out("events_result.xml")
main entry point for tests
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
unsigned int TSeqPos
Type for sequence locations and lengths.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
@ fGZip
Set of flags for gzip file support. See each flag description above.
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
@ eParam_NoThread
Do not use per-thread values.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
int8_t Int1
1-byte (8-bit) signed integer
TThisType & SetFrom(position_type from)
position_type GetTo(void) const
TThisType & SetToOpen(position_type toOpen)
position_type GetToOpen(void) const
position_type GetFrom(void) const
static TThisType GetEmpty(void)
TThisType & SetLength(position_type length)
static TThisType GetWhole(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
ERW_Result
Result codes for I/O operations.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
@ eRW_Eof
End of data, should be considered permanent.
@ eRW_Success
Everything is okay, I/O completed.
static string PrintableString(const CTempString str, TPrintableMode mode=fNewLine_Quote|fNonAscii_Passthru)
Get a printable version of the specified string.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
CTempString & assign(const char *src_str, size_type len)
Assign new values to the content of the a string.
size_type size(void) const
Return the length of the represented array.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
@ eStart
Start timer immediately after creating.
void SetA(TA value)
Assign a value to A data member.
void SetMin(TMin value)
Assign a value to Min data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void SetNumval(TNumval value)
Assign a value to Numval data member.
void SetComp(TComp value)
Assign a value to Comp data member.
TValues & SetValues(void)
Assign a value to Values data member.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
void SetMax(TMax value)
Assign a value to Max data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
void SetAxis(TAxis value)
Assign a value to Axis data member.
void SetData(TData &value)
Assign a value to Data data member.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
static constexpr streamsize bmax
range(_Ty, _Ty) -> range< _Ty >
void timsort(RandomAccessIterator const first, RandomAccessIterator const last)
Same as std::stable_sort(first, last).
const struct ncbi::grid::netcache::search::fields::SIZE size
const GenericPointer< typename T::ValueType > T2 value
std::istream & in(std::istream &in_, double &x_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
static size_t read_size(CNcbiIstream &stream, const char *name)
Reader-writer based streams.
bool operator()(const CBGZFPos p1, const CBGZFRange &p2) const
bool operator()(const CBGZFRange &p1, const CBGZFPos p2) const
bool operator()(const CBGZFPos p1, const SBamIndexBinInfo &p2) const
bool operator()(const SBamIndexBinInfo &p1, const CBGZFPos p2) const
const char * get_cigar_ptr() const
uint32_t get_cigar_read_size() const
uint32_t get_cigar_pos() const
CTempString get_short_seq_accession_id() const
int32_t get_ref_pos() const
pair< COpenRange< uint32_t >, COpenRange< uint32_t > > get_cigar_alignment(void) const
uint8_t get_read_name_len() const
const char * get_read_ptr() const
const char * get_aux_data_end() const
bool has_ambiguous_match() const
uint32_t get_cigar_ref_size() const
const char * get_read_name_ptr() const
static const char kBaseSymbols[]
void Read(CBGZFStream &in)
const char * get_aux_data_ptr() const
uint16_t get_cigar_ops_count() const
static const char kCIGARSymbols[]
int32_t get_ref_index() const
uint32_t get_read_len() const
SBamAuxData get_aux_data(char c1, char c2, bool allow_missing=false) const
float GetFloat(size_t index=0) const
Int8 GetInt(size_t index=0) const
CTempString GetString() const
CBGZFPos GetEndFilePos() const
vector< CBGZFRange > m_Chunks
CBGZFPos GetStartFilePos() const
void Read(CNcbiIstream &in, SBamIndexParams params)
COpenRange< TSeqPos > GetSeqRange(SBamIndexParams params) const
static const TShift kLevelStepBinShift
static const TShift kBAI_min_shift
static const TIndexLevel kMinBinIndexLevel
static const TIndexLevel kBAI_depth
static const TBin kMaxBinNumber
constexpr TSeqPos GetBinSize(TIndexLevel level) const
constexpr TBin GetPseudoBin() const
constexpr TBin GetBinNumberBase(int level) const
constexpr TBin GetFirstBin(TIndexLevel level) const
pair< TBin, TBin > GetBinRange(COpenRange< TSeqPos > ref_range, TIndexLevel index_level) const
TBin GetBinNumber(TSeqPos pos, TIndexLevel level) const
TBin GetBinNumberOffset(TSeqPos pos, TIndexLevel level) const
bool IsOverflowBin(TBin bin, TIndexLevel level=0) const
constexpr TShift GetMinLevelBinShift() const
constexpr TSeqPos GetMinBinSize() const
constexpr TShift GetLevelBinShift(TIndexLevel level) const
constexpr TIndexLevel GetMaxIndexLevel() const
constexpr TBin GetLastBin(TIndexLevel level) const
vector< TSeqPos > GetAlnOverStarts(void) const
pair< TBinsIter, TBinsIter > GetBinsIterRange(pair< TBin, TBin > bin_range) const
vector< Uint8 > EstimateDataSizeByAlnStartPos(TSeqPos seqlen=kInvalidSeqPos) const
TSeqPos m_EstimatedLength
CBGZFRange GetFileRange() const
pair< TBinsIter, TBinsIter > GetLevelBins(TIndexLevel level) const
bool ProcessPseudoBin(SBamIndexBinInfo &bin)
TBins::const_iterator TBinsIter
CBGZFRange GetLimitRange(COpenRange< TSeqPos > &ref_range, ESearchMode search_mode) const
const char * Read(const char *buffer_ptr, const char *buffer_end, SBamIndexParams params, int32_t ref_index)
vector< uint64_t > CollectEstimatedCoverage(TIndexLevel min_index_level, TIndexLevel max_index_level) const
CBGZFRange m_UnmappedChunk
vector< CBGZFPos > m_Overlaps
void SetLengthFromHeader(TSeqPos length)
pair< TBinsIter, TBinsIter > AddLevelFileRanges(vector< CBGZFRange > &ranges, CBGZFRange limit_file_range, pair< TBin, TBin > bin_range) const
void ProcessBin(const SBamIndexBinInfo &bin)
vector< TSeqPos > GetAlnOverEnds(void) const
static void x_AddDataSize(vector< Uint8 > &vv, size_t beg_pos, size_t end_pos, CBGZFPos file_beg, CBGZFPos file_end)
void InitData(vector< Uint8 > &vv, const SBamIndexBinInfo &bin)
SBamRangeBlock(vector< Uint8 > &vv, const vector< SBamRangeBlock > &bb, size_t bb_beg, size_t bb_end)
void ExpandData(vector< Uint8 > &vv, const SBamIndexBinInfo &bin)
static Uint8 MakeUint8(const char *buf)
static Uint4 MakeUint4(const char *buf)
static Uint2 MakeUint2(const char *buf)
static float MakeFloat(const char *buf)