66 #define NCBI_USE_ERRCODE_X BAMLoader
92 #define SEPARATE_PILEUP_READS
133 #define PILEUP_NAME_SUFFIX "pileup graphs"
194 return NCBI_PARAM_TYPE(BAM_LOADER, SKIP_EMPTY_PILEUP_GRAPHS)::GetDefault();
200 NCBI_PARAM_TYPE(BAM_LOADER, SKIP_EMPTY_PILEUP_GRAPHS)::SetDefault(param);
235 return NCBI_PARAM_TYPE(BAM_LOADER, ESTIMATED_COVERAGE_GRAPH)::GetDefault();
241 NCBI_PARAM_TYPE(BAM_LOADER, ESTIMATED_COVERAGE_GRAPH)::SetDefault(param);
257 return NCBI_PARAM_TYPE(BAM_LOADER, PREFER_RAW_INDEX_OVER_COVERAGE_GRAPH)::GetDefault();
337 return NCBI_PARAM_TYPE(BAM_LOADER, SPLIT_BIN_MIN_LENGTH)::GetDefault();
348 if ( retry_count == 0 ) {
351 for (
int t = 1;
t < retry_count; ++
t ) {
362 catch ( exception& exc ) {
377 #define RETRY(expr) CallWithRetry([&]()->auto{return (expr);}, #expr)
394 : m_BamName(bam_name), m_SeqId(seq_id)
432 : m_IdMapper(params.m_IdMapper)
436 if ( !mapper_file_name.empty() ) {
489 "CBAMDataLoader: no def file: "+def_name);
492 vector<string> tokens;
493 while ( getline(
in, line) ) {
496 if ( tokens.size() < 4 ) {
498 "CBAMDataLoader: bad def line: \""+line+
"\"");
501 info.m_BamSeqLabel = tokens[0];
502 info.m_Label = tokens[1];
503 if ( tokens[2].
empty() ) {
509 info.m_BamFileName = tokens[3];
510 if ( tokens.size() > 4 ) { // coverage file column is optional: tokens[4] exists only when the line has more than 4 tokens
511 info.m_CovFileName = tokens[4];
552 bam_info = iter->second;
555 if ( !
info.m_CovFileName.empty() ) {
580 info.m_BamFileName = bam;
626 it->second->GetShortSeqBlobId(ret, idh);
639 it->second->GetRefSeqBlobId(ret, idh);
649 const string& name = it->m_AnnotName;
650 if ( name.empty() ) {
726 const string& refseq_label,
739 string refseq_label = rit.GetRefSeqId();
761 if (
impl.m_IdMapper.get() ) {
765 if ( !include_tags.empty() ) {
768 for (
auto&
tag : tags ) {
780 "Found "<<refseq_label<<
" -> "<<refseq_id);
785 "duplicate Seq-id "<<refseq_id<<
" for ref "<<refseq_label<<
" in "<<
GetBamName());
797 it->second->GetShortSeqBlobId(ret, idh);
807 info->SetBlobId(ret, idh);
818 return it->second.GetNCPointer();
838 if (
info.spot1.empty() ) {
843 if (
info.spot1 == seq ) {
847 if (
info.spot2.empty() ) {
852 if (
info.spot2 == seq ) {
869 const string& refseqid,
872 m_RefSeqId(refseqid),
875 m_LoadedRanges(
false)
887 "Seq-id "<<idh<<
" appears in two files: "
888 <<ret->ToString()<<
" & "<<id->ToString());
929 void Collect(
const CBamDb& bam_db,
const string& ref_id,
930 TSeqPos ref_pos,
unsigned count,
int min_quality);
932 unsigned GetStatCount(
void)
const {
935 double GetStatLen(
void)
const {
936 return m_RefPosLast - m_RefPosFirst + .5;
941 void SRefStat::Collect(
const CBamDb& bam_db,
const string& ref_id,
942 TSeqPos ref_pos,
unsigned count,
int min_quality)
944 m_RefPosQuery = ref_pos;
948 for ( ; ait; ++ait ) {
950 if ( pos < ref_pos ) {
954 if ( min_quality > 0 && ait.
GetMapQuality() < min_quality ) {
960 if (
max > ref_len ) {
965 if (
len > m_RefLenMax ) {
968 if (
max > m_RefPosMax ) {
971 if ( m_Count == 0 ) {
974 if ( ++m_Count == count ) {
980 "Stat @ "<<m_RefPosQuery<<
": "<<m_Count<<
" entries: "<<
981 m_RefPosFirst<<
"-"<<m_RefPosLast<<
982 "(+"<<m_RefPosMax-m_RefPosLast<<
")"<<
983 " max len: "<<m_RefLenMax<<
984 " skipped: "<<skipped);
1015 "outlier value not found: "<<
id);
1036 if ( alist.size() != 1 ) {
1038 "wrong number of annots in cov entry: "<<
1041 annot = alist.front();
1043 if ( glist.size() != 1 ) {
1045 "wrong number of graphs in cov entry: "<<
1048 graph = glist.front();
1052 if ( (*it)->IsUser() &&
1053 (*it)->GetUser().GetType().GetStr() ==
"BAM coverage" ) {
1054 params = &(*it)->GetUser();
1062 double vmul = graph->
GetA();
1063 double vadd = graph->
GetB();
1064 double outliers_mul = 1./slot;
1066 size_t non_zero_count = 0;
1067 vector<double> cov(
cnt);
1071 int vmin =
g.GetMin();
1072 int vmax =
g.GetMax();
1092 int vmin =
g.GetMin();
1093 int vmax =
g.GetMax();
1114 double total_cov = avg_cov*non_zero_count*slot;
1115 double avg_align_len = total_cov/align_cnt;
1122 max_align_len =
int(avg_align_len*2+50);
1124 double cov_to_align_cnt = (align_cnt*slot)/total_cov;
1131 double next_cnt =
i==
cnt? 0: cov[
i] * cov_to_align_cnt;
1141 " Slots "<<cur_first<<
"-"<<cur_last<<
1147 TSeqPos end = cur_last*slot+slot;
1155 if (
empty )
continue;
1160 cur_cnt +=
max(next_cnt, 1e-9);
1177 return i < over_ends.size()? over_ends[
i]:
i*bin_size+bin_size-1;
1191 vector<Uint4> over_ends = refseq.GetAlnOverEnds();
1196 " Bin size: "<<bin_size<<
1197 " count: "<<data_sizes.size()<<
1198 " length: "<<(bin_size*data_sizes.size()));
1200 " Total cov: "<<accumulate(data_sizes.begin(), data_sizes.end(),
Uint8(0)));
1202 static const TSeqPos kZeroBlocks = 8;
1203 static const TSeqPos kMaxChunkLength = 300*1024*1024;
1208 Uint8 cur_data_size = 0;
1212 if ( split_bin_data_size == 0 || split_bin_min_length == 0 ) {
1217 if (
i < bin_count && !data_sizes[
i] ) {
1222 if (
i == bin_count || zero_count >= kZeroBlocks ) {
1224 if ( cur_data_size ) {
1226 _ASSERT(last_pos < pos-zero_count*bin_size);
1228 TSeqPos non_zero_end = pos - zero_count*bin_size;
1230 info.m_DataSize = cur_data_size;
1231 info.m_RefSeqRange.SetFrom(last_pos);
1232 info.m_RefSeqRange.SetTo(
s_GetEnd(over_ends,
i-zero_count-1, bin_size));
1233 info.m_MaxRefSeqFrom = non_zero_end-1;
1237 " Range "<<
info.m_RefSeqRange.GetFrom()<<
"-"<<
info.m_MaxRefSeqFrom<<
1238 " (.."<<
info.m_RefSeqRange.GetTo()<<
")"
1239 " size: "<<
info.m_DataSize);
1244 last_pos = non_zero_end;
1248 if ( zero_count > 0 ) {
1251 _ASSERT(last_pos == pos-zero_count*bin_size);
1254 info.m_DataSize = 0;
1255 info.m_RefSeqRange.SetFrom(last_pos);
1256 info.m_RefSeqRange.SetTo(
s_GetEnd(over_ends,
i-zero_count, bin_size));
1257 info.m_MaxRefSeqFrom = last_pos;
1261 " Range "<<
info.m_RefSeqRange.GetFrom()<<
"-"<<
info.m_MaxRefSeqFrom<<
1262 " (.."<<
info.m_RefSeqRange.GetTo()<<
")"
1263 " size: "<<
info.m_DataSize);
1272 if (
i == bin_count ) {
1276 cur_data_size += data_sizes[
i];
1278 pos+bin_size-last_pos >= kMaxChunkLength ||
1279 (
i+1 < bin_count && data_sizes[
i+1] > split_bin_data_size) ) {
1281 data_sizes[
i] > split_bin_data_size &&
1282 bin_size > split_bin_min_length ) {
1285 _ASSERT(cur_data_size == data_sizes[
i]);
1286 int split_shift = 0;
1287 while ( (cur_data_size >> split_shift) > split_bin_data_size &&
1288 (bin_size >> split_shift) > split_bin_min_length ) {
1291 int sub_chunk_count = 1 << split_shift;
1292 auto sub_chunk_data_size = cur_data_size >> split_shift;
1293 TSeqPos sub_chunk_len = bin_size >> split_shift;
1298 " Range "<<last_pos<<
"-"<<(pos+bin_size)<<
1299 " (.."<<ref_end<<
")"<<
1300 " size: "<<cur_data_size);
1302 for (
int i = 0;
i < sub_chunk_count; ++
i ) {
1304 info.m_DataSize = sub_chunk_data_size;
1305 info.m_RefSeqRange.SetFrom(last_pos+
i*sub_chunk_len);
1306 info.m_MaxRefSeqFrom = pos+(
i+1)*sub_chunk_len-1;
1307 info.m_RefSeqRange.SetTo(
i==0? ref_end:
info.m_MaxRefSeqFrom);
1308 info.m_PileupChunkCount =
i==0? sub_chunk_count: 0;
1312 " Range "<<
info.m_RefSeqRange.GetFrom()<<
"-"<<
info.m_MaxRefSeqFrom<<
1313 " (.."<<
info.m_RefSeqRange.GetTo()<<
")"
1314 " size: "<<
info.m_DataSize);
1319 last_pos = pos+bin_size;
1327 info.m_DataSize = cur_data_size;
1328 info.m_RefSeqRange.SetFrom(last_pos);
1330 info.m_MaxRefSeqFrom = pos+bin_size-1;
1334 " Range "<<
info.m_RefSeqRange.GetFrom()<<
"-"<<
info.m_MaxRefSeqFrom<<
1335 " (.."<<
info.m_RefSeqRange.GetTo()<<
")"
1336 " size: "<<
info.m_DataSize);
1340 last_pos = pos+bin_size;
1365 const unsigned kStatCount = 1000;
1367 TSeqPos ref_begin = 0, ref_end_min = 0, ref_end = 0, max_len = 0;
1368 double stat_len = 0, stat_cnt = 0;
1369 const unsigned scan_first = 1;
1373 if ( stat[0].m_Count != kStatCount ) {
1375 if ( stat[0].m_Count > 0 ) {
1386 ref_begin = stat[0].m_RefPosFirst;
1387 ref_end_min = stat[0].m_RefPosLast;
1388 max_len = stat[0].m_RefLenMax;
1389 stat_len = stat[0].GetStatLen();
1390 stat_cnt = stat[0].GetStatCount();
1396 while (
max >
min+max_len+1 ) {
1409 for (
unsigned k = scan_first; k <
kNumStat; ++k ) {
1412 if ( k && ref_pos < stat[k-1].m_RefPosLast ) {
1413 ref_pos = stat[k-1].m_RefPosLast;
1415 _TRACE(
"stat["<<k<<
"] @ "<<ref_pos);
1418 stat_len += stat[k].GetStatLen();
1419 stat_cnt += stat[k].GetStatCount();
1420 if ( stat[k].m_RefLenMax > max_len ) {
1421 max_len = stat[k].m_RefLenMax;
1424 double density = stat_cnt / stat_len;
1425 double exp_count = (ref_end-ref_begin)*density;
1426 unsigned chunks = unsigned(exp_count/
kChunkSize+1);
1427 chunks =
min(chunks,
unsigned(sqrt(exp_count)+1));
1431 "Total range: "<<ref_begin<<
"-"<<ref_end-1<<
1432 " exp count: "<<exp_count<<
" chunks: "<<chunks);
1434 vector<TSeqPos> pp(chunks+1);
1435 for (
unsigned k = 1; k < chunks; ++k ) {
1437 TSeqPos(
double(ref_end-ref_begin)*k/chunks);
1440 pp[chunks] = ref_end;
1441 for (
unsigned k = 0; k < chunks; ++k ) {
1448 if ( k+1 < chunks ) {
1449 end2 =
min(end2, pp[k+2]);
1465 if ( min_quality > 0 && ait.
GetMapQuality() < min_quality ) {
1470 if ( ref_end > ref_len ) {
1473 rr.push_back(
TRange(ref_pos, ref_end));
1475 if ( !rr.empty() ) {
1476 sort(rr.begin(), rr.end());
1477 for (
size_t p = 0; p < rr.size(); ) {
1479 TSeqPos min_from = rr[p].GetFrom();
1480 TSeqPos max_from = rr[e++].GetFrom();
1481 while ( e < rr.size() && rr[e].GetFrom() == max_from ) {
1484 TSeqPos max_to_open = max_from;
1485 for (
size_t i = p;
i < e; ++
i ) {
1486 max_to_open =
max(max_to_open, rr[
i].GetToOpen());
1492 " "<<
TRange(min_from, max_from)<<
" "<<max_to_open-1);
1499 rr[0].GetFrom()<<
"-"<<rr.back().GetTo()<<
1500 " count: "<<rr.size()<<
" chunks: "<<
m_Chunks.size());
1511 auto last_range_id = range_id;
1512 if ( chunk_count ) {
1513 last_range_id += chunk_count-1;
1514 range.SetToOpen(
m_Chunks[last_range_id].GetRefSeqRange().GetToOpen());
1516 if ( last_range_id+1 <
m_Chunks.size() ) {
1517 range.SetToOpen(
m_Chunks[last_range_id+1].GetRefSeqRange().GetFrom());
1615 size_t refseq_index = size_t(-1);
1622 for (
size_t range_id = 0; range_id <
m_Chunks.size(); ++range_id ) {
1624 auto align_count =
m_Chunks[range_id].GetAlignCount();
1625 auto data_size =
m_Chunks[range_id].m_DataSize;
1626 if ( align_count == 0 && data_size != 0 ) {
1629 else if ( data_size == 0 && align_count != 0 ) {
1633 if ( has_pileup &&
m_Chunks[range_id].m_PileupChunkCount ) {
1638 Uint8 bytes = data_size*
m_Chunks[range_id].m_PileupChunkCount;
1646 "Pileup Chunk id="<<chunk->
GetChunkId()<<
": "<<pileup_range<<
1647 " with "<<bytes<<
" bytes");
1649 if (
m_Chunks[range_id].m_PileupChunkCount > 1 ) {
1657 "Pileup Chunk id="<<chunk->
GetChunkId()<<
": aligns: "<<pileup_range);
1662 if (
m_Chunks[range_id].m_PileupChunkCount > 1 ) {
1676 "Align Chunk id="<<chunk->
GetChunkId()<<
": "<<wide_range<<
1677 " with "<<bytes<<
" bytes");
1683 if ( align_count ) {
1685 if (
m_Chunks[range_id].m_PileupChunkCount != 1 ) {
1697 "Align sub-page Chunk id="<<chunk->
GetChunkId()<<
": "<<start_range<<
1698 " with "<<data_size<<
" bytes");
1717 "Align Chunk id="<<chunk->
GetChunkId()<<
": "<<start_range<<
1718 " with "<<bytes<<
" bytes");
1734 "Align Chunk id="<<chunk->
GetChunkId()<<
": "<<wide_range<<
1735 " with "<<bytes<<
" bytes");
1748 "Align Chunk id="<<chunk->
GetChunkId()<<
": "<<wide_range<<
1749 " with "<<data_size<<
" bytes");
1755 "Align Chunk id="<<chunk->
GetChunkId()<<
": "<<wide_range<<
1756 " with "<<align_count<<
" aligns");
1865 #ifdef SKIP_TOO_LONG_ALIGNMENTS
1871 auto start_range =
m_Chunks[range_id].GetAlignStartRange();
1873 start_range.GetFrom(), start_range.GetLength(),
1876 &
m_Chunks[range_id].m_FilePosFirstStarting);
1878 max_end_pos =
min(max_end_pos, start_range.GetToOpen());
1883 start_range.GetFrom(), start_range.GetLength(),
1889 auto start_range =
m_Chunks[range_id].GetAlignStartRange();
1891 start_range.GetFrom(), start_range.GetLength(),
1901 const vector<CSeq_id_Handle>& short_ids)
1907 vector<CSeq_id_Handle> new_short_ids;
1910 for (
auto&
id : short_ids ) {
1912 new_short_ids.push_back(
id);
1916 if ( new_short_ids.empty() ) {
1920 for (
auto&
id : new_short_ids ) {
1935 double time = sw_attach.
Elapsed();
1938 "Created short reads chunk "<<
1939 range_id<<
"/"<<(seq_chunk_id-range_id*
kChunkIdMul)<<
" "<<
1941 m_Chunks[range_id].GetRefSeqRange()<<
" in "<<time);
1957 size_t skipped = 0, count = 0, repl_count = 0, fail_count = 0;
1958 vector<CSeq_id_Handle> short_ids;
1966 for( ; ait; ++ait ) {
1970 if ( min_quality > 0 && ait.
GetMapQuality() < min_quality ) {
1976 if ( align_end > max_end_pos ) {
1983 if ( !align_list ) {
1985 align_list = &annot->
SetData().SetAlign();
1999 double time = sw_create.
Elapsed();
2003 " @ "<<
m_Chunks[range_id].GetRefSeqRange()<<
": "<<
2004 count<<
" repl: "<<repl_count<<
" fail: "<<fail_count<<
2005 " skipped: "<<skipped<<
" in "<<time);
2013 double time = sw_attach.
Elapsed();
2017 " @ "<<
m_Chunks[range_id].GetRefSeqRange()<<
" in "<<time);
2026 " @ "<<
m_Chunks[range_id].GetRefSeqRange()<<
": "<<
2027 count<<
" repl: "<<repl_count<<
" fail: "<<fail_count<<
2028 " skipped: "<<skipped<<
" in "<<
sw.
Elapsed());
2048 size_t count = 0, skipped = 0, dups = 0, far_refs = 0;
2051 list< CRef<CBioseq> > bioseqs;
2054 #ifdef SEPARATE_PILEUP_READS
2056 for (
int i = 1;
i < sub_chunk_count; ++
i ) {
2059 start_range.GetFrom(), split_pos-start_range.GetFrom(),
2062 &
m_Chunks[range_id].m_FilePosFirstCrossing);
2067 for ( ; ait; ++ait ) {
2070 if ( min_quality > 0 && ait.
GetMapQuality() < min_quality ) {
2075 if ( align_end <= split_pos ) {
2094 if ( !loaded.
insert(seq_id).second ) {
2109 for( ; ait; ++ait ){
2111 if ( align_pos < start_range.GetFrom() ) {
2116 if ( min_quality > 0 && ait.
GetMapQuality() < min_quality ) {
2122 if ( align_end > max_end_pos ) {
2142 if ( !loaded.
insert(seq_id).second ) {
2157 count<<
" skipped: "<<skipped<<
" dups: "<<dups<<
" far: "<<far_refs<<
" in "<<
sw.
Elapsed());
2163 #define USE_NEW_PILEUP_COLLECTOR
2165 #if defined USE_NEW_PILEUP_COLLECTOR && !defined HAVE_NEW_PILEUP_COLLECTOR
2166 # undef USE_NEW_PILEUP_COLLECTOR
2169 #ifdef USE_NEW_PILEUP_COLLECTOR
2171 static Uint8 total_pileup_range;
2172 static Uint8 total_pileup_aligns;
2173 static double total_pileup_time_collect;
2174 static double total_pileup_time_max;
2175 static double total_pileup_time_make;
2177 static struct STimePrinter {
2179 if ( total_pileup_range ) {
2181 "Total pileup bases: "<<total_pileup_range<<
2182 " aligns: "<<total_pileup_aligns<<
2183 " collect time: "<<total_pileup_time_collect<<
2184 " max: "<<total_pileup_time_max<<
2185 " make: "<<total_pileup_time_make);
2222 file_pos_first_crossing(),
2223 file_pos_first_starting()
2231 typedef vector<SSplit> TSplits;
2233 TSplits::iterator cur_split;
2237 #ifdef SEPARATE_PILEUP_READS
2238 vector<CSeq_id_Handle> short_ids;
2240 TSeq2Chunk* seq2chunk = 0;
2241 int seq_chunk_id = 0;
2242 size_t seq_skipped = 0;
2243 size_t seq_dups = 0;
2244 size_t seq_count = 0;
2246 list<CRef<CBioseq>> bioseqs;
2249 SPileupGraphCreator(
const string& annot_name,
2253 : annot_name(annot_name),
2255 ref_range(ref_range),
2256 min_quality(min_quality),
2258 cur_split(splits.
begin())
2264 if ( min_quality > 0 && ait.
GetMapQuality() < min_quality ) {
2267 while ( cur_split != splits.end() ) {
2269 if ( !cur_split->file_pos_first_starting ) {
2273 if ( pos >= cur_split->seq_pos ) {
2283 if ( end > cur_split->seq_pos ) {
2285 if ( !cur_split->file_pos_first_crossing ) {
2288 if ( !align_list ) {
2290 align_list = &annot->
SetData().SetAlign();
2295 #ifdef SEPARATE_PILEUP_READS
2296 short_ids.push_back(seq_id);
2298 auto iter = seq2chunk->insert(make_pair(seq_id, seq_chunk_id)).first;
2299 if ( iter->second != seq_chunk_id ) {
2302 else if ( !loaded.
insert(seq_id).second ) {
2317 void x_CreateGraph(SGraph&
g)
2321 static const char*
const titles[
kNumStat] = {
2322 "Number of A bases",
2323 "Number of C bases",
2324 "Number of G bases",
2325 "Number of T bases",
2326 "Number of inserts",
2327 "Number of matches",
2330 graph->SetTitle(titles[&
g-graphs]);
2336 graph->SetNumval(length);
2339 void x_FinalizeGraph(SGraph&
g)
2344 if (
g.max_value < 256 ) {
2346 _ASSERT(
g.graph->GetGraph().IsByte());
2347 _ASSERT(
g.graph->GetGraph().GetByte().GetValues().size() == ref_range.
GetLength());
2350 data.SetMax(
g.max_value);
2355 _ASSERT(
g.graph->GetGraph().IsInt());
2356 _ASSERT(
g.graph->GetGraph().GetInt().GetValues().size() == ref_range.
GetLength());
2359 data.SetMax(
g.max_value);
2364 void x_AdjustACGT(
TSeqPos ref_offset)
2370 bool have_acgt =
false;
2371 for (
int k = 0; k < kNumStat_ACGT; ++k ) {
2372 if ( graphs[k].graph ) {
2378 for (
int k = 0; k < kNumStat_ACGT; ++k ) {
2379 SGraph&
g = graphs[k];
2385 g.bytes = &
g.graph->SetGraph().SetByte().SetValues();
2396 for (
int k = 0; k <
kNumStat; ++k ) {
2397 SGraph&
g = graphs[k];
2408 g.bytes = &
g.graph->SetGraph().SetByte().SetValues();
2409 g.bytes->reserve(
len);
2413 for (
int k = 0; k <
kNumStat; ++k ) {
2414 x_FinalizeGraph(graphs[k]);
2418 virtual void AddZerosBy16(
TSeqPos len)
override
2420 for (
int k = 0; k <
kNumStat; ++k ) {
2421 SGraph&
g = graphs[k];
2433 bool x_UpdateMaxIsInt(SGraph&
g, TCount max_added,
TSeqPos ref_offset)
2437 g.max_value = max_added;
2439 if ( max_added >= 256 ) {
2440 g.ints = &
g.graph->SetGraph().SetInt().SetValues();
2446 g.bytes = &
g.graph->SetGraph().SetByte().SetValues();
2452 else if ( max_added >= 256 ) {
2453 g.max_value =
max(
g.max_value, max_added);
2456 g.ints = &int_graph->SetValues();
2458 size_t size =
g.bytes->size();
2462 g.graph->SetGraph().SetInt(*int_graph);
2466 else if (
g.ints ) {
2470 g.max_value =
max(
g.max_value, max_added);
2475 void x_AddValuesBy16(SGraph&
g,
TSeqPos len,
const TCount* src)
2480 else if (
g.ints ) {
2484 void x_AddValues(SGraph&
g,
TSeqPos len,
const TCount* src)
2489 else if (
g.ints ) {
2497 if ( make_intron ) {
2500 int dst_byte = 0, dst_int = 0;
2501 for (
int k = 0; k < kNumStat_ACGT; ++k ) {
2502 SGraph&
g = graphs[k];
2506 else if (
g.ints ) {
2510 if ( dst_byte == kNumStat_ACGT ) {
2517 else if ( dst_int == kNumStat_ACGT ) {
2526 for (
int k = 0; k < kNumStat_ACGT; ++k ) {
2527 SGraph&
g = graphs[k];
2536 if ( make_intron ) {
2540 for (
int k = 0; k < kNumStat_ACGT; ++k ) {
2541 SGraph&
g = graphs[k];
2553 for (
int k = 0; k <
kNumStat; ++k ) {
2554 SGraph&
g = graphs[k];
2556 if ( max_added != 0 ||
g.graph ) {
2557 x_UpdateMaxIsInt(
g, max_added, ref_offset);
2560 x_AdjustACGT(ref_offset);
2561 x_AddValuesBy16(
len, values);
2571 for (
int k = 0; k <
kNumStat; ++k ) {
2572 SGraph&
g = graphs[k];
2574 if ( max_added != 0 ||
g.graph ) {
2575 x_UpdateMaxIsInt(
g, max_added, ref_offset);
2578 x_AdjustACGT(ref_offset);
2579 x_AddValues(
len, values);
2602 gg.splits.push_back(
m_Chunks[range_id+
i].GetRefSeqRange().GetFrom());
2604 gg.cur_split = gg.splits.begin();
2605 #ifndef SEPARATE_PILEUP_READS
2619 intron_mode, gap_to_intron_threshold);
2621 #ifndef SEPARATE_PILEUP_READS
2623 seq2chunk_guard.Release();
2627 m_Chunks[range_id+
i].m_FilePosFirstCrossing = gg.splits[
i-1].file_pos_first_crossing;
2628 m_Chunks[range_id+
i].m_FilePosFirstStarting = gg.splits[
i-1].file_pos_first_starting;
2651 desc->SetName(name);
2652 annot->
SetDesc().Set().push_back(desc);
2654 size_t total_bytes = 0;
2655 for (
int k = 0; k < ss.
kNumStat; ++k ) {
2656 SPileupGraphCreator::SGraph&
g = gg.graphs[k];
2658 annot->
SetData().SetGraph().push_back(
g.graph);
2660 total_bytes +=
g.bytes->size()*
sizeof(
g.bytes->front())+10000;
2663 total_bytes +=
g.ints->size()*
sizeof(
g.ints->front())+10000;
2668 size_t align_count = 0;
2671 align_count = gg.annot->GetData().GetAlign().size();
2674 #ifdef SEPARATE_PILEUP_READS
2687 count<<
" ("<<align_count<<
" aligns) in "<<
sw.
Elapsed());
2723 for (
int k = 0; k <
kNumStat; ++k ) {
2766 if ( gap_pos < 0 ) {
2778 TSeqPos gap_end = gap_pos + gap_len;
2779 if ( gap_end >
len ) {
2783 cc[stat][gap_pos] += 1;
2784 if ( gap_end <
len ) {
2785 cc[stat][gap_end] -= 1;
2826 for (
int k = 0; k <
kNumStat; ++k ) {
2836 Uint1 b2 = read_raw[pos/2];
2837 return pos%2? b2&0xf: b2>>4;
2856 size_t count = 0, skipped = 0;
2860 #ifdef SKIP_TOO_LONG_ALIGNMENTS
2868 for( ; ait; ++ait ) {
2869 if ( !ss.AcceptAlign(ait) ) {
2873 TSeqPos align_pos = rit->GetRefSeqPos();
2874 #ifdef SKIP_TOO_LONG_ALIGNMENTS
2876 if ( align_end > ref_len ) {
2883 TSeqPos ss_pos = align_pos - graph_range.GetFrom();
2885 CTempString read_raw = rit->GetShortSequenceRaw();
2886 for (
Uint2 i = 0, count = rit->GetCIGAROpsCount();
i < count; ++
i ) {
2887 Uint4 op = rit->GetCIGAROp(
i);
2888 Uint4 seglen = op >> 4;
2889 switch ( op & 0xf ) {
2929 for( ; ait; ++ait ) {
2930 if ( !ss.AcceptAlign(ait) ) {
2935 #ifdef SKIP_TOO_LONG_ALIGNMENTS
2937 if ( align_end > ref_len ) {
2948 const char* ptr = cigar.
data();
2949 const char* end = ptr + cigar.
size();
2950 while ( ptr != end ) {
2953 for ( ; ++ptr != end; ) {
2955 if ( c >=
'0' && c <=
'9' ) {
2956 seglen = seglen*10+(c-
'0');
2962 if ( seglen == 0 ) {
2963 ERR_POST_X(4,
"CBAMDataLoader: Bad CIGAR length: "<<
type<<
"0 in "<<cigar);
2966 if (
type ==
'=' ) {
2974 else if (
type ==
'M' ||
type ==
'X' ) {
2978 ss.
add_base(ss_pos, read[read_pos]);
2983 else if (
type ==
'I' ||
type ==
'S' ) {
2984 if (
type ==
'S' ) {
2990 else if (
type ==
'N' ) {
2995 else if (
type ==
'D' ) {
3000 else if (
type !=
'P' ) {
3001 ERR_POST_X(14,
"CBAMDataLoader: Bad CIGAR char: "<<
type<<
" in "<<cigar);
3012 count<<
" skipped: "<<skipped<<
" in "<<
sw.
Elapsed());
3029 annot->
SetDesc().Set().push_back(desc);
3031 size_t total_bytes = 0;
3047 "Number of A bases",
3048 "Number of C bases",
3049 "Number of G bases",
3050 "Number of T bases",
3051 "Number of inserts",
3052 "Number of matches",
3058 loc.
SetFrom(graph_range.GetFrom());
3059 loc.
SetTo(graph_range.GetTo());
3060 graph->
SetNumval(graph_range.GetLength());
3064 data.SetValues().assign(ss.
cc[k].begin(), ss.
cc[k].end());
3068 total_bytes += graph_range.GetLength()*
sizeof(
data.GetValues()[0])+10000;
3072 data.SetValues().assign(ss.
cc[k].begin(), ss.
cc[k].end());
3076 total_bytes += graph_range.GetLength()*
sizeof(
data.GetValues()[0])+10000;
3078 annot->
SetData().SetGraph().push_back(graph);
3088 count<<
" skipped: "<<skipped<<
" in "<<
sw.
Elapsed());
bool IsReverse(ENa_strand s)
static const size_t kChunkSize
static bool GetMinMapQualityParam(void)
static const Uint8 kDefaultSplitBinDataSize
NCBI_DEFINE_ERR_SUBCODE_X(30)
static double s_CreateTime
static string GetMapperContext(void)
static const size_t kChunkDataSize
static const double k_make_align_seconds
static const TSeqPos kDefaultSplitBinMinLength
static string GetIncludeAlignTagsParam(void)
std::invoke_result< Call >::type CallWithRetry(Call &&call, const char *name, int retry_count=0)
static const CUser_field & GetIdField(const CUser_field &field, int id)
static TSeqPos s_GetGapToIntronThreshold(void)
static int GetDebugLevel(void)
NCBI_PARAM_DEF(bool, BAM_LOADER, PILEUP_GRAPHS, true)
static const double k_make_graph_seconds
static const Uint8 kSingleAlignBytes
static const double k_make_read_seconds
static Uint8 GetSplitBinDataSize(void)
static bool GetPreferRawIndexOverCoverageGraphParam(void)
static string GetMapperFileName(void)
static bool GetSkipEmptyPileupGraphsParam(void)
static TSeqPos s_GetEnd(const vector< TSeqPos > &over_ends, TSeqPos i, TSeqPos bin_size)
static bool s_GetMakeIntronGraph(void)
static const size_t kSplitLevelsChunkDataSize
static bool GetEstimatedCoverageGraphParam(void)
static bool GetPileupGraphsParam(void)
NCBI_PARAM_DEF_EX(int, BAM_LOADER, DEBUG, 0, eParam_NoThread, BAM_LOADER_DEBUG)
static TSeqPos GetSplitBinMinLength(void)
static const int kMainChunkId
static double s_AttachTime
static Uint1 sx_GetBaseRaw(CTempString read_raw, TSeqPos pos)
NCBI_PARAM_DECL(int, BAM_LOADER, DEBUG)
#define PILEUP_NAME_SUFFIX
@ eChunk_short_seq_pileup
bool operator==(const CBlobId &id) const
bool operator<(const CBlobId &id) const
CBAMBlobId(const CTempString &str)
string ToString(void) const
Get string representation of blob id.
CDataSource::SGiFound GetGi(const CSeq_id_Handle &idh)
CRef< CBAMBlobId > GetRefSeqBlobId(const CSeq_id_Handle &idh)
CBamRefSeqInfo * GetRefSeqInfo(const CBAMBlobId &blob_id)
CBAMDataLoader::TAnnotNames GetPossibleAnnotNames(void) const
bool IsShortSeq(const CSeq_id_Handle &idh)
void LoadChunk(const CBAMBlobId &blob_id, CTSE_Chunk_Info &chunk)
double EstimateLoadSeconds(const CBAMBlobId &blob_id, const CTSE_Chunk_Info &chunk, Uint4 bytes)
CDataSource::SAccVerFound GetAccVer(const CSeq_id_Handle &idh)
string GetLabel(const CSeq_id_Handle &idh)
CBAMDataLoader_Impl(const CBAMDataLoader::SLoaderParams &params)
void GetIds(const CSeq_id_Handle &idh, TIds &ids)
TTaxId GetTaxId(const CSeq_id_Handle &idh)
vector< CSeq_id_Handle > TIds
bool BAMFilesOpened() const
friend class CBamFileInfo
void AddBamFile(const CBAMDataLoader::SBamFileName &bam)
~CBAMDataLoader_Impl(void)
void LoadBAMEntry(const CBAMBlobId &blob_id, CTSE_LoadLock &load_lock)
AutoPtr< IIdMapper > m_IdMapper
CRef< CBAMBlobId > GetShortSeqBlobId(const CSeq_id_Handle &idh)
vector< SDirSeqInfo > TSeqInfos
static void SetMinMapQualityParamDefault(int param)
static void SetIncludeAlignTagsParamDefault(const string &param)
static void SetPileupGraphsParamDefault(bool param)
static string GetIncludeAlignTagsParamDefault(void)
static bool GetPreOpenParam(void)
static void SetEstimatedCoverageGraphParamDefault(bool param)
static void SetSkipEmptyPileupGraphsParamDefault(bool param)
static bool GetPileupGraphsParamDefault(void)
static bool GetEstimatedCoverageGraphParamDefault(void)
vector< CAnnotName > TAnnotNames
static int GetMinMapQualityParamDefault(void)
static void SetPreOpenParam(bool param)
static bool GetSkipEmptyPileupGraphsParamDefault(void)
void SetEstimated(bool estimated=true)
void SetAnnotName(const string &name)
CRef< CSeq_annot > MakeSeq_annot(CBamMgr &mgr, const string &bam_file, const string &bam_index)
Generate Seq-annot for BAM file using BAM file index.
void SetRefLabel(const string &ref_label)
void SetRefId(const CSeq_id &ref_id)
void SetMinMapQuality(int qual)
ISpotIdDetector interface is used to detect spot id in case of incorrect flag combination.
CRef< CSeq_align > GetMatchAlign(void) const
CRef< CBioseq > GetShortBioseq(void) const
CTempString GetShortSequence(void) const
TSeqPos GetShortSequenceLength(void) const
CRef< CSeq_id > GetShortSeq_id(void) const
CRef< CSeq_annot > GetSeq_annot(void) const
TSeqPos GetRefSeqPos(void) const
CTempString GetCIGAR(void) const
TSeqPos GetCIGARRefSize(void) const
CBamRawAlignIterator * GetRawIndexIteratorPtr() const
Uint1 GetMapQuality(void) const
void SetSpotIdDetector(ISpotIdDetector *spot_id_detector)
TSeqPos GetCIGARPos(void) const
bool IsSetStrand(void) const
ENa_strand GetStrand(void) const
size_t CollectPileup(SPileupValues &values, const string &ref_id, CRange< TSeqPos > graph_range, ICollectPileupCallback *callback=0, SPileupValues::EIntronMode intron_mode=SPileupValues::eNoCountIntron, TSeqPos gap_to_intron_threshold=kInvalidSeqPos) const
bool UsesRawIndex() const
bool IncludeAlignTag(CTempString tag)
void SetIdMapper(IIdMapper *idmapper, EOwnership ownership)
TSeqPos GetRefSeqLength(const string &str) const
const string & GetBamName(void) const
void GetRefSeqBlobId(CRef< CBAMBlobId > &ret, const CSeq_id_Handle &idh) const
const string & GetAnnotName(void) const
void GetShortSeqBlobId(CRef< CBAMBlobId > &ret, const CSeq_id_Handle &idh) const
void x_Initialize(const CBAMDataLoader_Impl &impl, const CBAMDataLoader::SBamFileName &bam)
CBamRefSeqInfo * GetRefSeqInfo(const CSeq_id_Handle &seq_id) const
void AddRefSeq(const string &refseq_label, const CSeq_id_Handle &refseq_id)
CBamFileInfo(const CBAMDataLoader_Impl &impl, const CBAMDataLoader::SBamFileName &bam, const string &refseq_label=kEmptyStr, const CSeq_id_Handle &seq_id=CSeq_id_Handle())
TSeqPos GetRefSeqLength(const string &id) const
static Uint8 GetFileSize(CBGZFRange range)
const SBamIndexRefIndex & GetRef(size_t ref_index) const
CBGZFPos GetFilePos() const
bool IsOnMinBinIndexLevel() const
size_t GetRefIndex(const string &ref_label) const
double GetEstimatedSecondsPerByte() const
const CBamIndex & GetIndex() const
TSeqPos GetRefSeqLength(size_t ref_index) const
void AddRefSeqRange(const TRange &range)
TRange GetAlignStartRange() const
const TRange & GetRefSeqRange(void) const
void LoadMainChunk(CTSE_Chunk_Info &chunk_info)
void SetBlobId(CRef< CBAMBlobId > &ret, const CSeq_id_Handle &idh) const
void x_LoadRangesScan(void)
CBamRefSeqInfo(CBamFileInfo *bam_file, const string &refseqid, const CSeq_id_Handle &seq_id)
void x_AddSeqChunk(CTSE_Chunk_Info &chunk_info, const vector< CSeq_id_Handle > &short_ids)
void x_LoadRangesStat(void)
void LoadSeqChunk(CTSE_Chunk_Info &chunk_info)
void LoadPileupChunk(CTSE_Chunk_Info &chunk_info)
const string & GetRefSeqId(void) const
void GetShortSeqBlobId(CRef< CBAMBlobId > &ret, const CSeq_id_Handle &idh) const
CRef< CSeq_entry > m_CovEntry
double EstimatePileupLoadSeconds(const CTSE_Chunk_Info &chunk, Uint4 bytes) const
void LoadMainSplit(CTSE_LoadLock &load_lock)
void LoadAlignChunk(CTSE_Chunk_Info &chunk_info)
double EstimateLoadSeconds(const CTSE_Chunk_Info &chunk, Uint4 bytes) const
const CSeq_id_Handle & GetRefSeq_id(void) const
double EstimateSeqLoadSeconds(const CTSE_Chunk_Info &chunk, Uint4 bytes) const
void CreateChunks(CTSE_Split_Info &split_info)
CRange< TSeqPos > GetChunkGraphRange(size_t range_id)
void LoadMainEntry(CTSE_LoadLock &load_lock)
bool x_LoadRangesEstimated(void)
double EstimateAlignLoadSeconds(const CTSE_Chunk_Info &chunk, Uint4 bytes) const
void LoadChunk(CTSE_Chunk_Info &chunk_info)
CIRef< CBamAlignIterator::ISpotIdDetector > m_SpotIdDetector
bool x_LoadRangesCov(void)
void x_InitAlignIterator(CBamAlignIterator &ait, TSeqPos &max_end_pos, CTSE_Chunk_Info &chunk_info, int base_id)
Blob state exceptions, used by GenBank loader.
void AddSpotId(string &short_id, const CBamAlignIterator *iter)
map< string, SShortSeqInfo > m_ShortSeqs
IdMapper implementation using an external configuration file.
IdMapper base class implementation.
Data loader exceptions, used by GenBank loader.
static SIZE_TYPE ReverseComplement(const string &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst)
const TAnnot & GetAnnot(void) const
const value_type * data() const
string FindAccPathNoThrow(const string &acc)
pair< TBioseqId, TBioseq_setId > TPlace
void x_LoadAnnot(const TPlace &place, const CSeq_annot &annot)
void x_LoadBioseqs(const TPlace &place, const list< CRef< CBioseq > > &bioseqs)
void x_AddUsedMemory(size_t size)
void SetLoaded(CObject *obj=0)
void x_AddBioseqPlace(TBioseq_setId id)
TChunkId GetChunkId(void) const
void x_AddAnnotType(const CAnnotName &annot_name, const SAnnotTypeSelector &annot_type, const TLocationId &location_id)
void x_SetLoadBytes(Uint4 bytes)
const CTSE_Split_Info & GetSplitInfo(void) const
void x_AddBioseqId(const TBioseqId &id)
CTSE_Split_Info & GetSplitInfo(void)
void SetSeq_entry(CSeq_entry &entry, CTSE_SetObjectInfo *set_info=0)
void AddChunk(CTSE_Chunk_Info &chunk_info)
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
CConstRef< CUser_field > GetFieldRef(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
const CUser_field & GetField(const string &str, const string &delim=".", NStr::ECase use_case=NStr::eCase) const
Access a named field in this user object.
static void CopyBuffer(const int *src, size_t count, int *dest)
Copy memory buffer (only when source and destination do not overlap!).
static void ConvertBuffer(const char *src, size_t count, int *dest)
Convert memory buffer elements from one type to another.
static void AppendZeros(vector< V, A > &dest, size_t count)
Append count zeros to dest vector vector must have enough memory reserved.
static void SplitBufferInto4(const int *src, size_t count, int *dest0, int *dest1, int *dest2, int *dest3)
Split source memory buffer into 4 buffers Source buffer contains 4*count elements Each destination bu...
static V * AppendUninitialized(vector< V, A > &dest, size_t count)
Append count uninitialized elements to dest vector; return pointer to appended elements for proper initi...
static void AppendZerosAligned16(vector< V, A > &dest, size_t count)
Append count zeros to dest vector vector must have enough memory reserved dst.end() pointer and count...
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator end() const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
Include a standard set of the NCBI C++ Toolkit most basic headers.
constexpr auto begin(const ct_const_array< T, N > &in) noexcept
constexpr auto end(const ct_const_array< T, N > &in) noexcept
static const struct name_t names[]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
void reset(element_type *p=0, EOwnership ownership=eTakeOwnership)
Reset will delete the old pointer (if owned), set content to the new value, and assume the ownership ...
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int TSignedSeqPos
Type for signed sequence position.
SStrictId_Tax::TId TTaxId
Taxon id type.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
element_type * release(void)
Release will release ownership of pointer to caller.
@ eNoOwnership
No ownership is assumed.
#define LOG_POST_X(err_subcode, message)
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
virtual const char * what(void) const noexcept
Standard report (includes full backlog).
void Info(CExceptionArgs_Base &args)
string GetBase(void) const
Get the base entry name without extension.
C * SerialClone(const C &src)
Create on heap a clone of the source object.
@ eSerial_AsnBinary
ASN.1 binary.
CConstRef< CSeq_id > GetSeqId(void) const
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
string AsString(void) const
string GetLabel(const CSeq_id &id)
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
static CObjectIStream * Open(ESerialDataFormat format, CNcbiIstream &inStream, bool deleteInStream)
Create serial object reader and attach it to an input stream.
TObjectType * GetNCPointer(void) const THROWS_NONE
Get pointer.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
@ eParam_NoThread
Do not use per-thread values.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
position_type GetLength(void) const
TThisType & SetToOpen(position_type toOpen)
position_type GetToOpen(void) const
static TThisType GetWhole(void)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
NCBI_NS_STD::string::size_type SIZE_TYPE
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
const char * data(void) const
Return a pointer to the array represented.
size_type size(void) const
Return the length of the represented array.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
void Stop(void)
Suspend the timer.
void Start(void)
Start the timer.
@ eStart
Start timer immediately after creating.
void SetFrom(TFrom value)
Assign a value to From data member.
TTo GetTo(void) const
Get the To member data.
TFrom GetFrom(void) const
Get the From member data.
bool IsId(void) const
Check if variant Id is selected.
const TData & GetData(void) const
Get the Data member data.
const TFields & GetFields(void) const
Get the variant data.
vector< CRef< CUser_field > > TFields
bool IsSetLabel(void) const
field label Check if a value has been assigned to Label data member.
TInt GetInt(void) const
Get the variant data.
TReal GetReal(void) const
Get the variant data.
const TLabel & GetLabel(void) const
Get the Label member data.
TId GetId(void) const
Get the variant data.
void SetTo(TTo value)
Assign a value to To data member.
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
const TInt & GetInt(void) const
Get the variant data.
void SetNumval(TNumval value)
Assign a value to Numval data member.
const TGraph & GetGraph(void) const
Get the Graph member data.
TB GetB(void) const
Get the B member data.
const TByte & GetByte(void) const
Get the variant data.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
bool IsByte(void) const
Check if variant Byte is selected.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
TNumval GetNumval(void) const
Get the Numval member data.
TA GetA(void) const
Get the A member data.
TComp GetComp(void) const
Get the Comp member data.
const TSeq & GetSeq(void) const
Get the variant data.
TSet & SetSet(void)
Select the variant.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
void SetId(TId &value)
Assign a value to Id data member.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
void SetData(TData &value)
Assign a value to Data data member.
const Tdata & Get(void) const
Get the member data.
const TGraph & GetGraph(void) const
Get the variant data.
list< CRef< CSeq_graph > > TGraph
void SetDesc(TDesc &value)
Assign a value to Desc data member.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
TName & SetName(void)
Select the variant.
list< CRef< CSeq_align > > TAlign
const TDesc & GetDesc(void) const
Get the Desc member data.
const TData & GetData(void) const
Get the Data member data.
list< CRef< CSeq_annot > > TAnnot
list< CRef< CAnnotdesc > > Tdata
unsigned int
A callback function used to compare two keys in a database.
Definition of all error codes used in SRA C++ support libraries.
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
const struct ncbi::grid::netcache::search::fields::SIZE size
string s_Value(TValue value)
const GenericPointer< typename T::ValueType > T2 value
void SleepMilliSec(unsigned long ml_sec, EInterruptOnSignal onsignal=eRestartOnSignal)
std::istream & in(std::istream &in_, double &x_)
Helper classes and templates to implement plugins.
const unsigned int kDefaultRetryCount
static bool GetSeqId(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
vector< SBamFileName > m_BamFiles
TCount get_max_count(int type) const
const TCount * get_intron_counts() const
CSimpleBufferT< TCount > cc_gap
CSimpleBufferT< TCount > cc_intron
CSimpleBufferT< TCount > cc_match
CSimpleBufferT< SCountACGT > cc_acgt
const TCount * get_acgt_counts() const
const TCount * get_split_acgt_counts(int k, TSeqPos len) const
const TCount * get_gap_counts() const
Better replacement of GetAccVer(), this method should be defined in data loaders, GetAccVer() is left...
Better replacement of GetGi(), this method should be defined in data loaders, GetGi() is left for com...
constexpr TSeqPos GetMinBinSize() const
vector< Uint8 > EstimateDataSizeByAlnStartPos(TSeqPos seqlen=kInvalidSeqPos) const
TSeqPos gap_to_intron_threshold
void add_match(TSeqPos pos)
TCount get_max_count(int type) const
void add_gap(TSignedSeqPos gap_pos, TSeqPos gap_len)
void x_finish_add(EStat stat)
void add_base_raw(TSeqPos pos, Uint1 b)
void x_add_gap_or_intron(TSignedSeqPos gap_pos, TSeqPos gap_len, EStat stat)
void add_base(TSeqPos pos, char b)
void get_maxs(TCount(&c_max)[kNumStat]) const
vector< TCount > cc[kNumStat]
void add_intron(TSignedSeqPos gap_pos, TSeqPos gap_len)
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)