57 #include <ncbi/ncbi.h>
58 #include <insdc/insdc.h>
60 #include <vdb/vdb-priv.h>
67 #define USE_GLOBAL_AMBIGUITY_CACHE
69 #ifdef USE_GLOBAL_AMBIGUITY_CACHE
70 # define DEFAULT_AMBIGUITY_CACHE_SIZE "128MB"
72 # define DEFAULT_AMBIGUITY_CACHE_SIZE "32MB"
77 #define NCBI_USE_ERRCODE_X WGSReader
117 static bool v =
NCBI_PARAM_TYPE(WGS, USE_AMBIGUITY_MASK)::GetDefault();
153 static bool v =
NCBI_PARAM_TYPE(WGS, USE_FULL_4NA_BLOCKS)::GetDefault();
206 #ifdef COLLECT_PROFILE
212 SProfiler() : name(0),
count(0) {}
218 struct SProfilerGuard
221 SProfilerGuard(SProfiler&
sw,
const char* name)
234 static SProfiler sw_Serialize;
235 static SProfiler sw_Feat;
236 static SProfiler sw_GetAccSeq_id;
237 static SProfiler sw_GetBioseq;
238 static SProfiler sw_GetSeq_entry;
239 static SProfiler sw_GetSeq_entryData;
240 static SProfiler sw_GetSplitInfo;
241 static SProfiler sw_GetSplitInfoData;
242 static SProfiler sw_InitSplit;
243 static SProfiler sw_GetFeatLocIdTypeRange;
244 static SProfiler sw_GetFeatLocIdTypeFeat;
245 static SProfiler sw_GetFeatLocIdTypeFeatBytes;
246 static SProfiler sw_GetFeatBytes;
247 static SProfiler sw_GetChunk;
248 static SProfiler sw_CreateQualityChunk;
249 static SProfiler sw_CreateDataChunk;
250 static SProfiler sw_CreateProductsChunk;
251 static SProfiler sw_CreateFeaturesChunk;
252 static SProfiler sw__GetProtFeat;
253 static SProfiler sw___GetProtAnnot;
254 static SProfiler sw___GetProtInst;
255 static SProfiler sw___GetProtDescr;
256 static SProfiler sw____GetProtWGSAcc;
257 static SProfiler sw____GetProtAccVer;
258 static SProfiler sw____GetProtAcc;
259 static SProfiler sw____GetProtGI;
260 static SProfiler sw____GetProtGISeq_id;
261 static SProfiler sw____GetProtGnlSeq_id;
262 static SProfiler sw____GetProtAccSeq_id;
263 static SProfiler sw___GetProtIds;
264 static SProfiler sw__GetProtBioseq;
265 static SProfiler sw_GetProtEntry;
266 static SProfiler sw__GetScaffoldFeat;
267 static SProfiler sw___GetScaffoldQual;
268 static SProfiler sw___GetScaffoldAnnot;
269 static SProfiler sw___GetScaffoldInst;
270 static SProfiler sw___GetScaffoldDescr;
271 static SProfiler sw___GetScaffoldIds;
272 static SProfiler sw__GetScaffoldBioseq;
273 static SProfiler sw_GetScaffoldEntry;
274 static SProfiler sw__GetContigFeat;
275 static SProfiler sw___GetContigQual;
276 static SProfiler sw____GetContigQualSize;
277 static SProfiler sw____GetContigQualData;
278 static SProfiler sw____GetContigQualMinMax;
279 static SProfiler sw___GetContigAnnot;
280 static SProfiler sw____IsGap;
281 static SProfiler sw____Get2naLen;
282 static SProfiler sw____Get4naLen;
283 static SProfiler sw____GetGapLen;
284 static SProfiler sw____GetRaw2na;
285 static SProfiler sw____GetRaw4na;
286 static SProfiler sw____GetAmb2Mask;
287 static SProfiler sw____Get4na2Mask;
288 static SProfiler sw____Scan4na;
289 static SProfiler sw____GetCvt4na;
290 static SProfiler sw____GetAmb4na;
291 static SProfiler sw____GetBlk4na;
292 static SProfiler sw____SetGaps;
293 static SProfiler sw___GetContigInst;
294 static SProfiler sw___GetContigDescr;
295 static SProfiler sw___GetContigIds;
296 static SProfiler sw__GetContigBioseq;
297 static SProfiler sw_GetContigEntry;
298 static SProfiler sw_FeatIterator;
299 static SProfiler sw_ProtIterator;
300 static SProfiler sw_ScafIterator;
301 static SProfiler sw_SeqIterator;
302 static SProfiler sw_WGSOpen;
304 # define PROFILE(var) SProfilerGuard guard(var, #var)
306 # define PROFILE(var)
366 dst.assign(
data.begin()+2,
data.end()-2);
369 dst.assign(
data.begin(),
data.end());
403 out.Write(
info.m_Bytes.data(),
info.m_Bytes.size());
576 m_READ_2na(m_Cursor,
"(INSDC:2na:packed)READ",
588 if (
s_UseAmbiguity4na() && m_GAP_START && m_GAP_LEN && m_AMBIGUITY_POS && m_AMBIGUITY_4NA ) {
593 m_AMBIGUITY_POS.Reset();
594 m_AMBIGUITY_4NA.Reset();
598 m_QUALITY.ResetIfAlwaysEmpty(
m_Cursor);
778 type.FindVariant(
"str")
783 type.FindMember(
"key")
788 type.FindMember(
"db")
793 type.FindMember(
"qual")
1099 size_t byte_index = block_index/8;
1100 Uint1 byte_bit = 1<<(block_index%8);
1105 size_t byte_index = block_index/8;
1106 Uint1 byte_bit = 1<<(block_index%8);
1163 void Advance(S4naReader& reader)
const;
1167 template<
class Value>
1170 dst.resize(src.
size());
1171 copy_n(src.
begin(), src.
size(), dst.data());
1176 : m_Prefix(db.GetIdPrefixWithVersion()),
1178 m_HasGapInfo(
false),
1179 m_HasAmbiguityMask(
false),
1180 m_HasAmbiguityPos(
false),
1181 m_Has4naBlocks(
false)
1183 if ( cur.m_GAP_START ) {
1188 if ( cur.m_GAP_LINKAGE ) {
1194 const bool kVerify4na =
false;
1195 vector<Uint1> m_ExpectedAmbiguityMask;
1196 vector<INSDC_coord_zero> m_ExpectedAmbiguityPos;
1197 vector<INSDC_4na_bin> m_ExpectedAmbiguity4na;
1207 if ( cur.m_AMBIGUITY_MASK ) {
1212 if ( cur.m_AMBIGUITY_POS && cur.m_AMBIGUITY_4NA ) {
1222 size_t mask_bit_count = 0;
1245 for (
size_t block_index = 0;
1251 size_t byte_index = block_index/8;
1252 Uint1 byte_bit = 1<<(block_index%8);
1253 exp_bit = byte_index < m_ExpectedAmbiguityMask.size() &&
1254 (m_ExpectedAmbiguityMask[byte_index] & byte_bit);
1256 if ( bit != exp_bit ) {
1258 "mask["<<block_index<<
" = "<<oct<<block_index<<dec<<
"] "<<bit<<
", expected "<<exp_bit);
1261 size_t index = 0, exp_index = 0;
1262 while ( index <
m_AmbiguityPos.size() || exp_index < m_ExpectedAmbiguityPos.size() ) {
1265 TSeqPos exp_pos = exp_index < m_ExpectedAmbiguityPos.size()? m_ExpectedAmbiguityPos[exp_index]:
kInvalidSeqPos;
1266 int exp_base = exp_index < m_ExpectedAmbiguityPos.size()? m_ExpectedAmbiguity4na[exp_index]: 0;
1267 if ( pos == exp_pos ) {
1268 if ( base != exp_base ) {
1270 "amb["<<pos<<
" = "<<oct<<pos<<dec<<
"] "<<base<<
", expected "<<exp_base);
1275 else if ( pos < exp_pos ) {
1277 "amb["<<pos<<
" = "<<oct<<pos<<dec<<
"] "<<base<<
", expected -");
1282 "amb["<<exp_pos<<
" = "<<oct<<exp_pos<<dec<<
"] -, expected "<<exp_base);
1293 size_t memory = GetUsedMemory();
1295 LOG_POST(
"~SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") "
1303 const size_t kAllocateGap =
sizeof(
void*)*2;
1304 size_t ret = kAllocateGap +
sizeof(*this);
1305 ret += kAllocateGap + m_GapStart.size()*
sizeof(m_GapStart.front());
1306 ret += kAllocateGap + m_GapLen.size()*
sizeof(m_GapLen.front());
1307 ret += kAllocateGap + m_GapProps.size()*
sizeof(m_GapProps.front());
1308 ret += kAllocateGap + m_GapLinkage.size()*
sizeof(m_GapLinkage.front());
1309 ret += kAllocateGap + m_AmbiguityMask.size()*
sizeof(m_AmbiguityMask.front());
1310 if ( m_HasAmbiguityPos || m_Has4naBlocks ) {
1312 ret += kAllocateGap + m_AmbiguityPos.size()*
sizeof(m_AmbiguityPos.front());
1313 ret += kAllocateGap + m_Ambiguity4na.size()*
sizeof(m_Ambiguity4na.front());
1314 const size_t kBlockUsedMemory =
1315 kAllocateGap + 4*
sizeof(
void*) +
sizeof(
S4naBlock);
1316 ret += kBlockUsedMemory * m_4naBlocks.size();
1328 gap_info.
gaps_len = m_GapLen.data();
1348 return b && !(
b&(
b-1));
1356 for ( ; ptr != end; ++ptr ) {
1385 return offset+base_count;
1404 if ( gap_info.
IsInGap(pos) ) {
1430 static const unsigned char table[16] = {
1431 0x11, 0x12, 0x14, 0x18,
1432 0x21, 0x22, 0x24, 0x28,
1433 0x41, 0x42, 0x44, 0x48,
1434 0x81, 0x82, 0x84, 0x88
1436 return table[bits_2na & 0xf];
1462 while ( base_count >= 4 ) {
1463 char bits_2na = src_2na[0];
1471 char bits_2na = src_2na[0] & (0xff00 >> base_count*2);
1474 if ( base_count < 2 ) {
1477 dst_4na[0] = bits_4na;
1479 if ( base_count > 2 ) {
1489 const vector<char>& src_2na_vec,
1492 size_t dst_4na_byte_count = (base_count+1)/2;
1494 dst_4na_vec.reserve((dst_4na_byte_count+7)/8*8);
1495 dst_4na_vec.resize(dst_4na_byte_count);
1507 char& dst = dst_4na_vec[
offset/2];
1509 dst = (dst & 0xf) | (amb << 4);
1512 dst = (dst & 0xf0) | amb;
1524 char* dst = dst_4na_vec.data()+ (
offset/2);
1531 while (
len >= 2 ) {
1546 const char* src_4na,
TSeqPos src_offset,
1549 if ( !base_count ) {
1552 dst_4na += dst_offset/2;
1554 src_4na += src_offset/2;
1557 if ( dst_offset != 0 ) {
1558 Uint1 dst_b = dst_4na[0];
1559 Uint1 src_b = src_4na[0];
1560 src_4na += src_offset;
1561 if ( !src_offset ) {
1565 dst_b = (dst_b & 0xf0) | (src_b & 0xf);
1572 if ( src_offset == 0 ) {
1573 size_t copy_bytes = base_count / 2;
1574 dst_4na = copy_n(src_4na, copy_bytes, dst_4na);
1575 src_4na += copy_bytes;
1579 while ( base_count >= 2 ) {
1580 Uint1 src_b0 = src_4na[0];
1581 Uint1 src_b1 = src_4na[1];
1582 Uint1 dst_b = (src_b0 << 4) | (src_b1 >> 4);
1591 Uint1 dst_b = dst_4na[0];
1592 Uint1 src_b = src_4na[0];
1596 dst_b = (dst_b & 0xf) | (src_b & 0xf0);
1605 const Uint1* src_4na,
1608 while ( base_count >= 2 ) {
1609 auto b0 = src_4na[0];
1610 auto b1 = src_4na[1];
1611 auto packed_bb = (b0 << 4)+b1;
1612 *dst_packed_4na = packed_bb;
1618 auto b0 = src_4na[0];
1619 auto packed_bb = (b0 << 4);
1620 *dst_packed_4na = packed_bb;
1628 const vector<INSDC_coord_zero>& amb_pos,
1629 const vector<INSDC_4na_bin>& amb_4na)
1631 auto iter_pos = lower_bound(amb_pos.begin(), amb_pos.end(),
INSDC_coord_zero(pos));
1632 auto iter_4na = amb_4na.begin() + (iter_pos-amb_pos.begin());
1634 for ( ; iter_pos != amb_pos.end() && *iter_pos < end; ++iter_pos, ++iter_4na ) {
1635 s_Set_4na(dst_4na_vec, *iter_pos-pos, *iter_4na);
1647 for (
auto iter =
blocks.lower_bound(block_pos);
1648 iter !=
blocks.end() && iter->first < end;
1650 TSeqPos block_pos = iter->first;
1654 if ( block_pos < pos ) {
1656 src_offset = pos-block_pos;
1660 dst_offset = block_pos-pos;
1664 s_Copy_4na(dst_4na_vec.data(), dst_offset, iter->second.m_Packed4na, src_offset, copy_len);
1676 for ( ;
len > 0; ) {
1677 if ( gap_info.
IsInGap(pos) ) {
1712 bool ambiguous =
false;
1715 if ( gap_info.
IsInGap(pos) ) {
1728 m_AmbiguityPos.push_back(pos+
i);
1729 m_Ambiguity4na.push_back(
b);
1743 if ( m_HasAmbiguityMask ) {
1747 if ( m_HasAmbiguityPos ) {
1750 if (
size_t ambiguity_count = m_AmbiguityPos.size() ) {
1752 size_t last_byte_index = last_block_index/8;
1753 m_AmbiguityMask.resize(last_byte_index+1);
1754 for (
size_t i = 0;
i < ambiguity_count; ++
i ) {
1759 size_t memory = GetUsedMemory();
1760 size_t mask_bit_count = 0;
1761 for (
auto bb : m_AmbiguityMask ) {
1768 LOG_POST(
"SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") "
1769 <<
"calculated mask from ambiguities, "
1785 cur4na = db.
Seq4na(m_RowId);
1786 read4na = cur4na->READ(m_RowId);
1792 size_t mask_bit_count = 0;
1793 m_AmbiguityMask.resize((block_count+7)/8);
1795 for (
size_t block_index = 0; block_index < block_count; ++block_index ) {
1797 const Uint1* base_ptr = read4na.
data() + block_pos;
1799 bool ambiguous =
false;
1800 if ( use_full_4na_blocks ) {
1801 ambiguous = x_AddAmbiguousBlock(base_ptr, base_count, block_pos, gap_info);
1804 ambiguous = x_AddAmbiguities(base_ptr, base_count, block_pos, gap_info);
1807 x_SetAmbiguousBlock(block_index);
1811 if ( use_full_4na_blocks ) {
1812 m_Has4naBlocks =
true;
1815 m_HasAmbiguityPos =
true;
1819 size_t memory = GetUsedMemory();
1821 LOG_POST(
"SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") "
1822 "calculated mask from read, "
1830 m_HasAmbiguityMask =
true;
1837 if ( m_HasAmbiguityPos || m_Has4naBlocks ) {
1847 size_t bit_count = 0;
1848 size_t wrong_bit_count = 0;
1850 for (
size_t block_byte = 0; block_byte < m_AmbiguityMask.size(); ++block_byte ) {
1851 if (
auto bits = m_AmbiguityMask[block_byte] ) {
1854 cur4na = db.
Seq4na(m_RowId);
1855 read4na = cur4na->READ(m_RowId);
1858 for (
size_t block_bit = 0; block_bit < 8; ++block_bit ) {
1859 if ( bits & (1<<block_bit) ) {
1861 size_t block_index = block_byte*8+block_bit;
1863 const Uint1* base_ptr = read4na.
data() + block_pos;
1865 bool ambiguous =
false;
1866 gap_info.
SetPos(block_pos);
1867 if ( use_full_4na_blocks ) {
1868 ambiguous = x_AddAmbiguousBlock(base_ptr, base_count, block_pos, gap_info);
1871 ambiguous = x_AddAmbiguities(base_ptr, base_count, block_pos, gap_info);
1879 LOG_POST(
"SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") "
1880 <<
"wrong bit set at "<<block_pos);
1887 if ( use_full_4na_blocks ) {
1888 m_Has4naBlocks =
true;
1891 m_HasAmbiguityPos =
true;
1896 size_t memory = GetUsedMemory();
1898 LOG_POST(
"SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") "
1899 <<
"calculated 4na, "
1906 for (
size_t i = 0;
i < 2 &&
i < m_AmbiguityPos.size(); ++
i ) {
1907 LOG_POST(
"SAmbiguityInfo("<<m_Prefix<<
"/"<<m_RowId<<
") "
1908 <<
"ambiguity at "<<m_AmbiguityPos[
i]<<
" - "<<m_Ambiguity4na[
i]*1);
1917 if ( m_HasAmbiguityPos ) {
1939 if ( m_HasAmbiguityPos ) {
1942 lower_bound(m_AmbiguityPos.begin(), m_AmbiguityPos.end(),
INSDC_coord_zero(pos)) - m_AmbiguityPos.begin();
1958 if ( m_HasAmbiguityPos ) {
1985 return base == 0xf? eBase_Gap:
sx_Is2na(base)? eBase_2na: eBase_4na;
1995 if ( m_HasAmbiguityPos ) {
2031 if ( m_HasAmbiguityPos ) {
2032 auto iter = lower_bound(m_AmbiguityPos.begin(), m_AmbiguityPos.end(),
INSDC_coord_zero(pos));
2033 if ( iter == m_AmbiguityPos.end() ||
TSeqPos(*iter) >= end ) {
2041 for (
auto block_iter = m_4naBlocks.lower_bound(block_pos);
2042 block_iter != m_4naBlocks.end() && block_iter->first < end;
2044 size_t in_block_pos = pos <= block_iter->first? 0: pos-block_iter->first;
2047 in_block_pos, in_block_len));
2048 if ( amb_pos < in_block_pos+in_block_len ) {
2049 return (block_iter->first+amb_pos) - pos;
2064 if (
len < stop_2na_len ) {
2067 S4naReader reader = Get4naReader(pos, db, cur);
2068 TSeqPos rem_len =
len, len2na = 0, gap_len = 0;
2074 for ( ; rem_len; --rem_len, Advance(reader) ) {
2075 auto base_type = GetBaseType(reader);
2076 if ( base_type == eBase_2na ) {
2077 if ( len2na == stop_2na_len-1 ) {
2078 return len-(rem_len+len2na);
2087 if ( gap_len == stop_gap_len-1 ) {
2088 return len-(rem_len+gap_len);
2095 _ASSERT(len2na < stop_2na_len);
2105 S4naReader reader = Get4naReader(pos, db, cur);
2107 for ( ; rem_len; --rem_len, Advance(reader) ) {
2109 auto base_type = GetBaseType(reader);
2110 if ( base_type != eBase_Gap ) {
2126 size_t bytes = (
len+3)/4;
2128 data.reserve((bytes+7)/8*8);
2144 auto seq_2na = Get2na(pos,
len, cur);
2148 if ( m_HasAmbiguityPos ) {
2171 while ( pos != end ) {
2173 if ( x_AmbiguousBlock(block_index) ) {
2187 while ( pos != end ) {
2189 if ( !x_AmbiguousBlock(block_index) ) {
2199 #ifdef USE_GLOBAL_AMBIGUITY_CACHE
2213 #ifdef USE_GLOBAL_AMBIGUITY_CACHE
2228 size_t used_memory =
info->GetUsedMemory();
2229 #ifdef USE_GLOBAL_AMBIGUITY_CACHE
2242 if ( !seq->m_Cursor.TryOpenRow(1) ) {
2252 if ( m_IdRowDigits < 6 || m_IdRowDigits > 8 ) {
2254 "CWGSDb: bad WGS accession format: "<<acc);
2279 bool has_static_taxid = seq->m_TAXID && seq->m_TAXID.IsStatic(seq->m_Cursor);
2281 if ( has_static_taxid ) {
2282 auto value = seq->TAXID(1);
2283 if (
value.size() != 1 ) {
2284 has_static_taxid =
false;
2287 static_taxid =
value[0];
2297 size_t size = node.GetSize();
2309 if ( node.GetSize() != 0 ) {
2325 #ifdef USE_TEST_PATH
2328 if ( !test_path.empty() ) {
2337 if ( !vol_path.
empty() ) {
2338 vector<CTempString> dirs;
2340 ITERATE ( vector<CTempString>, it, dirs ) {
2352 path_or_acc.
find(
'.') == string::npos ) {
2359 string acc = path_or_acc.
substr(start);
2360 size_t acclen = acc.
size();
2361 size_t digit_pos = acc.find_first_of(
"0123456789");
2362 if (digit_pos == string::npos && (acclen == 4 || acclen == 6)) {
2363 return string(path_or_acc) +
"00";
2364 }
else if ((digit_pos == 4 || digit_pos == 6) &&
2365 acclen > digit_pos + 2) {
2367 return path_or_acc.
substr(0, start+digit_pos+2);
2376 atomic<bool>& table_is_opened,
2380 if ( !table_is_opened.load(memory_order_acquire) ) {
2382 table_is_opened.store(
true, memory_order_release);
2390 atomic<Int1>& index_is_opened,
2391 const char* index_name,
2392 const char* backup_index_name)
2396 if ( !index_is_opened.load(memory_order_acquire) ) {
2403 else if ( backup_index_name ) {
2410 index_is_opened.store(
type, memory_order_release);
2414 index_is_opened.store(-1, memory_order_release);
2452 "contig_name_uc",
"contig_name");
2459 "scaffold_name_uc",
"scaffold_name");
2466 "protein_name_uc",
"protein_name");
2473 "product_name_uc",
"product_name");
2484 pair<TVDBRowId, CWGSDb_Impl::ERowType>
2486 TAllowRowType allow_type)
2491 if (prefix_len ==
NPOS || prefix_len >= acc.
size() - 2)
2493 else prefix_len += 2;
2496 if (
row[0] ==
'S' ) {
2503 else if (
row[0] ==
'P' ) {
2516 if ( ret.first < 0 ) {
2526 if ( is_scaffold ) {
2529 pair<TVDBRowId, TRowType> rt =
ParseRowType(acc, allow_type);
2530 if ( is_scaffold ) {
2541 if (
const CTextseq_id* text_id =
id.GetTextseq_Id() ) {
2551 if (
const CTextseq_id* text_id =
id.GetTextseq_Id() ) {
2552 const_cast<CTextseq_id*
>(text_id)->SetAccession(accession);
2570 const bool kSetErrno = 0;
2571 const bool kSetNcbiError = 0;
2573 int error = 0, ret = -1;
2579 unsigned v =
str.data()[0] -
'0';
2584 for (
size_t i = 1;
i <
len; ++
i) {
2585 unsigned d =
str.data()[
i] -
'0';
2590 unsigned nv = v * 10 + d;
2591 const unsigned kOverflowLimit = (INT_MAX - 9) / 10 + 1;
2592 if ( v >= kOverflowLimit ) {
2594 if ( v > kOverflowLimit || nv > INT_MAX) {
2602 ret =
static_cast<int>(v);
2609 if (kSetNcbiError &&
error) {
2623 if (
str.size() == 1 ||
str.data()[0] !=
'0' ) {
2675 if ( !bytes.
empty() ) {
2683 for (
auto& desc :
tmp.Set() ) {
2684 descr.
Set().push_back(desc);
2688 while (
in.HaveMoreData() ) {
2691 descr.
Set().push_back(desc);
2700 if ( !bytes.
empty() ) {
2702 while (
in.HaveMoreData() ) {
2705 annot_set.push_back(annot);
2714 split_id.
SetGi(
id.GetGi());
2725 split_id.
SetGi(
id.GetGi());
2737 split_ids.push_back(split_id);
2755 loc_gi.
SetGi(
id.GetGi());
2780 if ( prefix.
empty() ) {
2785 CDbtag& dbtag =
id->SetGeneral();
2786 dbtag.
SetDb(prefix);
2794 TGnlIdFlags gnl_id_flags)
const
2801 CDbtag& dbtag =
id->SetGeneral();
2803 if ( colon !=
NPOS ) {
2805 tag =
tag.substr(colon+1);
2812 tag[db.size()] ==
':' ) {
2813 tag =
tag.substr(db.size()+1);
2834 TGnlIdFlags gnl_id_flags)
const
2836 if (
str.empty() ) {
2855 if (
str.empty() ) {
2862 if ( cur.m_SEQID_GNL_PREFIX ) {
2876 if (
str.empty() ) {
2883 if ( cur.m_SEQID_GNL_PREFIX ) {
2897 if (
str.empty() ) {
2904 if ( cur.m_SEQID_GNL_PREFIX ) {
2917 if ( !acc.
empty() ) {
2960 master_acc.resize(master_acc.size() + 2 +
m_IdRowDigits,
'0');
3055 str >> *master_entry;
3060 if ( id->IsPatent() ) {
3100 if ( master_entry->IsSetDescr() ) {
3110 switch ( desc.
Which() ) {
3113 return eDescr_force;
3120 return eDescr_default;
3125 if ( name ==
"DBLink" ||
3126 name ==
"GenomeProjectsDB" ||
3127 name ==
"StructuredComment" ||
3128 name ==
"FeatureFetchPolicy" ||
3129 name ==
"Unverified") {
3130 return eDescr_default;
3164 if (uo_type ==
"StructuredComment") {
3166 if ((*it)->GetLabel().IsStr() &&
3167 (*it)->GetLabel().GetStr() ==
"StructuredCommentPrefix") {
3168 string data = ((*it)->GetData().IsStr() ?
3169 (
string) (*it)->GetData().GetStr() :
3171 uo_type +=
"|" +
data;
3186 if (!uo_type.empty() && existing_uo_types.count(uo_type) == 0) {
3187 existing_uo_types.
insert(uo_type);
3194 unsigned type_mask = 0;
3199 type_mask |= 1 << desc.
Which();
3205 type_mask |= 1 << desc->Which();
3215 (type_mask & (1 << desc.
Which())) ) {
3218 if (!uo_type.empty() && existing_uo_types.count(uo_type) == 0)
3224 descr.
Set().push_back(*it);
3228 auto& user_object = desc->
SetUser();
3230 user_object.SetData();
3231 descr.
Set().push_back(desc);
3304 method<<
": GI is too big: "<<gi);
3314 if ( idx->m_NUC_ROW_ID ) {
3316 idx->m_NUC_ROW_ID.GetRowIdRange(idx->m_Cursor);
3317 if ( row_range.second ) {
3318 ret.first =
s_ToGi(row_range.first,
3319 "CWGSDb::GetNucGiRange()");
3320 ret.second =
s_ToGi(row_range.first + row_range.second - 1,
3321 "CWGSDb::GetNucGiRange()");
3334 if ( idx->m_PROT_ROW_ID ) {
3336 idx->m_PROT_ROW_ID.GetRowIdRange(idx->m_Cursor);
3337 if ( row_range.second ) {
3338 ret.first =
s_ToGi(row_range.first,
3339 "CWGSDb::GetProtGiRange()");
3340 ret.second =
s_ToGi(row_range.first + row_range.second - 1,
3341 "CWGSDb::GetProtGiRange()");
3352 if ( ranges.empty() ) {
3355 sort(ranges.begin(), ranges.end());
3356 TGiRanges::iterator dst = ranges.begin();
3357 for ( TGiRanges::iterator
i = dst+1;
i != ranges.end(); ++
i ) {
3358 if (
i->GetFrom() == dst->GetToOpen() ) {
3359 dst->SetToOpen(
i->GetToOpen());
3365 ranges.erase(dst+1, ranges.end());
3375 TIntId gi_start = -1, gi_end = -1;
3376 TVDBRowIdRange row_range = seq->m_GI.GetRowIdRange(seq->m_Cursor);
3378 row_id = row_range.first+
i;
3383 if ( gi != gi_end ) {
3384 if ( gi_end != gi_start ) {
3385 ranges.push_back(
TGiRange(gi_start, gi_end));
3391 if ( gi_end != gi_start ) {
3392 ranges.push_back(
TGiRange(gi_start, gi_end));
3412 while ( prefix < acc.
size() &&
isalpha(acc[prefix]&0xff) ) {
3415 if ( prefix == acc.
size() || prefix == 0 || acc.
size()-prefix > 9 ) {
3422 if ( c < '0' || c >
'9' ) {
3436 string acc = m_AccPrefix;
3437 acc.resize(m_IdLength,
'0');
3438 for (
SIZE_TYPE i = m_IdLength; id;
id /= 10 ) {
3439 acc[--
i] +=
id % 10;
3450 TVDBRowIdRange row_range = seq->m_GB_ACCESSION.GetRowIdRange(seq->m_Cursor);
3452 row_id = row_range.first+
i;
3454 if ( acc.
empty() ) {
3463 if ( it == ranges.
end() || it->first !=
info ) {
3468 if ( id < it->second.GetFrom() ) {
3469 it->second.SetFrom(
id);
3471 else if (
id >= it->second.GetToOpen() ) {
3472 it->second.SetTo(
id);
3484 pair<TVDBRowId, bool> ret;
3487 if ( idx->m_NUC_ROW_ID ) {
3490 if ( !
value.empty() ) {
3494 if ( !ret.first && idx->m_PROT_ROW_ID ) {
3497 if ( !
value.empty() ) {
3512 if ( idx->m_NUC_ROW_ID ) {
3515 if ( !
value.empty() ) {
3530 if ( idx->m_PROT_ROW_ID ) {
3533 if ( !
value.empty() ) {
3568 if ( seq->m_CONTIG_NAME_ROW_RANGE ) {
3569 seq->m_Cursor.SetParam(
"CONTIG_NAME_QUERY", name);
3573 if ( !
value.empty() ) {
3614 const char* query_param_name;
3615 if (
NStr::Equal(idx->m_ROW_ID.GetName(),
"ROW_ID") ) {
3616 query_param_name =
"NAME_QUERY";
3619 query_param_name =
"ACCESSION_QUERY";
3624 if ( !
value.empty() ) {
3629 if ( !prot_rows.
empty() ) {
3630 if ( ask_version > 0 ) {
3632 size_t version_index = size_t(prot_rows.
size() == 1? 0: ask_version-1);
3633 if ( version_index < prot_rows.
size() ) {
3635 prot_row_id = prot_rows[version_index];
3636 if ( prot_row_id ) {
3638 int actual_version = *
prot->ACC_VERSION(prot_row_id);
3640 if ( actual_version != ask_version ) {
3647 else if ( ask_version == -1 ) {
3649 prot_row_id = prot_rows[prot_rows.
size()-1];
3661 bool can_have_gis =
false;
3664 auto gi_range = cur->m_Cursor.GetRowIdRange(cur->m_GI.GetIndex());
3665 if ( gi_range.second ) {
3668 can_have_gis =
true;
3673 return can_have_gis;
3680 if (
auto cur =
Feat() ) {
3681 feature_count = cur->m_Cursor.GetRowIdRange().second;
3684 return feature_count;
3727 PROFILE(sw_GetFeatLocIdTypeRange);
3729 auto row_range = seq->m_Cursor.GetRowIdRange(seq->m_FEAT_ROW_START.GetIndex());
3731 auto seq_row_id = row_range.first+
i;
3732 auto row_start = seq->FEAT_ROW_START(seq_row_id);
3733 if ( !row_start.empty() ) {
3734 feat_row_id = *row_start;
3740 catch ( exception& ) {
3743 PROFILE(sw_GetFeatLocIdTypeFeat);
3747 PROFILE(sw_GetFeatLocIdTypeFeatBytes);
3748 bytes = *cur->SEQ_FEAT(feat_row_id);
3750 cur.GetNCObject().m_ObjStr.OpenFromBuffer(bytes.
data(), bytes.
size());
3751 cur.GetNCObject().m_ObjStr >> *feat;
3754 if (
const CTextseq_id*
id = !seq_id? 0: seq_id->GetTextseq_Id() ) {
3755 if ( id->IsSetVersion() ) {
3764 catch ( exception& ) {
3791 while ( *
this && GetToOpen() <= pos ) {
3831 x_SetSplitVersion(split_version);
3836 void x_SetFlags(TFlags
flags);
3853 template<
class Iter>
3856 main_id = it.GetId(
flags);
3860 if ( feat_id->
IsGi() ) {
3872 template<
class Iter>
3893 vector<TVDBRowId>& product_row_ids);
3895 vector<TVDBRowId>& product_row_ids);
3897 vector<TVDBRowId>& product_row_ids);
3901 void x_AddProducts(
const vector<TVDBRowId>& product_row_ids);
3908 auto state = GetGBState();
3914 if ( !(m_IncludeFlags & TIncludeFlags(1 <<
state)) ) {
3926 GetDb().Put(m_Cur0, m_CurrId);
3928 GetDb().Put(m_Cur, m_CurrId);
3937 m_CurrId = m_FirstGoodId = m_FirstBadId = 0;
3938 m_AccVersion = eLatest;
3943 : m_AccVersion(eLatest)
3951 if (
this != &iter ) {
3971 m_AccVersion(eLatest),
3972 m_IncludeFlags(fIncludeDefault),
3973 m_ClipByQuality(
true)
3981 : m_AccVersion(eLatest)
3983 x_Select(wgs_db, include_flags, clip_type);
3991 : m_AccVersion(eLatest)
4002 : m_AccVersion(eLatest)
4004 x_Select(wgs_db, include_flags, clip_type, first_row, last_row);
4012 : m_AccVersion(eLatest)
4014 x_Select(wgs_db, include_flags, clip_type, acc);
4019 TIncludeFlags include_flags,
4021 : m_AccVersion(eLatest)
4023 x_Select(wgs_db, include_flags, clip_type);
4029 TIncludeFlags include_flags,
4031 : m_AccVersion(eLatest)
4040 TIncludeFlags include_flags,
4042 : m_AccVersion(eLatest)
4044 x_Select(wgs_db, include_flags, clip_type, first_row, last_row);
4050 TIncludeFlags include_flags,
4052 : m_AccVersion(eLatest)
4054 x_Select(wgs_db, include_flags, clip_type, acc);
4073 : m_AccVersion(eLatest)
4083 : m_AccVersion(eLatest)
4094 : m_AccVersion(eLatest)
4104 : m_AccVersion(eLatest)
4118 TIncludeFlags include_flags,
4121 x_Init(wgs_db, include_flags, clip_type, 0);
4127 TIncludeFlags include_flags,
4132 x_Init(wgs_db, include_flags, clip_type,
row);
4138 TIncludeFlags include_flags,
4144 x_Init(wgs_db, include_flags, clip_type, first_row);
4160 TIncludeFlags include_flags,
4166 x_Init(wgs_db, include_flags, clip_type,
row);
4178 TIncludeFlags include_flags,
4196 switch ( clip_type ) {
4252 "CWGSSeqIterator::"<<method<<
"(): Invalid iterator state");
4291 #ifdef TEST_ACC_VERSION
4309 return version <= latest_version &&
4323 "CWGSSeqIterator: "<<
4326 " is out of VDB version range: "<<
4327 oldest_version<<
"-"<<latest_version);
4352 if (
m_Cur->m_GI ) {
4408 if (
GetDb().HasCommonTaxId() ) {
4418 return m_Cur->m_HASH;
4437 #ifdef TEST_ACC_VERSION
4449 #ifdef TEST_ACC_VERSION
4509 "CWGSSeqIterator::GetId("<<
flags<<
"): "
4510 "no valid id found: "<<
4553 if (
m_Cur->m_DESCR ) {
4571 if (
m_Cur->m_NUC_PROT_DESCR ) {
4613 if ( ret->
Get().empty() ) {
4624 if ( !
m_Cur->m_FEAT_ROW_START ) {
4628 if ( start_val.
empty() ) {
4633 if ( end < start ) {
4635 "CWGSSeqIterator::GetLocFeatRowIdRange: "
4636 "feature row range is invalid: "<<start<<
","<<end);
4668 return m_Cur->m_QUALITY;
4682 PROFILE(sw____GetContigQualSize);
4695 quality_vec.clear();
4699 quality_vec.reserve((
size+7)/8*8);
4700 quality_vec.resize(
size);
4702 quality_vec.data());
4708 return "Phrap Graph";
4715 Uint1 min_v0 = 0xff, max_v0 = 0;
4716 Uint1 min_v1 = 0xff, max_v1 = 0;
4717 Uint1 min_v2 = 0xff, max_v2 = 0;
4718 Uint1 min_v3 = 0xff, max_v3 = 0;
4724 if ( v0 < min_v0 ) min_v0 = v0;
4725 if ( v1 < min_v1 ) min_v1 = v1;
4726 if (
v2 < min_v2 ) min_v2 =
v2;
4727 if ( v3 < min_v3 ) min_v3 = v3;
4728 if ( v0 > max_v0 ) max_v0 = v0;
4729 if ( v1 > max_v1 ) max_v1 = v1;
4730 if (
v2 > max_v2 ) max_v2 =
v2;
4731 if ( v3 > max_v3 ) max_v3 = v3;
4735 if ( v0 < min_v0 ) min_v0 = v0;
4736 if ( v0 > max_v0 ) max_v0 = v0;
4738 min_v0 =
min(min_v0, min_v2);
4739 max_v0 =
max(max_v0, max_v2);
4740 min_v1 =
min(min_v1, min_v3);
4741 max_v1 =
max(max_v1, max_v3);
4742 min_v =
min(min_v0, min_v1);
4743 max_v =
max(max_v0, max_v1);
4751 info.x_SetId(*
this);
4776 PROFILE(sw____GetContigQualData);
4777 values.reserve((
size+7)/8*8);
4778 values.resize(
size);
4783 Uint1 min_q = 0, max_q = 0;
4785 PROFILE(sw____GetContigQualMinMax);
4795 annot->
SetDesc().Set().push_back(name);
4808 annot->
SetData().SetGraph().push_back(graph);
4809 annot_set.push_back(annot);
4844 if (
m_Cur->m_GB_STATE ) {
4855 if ( !
m_Cur->m_PUBLIC_COMMENT ) {
4866 if ( !
m_Cur->m_PUBLIC_COMMENT ) {
4883 return m_Cur->m_GAP_START;
4981 evidence->SetType(
type);
4992 static const int kLenTypeMask =
4995 static const int kGapTypeMask =
5005 int len_type = -(-props & kLenTypeMask);
5006 int gap_type = -(-props & kGapTypeMask);
5011 if ( gap_type || gap_linkage ) {
5013 switch ( gap_type ) {
5050 for ( ; bit && bit <= gap_linkage; bit<<=1, ++
type ) {
5051 if ( gap_linkage & bit ) {
5131 segments.push_back(seg);
5148 TInstSegmentFlags
flags)
const
5158 for ( ;
len > 0; ) {
5159 if ( gap_info.
IsInGap(pos) ) {
5164 x_AddGap(segments, pos - raw_offset, gap_len, gap_info);
5182 rem_len =
min(rem_len, chunk_end - pos);
5189 seg_len = ambiguity->Get2naLengthBlock(pos, rem_len);
5190 if ( seg_len == rem_len ) {
5203 if ( seg_len >=
kMin2naSize || seg_len == rem_len ) {
5212 TSeqPos seg_len_2na = seg_len;
5213 seg_len += ambiguity->Get4naLengthBlock(pos+seg_len,
5215 if ( seg_len == seg_len_2na ) {
5249 segments.push_back(seg);
5266 const TSeqPos kMinGapSize = 20;
5268 const TSeqPos kUnknownGapSize = 100;
5271 for ( ;
len > 0; ) {
5287 TSeqPos seg_len_2na = seg_len;
5295 if ( seg_len == kUnknownGapSize ) {
5299 else if ( seg_len == seg_len_2na ) {
5314 segments.push_back(seg);
5329 _ASSERT(it->range.GetFrom() == pos);
5330 if ( it->literal ) {
5331 _ASSERT(it->range.GetLength() == it->literal->GetLength());
5332 seq->SetLiteral(it->literal.GetNCObject());
5335 seq->SetLiteral().SetLength(it->range.GetLength());
5337 delta.push_back(seq);
5338 pos += it->range.GetLength();
5347 if ( segments.size() == 1 && !segments[0].is_gap ) {
5376 if ( length == 0 ) {
5389 if ( !
info.split_data ) {
5407 TSeqPos pos = it->range.GetFrom();
5408 TSeqPos end = it->range.GetToOpen();
5410 if ( !chunk || chunk->GetId() != chunk_id ) {
5412 chunk->SetId().Set(chunk_id);
5413 info.split->SetChunks().push_back(chunk);
5416 chunk->SetContent().push_back(content);
5440 this->flags =
flags;
5456 "unknown split version");
5465 if ( bytes.
empty() ) {
5509 vector<TVDBRowId>& product_row_ids)
5515 if (
TVDBRowId product_row_id = feat_it.GetProductRowId() ) {
5517 product_row_ids.push_back(product_row_id);
5518 if ( !product_features ) {
5521 product_features = &annot->
SetData().SetFtable();
5523 dst = product_features;
5527 if ( !main_features ) {
5530 main_features = &annot->
SetData().SetFtable();
5532 dst = main_features;
5566 static const TSeqPos kMaxGap = 100000;
5631 const vector<TVDBRowId>& product_row_ids,
5632 size_t product_index);
5640 if ( gi_range_stop == gi_range_start ) {
5644 if ( gi_range_stop == gi_range_start+
GI_CONST(1) ) {
5652 loc_set.push_back(loc);
5658 const vector<TVDBRowId>& product_row_ids,
5659 size_t product_index)
5669 chunk->SetContent().push_back(content);
5670 content->SetSeq_annot_place().SetBioseqs().Set().push_back(
seq_place);
5678 chunk->SetContent().push_back(content);
5679 content->SetSeq_annot_place().SetBioseq_sets().Set().push_back(
kMainEntryId);
5688 auto& loc_set = annot_info.
SetSeq_loc().SetLoc_set();
5689 loc_set.push_back(old_loc);
5692 for (
auto it = product_row_ids.begin()+product_index; it != product_row_ids.end(); ++it ) {
5695 "invalid protein row id: "<<*it);
5706 if ( gi != gi_range_stop ) {
5708 gi_range_start = gi;
5719 loc_set.push_back(loc);
5726 chunk->SetContent().push_back(content);
5727 content->SetFeat_ids();
5747 annot_info.
SetFeat().push_back(type_info);
5756 interval->SetStart(
r.GetFrom());
5757 interval->SetLength(
r.GetLength());
5758 intervals.
SetInts().push_back(interval);
5764 vector<TVDBRowId>& product_row_ids)
5769 int chunk_index = 0;
5772 size_t product_index = 0;
5775 bool with_product =
false;
5776 if (
TVDBRowId product_row_id = feat_it.GetProductRowId() ) {
5778 product_row_ids.push_back(product_row_id);
5779 with_product =
true;
5781 c.
AddFeature(with_product, feat_it.GetFeatType(), feat_it.GetLocRange());
5785 product_row_ids, product_index));
5786 product_index = product_row_ids.size();
5792 product_row_ids, product_index));
5794 if ( !product_row_ids.empty() ) {
5802 vector<TVDBRowId>& product_row_ids)
5805 x_AddFeaturesSplit(
range, product_row_ids);
5808 x_AddFeaturesDirect(
range, product_row_ids);
5818 if ( !main_features ) {
5820 main_seq->SetAnnot().push_back(annot);
5821 main_features = &annot->
SetData().SetFtable();
5823 x_AddFeature(feat_it, *main_features);
5839 info.split->SetChunks().push_back(chunk);
5841 chunk->SetId().Set(chunk_id);
5847 chunk->SetContent().push_back(content);
5848 content->SetFeat_ids();
5850 content->SetSeq_annot();
5851 annot_info.
SetName(GetQualityAnnotName());
5857 chunk->SetContent().push_back(content);
5858 content->SetSeq_annot_place().SetBioseqs().Set().push_back(place);
5868 info.x_SetSeq(*
this);
5871 info.entry->SetSeq(*
info.main_seq);
5878 if ( m_Cur->m_DESCR ) {
5880 if ( !descr.
empty() ) {
5881 info.x_AddDescr(*descr);
5888 info.main_seq->SetDescr(*descr);
5894 GetAnnotSet(
info.main_seq->SetAnnot(),
info.flags);
5895 bool has_split_annot =
false;
5897 if (
info.split_qual ) {
5898 x_AddQualityChunkInfo(
info);
5899 has_split_annot =
true;
5902 x_GetQualityAnnot(
info.main_seq->SetAnnot(),
info);
5905 if ( !has_split_annot &&
info.main_seq->GetAnnot().empty() ) {
5906 info.main_seq->ResetAnnot();
5909 info.main_seq->SetInst(*x_GetSeq_inst(
info));
5917 if ( feat_it.GetProductRowId() ) {
5932 TFlags save_flags =
flags;
5943 chunk->SetData().push_back(chunk_data);
5945 bioseqs = &chunk_data->SetBioseqs();
5950 ITERATE ( vector<TVDBRowId>, it, product_row_ids ) {
5953 "invalid protein row id: "<<*it);
5961 entry->
SetSeq(*main_seq);
5965 bioseqs->push_back(main_seq);
5969 main_seq = save_seq;
5979 _ASSERT(entry->IsSeq() && &entry->GetSeq() == main_seq);
5980 vector<TVDBRowId> product_row_ids;
5983 x_AddFeatures(
range, product_row_ids);
5985 if ( !product_row_ids.empty() ) {
5987 _ASSERT(entry && entry->IsSet());
5989 int chunk_index = 0;
5990 size_t prod_count = 0;
5993 ITERATE ( vector<TVDBRowId>, it, product_row_ids ) {
5996 split->SetChunks().push_back(chunk);
6003 chunk->SetContent().push_back(content);
6004 content->SetFeat_ids();
6007 chunk->SetContent().push_back(content);
6009 content->SetBioseq_place().push_back(place_info);
6011 ids = &place_info->SetSeq_ids().Set();
6016 "invalid protein row id: "<<*it);
6025 x_AddProducts(product_row_ids);
6034 if ( !db->GetMasterDescr().empty() ) {
6035 db->AddMasterDescr(
info.entry->SetDescr(),
info.main_seq,
flags);
6046 x_CreateBioseq(
info);
6055 x_CreateBioseq(
info);
6057 info.x_CreateProtSet(GetLocFeatRowIdRange());
6061 if ( !descr.
empty() ) {
6084 info.split_data =
true;
6089 info.split_prod =
true;
6095 info.split_feat =
true;
6098 CanHaveQualityGraph() ) {
6099 info.split_qual =
true;
6101 if ( !
info.split_data && !
info.split_prod && !
info.split_feat &&
6102 !
info.split_qual ) {
6107 info.split->SetSkeleton(*
info.entry);
6108 info.split->SetChunks();
6115 x_CreateEntry(
info);
6120 unsigned index)
const
6122 PROFILE(sw_CreateQualityChunk);
6125 x_GetQualityAnnot(
data->SetAnnots(),
info,
6127 info.chunk->SetData().push_back(
data);
6132 unsigned index)
const
6143 GetGapInfo(gap_info);
6145 TInstSegmentFlags inst_flags = fInst_MakeData;
6146 x_GetSegmentsWithExplicitGaps(segments,
range, gap_info, inst_flags);
6149 _ASSERT(it->literal && it->literal->IsSetSeq_data());
6151 piece->SetStart(it->range.GetFrom());
6152 piece->SetData().push_back(it->literal);
6153 data->SetSeq_data().push_back(piece);
6155 info.chunk->SetData().push_back(
data);
6160 unsigned index)
const
6162 PROFILE(sw_CreateProductsChunk);
6163 vector<TVDBRowId> product_row_ids;
6166 if (
TVDBRowId row_id = feat_it.GetProductRowId() ) {
6171 product_row_ids.push_back(row_id);
6177 info.x_AddProducts(product_row_ids);
6182 unsigned index)
const
6184 PROFILE(sw_CreateFeaturesChunk);
6186 auto range = GetLocFeatRowIdRange();
6189 range.first = feat_start;
6190 range.second =
max(feat_start, feat_stop)-feat_start;
6192 info.chunk->SetData();
6197 if ( feat_it.GetProductRowId() ) {
6199 if ( !product_features ) {
6201 info.chunk->SetData().push_back(
data);
6204 data->SetAnnots().push_back(annot);
6205 product_features = &annot->
SetData().SetFtable();
6207 dst = product_features;
6211 if ( !main_features ) {
6213 info.chunk->SetData().push_back(
data);
6214 data->SetId().SetSeq_id(*
info.main_id);
6216 data->SetAnnots().push_back(annot);
6217 main_features = &annot->
SetData().SetFtable();
6219 dst = main_features;
6221 info.x_AddFeature(feat_it, *dst);
6231 info.x_SetId(*
this);
6235 x_CreateQualityChunk(
info, index);
6238 x_CreateProductsChunk(
info, index);
6241 x_CreateFeaturesChunk(
info, index);
6244 x_CreateDataChunk(
info, index);
6248 "CWGSSeqIterator::CreateChunk("<<chunk_id<<
"): "
6249 "unsupported chunk type: "<<
type);
6257 x_CheckValid(
"CWGSSeqIterator::GetBioseq");
6259 x_CreateBioseq(
info);
6260 return info.main_seq;
6267 x_CheckValid(
"CWGSSeqIterator::GetSeq_entry");
6270 x_CreateEntry(
info);
6278 x_CheckValid(
"CWGSSeqIterator::GetSeq_entryData");
6282 x_CreateEntry(
info);
6289 return GetSplitInfoAndVersion(
flags).first;
6298 x_CheckValid(
"CWGSSeqIterator::GetSplitInfo");
6300 if ( x_InitSplit(
info) ) {
6301 x_CreateSplit(
info);
6303 return make_pair(
info.split,
info.split_version);
6309 return GetSplitInfoDataAndVersion(
flags).first;
6318 x_CheckValid(
"CWGSSeqIterator::GetSplitInfoData");
6320 if ( x_InitSplit(
info) ) {
6322 x_CreateSplit(
info);
6331 x_CheckValid(
"CWGSSeqIterator::GetChunk");
6334 x_CreateChunk(
info, chunk_id);
6342 x_CheckValid(
"CWGSSeqIterator::GetChunk");
6345 x_CreateChunk(
info, chunk_id);
6353 x_CheckValid(
"CWGSSeqIterator::GetChunkData");
6357 x_CreateChunk(
info, chunk_id);
6365 x_CheckValid(
"CWGSSeqIterator::GetChunkData");
6369 x_CreateChunk(
info, chunk_id);
6390 m_CurrId = m_FirstGoodId = m_FirstBadId = 0;
6406 if (
this != &iter ) {
6493 "CWGSScaffoldIterator::"<<method<<
"(): "
6494 "Invalid iterator state");
6501 if ( !
m_Cur->m_ACCESSION ) {
6543 if (
m_Cur->m_GB_STATE ) {
6555 if ( !acc.
empty() ) {
6611 "CWGSScaffoldIterator::GetId("<<
flags<<
"): "
6612 "no valid id found: "<<
6667 if ( ret->
Get().empty() ) {
6680 for (
size_t i = 0;
i < lens.
size(); ++
i ) {
6698 x_CheckValid(
"CWGSScaffoldIterator::GetLocFeatRowIdRange");
6700 if ( !
m_Cur->m_FEAT_ROW_START ) {
6704 if ( start_val.
empty() ) {
6709 if ( end < start ) {
6711 "CWGSScaffoldIterator::GetLocFeatRowIdRange: "
6712 "feature row range is invalid: "<<start<<
","<<end);
6737 if (
m_Cur->m_COMPONENT_LINKAGE ) {
6739 if ( !linkages_val.
empty() ) {
6740 size_t gaps_count = 0;
6741 for (
size_t i = 0;
i < lens.
size(); ++
i ) {
6748 if ( linkages_val.
size() != gaps_count ) {
6750 "CWGSScaffoldIterator: inconsistent gap info");
6752 linkages = linkages_val.
data();
6756 for (
size_t i = 0;
i < lens.
size(); ++
i ) {
6769 if ( start == 0 ||
len == 0 ) {
6771 "CWGSScaffoldIterator: component is bad for "+
6779 interval.
SetId(*
GetDb().GetContigSeq_id(row_id));
6790 delta.push_back(seg);
6801 PROFILE(sw__GetScaffoldBioseq);
6803 info.x_SetSeq(*
this);
6806 info.entry->SetSeq(*
info.main_seq);
6825 info.main_seq->SetDescr(*descr);
6859 return info.main_seq;
6911 if (
this != &iter ) {
6927 x_Init(wgs_db, seq_type);
6934 x_Init(wgs_db, seq_type);
6969 if ( (seq_type ==
eProt || !
m_Cur->m_NUC_ROW_ID) &&
6970 (seq_type ==
eNuc || !
m_Cur->m_PROT_ROW_ID) ) {
6986 if ( !
value.empty() ) {
6997 if ( !
value.empty() ) {
7043 : m_CurrId(0), m_FirstGoodId(0), m_FirstBadId(0)
7049 : m_CurrId(0), m_FirstGoodId(0), m_FirstBadId(0)
7058 if (
this != &iter ) {
7144 "CWGSProteinIterator::"<<method<<
"(): "
7145 "Invalid iterator state");
7160 if ( !gi.
empty() ) {
7161 return s_ToGi(*gi,
"CWGSProteinIterator::GetGi()");
7172 if (
m_Cur0->m_GB_ACCESSION ) {
7192 PROFILE(sw____GetProtAccSeq_id);
7195 if ( !acc.
empty() ) {
7219 PROFILE(sw____GetProtGISeq_id);
7254 "CWGSProteinIterator::GetId("<<
flags<<
"): "
7255 "no valid id found: "<<
7304 if (
GetDb().HasCommonTaxId() ) {
7308 return m_Cur->m_TAXID;
7315 if (
GetDb().HasCommonTaxId() ) {
7327 return m_Cur->m_HASH;
7349 return m_Cur->m_REF_ACC;
7388 if (
m_Cur->m_GB_STATE ) {
7399 if ( !
m_Cur->m_PUBLIC_COMMENT ) {
7410 if ( !
m_Cur->m_PUBLIC_COMMENT ) {
7429 if ( !
m_Cur->m_TITLE ) {
7438 x_CheckValid(
"CWGSProteinIterator::GetLocFeatRowIdRange");
7440 if ( !
m_Cur->m_FEAT_ROW_START ) {
7444 if ( start_val.
empty() ) {
7449 if ( end < start ) {
7451 "CWGSProteinIterator::GetLocFeatRowIdRange: "
7452 "feature row range is invalid: "<<start<<
","<<end);
7460 x_CheckValid(
"CWGSProteinIterator::GetProductFeatCount");
7462 if ( !
m_Cur->m_FEAT_PRODUCT_ROW_ID ) {
7471 x_CheckValid(
"CWGSProteinIterator::GetProductFeatRowId");
7473 if ( !
m_Cur->m_FEAT_PRODUCT_ROW_ID ) {
7482 x_CheckValid(
"CWGSProteinIterator::GetBestProductFeatRowId");
7484 if ( !
m_Cur->m_FEAT_PRODUCT_ROW_ID ) {
7488 return row.empty()? 0:
row[
row.size()-1];
7494 x_CheckValid(
"CWGSProteinIterator::GetProductFeatRowId");
7496 if ( !
m_Cur->m_FEAT_PRODUCT_ROW_ID ) {
7500 return row.empty()? 0: *
row;
7506 x_CheckValid(
"CWGSProteinIterator::GetReplacedByRowId");
7508 if (
m_Cur->m_REPLACED_BY ) {
7510 if ( !
value.empty() ) {
7514 #ifdef TEST_ACC_VERSION
7527 if (
m_Cur->m_REPLACES ) {
7529 if ( !
value.empty() ) {
7533 #ifdef TEST_ACC_VERSION
7567 if (
m_Cur->m_DESCR ) {
7572 if ( !title.
empty() ) {
7575 ret->
Set().push_back(desc);
7582 if ( ret->
Get().empty() ) {
7622 if ( replaced_by_row || replaces_row ) {
7624 if ( replaced_by_row ) {
7628 if ( replaces_row ) {
7644 info.x_SetSeq(*
this);
7647 info.entry->SetSeq(*
info.main_seq);
7654 if (
m_Cur->m_DESCR ) {
7656 if ( !descr.
empty() ) {
7657 info.x_AddDescr(*descr);
7664 info.main_seq->SetDescr(*descr);
7669 if ( !
info.db->FeatTable() ) {
7673 if (
info.main_seq->GetAnnot().empty() ) {
7674 info.main_seq->ResetAnnot();
7699 return info.main_seq;
7753 if (
this != &iter ) {
7815 row_range.first+row_range.second);
7841 "CWGSFeatureIterator::"<<method<<
"(): "
7842 "Invalid iterator state");
7855 x_CheckValid(
"CWGSFeatureIterator::GetProductSeqType");
7870 if ( !
m_Cur->m_PRODUCT_ROW_ID ) {
7874 return row.empty()? 0: *
row;
#define READ(buf, off, bytes, endian)
CAsnBinData(CSerialObject &obj)
virtual void Serialize(CObjectOStreamAsnBinary &out) const
CRef< CSerialObject > m_MainObject
virtual ~CAsnBinData(void)
const TInfoMap & info_map
CWGSAsnBinData::TDescrInfo TInfo
map< TKey, TInfo > TInfoMap
virtual void WriteClassMember(CObjectOStream &out, const CConstObjectInfoMI &member)
CDescrWriteHook(const TInfoMap &info_map)
virtual void WriteChoiceVariant(CObjectOStream &out, const CConstObjectInfoCV &variant)
map< TKey, TInfo > TInfoMap
const CSeq_annot::TData::TFtable * TKey
CWGSAsnBinData::SFtableInfo TInfo
const TInfoMap & info_map
CFtableWriteHook(const TInfoMap &info_map)
void Release()
Manually force the resource to be released.
CID2S_Bioseq_place_Info –.
CID2S_Feat_type_Info –.
CID2S_Seq_annot_Info –.
CID2S_Seq_id_Interval –.
CID2S_Sequence_Piece –.
void GetData(char *buffer, size_t size, size_t offset=0) const
size_t GetSize(void) const
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Writing containers (SET OF, SEQUENCE OF).
Helper class: installs hooks in constructor, and uninstalls in destructor.
CObjectIStreamAsnBinary –.
CObjectOStreamAsnBinary –.
@Seq_descr.hpp User-defined methods of the data storage class.
namespace ncbi::objects::
Base class for all serializable objects.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Template class for iteration on objects of class C (non-medifiable version)
uint32_t GetElementCount(TVDBRowId row, const CVDBColumn &column, uint32_t elem_bits) const
void ReadElements(TVDBRowId row, const CVDBColumn &column, uint32_t elem_bits, uint32_t start, uint32_t count, void *buffer) const
TVDBRowIdRange GetRowIdRange(TVDBColumnIdx column=0) const
TVDBRowIdRange Find(const string &value) const
const TValue * data() const
const_iterator begin() const
static bool IsPlainAccession(const string &acc_or_path)
void AddFeature(TFtable &ftable, const CTempString &data)
vector< char > TDescrInfo
void AddDescr(CBioseq &seq, const CTempString &data)
CWGSAsnBinData(CSerialObject &obj)
virtual void Serialize(CObjectOStreamAsnBinary &out) const
CSeq_annot::TData::TFtable TFtable
map< const TFtable *, SFtableInfo > TFtableMap
virtual ~CWGSAsnBinData(void)
map< const CBioseq *, TDescrInfo > TDescrMap
CRef< CSeq_descr > m_EmptyDescr
CVDBTableIndex m_ProteinNameIndex
CRef< CSeq_id > GetGeneralOrPatentSeq_id(CTempString str, TVDBRowId row, TGnlIdFlags gnl_id_flags=fGnlId_Default) const
CVDBObjectCache< SProtIdxTableCursor > m_ProtIdx
CRef< SSeqTableCursor > Seq(TVDBRowId row=0)
CVDBObjectCache< SProt0TableCursor > m_Prot0
atomic< bool > m_GiIdxTableIsOpened
NCBI_gb_state m_ProjectGBState
const CVDBTableIndex & ContigNameIndex(void)
const CVDBTableIndex & ProductNameIndex(void)
pair< TGi, TGi > GetNucGiRange(void)
CRef< CSeq_id > GetGeneralSeq_id(CTempString prefix, CTempString tag) const
bool LoadMasterDescr(int filter)
const CVDBTable & GiIdxTable(void)
COpenRange< TIntId > TGiRange
EFeatLocIdType GetFeatLocIdType()
CRef< CSeq_entry > GetMasterDescrEntry(void)
atomic< bool > m_FeatTableIsOpened
CVDBTableIndex m_ScaffoldNameIndex
atomic< bool > m_ScfTableIsOpened
string m_IdPrefixDbWithVersion
CRef< SSeq0TableCursor > Seq0(TVDBRowId row=0)
CRef< SProtTableCursor > Prot(TVDBRowId row=0)
bool HasCommonTaxId(void) const
CRef< CSeq_id > GetAccSeq_id(CTempString acc, int version) const
CRef< SGiIdxTableCursor > GiIdx(TVDBRowId row=0)
void ResetMasterDescr(void)
TAmbiguityCache m_AmbiguityCache
TVDBRowId GetNucGiRowId(TGi gi)
const CVDBTableIndex & ScaffoldNameIndex(void)
CRef< CSeq_entry > GetMasterSeq_entry(void) const
void SetMasterDescr(const TMasterDescr &descr, int filter)
void OpenScaffoldNameIndex(void)
CRef< SAmbiguityInfo > GetAmbiguityInfo(TVDBRowId row)
TTaxId GetCommonTaxId(void) const
void PutAmbiguityInfo(CRef< SAmbiguityInfo > &ambiguity)
pair< TVDBRowId, bool > GetGiRowId(TGi gi)
TVDBRowCount GetTotalFeatureCount()
void x_SortGiRanges(TGiRanges &ranges)
CRef< SProtIdxTableCursor > ProtIdx(TVDBRowId row=0)
void OpenGiIdxTable(void)
EFeatLocIdType DetermineFeatLocIdType()
const CVDBTable & ScfTable(void)
static pair< TVDBRowId, ERowType > ParseRowType(CTempString acc, TAllowRowType allow)
void AddMasterDescr(CSeq_descr &descr, const CBioseq *main_seq=0, TFlags flags=fDefaultFlags) const
CVDBTableIndex m_ProtAccIndex
TMasterDescr m_MasterDescr
CRef< CSeq_entry > m_MasterEntry
void OpenIndex(const CVDBTable &table, CVDBTableIndex &index, atomic< Int1 > &index_is_opened, const char *index_name, const char *backup_index_name=0)
void OpenProteinNameIndex(void)
TVDBRowId GetScaffoldNameRowId(const string &name)
TVDBRowId GetContigNameRowId(const string &name)
CRef< CSeq_id > GetProteinSeq_id(TVDBRowId row_id) const
CVDBObjectCache< SSeqTableCursor > m_Seq
CSeq_inst::TMol GetScaffoldMolType(void) const
atomic< Int1 > m_ScaffoldNameIndexIsOpened
atomic< bool > m_ProtIdxTableIsOpened
NCBI_gb_state GetProjectGBState() const
CRef< SProt0TableCursor > Prot0(TVDBRowId row=0)
TVDBRowId GetProductNameRowId(const string &name)
CRef< CSeq_id > GetMasterSeq_id(void) const
TVDBRowId GetProtAccRowId(const string &acc, int version=-1)
CVDBObjectCache< SProtTableCursor > m_Prot
CRef< SFeatTableCursor > Feat(TVDBRowId row=0)
TVDBRowId GetProteinNameRowId(const string &name)
atomic< Int1 > m_ProductNameIndexIsOpened
vector< TGiRange > TGiRanges
TVDBRowId Lookup(const string &name, const CVDBTableIndex &index, bool upcase)
CRef< CSeq_id > GetPatentSeq_id(int id) const
virtual ~CWGSDb_Impl(void)
void OpenProtIdxTable(void)
list< CRef< CSeqdesc > > TMasterDescr
TProtAccRanges GetProtAccRanges(void)
CFastMutex m_AmbiguityCacheMutex
static string NormalizePathOrAccession(CTempString path_or_acc, CTempString vol_path=CTempString())
const CVDBTableIndex & ProteinNameIndex(void)
TGiRanges GetProtGiRanges(void)
TVDBRowId ParseRow(CTempString acc, bool *is_scaffold) const
TGiRanges GetNucGiRanges(void)
CVDBObjectCache< SScfTableCursor > m_Scf
void Put(CRef< SSeq0TableCursor > &curs, TVDBRowId row=0)
CRef< SSeq4naTableCursor > Seq4na(TVDBRowId row=0)
bool IsSetMasterDescr(void) const
CVDBTableIndex m_ContigNameIndex
void OpenProtAccIndex(void)
CVDBObjectCache< SSeq0TableCursor > m_Seq0
CSeq_inst::TMol GetContigMolType(void) const
void x_LoadMasterDescr(int filter)
CRef< CSeq_id > m_PatentId
void SetPatentId(CRef< CSeq_id > id)
atomic< bool > m_ProtTableIsOpened
TGi GetMasterGi(void) const
CRef< SScfTableCursor > Scf(TVDBRowId row=0)
const CVDBTable & FeatTable(void)
const CVDBTable & SeqTable(void)
CRef< CSeq_id > GetContigSeq_id(TVDBRowId row_id) const
pair< TGi, TGi > GetProtGiRange(void)
CSeq_inst::TMol m_ContigMolType
CWGSDb_Impl(CVDBMgr &mgr, CTempString path_or_acc, CTempString vol_path=CTempString())
string m_IdPrefixWithVersion
size_t GetMasterDescrBytes(TMasterDescrBytes &buffer)
void OpenProductNameIndex(void)
atomic< Int1 > m_ProtAccIndexIsOpened
CSeq_id::E_Choice m_SeqIdType
const TMasterDescr & GetMasterDescr(void) const
atomic< EFeatLocIdType > m_FeatLocIdType
CVDBTableIndex m_ProductNameIndex
CVDBObjectCache< SFeatTableCursor > m_Feat
void OpenContigNameIndex(void)
const CVDBTable & ProtIdxTable(void)
const CVDBTable & ProtTable(void)
bool HasStandardFeatLocIdType()
CSeq_inst::TMol GetProteinMolType(void) const
CVDBObjectCache< SGiIdxTableCursor > m_GiIdx
atomic< Int1 > m_ProteinNameIndexIsOpened
void OpenTable(CVDBTable &table, atomic< bool > &table_is_opened, const char *table_name)
CRef< CSeq_id > GetScaffoldSeq_id(TVDBRowId row_id) const
void x_InitIdParams(void)
TVDBRowId GetProtGiRowId(TGi gi)
const string & GetWGSPath(void) const
atomic< Int1 > m_ContigNameIndexIsOpened
static TVDBRowId ParseProteinRow(CTempString acc)
static TVDBRowId ParseScaffoldRow(CTempString acc)
static EDescrType GetMasterDescrType(const CSeqdesc &desc)
static TVDBRowId ParseContigRow(CTempString acc)
CRange< TSeqPos > GetLocRange(void) const
CWGSDb_Impl & GetDb(void) const
TVDBRowId GetLocRowId(void) const
NCBI_WGS_seqtype GetProductSeqType(void) const
NCBI_WGS_feattype GetFeatType(void) const
TVDBRowId GetProductRowId(void) const
TSeqPos GetLocLength(void) const
CRef< CSeq_feat > GetSeq_feat() const
CWGSFeatureIterator & SelectRow(TVDBRowId row)
CTempString GetSeq_featBytes(void) const
CWGSFeatureIterator & operator=(const CWGSFeatureIterator &iter)
void x_ReportInvalid(const char *method) const
CWGSFeatureIterator & SelectRowRange(TVDBRowIdRange row_range)
NCBI_WGS_seqtype GetLocSeqType(void) const
TSeqPos GetLocStart(void) const
CWGSFeatureIterator(void)
CRef< CWGSDb_Impl::SFeatTableCursor > m_Cur
void x_Init(const CWGSDb &wgs_db)
~CWGSFeatureIterator(void)
void x_CheckValid(const char *method) const
void x_Init(const CWGSDb &wgs_db, ESeqType seq_type)
CRef< CWGSDb_Impl::SGiIdxTableCursor > m_Cur
CWGSDb_Impl & GetDb(void) const
CWGSGiIterator & operator=(const CWGSGiIterator &iter)
CRef< CSeq_id > GetAccSeq_id(void) const
CBioseq::TAnnot TAnnotSet
TVDBRowId GetReplacesRowId(void) const
void x_CreateEntry(SWGSCreateInfo &info) const
CTempString GetPublicComment(void) const
CWGSProteinIterator & SelectRow(TVDBRowId row)
CRef< CBioseq > GetBioseq(TFlags flags=fDefaultFlags) const
CWGSProteinIterator(void)
CRef< CSeq_id > GetGiSeq_id(void) const
NCBI_gb_state GetGBState(void) const
size_t GetProductFeatCount(void) const
bool HasTaxId(void) const
TVDBRowId GetBestProductFeatRowId(void) const
TSeqPos GetSeqLength(void) const
CWGSDb_Impl & GetDb(void) const
bool HasTitle(void) const
NCBI_gb_state GetRawGBState(void) const
TVDBRowIdRange GetLocFeatRowIdRange(void) const
TVDBRowId GetProductFeatRowId(void) const
CWGSProteinIterator & operator=(const CWGSProteinIterator &iter)
CRef< CSeq_inst > GetSeq_inst(TFlags flags=fDefaultFlags) const
CSeq_id::TGi GetGi(void) const
THash GetSeqHash(void) const
bool HasAnnotSet(void) const
void x_CheckValid(const char *method) const
CRef< CWGSDb_Impl::SProtTableCursor > m_Cur
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
int GetAccVersion(void) const
void x_CreateBioseq(SWGSCreateInfo &info) const
CTempString GetTitle(void) const
void GetIds(CBioseq::TId &ids, TFlags flags=fDefaultFlags) const
~CWGSProteinIterator(void)
void x_Init(const CWGSDb &wgs_db)
CTempString GetAccession(void) const
CRef< CSeq_id > GetGeneralOrPatentSeq_id(void) const
CRef< CSeq_id > GetId(TFlags flags=fDefaultFlags) const
bool HasPublicComment(void) const
CRef< CSeq_id > GetGeneralSeq_id(void) const
bool HasSeqHash(void) const
void GetAnnotSet(TAnnotSet &annot_set, TFlags flags=fDefaultFlags) const
CTempString GetRefAcc(void) const
TTaxId GetTaxId(void) const
CTempString GetProductName(void) const
void x_ReportInvalid(const char *method) const
CTempString GetProteinName(void) const
CRef< CWGSDb_Impl::SProt0TableCursor > m_Cur0
CRef< CSeq_descr > GetSeq_descr(TFlags flags=fDefaultFlags) const
TVDBRowId GetReplacedByRowId(void) const
bool HasRefAcc(void) const
bool HasSeq_descr(TFlags flags=fDefaultFlags) const
void x_Init(const CWGSDb &wgs_db)
void x_ReportInvalid(const char *method) const
CWGSScaffoldIterator(void)
bool HasSeq_descr(TFlags flags=fDefaultFlags) const
void x_CreateEntry(SWGSCreateInfo &info) const
CRef< CSeq_id > GetAccSeq_id(void) const
CWGSScaffoldIterator & SelectRow(TVDBRowId row)
CTempString GetScaffoldName(void) const
void x_CreateBioseq(SWGSCreateInfo &info) const
TVDBRowIdRange GetLocFeatRowIdRange(void) const
CRef< CSeq_id > GetGeneralOrPatentSeq_id(void) const
bool IsCircular(void) const
CRef< CSeq_descr > GetSeq_descr(TFlags flags=fDefaultFlags) const
NCBI_gb_state GetRawGBState(void) const
CRef< CBioseq > GetBioseq(TFlags flags=fDefaultFlags) const
void GetIds(CBioseq::TId &ids, TFlags flags=fDefaultFlags) const
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
CRef< CSeq_inst > GetSeq_inst(TFlags flags=fDefaultFlags) const
CWGSScaffoldIterator & operator=(const CWGSScaffoldIterator &iter)
CRef< CWGSDb_Impl::SScfTableCursor > m_Cur
int GetAccVersion(void) const
CRef< CSeq_id > GetGeneralSeq_id(void) const
TSeqPos GetSeqLength(void) const
CRef< CSeq_id > GetId(TFlags flags=fDefaultFlags) const
NCBI_gb_state GetGBState(void) const
void x_CheckValid(const char *method) const
~CWGSScaffoldIterator(void)
CWGSDb_Impl & GetDb(void) const
CTempString GetAccession(void) const
CRef< CSeq_id > GetGiSeq_id(void) const
void x_CreateChunk(SWGSCreateInfo &info, TChunkId chunk_id) const
CRef< CAsnBinData > GetSeq_entryData(TFlags flags=fDefaultFlags) const
CRef< CSeq_data > Get4na(TSeqPos pos, TSeqPos len) const
void x_Select(const CWGSDb &wgs_db, TIncludeFlags include_flags, EClipType clip_type)
void x_Init(const CWGSDb &wgs_db, TIncludeFlags include_flags, EClipType clip_type, TVDBRowId get_row)
bool x_Excluded(void) const
void x_CreateSplit(SWGSCreateInfo &info) const
CRef< CSeq_descr > GetSeq_descr(TFlags flags=fDefaultFlags) const
void x_SetDelta(CSeq_inst &inst, const TSegments &segments) const
CTempString GetTitle(void) const
bool CanHaveQualityGraph(void) const
TIncludeFlags m_IncludeFlags
CTempString GetPublicComment(void) const
bool x_InitSplit(SWGSCreateInfo &info) const
CRef< CAsnBinData > GetChunkDataForVersion(TChunkId chunk_id, TSplitVersion split_version) const
CTempString GetContigName(void) const
CWGSSeqIterator & SelectRow(TVDBRowId row)
CRef< CSeq_id > GetGeneralOrPatentSeq_id(void) const
TSeqPos GetRawSeqLength(void) const
CWGSSeqIterator & operator++(void)
CRef< CID2S_Chunk > GetChunkForVersion(TChunkId chunk_id, TSplitVersion split_version) const
CRef< CAsnBinData > GetChunkData(TChunkId chunk_id, TFlags flags=fDefaultFlags) const
SAmbiguityAccess GetAmbiguity() const
void GetQualityAnnot(TAnnotSet &annot_set, TFlags flags=fDefaultFlags) const
TTaxId GetTaxId(void) const
CTempString GetAccession(void) const
TVDBRowIdRange GetLocFeatRowIdRange(void) const
void x_AddQualityChunkInfo(SWGSCreateInfo &info) const
bool IsCircular(void) const
CRef< CSeq_id > GetGeneralSeq_id(void) const
bool HasQualityGraph(void) const
bool HasClippingInfo(void) const
CRef< CAsnBinData > GetSplitInfoData(TFlags flags=fDefaultFlags) const
TSeqPos GetClipQualityLength(void) const
string GetQualityAnnotName(void) const
NCBI_gb_state GetRawGBState(void) const
COpenRange< TSeqPos > x_NormalizeSeqRange(COpenRange< TSeqPos > range) const
void x_ReportInvalid(const char *method) const
void x_GetSegmentsWithRecoveredGaps(TSegments &segments, COpenRange< TSeqPos > range) const
bool HasPublicComment(void) const
CTempString GetNucProtDescrBytes(void) const
CSeq_id::TGi GetGi(void) const
void SelectAccVersion(int version)
void x_CreateBioseq(SWGSCreateInfo &info) const
bool HasGapInfo(void) const
CWGSSeqIterator & operator=(const CWGSSeqIterator &iter)
bool HasTitle(void) const
void GetQualityVec(vector< INSDC_quality_phred > &quality_vec) const
CRef< CSeq_inst > x_GetSeq_inst(SWGSCreateInfo &info) const
CRef< CSeq_id > GetGiSeq_id(void) const
void x_CreateProductsChunk(SWGSCreateInfo &info, unsigned index) const
CRef< CWGSDb_Impl::SSeq0TableCursor > m_Cur0
bool HasAnnotSet(void) const
CRef< CSeq_id > GetAccSeq_id(void) const
int GetAccVersion(void) const
bool HasSeqHash(void) const
CTempString GetAnnotBytes(void) const
THash GetSeqHash(void) const
SVersionSelector x_GetAccVersionSelector(int version) const
CRef< CID2S_Chunk > GetChunk(TChunkId chunk_id, TFlags flags=fDefaultFlags) const
pair< CRef< CAsnBinData >, TSplitVersion > GetSplitInfoDataAndVersion(TFlags flags=fDefaultFlags) const
void x_CreateDataChunk(SWGSCreateInfo &info, unsigned index) const
void x_GetSegmentsWithExplicitGaps(TSegments &data, COpenRange< TSeqPos > range, TWGSContigGapInfo gap_info, TInstSegmentFlags flags) const
void x_CreateEntry(SWGSCreateInfo &info) const
void x_GetQualityAnnot(TAnnotSet &annot_set, SWGSCreateInfo &info, TSeqPos pos=0, TSeqPos len=kInvalidSeqPos) const
bool HasSeq_descr(TFlags flags=fDefaultFlags) const
void GetGapInfo(TWGSContigGapInfo &gap_info) const
void x_CreateQualityChunk(SWGSCreateInfo &info, unsigned index) const
CRef< CSeq_data > Get2na(TSeqPos pos, TSeqPos len) const
bool HasTaxId(void) const
CBioseq::TAnnot TAnnotSet
struct CWGSSeqIterator::SWGSContigGapInfo TWGSContigGapInfo
TSeqPos GetSeqLength(EClipType clip_type=eDefaultClip) const
CRef< CBioseq > GetBioseq(TFlags flags=fDefaultFlags) const
CRef< CWGSDb_Impl::SAmbiguityInfo > m_AmbiguityInfo
SVersionSelector m_AccVersion
TSeqPos x_GetQualityArraySize(void) const
CRef< CWGSDb_Impl::SSeqTableCursor > m_Cur
void x_SetDeltaOrData(CSeq_inst &inst, const TSegments &segments) const
TSeqPos GetClipQualityLeft(void) const
pair< CRef< CID2S_Split_Info >, TSplitVersion > GetSplitInfoAndVersion(TFlags flags=fDefaultFlags) const
CRef< CID2S_Split_Info > GetSplitInfo(TFlags flags=fDefaultFlags) const
bool HasAccVersion(int version) const
unsigned GetAccVersionCount(void) const
void GetAnnotSet(TAnnotSet &annot_set, TFlags flags=fDefaultFlags) const
CRef< CSeq_inst > GetSeq_inst(TFlags flags=fDefaultFlags) const
vector< Uint1 > GetAmbiguityBytes() const
bool HasNucProtDescrBytes(void) const
CTempString GetSeqDescrBytes(void) const
int GetLatestAccVersion(void) const
vector< SSegment > TSegments
TSeqPos GetSeqOffset(EClipType clip_type=eDefaultClip) const
void GetIds(CBioseq::TId &ids, TFlags flags=fDefaultFlags) const
CRef< CSeq_entry > GetSeq_entry(TFlags flags=fDefaultFlags) const
bool HasSeqDescrBytes(void) const
void x_AddGap(TSegments &segments, TSeqPos pos, TSeqPos len, const TWGSContigGapInfo &gap_info) const
void x_CreateFeaturesChunk(SWGSCreateInfo &info, unsigned index) const
bool GetClipByQualityFlag(EClipType clip_type=eDefaultClip) const
void x_CheckValid(const char *method) const
CRef< CSeq_id > GetId(TFlags flags=fDefaultFlags) const
NCBI_gb_state GetGBState(void) const
CWGSDb_Impl & GetDb(void) const
Write hook for a choice variant (CHOICE)
Write hook for data member of a containing object (eg, SEQUENCE)
void Cleanup(TGlobalAmbiguityCache &)
TGlobalAmbiguityCache * Create()
void put(const key_type &key, const mapped_type &value, const resource_type &resource_used)
mapped_type get(const key_type &key)
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator end() const
const_iterator lower_bound(const key_type &key) const
iterator_bool insert(const value_type &val)
container_type::value_type value_type
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
The NCBI C++ standard methods for dealing with std::string.
int GetSeqLength(const CBioseq &bioseq)
static vector< string > arr
std::ofstream out("events_result.xml")
main entry point for tests
static const char table_name[]
static const char * str(char *buf, int n)
#define GI_FROM(T, value)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SStrictId_Tax::TId TTaxId
Taxon id type.
const TSeqPos kInvalidSeqPos
Define special value for invalid sequence position.
#define TAX_ID_FROM(T, value)
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
#define LOG_POST(message)
This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
#define NCBI_THROW_FMT(exception_class, err_code, message)
The same as NCBI_THROW but with message processed as output to ostream.
static void SetErrno(int errno_code)
Set last error using errno code.
static string MakePath(const string &dir=kEmptyStr, const string &base=kEmptyStr, const string &ext=kEmptyStr)
Assemble a path from basic components.
const TPrim & Get(void) const
C * SerialClone(const C &src)
Create on heap a clone of the source object.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
CObjectTypeInfo GetPointedType(void) const
Get type information of data to which this type refers.
CObjectTypeInfo GetMemberType(void) const
Get data type information.
const CConstObjectInfo & GetClassObject(void) const
Get containing class data.
void DefaultWrite(CObjectOStream &out, const CConstObjectInfoCV &variant)
CConstObjectInfo GetVariant(void) const
Get variant data.
ETypeFamily GetTypeFamily(void) const
Get data type family.
TConstObjectPtr GetObjectPtr(void) const
Get pointer to object.
void DefaultWrite(CObjectOStream &out, const CConstObjectInfoMI &member)
void OpenFromBuffer(const char *buffer, size_t size)
Attach reader to a data source.
Uint8 TCount
Alias for value type of counter.
TObjectType * GetNCPointer(void) const THROWS_NONE
Get pointer,.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
TObjectType & GetNCObject(void) const
Get object.
#define NCBI_PARAM_TYPE(section, name)
Generate typename for a parameter from its {section, name} attributes.
@ eParam_NoThread
Do not use per-thread values.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int8_t Int1
1-byte (8-bit) signed integer
TThisType & SetFrom(position_type from)
TThisType & SetToOpen(position_type toOpen)
position_type GetToOpen(void) const
position_type GetFrom(void) const
TThisType & SetLength(position_type length)
NCBI_NS_STD::string::size_type SIZE_TYPE
CTempString literal(const char(&str)[Size])
Templatized initialization from a string literal.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static Uint8 StringToUInt8_DataSize(const CTempString str, TStringToNumFlags flags=0)
Convert string that can contain "software" qualifiers to Uint8.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
const char * data(void) const
Return a pointer to the array represented.
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
static bool IsUpper(const CTempString str)
Checks if all letters in the given string are upper case.
size_type find_first_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character in the matching string within the current string,...
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ToUpper(string &str)
Convert string to upper case – string& version.
size_type size(void) const
Return the length of the represented array.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fWithCommas
Use commas as thousands separator.
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
void Stop(void)
Suspend the timer.
void Start(void)
Start the timer.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
TStr & SetStr(void)
Select the variant.
const TData & GetData(void) const
Get the Data member data.
void SetType(TType &value)
Assign a value to Type data member.
const TType & GetType(void) const
Get the Type member data.
void SetDb(const TDb &value)
Assign a value to Db data member.
TId & SetId(void)
Select the variant.
vector< CRef< CUser_field > > TData
TGi & SetGi(void)
Select the variant.
void SetName(const TName &value)
Assign a value to Name data member.
void SetStart(TStart value)
Assign a value to Start data member.
list< CRef< C_E > > Tdata
TSeq_id_interval & SetSeq_id_interval(void)
Select the variant.
list< CRef< CID2S_Seq_loc > > TLoc_set
TSeq_id & SetSeq_id(void)
Select the variant.
TWhole_gi_range & SetWhole_gi_range(void)
Select the variant.
TGi_interval & SetGi_interval(void)
Select the variant.
TSeq_id & SetSeq_id(void)
Select the variant.
void SetGi(TGi value)
Assign a value to Gi data member.
void SetSeq_id(TSeq_id &value)
Assign a value to Seq_id data member.
void SetSeq_loc(TSeq_loc &value)
Assign a value to Seq_loc data member.
TGi & SetGi(void)
Select the variant.
void SetCount(TCount value)
Assign a value to Count data member.
list< CRef< CBioseq > > TBioseqs
void SetStart(TStart value)
Assign a value to Start data member.
void SetStart(TStart value)
Assign a value to Start data member.
TWhole_gi & SetWhole_gi(void)
Select the variant.
TFeat & SetFeat(void)
Assign a value to Feat data member.
TContent & SetContent(void)
Assign a value to Content data member.
TWhole_seq_id & SetWhole_seq_id(void)
Select the variant.
void SetSeq_id(TSeq_id &value)
Assign a value to Seq_id data member.
void SetGraph(void)
Set NULL data member (assign 'NULL' value to Graph data member).
void SetLength(TLength value)
Assign a value to Length data member.
void ResetSeq_loc(void)
Reset Seq_loc data member.
TInts & SetInts(void)
Assign a value to Ints data member.
void SetLength(TLength value)
Assign a value to Length data member.
const TLocation & GetLocation(void) const
Get the Location member data.
@ e_not_set
No variant selected.
@ e_MaxChoice
== e_Variation+1
void SetSeqid(TSeqid value)
Assign a value to Seqid data member.
void SetTo(TTo value)
Assign a value to To data member.
TPatent & SetPatent(void)
Select the variant.
void SetId(TId &value)
Assign a value to Id data member.
void SetFrom(TFrom value)
Assign a value to From data member.
TGi GetGi(void) const
Get the variant data.
TGi & SetGi(void)
Select the variant.
bool IsGi(void) const
Check if variant Gi is selected.
void SetStrand(TStrand value)
Assign a value to Strand data member.
void SetCit(TCit &value)
Assign a value to Cit data member.
@ e_not_set
No variant selected.
void SetMin(TMin value)
Assign a value to Min data member.
void SetTitle(const TTitle &value)
Assign a value to Title data member.
void SetNumval(TNumval value)
Assign a value to Numval data member.
TValues & SetValues(void)
Assign a value to Values data member.
void SetGraph(TGraph &value)
Assign a value to Graph data member.
void SetMax(TMax value)
Assign a value to Max data member.
void SetLoc(TLoc &value)
Assign a value to Loc data member.
void SetAxis(TAxis value)
Assign a value to Axis data member.
const TSeq & GetSeq(void) const
Get the variant data.
TSet & SetSet(void)
Select the variant.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
bool IsSeq(void) const
Check if variant Seq is selected.
void SetId(TId &value)
Assign a value to Id data member.
bool IsSet(void) const
Check if variant Set is selected.
void SetClass(TClass value)
Assign a value to Class data member.
list< CRef< CSeq_entry > > TSeq_set
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_nuc_prot
nuc acid and coded proteins
@ e_not_set
No variant selected.
void ResetStrand(void)
Reset Strand data member.
void SetLength(TLength value)
Assign a value to Length data member.
void SetData(TData &value)
Assign a value to Data data member.
list< CRef< CSeqdesc > > Tdata
TId & SetId(void)
Assign a value to Id data member.
const TUser & GetUser(void) const
Get the variant data.
TTitle & SetTitle(void)
Select the variant.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
void SetDesc(TDesc &value)
Assign a value to Desc data member.
void SetExt(TExt &value)
Assign a value to Ext data member.
void SetHist(THist &value)
Assign a value to Hist data member.
TAnnot & SetAnnot(void)
Assign a value to Annot data member.
const TId & GetId(void) const
Get the Id member data.
TName & SetName(void)
Select the variant.
const Tdata & Get(void) const
Get the member data.
void SetType(TType value)
Assign a value to Type data member.
TLength GetLength(void) const
Get the Length member data.
list< CRef< CSeq_id > > TId
void SetReplaces(TReplaces &value)
Assign a value to Replaces data member.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
void SetInst(TInst &value)
Assign a value to Inst data member.
bool IsSetDescr(void) const
Check if a value has been assigned to Descr (descriptors) data member.
void SetTopology(TTopology value)
Assign a value to Topology data member.
EMol
molecule class in living organism
void SetDescr(TDescr &value)
Assign a value to Descr data member.
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
TUser & SetUser(void)
Select the variant.
void SetRepr(TRepr value)
Assign a value to Repr data member.
TNcbi2na & SetNcbi2na(void)
Select the variant.
list< CRef< CSeq_feat > > TFtable
const TNcbi2na & GetNcbi2na(void) const
Get the variant data.
E_Choice Which(void) const
Which variant is currently selected.
void SetReplaced_by(TReplaced_by &value)
Assign a value to Replaced_by data member.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_annot > > TAnnot
void SetLength(TLength value)
Assign a value to Length data member.
list< CRef< CDelta_seq > > Tdata
void SetStrand(TStrand value)
Assign a value to Strand data member.
TLinkage_evidence & SetLinkage_evidence(void)
Assign a value to Linkage_evidence data member.
void SetLinkage(TLinkage value)
Assign a value to Linkage data member.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
void ResetExt(void)
Reset Ext data member.
TNcbi4na & SetNcbi4na(void)
Select the variant.
const TDescr & GetDescr(void) const
Get the Descr member data.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
void SetMol(TMol value)
Assign a value to Mol data member.
bool IsUser(void) const
Check if variant User is selected.
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_raw
continuous sequence
@ eRepr_virtual
no seq data
@ e_Embl
EMBL specific information.
@ e_User
user defined object
@ e_Update_date
date of last update
@ e_Pub
a reference to the publication
@ e_Genbank
GenBank specific info.
@ e_Comment
a more extensive comment
@ e_Molinfo
info on the molecule and techniques
@ e_Create_date
date entry first created/released
@ e_Source
source of materials, includes Org-ref
@ eStrand_ds
double strand
unsigned int
A callback function used to compare two keys in a database.
Definition of all error codes used in SRA C++ support libraries.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
Lightweight interface for getting lines of data with minimal memory copying.
range(_Ty, _Ty) -> range< _Ty >
constexpr auto sort(_Init &&init)
constexpr bool empty(list< Ts... >) noexcept
const string version
version string
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
string s_Value(TValue value)
const GenericPointer< typename T::ValueType > T2 value
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
std::istream & in(std::istream &in_, double &x_)
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void split(std::vector< std::string > *strVec, const std::string &str_, const std::string &split_)
static bool GetIds(const T &d, set< string > &labels, const string name="", bool detect=false, bool found=false)
#define row(bind, expected)
void AddFeature(const CTempString &data)
char m_Packed4na[kAmbiguityBlockSize/2]
T4naBlocks::const_iterator m_4naBlocksIter
EBaseType GetBaseType(const S4naReader &reader) const
CWGSDb_Impl::SSeqTableCursor SSeqTableCursor
void x_CalculateAmbiguityMask(CWGSDb_Impl &db)
SAmbiguityInfo(TVDBRowId row_id, CWGSDb_Impl &db, SSeqTableCursor &cur)
bool x_AmbiguousBlock(size_t block_index) const
bool x_IsValid(const S4naReader &reader) const
vector< INSDC_coord_len > m_GapLen
vector< NCBI_WGS_component_props > m_GapProps
bool x_AddAmbiguousBlock(const Uint1 *ptr, TSeqPos count, TSeqPos pos, TWGSContigGapInfo &gap_info) const
bool x_AddAmbiguities(const Uint1 *ptr, TSeqPos count, TSeqPos pos, TWGSContigGapInfo &gap_info) const
vector< INSDC_coord_zero > m_GapStart
vector< Uint1 > GetAmbiguityBytes(SSeqTableCursor &cur)
void Advance(S4naReader &reader) const
TWGSContigGapInfo GetGapInfo() const
void x_SetAmbiguousBlock(size_t block_index)
map< TSeqPos, S4naBlock > T4naBlocks
CWGSSeqIterator::TWGSContigGapInfo TWGSContigGapInfo
TSeqPos Get2naLengthExact(TSeqPos pos, TSeqPos len, CWGSDb_Impl &db, SSeqTableCursor &cur) const
CRef< CSeq_data > Get4na(TSeqPos pos, TSeqPos len, CWGSDb_Impl &db, SSeqTableCursor &cur) const
void x_Calculate4na(CWGSDb_Impl &db) const
S4naReader Get4naReader(TSeqPos pos, CWGSDb_Impl &db, SSeqTableCursor &cur) const
vector< INSDC_4na_bin > m_Ambiguity4na
vector< Uint1 > m_AmbiguityMask
CRef< CSeq_data > Get2na(TSeqPos pos, TSeqPos len, SSeqTableCursor &cur) const
TSeqPos Get4naLengthBlock(TSeqPos pos, TSeqPos len) const
TSeqPos Get2naLengthBlock(TSeqPos pos, TSeqPos len) const
vector< NCBI_WGS_gap_linkage > m_GapLinkage
TSeqPos GetGapLengthExact(TSeqPos pos, TSeqPos len, CWGSDb_Impl &db, SSeqTableCursor &cur) const
vector< INSDC_coord_zero > m_AmbiguityPos
void x_Need4na(CWGSDb_Impl &db) const
size_t GetUsedMemory() const
TSeqPos Get4naLengthExact(TSeqPos pos, TSeqPos len, TSeqPos stop_2na_len, TSeqPos stop_gap_len, CWGSDb_Impl &db, SSeqTableCursor &cur) const
SFeatTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(TVDBRowId, PRODUCT_ROW_ID)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, PRODUCT_START)
CObjectIStreamAsnBinary m_ObjStr
DECLARE_VDB_COLUMN_AS_STRING(LOC_ACCESSION)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_feattype, FEAT_TYPE)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_seqtype, LOC_SEQ_TYPE)
DECLARE_VDB_COLUMN_AS_STRING(PRODUCT_ACCESSION)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_seqtype, PRODUCT_SEQ_TYPE)
DECLARE_VDB_COLUMN_AS(TVDBRowId, LOC_ROW_ID)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, LOC_START)
DECLARE_VDB_COLUMN_AS_STRING(SEQ_FEAT)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_loc_strand, LOC_STRAND)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, LOC_LEN)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, PRODUCT_LEN)
SGiIdxTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(TVDBRowId, PROT_ROW_ID)
DECLARE_VDB_COLUMN_AS(TVDBRowId, NUC_ROW_ID)
SProt0TableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(NCBI_gi, GI)
DECLARE_VDB_COLUMN_AS_STRING(GB_ACCESSION)
DECLARE_VDB_COLUMN_AS(uint32_t, ACC_VERSION)
DECLARE_VDB_COLUMN_AS_STRING(SEQID_GNL_PREFIX)
DECLARE_VDB_COLUMN_AS_STRING(PROTEIN_NAME)
string GetAcc(Uint4 id) const
pair< TVDBRowId, TVDBRowId > row_range_t
DECLARE_VDB_COLUMN_AS(row_range_t, NAME_ROW_RANGE)
SProtIdxTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(TVDBRowId, ROW_ID)
DECLARE_VDB_COLUMN_AS_STRING(PUBLIC_COMMENT)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_START)
DECLARE_VDB_COLUMN_AS(TVDBRowId, REPLACED_BY)
SProtTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS_STRING(DESCR)
DECLARE_VDB_COLUMN_AS_STRING(PROTEIN)
DECLARE_VDB_COLUMN_AS(NCBI_taxid, TAXID)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_END)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_PRODUCT_ROW_ID)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, PROTEIN_LEN)
DECLARE_VDB_COLUMN_AS(NCBI_gb_state, GB_STATE)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_hash, HASH)
DECLARE_VDB_COLUMN_AS_STRING(PRODUCT_NAME)
DECLARE_VDB_COLUMN_AS_STRING(TITLE)
DECLARE_VDB_COLUMN_AS_STRING(REF_ACC)
DECLARE_VDB_COLUMN_AS(TVDBRowId, REPLACES)
DECLARE_VDB_COLUMN_AS_STRING(ANNOT)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_component_props, COMPONENT_PROPS)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_START)
SScfTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_gap_linkage, COMPONENT_LINKAGE)
DECLARE_VDB_COLUMN_AS_STRING(ACCESSION)
DECLARE_VDB_COLUMN_AS(TVDBRowId, COMPONENT_ID)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, COMPONENT_LEN)
DECLARE_VDB_COLUMN_AS(NCBI_gb_state, GB_STATE)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_PRODUCT_ROW_ID)
DECLARE_VDB_COLUMN_AS(INSDC_coord_one, COMPONENT_START)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_END)
DECLARE_VDB_COLUMN_AS_STRING(SCAFFOLD_NAME)
DECLARE_VDB_COLUMN_AS(bool, CIRCULAR)
DECLARE_VDB_COLUMN_AS_STRING(SEQID_GNL_PREFIX)
SSeq0TableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(NCBI_taxid, TAXID)
DECLARE_VDB_COLUMN_AS_STRING(SEQID_GNL_PREFIX)
DECLARE_VDB_COLUMN_AS(Uint1, MOL)
DECLARE_VDB_COLUMN_AS_STRING(ACC_PREFIX)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, ACC_CONTIG_LEN)
DECLARE_VDB_COLUMN_AS(INSDC_4na_bin, READ)
SSeq4naTableCursor(const CVDBTable &table)
SSeqTableCursor(const CVDBTable &table)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_PRODUCT_ROW_ID)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_hash, HASH)
CVDBColumnBits< 2 > m_READ_2na
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, TRIM_START)
DECLARE_VDB_COLUMN_AS_STRING(LABEL)
DECLARE_VDB_COLUMN_AS(INSDC_quality_phred, QUALITY)
DECLARE_VDB_COLUMN_AS_STRING(CONTIG_NAME)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_gap_linkage, GAP_LINKAGE)
DECLARE_VDB_COLUMN_AS(INSDC_4na_bin, AMBIGUITY_4NA)
DECLARE_VDB_COLUMN_AS(NCBI_gb_state, GB_STATE)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, AMBIGUITY_POS)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, GAP_LEN)
DECLARE_VDB_COLUMN_AS_STRING(TITLE)
DECLARE_VDB_COLUMN_AS(NCBI_gi, GI)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, READ_START)
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, READ_LEN)
DECLARE_VDB_COLUMN_AS_STRING(ANNOT)
DECLARE_VDB_COLUMN_AS(Uint1, AMBIGUITY_MASK)
DECLARE_VDB_COLUMN_AS(bool, CIRCULAR)
DECLARE_VDB_COLUMN_AS_STRING(ACCESSION)
DECLARE_VDB_COLUMN_AS_STRING(DESCR)
DECLARE_VDB_COLUMN_AS(INSDC_coord_zero, GAP_START)
DECLARE_VDB_COLUMN_AS(row_range_t, CONTIG_NAME_ROW_RANGE)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_START)
DECLARE_VDB_COLUMN_AS(uint32_t, ACC_VERSION)
DECLARE_VDB_COLUMN_AS_STRING(PUBLIC_COMMENT)
DECLARE_VDB_COLUMN_AS(NCBI_WGS_component_props, GAP_PROPS)
DECLARE_VDB_COLUMN_AS_STRING(NUC_PROT_DESCR)
DECLARE_VDB_COLUMN_AS_STRING(NAME)
pair< TVDBRowId, TVDBRowId > row_range_t
DECLARE_VDB_COLUMN_AS(INSDC_coord_len, TRIM_LEN)
DECLARE_VDB_COLUMN_AS(TVDBRowId, FEAT_ROW_END)
CRef< CSeq_data > Get4na(TSeqPos pos, TSeqPos len) const
TSeqPos Get4naLengthExact(TSeqPos pos, TSeqPos len, TSeqPos stop_2na_len, TSeqPos stop_gap_len) const
TSeqPos Get2naLengthExact(TSeqPos pos, TSeqPos len) const
CRef< CWGSDb_Impl::SAmbiguityInfo > m_AmbiguityInfo
CRef< CWGSDb_Impl::SSeqTableCursor > m_Seq
vector< Uint1 > GetAmbiguityBytes() const
SAmbiguityInfo * operator->() const
SAmbiguityAccess(CRef< CWGSDb_Impl::SAmbiguityInfo > &info, CWGSDb_Impl &db, const CRef< CWGSDb_Impl::SSeqTableCursor > &seq, TVDBRowId row_id)
CRef< CSeq_data > Get2na(TSeqPos pos, TSeqPos len) const
SAmbiguityAccess(const SAmbiguityAccess &)=delete
TSeqPos GetGapLengthExact(TSeqPos pos, TSeqPos len) const
void operator=(const SAmbiguityAccess &)=delete
CRef< CSeq_literal > literal
COpenRange< TSeqPos > range
const INSDC_coord_len * gaps_len
bool IsInGap(TSeqPos pos) const
TSeqPos GetGapLength(TSeqPos pos, TSeqPos len) const
const NCBI_WGS_component_props * gaps_props
TSeqPos GetFrom(void) const
const INSDC_coord_zero * gaps_start
TSeqPos GetDataLength(TSeqPos pos, TSeqPos len) const
const NCBI_WGS_gap_linkage * gaps_linkage
CRef< CID2S_Split_Info > split
void x_CreateProtSet(TVDBRowIdRange range)
void x_AddFeature(const CWGSFeatureIterator &it, CSeq_annot::TData::TFtable &dst)
void x_AddProducts(const vector< TVDBRowId > &product_row_ids)
void x_AddDescr(CTempString bytes)
SWGSCreateInfo(const CWGSDb &db)
void x_SetSplitVersion(TSplitVersion split_version)
void x_SetFlags(TFlags flags)
void x_SetSeq(CWGSProteinIterator &it)
void x_AddFeaturesSplit(TVDBRowIdRange range, vector< TVDBRowId > &product_row_ids)
CRef< CWGSAsnBinData > data
CBioseq_set & x_GetProtSet(void)
CRef< CID2S_Chunk > chunk
void x_AddFeatures(TVDBRowIdRange range, vector< TVDBRowId > &product_row_ids)
SWGSCreateInfo(const CWGSDb &db, EFromFlags, TFlags flags)
SWGSCreateInfo(const CWGSDb &db, EFromSplitVersion, TSplitVersion split_version)
TSplitVersion split_version
void x_AddFeaturesDirect(TVDBRowIdRange range, vector< TVDBRowId > &product_row_ids)
@ eFeatLocIdUninitialized
static const TSplitVersion kDefaultSplitVersion
void AddFeature(NCBI_WGS_feattype type, COpenRange< TSeqPos > range)
void AddFeatType(NCBI_WGS_feattype feat_type)
static bool ExpandRange(COpenRange< TSeqPos > &dst, COpenRange< TSeqPos > src)
void AddFeatRange(COpenRange< TSeqPos > range)
bitset< CSeqFeatData::e_MaxChoice > feat_types
vector< COpenRange< TSeqPos > > loc_ranges
void AddContent(CID2S_Chunk_Info &chunk, CSeq_id &feat_id)
void AddFeature(bool with_product, NCBI_WGS_feattype type, COpenRange< TSeqPos > range)
CRef< CID2S_Chunk_Info > CreateChunkInfo(int index, CWGSProteinIterator &prot_it, const vector< TVDBRowId > &product_row_ids, size_t product_index)
CRef< CID2S_Bioseq_Ids::C_E > seq_place
SWGSFeatChunkInfo(CSeq_id &main_id, CSeq_id &feat_id)
static DP_BlockInfo * blocks
#define INIT_VDB_COLUMN(name)
#define INIT_VDB_COLUMN_AS(name, type)
#define INIT_VDB_COLUMN_BACKUP(name, backup_name)
pair< TVDBRowId, TVDBRowCount > TVDBRowIdRange
#define INIT_OPTIONAL_VDB_COLUMN(name)
int32_t NCBI_WGS_gap_linkage
int16_t NCBI_WGS_component_props
@ NCBI_WGS_gap_linkage_evidence_paired_ends
@ NCBI_WGS_gap_linkage_linked
@ NCBI_WGS_gap_unknown_type
@ NCBI_WGS_gap_centromere
@ NCBI_WGS_gap_heterochromatin
static const char kMasterDescrMark[]
static const bool kRecoverGaps
static const size_t kProdPerChunk
static TTaxId s_GetTaxId(const CVDBValueFor< NCBI_taxid > &value)
#define DEFAULT_AMBIGUITY_CACHE_SIZE
void sx_SetSplitInterval(CID2S_Seq_loc &split_loc, CSeq_id &id, TSeqPos pos, TSeqPos end)
static CSafeStatic< TGlobalAmbiguityCache, SStaticGlobalAmbiguityCacheCallbacks > s_GlobalAmbiguityCache
static bool s_UseAmbiguity4na(void)
static void s_Convert_2na_to_4na(char *dst_4na, const char *src_2na, size_t base_count)
static bool sx_HasAmbiguity(const Uint1 *ptr, const Uint1 *end)
static void s_Set_4na_gap(vector< char > &dst_4na_vec, size_t offset, size_t len)
static int kAssignedDefaultSplitVersion
static CRef< CSeq_literal > sx_MakeGapLiteral(TSeqPos len, NCBI_WGS_component_props props, NCBI_WGS_gap_linkage gap_linkage)
static const size_t kMinFeatCountToSplit
static bool s_UseFull4naBlocks(void)
static void s_AddUserObjectType(const CSeqdesc &desc, set< string > &existing_uo_types)
static const TSeqPos kDataChunkSize
static CWGSSeqIterator::TIncludeFlags s_ToFlags(CWGSSeqIterator::EWithdrawn withdrawn)
static const size_t kFeatPerChunk
static const Uint1 * sx_FindAmbiguity(const Uint1 *ptr, const Uint1 *end)
static const TSeqPos kChunk2naSize
static void s_GetMinMax(const Uint1 *arr, size_t size, Uint1 &min_v, Uint1 &max_v)
static void s_Copy_4na(char *dst_4na, TSeqPos dst_offset, const char *src_4na, TSeqPos src_offset, size_t base_count)
static char s_ConvertBits_2na_to_4na_2nd(char bits_2na)
static size_t sx_Find_4na_Ambiguity(const char *ptr, size_t offset, size_t base_count)
void sx_SetSplitId(CID2S_Bioseq_Ids::C_E &split_id, CSeq_id &id)
void sx_AddAnnotBytes(CBioseq::TAnnot &annot_set, CTempString bytes)
static bool s_GetClipByQuality(void)
static const TSeqPos kMinDataSplitSize
static char s_ConvertBits_2na_to_4na(char bits_2na)
static bool kEnableSplitData
static void s_Set_4na(vector< char > &dst_4na_vec, size_t offset, INSDC_4na_bin amb)
static bool kEnableSplitProd
static Uint1 sx_Get_4na(const char *ptr, size_t offset)
static void sx_AddMasterDescr(const CWGSDb &db, SWGSCreateInfo &info, SWGSDb_Defs::TFlags flags)
int sx_StringToNonNegativeInt(const CTempString &str)
void sx_AddSplitIds(CID2S_Bioseq_Ids::Tdata &split_ids, const CBioseq::TId &ids)
static bool s_UseAmbiguityMask(void)
static const TSeqPos kMin2naSize
static bool sx_Is2na(Uint1 b)
bool sx_SetAccession(CSeq_id &id, CTempString accession)
static const char kSeq_descrFirstByte
static const TSeqPos kChunk4naSize
void sx_SetTag(CDbtag &tag, CTempString str)
static int s_GetDebugLevel(void)
static bool kEnableSplitQual
static const TSeqPos kAmbiguityBlockSize
int sx_GetStringId(CTempString str)
static void sx_Assign(vector< Value > &dst, const CVDBValueFor< Value > &src)
void sx_AddDescrBytes(CSeq_descr &descr, CTempString bytes)
static bool kEnableSplitFeat
NCBI_PARAM_DEF_EX(int, WGS, DEBUG, 0, eParam_NoThread, WGS_DEBUG)
static void sx_AddEvidence(CSeq_gap &gap, CLinkage_evidence::TType type)
static TGi s_ToGi(TVDBRowId gi, const char *method)
limited_resource_map< pair< string, TVDBRowId >, CRef< CWGSDb_Impl::SAmbiguityInfo >, size_t > TGlobalAmbiguityCache
static char s_ConvertBits_2na_to_4na_1st(char bits_2na)
static bool s_UseGapInfo(void)
static void s_SetAmbiguitiesPos(vector< char > &dst_4na_vec, TSeqPos pos, TSeqPos len, const vector< INSDC_coord_zero > &amb_pos, const vector< INSDC_4na_bin > &amb_4na)
static bool sx_HasMoreProducts(const CWGSDb &db, TVDBRowIdRange range, size_t count)
NCBI_DEFINE_ERR_SUBCODE_X(19)
DEFINE_STATIC_FAST_MUTEX(s_GlobalAmbiguityCacheMutex)
static void s_AddGiRange(CID2S_Seq_loc::TLoc_set &loc_set, CSeq_id::TGi gi_range_start, CSeq_id::TGi gi_range_stop)
static void s_SetGaps(vector< char > &dst_4na_vec, TSeqPos pos, TSeqPos len, CWGSSeqIterator::TWGSContigGapInfo gap_info)
static size_t s_GetAmbiguityCacheSize(void)
static const TSeqPos kQualChunkSize
static const TSeqPos kSplit2naSize
bool sx_SetVersion(CSeq_id &id, int version)
static void s_SetAmbiguitiesBlocks(vector< char > &dst_4na_vec, TSeqPos pos, TSeqPos len, const CWGSDb_Impl::SAmbiguityInfo::T4naBlocks &blocks)
static string s_GetUserObjectType(const CSeqdesc &desc)
void sx_AddSplitId(CID2S_Bioseq_Ids::Tdata &split_ids, CSeq_id &id)
NCBI_PARAM_DEF(bool, WGS, MASTER_DESCR, true)
static const TSeqPos kSplit4naSize
int sx_NewStringToNonNegativeInt(CTempString str)
static void s_Pack_4na(char *dst_packed_4na, const Uint1 *src_4na, size_t base_count)
NCBI_PARAM_DECL(int, WGS, DEBUG)
@ NCBI_gb_state_eWGSGenBankReplaced
@ NCBI_gb_state_eWGSGenBankMissing
@ NCBI_gb_state_eWGSGenBankLive
uint8_t INSDC_quality_phred
uint8_t NCBI_WGS_loc_strand
uint8_t NCBI_WGS_feattype
static wxAcceleratorEntry entries[3]