80 m_IsAA (prot_nucl ==
'p'),
85 m_VolStart (vol_start),
88 m_HaveColumns (
false),
89 m_SeqFileOpened(
false),
90 m_HdrFileOpened(
false),
91 m_HashFileOpened(
false),
92 m_OidFileOpened(
false)
282 TIndx start_offset = 0;
283 TIndx end_offset = 0;
291 return int(end_offset - start_offset - 1);
298 TIndx start_offset = 0;
299 TIndx end_offset = 0;
306 int whole_bytes =
int(end_offset - start_offset - 1);
315 int remainder = amb_char & 3;
316 return (whole_bytes * 4) + remainder;
321 TIndx start_offset = 0;
322 TIndx end_offset = 0;
329 int whole_bytes =
int(end_offset - start_offset - 1);
337 return (whole_bytes * 4) + (oid & 0x03);
353 vector<Uint1> translated;
354 translated.resize(512);
357 0x21, 0x22, 0x24, 0x28,
358 0x41, 0x42, 0x44, 0x48,
359 0x81, 0x82, 0x84, 0x88 };
364 for(pair1 = 0; pair1 < 16; pair1++) {
365 for(pair2 = 0; pair2 < 16; pair2++) {
366 Int2 index = (pair1 * 16 + pair2) * 2;
368 translated[index] =
convert[pair1];
369 translated[index+1] =
convert[pair2];
390 vector<char> & buf4bit,
398 buf4bit.resize(estimated_length);
402 for(
int i=0;
i<inp_chars;
i++) {
403 Uint4 inp_char = (buf2bit[
i] & 0xFF);
405 buf4bit[bytes] = expanded[ (inp_char*2) ];
406 buf4bit[bytes+1] = expanded[ (inp_char*2) + 1 ];
413 Uint1 remainder_bits = 2 * bases_remain;
414 Uint1 remainder_mask = (0xFF << (8 - remainder_bits)) & 0xFF;
415 Uint4 last_masked = buf2bit[inp_chars] & remainder_mask;
417 buf4bit[bytes++] = expanded[ (last_masked*2) ];
419 if (bases_remain > 2) {
420 buf4bit[bytes ++] = expanded[ (last_masked*2)+1 ];
424 buf4bit.resize(bytes);
426 _ASSERT(estimated_length == (
int)buf4bit.size());
445 vector<Uint1> translated;
446 translated.reserve(1024);
448 for(
int i = 0;
i<256;
i++) {
449 int p1 = (
i >> 6) & 0x3;
450 int p2 = (
i >> 4) & 0x3;
451 int p3 = (
i >> 2) & 0x3;
454 translated.push_back(1 << p1);
455 translated.push_back(1 << p2);
456 translated.push_back(1 << p3);
457 translated.push_back(1 << p4);
492 int pos =
range.begin;
494 int input_chars_begin =
range.begin / 4;
495 int input_chars_end = (
range.end + 3) / 4;
497 int whole_chars_begin = (
range.begin + 3) / 4;
498 int whole_chars_end =
range.end / 4;
500 int p = input_chars_begin;
502 if (p < whole_chars_begin) {
503 Int4 table_offset = (buf2bit[input_chars_begin] & 0xFF) * 4;
505 int endpt = (input_chars_begin + 1) * 4;
507 if (endpt >
range.end) {
511 for(
int k =
range.begin; k < endpt; k++) {
518 buf8bit[pos++] = expanded[ table_offset + 1 ];
522 buf8bit[pos++] = expanded[ table_offset + 2 ];
526 buf8bit[pos++] = expanded[ table_offset + 3 ];
541 p = whole_chars_begin;
543 while(p < whole_chars_end) {
544 Int4 table_offset = (buf2bit[p] & 0xFF) * 4;
546 buf8bit[pos++] = expanded[ table_offset ];
547 buf8bit[pos++] = expanded[ table_offset + 1 ];
548 buf8bit[pos++] = expanded[ table_offset + 2 ];
549 buf8bit[pos++] = expanded[ table_offset + 3 ];
553 if (p < input_chars_end) {
554 Int4 table_offset = (buf2bit[p] & 0xFF) * 4;
556 int remains = (
range.end & 0x3);
559 buf8bit[pos++] = expanded[ table_offset ];
562 buf8bit[pos++] = expanded[ table_offset + 1 ];
565 buf8bit[pos++] = expanded[ table_offset + 2 ];
625 return (ambchars[
i] >> 16) & 0xFFF;
641 return ambchars[
i+1];
661 return (ambchars[
i] >> 28) & 0xF;
677 return (ambchars[
i] >> 24) & 0xF;
693 return ambchars[
i] & 0xFFFFFF;
711 const vector<Int4> & amb_chars)
713 if (amb_chars.empty())
717 Uint4 amb_num = amb_chars[0];
723 bool new_format = (amb_num & 0x80000000) != 0;
726 amb_num &= 0x7FFFFFFF;
729 for(
Uint4 i=1;
i < amb_num+1;
i++) {
744 Int4 pos = position / 2;
745 Int4 rem = position & 1;
746 Uint1 char_l = char_r << 4;
753 for(j = 0; j <= row_len; j++) {
755 buf4bit[index] = (buf4bit[index] & 0x0F) + char_l;
758 buf4bit[index] = (buf4bit[index] & 0xF0) + char_r;
786 const vector<Int4> & amb_chars,
789 if (amb_chars.empty() || !seq )
return;
791 Uint4 amb_num = amb_chars[0];
794 bool new_format = (amb_num & 0x80000000) != 0;
796 if (new_format) amb_num &= 0x7FFFFFFF;
798 for(
Uint4 i = 1;
i < amb_num+1;
i++) {
815 if (position + row_len <= region.
begin)
818 if(position >= region.
end)
821 for (
int j = 0; j < row_len; ++j, ++position)
822 if ( position >= region.
begin && position < region.
end)
823 seq[position] = trans_ch;
841 const char * seq_buffer,
852 vector<char> aa_data;
853 aa_data.resize(length);
855 for(
int i = 0;
i < length;
i++) {
856 aa_data[
i] = seq_buffer[
i];
859 seqinst.
SetSeq_data().SetNcbistdaa().Set().swap(aa_data);
878 const char * seq_buffer,
881 int whole_bytes = length / 4;
882 int partial_byte = ((length & 0x3) != 0) ? 1 : 0;
884 vector<char> na_data;
885 na_data.resize(whole_bytes + partial_byte);
887 for(
int i = 0;
i<whole_bytes;
i++) {
888 na_data[
i] = seq_buffer[
i];
892 na_data[whole_bytes] = seq_buffer[whole_bytes] & (0xFF - 0x03);
895 seqinst.
SetSeq_data().SetNcbi2na().Set().swap(na_data);
916 const char * seq_buffer,
918 vector<Int4> & amb_chars)
920 vector<char> buffer_4na;
924 seqinst.
SetSeq_data().SetNcbi4na().Set().swap(buffer_4na);
944 typedef list< CRef<CBlast_def_line> >::const_iterator TDefIt;
945 typedef list< CRef<CSeq_id > >::const_iterator TSeqIt;
947 const list< CRef<CBlast_def_line> > & dl = deflines->
Get();
949 bool first_defline(
true);
951 for(TDefIt iter = dl.begin(); iter != dl.end(); iter++) {
956 if (! title.empty()) {
961 bool wrote_seqids(
false);
964 const list< CRef<CSeq_id > > & sl = defline.
GetSeqid();
966 bool first_seqid(
true);
973 for (TSeqIt seqit = sl.begin(); seqit != sl.end(); seqit++) {
980 (*seqit)->WriteAsFasta(oss);
988 first_defline =
false;
1015 typedef list< CRef<CSeq_id> > TSeqidList;
1017 ITERATE(TSeqidList, iter, seqids) {
1038 const CSeq_id * preferred_seqid)
1053 if (preferred_gi !=
ZERO_GI || preferred_seqid) {
1057 if (preferred_gi !=
ZERO_GI) {
1060 seqid.
Reset(preferred_seqid);
1068 new_bdls->
Set().push_front(*iter);
1070 new_bdls->
Set().push_back(*iter);
1080 list< CRef<CSeqdesc> >
1083 const CSeq_id * preferred_seqid)
1087 const bool provide_new_taxonomy_info =
true;
1090 const char * TAX_ORGREF_DB_NAME =
"taxon";
1092 list< CRef<CSeqdesc> > taxonomy;
1101 typedef list< CRef<CBlast_def_line> > TBDLL;
1103 typedef TBDLL::const_iterator TBDLLConstIter;
1105 const TBDLL & dl = bdls->
Get();
1111 for(TBDLLConstIter iter = dl.begin(); iter != dl.end(); iter ++) {
1114 if ((*iter)->CanGetTaxid()) {
1115 taxid = (*iter)->GetTaxid();
1121 bool have_org_desc =
false;
1124 have_org_desc =
true;
1128 bool found_taxid_in_taxonomy_blastdb =
true;
1130 if ((! have_org_desc) && provide_new_taxonomy_info) {
1134 found_taxid_in_taxonomy_blastdb =
false;
1138 if (provide_new_taxonomy_info) {
1139 if (have_org_desc) {
1143 org_tag->
SetDb(TAX_ORGREF_DB_NAME);
1147 if (found_taxid_in_taxonomy_blastdb) {
1151 org->
SetDb().push_back(org_tag);
1160 taxonomy.push_back(desc);
1162 if (use_taxinfo_cache) {
1184 if (oss.size() == 1) {
1187 const vector<char> & v = *oss.front();
1194 size += (**iter1).size();
1201 temp.append(& (**iter3)[0], (*iter3)->size());
1218 if ( !bioseq.IsSetDescr() ) {
1224 if ( !(*iter)->IsUser() ) {
1231 const vector< CRef< CUser_field > >& usf = uobj.
GetData();
1233 _ASSERT(usf.front()->CanGetData());
1234 if (usf.front()->GetData().IsOss()) {
1255 vector<char> hdr_data;
1258 if (! hdr_data.empty()) {
1271 vector< vector<char>* > & strs = uf->
SetData().SetOss();
1274 strs.push_back(
new vector<char>);
1275 strs[0]->swap(hdr_data);
1277 uobj->
SetData().push_back(uf);
1289 const CSeq_id * target_seq_id,
1293 typedef list< CRef<CBlast_def_line> > TDeflines;
1297 list< CRef< CSeq_id > > seqids;
1309 if ((target_gi !=
ZERO_GI) || target_seq_id) {
1319 seqid.
Reset(target_seq_id);
1325 ITERATE(TDeflines, iter, orig_deflines->
Get()) {
1333 if (filt_dl.
Empty()) {
1335 "Error: oid headers do not contain target gi/seq_id.");
1337 defline_set->
Set().push_back(filt_dl);
1340 defline_set = orig_deflines;
1343 if (defline_set.
Empty() ||
1344 (! defline_set->
CanGet()) ||
1345 (0 == defline_set->
Get().size())) {
1349 defline = defline_set->
Get().front();
1350 if (! defline->CanGetSeqid()) {
1353 seqids = defline->GetSeqid();
1362 const char * seq_buffer = 0;
1385 vector<Int4> ambchars;
1389 if (ambchars.empty()) {
1413 bioseq->
SetInst().SetMol(is_prot
1420 bioseq->
SetId().swap(seqids);
1434 desc1->
SetTitle().swap(description);
1439 seq_desc_set.
Set().push_back(desc1);
1441 if (! desc2.
Empty()) {
1442 seq_desc_set.
Set().push_back(desc2);
1446 list< CRef<CSeqdesc> > tax =
1450 bioseq->
SetDescr().Set().push_back(*iter);
1467 switch(alloc_type) {
1469 retval = (
char*)
malloc(length);
1473 retval =
new char[length];
1493 x_GetAmbigSeq(oid, & buf1, nucl_code, alloc_type, region, masks);
1504 if (!masks || masks->
empty())
return;
1508 unsigned int begin(
range.begin);
1509 unsigned int end(
range.end);
1511 while (i < masks->
size() && (*masks)[
i].second <= begin) ++
i;
1513 while (i < masks->
size() && (*masks)[
i].
first < end) {
1514 for (
size_t j =
max((*masks)[
i].
first, begin);
1515 j <
min((*masks)[
i].second, end); ++j) {
1516 seq[j] = mask_letter;
1530 if ((partial_ranges ==
NULL) || (partial_ranges->
size() == 0)) {
1534 const char *
tmp(0);
1541 int num_ranges =
static_cast<int>(partial_ranges->
size());
1542 if ((*partial_ranges)[num_ranges - 1].second >
base_length) {
1548 char *seq = *
buffer + (sentinel ? 1 : 0);
1550 vector<Int4> ambchars;
1553 int begin(riter->first);
1554 int end(riter->second);
1574 (*buffer)[0] = (char)15;
1600 const char *
tmp(0);
1623 char *seq = *
buffer -
range.begin + (sentinel ? 1 : 0);
1627 vector<Int4> ambchars;
1636 bool use_range_set =
true;
1642 || rciter->second->GetRanges().empty()
1644 use_range_set =
false;
1647 range_set = rciter->second->GetRanges();
1651 if (!use_range_set) {
1666 int begin(riter->first);
1667 int end(riter->second);
1687 (*buffer)[0] = (char)15;
1693 if (masks) masks->
clear();
1709 if (sequence.
length() == 0) {
1711 "Error: packed sequence data is not valid.");
1714 const char * seq_buffer = sequence.
data();
1716 int whole_bytes =
static_cast<int>(sequence.
length()) - 1;
1717 int remainder = sequence[whole_bytes] & 3;
1726 vector<Int4> ambchars;
1727 ambchars.reserve(ambiguities.
length()/4);
1729 for(
size_t i = 0;
i < ambiguities.
length();
i+=4) {
1731 ambchars.push_back(
A);
1757 const char **
buffer)
const
1759 TIndx start_offset = 0;
1760 TIndx end_offset = 0;
1773 if (
'p' == seqtype) {
1778 length =
int(end_offset - start_offset);
1785 if (! (*
buffer - 1))
return -1;
1787 }
else if (
'n' == seqtype) {
1801 if (! (*
buffer))
return -1;
1815 int whole_bytes =
int(end_offset - start_offset - 1);
1817 char last_char = (*buffer)[whole_bytes];
1819 int remainder = last_char & 3;
1820 length = (whole_bytes * 4) + remainder;
1828 list< CRef< CSeq_id > > seqids;
1833 if ((! defline_set.
Empty()) && defline_set->
CanGet()) {
1835 if (! (*defline)->CanGetSeqid()) {
1840 seqids.push_back(*seqid);
1850 list< CRef< CSeq_id > > seqids;
1855 if ((! defline_set.
Empty()) && defline_set->
CanGet()) {
1857 if (! (*defline)->CanGetSeqid()) {
1862 seqids.push_back(*seqid);
1900 #ifdef NCBI_STRICT_TAX_ID
1903 tax_ids.
insert(leaf_ids.begin(), leaf_ids.end());
1907 if(user_tax_ids.
size() > tax_ids.
size()) {
1909 if(user_tax_ids.
find(*itr) != user_tax_ids.
end()) {
1917 if(tax_ids.
find(*itr) != tax_ids.
end()) {
1941 if(taxid_set.
size() > user_tax_ids.
size()) {
1945 if(user_tax_ids.
find(*itr) == user_tax_ids.
end()) {
1954 bool * changed)
const
1956 typedef list< CRef<CBlast_def_line> > TBDLL;
1957 typedef TBDLL::iterator TBDLLIter;
1964 if (useCache && cached.first.NotEmpty()) {
1966 *changed = cached.second;
1969 return cached.first;
1972 bool asn_changed =
false;
1983 TBDLL & dl = BDLS->
Set();
1985 for(TBDLLIter iter = dl.begin(); iter != dl.end(); ) {
1988 bool have_memb =
true;
1998 int memb_mask = 0x1 << (
m_MemBit-1);
2000 if ((bits & memb_mask) == 0) {
2011 bool have_user =
false, have_volume =
false;
2014 if (have_user && have_volume)
break;
2016 have_memb = have_user && have_volume;
2025 if( (*vtaxid)->GetNumTaxIds() > 0) {
2044 TBDLLIter eraseme = iter++;
2056 cached.first = BDLS;
2057 cached.second = asn_changed;
2059 cached.first = BDLS;
2060 cached.second = asn_changed;
2072 typedef list< CRef<CBlast_def_line> > TBDLL;
2073 typedef TBDLL::iterator TBDLLIter;
2080 if (useCache && cached.first.NotEmpty()) {
2082 *changed = cached.second;
2085 return cached.first;
2088 bool asn_changed =
false;
2099 TBDLL & dl = BDLS->
Set();
2101 for(TBDLLIter iter = dl.begin(); iter != dl.end(); ) {
2104 bool have_memb =
true;
2114 int memb_mask = 0x1 << (
m_MemBit-1);
2116 if ((bits & memb_mask) == 0) {
2127 bool have_user =
false, have_volume =
false;
2130 if (have_user && have_volume)
break;
2132 have_memb = have_user && have_volume;
2141 if( (*vtaxid)->GetNumTaxIds() > 0) {
2161 TBDLLIter eraseme = iter++;
2173 cached.first = BDLS;
2174 cached.second = asn_changed;
2176 cached.first = BDLS;
2177 cached.second = asn_changed;
2186 bool * changed)
const
2192 if (! raw_data.
size()) {
2200 bdls.
Reset(
new objects::CBlast_def_line_set);
2206 if (! (**dl).CanGetSeqid()) {
2216 if (dbt.
GetDb() ==
"BL_ORD_ID") {
2241 if (! raw_data.
size()) {
2253 bdls.
Reset(
new objects::CBlast_def_line_set);
2259 if (! (**dl).CanGetSeqid()) {
2269 if (dbt.
GetDb() ==
"BL_ORD_ID") {
2287 TIndx hdr_start = 0;
2297 return CTempString(asn_region, hdr_end - hdr_start);
2302 vector<char> & hdr_data )
const
2312 bool changed =
false;
2325 hdr_data.assign(s.data(), s.data() + s.size());
2333 vector<Int4> & ambchars)
const
2335 TIndx start_offset = 0;
2336 TIndx end_offset = 0;
2344 "File error: could not get ambiguity data.");
2347 int length =
int(end_offset - start_offset);
2350 int total = length / 4;
2358 total &= 0x7FFFFFFF;
2360 ambchars.resize(total);
2362 for(
int i = 0;
i<total;
i++) {
2416 typedef list< CRef< CBlast_def_line > >::const_iterator TI1;
2419 TI1 it1 = BDLS->
Get().begin();
2421 for(; it1 != BDLS->
Get().end(); it1++) {
2422 if ((*it1)->IsSetOther_info()) {
2423 TI2 it2 = (*it1)->GetOther_info().begin();
2424 TI2 it2end = (*it1)->GetOther_info().end();
2426 for(; it2 != it2end; it2++) {
2461 return ! oids.empty();
2494 "GI list specified but no ISAM file found for GI in " +
m_VolName);
2506 "TI list specified but no ISAM file found for TI in " +
m_VolName);
2518 "IPG list specified but no ISAM file found for IPG in " +
m_VolName);
2530 "SI list specified but no ISAM file found for SI in " +
m_VolName);
2548 "GI list specified but no ISAM file found for GI in " +
m_VolName);
2560 "TI list specified but no ISAM file found for TI in " +
m_VolName);
2572 "SI list specified but no ISAM file found for SI in " +
m_VolName);
2594 typedef list< CRef< CBlast_def_line > >::const_iterator TI1;
2595 typedef list< CRef< CSeq_id > >::const_iterator TI2;
2597 TI1 it1 = BDLS->
Get().begin();
2601 for(; it1 != BDLS->
Get().end(); it1++) {
2602 if ((*it1)->CanGetSeqid()) {
2603 TI2 it2 = (*it1)->GetSeqid().begin();
2604 TI2 it2end = (*it1)->GetSeqid().end();
2608 for(; it2 != it2end; it2++) {
2609 if ((*it2)->IsGi()) {
2610 gi = (*it2)->GetGi();
2623 const string & str_id,
2625 vector<int> & oids)
const
2627 bool vcheck (
false);
2628 bool fits_in_four = (ident == -1) || ! (ident >> 32);
2629 bool needs_four =
true;
2649 oids.push_back(oid);
2662 oids.push_back(oid);
2675 oids.push_back(oid);
2696 oids.push_back((
int) ident);
2703 "Internal error: hashes are not Seq-ids.");
2706 if ((! fits_in_four) && needs_four) {
2709 "ID overflows range of specified type.");
2718 vector<int> & oids)
const
2730 size_t pos = acc.find(
".");
2733 string ver_str(acc, pos+1, acc.size()-(pos+1));
2738 string nover(acc, 0, pos);
2741 while((pos2 = nover.find(
"|")) != nover.npos) {
2742 nover.erase(0, pos2+1);
2746 list< CRef<CSeq_id> > ids =
2752 const CTextseq_id *
id = (*seqid)->GetTextseq_Id();
2770 oids.erase(
remove(oids.begin(), oids.end(), -1), oids.end());
2779 bool simpler (
false);
2793 bool simpler (
false);
2837 if (first_seq >= vol_cnt) {
2840 "OID not in valid range.");
2843 if (residue >= vol_len) {
2846 "Residue offset not in valid range.");
2855 double dresidue = (double(residue) * end_of_bytes) / vol_len;
2860 residue =
Uint8(dresidue);
2862 if (residue > (end_of_bytes-1)) {
2863 residue = end_of_bytes - 1;
2871 int oid_beg = first_seq;
2872 int oid_end = vol_cnt-1;
2876 int oid_mid = (oid_beg + oid_end)/2;
2878 while(oid_beg < oid_end) {
2888 oid_beg = oid_mid + 1;
2891 oid_mid = (oid_beg + oid_end)/2;
2901 TIndx start_offset = 0;
2903 return start_offset;
2926 if ((begin >= end) || (end > length)) {
2929 "Begin and end offsets are not valid.");
2957 v4.reserve((length+1)/2);
2962 for(
TSeqPos i = 0;
i < length_whole;
i += 2) {
2966 if (length_whole != length) {
2967 _ASSERT((length_whole) == (length-1));
2968 v4.push_back(
buffer[length_whole] << 4);
2982 int * amb_length )
const
2997 TIndx map_begin = 0;
3010 end_A = start_A = --end_S;
3014 map_begin = start_S - 1;
3015 map_end = end_A + 1;
3019 map_begin = start_S;
3023 int s_len =
int(end_S - start_S);
3024 int a_len =
int(end_A - start_A);
3026 if (! (s_len && amb_ok)) {
3028 "File error: could not get sequence data.");
3032 *amb_length = a_len;
3036 *seq_length = s_len;
3041 *
buffer += (start_S - map_begin);
3045 if (! *seq_length) {
3048 "Could not get sequence data.");
3051 if (((
buffer && *
buffer) || a_len) && (! *seq_length)) {
3061 if (
id >= (
static_cast<T>(1) << 32)) {
3064 "ID overflows range of specified type.");
3100 low_id = high_id =
count = 0;
3134 bool cache_data)
const
3138 if (offset_ranges.
empty() && (! cache_data) && (! append_ranges)) {
3151 if (
R.Empty() ||
R->GetRanges().empty()) {
3156 if (offset_ranges.
empty() && (! cache_data)) {
3172 bool flush_sequence = ((! append_ranges) ||
3173 (! offset_ranges.
empty()) ||
3176 if (flush_sequence) {
3180 R->SetRanges(offset_ranges, append_ranges, cache_data);
3193 if (append_ranges) {
3214 if ((**gilist).GetNumSis() != 0)
3217 if ((**gilist).GetNumTis() != 0)
3250 "Hash lookup requested but no hash ISAM file found.");
3256 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
3257 (!defined(NCBI_COMPILER_MIPSPRO)) )
3273 m_Columns[col_id]->GetBlob(oid, blob, keep, & locked);
3290 return m_Columns[col_id]->GetMetaData();
3303 titles.
insert((**iter).GetTitle());
3315 string alpha(
"abcdefghijklmnopqrstuvwxyz");
3316 string ei(
"??a"), ed(
"??b"), ed2(
"??c");
3318 ei[0] = ed[0] = ed2[0] = (
m_IsAA ?
'p' :
'n');
3322 for(
size_t i = 0;
i < alpha.size();
i++) {
3323 ei[1] = ed[1] = ed2[1] = alpha[
i];
3330 if ( ! (big || small))
continue;
3334 const Int2 bytetest = 0x0011;
3335 const char * ptr = (
const char *) &bytetest;
3336 if (ptr[0] == 0x11 && small) {
3342 string errmsg, errarg;
3344 string title = col->GetTitle();
3346 if (unique_titles[title]) {
3347 errmsg =
"duplicate column title";
3350 unique_titles[title] = 1;
3355 if (noidc != noidv) {
3356 errmsg =
"column has wrong #oids";
3361 if (errmsg.size()) {
3362 if (errarg.size()) {
3363 errmsg +=
string(
" [") + errarg +
"].";
3366 string(
"Error: ") + errmsg);
3387 return static_cast<int>(
i);
#define FENCE_SENTRY
This sentry value is used as a 'fence' around the valid portions of partially decoded sequences.
vector< TSeqRange > TRangeVector
`Blob' Class for SeqDB (and WriteDB).
TTaxIds GetTaxIds() const
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
CObjectIStreamAsnBinary –.
CObjectOStreamAsnBinary –.
static char * Alloc(size_t length, bool clear=true)
Allocate memory that atlas will keep track of.
static bool ColumnExists(const string &basename, const string &extn, CSeqDBAtlas &atlas)
Determine if the column exists.
void UnLease()
Release memory held in the atlas layer by this object.
const char * GetFileDataPtr(const string &fname, TIndx offset)
Get a pointer to the specified offset.
static bool IndexExists(const string &name, const char prot_nucl)
TGi GetSeqGI(TOid oid, CSeqDBLockHold &locked)
int GetNumGis() const
Get the number of GIs in the array.
int GetNumSis() const
Get the number of Seq-ids in the array.
int GetNumTis() const
Get the number of TIs in the array.
Uint8 GetMaskOpts() const
set< TTaxId > & GetTaxIdsList()
const char * GetFileDataPtr(TIndx start) const
Read part of the file into a buffer.
string GetDate() const
Get the construction date of the volume.
void GetHdrStartEnd(int oid, TIndx &start, TIndx &end) const
Get the location of a sequence's header data.
void UnLease()
Release any memory leases temporarily held here.
string GetTitle() const
Get the volume title.
int GetNumOIDs() const
Get the number of oids in this volume.
string GetLMDBFileName() const
bool GetAmbStartEnd(int oid, TIndx &start, TIndx &end) const
Get the location of a sequence's ambiguity data.
Uint8 GetVolumeLength() const
Get the length of the volume (in bases).
int GetMinLength() const
Get the length of the shortest sequence in this volume.
void GetSeqStart(int oid, TIndx &start) const
Get the location of a sequence's packed sequence data.
char GetSeqType() const
Get the sequence data type.
int GetMaxLength() const
Get the length of the longest sequence in this volume.
void GetSeqStartEnd(int oid, TIndx &start, TIndx &end) const
Get the location of a sequence's packed sequence data.
TValue & Lookup(int key)
Find a value in the cache.
bool IdToOid(Int8 id, TOid &oid)
GI or TI translation.
void HashToOids(unsigned hash, vector< TOid > &oids)
Sequence hash lookup.
bool PigToOid(TPig pig, TOid &oid)
PIG translation.
void IdsToOids(int vol_start, int vol_end, CSeqDBGiList &ids)
Translate Gis and Tis to Oids for the given ID list.
void GetIdBounds(Int8 &low_id, Int8 &high_id, int &count)
Get Numeric Bounds.
void UnLease()
Return any memory held by this object to the atlas.
void StringToOids(const string &acc, vector< TOid > &oids, bool adjusted, bool &version_check)
String translation.
static bool IndexExists(const string &dbname, char prot_nucl, char file_ext_char)
Check if a given ISAM index exists.
int GetNumTis() const
Get the number of TIs in the array.
int GetNumGis() const
Get the number of GIs in the array.
int GetNumSis() const
Get the number of SeqIds in the array.
set< TTaxId > & GetTaxIdsList()
TRangeList m_Ranges
Range of offsets needed for this sequence.
void SetRanges(const TRangeList &ranges, bool append_ranges, bool cache_data)
Set ranges of the sequence that will be used.
static int ImmediateLength()
Sequences shorter than this will not use ranges in any case.
bool m_CacheData
True if caching of sequence data is required for this sequence.
void ReadBytes(char *buf, TIndx start, TIndx end) const
Read part of the file into a buffer.
const char * GetFileDataPtr(TIndx start) const
Get a pointer into the file contents.
static bool GetTaxNames(TTaxId tax_id, SSeqDBTaxInfo &info)
Get the taxonomy names for a given tax id.
void OptimizeGiLists() const
Simplify the GI list configuration.
bool m_HaveColumns
True if we have opened the columns for this volume.
list< CRef< CSeq_id > > GetSeqIDs(int oid) const
Get the Seq-ids associated with a sequence.
CFastMutex m_MtxCachedRange
CRef< CSeqDBIsam > m_IsamGi
Handles translation of GIs to OIDs.
void SeqidToOids(CSeq_id &seqid, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified Seq-id.
Uint8 x_GetSeqResidueOffset(int oid) const
Returns the base-offset of the specified oid.
void x_OpenHashFile(void) const
void x_UnleasePigFile(void) const
int GetAmbigPartialSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, CSeqDB::TSequenceRanges *partial_ranges, CSeqDB::TSequenceRanges *masks) const
void x_UnleaseTiFile(void) const
CSeqDBAtlas & m_Atlas
The memory management layer.
void AccessionToOids(const string &acc, vector< int > &oids, CSeqDBLockHold &locked) const
Find OIDs for the specified accession or formatted Seq-id.
void GetColumnBlob(int col_id, int oid, CBlastDbBlob &blob, bool keep, CSeqDBLockHold &locked)
Fetch the data blob for the given column and oid.
CRef< CSeqDBHdrFile > m_Hdr
Contains header (defline) information for this volume.
void x_OpenSeqFile(void) const
CSeqDBIntCache< CRef< CSeqdesc > > m_TaxCache
This cache allows CBioseqs to share taxonomic objects.
void x_OpenTiFile(void) const
int GetAmbigSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, SSeqDBSlice *region, CSeqDB::TSequenceRanges *masks) const
Get a sequence with ambiguous regions.
vector< CRef< CSeqDBColumn > > m_Columns
Set of columns defined for this volume.
CRef< CSeqDBIdxFile > m_Idx
Metadata plus offsets into the sequence, header, and ambiguity data.
void x_OpenStrFile(void) const
int GetSeqLengthExact(int oid) const
Exact sequence length for nucleotide databases.
void x_StringToOids(const string &acc, ESeqDBIdType id_type, Int8 ident, const string &str_id, bool simplified, vector< int > &oids) const
void OpenSeqFile(CSeqDBLockHold &locked) const
Open sequence file.
int GetColumnId(const string &title, CSeqDBLockHold &locked)
Get an ID number for a given column title.
CRef< CSeqDBIsam > m_IsamStr
Handles translation of strings (accessions) to OIDs.
vector< CRef< CSeqDBGiList > > TGiLists
A set of GI lists.
CSeqDBIntCache< TDeflineCacheItem > m_DeflineCache
Cache of filtered deflines.
int x_GetAmbigSeq(int oid, char **buffer, int nucl_code, ESeqDBAllocType alloc_type, SSeqDBSlice *region, CSeqDB::TSequenceRanges *masks) const
Get a sequence with ambiguous regions.
int m_VolStart
Starting OID of this volume.
int GetNumOIDs() const
Get the number of OIDs for this volume.
bool GetGi(int oid, TGi &gi, CSeqDBLockHold &locked) const
Find the GI given an OID.
CRef< CBlast_def_line_set > x_GetHdrAsn1(int oid, bool adjust_oids, bool *changed) const
Get sequence header object.
void GetPigBounds(int &low_id, int &high_id, int &count, CSeqDBLockHold &locked) const
Get PIG Bounds.
void x_FilterHasId(const CSeq_id &id, bool &have_user, bool &have_vol) const
Determine if a user ID list affects this ID, and how.
string m_VolName
The name of this volume.
CTempString x_GetHdrAsn1Binary(int oid) const
Get sequence header binary data.
void FlushOffsetRangeCache()
Flush all offset ranges cached.
CSeqDBVol(CSeqDBAtlas &atlas, const string &name, char prot_nucl, CSeqDBGiList *user_list, CSeqDBNegativeList *neg_list, int vol_start, CSeqDBLockHold &locked)
Constructor.
void IdsToOids(CSeqDBGiList &gis, CSeqDBLockHold &locked) const
Translate Gis to Oids for the given vector of Gi/Oid pairs.
void GetRawSeqAndAmbig(int oid, const char **buffer, int *seq_length, int *ambig_length) const
Get Raw Sequence and Ambiguity Data.
void x_OpenHdrFile(void) const
string GetLMDBFileName() const
Get sqlite file name associated with this volume. Empty string if version 4.
void ListColumns(set< string > &titles, CSeqDBLockHold &locked)
List the titles of all columns for this volume.
int GetSeqLengthApprox(int oid) const
Approximate sequence length for nucleotide databases.
TRangeCache m_RangeCache
Cached/ranged sequence info.
int m_VolEnd
First OID past end of this volume.
bool m_SeqFileOpened
True if an attempt has been made to open the volume file (whether or not it succeeded).
bool GetPig(int oid, int &pig, CSeqDBLockHold &locked) const
Find the PIG given an OID.
int GetSeqLengthProt(int oid) const
Sequence length for protein databases.
CRef< CSeqDBSeqFile > m_Seq
Contains sequence data for this volume.
CRef< CSeqdesc > x_GetAsnDefline(int oid) const
Get sequence header information structures.
string GetTitle() const
Get the volume title.
CRef< CSeqDBGiIndex > m_GiIndex
The GI index file (for fast oid->gi conversion)
CRef< CSeqDBNegativeList > m_NegativeList
The negative ID list, if one exists.
TGiLists m_VolumeGiLists
The volume GI lists, if any exist.
CRef< CSeqDBIsam > m_IsamTi
Handles translation of TI (trace ids) to OIDs.
int x_GetSequence(int oid, const char **buffer) const
Get sequence data.
CRef< CSeqDBIsam > m_IsamHash
Handles translation of sequence hash value to OIDs.
void UnLease()
Return expendable resources held by this volume.
list< CRef< CSeqdesc > > x_GetTaxonomy(int oid, TGi preferred_gi, const CSeq_id *preferred_seq_id)
Get taxonomic descriptions of a sequence.
CRef< CBlast_def_line_set > x_GetFilteredHeader(int oid, bool *changed) const
Get sequence header information.
char * x_AllocType(size_t length, ESeqDBAllocType alloc_type) const
Allocate memory in one of several ways.
void x_CheckVersions(const string &acc, vector< int > &oids) const
Check Seq-id versions for special sparse-id support case.
bool GiToOid(TGi gi, int &oid, CSeqDBLockHold &locked) const
Find the OID given a GI.
TGi GetSeqGI(int oid, CSeqDBLockHold &locked) const
Get the GI of a sequence. This method returns the gi of the sequence.
CRef< CSeq_data > GetSeqData(int oid, TSeqPos begin, TSeqPos end, CSeqDBLockHold &locked) const
Fetch data as a CSeq_data object.
void GetGiBounds(TGi &low_id, TGi &high_id, int &count, CSeqDBLockHold &locked) const
Get GI Bounds.
int GetOidAtOffset(int first_seq, Uint8 residue, CSeqDBLockHold &locked) const
Find the OID at a given index into the database.
char GetSeqType() const
Get the sequence type stored in this database.
bool x_HaveIdFilter(void) const
Returns true if this volume has an ID list.
void x_OpenAllColumns(CSeqDBLockHold &locked)
Find all columns for this volume.
CRef< CBioseq > GetBioseq(int oid, TGi pref_gi, const CSeq_id *pref_seq_id, bool seqdata, CSeqDBLockHold &locked)
Get a CBioseq object for this sequence.
int GetMinLength() const
Get the length of the smallest sequence in this volume.
void x_UnleaseStrFile(void) const
CRef< CSeqDBGiList > m_UserGiList
The user ID list, if one exists.
CSeqDBAtlas::TIndx TIndx
Import TIndx definition from the CSeqDBAtlas class.
void x_OpenGiFile(void) const
pair< CRef< CBlast_def_line_set >, bool > TDeflineCacheItem
Filtered defline plus whether binary data needed changes.
CRef< CSeqDBIsam > m_IsamPig
Handles translation of PIGs to OIDs.
bool m_IsAA
True if the volume is protein, false for nucleotide.
const map< string, string > & GetColumnMetaData(int col_id, CSeqDBLockHold &locked)
Get all metadata for the specified column.
CRef< CBlast_def_line_set > x_GetTaxDefline(int oid, TGi preferred_gi, const CSeq_id *preferred_seq_id)
Get defline filtered by several criteria.
void x_OpenOidFile(void) const
int GetMaxLength() const
Get the length of the largest sequence in this volume.
bool PigToOid(int pig, int &oid) const
Find the OID given a PIG.
bool TiToOid(Int8 ti, int &oid, CSeqDBLockHold &locked) const
Find the OID given a TI.
void x_OpenPigFile(void) const
void x_GetFilteredBinaryHeader(int oid, vector< char > &hdr_data) const
Get binary sequence header information.
Uint8 GetVolumeLength() const
Get the total length of this volume (in bases).
CRef< CBlast_def_line_set > GetFilteredHeader(int oid, CSeqDBLockHold &locked) const
Get filtered sequence header information.
string GetDate() const
Get the formatting date of the volume.
int m_MemBit
The filtering MEMB_BIT.
void x_UnleaseGiFile(void) const
void SetOffsetRanges(int oid, const TRangeList &offset_ranges, bool append_ranges, bool cache_data) const
Apply a range of offsets to a database sequence.
void GetStringBounds(string &low_id, string &high_id, int &count) const
Get String Bounds.
void HashToOids(unsigned hash, vector< int > &oids, CSeqDBLockHold &locked) const
Get the OIDs for a given sequence hash.
void x_GetAmbChar(int oid, vector< Int4 > &ambchars) const
Get ambiguity information.
char x_GetSeqType() const
Returns 'p' for protein databases, or 'n' for nucleotide.
static const string kOidNotFound
String containing the error message in exceptions thrown when a given OID cannot be found.
static CRef< CBlast_def_line_set > ExtractBlastDefline(const CBioseq &bioseq)
Extract a Blast-def-line-set object from a Bioseq retrieved by CSeqDB.
@Seq_descr.hpp User-defined methods of the data storage class.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
container_type::iterator iterator
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator begin() const
const_iterator find(const key_type &key) const
const_iterator end() const
static int base_length[29]
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static void DLIST_NAME() remove(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static TDSRET convert(TDSSOCKET *tds, TDSICONV *conv, TDS_ICONV_DIRECTION direction, const char *from, size_t from_len, char *dest, size_t *dest_len)
#define GI_FROM(T, value)
unsigned int TSeqPos
Type for sequence locations and lengths.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define TAX_ID_TO(T, tax_id)
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
SStrictId_Tax::TId TTaxId
Taxon id type.
#define TAX_ID_FROM(T, value)
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string AsFastaString(void) const
E_SIC
Compare return values.
@ e_NO
different SeqId types-can't compare
@ e_YES
SeqIds compared, but are different.
void Close(void)
Detach reader from a data source.
void OpenFromBuffer(const char *buffer, size_t size)
Attach reader to a data source.
void AddReference(void) const
Add reference to object.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object, which means having a non-null value.
void RemoveReference(void) const
Remove reference to object.
bool ReferencedOnlyOnce(void) const THROWS_NONE
Check if object is referenced only once.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
void Reset(void)
Reset random number generator to initial startup condition (LFG only)
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static string Int8ToString(Int8 value, TNumToStringFlags flags=0, int base=10)
Convert Int8 to string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
const char * data(void) const
Return a pointer to the array represented.
size_type length(void) const
Return the length of the represented array.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
size_type size(void) const
Return the length of the represented array.
@ fConvErr_NoThrow
Do not throw an exception on error.
@ fAllowTrailingSymbols
Ignore trailing non-numeric characters.
const TSeqid & GetSeqid(void) const
Get the Seqid member data.
bool CanGetTitle(void) const
Check if it is safe to call GetTitle method.
bool IsSetLinks(void) const
Check if a value has been assigned to Links data member.
TTaxid GetTaxid(void) const
Get the Taxid member data.
bool IsSet(void) const
Check if a value has been assigned to data member.
bool IsSetMemberships(void) const
bit arrays Repurposed to store the (multiple) taxIDs associated with WP proteins.
bool IsSetTaxid(void) const
Check if a value has been assigned to Taxid data member.
const TLinks & GetLinks(void) const
Get the Links member data.
const TMemberships & GetMemberships(void) const
Get the Memberships member data.
Tdata & Set(void)
Assign a value to data member.
bool CanGet(void) const
Check if it is safe to call Get method.
const Tdata & Get(void) const
Get the member data.
bool CanGetSeqid(void) const
Check if it is safe to call GetSeqid method.
bool CanGetMemberships(void) const
Check if it is safe to call GetMemberships method.
const TTitle & GetTitle(void) const
Get the Title member data.
bool IsStr(void) const
Check if variant Str is selected.
const TTag & GetTag(void) const
Get the Tag member data.
void SetTag(TTag &value)
Assign a value to Tag data member.
const TDb & GetDb(void) const
Get the Db member data.
vector< vector< char > * > TOss
TData & SetData(void)
Assign a value to Data data member.
void SetNum(TNum value)
Assign a value to Num data member.
const TStr & GetStr(void) const
Get the variant data.
void SetLabel(TLabel &value)
Assign a value to Label data member.
TStr & SetStr(void)
Select the variant.
const TData & GetData(void) const
Get the Data member data.
void SetType(TType &value)
Assign a value to Type data member.
void SetData(TData &value)
Assign a value to Data data member.
const TType & GetType(void) const
Get the Type member data.
void SetDb(const TDb &value)
Assign a value to Db data member.
TId GetId(void) const
Get the variant data.
void SetCommon(const TCommon &value)
Assign a value to Common data member.
TDb & SetDb(void)
Assign a value to Db data member.
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
TGeneral & SetGeneral(void)
Select the variant.
E_Choice Which(void) const
Which variant is currently selected.
TVersion GetVersion(void) const
Get the Version member data.
bool CanGetVersion(void) const
Check if it is safe to call GetVersion method.
bool CanGetAccession(void) const
Check if it is safe to call GetAccession method.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_General
for other databases
@ e_Gi
GenInfo Integrated Database.
list< CRef< CSeqdesc > > Tdata
TId & SetId(void)
Assign a value to Id data member.
TTitle & SetTitle(void)
Select the variant.
void SetInst(TInst &value)
Assign a value to Inst data member.
TSource & SetSource(void)
Select the variant.
TNcbistdaa & SetNcbistdaa(void)
Select the variant.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TUser & SetUser(void)
Select the variant.
void SetRepr(TRepr value)
Assign a value to Repr data member.
Tdata & Set(void)
Assign a value to data member.
void SetLength(TLength value)
Assign a value to Length data member.
void SetSeq_data(TSeq_data &value)
Assign a value to Seq_data data member.
TNcbi4na & SetNcbi4na(void)
Select the variant.
void SetMol(TMol value)
Assign a value to Mol data member.
@ eRepr_raw
continuous sequence
@ eMol_na
just a nucleic acid
unsigned int
A callback function used to compare two keys in a database.
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
const CharType(& source)[N]
Multi-threading – mutexes; rw-locks; semaphore.
ESeqDBAllocType
Certain methods have an "Alloc" version.
ESeqDBIdType SeqDB_SimplifySeqid(CSeq_id &bestid, const string *acc, Int8 &num_id, string &str_id, bool &simpler)
Seq-id simplification.
const int kSeqDBNuclNcbiNA8
Used to request ambiguities in Ncbi/NA8 format.
ESeqDBIdType SeqDB_SimplifyAccession(const string &acc, Int8 &num_id, string &str_id, bool &simpler)
String id simplification.
const int kSeqDBNuclBlastNA8
Used to request ambiguities in BLAST/NA8 format.
ESeqDBIdType
Various identifier formats used in Id lookup.
@ eStringId
Each PIG identifier refers to exactly one protein sequence.
@ eTiId
Genomic ID is a relatively stable numeric identifier for sequences.
@ ePigId
Trace ID is a numeric identifier for Trace sequences.
@ eHashId
Some sequence sources use string identifiers.
@ eOID
Lookup from sequence hash values to OIDs.
T SeqDB_GetStdOrd(const T *stdord_obj)
Read a network order integer value.
The SeqDB oid filtering layer.
static bool s_SeqDB_SeqIdIn(const list< CRef< CSeq_id > > &seqids, const CSeq_id &target)
Search for a Seq-id in a list of Seq-ids.
bool s_IncludeDefline_Taxid(const CBlast_def_line &def, const set< TTaxId > &user_tax_ids)
static void s_SeqDBMapNA2ToNA4(const char *buf2bit, vector< char > &buf4bit, int base_length)
Convert sequence data from NA2 to NA4 format.
Uint4 s_ResVal(const vector< Int4 > &ambchars, Uint4 i)
Get ambiguous residue value (old version)
void SeqDB_UnpackAmbiguities(const CTempString &sequence, const CTempString &ambiguities, string &result)
Unpack an ambiguous nucleotide sequence.
static void s_SeqDBRebuildDNA_NA8(char *seq, const vector< Int4 > &amb_chars, const SSeqDBSlice &region)
Rebuild an ambiguous region from sequence and ambiguity data.
Uint4 s_ResLenOld(const vector< Int4 > &ambchars, Uint4 i)
Get ambiguous region length (old version)
Uint4 s_ResLenNew(const vector< Int4 > &ambchars, Uint4 i)
Get length of ambiguous region (new version)
set< pair< int, int > > TRangeVector
List of offset ranges as begin/end pairs.
bool s_IncludeDefline_NegativeTaxid(const CBlast_def_line &def, const set< TTaxId > &user_tax_ids)
static vector< Uint1 > s_SeqDBMapNA2ToNA4Setup()
Build NA2 to NcbiNA4 translation table.
Uint4 s_ResPosNew(const vector< Int4 > &ambchars, Uint4 i)
Get position of ambiguous region (new version)
static void s_SeqDBRebuildDNA_NA4(vector< char > &buf4bit, const vector< Int4 > &amb_chars)
Rebuild an ambiguous region from sequence and ambiguity data.
static void s_SeqDBWriteSeqDataProt(CSeq_inst &seqinst, const char *seq_buffer, int length)
Store protein sequence data in a Seq-inst.
static void s_GetBioseqTitle(CRef< CBlast_def_line_set > deflines, string &title)
Get the title string for a CBioseq.
static void s_SeqDBFitsInFour(T id)
unsigned SeqDB_ncbina8_to_blastna8[]
static CRef< CBlast_def_line_set > s_OssToDefline(const CUser_field::TData::TOss &oss)
Efficiently decode a Blast-def-line-set from binary ASN.1.
Uint4 s_ResPosOld(const vector< Int4 > &ambchars, Uint4 i)
Get position of ambiguous region (old version)
static void s_SeqDBMapNcbiNA8ToBlastNA8(char *buf, const SSeqDBSlice &range)
Convert sequence data from Ncbi-NA8 to Blast-NA8 format.
static vector< Uint1 > s_SeqDBMapNA2ToNA8Setup()
Build NA2 to Ncbi-NA8 translation table.
static void s_SeqDBMaskSequence(char *seq, CSeqDB::TSequenceRanges *masks, char mask_letter, const SSeqDBSlice &range)
static void s_SeqDBWriteSeqDataNucl(CSeq_inst &seqinst, const char *seq_buffer, int length)
Store non-ambiguous nucleotide sequence data in a Seq-inst.
CRef< CBlast_def_line_set > s_ExtractBlastDefline(const T &bioseq)
static void s_SeqDBMapNA2ToNA8(const char *buf2bit, char *buf8bit, const SSeqDBSlice &range)
Convert sequence data from NA2 to NA8 format.
bool s_IncludeDefline_MaskFilter(const CBlast_def_line &def, Uint8 mask)
Defines database volume access classes.
List of sequence offset ranges.
OID-Range type to simplify interfaces.
int begin
First oid in range.
int end
OID after last included oid.
string common_name
Common name, such as "noisy night monkey".
string scientific_name
Scientific name, such as "Aotus vociferans".