71 #include "accguide2.inc"
74 #define NCBI_USE_ERRCODE_X Objects_SeqId
77 struct CSeq_id_find_pred
80 bool operator()(
const char ch)
84 return strchr(kSymbols, ch) != 0;
132 case eFormat:
return "eFormat";
156 if (verstr.
empty()) {
162 "Version embedded in accession " +
string(acc_in)
163 +
" is not a positive integer");
205 switch ( src.
Which() ) {
322 switch (
id.
Which() ) {
425 if ( tsip1->
Match(*tsip2) )
494 if ( tsip1 && tsip2 ) {
602 if (it == sc_ChoiceMap.end()) {
621 if (s.
size() > 3 && s[2] ==
'|') {
623 }
else if (s.
size() > 4 && s[3] ==
'|') {
635 if (s.
size() > 3 && s[2] ==
'|') {
637 }
else if (s.
size() > 4 && s[3] ==
'|') {
890 typedef pair<string, TAccInfo>
TPair;
951 string* key_used =
NULL)
const;
961 void x_Load(
const string& filename);
966 const string* old_name,
const CTempString& new_name);
981 if (it == sc_AccInfoMap.end()) {
997 if (fmt == prev_special_format) {
998 if (acc_or_range == prev_special_key) {
999 prev_special_type = prev_type;
1000 prev_special_type_name = prev_type_name;
1002 }
else if (pfx == prev_special_base_key) {
1003 return prev_special_base_type;
1006 prev_special_format = fmt;
1007 prev_special_base_key = pfx;
1008 prev_special_base_type = guide.
Find(fmt, pfx);
1009 return prev_special_base_type;
1016 if (prev_submap !=
NULL && prev_submap->first == fmt) {
1017 return prev_submap->second;
1020 if (it ==
rules.
end() || it->first != fmt) {
1024 prev_big_special = it->second.big_specials.end();
1025 prev_small_special = it->second.small_specials.end();
1033 vector<CTempStringEx> tokens;
1037 tmp1.assign(rule, 0, rule.
find(
'#'));
1043 if (tokens.empty()) {
1045 }
else if (tokens.size() == 2
1051 <<
": Unsupported version " << tokens[1]);
1054 }
else if ((pos = tokens[0].find(
'+')) !=
NPOS
1055 && (tokens.size() == 3
1056 || (tokens.size() == 4 && tokens[3] ==
"*"))) {
1059 tmp1.assign(tokens[0], 0, pos);
1065 unique_ptr<string> old_name;
1069 old_name.reset(
new string);
1077 if ( !key_used.empty() ) {
1078 key_used =
" (per " + key_used +
')';
1081 <<
": ignoring refinement of " << tokens[1]
1082 <<
" from " << *old_name << key_used
1083 <<
" to unrecognized accession type " << tokens[2]);
1087 *old_name = it->second;
1092 <<
": using default fallback from " << tokens[2]
1093 <<
" to " << *old_name <<
" for " << tokens[1]);
1095 *old_name =
"unknown";
1097 "SAccGuide::AddRule: " <<
count
1098 <<
": unrecognized accession type " << tokens[2]
1099 <<
" for " << tokens[1]);
1105 if (tokens.size() == 4) {
1109 if (tokens[1].find_first_of(
"?*") ==
NPOS) {
1114 if (wit->first == tokens[1]) {
1115 wit->second =
value;
1116 value_ptr = &wit->second;
1120 if (value_ptr ==
NULL) {
1122 value_ptr = &submap.
wildcards.back().second;
1127 _ASSERT(old_name.get() !=
NULL && !old_name->empty());
1128 fallbacks[value_ptr] = make_pair(*old_name, tokens[2]);
1135 pos = tokens[1].find_first_of(
kDigits);
1136 pos2 = tokens[1].find(
'-', pos);
1138 =
s_Key(pos, ((pos2 ==
NPOS) ? tokens[1].
size() : pos2) - pos);
1145 Find(fmt, tokens[1].substr(0, pos2), &key_used);
1146 if ( !key_used.empty() ) {
1148 <<
"SAccGuide::AddRule: Main listing for special "
1150 <<
" doesn't indicate that specials are present.");
1153 unique_ptr<string> old_name;
1156 Find(fmt, tokens[1].substr(0, pos2), &key_used);
1157 old_name.reset(
new string);
1158 if ( !key_used.empty() ) {
1162 *old_name = it->second;
1167 <<
": using default fallback from " << tokens[2]
1168 <<
" to " << *old_name <<
" for " << tokens[1]);
1180 if ( !key_used.empty() ) {
1181 key_used =
" (per " + key_used +
')';
1184 <<
": unrecognized accession type " << tokens[2]
1185 <<
" for special case " << tokens[1]
1186 <<
"; falling back to " << *old_name << key_used);
1187 }
else if (old_name->empty()) {
1188 *old_name =
"unknown";
1190 <<
": unrecognized accession type " << tokens[2]
1191 <<
" for stray(!) special case " << tokens[1]);
1200 tmp1 = tmp2 = tokens[1];
1202 tmp1.assign(tokens[1], 0, pos2);
1210 }
else if (tokens.size() >= 3
1215 <<
": special2 valid only in version 2+ guides");
1218 auto digits = NStr::StringToNumeric<unsigned short>(tmp2);
1232 if ( !why.empty() ) {
1234 <<
"SAccGuide::AddRule: Main listing for special "
1236 <<
" doesn't indicate that specials are present.");
1239 for (
size_t i = 2;
i < tokens.size(); ++
i) {
1249 for (
size_t i = 2;
i < tokens.size(); ++
i) {
1255 why =
" (per default fallback for " + tokens[
i] +
')';
1269 if ( !why.empty() ) {
1270 why =
" (per " + why +
')';
1275 <<
": unrecognized accession type " << tokens[2]
1276 <<
" for stray(!) special case " << tokens[1]);
1285 <<
": unrecognized accession type " << tokens[2]
1286 <<
" for special case " << tokens[1]
1292 }
else if (tokens.size() >= 2 && tokens[0] ==
":") {
1296 <<
": special2 continuation lines valid only in"
1297 " version 2+ guides");
1302 "SAccGuide::AddRule: " <<
count
1303 <<
": ignoring misplaced special2 ranges line.");
1309 for (
size_t i = 1;
i < tokens.size(); ++
i) {
1326 string key(tokens[1]);
1333 <<
": unrecognized accession type " << tokens[2]
1343 fallbacks[&it2->second] = make_pair(old_name, tokens[2]);
1345 <<
": ignoring refinement of " <<
key <<
" from "
1346 << old_name <<
" to unrecognized accession type "
1352 }
else if (tokens.size() == 3
1358 <<
": default fallbacks valid only in version 2+"
1366 <<
": ignoring invalid line: " << rule);
1372 string* key_used)
const
1380 const SSubMap& submap = it->second;
1389 bool bad_match =
false;
1391 while (pos !=
NPOS) {
1392 if ( !
isalnum(pfx[pos]) && pfx[pos] !=
'?' ) {
1396 pos = wit->first.find(
'?', pos + 1);
1402 if (key_used && acc_or_pfx != wit->first) {
1403 *key_used = wit->first;
1416 if (ssit->second.first[
n]) {
1420 return ssit->second.second;
1426 && !(acc_or_pfx < bsit->second.first) ) {
1430 return bsit->second.second;
1432 if (key_used && key_used->empty()) {
1433 *key_used = pfx.
substr(0, fmt >> 16);
1446 bool file_is_old =
false;
1449 CTime builtin_timestamp(
static_cast<time_t
>(kBuiltInGuide_Timestamp));
1450 if ( !
file.empty() &&
1460 "using built-in rules because accguide.txt is older.");
1463 "falling back on built-in rules.");
1465 static const unsigned int kNumBuiltInRules
1466 =
sizeof(kBuiltInGuide) /
sizeof(*kBuiltInGuide);
1468 for (
unsigned int i = 0;
i < kNumBuiltInRules; ++
i) {
1472 for (
auto &rit :
rules) {
1474 if (sit->second.first.any()) {
1475 sit->second.first.optimize();
1477 rit.second.small_specials.erase(sit);
1490 static const char*
const kNucDBs[] = {
1491 "SRA",
"TI",
"TR_ASSM_CH",
"TRACE_ASSM",
"TRACE_CHGR",
NULL
1493 for (
const char*
const* p = kNucDBs; *p; ++p) {
1510 }
while ( !
in.AtEOF() );
1515 const string* old_name,
1521 const TAccInfo* value_ptr =
nullptr;
1522 if (from_pfx != to_pfx) {
1546 it->second.first.clear_range(left, right);
1550 || (--it)->first != from_pfx) {
1560 if (it->first != from_pfx) {
1571 _ASSERT(it->first == from_pfx);
1588 if (value_ptr !=
nullptr) {
1589 _ASSERT(old_name !=
nullptr && !old_name->empty());
1590 fallbacks[value_ptr] = make_pair(*old_name, new_name);
1599 auto raw_digits = fmt & 0xffff, digits = raw_digits;
1600 auto normal_size = (fmt >> 16) + digits;
1608 if (acc.
size() == normal_size) {
1609 pos = acc.
size() - digits;
1614 pos = (fmt >> 16) + 2;
1617 if (digits == raw_digits) {
1619 result += (NStr::StringToNumeric<Uint1>(acc.
substr(pos, 2))
1638 bool has_version =
true;
1639 if (main_size ==
NPOS) {
1640 has_version =
false;
1641 main_size = acc.
size();
1642 }
else if (main_size >= acc.
size() - 1
1647 static const SIZE_TYPE kMainAccBufSize = 32;
1648 if (main_size <= kMainAccBufSize) {
1649 const unsigned char* ucdata = (
const unsigned char*)acc.
data();
1650 char main_acc_buf[kMainAccBufSize];
1659 string main_acc(acc, 0, main_size);
1670 main_size = main_acc.
size();
1671 char flag_char =
'\0';
1672 if (digit_pos ==
NPOS) {
1676 const unsigned char* ucdata = (
const unsigned char*)main_acc.
data();
1678 if ( !has_version && digit_pos == 0 && main_size >= 4
1679 && non_dig_pos < 5 &&
isalnum(ucdata[1])
1683 if (main_size > 4 && main_size <= 17
1684 && strchr(
"|-_", main_acc[4])
1685 && (main_size <= 6 ||
isalnum(ucdata[5]))) {
1688 }
else switch (main_size) {
1713 if (digit_pos == 1 && main_size == 6
1714 && (main_acc[0] ==
'O' || main_acc[0] ==
'P'
1715 || main_acc[0] ==
'Q' ||
isalpha(ucdata[2]))
1720 }
else if (digit_pos == 1 && main_size == 10
1721 && main_acc[0] !=
'O' && main_acc[0] !=
'P'
1722 && main_acc[0] !=
'Q'
1728 }
else if ( !has_version && digit_pos == 0
1729 && (non_dig_pos == 6 || non_dig_pos == 7)
1730 && (main_size == non_dig_pos + 1
1731 || main_acc[non_dig_pos + 1] ==
':'
1732 || (
isalpha(ucdata[non_dig_pos + 1])
1733 && (main_size == non_dig_pos + 2
1734 || main_acc[non_dig_pos + 2] ==
':')))) {
1739 }
else if (digit_pos >= 4 && non_dig_pos == digit_pos + 2
1740 && main_size - non_dig_pos >= 6 && main_acc[3] !=
'_'
1741 && (main_acc[non_dig_pos] ==
'S'
1742 || main_acc[non_dig_pos] ==
'P')
1745 flag_char = main_acc[non_dig_pos];
1752 if (digit_pos == 0) {
1754 && main_acc[0] !=
'0'
1764 SIZE_TYPE flag_len = (flag_char ==
'\0') ? 0 : 1;
1765 SIZE_TYPE digit_count = main_size - digit_pos - flag_len;
1775 static bool s_ReportedFallback;
1778 s_ReportedFallback =
true;
1779 auto it = guide->fallbacks.find(&found_ai);
1780 if (it != guide->fallbacks.end()) {
1782 " Returning fallback type "
1783 << it->second.first <<
" for accession "
1784 << main_acc <<
". (Preferred type "
1785 << it->second.second <<
" unrecognized.)");
1788 " Returning fallback type 0x"
1790 <<
" for accession " << main_acc
1791 <<
". (Internal error looking up names of"
1792 " fallback and preferred types.)");
1796 if (flag_char ==
'P') {
1809 <<
": Protein flag found with unexpected division "
1823 (
"0", digit_pos + 2) ==
NPOS)) {
1855 }
else if (type2 ==
type) {
1873 return it == (*s_Guide)->general.end() ?
eAcc_general : it->second;
1897 unsigned choice =
id.Which();
1907 *
label +=
id.GetGeneral().GetDb();
1949 if ( !
str.empty() ) {
1957 switch (
id.Which()) {
1966 }
else if (oid.
IsStr()) {
1986 const CDbtag& dbt =
id.GetGeneral();
2000 const CId_pat& idp =
id.GetPatent().GetCit();
2023 unsigned char chain =
static_cast<unsigned char>(pid.
GetChain());
2027 *
label +=
static_cast<char>(chain);
2073 list<string> secondary_id_list;
2075 if (!primary_id.empty())
2076 *
label += primary_id;
2077 else if (secondary_id_list.size() > 0)
2078 *
label += *secondary_id_list.begin();
2167 unsigned the_type =
Which();
2172 const char pgp[] =
"pat|";
2173 out.write(pgp,
sizeof(pgp) - 1);
2176 const char tr[] =
"tr|";
2177 out.write(tr,
sizeof(tr) - 1);
2188 unsigned the_type =
Which();
2261 out <<
"[UnknownSeqIdType]";
2268 #ifdef HAVE_THREAD_LOCAL
2271 #if NCBI_SHUN_OSTRSTREAM
2310 if ( (*iter)->IsGi() ) {
2312 (*iter)->WriteAsFasta(out_str);
2357 bool found_gi =
false;
2360 if ((*id)->IsGi()) {
2361 (*id)->WriteAsFasta(ostr);
2381 Set(dbtag, set_as_general);
2400 "Bad CDbtag tag type "
2426 if (set_as_general) {
2430 "Unrecognized Dbtag DB " + dbtag.
GetDb());
2467 if (the_id.
empty()) {
2469 "Empty bare accession supplied");
2485 if (colon_pos !=
NPOS) {
2486 string db = the_id.
substr(0, colon_pos);
2489 const auto& whitelist = kSupportedRawDbtags;
2490 if (whitelist.find(db) != whitelist.end()) {
2493 the_id.
substr(colon_pos + 1));
2502 "Malformatted ID " +
string(the_id));
2510 string mol(the_id, 0, 4), chain;
2512 if (the_id.
size() > 5) {
2513 chain = the_id.
substr(5);
2514 }
else if (the_id.
size() == 5 && the_id[4] !=
'|') {
2528 list<CTempString> fasta_pieces;
2531 fasta_pieces.pop_front();
2533 if ( !fasta_pieces.empty() ) {
2535 ITERATE(list<CTempString>, it, fasta_pieces) {
2536 if ( !it->empty() ) {
2539 " (synonyms?) in FASTA-style ID "
2543 "FASTA-style ID " +
string(the_id)
2544 +
" has too many parts.");
2557 Set(
f, the_type, the_content);
2563 list<CTempString> fasta_pieces;
2566 if ( !fasta_pieces.empty()
2569 fasta_pieces.pop_front();
2571 x_Init(fasta_pieces, the_type, tv);
2594 static const char* kIllegal =
" >[]|\"";
2595 CSeq_id_find_pred pred; pred.kSymbols = kIllegal;
2596 if (find_if(s.
begin(), s.
end(), pred) != s.
end()) {
2604 bool allow_partial_failure)
2607 if (allow_partial_failure) {
2629 list<CTempString> fasta_pieces;
2631 _ASSERT(fasta_pieces.size() > 0);
2632 if (fasta_pieces.size() == 1)
2643 if (fasta_pieces.size() == 2) {
2650 fasta_pieces.pop_front();
2652 while ( !fasta_pieces.empty() ) {
2656 type =
id->x_Init(fasta_pieces,
type, tv);
2662 fasta_pieces.pop_front();
2664 "Unsupported ID type " + typestr);
2668 _ASSERT( !fasta_pieces.empty() );
2670 fasta_pieces.pop_front();
2674 }
catch (std::exception& e) {
2675 if (fasta_pieces.empty()) {
2681 auto l = fasta_pieces.front().size();
2682 if (
l != 2 &&
l != 3) {
2683 fasta_pieces.pop_front();
2687 }
while ( !fasta_pieces.empty() );
2701 _ASSERT(!fasta_pieces.empty());
2704 vector<CTempString> fields(3);
2713 min_fields = max_fields = 1;
2716 min_fields = max_fields = 3;
2719 min_fields = max_fields = 2;
2734 if (fasta_pieces.empty()) {
2735 if (
i >= min_fields) {
2739 "Not enough fields for ID of type "
2743 if (
i >= min_fields && fasta_pieces.size() > 1
2744 && (fasta_pieces.front().size() == 2
2745 || fasta_pieces.front().size() == 3)
2751 list<CTempString>::iterator it = fasta_pieces.begin();
2753 _ASSERT(it != fasta_pieces.end());
2755 if ((it->size() == 2 || it->size() == 3)
2757 next_type = next_type_2;
2758 }
else if (it->find_first_not_of(
" \t\n") ==
NPOS
2759 && ++it == fasta_pieces.end()) {
2765 fields[
i] = fasta_pieces.front();
2766 fasta_pieces.pop_front();
2774 && !fasta_pieces.empty() ) {
2776 fields[1] = snp_name;
2777 fasta_pieces.clear();
2781 while ( !fasta_pieces.empty() && fasta_pieces.front().empty() ) {
2782 fasta_pieces.pop_front();
2789 fields[2] =
"unreviewed";
2791 fields[2] =
"reviewed";
2800 "Bad sequence number " +
string(fields[2]) +
" for "
2801 +
string(fields[0]) +
" patent " +
string(fields[1]));
2809 if (fields[0].
size() < 4
2810 || (fields[0].
size() > 5
2811 && ( !fields[1].
empty()
2812 || strchr(
"|-_", fields[0][4]) ==
NULL))) {
2814 "Malformatted PDB ID " +
string(fields[0]));
2816 if (fields[0].
size() > 4 && fields[1].
empty()) {
2817 if (fields[0].
size() > 5) {
2818 fields[1] = fields[0].substr(5);
2821 fields[1] = fields[0].substr(4);
2823 fields[0] = fields[0].substr(0, 4);
2831 Set(
type, fields[0] , fields[1] , ver,
2840 Set(the_type, the_id);
2843 #ifdef NCBI_STRICT_GI
2853 if ((the_id < 0) || (the_type ==
e_Local && the_id == 0)) {
2893 Set(the_type, acc_in, name_in,
version, release_in);
2909 bool allow_dot =
true;
2925 if ( the_type ==
e_Gi ) {
2927 TGi gi = NStr::StringToNumeric<TGi>(acc);
2939 return Set(the_type, the_id);
2942 "Negative, excessively large, or non-numeric "
2944 +
" ID " +
string(acc));
2951 case e_Pir: tsid = &
SetPir(); allow_dot =
false;
break;
2955 case e_Prf: tsid = &
SetPrf(); allow_dot =
false;
break;
2989 dbt.
SetTag().SetStrOrId(name);
3002 if (name_in.
empty()) {
3004 }
else if (name.
empty()) {
3007 }
else if (name.
size() == 1) {
3008 pdb.
SetChain(
static_cast<unsigned char>(name[0]));
3012 Info <<
"Necessarily using backwards-incompatible"
3013 " representation for chain " <<
string(name)
3014 <<
" of PDB molecule " << acc <<
'.');
3032 tsid->
Set(acc, name_in,
version, release_in, allow_dot);
3053 case e_Tpd:
return 10;
3058 case e_Prf:
return 20;
3107 if (db.compare(
"TMSMART") == 0 ||
3108 db.compare(
"BankIt") == 0 ||
3109 db.compare(
"NCBIFILE") == 0 )
3115 case e_Pdb:
return 80;
3135 if (db.compare(
"TMSMART") == 0 ||
3136 db.compare(
"BankIt") == 0 ||
3137 db.compare(
"NCBIFILE") == 0)
3143 case e_Prf:
return 70;
3144 case e_Pdb:
return 50;
3149 case e_Pir:
return 30;
3162 case e_Pir:
return 30;
3163 case e_Pdb:
return 40;
3167 case e_Tpg:
return 80;
3168 case e_Tpe:
return 90;
3169 case e_Tpd:
return 100;
3172 case e_Prf:
return 140;
3178 if (db.compare(
"TMSMART") == 0 ||
3179 db.compare(
"BankIt") == 0 ||
3180 db.compare(
"NCBIFILE") == 0)
3190 default:
return 255;
3197 int score = base_score * 10;
3200 if ( db ==
"TRACE" ) {
3206 if ( !text_id->IsSetVersion() ) {
3209 if ( !text_id->IsSetAccession() ) {
3216 if ( !text_id->IsSetName() ) {
3238 return &
match.SetGenbank();
3240 return &
match.SetEmbl();
3242 return &
match.SetPir();
3244 return &
match.SetSwissprot();
3246 return &
match.SetOther();
3248 return &
match.SetDdbj();
3250 return &
match.SetPrf();
3252 return &
match.SetTpg();
3254 return &
match.SetTpe();
3256 return &
match.SetTpd();
3258 return &
match.SetGpipe();
3260 return &
match.SetNamed_annot_track();
3271 if ( !
orig )
return;
3273 bool A =
orig->IsSetAccession();
3275 bool v =
orig->IsSetVersion();
3277 bool N =
orig->IsSetName();
3279 bool r =
orig->IsSetRelease();
3285 if (
A && (v ||
N ||
r)) {
3289 if (v && (
N ||
r)) {
3321 else if (
N && (v ||
r)) {
3337 switch (
Which() ) {
3357 if (
GetPdb().IsSetRel() ) {
3359 match.Assign(*
this);
3360 match.SetPdb().ResetRel();
3395 SEQ_ID_PREFER_ACCESSION_OVER_GI);
3406 return TPreferAccessionOverGi::GetDefault() ||
AvoidGi();
3412 return TAvoidGi::GetDefault();
3420 string secondary_id;
3422 bool mixed_case =
false;
3424 switch (seqid_type) {
3432 secondary_id = primary_id;
3436 secondary_id = primary_id;
3440 secondary_id = primary_id;
3457 if (secondary_id_list) {
3485 if (secondary_id_list) {
3489 if (!suffix.empty())
3496 if (secondary_id_list) {
3502 if ((parse_flags &
fAllowLocalId) != 0 && secondary_id_list) {
3506 }
else if (oid.
IsStr()) {
3507 secondary_id = oid.
GetStr();
3521 if ( secondary_id_list ) {
3524 && !primary_id.empty()) {
3526 secondary_id = primary_id +
"." + to_string(tsid->
GetVersion());
3528 secondary_id = primary_id +
".1";
3531 secondary_id = tsid->
GetName();
3541 if (secondary_id_list && !secondary_id.empty()) {
3543 secondary_id_list->emplace_back(secondary_id);
3575 if (!text_id || !text_id->IsSetAccession() || !text_id->IsSetVersion())
return true;
3592 if (scale_limit < min_limit)
return false;
3599 : start(0), stop(0), digits(0), acc_info(
CSeq_id::eAcc_unknown)
3601 size_t pos = 0,
n = s.
size();
3603 && (
isalpha((
unsigned char) s[pos])
3607 while (pos <
n &&
isdigit((
unsigned char) s[pos])) {
3614 }
else if (s[pos++] !=
'-') {
3616 "Expected hyphen in range " +
string(s));
3622 && (
isalpha((
unsigned char) s[pos])
3626 if ( !pfx2.empty() && pfx2 !=
prefix) {
3628 "Mismatched prefixes in range " +
string(s));
3633 "Mismatched digit counts in range " +
string(s));
3635 while (pos <
n &&
isdigit((
unsigned char) s[pos])) {
3652 case 'C':
case 'D':
case 'c':
case 'd':
3678 oss << m_Range->prefix << setw(m_Range->digits) << setfill(
'0') << m_Number;
3687 #undef NCBI_USE_ERRCODE_X
User-defined methods of the data storage class.
NCBI_PARAM_DECL(bool, SeqId, PreferAccessionOverGi)
static const TChoiceMapEntry sc_ChoiceArray[]
bool CanCmpAcc(CSeq_id::E_Choice choice)
static void x_GetLabel_Content(const CSeq_id &id, string *label, CSeq_id::TLabelFlags flags, int *version)
static const char *const s_TextId[CSeq_id::e_MaxChoice+1]
static const char * sc_SupportedRawDbtags[]
DEFINE_STATIC_ARRAY_MAP_WITH_COPY(CStaticArraySet< string >, kSupportedRawDbtags, sc_SupportedRawDbtags)
typedef NCBI_PARAM_TYPE(SeqId, PreferAccessionOverGi) TPreferAccessionOverGi
static CSeq_id::E_Choice s_CheckForFastaTag(const CTempString &s)
CStaticPairArrayMap< CTempString, CSeq_id::E_Choice, PNocase_Generic< CTempString > > TChoiceMap
static const unsigned int kMaxSmallSpecialDigits
void x_Assign(CObject_id &dst, const CObject_id &src)
static const TAccInfoMapEntry sc_AccInfoArray[]
CStaticPairArrayMap< CTempString, CSeq_id::EAccessionInfo, PNocase_Generic< CTempString > > TAccInfoMap
static CSafeStatic< CRef< SAccGuide > > s_Guide(s_CreateGuide, NULL)
static const bm::bvector ::size_type kBVSizes[kMaxSmallSpecialDigits+1]
static void x_GetLabel_Type(const CSeq_id &id, string *label, CSeq_id::TLabelFlags flags)
SStaticPair< const char *, CSeq_id::EAccessionInfo > TAccInfoMapEntry
static bool s_HasFastaTag(const CTempString &s)
static CRef< SAccGuide > * s_CreateGuide(void)
CTextseq_id * s_GetTextseq_id(const CSeq_id::E_Choice &choice, CSeq_id &match)
NCBI_PARAM_DEF_EX(bool, SeqId, PreferAccessionOverGi, false, eParam_NoThread, SEQ_ID_PREFER_ACCESSION_OVER_GI)
static void s_SplitVersion(const CTempString &acc_in, CTempString &acc, int &ver)
static const SAccGuide::TAccInfo kUnrecognized
SStaticPair< const char *, CSeq_id::E_Choice > TChoiceMapEntry
static const char kDigits[]
EDbtagType GetType(void) const
int Compare(const CDbtag &dbt2) const
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string. Sample usage:
void SetStrOrId(CTempString str)
ostream & AsString(ostream &s) const
int Compare(const CObject_id &oid2) const
bool Match(const CObject_id &oid2) const
int Compare(const CPDB_seq_id &psip2) const
bool Match(const CPDB_seq_id &psip2) const
ostream & AsFastaString(ostream &s) const
ostream & AsFastaString(ostream &s) const
int Compare(const CPatent_seq_id &psip2) const
bool Match(const CPatent_seq_id &psip2) const
Base class for all serializable objects.
class CStaticArrayMap<> is an array adaptor that provides an STLish interface to statically-defined a...
TBase::const_iterator const_iterator
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
ostream & AsFastaString(ostream &s, bool allow_version=true) const
Format the contents FASTA string style.
CTextseq_id & Set(const CTempString &acc_in, const CTempString &name_in=kEmptyStr, int version=0, const CTempString &release_in=kEmptyStr, bool allow_dot_version=true)
Set all fields with a single call.
int Compare(const CTextseq_id &tsip2) const
bool Match(const CTextseq_id &tsip2) const
Comparison functions.
Abstract base class for lightweight line-by-line reading.
bvector_size_type size_type
container_type::const_iterator const_iterator
container_type::iterator iterator
const_iterator end() const
const_iterator lower_bound(const key_type &key) const
iterator_bool insert(const value_type &val)
container_type::value_type value_type
const_iterator find(const key_type &key) const
const_iterator lower_bound(const key_type &key) const
const_iterator end() const
const_iterator begin() const
container_type::iterator iterator
iterator_bool insert(const value_type &val)
std::ofstream out("events_result.xml")
main entry point for tests
static const char * str(char *buf, int n)
#define GI_FROM(T, value)
constexpr size_t ArraySize(const Element(&)[Size])
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define ERASE_ITERATE(Type, Var, Cont)
Non-constant version with ability to erase current element, if container permits.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
#define INT_ID_TO(T, id)
Convert gi-compatible int to/from other types.
#define ERR_POST_X(err_subcode, message)
Error posting with default error code and given error subcode.
TErrCode GetErrCode(void) const
Get error code.
#define STD_CATCH_ALL_X(err_subcode, message)
Standard handling of "exception"-derived exceptions; catches non-standard exceptions and generates "u...
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
void Warning(CExceptionArgs_Base &args)
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
void Info(CExceptionArgs_Base &args)
ESerialRecursionMode
How to assign and compare child sub-objects of serial objects.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
static CRef< ILineReader > New(const string &filename)
Return a new ILineReader object corresponding to the given filename, taking "-" (but not "....
void x_WriteContentAsFasta(ostream &out) const
CSeq_id(void)
See also CSeq_id related functions in "util/sequence.hpp":
string ComposeOSLT(list< string > *secondary_ids=nullptr, TComposeOSLTFlags parse_flags=0) const
JIRA ID-5188 : Compose OSLT string for the primary id, as well as OSLT strings for the secondary ids,...
const string AsFastaString(void) const
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
static bool IsValid(const CBioseq::TId &ids, TParseFlags flags=fParse_Default)
string GetSeqIdString(bool with_version=false) const
Return seqid string with optional version for text seqid type.
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Optimized implementation of CSerialObject::Assign, which is not so efficient.
int BaseBlastScore(void) const
int BaseFastaAAScore(void) const
static const size_t kMaxLocalIDLength
ID length restrictions.
static SIZE_TYPE ParseIDs(CBioseq::TId &ids, const CTempString &s, TParseFlags flags=fParse_Default)
Parse a string representing one or more Seq-ids, appending the results to IDS.
void GetLabel(string *label, ELabelType type=eDefault, TLabelFlags flags=fLabel_Default) const
Append a label for this Seq-id to the supplied string.
E_Choice x_Init(list< CTempString > &fasta_pieces, E_Choice type, ETypeVariant tv)
EAccessionInfo
For IdentifyAccession (below)
static ESNPScaleLimit GetSNPScaleLimit_Value(const string &name)
int CompareOrdered(const CSeq_id &sid2) const
const SSeqIdRange * m_Range
static SIZE_TYPE ParseFastaIds(CBioseq::TId &ids, const CTempString &s, bool allow_partial_failure=false)
Parse an entire set of |-delimited FASTA-style IDs, appending the results to IDS.
static E_Choice GetAccType(EAccessionInfo info)
E_SIC
Compare return values.
EFastaAsTypeAndContent
Tag for method variants that would otherwise be ambiguous.
static ETypeVariant x_IdentifyTypeVariant(E_Choice type, const CTempString &str)
virtual void WriteAsFasta(ostream &out) const
Implement serializable interface.
int BaseBestRankScore(void) const
int TFlags
binary OR of EFlags
CSeq_id & Set(const CTempString &the_id, TParseFlags flags=fParse_AnyRaw)
Reassign based on flat specifications; arguments interpreted as with constructors.
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
SSeqIdRange(const CTempString &s, TFlags flags=0)
static E_Choice WhichInverseSeqId(const CTempString &SeqIdCode)
Converts a string to a choice, no need to require a member.
static const char * GetSNPScaleLimit_Name(ESNPScaleLimit value)
static bool IsValidLocalID(const CTempString &s)
Perform rudimentary validation on potential local IDs, whose contents should be pure ASCII and limite...
static string GetStringDescr(const CBioseq &bioseq, EStringFormat fmt)
static void LoadAccessionGuide(const string &filename)
static CSeq_id_Handle GetHandle(const CSeq_id &id)
Normal way of getting a handle, works for any seq-id.
int AdjustScore(int base_score, TAdjustScoreFlags flags=TAdjustScoreFlags()) const
EStringFormat
Get a string representation of the sequence IDs of a given bioseq.
CSeq_id::EAccessionInfo acc_info
virtual const char * GetErrCodeString(void) const override
Translate from the error code value to its string representation.
bool IsAllowedSNPScaleLimit(ESNPScaleLimit scale_limit) const
static EAccessionInfo x_IdentifyAccession(const CTempString &main_acc, TParseFlags flags, bool has_version)
CRef< CSeq_id > GetID(void) const
void GetMatchingTextseqIds(TSeqIdHandles &matches) const
Collect partially matching textseq-ids.
ELabelType
Return the label for a given string.
const string & x_SetAccession(void) const
void GetMatchingIds(TSeqIdHandles &matches) const
Collect partially matching seq-ids: no-version, no-name etc.
static int FastaNARank(const CRef< CSeq_id > &id)
int BaseTextScore(void) const
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
static bool PreferAccessionOverGi(void)
Check if the option to prefer accession.version over GI is enabled (SeqId/PreferAccessionOverGi or SE...
static bool AvoidGi(void)
Check if the option to avoid GI ids is enabled (SeqId/AvoidGi or SEQ_ID_AVOID_GI).
virtual ~CSeq_id(void)
Destructor.
static TErrorFlags CheckLocalID(const CTempString &s)
Perform rudimentary validation on potential local IDs, whose contents should not exceed fifty charact...
int BaseFastaNAScore(void) const
static const char * WhichFastaTag(E_Choice choice)
Converts a choice to a FASTA tag, with no trailing vertical bar.
E_SIC Compare(const CSeq_id &sid2) const
Compare() - more general.
ESNPScaleLimit
SNP annotation scale limits.
static int FastaAARank(const CRef< CSeq_id > &id)
@ fLabel_Trimmed
Trim trailing FASTA delimiters.
@ fLabel_UpperCase
Upper case label, with special encoding for PDB chain-ids.
@ fLabel_Version
Show the version.
@ fLabel_GeneralDbIsContent
For type general, use the database name as the tag and the (text or numeric) key as the content.
@ eUnknownType
Unrecognized Seq-id type.
@ eFormat
Contents not parsable as expected.
@ eAcc_refseq_wgsv_intermed
@ eAcc_refseq_unique_prot
@ eAcc_ddbj_tpa_wgsvm_prot
@ eAcc_ddbj_tpa_targeted_nuc
@ eAcc_ddbj_tpa_targetedvm_nuc
@ eAcc_embl_tpa_wgsvm_nuc
@ eAcc_refseq_contig_ncbo
@ eAcc_ddbj_tpa_tsam_prot
@ eAcc_ddbj_tpa_tsav_prot
@ eAcc_refseq_mrna_predicted
@ eAcc_embl_tpa_wgsvm_prot
@ eAcc_ddbj_tpa_wgsm_prot
@ eAcc_ddbj_tpa_tsavm_nuc
@ eAcc_embl_tpa_wgsm_prot
@ eAcc_ddbj_tpa_targetedm_nuc
@ eAcc_ddbj_tpa_targetedv_nuc
@ eAcc_ddbj_tpa_tsavm_prot
@ eAcc_ddbj_targetedvm_nuc
@ eAcc_refseq_prot_predicted
@ eAcc_embl_tpa_tsam_prot
@ eAcc_embl_tpa_tsav_prot
@ eAcc_ddbj_targetedv_nuc
@ eAcc_embl_tpa_tsavm_prot
@ eAcc_refseq_wgsm_intermed
@ eAcc_ddbj_tpa_chromosome
@ eAcc_ddbj_tpa_wgsvm_nuc
@ eAcc_embl_tpa_wgsv_prot
@ eAcc_refseq_chromosome_ncbo
@ eAcc_refseq_ncrna_predicted
@ eAcc_refseq_wgsvm_intermed
@ eAcc_embl_tpa_tsavm_nuc
@ eAcc_refseq_wgs_intermed
@ eAcc_ddbj_targetedm_nuc
@ eAcc_ddbj_tpa_wgsv_prot
@ e_NO
different SeqId types-can't compare
@ e_YES
SeqIds compared, but are different.
@ fGpipeAddSecondary
Add "ACC.VER(=1)" for a secondary ID.
@ fParse_NoFASTA
Don't bother checking for a tag.
@ fParse_RawText
Try to ID raw non-numeric accessions.
@ fParse_PartialOK
Warn rather than throwing an exception when a FASTA-style ID set contains unparsable portions,...
@ fParse_RawGI
Treat raw numbers as GIs, not local IDs.
@ fParse_AnyLocal
Treat otherwise unidentified strings as local accessions as long as they don't resemble FASTA-style I...
@ fParse_FallbackOK
For IdentifyAccession, don't warn about falling back to a different specific type because broad ident...
@ fParse_ValidLocal
Treat otherwise unidentified strings as raw accessions, provided that they pass rudimentary validatio...
@ eFormat_BestWithoutVersion
@ eFormat_BestWithVersion
@ eFastaContent
Like eFasta, but without any tag.
@ eContent
Untagged human-readable accession or the like.
@ eDefault
default is to show type + content
@ eBoth
Type and content, delimited by a vertical bar.
@ eFasta
Tagged ID in NCBI's traditional FASTA style.
@ eType
FASTA-style type, or database in GeneralDbIsContent mode.
@ fAllowUnderscores
Allow prefixes to contain underscores.
@ eSNPScaleLimit_Supercontig
@ eSNPScaleLimit_Chromosome
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – i.e., it points to an object and has a non-null value.
@ eParam_NoThread
Do not use per-thread values.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
NCBI_NS_STD::string::size_type SIZE_TYPE
const_iterator end() const
Return an iterator to the string's ending position (one past the end of the represented sequence)
static CTempString TruncateSpaces_Unsafe(const CTempString str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
static int StringToNonNegativeInt(const CTempString str, TStringToNumFlags flags=0)
Convert string to non-negative integer value.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool MatchesMask(CTempString str, CTempString mask, ECase use_case=eCase)
Match "str" against the "mask".
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
CTempString & assign(const char *src_str, size_type len)
Assign new values to the content of a string.
static TNumeric StringToNumeric(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to a numeric value.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static string Join(const TContainer &arr, const CTempString &delim)
Join strings using the specified delimiter.
const char * data(void) const
Return a pointer to the array represented.
void erase(size_type pos=0)
Truncate the string at some specified position Note: basic_string<> supports additional erase() optio...
bool empty(void) const
Return true if the represented string is empty (i.e., the length is zero)
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
void clear(void)
Clears the string.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
size_type length(void) const
Return the length of the represented array.
static unsigned int StringToUInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to unsigned int.
CTempString substr(size_type pos) const
Obtain a substring from this string, beginning at a given offset.
size_type find_first_not_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character not in the matching string within the current string,...
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
size_type find_first_of(const CTempString match, size_type pos=0) const
Find the first occurrence of any character in the matching string within the current string,...
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
size_type find(const CTempString match, size_type pos=0) const
Find the first instance of the entire matching string within the current string, beginning at an opti...
static string & ToUpper(string &str)
Convert string to upper case – string& version.
const char *const kEmptyCStr
Empty "C" string (points to a '\0').
size_type size(void) const
Return the length of the represented array.
const_iterator begin() const
Return an iterator to the string's starting position.