97 #define THIS_FILE "asci_blk.cpp"
99 #define Seq_descr_pub_same 50
105 "*** SEQUENCING IN PROGRESS ***",
106 "***SEQUENCING IN PROGRESS***",
107 "WORKING DRAFT SEQUENCE",
108 "LOW-PASS SEQUENCE SAMPLING",
109 "*** IN PROGRESS ***",
124 if (! line || *line ==
'\0')
127 for (p = line; *p !=
'\0'; p++) {
130 if ((*p ==
',' && p[1] ==
',') || (*p ==
';' && p[1] ==
';'))
132 if ((p[1] ==
',' || p[1] ==
';') && p[0] ==
' ') {
138 for (p = line, q = line; *p !=
'\0';) {
140 if (*p ==
' ' || *p ==
'\n') {
141 for (got_nl =
false; *p ==
' ' || *p ==
'\n'; p++) {
153 for (q--; q > line && (*q ==
' ' || *q ==
';' || *q ==
'\n');)
155 if (*q !=
' ' && *q !=
';' && *q !=
'\n')
160 for (p = line; *p ==
' ' || *p ==
';' || *p ==
'\n';)
173 for (
i = 0;
i < line.size(); ++
i) {
177 if (
i + 1 < line.size()) {
178 char& c1 = line[
i + 1];
179 if ((c ==
',' && c1 ==
',') || (c ==
';' && c1 ==
';'))
181 if ((c1 ==
',' || c1 ==
';') && c ==
' ') {
189 for (
i = 0;
i < line.size();) {
191 if (c ==
' ' || c ==
'\n') {
192 for (;
i < line.size() && (line[
i] ==
' ' || line[
i] ==
'\n'); ++
i) {
201 while (! line.empty()) {
202 char c = line.back();
203 if (c ==
' ' || c ==
';' || c ==
'\n')
210 for (
char c : line) {
211 if (c ==
' ' || c ==
';' || c ==
'\n')
258 vector<string> lines;
261 vector<string> sectionLines;
265 for (
const string& line : lines) {
271 auto* secPtr =
new Section(currentKw, sectionLines);
275 sectionLines.clear();
276 sectionLines.push_back(line);
279 sectionLines.push_back(line);
297 for (; ptr < eptr && *ptr !=
'\n'; ptr++)
312 }
while (nextkw == curkw);
343 eptr = bptr + dbp->
len;
345 const string s = to_string(bases);
346 const string str =
"(bases 1 to " + s +
")";
347 const string str1 =
"(bases 1 to " + s +
";";
348 const string str2 =
"(residues 1 to " + s +
"aa)";
350 string ref(bptr, bptr + dbp->
len);
352 while (bptr < eptr && *bptr !=
'\n' && *bptr !=
'(')
385 eptr = bptr + dbp->
len;
393 while (bptr < eptr) {
405 }
while ((*ptr ==
' ' && ptr < eptr) || skip);
417 if (! dbp || ! dbp->
mOffset || (! mtag && ! ptag))
423 size_t mlen = mtag ?
StringLen(mtag) : 0;
424 size_t plen = ptag ?
StringLen(ptag) : 0;
434 else if (ptag &&
StringEquN(p + 1, ptag, plen))
465 for (; dbp; dbp = dbp->
mpNext) {
483 for (; dbp; dbp = dbp->
mpNext) {
497 auto secType = secPtr->mType;
516 secPtr->xBuildFeatureBlocks();
550 bool seen_oc =
false;
558 for (; ptr < eptr && *ptr !=
'\n'; ptr++)
584 }
while (nextkw == curkw);
612 eptr = bptr + dbp->
len;
615 while (ptr && ptr + 1 < eptr) {
621 eptr = bptr + dbp->
len;
625 if (bptr[1] ==
'T') {
656 while (bptr < eptr) {
692 eptr = bptr + dbp->
len;
702 const string str =
" 1-" + to_string(bases);
719 for (sptr = bptr + 1; sptr < eptr && *sptr !=
'R';)
748 for (; temp; temp = temp->
mpNext) {
758 for (; temp; temp = temp->
mpNext) {
818 eptr = bptr + dbp->
len;
855 if (
l > 0 && l < dbp->
len) {
861 if (ldbp != dbp->
mpData && ldbp) {
872 if (
l > 0 && l < curdbp->
len) {
877 if (ldbp != curdbp->
mpNext && ldbp && !
done) {
890 if (! acc || *acc ==
'\0')
898 pat_id->SetCit().SetCountry(
string(p + 1, q));
901 pat_id->SetCit().SetId().SetNumber(
string(q + 1, p));
904 pat_id->SetCit().SetDoc_type(
string(p + 1, q));
906 pat_id->SetSeqid(atoi(q + 1));
964 if (! acc || *acc ==
'\0')
973 text_id->SetAccession(acc);
975 if (accver && vernum > 0)
976 text_id->SetVersion(vernum);
993 if (! locus || *locus ==
'\0')
997 text_id->SetName(locus);
1020 text_id->SetName(locus);
1051 while (sdbp && sdbp->
mType != subtype)
1070 bioseq.
SetId().push_back(seqId);
1094 id->SetPatent(*psip);
1105 res->
SetId().push_back(std::move(pId));
1108 res->
SetId().push_back(std::move(pId));
1112 }
else if ((! locus || *locus ==
'\0') && (! acc || *acc ==
'\0')) {
1117 if (ibp->
embl_new_ID ==
false && locus && *locus !=
'\0' &&
1119 textId->SetName(locus);
1121 if (acc && *acc !=
'\0')
1122 textId->SetAccession(acc);
1125 textId->SetVersion(ibp->
vernum);
1128 if (
SetTextId(seqtype, *seqId, *textId))
1129 res->
SetId().push_back(seqId);
1166 bool within =
false;
1168 char* eptr = bptr +
len;
1171 for (
str = com; bptr < eptr; bptr = p + 1) {
1175 if ((is_htg && bptr[col_data] ==
'*') ||
1193 if (*(
str - 1) !=
'~')
1199 if (p - bptr < col_data)
1203 size_t size = p - bptr;
1205 if (*bptr ==
' ' && *(
str - 1) !=
'~')
1209 if (is_pat &&
size > 4 &&
1210 q[0] >=
'A' && q[0] <=
'Z' && q[1] >=
'A' && q[1] <=
'Z' &&
1213 else if (
size < 50 || within)
1229 for (p += 2, eptr = p; *eptr ==
' ';)
1234 for (p = com; *p ==
' ';)
1238 for (p = com; *p !=
'\0';)
1242 if (*p ==
' ' || *p ==
'\t' || *p ==
';' || *p ==
',' ||
1243 *p ==
'.' || *p ==
'~') {
1270 auto it1 = secs.begin();
1271 if (it1 == secs.end() || it1->empty())
1273 auto it2 =
next(it1);
1274 if (it2 == secs.end() || *it2 !=
"-" ||
fta_if_wgs_acc(*it1) != 0)
1277 auto tbp = secs.insert_after(it1, *it1);
1326 bool unusual_wgs_msg;
1337 is_cp = (acc[0] ==
'C' && acc[1] ==
'P');
1340 if (pri_acc == 1 || pri_acc == 4) {
1342 for (p = acc; (*p >=
'A' && *p <=
'Z') || *p ==
'_';)
1352 unusual_wgs =
false;
1353 for (
auto tbp = ibp->
secaccs.begin(); tbp != ibp->
secaccs.end(); ++tbp) {
1356 if (tbp == ibp->
secaccs.end())
1358 if (! accessions.empty()) {
1359 accessions.back() +=
'-';
1360 accessions.back() += *tbp;
1366 const string&
a = *tbp;
1369 unusual_wgs_msg =
true;
1370 if (sec_acc == 0 || sec_acc == 3 ||
1371 sec_acc == 4 || sec_acc == 6 ||
1372 sec_acc == 10 || sec_acc == 12)
1381 unusual_wgs_msg =
false;
1388 if (sec_acc < 0 || sec_acc == 2) {
1389 if (pri_acc == 1 || pri_acc == 5 || pri_acc == 11) {
1390 if (! allow_uwsec) {
1391 ErrPostEx(
SEV_REJECT,
ERR_ACCESSION_WGSWithNonWGS_Sec,
"This WGS/TSA/TLS record has non-WGS/TSA/TLS secondary accession \"%s\". WGS/TSA/TLS records are not currently allowed to replace finished sequence records, scaffolds, etc. without human review and confirmation.",
a.c_str());
1398 accessions.push_back(
a);
1402 if (sec_acc == 3 || sec_acc == 6)
1405 (pri_acc == 1 || pri_acc == 5 || pri_acc == 11) &&
1416 if (pri_acc == 1 || pri_acc == 5 || pri_acc == 11)
1421 if (! allow_uwsec) {
1422 ErrPostStr(
SEV_REJECT,
ERR_ACCESSION_UnusualWGS_Secondary,
"This record has one or more WGS/TSA/TLS secondary accession numbers which imply that a WGS/TSA/TLS project is being replaced (either by another project or by finished sequence). This is not allowed without human review and confirmation.");
1425 ErrPostStr(
SEV_WARNING,
ERR_ACCESSION_UnusualWGS_Secondary,
"This record has one or more WGS/TSA/TLS secondary accession numbers which imply that a WGS/TSA project is being replaced (either by another project or by finished sequence). This is being allowed via the use of a special parser flag.");
1428 }
else if (pri_acc == 2)
1430 if (sec_acc == 1 || sec_acc == 5 || sec_acc == 11)
1436 }
else if (unusual_wgs_msg) {
1437 if (! allow_uwsec) {
1438 if (! unusual_wgs) {
1439 if (sec_acc == 1 || sec_acc == 5 || sec_acc == 11)
1440 text =
"WGS/TSA/TLS contig secondaries are present, implying that a scaffold is replacing a contig";
1442 text =
"This record has one or more WGS/TSA/TLS secondary accession numbers which imply that a WGS/TSA/TLS project is being replaced (either by another project or by finished sequence)";
1448 if (! unusual_wgs) {
1449 if (sec_acc == 1 || sec_acc == 5 || sec_acc == 11)
1450 text =
"WGS/TSA/TLS contig secondaries are present, implying that a scaffold is replacing a contig";
1452 text =
"This record has one or more WGS/TSA/TLS secondary accession numbers which imply that a WGS/TSA/TLS project is being replaced (either by another project or by finished sequence)";
1459 if (pri_acc == 1 || pri_acc == 5 || pri_acc == 11) {
1461 if (sec_acc == 1 || sec_acc == 5 || pri_acc == 11)
1462 accessions.push_back(
a);
1463 }
else if (allow_uwsec) {
1464 accessions.push_back(
a);
1466 }
else if (pri_acc == 2) {
1467 if (sec_acc == 0 || sec_acc == 4)
1468 accessions.push_back(
a);
1470 accessions.push_back(
a);
1482 for (
string&
key : keywords) {
1491 for (p =
key.c_str() + 4; *p ==
' ' || *p ==
'\t';)
1496 for (string::iterator p =
buf.begin() + 4; p !=
buf.end(); ++p) {
1497 if (*p >=
'A' && *p <=
'Z')
1509 string& keywordData)
1512 const string problematic(
"WGS Third Party Data");
1513 const string desired(
"WGS; Third Party Data");
1515 if (keywordData.empty()) {
1519 if (wgsStart == string::npos) {
1522 auto afterProblematic = keywordData[wgsStart + problematic.size()];
1523 if (afterProblematic !=
';' && afterProblematic !=
'.') {
1527 string fixedKeywords;
1529 auto semiBefore = keywordData.rfind(
';', wgsStart - 1);
1530 if (semiBefore == string::npos) {
1533 for (
auto i = semiBefore + 1;
i < wgsStart; ++
i) {
1534 if (keywordData[
i] !=
' ') {
1538 fixedKeywords = keywordData.substr(0, wgsStart - 1);
1540 fixedKeywords += desired;
1541 fixedKeywords += keywordData.substr(wgsStart + problematic.size());
1542 keywordData = fixedKeywords;
1558 if (keywordData.empty()) {
1568 auto it = keywords.begin();
1569 auto last = --keywords.end();
1570 while (it != keywords.end()) {
1571 auto& keyword = *it;
1577 if (keyword.empty()) {
1578 keywords.erase(it++);
1631 while (*ptr !=
'\n' && *ptr !=
'\0' && blank < 6 &&
count < 100) {
1632 if (numns && (*ptr ==
'n' || *ptr ==
'N'))
1643 }
else if (residue == 1 && (warn ||
isalpha(*ptr) != 0)) {
1645 *bu++ = replacechar;
1698 for (
char* p = seqptr; *p !=
'\0'; p++)
1699 if (*p >=
'A' && *p <=
'Z')
1710 endptr = seqptr +
len;
1720 while (*seqptr ==
' ' || *seqptr ==
'\n' || *seqptr ==
'\t')
1723 while (*seqptr !=
'\n')
1729 std::vector<char>
buf;
1731 for (numns = 0; seqptr < endptr;) {
1740 while (
isalpha(*seqptr) == 0 && seqptr < endptr)
1754 if (ibp->
is_pat ==
false)
1759 if (ibp->
is_pat ==
false)
1766 if (seqlen ==
static_cast<Uint4>(numns)) {
1789 unique_ptr<unsigned char[]> dnaconv(
new unsigned char[255]());
1790 MemSet((
char*)dnaconv.get(), (
Uint1)1, (
size_t)255);
1798 dnaconv[
static_cast<int>(
code[0])] =
code[0];
1817 unique_ptr<unsigned char[]> protconv(
new unsigned char[255]());
1820 MemSet((
char*)protconv.get(), (
Uint1)1, (
size_t)255);
1839 CSeq_descr::Tdata::const_iterator cur_descr = descr_list.begin();
1840 for (; cur_descr != descr_list.end(); ++cur_descr) {
1841 if ((*cur_descr)->Which() == choice)
1864 CSeq_descr::Tdata::iterator cur_descr = descr_list.begin();
1865 for (; cur_descr != descr_list.end(); ++cur_descr) {
1866 if ((*cur_descr)->Which() == choice) {
1868 descr_new.
Set().push_back(*cur_descr);
1869 descr_list.erase(cur_descr);
1883 if (it1->SameCitation(*it2)) {
1887 const CCit_gen& cit_a = it1->GetGen();
1888 const CCit_gen& cit_b = it2->GetGen();
1946 TEntryList::iterator next_seq =
entries.begin();
1947 for (++next_seq; next_seq !=
entries.end(); ++next_seq) {
1948 if (! (*next_seq)->IsSetDescr())
1954 for (
auto& cur_descr : descr.
Set()) {
1955 if (! cur_descr->IsPub() || ! cur_descr->GetPub().IsSetPub() || ! cur_descr->GetPub().GetPub().IsSet() ||
1956 cur_descr->GetPub().GetPub().Get().empty())
1971 same_pub_descr.insert(cur_descr);
1983 return (next_seq ==
entries.end());
1992 for (CSeq_descr::Tdata::iterator cur_descr = descr_list.begin(); cur_descr != descr_list.end();) {
1993 if ((*cur_descr)->Which() == choice)
1994 cur_descr = descr_list.erase(cur_descr);
2009 TEntryList::iterator next_seq =
entries.begin();
2012 for (; next_seq !=
entries.end(); ++next_seq)
2025 TEntryList::iterator next_seq =
entries.begin();
2028 for (; next_seq !=
entries.end(); ++next_seq) {
2030 for (CSeq_descr::Tdata::iterator cur_descr = descr_list.begin(); cur_descr != descr_list.end();) {
2031 std::set<CSeqdesc*>::iterator it = to_clean.find(*cur_descr);
2032 if (it != to_clean.end()) {
2033 cur_descr = descr_list.erase(cur_descr);
2055 for (CSeq_descr::Tdata::iterator cur_descr = descr_list.begin(); cur_descr != descr_list.end();) {
2056 if ((*cur_descr)->IsPub()) {
2059 std::set<CSeqdesc*> same_pub_descr;
2061 descr.
Set().push_back(*cur_descr);
2062 cur_descr = descr_list.erase(cur_descr);
2085 bool no_problem_found =
true;
2086 for (TEntryList::const_iterator seq =
entries.begin(); seq !=
entries.end(); ++seq) {
2087 const CSeq_descr& descr = (*seq)->GetDescr();
2090 CSeq_descr::Tdata::const_iterator cur_descr =
GetDescrByChoice(descr, choice);
2092 if (cur_descr == descr_list.end()) {
2093 no_problem_found =
false;
2099 org = (*cur_descr)->GetOrg().GetTaxname();
2100 else if (org != (*cur_descr)->GetOrg().GetTaxname()) {
2101 no_problem_found =
false;
2105 Int4 val = *(*cur_descr)->GetModif().begin();
2108 else if (modif !=
val) {
2109 no_problem_found =
false;
2115 date.
Assign((*cur_descr)->GetUpdate_date());
2117 no_problem_found =
false;
2123 return no_problem_found;
2139 const string* found =
nullptr;
2140 for (
auto it : descr.
Get()) {
2141 if (it->IsTitle()) {
2142 found = &it->GetTitle();
2150 string title = *found;
2152 auto pos = title.find(
"complete cds");
2153 if (pos == string::npos) {
2154 pos = title.find(
"exon");
2157 if (pos != string::npos) {
2189 descr.
Set().push_back(desc_new);
2220 Uint4 dblink_count = 0;
2221 Uint4 gpid_count = 0;
2223 bool bad_gpid =
false;
2224 bool bad_dblink =
false;
2232 for (CSeq_descr::Tdata::iterator cur_descr = descr_list.begin(); cur_descr != descr_list.end();) {
2233 if (! (*cur_descr)->IsUser()) {
2246 if (type_str ==
"DBLink") {
2251 cur_dblink = *cur_descr;
2254 dblink = cur_dblink;
2256 cur_descr = descr_list.erase(cur_descr);
2257 }
else if (type_str ==
"GenomeProjectsDB") {
2262 cur_gpid = *cur_descr;
2267 cur_descr = descr_list.erase(cur_descr);
2274 dblink = cur_dblink;
2276 if (! cur_dblink->
Equals(*dblink)) {
2287 if (! cur_gpid->
Equals(*gpid)) {
2295 if (bad_dblink ==
false && bad_gpid ==
false) {
2296 if (dblink_count > 0 &&
entries.size() != dblink_count)
2298 if (gpid_count > 0 &&
entries.size() != gpid_count)
2312 if (bad_gpid || bad_dblink ||
2314 descr.
Get().empty())
2318 descr.
Set().push_back(dblink);
2320 descr.
Set().push_back(gpid);
2370 for (
const auto& entry :
entries) {
2371 const CBioseq& cur_bioseq = entry->GetSeq();
2394 for (
const auto& entry :
entries) {
2395 const CBioseq& cur_bioseq = entry->GetSeq();
2415 string locusname =
"SEG_";
2421 string locusname =
"SEG_";
2422 locusname.append(ibp->
acnum);
2432 bioseq->
SetDescr().Set().push_back(descr);
2439 bool need_null =
false;
2446 inst.
SetExt().SetSeg().Set().push_back(null_loc);
2451 seqloc->
Assign(seq_it.GetEmbeddingSeq_loc());
2452 inst.
SetExt().SetSeg().Set().push_back(seqloc);
2477 if (
id.NotEmpty()) {
2504 bioseq_entry->
SetSeq(*bioseq);
2511 bioseq_set_entry->
SetSet(*bioseq_set);
2514 bioseq_set_head->
SetSeq_set().push_back(bioseq_entry);
2515 bioseq_set_head->
SetSeq_set().push_back(bioseq_set_entry);
2523 bioseq_set_head_entry->
SetSet(*bioseq_set_head);
2526 entries.push_back(bioseq_set_head_entry);
2565 bool check_div(
bool pat_acc,
bool pat_ref,
bool est_kwd,
bool sts_kwd,
bool gss_kwd,
bool if_cds,
string& div,
CMolInfo::TTech* tech,
size_t bases,
Parser::ESource source,
bool& drop)
2570 if (pat_acc || pat_ref ||
StringEqu(div.c_str(),
"PAT")) {
2571 if (pat_ref ==
false) {
2594 }
else if (est_kwd) {
2603 }
else if (bases > 1000) {
2617 }
else if (
StringEqu(div.c_str(),
"EST")) {
2625 }
else if (sts_kwd) {
2634 }
else if (bases > 1000) {
2648 }
else if (
StringEqu(div.c_str(),
"STS")) {
2653 }
else if (gss_kwd) {
2662 }
else if (bases > 2500) {
2676 }
else if (
StringEqu(div.c_str(),
"GSS")) {
2681 }
else if (
StringEqu(div.c_str(),
"TSA")) {
2686 return ! div.empty();
2698 if (*pch ==
'd' || *pch ==
'e') {
2701 lID = strtol(pch + 1, &pchEnd, 10);
2703 if (! ((lID == 0 && pch + 1 == pchEnd) || (lID == LONG_MAX && errno == ERANGE))) {
2708 tag->SetStr(
string(pch, pchEnd - pch));
2712 dbtag->
SetDb(pid ?
"PID" :
"NID");
2714 id->SetGeneral(*dbtag);
2737 bioseq.
SetId().push_back(sid);
2743 for (
auto& entry : seq_entries) {
2745 if (bioseq->IsSetDescr()) {
2750 for (
auto& descr : bioseq->SetDescr().Set()) {
2751 if (descr->IsGenbank() && ! gb_block)
2752 gb_block = &descr->SetGenbank();
2753 else if (descr->IsMolinfo() && ! molinfo) {
2754 molinfo = &descr->SetMolinfo();
2758 if (gb_block && molinfo)
2784 for (
const auto& descr : descrs) {
2785 if (descr->IsSource())
2786 return &(descr->GetSource());
2808 if (seq_entries.empty())
2832 for (q =
tmp; *q !=
'\0'; q++) {
2833 if (*q ==
'\n' &&
StringEquN(q + 1,
"DE ", 5))
2835 else if (*q ==
'\n' || *q ==
'\t')
2838 for (q =
tmp, p =
tmp; *p !=
'\0'; p++) {
2839 if (*p ==
' ' && p[1] ==
' ')
2851 ! p && ! cancelled) {
2863 r =
new char[dbp->
len + 1];
2868 for (p =
r, q =
r; *p !=
'\0'; p++)
2869 if (*p >=
'a' && *p <=
'z')
2873 for (
count = 0, p =
r; *p !=
'\0'; p++) {
2876 else if (++
count > 10) {
2894 if (! entry || ! xip)
2901 for (q =
tmp; *q !=
'\0'; q++)
2902 if (*q ==
'\n' || *q ==
'\t')
2904 for (q =
tmp, p =
tmp; *p !=
'\0'; p++) {
2905 if (*p ==
' ' && p[1] ==
' ')
2917 ! p && ! cancelled) {
2930 for (
count = 0, p =
r; *p !=
'\0'; p++) {
2933 else if (++
count > 10) {
2946 }
else if ((! div || !
StringEqu(div,
"HTG")) &&
2968 size_t start_pos = 0;
2969 for (; start_pos >
str.size() &&
str[start_pos] <=
' '; ++start_pos)
2972 if (start_pos ==
str.size()) {
2977 str =
str.substr(start_pos);
2978 size_t end_pos =
str.size() - 1;
2979 for (;; --end_pos) {
2980 if (
str[end_pos] ==
';' ||
str[end_pos] <=
' ') {
2989 if (
str[end_pos] !=
';' || end_pos == 0) {
2993 str =
str.substr(0, end_pos);
2998 size_t amp_pos = end_pos - 1;
2999 for (; amp_pos; --amp_pos) {
3000 if (
str[amp_pos] ==
' ' ||
str[amp_pos] ==
'&' ||
str[amp_pos] ==
';')
3004 if (
str[amp_pos] ==
'&')
3007 str =
str.substr(0, end_pos);
3013 for (list<string>::iterator it = str_list.begin(); it != str_list.end();) {
3017 it = str_list.erase(it);
3026 const Char* div =
nullptr;
3028 for (
const auto& descr : descrs) {
3029 if (! descr->IsEmbl())
3032 if (! descr->GetEmbl().IsSetDiv() || descr->GetEmbl().GetDiv() > 15)
3039 for (TSeqdescList::iterator descr = descrs.begin(); descr != descrs.end();) {
3040 if (! (*descr)->IsGenbank()) {
3045 CGB_block& gb_block = (*descr)->SetGenbank();
3052 gb_block.
GetDiv() !=
"SYN") {
3101 descr = descrs.erase(descr);
3113 for (
auto& entry : seq_entries) {
3115 if (bioseq->IsSetDescr())
3120 if (bio_set->IsSetDescr())
3132 for (
const auto& pub : pub_eq.
Get()) {
3134 if (pub->GetGen().IsSetSerial_number()) {
3135 ret = pub->GetGen().GetSerial_number();
3171 if (
strcmp(str1, str2) <= 0)
3183 for (
auto& entry : seq_entries) {
3185 if (bioseq->IsSetDescr())
3186 bioseq->SetDescr().Set().sort(
descr_cmp);
3190 if (bio_set->IsSetDescr())
3191 bio_set->SetDescr().Set().sort(
descr_cmp);
3202 }
else if (pub1->
IsGen()) {
3217 for (
auto& annot : annots) {
3218 if (annot->IsFtable()) {
3219 for (
auto& feat : annot->SetData().SetFtable()) {
3220 if (feat->IsSetCit() && feat->GetCit().IsPub()) {
3223 TPubList& pubs = feat->SetCit().SetPub();
3224 for (TPubList::iterator pub = pubs.begin(); pub != pubs.end(); ++pub) {
3225 TPubList::iterator next_pub = pub;
3226 for (++next_pub; next_pub != pubs.end(); ++next_pub) {
3228 swap(*next_pub, *pub);
3240 for (
auto& entry : seq_entries) {
3242 if (bioseq->IsSetAnnot())
3247 if (bio_set->IsSetAnnot())
3256 for (
const auto&
tag : dbtags) {
3257 if (
tag->IsSetDb() &&
tag->IsSetTag() &&
3258 !
tag->GetTag().IsStr() &&
tag->GetTag().GetId() > 0 &&
3259 tag->GetDb() ==
"taxon")
3280 for (
const auto& annot : annots) {
3281 if (! annot->IsFtable())
3285 for (
const auto& feat : feats) {
3286 if (! feat->IsSetData() || ! feat->GetData().IsBiosrc())
3291 const CBioSource& bio_src = feat->GetData().GetBiosrc();
3311 if (! entry || ! xip)
3314 for (; xip; xip = xip->
next)
3320 for (txip = xip->
subtags; txip; txip = txip->
next) {
3323 for (fxip = txip->
subtags; fxip; fxip = fxip->
next)
3339 for (
auto& entry : seq_entries) {
3341 if (bioseq->IsSetInst() && bioseq->GetInst().IsSetStrand())
3344 if (bioseq->GetInst().IsSetMol()) {
3358 for (
const auto& entry : seq_entries) {
3360 if (bioseq->IsSetId()) {
3361 for (
const auto&
id : bioseq->GetId()) {
3362 if (id->IsSwissprot())
3387 for (TPubList::iterator pub = pubs.begin(); pub != pubs.end();) {
3388 if ((*pub)->IsGen()) {
3389 if ((*pub)->GetGen().IsSetSerial_number())
3390 (*pub)->SetGen().ResetSerial_number();
3393 pub = pubs.erase(pub);
3405 for (
auto& entry : seq_entries) {
3407 if (pubdesc->IsSetPub()) {
3409 if (pubdesc->GetPub().Get().empty())
3410 pubdesc->ResetPub();
3415 if (feat->IsSetData()) {
3416 if (feat->GetData().IsPub()) {
3418 if (feat->GetData().GetPub().GetPub().Get().empty())
3419 feat->SetData().SetPub().ResetPub();
3420 }
else if (feat->GetData().IsImp()) {
3421 CImp_feat& imp = feat->SetData().SetImp();
3422 if (imp.
IsSetKey() && imp.
GetKey() ==
"Site-ref" && feat->IsSetCit() && feat->GetCit().IsPub()) {
3424 if (feat->GetCit().GetPub().empty())
3425 feat->SetCit().Reset();
3437 const string* seq_str =
nullptr;
3438 const vector<Char>* seq_vec =
nullptr;
3441 size_t old_size = 0;
3447 old_size = seq_str->size();
3453 old_size = seq_vec->size();
3459 old_size = seq_vec->size();
3465 std::vector<Char> new_seq(old_size);
3466 size_t new_size = 0;
3472 if (! new_seq.empty()) {
3473 seq_data.
SetNcbieaa().
Set().assign(new_seq.begin(), new_seq.begin() + new_size);
3493 for (
auto&
delta : bioseq.
SetInst().SetExt().SetDelta().Set()) {
3494 if (
delta->IsLiteral() &&
delta->GetLiteral().IsSetSeq_data() && !
delta->GetLiteral().GetSeq_data().IsGap()) {
3504 for (
auto& entry : seq_entries) {
3506 if (bioseq->IsSetInst() && bioseq->GetInst().IsSetRepr()) {
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
void StripECO(string &str)
void xFixEMBLKeywords(string &keywordData)
static void fta_check_mult_ids(DataBlkPtr dbp, const char *mtag, const char *ptag)
static CSeq_inst::EMol SrchSegSeqMol(const TEntryList &entries)
void BuildSubBlock(DataBlkPtr dbp, Int2 subtype, const char *subkw)
vector< string > genbankKeywords
static void CleanUpSeqDescrPub(TEntryList &entries, std::set< CSeqdesc * > &to_clean)
static bool CheckSegPub(const CPubdesc &pub, TEntryList &entries, std::set< CSeqdesc * > &same_pub_descr)
static bool SwissProtIDPresent(const TEntryList &seq_entries)
void GetGenBankSubBlock(const DataBlk &entry, size_t bases)
void StripSerialNumbers(TEntryList &seq_entries)
static void RemoveSerials(TPubList &pubs)
vector< string > emblKeywords
CRef< CSeq_id > StrToSeqId(const char *pch, bool pid)
static void RemoveDescrByChoice(CSeq_descr &descr, Uint1 choice)
static void GetSegPub(TEntryList &entries, CSeq_descr &descr)
static void InsertDatablkVal(DataBlkPtr *dbp, Int2 type, char *offset, size_t len)
static optional< string > GetBioseqSetDescrTitle(const CSeq_descr &descr)
static void fta_fix_secondaries(TokenBlkList &secs)
static const CBioSource * GetTopBiosource(const CSeq_entry &entry)
void AddNIDSeqId(CBioseq &bioseq, const DataBlk &entry, Int2 type, Int2 coldata, Parser::ESource source)
void fta_fix_orgref_div(const CBioseq::TAnnot &annots, COrg_ref *org_ref, CGB_block &gbb)
static void GetGenBankRefType(DataBlkPtr dbp, size_t bases)
static const char * GetMoleculeClassString(Uint1 mol)
void DefVsHTGKeywords(CMolInfo::TTech tech, const DataBlk &entry, Int2 what, Int2 ori, bool cancelled)
static bool CheckSegDescrChoice(const TEntryList &entries, Uint1 choice)
static void CheckDivCode(TEntryList &seq_entries, ParserPtr pp)
vector< string > swissProtKeywords
CRef< CPatent_seq_id > MakeUsptoPatSeqId(const char *acc)
bool IsSegBioseq(const CSeq_id &id)
void xGetGenBankSubBlocks(Entry &entry, size_t bases)
static void GetBioseqSetDescr(TEntryList &entries, CSeq_descr &descr, bool *drop)
void fta_sort_seqfeat_cit(TEntryList &seq_entries)
void PackEntries(TEntryList &seq_entries)
static Int4 SrchSegLength(const TEntryList &entries)
void fta_set_strandedness(TEntryList &seq_entries)
static void fta_fix_tpa_keywords(TKeywordList &keywords)
void CheckHTGDivision(const char *div, CMolInfo::TTech tech)
unique_ptr< unsigned char[]> GetDNAConv(void)
bool XMLCheckCDS(const char *entry, XmlIndexPtr xip)
unique_ptr< unsigned char[]> GetProteinConv(void)
static bool fta_if_pubs_sorted(const CPub_equiv &pub1, const CPub_equiv &pub2)
static void GetFirstSegDescrChoice(CBioseq &bioseq, Uint1 choice, CSeq_descr &descr_new)
static bool SeqEntryCheckTaxonDiv(const CSeq_entry &entry)
bool fta_orgref_has_taxid(const COrg_ref::TDb &dbtags)
char * GetDescrComment(char *offset, size_t len, Uint2 col_data, bool is_htg, bool is_pat)
void GetSequenceOfKeywords(const DataBlk &entry, int type, Uint2 col_data, TKeywordList &keywords)
static void CheckGBBlock(TSeqdescList &descrs, bool &got)
static void CleanUpSeqDescrChoice(TEntryList &entries, Uint1 choice)
static void CleanVisString(string &str)
static void CleanVisStringList(list< string > &str_list)
static bool pub_cmp(const CRef< CPub > &pub1, const CRef< CPub > &pub2)
static void SrchSegDescr(TEntryList &entries, CSeq_descr &descr)
void EntryCheckDivCode(TEntryList &seq_entries, ParserPtr pp)
void GetEmblSubBlock(size_t bases, Parser::ESource source, const DataBlk &entry)
char * GetEmblBlock(DataBlkPtr *chain, char *ptr, short *retkw, Parser::EFormat format, char *eptr)
const char * magic_phrases[]
static void PackSeqData(CSeq_data::E_Choice code, CSeq_data &seq_data)
static bool IsCitEmpty(const CCit_gen &cit)
static Uint1 ValidSeqType(const char *accession, Uint1 type)
static bool descr_cmp(const CRef< CSeqdesc > &desc1, const CRef< CSeqdesc > &desc2)
char * GetGenBankBlock(DataBlkPtr *chain, char *ptr, Int2 *retkw, char *eptr)
void GetSeqExt(ParserPtr pp, CSeq_loc &seq_loc)
static bool GetSubNodeType(const char *subkw, char **retbptr, char *eptr)
static CRef< CBioseq > GetBioseq(ParserPtr pp, const TEntryList &entries, const CSeq_loc &slp)
static void GetEmblRefType(size_t bases, Parser::ESource source, DataBlkPtr dbp)
CRef< CSeq_id > MakeAccSeqId(const char *acc, Uint1 seqtype, bool accver, Int2 vernum)
bool GetSeqData(ParserPtr pp, const DataBlk &entry, CBioseq &bioseq, Int4 nodetype, unsigned char *seqconv, Uint1 seq_data_type)
void GetLenSubNode(DataBlkPtr dbp)
bool fta_EntryCheckGBBlock(TEntryList &seq_entries)
char * SrchNodeSubType(const DataBlk &entry, Int2 type, Int2 subtype, size_t *len)
void xGetGenBankBlocks(Entry &entry)
static void sort_feat_cit(CBioseq::TAnnot &annots)
static void SetEmptyId(CBioseq &bioseq)
CRef< CSeq_id > MakeLocusSeqId(const char *locus, CSeq_id::E_Choice seqtype)
void fta_sort_descr(TEntryList &seq_entries)
static void DeltaBioseqPack(CBioseq &bioseq)
void XMLDefVsHTGKeywords(CMolInfo::TTech tech, const char *entry, XmlIndexPtr xip, bool cancelled)
void BuildBioSegHeader(ParserPtr pp, TEntryList &entries, const CSeq_loc &seqloc)
static CRef< CSeq_id > MakeSegSetSeqId(const char *accession, const string &locus, Uint1 seqtype, bool is_tpa)
static int GetSerialNumFromPubEquiv(const CPub_equiv &pub_eq)
void GetExtraAccession(IndexblkPtr ibp, bool allow_uwsec, Parser::ESource source, TAccessionList &accessions)
void ShrinkSpaces(char *line)
static void RawBioseqPack(CBioseq &bioseq)
static bool TrimEmblFeatBlk(DataBlkPtr dbp)
static void GetSegSetDblink(CSeq_descr &descr, TEntryList &entries, bool *drop)
static CSeq_descr::Tdata::const_iterator GetDescrByChoice(const CSeq_descr &descr, Uint1 choice)
static bool SameCitation_PubEquivMatch_Logic(const CPub_equiv &a, const CPub_equiv &b)
bool check_div(bool pat_acc, bool pat_ref, bool est_kwd, bool sts_kwd, bool gss_kwd, bool if_cds, string &div, CMolInfo::TTech *tech, size_t bases, Parser::ESource source, bool &drop)
CRef< CBioseq > CreateEntryBioseq(ParserPtr pp)
const CSeq_descr & GetDescrPointer(const CSeq_entry &entry)
static void BuildFeatureBlock(DataBlkPtr dbp)
Int4 ScanSequence(bool warn, char **seqptr, std::vector< char > &bsp, unsigned char *conv, Char replacechar, int *numns)
const char * GetEmblDiv(Uint1 num)
TSeqPos GetLength(void) const
ECompare Compare(const CDate &date) const
@ eCompare_same
They're equivalent.
@Imp_feat.hpp User-defined methods of the data storage class.
@Pubdesc.hpp User-defined methods of the data storage class.
static SIZE_TYPE Convert(const CTempString &src, TCoding src_coding, TSeqPos pos, TSeqPos length, string &dst, TCoding dst_coding)
@Seq_descr.hpp User-defined methods of the data storage class.
static bool IsNa(EMol mol)
Seq-loc iterator class – iterates all intervals from a seq-loc in the correct order.
static TPair GetCodeIndexFromTo(CSeq_data::E_Choice code_type)
static const string & GetCode(CSeq_data::E_Choice code_type, TIndex idx)
pair< TIndex, TIndex > TPair
static TSeqPos Pack(CSeq_data *in_seq, TSeqPos uLength=ncbi::numeric_limits< TSeqPos >::max())
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Template class for iteration on objects of class C (non-modifiable version)
#define ERR_DIVISION_NotMappedtoEST
#define ERR_ACCESSION_UnusualWGS_Secondary
#define ERR_DIVISION_ShouldBePAT
#define ERR_DIVISION_MappedtoPAT
#define ERR_DIVISION_MappedtoSTS
#define ERR_SEQUENCE_TooShort
#define ERR_SEQUENCE_TooShortIsPatent
#define ERR_SEQUENCE_UnknownBaseHTG3
#define ERR_DIVISION_LongGSSSequence
#define ERR_SEGMENT_GPIDMissingOrNonUnique
#define ERR_DIVISION_ESTHasCDSFeature
#define ERR_DIVISION_PATHasGSSKeywords
#define ERR_REFERENCE_MultipleIdentifiers
#define ERR_DIVISION_MissingSTSKeywords
#define ERR_DIVISION_MissingPatentRef
#define ERR_SEQUENCE_BadResidue
#define ERR_DIVISION_PATHasESTKeywords
#define ERR_ACCESSION_ScfldHasWGSContigSec
#define ERR_SEGMENT_PubMatch
#define ERR_FORMAT_LineTypeOrder
#define ERR_SEGMENT_DBLinkMissingOrNonUnique
#define ERR_DIVISION_MappedtoGSS
#define ERR_DIVISION_GSSHasCDSFeature
#define ERR_DIVISION_MappedtoEST
#define ERR_DEFINITION_HTGNotInProgress
#define ERR_ACCESSION_WGSMasterAsSecondary
#define ERR_DIVISION_STSHasCDSFeature
#define ERR_FEATURE_NoFeatData
#define ERR_SEGMENT_DiffMolType
#define ERR_DIVISION_ShouldBeHTG
#define ERR_DIVISION_MissingESTKeywords
#define ERR_DIVISION_NotMappedtoGSS
#define ERR_SEQUENCE_SeqLenNotEq
#define ERR_DIVISION_PATHasCDSFeature
#define ERR_DIVISION_MissingGSSKeywords
#define ERR_DIVISION_NotMappedtoSTS
#define ERR_DIVISION_LongSTSSequence
#define ERR_DIVISION_GBBlockDivision
#define ERR_SEQUENCE_AllNs
#define ERR_ACCESSION_WGSWithNonWGS_Sec
#define ERR_DIVISION_PATHasSTSKeywords
#define ERR_DIVISION_LongESTSequence
#define ERR_DEFINITION_HTGShouldBeComplete
#define ERR_DIVISION_ESTHasSTSKeywords
#define ERR_DIVISION_ShouldNotBeHTG
list< CRef< objects::CSeq_entry > > TEntryList
#define INSDSEQ_DEFINITION
#define INSDSEQ_FEATURE_TABLE
unique_ptr< string > XMLFindTagValue(const char *entry, const XmlIndex *xip, Int4 tag)
std::list< std::string > TKeywordList
std::list< CRef< objects::CPub > > TPubList
forward_list< string > TokenBlkList
std::list< CRef< objects::CSeqdesc > > TSeqdescList
std::list< std::string > TAccessionList
void MemSet(void *p, int n, size_t sz)
bool StringEquNI(const char *s1, const char *s2, size_t n)
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
void StringNCpy(char *d, const char *s, size_t n)
size_t StringLen(const char *s)
void MemCpy(void *p, const void *q, size_t sz)
char * StringNew(size_t sz)
static DLIST_TYPE *DLIST_NAME() last(DLIST_LIST_TYPE *list)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
unsigned int TSeqPos
Type for sequence locations and lengths.
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
const TPrim & Get(void) const
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on properties.
static E_Choice GetAccType(EAccessionInfo info)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Override Assign() to incorporate cache invalidation.
const_iterator end(void) const
const_iterator begin(void) const
void Add(const CSeq_loc &other)
Simple adding of seq-locs.
void SetNull(void)
Override all setters to incorporate cache invalidation.
CBeginInfo Begin(C &obj)
Get starting point of object hierarchy.
CRef< C > Ref(C *object)
Helper functions to get CRef<> and CConstRef<> objects.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string (in-place)
static SIZE_TYPE CommonSuffixSize(const CTempString s1, const CTempString s2)
Determine the common suffix of two strings.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static void TrimSuffixInPlace(string &str, const CTempString suffix, ECase use_case=eCase)
Trim suffix from a string (in-place)
void SetSource(const TSource &value)
Assign a value to Source data member.
TKeywords & SetKeywords(void)
Assign a value to Keywords data member.
bool IsSetExtra_accessions(void) const
Check if a value has been assigned to Extra_accessions data member.
void ResetKeywords(void)
Reset Keywords data member.
void ResetOrigin(void)
Reset Origin data member.
bool IsSetDiv(void) const
GenBank division. Check if a value has been assigned to Div data member.
void ResetSource(void)
Reset Source data member.
void ResetDate(void)
Reset Date data member.
bool IsSetSource(void) const
Source line. Check if a value has been assigned to Source data member.
void SetDate(const TDate &value)
Assign a value to Date data member.
bool IsSetEntry_date(void) const
Replaces date. Check if a value has been assigned to Entry_date data member.
const TDiv & GetDiv(void) const
Get the Div member data.
TExtra_accessions & SetExtra_accessions(void)
Assign a value to Extra_accessions data member.
const TExtra_accessions & GetExtra_accessions(void) const
Get the Extra_accessions member data.
const TKeywords & GetKeywords(void) const
Get the Keywords member data.
bool IsSetOrigin(void) const
Check if a value has been assigned to Origin data member.
void SetDiv(const TDiv &value)
Assign a value to Div data member.
bool IsSetKeywords(void) const
Check if a value has been assigned to Keywords data member.
void ResetExtra_accessions(void)
Reset Extra_accessions data member.
void ResetDiv(void)
Reset Div data member.
bool IsSetDate(void) const
OBSOLETE: old form of Entry Date. Check if a value has been assigned to Date data member.
void SetOrigin(const TOrigin &value)
Assign a value to Origin data member.
bool IsSetPages(void) const
Check if a value has been assigned to Pages data member.
bool IsSetDate(void) const
Check if a value has been assigned to Date data member.
TSerial_number GetSerial_number(void) const
Get the Serial_number member data.
bool IsSetAuthors(void) const
Check if a value has been assigned to Authors data member.
bool IsSetVolume(void) const
Check if a value has been assigned to Volume data member.
const TCit & GetCit(void) const
Get the Cit member data.
bool IsSetSerial_number(void) const
For GenBank-style references. Check if a value has been assigned to Serial_number data member.
bool IsSetCit(void) const
Anything, not parsable. Check if a value has been assigned to Cit data member.
bool IsSetTitle(void) const
E.g. cit="unpublished", title="title". Check if a value has been assigned to Title data member.
bool IsSetJournal(void) const
Check if a value has been assigned to Journal data member.
bool IsSetPmid(void) const
PubMed Id. Check if a value has been assigned to Pmid data member.
bool IsSetIssue(void) const
Check if a value has been assigned to Issue data member.
bool IsSetMuid(void) const
Medline uid. Check if a value has been assigned to Muid data member.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsStr(void) const
Check if variant Str is selected.
bool CanGetType(void) const
Check if it is safe to call GetType method.
bool IsSetType(void) const
Type of object within class. Check if a value has been assigned to Type data member.
void SetTag(TTag &value)
Assign a value to Tag data member.
const TStr & GetStr(void) const
Get the variant data.
const TType & GetType(void) const
Get the Type member data.
E_Choice Which(void) const
Which variant is currently selected.
void SetDb(const TDb &value)
Assign a value to Db data member.
@ e_not_set
No variant selected.
vector< CRef< CDbtag > > TDb
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetDiv(void) const
GenBank division code. Check if a value has been assigned to Div data member.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
bool IsSet(void) const
Check if a value has been assigned to data member.
const Tdata & Get(void) const
Get the member data.
E_Choice Which(void) const
Which variant is currently selected.
const TGen & GetGen(void) const
Get the variant data.
TMuid GetMuid(void) const
Get the variant data.
bool IsMuid(void) const
Check if variant Muid is selected.
bool IsGen(void) const
Check if variant Gen is selected.
@ e_Gen
general or generic unparsed
@ eSeq_code_type_iupacaa
IUPAC 1 letter amino acid code.
@ eSeq_code_type_iupacna
IUPAC 1 letter nuc acid code.
const TKey & GetKey(void) const
Get the Key member data.
bool IsSetKey(void) const
Check if a value has been assigned to Key data member.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
const TName & GetName(void) const
Get the Name member data.
TLocal & SetLocal(void)
Select the variant.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
@ e_Other
for historical reasons, 'other' = 'refseq'
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_not_set
No variant selected.
@ e_Tpg
Third Party Annot/Seq Genbank.
const TSeq & GetSeq(void) const
Get the variant data.
const TDescr & GetDescr(void) const
Get the Descr member data.
TSet & SetSet(void)
Select the variant.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
void SetClass(TClass value)
Assign a value to Class data member.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
TSeq & SetSeq(void)
Select the variant.
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
@ eClass_parts
parts for 2 or 3
@ eClass_segset
segmented sequence + parts
const TIupacaa & GetIupacaa(void) const
Get the variant data.
list< CRef< CSeqdesc > > Tdata
TId & SetId(void)
Assign a value to Id data member.
const TUser & GetUser(void) const
Get the variant data.
bool IsSetSeq_data(void) const
The sequence. Check if a value has been assigned to Seq_data data member.
ERepr
representation class
void SetPub(TPub &value)
Assign a value to Pub data member.
const TInst & GetInst(void) const
Get the Inst member data.
TTitle & SetTitle(void)
Select the variant.
void SetExt(TExt &value)
Assign a value to Ext data member.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
const TPub & GetPub(void) const
Get the variant data.
const TNcbi8aa & GetNcbi8aa(void) const
Get the variant data.
TNcbieaa & SetNcbieaa(void)
Select the variant.
TTech GetTech(void) const
Get the Tech member data.
bool IsSetExt(void) const
Extensions for special types. Check if a value has been assigned to Ext data member.
const Tdata & Get(void) const
Get the member data.
TMol GetMol(void) const
Get the Mol member data.
bool IsDelta(void) const
Check if variant Delta is selected.
void SetInst(TInst &value)
Assign a value to Inst data member.
const TNcbistdaa & GetNcbistdaa(void) const
Get the variant data.
const TExt & GetExt(void) const
Get the Ext member data.
EMol
molecule class in living organism
bool IsPub(void) const
Check if variant Pub is selected.
void SetDescr(TDescr &value)
Assign a value to Descr data member.
bool IsSetPub(void) const
The citation(s). Check if a value has been assigned to Pub data member.
void SetRepr(TRepr value)
Assign a value to Repr data member.
list< CRef< CSeq_feat > > TFtable
E_Choice Which(void) const
Which variant is currently selected.
void SetFuzz(TFuzz &value)
Assign a value to Fuzz data member.
Tdata & Set(void)
Assign a value to data member.
list< CRef< CSeq_annot > > TAnnot
void SetLength(TLength value)
Assign a value to Length data member.
bool IsGap(void) const
Check if variant Gap is selected.
const TPub & GetPub(void) const
Get the Pub member data.
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
const TDescr & GetDescr(void) const
Get the Descr member data.
void SetMol(TMol value)
Assign a value to Mol data member.
bool IsUser(void) const
Check if variant User is selected.
E_Choice Which(void) const
Which variant is currently selected.
@ eRepr_const
constructed sequence
@ eRepr_seg
segmented sequence
@ eRepr_delta
sequence made by changes (delta) to others
@ eRepr_raw
continuous sequence
@ eTech_htgs_2
ordered High Throughput sequence contig
@ eTech_sts
Sequence Tagged Site.
@ eTech_htgs_3
finished High Throughput sequence
@ eTech_htgs_1
unordered High Throughput sequence contig
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_survey
one-pass genomic sequence
@ eTech_htgs_0
single genomic reads for coordination
@ eTech_est
Expressed Sequence Tag.
@ e_Ncbistdaa
consecutive codes for std aas
@ e_Iupacaa
IUPAC 1 letter amino acid code.
@ e_Ncbi8aa
8 bit extended amino acid codes
@ e_Org
if all from one organism
@ e_Update_date
date of last update
@ eMol_na
just a nucleic acid
@ eStrand_ds
double strand
@ eStrand_ss
single strand
unsigned int
A callback function used to compare two keys in a database.
CSeq_id::E_Choice GetNucAccOwner(const CTempString &acc)
int fta_if_wgs_acc(string_view accession)
void DelNonDigitTail(string &str)
static void text(MDB_val *v)
range(_Ty, _Ty) -> range< _Ty >
const struct ncbi::grid::netcache::search::fields::SIZE size
const struct ncbi::grid::netcache::search::fields::KEY key
const CharType(& source)[N]
int strcmp(const char *str1, const char *str2)
Int4 delta(size_t dimension_, const Int4 *score_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
static SLJIT_INLINE sljit_ins l(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
list< SectionPtr > mSections
vector< IndexblkPtr > entrylist
int SrchKeyword(const CTempString &ptr, const vector< string > &keywordList)
char * SrchTheChar(char *bptr, char *eptr, Char letter)
bool SetTextId(Uint1 seqtype, CSeq_id &seqId, CTextseq_id &textId)
string GetBlkDataReplaceNewLine(string_view instr, Uint2 indent)
bool fta_is_tpa_keyword(const char *str)
void CleanTailNoneAlphaCharInString(string &str)
char * xSrchNodeType(const DataBlk &entry, Int4 type, size_t *len)
string xGetNodeData(const DataBlk &entry, int nodeType)
void fta_StringCpy(char *dst, const char *src)
DataBlkPtr TrackNodeType(const DataBlk &entry, Int2 type)
char * SrchTheStr(char *bptr, char *eptr, const char *leadstr)
static wxAcceleratorEntry entries[3]