65 #define THIS_FILE "fta_src.cpp"
75 #define USE_CULTIVAR 00001
76 #define USE_ISOLATE 00002
77 #define USE_SEROTYPE 00004
78 #define USE_SEROVAR 00010
79 #define USE_SPECIMEN_VOUCHER 00020
80 #define USE_STRAIN 00040
81 #define USE_SUB_SPECIES 00100
82 #define USE_SUB_STRAIN 00200
83 #define USE_VARIETY 00400
84 #define USE_ECOTYPE 01000
87 #define BIOSOURCES_THRESHOLD 20
191 "synthetic construct",
192 "artificial sequence",
193 "eukaryotic synthetic construct",
249 "environmental_sample",
355 for (tsfbp = sfbp; tsfbp; tsfbp = sfbp) {
372 for (; dbp; dbp = dbp->
mpNext) {
397 if (! line || *line ==
'\0')
400 for (p = line, q = line; *p !=
'\0'; p++)
401 if (*p !=
' ' && *p !=
'\t')
409 for (; sfbp; sfbp = sfbp->
next) {
411 for (
auto& cur : sfbp->
quals) {
412 if (cur->IsSetQual()) {
415 if (cur->IsSetVal()) {
427 for (; sfbp; sfbp = sfbp->
next) {
428 for (
const auto& cur : sfbp->
quals) {
430 if (cur->GetQual() == *
b)
447 namstr.append(
value);
450 mod->SetSubtype(subtype);
453 mods.push_front(
mod);
457 static void CollectSubNames(
SourceFeatBlkPtr sfbp,
Int4 use_what,
const Char* name,
const Char* cultivar,
const Char* isolate,
const Char* serotype,
const Char* serovar,
const Char* specimen_voucher,
const Char* strain,
const Char* sub_species,
const Char* sub_strain,
const Char* variety,
const Char* ecotype)
503 s.reserve(s.size() +
i);
533 const Char* cultivar;
535 const Char* organelle;
536 const Char* serotype;
539 const Char* specimen_voucher;
541 const Char* sub_species;
542 const Char* sub_strain;
550 for (ret =
true; sfbp; sfbp = sfbp->
next) {
558 specimen_voucher =
nullptr;
560 sub_species =
nullptr;
561 sub_strain =
nullptr;
563 genomename =
nullptr;
565 for (
auto& cur : sfbp->
quals) {
566 if (! cur->IsSetQual())
569 const string& qual_str = cur->GetQual();
570 char* val_ptr = cur->IsSetVal() ? cur->SetVal().data() :
nullptr;
572 if (qual_str ==
"db_xref") {
574 if (! q || q[1] ==
'\0')
583 if (qual_str ==
"focus") {
587 if (qual_str ==
"transgenic") {
591 if (qual_str ==
"cultivar") {
595 if (qual_str ==
"isolate") {
600 if (qual_str ==
"mol_type") {
607 if (qual_str ==
"organelle") {
612 if (qual_str ==
"serotype") {
616 if (qual_str ==
"serovar") {
620 if (qual_str ==
"ecotype") {
624 if (qual_str ==
"specimen_voucher") {
625 specimen_voucher = val_ptr;
628 if (qual_str ==
"strain") {
633 if (qual_str ==
"sub_species") {
634 sub_species = val_ptr;
637 if (qual_str ==
"sub_strain") {
638 sub_strain = val_ptr;
641 if (qual_str ==
"variety") {
645 if (qual_str ==
"submitter_seqid") {
656 if (qual_str !=
"organism" ||
657 ! val_ptr || val_ptr[0] ==
'\0')
667 str_to_find.assign(val_ptr, p);
669 str_to_find.assign(val_ptr);
718 if (strain && ! sfbp->
strain)
720 if (isolate && ! sfbp->
isolate)
725 CollectSubNames(sfbp, use_what, name, cultivar, isolate, serotype, serovar, specimen_voucher, strain, sub_species, sub_strain, variety, ecotype);
733 for (; sfbp; sfbp = sfbp->
next) {
747 for (; sfbp; sfbp = sfbp->
next) {
772 for (; sfbp; sfbp = sfbp->
next) {
778 for (
const auto& cur : sfbp->
quals) {
779 if (cur->GetQual() !=
"partial")
793 if ((p == sfbp->
location || *p ==
'(' || *p ==
')' ||
794 *p ==
':' || *p ==
',' || *p ==
'.') &&
795 (*q ==
'\0' || *q ==
'(' || *q ==
')' || *q ==
',' ||
796 *q ==
':' || *q ==
'.')) {
804 for (count = 0, p = sfbp->
location; *p !=
'\0'; p++) {
807 else if (*p ==
'>' || *p ==
'<')
813 else if (*p ==
'.' && p[1] ==
'.')
815 else if (*p ==
'.' && p[1] !=
'.') {
816 for (q = p + 1; *q >=
'0' && *q <=
'9';)
818 if (q == p || *q !=
':')
825 if (invalid || count != 0) {
841 for (; sfbp; sfbp = sfbp->
next) {
844 for (p = sfbp->
location + 1; *p !=
'\0'; p++) {
847 for (
r =
nullptr, q = p - 1;; q--) {
849 if (*q !=
'_' && (*q < '0' || *q >
'9') &&
850 (*q < 'a' || *q >
'z') && (*q < 'A' || *q >
'Z'))
862 if (*q !=
'_' && (*q < '0' || *q >
'9') &&
863 (*q < 'a' || *q >
'z') && (*q < 'A' || *q >
'Z')) {
895 for (; mmp; mmp = tmmp) {
906 if (! name || *name ==
'\0')
935 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
937 ! tsfbp->
name || tsfbp->
name[0] ==
'\0')
942 for (p = loc; *p !=
'\0'; p++)
943 if (*p ==
',' || *p ==
'(' || *p ==
')' || *p ==
':' ||
944 *p ==
';' || *p ==
'^')
946 for (p = loc, q = loc; *p !=
'\0';) {
947 if (*p ==
'>' || *p ==
'<') {
958 if (q > loc && *(q - 1) ==
' ')
962 q = (*loc ==
' ') ? (loc + 1) : loc;
969 for (
r = q; *
r >=
'0' && *
r <=
'9';)
977 }
else if (*
r ==
'.' &&
r[1] ==
'.') {
981 for (q = ++
r; *
r >=
'0' && *
r <=
'9';)
990 for (tmmp = mmp;; tmmp = tmmp->
next) {
991 if (min < tmmp->
min) {
992 mmpnext = tmmp->
next;
1025 if (! mmp || mmp->
min != 1)
1038 for (tsfbp = sfbp,
i = 0; tsfbp; tsfbp = tsfbp->
next,
i++) {
1052 if (count > 2 ||
i > count || (tgs != 1 && sporg != 1))
1063 for (; sfbp; sfbp = sfbp->
next) {
1066 else if (sfbp->
focus)
1085 if (div && *div !=
'\0') {
1092 for (ret =
true, got =
false; sfbp; sfbp = sfbp->
next) {
1096 if (syntgndiv == 0) {
1113 if (syntgndiv == 2 && ! got)
1132 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1156 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1160 taxname = tsfbp->
name;
1163 if (tsfbp->
tg && tsfbp->
full)
1169 if (same ==
false && tgfull ==
false && focus ==
false)
1172 if (! sfbp->
next || ! tgs)
1175 for (tsfbp = sfbp->
next; tsfbp; tsfbp = tsfbp->
next)
1202 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1212 if (same && count > 0)
1216 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1227 if (same || count != 0)
1231 pat =
"1.." + to_string(
len);
1232 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1255 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1256 if (tsfbp->
full && tsfbp->
tg && ! tsfbp->
skip)
1268 if ((mmp->
min == 1 && (
size_t)mmp->
max ==
len) || mmp->
skip)
1279 for (; mmp; mmp = mmp->
next) {
1282 for (tmmp = mmp->
next; tmmp; tmmp = tmmp->
next) {
1297 ss <<
"\"" << mmp->
orgname <<
"\" at " << mmp->
min <<
".." << mmp->
max
1298 <<
" vs \"" << tmmp->
orgname <<
"\" at " << tmmp->
min <<
".." << tmmp->
max;
1309 if (! sfbp || ! sfbp->
next)
1312 for (tsfbp = sfbp->
next; tsfbp; tsfbp = tsfbp->
next)
1319 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next)
1320 if (tsfbp->
full && tsfbp->
tg)
1326 for (; sfbp; sfbp = sfbp->
next) {
1327 if (! sfbp->
full || sfbp->
tg)
1342 return (sfbp->
name);
1350 const Char* cultivar;
1351 const Char* isolate;
1352 const Char* serotype;
1353 const Char* serovar;
1354 const Char* ecotype;
1355 const Char* specimen_voucher;
1357 const Char* sub_species;
1358 const Char* sub_strain;
1359 const Char* variety;
1361 for (; sfbp; sfbp = sfbp->
next) {
1374 t_org_ref->
Assign(*org_ref);
1377 if (t_org_ref->
Equals(*org_ref))
1391 specimen_voucher =
nullptr;
1393 sub_species =
nullptr;
1394 sub_strain =
nullptr;
1398 switch (
mod->GetSubtype()) {
1400 cultivar =
mod->GetSubname().c_str();
1403 isolate =
mod->GetSubname().c_str();
1406 serotype =
mod->GetSubname().c_str();
1409 serovar =
mod->GetSubname().c_str();
1412 ecotype =
mod->GetSubname().c_str();
1415 specimen_voucher =
mod->GetSubname().c_str();
1418 strain =
mod->GetSubname().c_str();
1421 sub_species =
mod->GetSubname().c_str();
1424 sub_strain =
mod->GetSubname().c_str();
1427 variety =
mod->GetSubname().c_str();
1432 CollectSubNames(sfbp, use_what, sfbp->
name, cultivar, isolate, serotype, serovar, specimen_voucher, strain, sub_species, sub_strain, variety, ecotype);
1438 for (tsfbp = sfbp->
next; tsfbp; tsfbp = tsfbp->
next) {
1471 for (tsfbp = where->
next; tsfbp; tsfbp = tsfbp->
next) {
1491 for (
prev = sfbp, tsfbp = sfbp->
next; tsfbp; tsfbp =
next) {
1493 if (! tsfbp->
useit) {
1498 bool different =
false;
1499 for (
const auto& cur : tsfbp->
quals) {
1500 const string& cur_qual = cur->GetQual();
1501 if (cur_qual ==
"focus")
1506 const string& next_qual =
next->GetQual();
1508 if (next_qual ==
"focus" || next_qual != cur_qual)
1511 if (! cur->IsSetVal() && !
next->IsSetVal()) {
1516 if (cur->IsSetVal() &&
next->IsSetVal() &&
1517 cur->GetVal() ==
next->GetVal()) {
1536 tsfbp->
next =
nullptr;
1559 tsfbp->
next =
nullptr;
1572 for (TQualVector::iterator cur = sfbp->
quals.begin(); cur != sfbp->
quals.end();) {
1573 const string& cur_qual = (*cur)->GetQual();
1574 if (cur_qual ==
"focus") {
1579 for (tsfbp = sfbp->
next; tsfbp; tsfbp = tsfbp->
next) {
1580 if (tsfbp == res || ! tsfbp->
useit)
1585 const string& next_qual =
next->GetQual();
1587 if (next_qual ==
"focus" || next_qual != cur_qual)
1590 if (! (*cur)->IsSetVal() && !
next->IsSetVal()) {
1595 if ((*cur)->IsSetVal() &&
next->IsSetVal() &&
1596 (*cur)->GetVal() ==
next->GetVal()) {
1612 cur = sfbp->
quals.erase(cur);
1632 Int4 count_noskip = 0;
1634 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1640 if (! tsfbp->
skip) {
1648 if (count_noskip == 1) {
1652 for (res =
nullptr, tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1653 if (count_noskip != 0 && tsfbp->
skip)
1655 tsfbp->
useit =
true;
1662 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1669 for (res =
nullptr, tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1680 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1683 tsfbp->
useit =
true;
1689 if (count_noskip > 0) {
1690 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1693 if (res != tsfbp && tsfbp->
skip)
1694 tsfbp->
useit =
false;
1700 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
1733 for (
const auto& subtype : bio.
GetSubtype()) {
1734 if (subtype->GetSubtype() ==
type) {
1760 vector<string> modnames;
1765 if (it.num !=
mod->GetSubtype())
1768 modnames.push_back(it.name);
1774 for (
const auto& cur : quals) {
1775 if (! cur->IsSetQual() || cur->GetQual() ==
"organism")
1778 const string& cur_qual = cur->GetQual();
1779 const Char* val_ptr = cur->IsSetVal() ? cur->GetVal().c_str() :
nullptr;
1781 if (cur_qual ==
"note") {
1790 if (! val_ptr || val_ptr[0] ==
'\0')
1802 if (**
b !=
'\0' && cur_qual == *
b) {
1808 if (cur_qual ==
"organism" ||
1809 (taxserver != 0 && cur_qual ==
"type_material"))
1812 if (find(modnames.begin(), modnames.end(), cur_qual) != modnames.end())
1816 if (cur_qual == it.name) {
1833 if (qual->
GetQual() !=
"db_xref")
1836 std::vector<Char> val_buf(qual->
GetVal().begin(), qual->
GetVal().end());
1837 val_buf.push_back(0);
1840 if (! p || p[1] ==
'\0')
1880 string line(
"NBRC:");
1884 val_buf.assign(line.begin(), line.end());
1885 val_buf.push_back(0);
1887 p = &val_buf[0] + 4;
1922 tag->SetDb(&val_buf[0]);
1925 for (q = p; *p >=
'0' && *p <=
'9';)
1928 if (*p ==
'\0' && *q !=
'0')
1929 tag->SetTag().SetId(atoi(q));
1931 tag->SetTag().SetStr(q);
1948 bool is_syn =
false;
1949 bool is_pat =
false;
1958 for (; sfbp; sfbp = sfbp->
next) {
1965 if (is_syn && ! sfbp->
tg)
1972 if (is_syn ==
false && is_pat ==
false) {
1973 const Char* taxname =
nullptr;
1985 bool dropped =
false;
1986 for (
auto& cur : sfbp->
quals) {
1987 if (! cur->IsSetQual() || cur->GetQual().empty())
1990 const string& cur_qual = cur->GetQual();
1991 string cq = cur_qual;
1992 if (cq ==
"geo_loc_name") {
1995 if (cq ==
"db_xref") {
2000 bio.
SetOrg().SetDb().push_back(dbtag);
2004 const Char* val_ptr = cur->IsSetVal() ? cur->GetVal().c_str() :
nullptr;
2005 if (cq ==
"organelle") {
2006 if (! val_ptr || val_ptr[0] ==
'\0')
2009 const char* p =
StringChr(val_ptr,
':');
2017 string val_str(val_ptr, p);
2047 if (cq !=
"country" ||
2048 ! val_ptr || val_ptr[0] ==
'\0')
2055 for (p = tco; *p ==
' ' || *p ==
'\t';)
2060 for (q = p + 1; *q !=
'\0';)
2062 for (q--; *q ==
' ' || *q ==
'\t';)
2067 if (! valid_country) {
2070 if (! valid_country)
2086 else if (oldgen > -1)
2105 return c ==
' ' || c ==
'\t';
2116 for (
const auto& descr : bioseq.
GetDescr().
Get()) {
2117 if (! descr->IsSource())
2120 const CBioSource& bio_src = descr->GetSource();
2128 std::remove_copy_if(taxname.begin(), taxname.end(), std::back_inserter(orgdescr),
is_a_space_char);
2133 std::remove_copy_if(common.begin(), common.end(), std::back_inserter(commdescr),
is_a_space_char);
2136 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
2137 if (tsfbp->
name ==
nullptr || tsfbp->
name[0] ==
'\0')
2140 size_t name_len = strlen(tsfbp->
name);
2150 if (orgdescr == orgfeat || commdescr == orgfeat) {
2168 for (; sfbp; sfbp = sfbp->
next) {
2177 if (! p || *p ==
'\0') {
2204 for (; sfbp; sfbp = sfbp->
next) {
2207 ! sfbp->
name || *sfbp->
name ==
'\0' ||
2224 for (
const auto& descr : bioseq.
GetDescr().
Get()) {
2225 if (! descr->IsSource())
2228 const CBioSource& bio_src = descr->GetSource();
2247 "Taxonomy lookup for organism name \"%s\" failed, and no matching organism exists in OS/ORGANISM lines, so lineage has been set to \"Unclassified\".",
2251 if (lineage.empty()) {
2254 "Taxonomy lookup for organism name \"%s\" failed, and the matching organism from OS/ORGANISM lines has no lineage, so lineage has been set to \"Unclassified\".",
2258 p = lineage.c_str();
2262 for (tsfbp = sfbp->
next; tsfbp; tsfbp = tsfbp->
next) {
2265 ! tsfbp->
name || *tsfbp->
name ==
'\0' ||
2294 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next)
2302 for (ret =
true, tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
2333 if (! sfbp || ! ibp)
2339 for (envs = 0, sources = 0; sfbp; sfbp = sfbp->
next, sources++) {
2340 bool env_found =
false;
2341 for (
const auto& cur : sfbp->
quals) {
2342 if (cur->IsSetQual() && cur->GetQual() ==
"environmental_sample") {
2352 if (! sfbp->
full || ! sfbp->
name || sfbp->
name[0] ==
'\0')
2365 if (! skip && envs != sources) {
2418 if (! ppp || count < 1)
2428 subs.push_back(sub);
2433 subs.push_back(sub);
2439 subs.push_back(sub);
2446 subs.push_back(sub);
2457 for (tppp = ppp; tppp; tppp = tppp->
next) {
2470 str_fs.reserve(len_fs);
2471 str_rs.reserve(len_rs);
2473 str_fn.reserve(len_fn + count - num_fn + 1);
2475 str_rn.reserve(len_rn + count - num_rn + 1);
2477 for (tppp = ppp; tppp; tppp = tppp->
next) {
2494 if (! str_fs.empty()) {
2502 subs.push_back(sub);
2504 if (! str_rs.empty()) {
2512 subs.push_back(sub);
2514 if (! str_fn.empty()) {
2521 subs.push_back(sub);
2524 if (! str_rn.empty()) {
2531 subs.push_back(sub);
2540 for (; ppp; ppp =
next) {
2571 bool got_problem =
false;
2572 for (ppp =
nullptr; sfbp; sfbp = sfbp->
next) {
2577 for (
const auto& cur : sfbp->
quals) {
2578 if (cur->GetQual() !=
"PCR_primers" ||
2579 ! cur->IsSetVal() || cur->GetVal().empty())
2592 std::vector<Char> val_buf(cur->GetVal().begin(), cur->GetVal().end());
2593 val_buf.push_back(0);
2595 for (comma =
false, bad_start =
false, p = &val_buf[0]; *p !=
'\0';) {
2598 if (p != &val_buf[0]) {
2621 if (
r > q && *(
r - 1) ==
' ')
2631 if (! q || *q ==
'\0')
2791 const char* Mmm[] = {
"Jan",
"Feb",
"Mar",
"Apr",
"May",
"Jun",
"Jul",
"Aug",
"Sep",
"Oct",
"Nov",
"Dec",
nullptr };
2811 for (; sfbp; sfbp = sfbp->
next) {
2815 for (
const auto& cur : sfbp->
quals) {
2817 if (cur->GetQual() !=
"collection_date" ||
2818 ! cur->IsSetVal() || cur->GetVal().empty())
2821 val = (
char*)cur->GetVal().c_str();
2822 for (num_slash = 0, p =
val; *p !=
'\0'; p++)
2826 if (num_slash > 1) {
2836 for (
val = (
char*)cur->GetVal().c_str();;) {
2844 for (q =
val; *q ==
'0';)
2846 for (p = (
char*)q; *p !=
'\0'; p++)
2847 if (*p < '0' || *p >
'9')
2851 else if (atoi(q) > date.
GetYear())
2853 }
else if (
len == 8) {
2860 if (p[0] >=
'a' && p[0] <=
'z')
2862 if (p[1] >=
'A' && p[1] <=
'Z')
2864 if (p[2] >=
'A' && p[2] <=
'Z')
2867 for (
b = Mmm, month = 1; *
b;
b++, month++)
2875 for (q =
val + 4; *q ==
'0';)
2877 for (p = (
char*)q; *p !=
'\0'; p++)
2878 if (*p < '0' || *p >
'9')
2889 }
else if (
len == 11) {
2890 if (
val[2] !=
'-' ||
val[6] !=
'-')
2896 if (p[0] <
'0' || p[0] >
'3' || p[1] <
'0' || p[1] >
'9')
2904 if (p[0] >=
'a' && p[0] <=
'z')
2906 if (p[1] >=
'A' && p[1] <=
'Z')
2908 if (p[2] >=
'A' && p[2] <=
'Z')
2911 for (
b = Mmm, month = 1; *
b;
b++, month++)
2917 if (day < 1 || day > 31)
2919 else if (month == 2 && day > 29)
2921 else if ((month == 4 || month == 6 || month == 9 || month == 11) && day > 30)
2926 for (q =
val + 7; *q ==
'0';)
2928 for (p = (
char*)q; *p !=
'\0'; p++)
2929 if (*p < '0' || *p >
'9')
2934 year = atoi(q) - 1900;
2944 }
else if (
len == 7 ||
len == 10 ||
len == 14 ||
len == 17 ||
2949 for (p =
val; *p !=
'\0'; p++) {
2950 if ((*p < 'a' || *p >
'z') && (*p < 'A' || *p >
'Z') &&
2951 (*p < '0' || *p >
'9') && *p !=
'-' && *p !=
'/' &&
2963 if (
len == 7 ||
len == 10) {
2975 if ((
len == 14 && num_colon > 0) ||
2976 (
len == 17 && num_colon > 1) ||
2977 (
len == 20 && num_colon > 2))
2996 q =
"is not of the format DD-Mmm-YYYY, Mmm-YYYY, or YYYY";
2998 q =
"has an illegal day value for the stated month";
3000 q =
"has invalid characters";
3002 q =
"has too many time values";
3004 q =
"has too many Zulu indicators";
3006 q =
"has too many hour and minute delimiters";
3008 q =
"has not yet occured";
3028 if (! sfbp || ! sfbp->
next)
3031 for (; sfbp; sfbp = sfbp->
next) {
3054 bool metatax =
false;
3055 bool metalin =
false;
3065 if (! metalin && ! metatax)
3069 if (! taxname || taxname[0] == 0)
3070 taxname =
"unknown";
3072 if (metalin && metatax) {
3077 }
else if (! metalin)
3096 for (ssid =
nullptr, tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
3118 if (count_feat == count_qual)
3254 ErrPostEx(sev,
ERR_SOURCE_TransSingleOrgName,
"Use of /transgenic requires at least two source features with differences among /organism, /strain, /organelle, and /isolate, between the host and foreign organisms.");
3259 }
else if (
i == 2) {
3261 }
else if (
i == 3) {
3291 }
else if (
i == 2) {
3320 for (tsfbp = sfbp,
i = 0; tsfbp; tsfbp = tsfbp->
next)
3345 for (tsfbp = sfbp; tsfbp; tsfbp = tsfbp->
next) {
3362 if (! tsfbp->
quals.empty()) {
3372 seq_feats.push_back(feat);
bool fta_strings_same(const char *s1, const char *s2)
void ShrinkSpaces(char *line)
void fta_sort_biosource(objects::CBioSource &bio)
const COrgName & GetOrgname(void) const
bool IsSetOrgname(void) const
static bool WasValid(const string &country)
static bool IsValid(const string &country)
@OrgMod.hpp User-defined methods of the data storage class.
bool IsSetOrgMod(void) const
namespace ncbi::objects::
#define ERR_SOURCE_InconsistentMolType
#define ERR_SOURCE_MissingMolType
#define ERR_SOURCE_FocusAndTransposonNotAllowed
#define ERR_SOURCE_ExcessCoverage
#define ERR_SOURCE_IncompleteCoverage
#define ERR_SOURCE_InvalidDbXref
#define ERR_SOURCE_TransSingleOrgName
#define ERR_QUALIFIER_PCRprimerEmbeddedComma
#define ERR_SOURCE_OrganelleQualMultToks
#define ERR_SOURCE_UnwantedQualifiers
#define ERR_SOURCE_PartialLocation
#define ERR_SOURCE_DifferentSubmitterSeqids
#define ERR_SOURCE_FormerCountry
#define ERR_SOURCE_OrganismIncomplete
#define ERR_SOURCE_MultipleOrganismWithFocus
#define ERR_SOURCE_UnusualOrgName
#define ERR_ORGANISM_UnclassifiedLineage
#define ERR_SERVER_NoLineageFromTaxon
#define ERR_SOURCE_MissingSourceFeatureForDescr
#define ERR_SOURCE_MissingEnvSampQual
#define ERR_DIVISION_TGNnotTransgenic
#define ERR_SOURCE_MultipleSubmitterSeqids
#define ERR_ORGANISM_OrgNameLacksMetagenome
#define ERR_SOURCE_ManySourceFeats
#define ERR_SOURCE_FocusQualMissing
#define ERR_SOURCE_InvalidCollectionDate
#define ERR_SOURCE_UnusualLocation
#define ERR_DIVISION_TransgenicNotSYN_TGN
#define ERR_QUALIFIER_InvalidPCRprimer
#define ERR_SOURCE_LackingSubmitterSeqids
#define ERR_SOURCE_OrganelleIllegalClass
#define ERR_SOURCE_MultipleMolTypes
#define ERR_SOURCE_BadLocation
#define ERR_SOURCE_FocusQualNotFullLength
#define ERR_SOURCE_MultiOrgOverlap
#define ERR_FEATURE_Dropped
#define ERR_SOURCE_MultipleTransgenicQuals
#define ERR_SOURCE_InconsistentEnvSampQual
#define ERR_ORGANISM_SynOrgNameNotSYNdivision
#define ERR_QUALIFIER_MissingPCRprimerSeq
#define ERR_SOURCE_FocusQualNotNeeded
#define ERR_SOURCE_PartialQualifier
#define ERR_SOURCE_FocusAndTransgenicQuals
#define ERR_ORGANISM_NoSourceFeatMatch
#define ERR_SOURCE_TransgenicTooShort
#define ERR_ORGANISM_LineageLacksMetagenome
#define ERR_SOURCE_ObsoleteDbXref
#define ERR_SOURCE_InvalidLocation
#define ERR_SOURCE_FeatureMissing
#define ERR_SOURCE_SingleSourceTooShort
#define ERR_SOURCE_InvalidCountry
#define ERR_SOURCE_NoOrganismQual
static void FTASubSourceAdd(CBioSource &bio, const Char *val, CSubSource::ESubtype type)
static void SourceFeatBlkFree(SourceFeatBlkPtr sfbp)
static void PopulateSubNames(string &namstr, const Char *name, const Char *value, COrgMod::ESubtype subtype, TOrgModList &mods)
static const char * GenomicSourceFeatQual[]
static void PcrPrimersFree(PcrPrimersPtr ppp)
static void RemoveSourceFeatSpaces(SourceFeatBlkPtr sfbp)
static char * CheckWholeSourcesVersusFocused(SourceFeatBlkPtr sfbp)
static char * CheckSourceFeatLocAccs(SourceFeatBlkPtr sfbp, char *acc)
static bool IfSpecialFeat(MinMaxPtr mmp, size_t len)
static const char * OrganelleFirstToken[]
static void PopulatePcrPrimers(CBioSource &bio, PcrPrimersPtr ppp, Int4 count)
static const char * unusual_toks[]
static SourceFeatBlkPtr CollectSourceFeats(DataBlkPtr dbp, Int2 type)
static const char * special_orgs[]
static bool SourceFeatStructFillIn(IndexblkPtr ibp, SourceFeatBlkPtr sfbp, Int4 use_what)
static void CheckQualsInSourceFeat(CBioSource &bio, TQualVector &quals, Uint1 taxserver)
static Int4 CheckFocusInOrgs(SourceFeatBlkPtr sfbp, size_t len, int *status)
static const char * SourceSubSources[]
static const char * DENLRSourceDbxrefTag[]
static void CompareDescrFeatSources(SourceFeatBlkPtr sfbp, const CBioseq &bioseq)
#define USE_SPECIMEN_VOUCHER
static bool CheckMoltypeConsistency(SourceFeatBlkPtr sfbp, string &moltype)
static void CheckCollectionDate(SourceFeatBlkPtr sfbp, Parser::ESource source)
static Int4 CheckTransgenicSourceFeats(SourceFeatBlkPtr sfbp)
static void CheckMetagenome(CBioSource &bio)
static char * CheckSourceFeatOrgs(SourceFeatBlkPtr sfbp, int *status)
static void PropogateSuppliedLineage(CBioseq &bioseq, SourceFeatBlkPtr sfbp, Uint1 taxserver)
static const char * NLRSourceDbxrefTag[]
static void CreateRawBioSources(ParserPtr pp, SourceFeatBlkPtr sfbp, Int4 use_what)
#define BIOSOURCES_THRESHOLD
static const char * exempt_quals[]
static void MinMaxFree(MinMaxPtr mmp)
static const char * SourceBadQuals[]
static SourceFeatBlkPtr SourceFeatBlkNew(void)
static const char * source_genomes[]
static Int4 CheckSourceFeatCoverage(SourceFeatBlkPtr sfbp, MinMaxPtr mmp, size_t len)
static bool CheckSourceFeatLocFuzz(SourceFeatBlkPtr sfbp)
static bool CheckSYNTGNDivision(SourceFeatBlkPtr sfbp, char *div)
static SourceFeatBlkPtr SourceFeatRemoveDups(SourceFeatBlkPtr sfbp)
static char * CheckSourceOverlap(MinMaxPtr mmp, size_t len)
static char * CheckSourceFeatFocusAndTransposon(SourceFeatBlkPtr sfbp)
static const char * ObsoleteSourceDbxrefTag[]
static bool CheckForENV(SourceFeatBlkPtr sfbp, IndexblkPtr ibp, Parser::ESource source)
static SourceFeatBlkPtr PickTheDescrSource(SourceFeatBlkPtr sfbp)
static const char * DESourceDbxrefTag[]
static const CharUInt1 SourceOrgMods[]
static char * CheckPcrPrimersTag(char *str)
static bool CheckNeedSYNFocus(SourceFeatBlkPtr sfbp)
static const char * ESourceDbxrefTag[]
static bool UpdateRawBioSource(SourceFeatBlkPtr sfbp, Parser::ESource source, IndexblkPtr ibp, Uint1 taxserver)
static char * CheckForUnusualFullLengthOrgs(SourceFeatBlkPtr sfbp)
bool fta_if_special_org(const Char *name)
static void RemoveStringSpaces(char *line)
static bool is_a_space_char(Char c)
static void CheckForExemption(SourceFeatBlkPtr sfbp)
static bool ParsePcrPrimers(SourceFeatBlkPtr sfbp)
static CRef< CDbtag > GetSourceDbtag(CRef< CGb_qual > &qual, Parser::ESource source)
static void AddOrgMod(COrg_ref &org_ref, const Char *val, COrgMod::ESubtype type)
static void CollectSubNames(SourceFeatBlkPtr sfbp, Int4 use_what, const Char *name, const Char *cultivar, const Char *isolate, const Char *serotype, const Char *serovar, const Char *specimen_voucher, const Char *strain, const Char *sub_species, const Char *sub_strain, const Char *variety, const Char *ecotype)
void ParseSourceFeat(ParserPtr pp, DataBlkPtr dbp, TSeqIdList &seqids, Int2 type, CBioseq &bioseq, TSeqFeatList &seq_feats)
static bool CheckSourceLineage(SourceFeatBlkPtr sfbp, Parser::ESource source, bool is_pat)
static SourceFeatBlkPtr SourceFeatDerive(SourceFeatBlkPtr sfbp, SourceFeatBlkPtr res)
static bool CheckSubmitterSeqidQuals(SourceFeatBlkPtr sfbp, char *acc)
static void SourceFeatBlkSetFree(SourceFeatBlkPtr sfbp)
static SourceFeatBlkPtr SourceFeatMoveOneUp(SourceFeatBlkPtr where, SourceFeatBlkPtr what)
std::list< CRef< objects::COrgMod > > TOrgModList
std::list< CRef< objects::CSeq_id > > TSeqIdList
std::list< CRef< objects::CSeq_feat > > TSeqFeatList
bool StringEquNI(const char *s1, const char *s2, size_t n)
bool StringEquN(const char *s1, const char *s2, size_t n)
bool StringEqu(const char *s1, const char *s2)
size_t StringLen(const char *s)
void fta_fix_orgref(ParserPtr pp, COrg_ref &org_ref, bool *drop, char *organelle)
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static const char * str(char *buf, int n)
static const char location[]
#define TAX_ID_FROM(T, value)
virtual void Assign(const CSerialObject &source, ESerialRecursionMode how=eRecursive)
Set object to copy of another one.
virtual bool Equals(const CSerialObject &object, ESerialRecursionMode how=eRecursive) const
Check if both objects contain the same values.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
@ eCurrent
Use current time. See also CCurrentTime.
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
void SetSubtype(TSubtype value)
Assign a value to Subtype data member.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool CanGetOrg(void) const
Check if it is safe to call GetOrg method.
list< CRef< CSubSource > > TSubtype
void SetIs_focus(void)
Set NULL data member (assign 'NULL' value to Is_focus data member).
void ResetIs_focus(void)
Reset Is_focus data member.
void SetOrigin(TOrigin value)
Assign a value to Origin data member.
const TOrg & GetOrg(void) const
Get the Org member data.
void SetGenome(TGenome value)
Assign a value to Genome data member.
void SetOrg(TOrg &value)
Assign a value to Org data member.
void SetName(const TName &value)
Assign a value to Name data member.
EGenome
biological context
TSubtype & SetSubtype(void)
Assign a value to Subtype data member.
@ eSubtype_fwd_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_rev_primer_name
@ eSubtype_fwd_primer_name
@ eSubtype_rev_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eOrigin_artificial
artificially engineered
TYear GetYear(void) const
Get the Year member data.
TMonth GetMonth(void) const
Get the Month member data.
TDay GetDay(void) const
Get the Day member data.
const TMod & GetMod(void) const
Get the Mod member data.
bool CanGetMod(void) const
Check if it is safe to call GetMod method.
const TLineage & GetLineage(void) const
Get the Lineage member data.
bool CanGetDiv(void) const
Check if it is safe to call GetDiv method.
const TDiv & GetDiv(void) const
Get the Div member data.
void ResetLineage(void)
Reset Lineage data member.
bool IsSetCommon(void) const
common name Check if a value has been assigned to Common data member.
bool IsSetLineage(void) const
lineage with semicolon separators Check if a value has been assigned to Lineage data member.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
const TCommon & GetCommon(void) const
Get the Common member data.
bool CanGetTaxname(void) const
Check if it is safe to call GetTaxname method.
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
bool CanGetOrgname(void) const
Check if it is safe to call GetOrgname method.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
bool IsSetTaxname(void) const
preferred formal name Check if a value has been assigned to Taxname data member.
TMod & SetMod(void)
Assign a value to Mod data member.
void SetOrgname(TOrgname &value)
Assign a value to Orgname data member.
void SetLineage(const TLineage &value)
Assign a value to Lineage data member.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
@ eSubtype_nat_host
natural host of this specimen
@ eSubtype_metagenome_source
@ eSubtype_specimen_voucher
@ eSubtype_culture_collection
const TVal & GetVal(void) const
Get the Val member data.
void SetData(TData &value)
Assign a value to Data data member.
void SetExp_ev(TExp_ev value)
Assign a value to Exp_ev data member.
void SetVal(const TVal &value)
Assign a value to Val data member.
const TQual & GetQual(void) const
Get the Qual member data.
@ eExp_ev_experimental
any reasonable experimental check
@ eExp_ev_not_experimental
similarity, pattern, etc
const Tdata & Get(void) const
Get the member data.
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is whole
bool GetSeqLocation(CSeq_feat &feat, char *location, TSeqIdList &ids, bool *hard_err, ParserPtr pp, const char *name)
constexpr bool empty(list< Ts... >) noexcept
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
vector< IndexblkPtr > entrylist
CBioSource::EGenome genome
CRef< CBioSource > bio_src
optional< string > GetTheQualValue(TQualVector &qlist, const Char *qual)
Int2 StringMatchIcase(const Char **array, const Char *text)
Char * StringIStr(const Char *where, const Char *what)
std::vector< CRef< objects::CGb_qual > > TQualVector