64 #define NCBI_USE_ERRCODE_X Objtools_Validator
69 using namespace sequence;
82 string::iterator it =
str.begin();
83 while (it !=
str.end()) {
86 }
else if (*it ==
')') {
91 }
else if (*it ==
'[') {
93 }
else if (*it ==
']') {
101 if (par > 0 || bkt > 0) {
161 static bool s_IsValidPrimerSequence (
string str,
char& bad_ch)
187 string::iterator sit =
str.begin();
188 while (sit !=
str.end()) {
191 if (pos2 == string::npos) {
195 string match =
str.substr(pos + 1, pos2 - pos - 1);
196 if (find(list_begin, list_end,
match) == list_end) {
200 sit += pos2 - pos + 1;
203 if (*sit !=
'(' && *sit !=
')' && *sit !=
',' && *sit !=
':') {
209 if (strchr (
"ABCDGHKMNRSTVWY", ch) ==
NULL) {
265 countryname =
"USA: District of Columbia";
267 countryname =
"USA: Puerto Rico";
269 countryname =
"USA: Puerto Rico";
272 countryname = countryname.substr(5);
276 countryname = countryname.substr(7);
296 PostObjErr(sev, errtype,
error, obj,
ctx);
412 bool all_local_or_gnl =
true;
413 for (
auto pId : bioseq.
GetId()) {
414 switch (pId->Which()) {
424 all_local_or_gnl =
false;
427 return all_local_or_gnl;
448 if (pDesc->IsMolinfo()) {
449 const auto& molinfo = pDesc->GetMolinfo();
463 return &(
ctx->GetSeq());
467 ctx->GetSet().IsSetClass() &&
469 const auto& bioseq_set =
ctx->GetSet();
470 if (bioseq_set.IsSetSeq_set()) {
471 for (
const auto& pEntry : bioseq_set.GetSeq_set()) {
472 if (pEntry->IsSeq()) {
473 const auto& bioseq = pEntry->GetSeq();
474 if (bioseq.IsSetInst() &&
475 bioseq.GetInst().IsNa()) {
493 if (!
isdigit(*it) && *it !=
' ') {
508 "No organism has been applied to this Bioseq. Other qualifiers may exist.", obj,
ctx);
519 bool is_env_sample =
false;
523 is_env_sample =
true;
527 if (!is_env_sample) {
529 "Uncultured should also have /environmental_sample",
534 "Blank sample should not be associated with any sequences",
543 "Transposon and insertion sequence are no longer legal locations",
547 if (IsIndexerVersion()
551 "INDEXER_ONLY - BioSource location is chromosome",
555 bool isViral =
false, isAnimal =
false, isPlant =
false,
556 isBacteria =
false, isArchaea =
false, isFungal =
false,
580 bool chrom_conflict =
false;
586 double lat_value = 0.0, lon_value = 0.0;
587 bool is_single_cell_amplification =
false;
591 ValidateSubSource(**ssit, obj,
ctx, isViral);
592 if (!(*ssit)->IsSetSubtype()) {
596 if ((*ssit)->IsSetName()) {
597 string str = (*ssit)->GetName();
600 "Subsource name should not be " +
str,
611 countryname = (**ssit).GetName();
615 if ((*ssit)->IsSetName()) {
616 lat_lon = (*ssit)->GetName();
617 bool format_correct =
false, lat_in_range =
false, lon_in_range =
false, precision_correct =
false;
619 lat_in_range, lon_in_range,
620 lat_value, lon_value);
627 if ((*ssit)->IsSetName()) {
628 val = (*ssit)->GetName();
631 "'" +
val +
"' is an invalid altitude value, altitude should be provided in meters",
639 chrom_conflict =
true;
642 chromosome = ssit->GetPointer();
647 linkage_group = ssit->GetPointer();
651 if ((*ssit)->IsSetName()) {
657 if ((*ssit)->IsSetName()) {
663 if ((*ssit)->IsSetName()) {
664 pcr_set_list.
AddFwdSeq((*ssit)->GetName());
669 if ((*ssit)->IsSetName()) {
670 pcr_set_list.
AddRevSeq((*ssit)->GetName());
677 if (IsGpipe() && IsGenomic()) {
680 if (isAnimal || isPlant) {
682 const string str = (*ssit)->GetName();
685 "Invalid value (" +
str +
") for /sex qualifier", obj,
ctx);
687 }
else if (isViral) {
689 "Virus has unexpected Sex qualifier", obj,
ctx);
690 }
else if (isBacteria || isArchaea || isFungal) {
692 "Unexpected use of /sex qualifier", obj,
ctx);
694 const string str = (*ssit)->GetName();
698 "Invalid value (" +
str +
") for /sex qualifier", obj,
ctx);
705 if (isAnimal || isPlant || isViral) {
707 "Unexpected use of /mating_type qualifier", obj,
ctx);
711 "Unexpected use of /mating_type qualifier", obj,
ctx);
718 "Plasmid subsource but not plasmid location", obj,
ctx);
724 if ((*ssit)->IsSetName()) {
730 const string&
subname = ((*ssit)->GetName());
740 if (genome_from_name != genome) {
743 val_name = val_name.substr(8);
746 "Plastid name subsource " + val_name +
" but not " + val_name +
" location", obj,
ctx);
750 "Plastid name subsource contains unrecognized value", obj,
ctx);
757 if ((*ssit)->IsSetName() && hasTaxname) {
768 "Tissue-type is inappropriate for bacteria", obj,
ctx);
769 }
else if (isViroid) {
771 "Viroid has unexpected tissue-type qualifier", obj,
ctx);
776 if ((*ssit)->IsSetName()) {
777 const string&
subname = ((*ssit)->GetName());
779 is_single_cell_amplification =
true;
783 string num =
subname.substr(0, pos);
785 is_single_cell_amplification =
true;
802 "Virus has unexpected " +
subname +
" qualifier", obj,
ctx);
809 bool suppress =
false;
812 it->IsSetName() &&
NStr::Equal(it->GetName(),
"unlocalized")) {
824 switch (sid.
Which()) {
836 if (acc.length() == 8) {
851 string msg =
"INDEXER_ONLY - source contains chromosome value '";
855 msg +=
"' but the BioSource location is not set to chromosome";
864 bool suppress =
false;
867 it->IsSetName() &&
NStr::Equal(it->GetName(),
"unlocalized")) {
879 switch (sid.
Which()) {
891 if (acc.length() == 8) {
906 string msg =
"INDEXER_ONLY - source contains linkage_group value '";
908 msg += linkage_group->
GetName();
910 msg +=
"' but the BioSource location is not set to chromosome";
918 if (it->second <= 1)
continue;
923 qual = chrom_conflict ?
"conflicting chromosome" :
"identical chromosome";
break;
925 qual =
"germline";
break;
927 qual =
"rearranged";
break;
929 qual =
"plasmid_name";
break;
931 qual =
"segment";
break;
933 qual =
"country";
break;
935 qual =
"transgenic";
break;
937 qual =
"environmental_sample";
break;
939 qual =
"lat_lon";
break;
941 qual =
"collection_date";
break;
943 qual =
"collected_by";
break;
945 qual =
"identified_by";
break;
947 qual =
"fwd_primer_seq";
break;
949 qual =
"rev_primer_seq";
break;
951 qual =
"fwd_primer_name";
break;
953 qual =
"rev_primer_name";
break;
955 qual =
"metagenomic";
break;
957 qual =
"altitude";
break;
967 "Germline and rearranged should not both be present", obj,
ctx);
971 "Transgenic and environmental sample should not both be present", obj,
ctx);
975 "Metagenomic should also have environmental sample annotated", obj,
ctx);
979 "Sex and mating type should not both be present", obj,
ctx);
983 if (m_genomeSubmission) {
987 "Plasmid location set but plasmid name missing. Add a plasmid source modifier with the plasmid name. Use unnamed if the name is not known.",
997 "PCR primer does not have both sequences", obj,
ctx);
1000 bool has_duplicate_primers =
false;
1002 has_duplicate_primers =
true;
1005 has_duplicate_primers =
true;
1008 if (has_duplicate_primers) {
1010 "PCR primer sequence has duplicates", obj,
ctx);
1014 ValidateLatLonCountry(countryname, lat_lon, obj,
ctx);
1021 if (!IsSeqSubmitParent() && IsIndexerVersion()) {
1033 if (IsEmbl() || IsDdbj()) {
1038 "No lineage for this BioSource.", obj,
ctx);
1043 const string& lineage = orgname.
GetLineage();
1045 if (lineage.find(
"Kinetoplastida") == string::npos && lineage.find(
"Kinetoplastea") == string::npos) {
1047 "Only Kinetoplastida have kinetoplasts", obj,
ctx);
1050 if (lineage.find(
"Chlorarachniophyceae") == string::npos &&
1051 lineage.find(
"Cryptophyceae") == string::npos) {
1054 "Only Chlorarachniophyceae and Cryptophyceae have nucleomorphs", obj,
ctx);
1057 if (lineage.find(
"Ciliophora") == string::npos) {
1059 "Only Ciliophora have macronuclear locations", obj,
ctx);
1064 const string& div = orgname.
GetDiv();
1076 "Bacterial or viral source should not have organelle location",
1081 "BioSource with ENV division is missing environmental sample subsource",
1088 "If metagenomes appears in lineage, BioSource should have metagenomic qualifier",
1095 bool specific_host =
false;
1099 if (!it->IsSetSubtype()) {
1105 specific_host =
true;
1125 "Virus has unexpected " +
subname +
" qualifier", obj,
ctx);
1131 "Environmental sample should also have isolation source or specific host annotated",
1135 m_biosource_kind = bsrc;
1137 const CBioseq* pBioseq=
nullptr;
1138 const bool checkForUndefinedSpecies = hasTaxname &&
1139 (IsGenomeSubmission() ||
1144 ValidateOrgRef(orgref, obj,
ctx, checkForUndefinedSpecies, is_single_cell_amplification);
1153 (
const string& primer_kind,
1158 if (badch < ' ' || badch >
'~') {
1161 string msg =
"PCR " + primer_kind +
" primer sequence format is incorrect, first bad character is '";
1171 const string& primer_kind,
1177 x_ReportPCRSeqProblem(primer_kind, badch, obj,
ctx);
1183 "PCR " + primer_kind +
" primer name appears to be a sequence",
1195 for (
auto it : pcrset.
Get())
1197 if (it->IsSetForward()) {
1198 for (
auto pit : it->GetForward().Get())
1200 x_CheckPCRPrimer(*pit,
"forward", obj,
ctx);
1203 if (it->IsSetReverse()) {
1204 for (
auto pit : it->GetReverse().Get())
1206 x_CheckPCRPrimer(*pit,
"reverse", obj,
ctx);
1222 "Unknown subsource subtype 0", obj,
ctx);
1236 const auto& fdata = feat->
GetData();
1237 if (fdata.IsBiosrc() && fdata.GetBiosrc().IsSetTaxname()) {
1252 string countryname = subsrc.
GetName();
1253 bool is_miscapitalized =
false;
1256 if (is_miscapitalized) {
1257 if (use_geo_loc_name) {
1259 "Bad geo_loc_name capitalization [" + countryname +
"]",
1263 "Bad country capitalization [" + countryname +
"]",
1268 if (use_geo_loc_name) {
1270 "Colon at end of geo_loc_name [" + countryname +
"]", obj,
ctx);
1273 "Colon at end of country name [" + countryname +
"]", obj,
ctx);
1277 if (use_geo_loc_name) {
1279 "Replaced geo_loc_name [" + countryname +
"]", obj,
ctx);
1282 "Replaced country name [" + countryname +
"]", obj,
ctx);
1286 if (countryname.empty()) {
1289 if (use_geo_loc_name) {
1291 "Bad geo_loc_name [" + countryname +
"]", obj,
ctx);
1294 "Bad country name [" + countryname +
"]", obj,
ctx);
1302 bool format_correct =
false, lat_in_range =
false, lon_in_range =
false, precision_correct =
false;
1303 double lat_value = 0.0, lon_value = 0.0;
1304 string lat_lon = subsrc.
GetName();
1306 lat_in_range, lon_in_range,
1307 lat_value, lon_value);
1308 if (!format_correct) {
1310 if (pos != string::npos) {
1312 if (format_correct) {
1314 "lat_lon format has extra text after correct dd.dd N|S ddd.dd E|W format",
1320 if (!format_correct) {
1322 "lat_lon format is incorrect - should be dd.dd N|S ddd.dd E|W",
1325 if (!lat_in_range) {
1327 "latitude value is out of range - should be between 90.00 N and 90.00 S",
1330 if (!lon_in_range) {
1332 "longitude value is out of range - should be between 180.00 E and 180.00 W",
1335 if (!precision_correct) {
1348 string name = subsrc.
GetName();
1350 if (name.length() > 10
1353 "PCR primer name appears to be a sequence",
1361 string name = subsrc.
GetName();
1363 if (name.length() > 10
1366 "PCR primer name appears to be a sequence",
1376 x_ReportPCRSeqProblem(
"forward", bad_ch, obj,
ctx);
1385 x_ReportPCRSeqProblem(
"reverse", bad_ch, obj,
ctx);
1393 "Transposon name and insertion sequence name are no "
1394 "longer legal qualifiers", obj,
ctx);
1399 "Unknown subsource subtype 0", obj,
ctx);
1403 ValidateSourceQualTags(subsrc.
GetName(), obj,
ctx);
1433 "Problematic plasmid/chromosome/linkage group name '" + sname +
"'",
1440 "Problematic plasmid/chromosome/linkage group name '" + sname +
"'",
1447 "Problematic plasmid/chromosome/linkage group name '" + sname +
"'",
1459 "Non-viral source feature should not have a segment qualifier",
1482 const string& frequency = subsrc.
GetName();
1487 "bad frequency qualifier value " + frequency,
1490 string::const_iterator sit = frequency.begin();
1491 bool bad_frequency =
false;
1495 if (sit != frequency.end() && *sit ==
'.') {
1497 if (sit == frequency.end()) {
1498 bad_frequency =
true;
1500 while (sit != frequency.end() &&
isdigit(*sit)) {
1503 if (sit != frequency.end()) {
1504 bad_frequency =
true;
1507 bad_frequency =
true;
1509 if (bad_frequency) {
1511 "bad frequency qualifier value " + frequency,
1520 "Collection_date format is not in DD-Mmm-YYYY format",
1543 subname +
" qualifier should not have descriptive text",
1550 "Unbalanced parentheses in subsource '" +
subname +
"'",
1555 "subsource " +
subname +
" has SGML",
1569 size_t value_len =
value.length();
1570 while (pos != string::npos
1571 && (((pos != 0 &&
isalpha(taxname.c_str()[pos - 1]))
1572 ||
isalpha(taxname.c_str()[pos + value_len])))) {
1575 if (pos == string::npos) {
1601 if (pos == string::npos) {
1605 }
else if (pos > 0 &&
NStr::EqualNocase(taxname.substr(0, pos),
"Salmonella")) {
1641 const bool checkForUndefinedSpecies,
1642 const bool is_single_cell_amplification)
1648 "No organism name included in the source. Other qualifiers may exist.", obj,
ctx);
1670 "Organism '" + taxname +
"' is undefined species and does not have a specific identifier.",
1676 "Unbalanced parentheses in taxname '" + orgref.
GetTaxname() +
"'", obj,
ctx);
1680 "taxname " + taxname +
" has SGML",
1688 ValidateTaxNameOrgname(taxname, orgref.
GetOrgname(), obj,
ctx);
1694 ValidateDbxref(orgref.
GetDb(), obj,
true,
ctx);
1697 bool has_taxon =
false;
1705 if (! IsLocalGeneralOnly()) {
1708 if (IsRequireTaxonID() && !has_taxon) {
1710 "BioSource is missing taxon ID", obj,
ctx);
1717 ValidateOrgName(orgname, has_taxon, obj,
ctx);
1720 string taxname_search = taxname;
1722 size_t pos =
NStr::Find(taxname_search,
" ");
1723 if (pos == string::npos) {
1724 taxname_search.clear();
1726 taxname_search = taxname_search.substr(pos + 1);
1729 if (pos == string::npos) {
1730 taxname_search.clear();
1732 taxname_search = taxname_search.substr(pos + 1);
1742 if (!(*it)->IsSetSubtype() || !(*it)->IsSetSubname()) {
1746 const string&
subname = (*it)->GetSubname();
1748 if (orgmod_name.length() > 0) {
1749 orgmod_name[0] =
toupper(orgmod_name[0]);
1755 "Subspecies value specified is not found in taxname",
1761 orgmod_name +
" value specified is not found in taxname",
1768 orgmod_name +
" value specified is not found in taxname",
1774 "Specific host is identical to taxname",
1780 if (s_IsSalmonellaGenus(taxname)) {
1782 "Salmonella organisms should use serovar instead of serotype.",
1787 if (s_IsSalmonellaGenus(taxname) &&
NStr::Find(taxname,
subname) == string::npos) {
1789 "Salmonella organism name should contain the serovar value.",
1815 for (
auto it : hybrid) {
1821 if (!rval && hybrid.size() > 1 &&
1822 hybrid.front()->IsSetName()) {
1831 for (
auto it : partial) {
1832 if (it->IsSetName()) {
1833 mismatch = it->GetName();
1840 if (!rval && partial.size() > 1 &&
1841 partial.front()->IsSetName()) {
1843 mismatch = partial.front()->GetName();
1855 (
const string& taxname,
1863 "Taxname does not match orgname ('" + taxname +
"', '" + mismatch +
"')",
1872 const bool has_taxon,
1876 bool is_viral =
false;
1903 bool has_strain =
false;
1904 vector<string> vouchers;
1907 const COrgMod& omd = **omd_itr;
1914 "Orgmod name should not be " +
str,
1931 "Orgmod.strain should not start with subsp.",
1935 "Orgmod.strain should not start with serovar",
1939 "Orgmod.strain should not be '" +
str +
"'",
1945 "Multiple strain qualifiers on the same BioSource", obj,
ctx);
1955 "Orgmod.serovar should not start with subsp.",
1959 "Orgmod.serovar should not start with strain",
1970 "Orgmod.sub-species should not contain subsp.",
1984 "Orgmod variety should only be in plants, fungi, or cyanobacteria",
1993 if ((*omd_itr)->IsSetSubname() && !
NStr::IsBlank((*omd_itr)->GetSubname())) {
1994 const string&
val = (*omd_itr)->GetSubname();
1999 if ((*it2)->IsSetSubtype()
2001 && (*it2)->IsSetSubname()
2004 "OrgMod synonym is identical to OrgMod gb_synonym",
2013 ValidateOrgModVoucher(omd, obj,
ctx);
2018 if (!(*omd_itr)->IsSetSubname() ||
2021 "Bad value for type_material", obj,
ctx);
2033 "Unbalanced parentheses in orgmod '" +
subname +
"'",
2038 "orgmod " +
subname +
" has SGML",
2051 if (strain.length() < 1) {
2056 "Orgmod.strain should not be species '" + species +
"'",
2061 "Orgmod.strain should not be subspecies '" + sub_species +
"'",
2066 "Orgmod.strain should not be serovar '" + serovar +
"'",
2069 if (
NStr::FindNoCase(strain, genus +
" " + species) != string::npos && genus.length() > 0 && species.length() > 0) {
2071 "Orgmod.strain should not contain '" + genus +
" " + species +
"'",
2097 if (!
source.IsSetGenome()
2100 bool is_viral =
false;
2123 if (
source.IsSetLineage()) {
2124 string lineage =
source.GetLineage();
2137 if (
source.IsSetLineage()) {
2138 string lineage =
source.GetLineage();
2151 while (d && !rval) {
2152 const auto & user = d->
GetUser();
2153 if (user.IsSetType() && user.GetType().IsStr() &&
NStr::Equal(user.GetType().GetStr(),
"DBLink")) {
2154 for (
auto f : user.GetData()) {
2155 if (
f->IsSetLabel() &&
f->GetLabel().IsStr() &&
NStr::Equal(
f->GetLabel().GetStr(),
"BioSample")
2156 &&
f->IsSetData() && (
f->GetData().IsStr() ||
f->GetData().IsStrs())) {
2174 m_biosource_kind =
source;
2176 const auto & inst = bsh.
GetInst();
2178 if (
source.IsSetIs_focus()) {
2186 "BioSource descriptor has focus, "
2187 "but no BioSource feature", obj,
ctx);
2191 if (
source.CanGetOrigin() &&
2193 if (!IsOtherDNA(bsh) && !bsh.
IsAa()) {
2195 "Molinfo-biomol other should be used if "
2196 "Biosource-location is synthetic", obj,
ctx);
2210 "HIV with moltype DNA should be proviral",
2218 "HIV with mRNA molecule type is rare",
2231 sequence::CDeflineGenerator defline_generator;
2232 title = defline_generator.GenerateDefline(bsh, sequence::CDeflineGenerator::fIgnoreExisting);
2235 bool isViral =
false;
2236 if (
source.IsSetLineage()) {
2237 string lineage =
source.GetLineage();
2248 &&
NStr::Find(title,
"complete genome") != string::npos
2251 "Non-viral complete genome not labeled as chromosome",
2258 bool is_synthetic_construct = IsSyntheticConstruct(
source);
2259 bool is_artificial = IsArtificial(
source);
2261 if (is_synthetic_construct) {
2265 "synthetic construct should have other-genetic",
2268 if (!is_artificial) {
2270 "synthetic construct should have artificial origin",
2273 }
else if (is_artificial) {
2275 "artificial origin should have other-genetic and synthetic construct",
2278 if (is_artificial) {
2283 "artificial origin should have other-genetic",
2294 if (!(*it)->IsSetSubtype()) {
2307 "cRNA note conflicts with molecule type",
2311 "cRNA note redundant with molecule type",
2332 if (it->IsSetSubtype()
2334 && it->IsSetSubname()
2339 "cRNA note conflicts with molecule type",
2344 "cRNA note redundant with molecule type",
2360 "Genomic DNA viral lineage indicates no DNA stage",
2368 if ( (IsGpipe() || IsIndexerVersion() ) &&
s_IsBioSample(bsh) ) {
2371 if ( is_bact || is_arch ) {
2372 bool has_strain =
false;
2373 bool has_isolate =
false;
2374 bool env_sample =
false;
2375 if (
source.IsSetSubtype()) {
2384 if (!env_sample &&
source.IsSetOrg()
2385 &&
source.GetOrg().IsSetOrgname()) {
2386 const auto& orgname =
source.GetOrg().GetOrgname();
2387 if (orgname.IsSetMod()) {
2388 for (
auto om : orgname.GetMod()) {
2389 if (
om->IsSetSubtype()) {
2404 if (!has_strain && !has_isolate && !env_sample) {
2407 "Bacteria should have strain or isolate or environmental sample",
2409 }
else if (is_arch) {
2411 "Archaea should have strain or isolate or environmental sample",
2459 "culture_collection:",
2463 "endogenous_virus_name:",
2464 "environmental_sample:",
2468 "fwd_pcr_primer_name",
2469 "fwd_pcr_primer_seq",
2478 "insertion_seq_name:",
2480 "isolation_source:",
2487 "metagenome_source:",
2497 "rev_pcr_primer_name",
2498 "rev_pcr_primer_seq",
2507 "specimen_voucher:",
2529 static std::mutex m;
2531 std::lock_guard
g(m);
2553 size_t str_len =
str.length();
2557 for (
size_t i = 0;
i < str_len; ++
i) {
2561 if (
match.empty()) {
2564 size_t match_len =
match.length();
2567 if ((
int)(
i - match_len) >= 0) {
2568 char ch =
str[
i - match_len];
2569 if (!
isspace((
unsigned char)ch) && ch !=
';') {
2577 if (pos != string::npos) {
2578 if (pos == 0 ||
isspace ((
unsigned char)
str[pos]) ||
str[pos] ==
';') {
2590 "Source note has structured tag '" +
match +
"'", obj,
ctx);
2657 usr_descs.push_back(desc);
2660 desc_ctxs.push_back(r_se);
2666 for (
auto annot_it : se.
GetAnnot()) {
2667 if (annot_it->IsFtable()) {
2668 for (
auto feat_it : annot_it->GetData().GetFtable()) {
2669 if (feat_it->IsSetData() && feat_it->GetData().IsUser()
2672 feat.
Reset(feat_it);
2673 usr_feats.push_back(feat);
2684 GatherTentativeName(**it, usr_descs, desc_ctxs, usr_feats);
2695 if (org_rq_list.size() > 0) {
2699 while (
i < org_rq_list.size()) {
2701 vector< CRef<COrg_ref> > tmp_rq(org_rq_list.begin() +
i, org_rq_list.begin() +
i +
len);
2703 if (!reply || !reply->IsSetReply()) {
2724 if (org_rq_list.size() == 0) {
2730 while (
i < org_rq_list.size()) {
2732 vector< CRef<COrg_ref> > tmp_rq(org_rq_list.begin() +
i, org_rq_list.begin() +
i +
len);
2735 if (tmp_spec_host_reply) {
2738 err_msg =
"Connection to taxonomy failed";
2756 if (org_rq_list.size() == 0) {
2762 while (
i < org_rq_list.size()) {
2764 vector< CRef<COrg_ref> > tmp_rq(org_rq_list.begin() +
i, org_rq_list.begin() +
i +
len);
2780 auto pTval = x_CreateTaxValidator();
2782 ValidateSpecificHost(*pTval);
2788 const string err_str =
error.IsSetMessage() ?
error.GetMessage() :
"?";
2800 vector<CConstRef<CSeqdesc> > src_descs;
2801 vector<CConstRef<CSeq_entry> > desc_ctxs;
2802 vector<CConstRef<CSeq_feat> > src_feats;
2804 GatherTentativeName(se, src_descs, desc_ctxs, src_feats);
2807 vector< CRef<COrg_ref> > org_rq_list;
2810 vector<CConstRef<CSeqdesc> >::iterator desc_it = src_descs.begin();
2811 vector<CConstRef<CSeq_entry> >::iterator ctx_it = desc_ctxs.begin();
2812 while (desc_it != src_descs.end() && ctx_it != desc_ctxs.end()) {
2816 org_rq_list.push_back(rq);
2823 vector<CConstRef<CSeq_feat> >::iterator feat_it = src_feats.begin();
2824 while (feat_it != src_feats.end()) {
2828 org_rq_list.push_back(rq);
2833 if (org_rq_list.empty()) {
2838 if (!reply || !reply->IsSetReply()) {
2840 "Taxonomy service connection failure", se);
2843 const auto& rlist = reply->GetReply();
2844 CTaxon3_reply::TReply::const_iterator reply_it = rlist.begin();
2847 desc_it = src_descs.begin();
2848 ctx_it = desc_ctxs.begin();
2851 while (reply_it != rlist.end()
2852 && desc_it != src_descs.end()
2853 && ctx_it != desc_ctxs.end()) {
2854 if ((*reply_it)->IsError()) {
2857 "Taxonomy lookup failed for Tentative Name '" + org_rq_list[pos]->GetTaxname() +
"'",
2858 **desc_it, *ctx_it);
2860 HandleTaxonomyError((*reply_it)->GetError(),
2871 feat_it = src_feats.begin();
2872 while (reply_it != rlist.end()
2873 && feat_it != src_feats.end()) {
2874 if ((*reply_it)->IsError()) {
2877 "Taxonomy lookup failed for Tentative Name '" + org_rq_list[pos]->GetTaxname() +
"'",
2880 HandleTaxonomyError((*reply_it)->GetError(),
2893 const string err_str =
error.IsSetMessage() ?
error.GetMessage() :
"?";
2896 string msg =
"Organism not found in taxonomy database";
2897 if (
error.IsSetOrg()) {
2898 const auto& e_org =
error.GetOrg();
2900 if (e_org.IsSetTaxname() &&
2902 (!d_org.IsSetTaxname() ||
2903 !
NStr::Equal(d_org.GetTaxname(), e_org.GetTaxname()))) {
2904 msg +=
" (suggested:" + e_org.
GetTaxname() +
")";
2916 "Taxonomy lookup failed with message '" + err_str +
"'",
2920 "Taxonomy lookup failed with message '" + err_str +
"'",
2928 const string err_str =
error.IsSetMessage() ?
error.GetMessage() :
"?";
2936 "Taxonomy lookup failed with message '" + err_str +
"'",
2940 "Taxonomy lookup failed with message '" + err_str +
"'",
2946 const string& host,
const COrg_ref& org)
2948 const string err_str =
error.IsSetMessage() ?
error.GetMessage() :
"?";
2952 "Organism not found in taxonomy database",
2956 "Specific host value is ambiguous: " + host, org);
2963 "Invalid value for specific host: " + host, org);
2990 string fromEnv =
env.Get(
"NCBI_NEW_STRAIN_VALIDATION");
2992 if (fromEnv ==
"true") {
2994 }
else if (fromEnv ==
"false") {
3026 auto pTval = x_CreateTaxValidator();
3030 ValidateOrgRefs(*pTval);
3033 ValidateSpecificHost(*pTval);
3039 {
return m_pContext->m_taxon_update(request);});
3041 ValidateStrain(*pTval, pTval->m_descTaxID);
3044 ValidateTentativeName(se);
3050 auto pTval = x_CreateTaxValidator();
3051 pTval->CheckOneOrg(org, genome, *
this);
3082 unsigned int pcr_num = 0;
3084 name = name.substr(1, name.length() - 2);
3085 vector<string> mult_names;
3087 unsigned int name_num = 0;
3088 while (name_num < mult_names.size()) {
3095 m_SetList[pcr_num]->SetFwdName(mult_names[name_num]);
3113 unsigned int pcr_num = 0;
3115 name = name.substr(1, name.length() - 2);
3116 vector<string> mult_names;
3118 unsigned int name_num = 0;
3119 while (name_num < mult_names.size()) {
3126 m_SetList[pcr_num]->SetRevName(mult_names[name_num]);
3144 unsigned int pcr_num = 0;
3146 name = name.substr(1, name.length() - 2);
3147 vector<string> mult_names;
3149 unsigned int name_num = 0;
3150 while (name_num < mult_names.size()) {
3157 m_SetList[pcr_num]->SetFwdSeq(mult_names[name_num]);
3175 unsigned int pcr_num = 0;
3177 name = name.substr(1, name.length() - 2);
3178 vector<string> mult_names;
3180 unsigned int name_num = 0;
3181 while (name_num < mult_names.size()) {
3188 m_SetList[pcr_num]->SetRevSeq(mult_names[name_num]);
3213 }
else if (compare > 0) {
3217 }
else if (compare > 0) {
3221 }
else if (compare > 0) {
3273 }
else if (compare > 0) {
3297 }
else if (s1.
Get().size() < s2.
Get().size()) {
3299 }
else if (s1.
Get().size() > s2.
Get().size()) {
3302 auto it1 = s1.
Get().begin();
3303 auto it2 = s2.
Get().begin();
3304 while (it1 != s1.
Get().end()) {
3355 template <
typename T>
3369 if (!primers.
IsSet() || primers.
Get().size() < 2) {
3373 for (
auto it : primers.
Get()) {
3374 if (already_seen.
find(it) != already_seen.
end()) {
3412 vector<string> error_list;
3414 ITERATE(vector<string>, err, error_list) {
3417 }
else if (
NStr::FindNoCase(*err,
"should be structured") != string::npos) {
3419 }
else if (
NStr::FindNoCase(*err,
"missing institution code") != string::npos) {
3421 }
else if (
NStr::FindNoCase(*err,
"missing specific identifier") != string::npos) {
3432 }
else if (
NStr::FindNoCase(*err,
"should not be qualified with a <COUNTRY> designation") != string::npos) {
3433 if (use_geo_loc_name) {
3438 }
else if (
NStr::FindNoCase(*err,
"needs to be qualified with a <COUNTRY> designation") != string::npos) {
3440 }
else if (
NStr::FindNoCase(*err,
" exists, but collection ") != string::npos) {
3454 return m_taxon->SendOrgRefList(orgRefs);
3456 return make_unique<CTaxValidationAndCleanup>(taxFunc);
3460 return make_unique<CTaxValidationAndCleanup>(
m_pContext->m_taxon_update);
const char * sm_ValidModifiedPrimerBases[]
@ eErr_SEQ_DESCR_BadPlastidName
@ eErr_SEQ_DESCR_ObsoleteSourceQual
@ eErr_SEQ_DESCR_MissingEnvironmentalSample
@ eErr_SEQ_DESCR_ObsoleteSourceLocation
@ eErr_SEQ_DESCR_MissingPlasmidLocation
@ eErr_SEQ_DESCR_InvalidTissueType
@ eErr_SEQ_DESCR_TaxonomyServiceProblem
@ eErr_SEQ_DESCR_TaxonomyBlankSample
@ eErr_SEQ_DESCR_MissingPersonalCollectionName
@ eErr_SEQ_DESCR_LatLonRange
@ eErr_SEQ_DESCR_DuplicatePCRPrimerSequence
@ eErr_GENERIC_SgmlPresentInText
@ eErr_SEQ_DESCR_UnstructuredVoucher
@ eErr_SEQ_DESCR_BadVariety
@ eErr_SEQ_DESCR_BadInstitutionGeoLocName
@ eErr_SEQ_DESCR_BadTypeMaterial
@ eErr_SEQ_DESCR_OrgModMissingValue
@ eErr_SEQ_DESCR_NoOrgFound
@ eErr_SEQ_DESCR_BadPCRPrimerSequence
@ eErr_SEQ_DESCR_UnnecessaryBioSourceFocus
@ eErr_SEQ_DESCR_InvalidForType
@ eErr_SEQ_DESCR_LatLonValue
@ eErr_SEQ_DESCR_OrganismIsUndefinedSpecies
@ eErr_SEQ_DESCR_IdenticalInstitutionCode
@ eErr_SEQ_DESCR_BacteriaMissingSourceQualifier
@ eErr_SEQ_DESCR_BadCountryCapitalization
@ eErr_SEQ_DESCR_BadCollectionDate
@ eErr_SEQ_DESCR_BadInstitutionCode
@ eErr_SEQ_DESCR_BadAltitude
@ eErr_SEQ_DESCR_IncorrectlyFormattedVoucherID
@ eErr_SEQ_DESCR_StrainWithEnvironSample
@ eErr_SEQ_DESCR_OrganismNotFound
@ eErr_SEQ_DESCR_InconsistentVirusMoltype
@ eErr_SEQ_DESCR_BadInstitutionCountry
@ eErr_SEQ_DESCR_MissingPlasmidName
@ eErr_SEQ_DESCR_UnculturedNeedsEnvSample
@ eErr_SEQ_DESCR_BadTentativeName
@ eErr_SEQ_DESCR_BadPlasmidChromosomeLinkageName
@ eErr_SEQ_DESCR_BadTextInSourceQualifier
@ eErr_SEQ_DESCR_SuspectedContaminatedCellLine
@ eErr_SEQ_DESCR_AmbiguousSpecificHost
@ eErr_SEQ_DESCR_BadGeoLocNameCapitalization
@ eErr_SEQ_DESCR_ChromosomeWithoutLocation
@ eErr_SEQ_DESCR_StructuredSourceNote
@ eErr_SEQ_DESCR_InvalidMatingType
@ eErr_SEQ_DESCR_BadSubSource
@ eErr_SEQ_DESCR_MultipleStrains
@ eErr_SEQ_DESCR_BadGeoLocNameCode
@ eErr_SEQ_DESCR_InvalidSexQualifier
@ eErr_SEQ_DESCR_TaxonomyAmbiguousName
@ eErr_SEQ_DESCR_MultipleSourceQualifiers
@ eErr_SEQ_DESCR_WrongVoucherType
@ eErr_SEQ_DESCR_BadCollectionCode
@ eErr_SEQ_DESCR_SyntheticConstructWrongMolType
@ eErr_SEQ_DESCR_TaxonomyLookupProblem
@ eErr_SEQ_DESCR_NoTaxonID
@ eErr_SEQ_DESCR_LatLonFormat
@ eErr_SEQ_DESCR_MissingLineage
@ eErr_SEQ_DESCR_BadOrgMod
@ eErr_SEQ_DESCR_BadSpecificHost
@ eErr_SEQ_DESCR_BadPCRPrimerName
@ eErr_SEQ_DESCR_OrgModValueInvalid
@ eErr_SEQ_DESCR_BadOrganelleLocation
@ eErr_SEQ_DESCR_EnvironSampleMissingQualifier
@ eErr_SEQ_DESCR_BadCountryCode
@ eErr_SEQ_DESCR_ChromosomeLocation
@ eErr_SEQ_DESCR_BioSourceNeedsChromosome
@ eErr_SEQ_DESCR_BioSourceInconsistency
@ eErr_SEQ_DESCR_HostIdenticalToOrganism
@ eErr_SEQ_DESCR_BadBioSourceFrequencyValue
@ eErr_SEQ_DESCR_ReplacedCountryCode
@ eErr_SEQ_DESCR_ReplacedGeoLocNameCode
@ eErr_SEQ_DESCR_UnbalancedParentheses
@ eErr_SEQ_DESCR_MissingMetagenomicQualifier
@ eErr_SEQ_DESCR_SyntheticConstructNeedsArtificial
@ eErr_SEQ_DESCR_NonViralSegment
bool IsOrganismArchaea() const
bool IsOrganismEukaryote() const
bool IsOrganismBacteria() const
CBioSourceKind & operator=(const CBioSource &bsrc)
const string & GetLineage(void) const
static string GetOrganelleByGenome(unsigned int genome)
bool IsSetLineage(void) const
static CBioSource::EGenome GetGenomeByOrganelle(const string &organelle, NStr::ECase use_case=NStr::eCase, bool starts_with=false)
bool IsSetTaxname(void) const
static bool WasValid(const string &country)
static bool IsValid(const string &country)
static CNcbiApplication * Instance(void)
Singleton method.
@OrgMod.hpp User-defined methods of the data storage class.
static string IsCultureCollectionValid(const string &culture_collection)
static bool IsStrainValid(const string &strain)
static bool IsValidTypeMaterial(const string &type_material)
static string IsBiomaterialValid(const string &biomaterial)
static string GetSubtypeName(TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
static string CheckMultipleVouchers(const vector< string > &)
static string IsSpecimenVoucherValid(const string &specimen_voucher)
bool GetFlatName(string &name_out, string *lineage=0) const
const string & GetLineage(void) const
const string & GetDivision(void) const
bool IsSetDivision(void) const
bool IsVarietyValid(const string &variety) const
bool IsSetOrgMod(void) const
bool IsSubspeciesValid(const string &subspecies) const
bool IsSetLineage(void) const
static bool IsValid(const string &seq, char &bad_ch)
void AddFwdName(string name)
vector< CPCRSet * > m_SetList
void AddRevName(string name)
void AddFwdSeq(string name)
void AddRevSeq(string name)
size_t GetOrigPos() const
string GetRevName() const
string GetFwdName() const
const TAnnot & GetAnnot(void) const
const CSeq_descr & GetDescr(void) const
bool IsSetAnnot(void) const
bool IsSetDescr(void) const
namespace ncbi::objects::
Base class for all serializable objects.
static void ExploreStrainsForTaxonInfo(CTaxValidationAndCleanup &tval, CValidError_imp &imp, const CSeq_entry &se, std::function< CRef< CTaxon3_reply >(const vector< CRef< COrg_ref >> &)> taxoncallback)
static string GetCollectionDateProblem(const string &date_string)
static bool NCBI_UseGeoLocNameForCountry(void)
static bool IsPlasmidNameValid(const string &value, const string &taxname)
static bool IsValidSexQualifierValue(const string &value)
static bool IsMultipleValuesAllowed(TSubtype)
static bool IsAltitudeValid(const string &value)
static string ValidateLatLonCountry(const string &countryname, string &lat_lon, bool check_state, ELatLonCountryErr &errcode)
static string CheckCellLine(const string &cell_line, const string &organism)
static string GetSubtypeName(CSubSource::TSubtype stype, EVocabulary vocabulary=eVocabulary_raw)
static bool NeedsNoText(const TSubtype &subtype)
static bool IsEndogenousVirusNameValid(const string &value)
static bool IsChromosomeNameValid(const string &value, const string &taxname)
static void IsCorrectLatLonFormat(string lat_lon, bool &format_correct, bool &precision_correct, bool &lat_in_range, bool &lon_in_range, double &lat_value, double &lon_value)
static bool IsSegmentValid(const string &value)
static bool IsLinkageGroupNameValid(const string &value, const string &taxname)
vector< CRef< COrg_ref > > GetTaxonomyLookupRequest() const
void ReportSpecificHostErrors(const CTaxon3_reply &reply, CValidError_imp &imp)
void ReportIncrementalTaxLookupErrors(const CTaxon3_reply &reply, CValidError_imp &imp, bool is_insd_patent, size_t offset) const
vector< CRef< COrg_ref > > GetStrainLookupRequest()
string IncrementalSpecificHostMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply)
CConstRef< CSeq_entry > GetTopReportObject() const
string IncrementalStrainMapUpdate(const vector< CRef< COrg_ref > > &input, const CTaxon3_reply &reply, TTaxId descTaxID=ZERO_TAX_ID)
void ReportStrainErrors(CValidError_imp &imp)
vector< CRef< COrg_ref > > GetSpecificHostLookupRequest(bool for_fix)
static bool IsWGS(const CBioseq &seq)
void ValidateTaxNameOrgname(const string &taxname, const COrgName &orgname, const CSerialObject &obj, const CSeq_entry *ctx)
bool IsSyntheticConstruct(const CBioSource &src)
void ValidateSubSource(const CSubSource &subsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr, const bool isViral=false)
void HandleTaxonomyError(const CT3Error &error, const string &host, const COrg_ref &orf)
void ValidateLatLonCountry(string countryname, string lat_lon, const CSerialObject &obj, const CSeq_entry *ctx)
void GatherTentativeName(const CSeq_entry &se, vector< CConstRef< CSeqdesc > > &usr_descs, vector< CConstRef< CSeq_entry > > &desc_ctxs, vector< CConstRef< CSeq_feat > > &usr_feats)
void ValidateOrgName(const COrgName &orgname, const bool has_taxon, const CSerialObject &obj, const CSeq_entry *ctx)
void PostObjErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void InitializeSourceQualTags()
void ValidateTaxonomy(const CSeq_entry &se)
void ValidateOrgRef(const COrg_ref &orgref, const CSerialObject &obj, const CSeq_entry *ctx, const bool checkForUndefinedSpecies=false, const bool is_single_cell_amplification=false)
static bool s_IsSalmonellaGenus(const string &taxname)
unique_ptr< CTaxValidationAndCleanup > x_CreateTaxValidator() const
void x_ReportPCRSeqProblem(const string &primer_kind, char badch, const CSerialObject &obj, const CSeq_entry *ctx)
void x_CheckPCRPrimer(const CPCRPrimer &primer, const string &primer_kind, const CSerialObject &obj, const CSeq_entry *ctx)
void ValidateSourceQualTags(const string &str, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void ValidateBioSource(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
bool IsArtificial(const CBioSource &src)
void ValidatePCRReactionSet(const CPCRReactionSet &pcrset, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void ValidateBioSourceForSeq(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx, const CBioseq_Handle &bsh)
void ValidateOrgRefs(CTaxValidationAndCleanup &tval)
bool IsOtherDNA(const CBioseq_Handle &bsh) const
void ValidateSpecificHost(CTaxValidationAndCleanup &tval)
void ValidateStrain(CTaxValidationAndCleanup &tval, TTaxId descTaxID=ZERO_TAX_ID)
shared_ptr< SValidatorContext > m_pContext
void ValidateOrgModVoucher(const COrgMod &orgmod, const CSerialObject &obj, const CSeq_entry *ctx)
void ValidateTentativeName(const CSeq_entry &se)
bool IsTransgenic(const CBioSource &bsrc)
EDiagSev x_SalmonellaErrorLevel()
static EErrType ConvertCode(CSubSource::ELatLonCountryErr errcode)
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
static const int chunk_size
Include a standard set of the NCBI C++ Toolkit most basic headers.
The NCBI C++ standard methods for dealing with std::string.
static const char * str(char *buf, int n)
#define FOR_EACH_USERFIELD_ON_USEROBJECT(Itr, Var)
FOR_EACH_USERFIELD_ON_USEROBJECT EDIT_EACH_USERFIELD_ON_USEROBJECT.
const CNcbiEnvironment & GetEnvironment(void) const
Get the application's cached environment.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
SStrictId_Tax::TId TTaxId
Taxon id type.
EDiagSev
Severity level for the posted diagnostics.
@ eDiag_Info
Informational message.
@ eDiag_Error
Error message.
@ eDiag_Warning
Warning message.
@ eDiag_Fatal
Fatal error – guarantees exit (or abort).
@ eDiag_Critical
Critical error message.
const TPrim & Get(void) const
virtual const CTypeInfo * GetThisTypeInfo(void) const =0
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
bool IsSetInst(void) const
const TInst & GetInst(void) const
void Reset(void)
Reset reference object.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static void TruncateSpacesInPlace(string &str, ETrunc where=eTrunc_Both)
Truncate spaces in a string (in-place).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool EqualCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive equality of a substring with another string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static string & ToLower(string &str)
Convert string to lower case – string& version.
@ eNocase
Case insensitive compare.
@ eCase
Case sensitive compare.
const Tdata & Get(void) const
Get the member data.
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
const TPcr_primers & GetPcr_primers(void) const
Get the Pcr_primers member data.
TGenome GetGenome(void) const
Get the Genome member data.
TOrigin GetOrigin(void) const
Get the Origin member data.
bool IsSetSeq(void) const
Check if a value has been assigned to Seq data member.
const Tdata & Get(void) const
Get the member data.
bool CanGetSubtype(void) const
Check if it is safe to call GetSubtype method.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool CanGetOrg(void) const
Check if it is safe to call GetOrg method.
list< CRef< CSubSource > > TSubtype
bool IsSetPcr_primers(void) const
Check if a value has been assigned to Pcr_primers data member.
const TForward & GetForward(void) const
Get the Forward member data.
const TOrg & GetOrg(void) const
Get the Org member data.
bool IsSetOrigin(void) const
Check if a value has been assigned to Origin data member.
TSubtype GetSubtype(void) const
Get the Subtype member data.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TSeq & GetSeq(void) const
Get the Seq member data.
bool IsSetReverse(void) const
Check if a value has been assigned to Reverse data member.
const TName & GetName(void) const
Get the Name member data.
const TReverse & GetReverse(void) const
Get the Reverse member data.
const TName & GetName(void) const
Get the Name member data.
bool IsSetForward(void) const
Check if a value has been assigned to Forward data member.
EGenome
biological context
bool IsSet(void) const
Check if a value has been assigned to data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
@ eSubtype_collection_date
DD-MMM-YYYY format.
@ eSubtype_insertion_seq_name
@ eSubtype_transposon_name
@ eSubtype_fwd_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_lat_lon
+/- decimal degrees
@ eSubtype_rev_primer_name
@ eSubtype_collected_by
name of person who collected the sample
@ eSubtype_fwd_primer_name
@ eSubtype_rev_primer_seq
sequence (possibly more than one; semicolon-separated)
@ eSubtype_isolation_source
@ eSubtype_environmental_sample
@ eSubtype_endogenous_virus_name
@ eSubtype_identified_by
name of person who identified the sample
@ eOrigin_synthetic
purely synthetic
@ eOrigin_artificial
artificially engineered
const TData & GetData(void) const
Get the Data member data.
const TStr & GetStr(void) const
Get the variant data.
const TLabel & GetLabel(void) const
Get the Label member data.
const TType & GetType(void) const
Get the Type member data.
const TMod & GetMod(void) const
Get the Mod member data.
bool IsSetDb(void) const
IDs in taxonomic or culture databases. Check if a value has been assigned to Db data member.
const TLineage & GetLineage(void) const
Get the Lineage member data.
TSubtype GetSubtype(void) const
Get the Subtype member data.
const TDiv & GetDiv(void) const
Get the Div member data.
E_Choice Which(void) const
Which variant is currently selected.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
const TSubname & GetSubname(void) const
Get the Subname member data.
const THybrid & GetHybrid(void) const
Get the variant data.
bool IsSetCommon(void) const
Common name. Check if a value has been assigned to Common data member.
bool IsSetLineage(void) const
Lineage with semicolon separators. Check if a value has been assigned to Lineage data member.
const TName & GetName(void) const
Get the Name member data.
const TTaxname & GetTaxname(void) const
Get the Taxname member data.
const TCommon & GetCommon(void) const
Get the Common member data.
const TBinomial & GetBinomial(void) const
Get the variant data.
const TDb & GetDb(void) const
Get the Db member data.
bool IsSetDiv(void) const
GenBank division code. Check if a value has been assigned to Div data member.
void SetTaxname(const TTaxname &value)
Assign a value to Taxname data member.
bool IsSetMod(void) const
Check if a value has been assigned to Mod data member.
const Tdata & Get(void) const
Get the member data.
bool IsSetGenus(void) const
Required. Check if a value has been assigned to Genus data member.
const TSpecies & GetSpecies(void) const
Get the Species member data.
bool IsSetOrgname(void) const
Check if a value has been assigned to Orgname data member.
bool IsSetSubname(void) const
Check if a value has been assigned to Subname data member.
bool IsSetTaxname(void) const
Preferred formal name. Check if a value has been assigned to Taxname data member.
const TGenus & GetGenus(void) const
Get the Genus member data.
const TPartial & GetPartial(void) const
Get the variant data.
bool IsSetSpecies(void) const
Species; required if subspecies is used. Check if a value has been assigned to Species data member.
bool IsSetName(void) const
Check if a value has been assigned to Name data member.
const Tdata & Get(void) const
Get the member data.
const TOrgname & GetOrgname(void) const
Get the Orgname member data.
@ eSubtype_gb_synonym
used by taxonomy database
@ eSubtype_other
ASN5: old-name (254) will be added to next spec.
@ eSubtype_nat_host
natural host of this specimen
@ eSubtype_metagenome_source
@ eSubtype_specimen_voucher
@ eSubtype_culture_collection
@ eSubtype_forma_specialis
@ e_Hybrid
hybrid between organisms
@ e_Binomial
genus/species type name
@ e_Partial
when genus not known
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
const TData & GetData(void) const
Get the Data member data.
const TBiosrc & GetBiosrc(void) const
Get the variant data.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
E_Choice Which(void) const
Which variant is currently selected.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_Tpe
Third Party Annot/Seq EMBL.
@ e_Tpd
Third Party Annot/Seq DDBJ.
@ e_General
for other databases
@ e_Tpg
Third Party Annot/Seq Genbank.
const TSeq & GetSeq(void) const
Get the variant data.
const TSet & GetSet(void) const
Get the variant data.
bool IsSeq(void) const
Check if variant Seq is selected.
bool IsSet(void) const
Check if variant Set is selected.
@ eClass_parts
parts for 2 or 3
@ eClass_nuc_prot
nuc acid and coded proteins
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
bool CanGetBiomol(void) const
Check if it is safe to call GetBiomol method.
const TUser & GetUser(void) const
Get the variant data.
bool IsMolinfo(void) const
Check if variant Molinfo is selected.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
const TTitle & GetTitle(void) const
Get the variant data.
const TSource & GetSource(void) const
Get the variant data.
bool IsSource(void) const
Check if variant Source is selected.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
const TId & GetId(void) const
Get the Id member data.
const Tdata & Get(void) const
Get the member data.
TMol GetMol(void) const
Get the Mol member data.
bool IsSetDescr(void) const
Descriptors. Check if a value has been assigned to Descr data member.
TBiomol GetBiomol(void) const
Get the Biomol member data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
bool IsSetId(void) const
Equivalent identifiers. Check if a value has been assigned to Id data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
@ eRepr_seg
segmented sequence
@ eCompleteness_complete
complete biological entity
@ eTech_wgs
whole genome shotgun sequencing
@ eBiomol_cRNA
viral RNA genome copy intermediate
@ eBiomol_other_genetic
other genetic material
@ e_User
user defined object
@ e_Molinfo
info on the molecule and techniques
@ e_Title
a title for this sequence
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double f(double x_, const double &y_)
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
#define FOR_EACH_SEQID_ON_BIOSEQ(Itr, Var)
FOR_EACH_SEQID_ON_BIOSEQ EDIT_EACH_SEQID_ON_BIOSEQ.
#define FOR_EACH_DBXREF_ON_ORGREF(Itr, Var)
FOR_EACH_DBXREF_ON_ORGREF EDIT_EACH_DBXREF_ON_ORGREF.
#define FOR_EACH_SUBSOURCE_ON_BIOSOURCE(Itr, Var)
FOR_EACH_SUBSOURCE_ON_BIOSOURCE EDIT_EACH_SUBSOURCE_ON_BIOSOURCE.
#define FOR_EACH_ORGMOD_ON_ORGNAME(Itr, Var)
FOR_EACH_ORGMOD_ON_ORGNAME EDIT_EACH_ORGMOD_ON_ORGNAME.
#define FOR_EACH_SEQENTRY_ON_SEQSET(Itr, Var)
FOR_EACH_SEQENTRY_ON_SEQSET EDIT_EACH_SEQENTRY_ON_SEQSET.
#define FIELD_IS_SET_AND_IS(Var, Fld, Chs)
FIELD_IS_SET_AND_IS base macro.
#define GET_FIELD(Var, Fld)
GET_FIELD base macro.
bool seq_mac_is_unique(Iterator iter1, Iterator iter2, Predicate pred)
bool ContainsSgml(const string &str)
CRef< objects::CObjectManager > om
bool operator()(T l, T r) const
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
static string x_RepairCountryName(string countryname)
static bool s_PCRReactionLess(CConstRef< CPCRReaction > pp1, CConstRef< CPCRReaction > pp2)
static const int sNumUnexpectedViralOrgModQualifiers
static const int sNumUnexpectedViralSubSourceQualifiers
static bool s_MatchOrgname(const string &taxname, const COrgName &orgname, string &mismatch)
std::string_view sm_SourceQualPrefixes[]
bool s_IsAllDigitsOrSpaces(string str)
static bool s_PCRPrimerSetLess(const CPCRPrimerSet &s1, const CPCRPrimerSet &s2)
static unique_ptr< CTextFsa > m_SourceQualTags
static bool NCBI_NewTaxVal(void)
static bool IsUnexpectedViralOrgModQualifier(COrgMod::TSubtype subtype)
bool s_IsBioSample(const CBioseq_Handle &bsh)
static const CSubSource::ESubtype sUnexpectedViralSubSourceQualifiers[]
static bool s_HasMetagenomeSource(const COrg_ref &org)
static const COrgMod::TSubtype sUnexpectedViralOrgModQualifiers[]
static bool s_PCRSetEqual(const CPCRSet *p1, const CPCRSet *p2)
static bool s_UnbalancedParentheses(string str)
static bool s_IsChromosome(const CBioSource &biosource)
static bool x_HasTentativeName(const CUser_object &user_object)
const size_t kDefaultChunkSize
static const string kInvalidReplyMsg
static string x_GetTentativeName(const CUser_object &user_object)
bool s_IsArchaea(const CBioSource &source)
static bool s_HasWGSTech(const CBioseq &bioseq)
static bool s_PCRSetCompare(const CPCRSet *p1, const CPCRSet *p2)
static bool s_FindWholeName(const string &taxname, const string &value)
static bool s_IsEukaryoteOrProkaryote(const CBioSourceKind &biosourceKind)
static bool s_ReportUndefinedSpeciesId(const CBioseq &bioseq)
static bool s_CompleteGenomeNeedsChromosome(const CBioSource &source)
static bool s_PCRPrimerLess(const CPCRPrimer &p1, const CPCRPrimer &p2)
bool IsOrgNotFound(const CT3Error &error)
static const CBioseq * s_GetNucSeqFromContext(const CSeq_entry *ctx)
bool s_IsBacteria(const CBioSource &source)
static bool s_IsUndefinedSpecies(const string &taxname)
static bool IsUnexpectedViralSubSourceQualifier(CSubSource::TSubtype subtype)
static bool s_init_NewTaxVal(void)