86 auto bsh =
context.GetBioseqHandle(bioseq);
88 sequence::CDeflineGenerator deflineGenerator;
89 auto defline = deflineGenerator.GenerateDefline(bsh, 0);
92 for (
const auto& desc :
context.GetSeqdesc()) {
94 m_Objs[defline].Add(*
context.SeqdescObjRef(desc));
99 m_Objs[defline].Add(*
context.BioseqObjRef());
107 if (m_Objs.empty()) {
110 bool all_unique =
true;
112 for (
auto& it : m_Objs.GetMap()) {
114 if (list.size() == 1) {
117 else if (list.size() > 1) {
126 m_ReportItems =
tmp.Export(*this)->GetSubitems();
138 m_Objs[
"[n] sequence[s] [has] terminal Ns"].Fatal().Add(*
context.BioseqObjRef());
152 m_Objs[
"[n] protein sequences are shorter than 50 aa."].Add(*
context.BioseqObjRef(),
false);
162 for (
const auto& desc :
context.GetSeqdesc()) {
163 if (desc.IsComment()) {
164 m_Objs[desc.GetComment()].Add(*
context.SeqdescObjRef(desc));
172 if (!m_Objs.empty()) {
174 string label = m_Objs.GetMap().size() == 1 ?
"[n] comment descriptor[s] were found (all same)" :
"[n] comment descriptor[s] were found (some different)";
175 for (
auto it : m_Objs.GetMap()) {
176 for (
auto obj : it.second->GetObjects()) {
187 DISCREPANCY_CASE(MRNA_ON_WRONG_SEQUENCE_TYPE, SEQUENCE,
eDisc |
eOncaller,
"Eukaryotic sequences that are not genomic or macronuclear should not have mRNA features")
205 m_Objs[
"[n] mRNA[s] [is] located on eukaryotic sequence[s] that [does] not have genomic or plasmid source[s]"].Add(*
context.SeqFeatObjRef(*feat));
219 bool has_gaps = !!sum.
Gaps;
223 if (it->IsFtable()) {
250 for (
const auto& desc :
context.GetAllSeqdesc()) {
254 for (
const auto& user_field : user.
GetData()) {
255 if (user_field->IsSetLabel() && user_field->GetLabel().IsStr() && user_field->GetLabel().GetStr() ==
"BioProject" && user_field->IsSetData() && user_field->GetData().IsStrs()) {
257 if (!strs.empty() && !strs[0].empty()) {
258 m_Objs[
"[n] sequence[s] contain[S] BioProject IDs"].Add(*
context.BioseqObjRef());
276 m_Objs[
"[n] bioseq[s] [has] no definition line"].Add(*
context.BioseqObjRef());
290 m_Objs[
"[n] sequence[s] [has] runs of 15 or more Ns"].Add(*
context.BioseqObjRef());
304 m_Objs[
"[n] sequence[s] [has] external references"].Add(*
context.BioseqObjRef());
314 const double MIN_N_PERCENTAGE = 10.0;
319 if (!sum.
HasRef && sum.
N * 100. / sum.
Len > MIN_N_PERCENTAGE) {
320 m_Objs[
"[n] sequence[s] [has] > 10% Ns"].Add(*
context.BioseqObjRef());
331 for (
const auto& feat :
context.GetFeat()) {
336 m_Objs[
key +
": [n] present"].Info().Incr();
345 for (
const auto& feat :
context.GetAllFeat()) {
350 key = to_string(feat.GetData().GetSubtype()) +
" " +
key;
361 for (
auto& it : m_Objs[
kEmptyCStr].GetMap()) {
362 if (it.first ==
"N" || it.first ==
"A") {
365 size_t n = it.first.find(
' ');
366 string key = it.first.substr(
n + 1);
368 string label =
key +
": [n] present";
380 for (
auto& obj : m_Objs[
kEmptyStr][it.first].GetObjects()) {
383 for (
auto& pp : obj2num) {
384 m_Objs[
label][
"[n] bioseq[s] [has] [(]" + to_string(pp.second) +
"[)] " +
key +
" features"].Info().Add(*pp.first);
399 if (
context.FeatExons().size()) {
453 if (m_Objs.empty()) {
464 size_t num_of_missing = 0,
467 for (
auto it : the_map) {
468 num_of_bioseqs += it.second->GetObjects().
size();
469 if (it.first.empty()) {
470 num_of_missing += it.second->GetObjects().size();
476 else if (tech != it.first) {
481 if (num_of_missing == num_of_bioseqs || (same && !num_of_missing)) {
484 summary += num_of_missing ?
"some missing, " :
"all present, ";
485 summary += same ?
"all same)" :
"some different)";
486 if (num_of_missing) {
487 if (num_of_missing == num_of_bioseqs) {
488 report[summary].
SetCount(num_of_missing);
503 return (ch ==
'A' || ch ==
'T' || ch ==
'G' || ch ==
'C');
509 static const size_t MIN_TITLE_SEQ_LEN = 19;
512 for (string::const_reverse_iterator it = title.rbegin(); it != title.rend(); ++it) {
519 if (
count >= MIN_TITLE_SEQ_LEN) {
524 return count >= MIN_TITLE_SEQ_LEN;
530 for (
auto& desc :
context.GetSeqdesc()) {
532 m_Objs[
"[n] defline[s] appear[S] to end with sequence characters"].Add(*
context.SeqdescObjRef(desc));
543 bool is_genomic =
false;
548 auto molinfo =
context.GetMolinfo();
552 if (!is_genomic || !is_dna) {
555 for (
auto& annot_it : bioseq.
GetAnnot()) {
556 if (annot_it->IsFtable()) {
564 if (feat->IsSetData()) {
583 if (descrs.
IsSet()) {
584 for (
auto descr : descrs.
Set()) {
585 if (descr->IsMolinfo()) {
586 molinfo = &(descr->SetMolinfo());
591 if (molinfo ==
nullptr) {
594 descrs.
Set().push_back(new_descr);
596 if (molinfo ==
nullptr) {
643 const string& object_name,
649 for (
auto& z : obj.second->GetMap()) {
650 collector[field_prefix + z.first][
" [n] " + object_name +
"[s] [is] missing field " + field_prefix + z.first]
661 if (
f->IsSetLabel() &&
f->GetLabel().IsStr() &&
f->IsSetData()) {
662 string field_name = field_prefix +
f->GetLabel().GetStr();
664 if (already_seen && !collector.
Exist(field_name)) {
666 string missing_label =
"[n] " + object_name +
"[s] [is] missing field " + field_name;
670 collector[field_name][missing_label].
Add(*ro);
673 collector[field_name][
"[n] " + object_name +
"[s] [has] field " + field_name +
" value '" +
GetFieldValueAsString(*
f) +
"'"].
Add(*
context.SeqdescObjRef(*desc),
false);
682 collector[field_prefix + z.first][
" [n] " + object_name +
"[s] [is] missing field " + field_prefix + z.first].
Add(*
context.SeqdescObjRef(*desc));
700 auto rep_seq =
context.BioseqObjRef();
701 for (
auto& desc :
context.GetAllSeqdesc()) {
722 size_t num_values = 0;
725 for (
auto& s : node.
GetMap()) {
732 value = s.first.substr(pos);
740 if (num_values > 1) {
755 for (
auto& s : node.
GetMap()) {
756 bool this_present =
true;
757 bool this_same =
true;
759 all_present &= this_present;
760 all_same &= this_same;
761 if (!all_present && !all_same) {
770 string summary =
"(";
772 summary +=
"all present";
774 summary +=
"some missing";
778 summary +=
"all same";
780 summary +=
"inconsistent";
789 for (
auto& s : original.
GetMap()){
790 for (
auto q : s.second->GetObjects()) {
791 new_home[s.first].
Add(*q);
803 return " " + orig_field_name;
804 }
else if (
NStr::Equal(orig_field_name,
"ProbeDB")) {
805 return " " + orig_field_name;
806 }
else if (
NStr::Equal(orig_field_name,
"Sequence Read Archive")) {
807 return " " + orig_field_name;
808 }
else if (
NStr::Equal(orig_field_name,
"BioProject")) {
809 return " " + orig_field_name;
810 }
else if (
NStr::Equal(orig_field_name,
"Assembly")) {
811 return " " + orig_field_name;
813 return orig_field_name;
822 if (m_Objs.empty()) {
827 bool all_present =
true;
828 bool all_same =
true;
830 if (all_present && all_same) {
834 string top_label =
"DBLink Report " +
GetSummaryLabel(all_present, all_same);
837 while (it != m_Objs.GetMap().end()) {
840 CopyNode(m_Objs[top_label][
" " + it->first], *it->second);
841 it = m_Objs.GetMap().erase(it);
848 bool this_present =
true;
849 bool this_same =
true;
852 for (
auto& s : it2.second->GetMap()){
853 for (
auto& q : s.second->GetObjects()) {
854 m_Objs[top_label][new_label][s.first].Add(*q);
876 auto rep_seq =
context.BioseqObjRef();
877 for (
auto& desc :
context.GetAllSeqdesc()) {
893 m_Objs[
"[n] Bioseq[s] [is] missing " + it.first +
" structured comment"].Add(*rep_seq);
901 m_Objs[
"[n] Bioseq[s] [is] missing " + it.first +
" structured comment"].Add(*ro);
923 if (m_Objs.empty()) {
928 bool all_present =
true;
929 bool all_same =
true;
931 if (all_present && all_same) {
935 string top_label =
"Structured Comment Report " +
GetSummaryLabel(all_present, all_same);
938 while (it != m_Objs.GetMap().end()) {
941 CopyNode(m_Objs[top_label][
" " + it->first], *it->second);
942 it = m_Objs.GetMap().erase(it);
949 bool this_present =
true;
950 bool this_same =
true;
952 string new_label = it2.first +
" " +
GetSummaryLabel(this_present, this_same);
953 for (
auto& s : it2.second->GetMap()) {
954 string sub_label = s.first;
955 if (this_present && this_same) {
958 for (
auto& q : s.second->GetObjects()) {
959 m_Objs[top_label][new_label][sub_label].Add(*q);
975 for (
auto& desc :
context.GetAllSeqdesc()) {
983 m_Objs[
"[n] sequence[s] [does] not include structured comments."].Add(*
context.BioseqObjRef());
994 for (
auto& desc :
context.GetAllSeqdesc()) {
999 for (
auto& it : user.
GetData()) {
1000 if (it->IsSetLabel() && it->GetLabel().IsStr() &&
NStr::Equal(it->GetLabel().GetStr(),
"BioProject")) {
1011 m_Objs[
"[n] sequence[s] [does] not include project."].Add(*
context.BioseqObjRef());
1022 for (
auto& desc :
context.GetAllSeqdesc()) {
1023 if (desc.IsUser()) {
1026 m_Objs[
"[n] sequence[s] [is] unverified"].Add(*
context.BioseqObjRef(),
false);
1056 m_Objs[
"[n] sequence[s] contain[S] nucleotides that are not ATCG or N"].Add(*
context.BioseqObjRef());
1072 if (
source &&
source->IsSource() &&
source->GetSource().IsSetOrg() &&
source->GetSource().GetOrg().IsSetTaxname() && title) {
1073 string taxname =
source->GetSource().GetOrg().GetTaxname();
1080 bool no_taxname_in_defline =
false;
1082 if (taxname_pos ==
NPOS) {
1083 no_taxname_in_defline =
true;
1087 no_taxname_in_defline =
NStr::CompareCase(title->
GetTitle().c_str() + taxname_pos, 1, taxname.size() - 1, taxname.c_str() + 1) != 0;
1089 no_taxname_in_defline =
true;
1092 if (no_taxname_in_defline) {
1106 for (
auto field: user.
GetData()) {
1107 if (field->IsSetData() && field->GetData().IsInt() && field->IsSetLabel() && field->GetLabel().IsStr() && field->GetLabel().GetStr() ==
"ProjectID") {
1120 for (
auto& desc :
context.GetAllSeqdesc()) {
1121 if (desc.IsUser()) {
1125 if (!proj_id.empty()) {
1126 m_Objs[proj_id][bioseq.
IsNa() ?
"N" :
"A"].Add(*
context.BioseqObjRef());
1137 if (m_Objs.empty()) {
1141 string all =
"[n] sequence[s] [has] project IDs ";
1142 string prots =
"[n] protein sequence[s] [has] project IDs ";
1143 string nucs =
"[n] nucleotide sequence[s] [has] project IDs ";
1144 auto& projects = m_Objs.GetMap();
1145 all += projects.size() > 1 ?
"(some different)" :
"(all same)";
1146 size_t count_prots = 0;
1147 size_t count_nucs = 0;
1148 for (
auto it: projects) {
1149 auto&
M = it.second->GetMap();
1150 if (
M.find(
"A") !=
M.end()) {
1153 if (
M.find(
"N") !=
M.end()) {
1157 prots += count_prots > 1 ?
"(some different)" :
"(all same)";
1158 nucs += count_nucs > 1 ?
"(some different)" :
"(all same)";
1159 for (
auto it : projects) {
1160 auto&
M = it.second->GetMap();
1161 if (
M.find(
"A") !=
M.end()) {
1163 res[
all][prots].
Add(*obj);
1166 if (
M.find(
"N") !=
M.end()) {
1168 res[
all][nucs].
Add(*obj);
1183 auto& cds =
context.FeatCDS();
1184 if (cds.size() < 2) {
1187 size_t count_pseudo = 0;
1188 size_t count_disrupt = 0;
1189 for (
auto feat : cds) {
1190 if (feat->IsSetComment() &&
NStr::Find(feat->GetComment(),
"coding region disrupted by sequencing gap") !=
NPOS) {
1193 if (
context.IsPseudo(*feat)) {
1197 if (count_disrupt != cds.size() && count_pseudo != cds.size()) {
1198 m_Objs[
"[n] mRNA bioseq[s] [has] multiple CDS features"].Add(*
context.BioseqObjRef());
1212 auto& cds =
context.FeatCDS();
1213 size_t count_plus = 0;
1214 size_t count_minus = 0;
1215 for (
auto& feat : cds) {
1240 vector<CSeq_feat*> features;
1242 for (; feat_ci; ++feat_ci) {
1247 new_inst->Assign(bioseq.
GetInst());
1251 for (
auto& feat : features) {
1264 const size_t MAX_N_IN_SEQ = 7;
1268 if (sum.
MinQ > MAX_N_IN_SEQ) {
1269 m_Objs[
"[n] sequence[s] contain[S] low quality region"].Add(*
context.BioseqObjRef());
1280 if (
set.IsSetDescr()) {
1281 for (
const auto& descr :
set.GetDescr().Get()) {
1282 if (descr->IsTitle()) {
1283 m_Objs[
"[n] title[s] on sets were found"].Add(*
context.SeqdescObjRef(*descr));
1292 m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1303 bool has_D_loop =
false;
1304 bool has_misc_feat_with_control_region =
false;
1305 for (
auto& feat :
all) {
1306 if (feat->IsSetData()) {
1313 has_misc_feat_with_control_region =
true;
1319 if (has_D_loop || has_misc_feat_with_control_region) {
1328 m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1378 m_Objs[
"[n] sequence[s] [is] shorter than 50 nt"].Add(*
context.BioseqObjRef());
1397 for (
auto& annot_it : bioseq.
GetAnnot()) {
1398 if (annot_it->IsFtable()) {
1403 m_Objs[
"[n] contig[s] [is] shorter than 200 nt"].Add(*
context.BioseqObjRef(fix));
1410 m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1432 m_Objs[
"[n] RNA bioseq[s] [is] proviral"].Add(*
context.BioseqObjRef());
1440 m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1452 if ((bio_src.*is_set_fn)()) {
1454 val = (bio_src.*get_fn)();
1456 else if (
val != (bio_src.*get_fn)()) {
1469 if (
mod->IsSetSubtype() &&
mod->GetSubtype() == subtype &&
mod->IsSetSubname()) {
1474 if (
mod->GetSubname() !=
val) {
1490 for (
const auto& subtype : bio_src.
GetSubtype()) {
1505 string taxname, isolate, strain;
1506 bool all_taxname_same =
true, all_isolate_same =
true, all_strain_same =
true;
1507 for (
auto& descr_bio_src :
context.GetSetBiosources()) {
1508 const CBioSource& bio_src = descr_bio_src->GetSource();
1509 if (
context.HasLineage(bio_src,
"",
"Viruses")) {
1511 m_Objs[
"[n] biosource[s] should have segment qualifier but [does] not"].Add(*
context.SeqdescObjRef(*descr_bio_src));
1514 if (all_taxname_same) {
1517 if (all_isolate_same) {
1520 if (all_strain_same) {
1524 if (!all_taxname_same) {
1525 m_Objs[
"Not all biosources have same taxname"];
1527 if (!all_isolate_same) {
1528 m_Objs[
"Not all biosources have same isolate"];
1530 if (!all_strain_same) {
1531 m_Objs[
"Not all biosources have same strain"];
1539 m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1549 for (
auto& qual : feat.
GetQual()) {
1571 for (
auto& feat :
context.GetFeat()) {
1581 if (
set.IsSetClass()) {
1586 m_Objs[
"[n] unwanted set wrapper[s]"].Add(*
context.BioseqSetObjRef());
1603 {
"Agricultutral",
"agricultural",
false },
1604 {
"Bacilllus",
"Bacillus",
false },
1605 {
"Enviromental",
"Environmental",
false },
1606 {
"Insitiute",
"institute",
false },
1607 {
"Instutite",
"institute",
false },
1608 {
"Instutute",
"Institute",
false },
1609 {
"P.R.Chian",
"P.R. China",
false },
1610 {
"PRChian",
"PR China",
false },
1611 {
"Scieces",
"Sciences",
false },
1612 {
"agricultral",
"agricultural",
false },
1613 {
"agriculturral",
"agricultural",
false },
1614 {
"biotechnlogy",
"biotechnology",
false },
1615 {
"Biotechnlogy",
"Biotechnology",
false },
1616 {
"biotechnolgy",
"biotechnology",
false },
1617 {
"biotechology",
"biotechnology",
false },
1618 {
"caputre",
"capture",
true },
1619 {
"casette",
"cassette",
true },
1620 {
"catalize",
"catalyze",
false },
1621 {
"charaterization",
"characterization",
false },
1622 {
"clonging",
"cloning",
false },
1623 {
"consevered",
"conserved",
false },
1624 {
"cotaining",
"containing",
false },
1625 {
"cytochome",
"cytochrome",
true },
1626 {
"diveristy",
"diversity",
true },
1627 {
"enivronment",
"environment",
false },
1628 {
"enviroment",
"environment",
false },
1629 {
"genone",
"genome",
true },
1630 {
"homologue",
"homolog",
true },
1631 {
"hypotethical",
"hypothetical",
false },
1632 {
"hypotetical",
"hypothetical",
false },
1633 {
"hypothetcial",
"hypothetical",
false },
1634 {
"hypothteical",
"hypothetical",
false },
1635 {
"indepedent",
"independent",
false },
1636 {
"insititute",
"institute",
false },
1637 {
"insitute",
"institute",
false },
1638 {
"institue",
"institute",
false },
1639 {
"instute",
"institute",
false },
1640 {
"muesum",
"museum",
true },
1641 {
"musuem",
"museum",
true },
1642 {
"nuclear shutting",
"nuclear shuttling",
true },
1643 {
"phylogentic",
"phylogenetic",
false },
1644 {
"protien",
"protein",
false },
1645 {
"puatative",
"putative",
false },
1646 {
"putaitve",
"putative",
false },
1647 {
"putaive",
"putative",
false },
1648 {
"putataive",
"putative",
false },
1649 {
"putatitve",
"putative",
false },
1650 {
"putatuve",
"putative",
false },
1651 {
"putatvie",
"putative",
false },
1652 {
"pylogeny",
"phylogeny",
false },
1653 {
"resaerch",
"research",
false },
1654 {
"reseach",
"research",
false },
1655 {
"reserach",
"research",
true },
1656 {
"reserch",
"research",
false },
1657 {
"ribosoml",
"ribosomal",
false },
1658 {
"ribossomal",
"ribosomal",
false },
1659 {
"scencies",
"sciences",
false },
1660 {
"scinece",
"science",
false },
1661 {
"simmilar",
"similar",
false },
1662 {
"structual",
"structural",
false },
1663 {
"subitilus",
"subtilis",
false },
1664 {
"sulfer",
"sulfur",
false },
1665 {
"technlogy",
"technology",
false },
1666 {
"technolgy",
"technology",
false },
1667 {
"Technlogy",
"Technology",
false },
1668 {
"Veterinry",
"Veterinary",
false },
1669 {
"Argricultural",
"Agricultural",
false },
1670 {
"transcirbed",
"transcribed",
false },
1671 {
"transcirption",
"transcription",
true },
1672 {
"uiniversity",
"university",
false },
1673 {
"uinversity",
"university",
false },
1674 {
"univercity",
"university",
false },
1675 {
"univerisity",
"university",
false },
1676 {
"univeristy",
"university",
false },
1677 {
"univesity",
"university",
false },
1678 {
"unversity",
"university",
true },
1679 {
"uviversity",
"university",
false },
1680 {
"anaemia",
nullptr,
false },
1681 {
"haem",
nullptr,
false },
1682 {
"haemagglutination",
nullptr,
false },
1683 {
"heam",
nullptr,
false },
1684 {
"mithocon",
nullptr,
false },
1694 #include "FLATFILE_FIND.inc"
1695 static constexpr TLocalFSM s_FSM{s_compact, s_hits_init_1, s_hits_init_2, s_states,
nullptr};
1707 string error =
"String not found: ";
1720 "FLATFILE_FIND_ONCALLER",
1721 "FLATFILE_FIND_ONCALLER_UNFIXABLE",
1722 "FLATFILE_FIND_ONCALLER_FIXABLE"
1726 for (
auto& desc :
context.GetAllSeqdesc()) {
1736 m_Objs[fixable][subitem].Add(*
context.SeqdescObjRef(desc, &desc));
1740 for (
auto& feat:
context.FeatAll()) {
1750 m_Objs[fixable][subitem].Add(*
context.SeqFeatObjRef(*feat, feat));
1759 m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1828 if (m_Objs.GetMap().find(
kEmptyStr) == m_Objs.GetMap().end()) {
1830 m_Objs[
"No sequences longer than 20,000 nt found"];
1835 m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1844 if (m_Objs[
"N"].GetCount()) {
1853 if (!m_Objs[
"F"].GetCount()) {
1855 for (
auto id : bioseq.
GetId()) {
1867 for (
const auto& descr : bioseq.
GetDescr().
Get()) {
1868 if (descr->IsMolinfo() && descr->GetMolinfo().CanGetTech()) {
1888 if (m_Objs[
"C"].GetCount() && !m_Objs[
"N"].GetCount()) {
1897 static constexpr
auto suspicious_id_re =
ctll::fixed_string{
"chromosome|plasmid|mito|chloroplast|apicoplast|plastid|^chr|^lg|\\bnw_|\\bnz_|\\bnm_|\\bnc_|\\bac_|cp\\d\\d\\d\\d\\d\\d|^x$|^y$|^z$|^w$|^mt$|^pltd$|^chl$" };
1902 static CRegexp regexp(
"chromosome|plasmid|mito|chloroplast|apicoplast|plastid|^chr|^lg|\\bNW_|\\bNZ_|\\bNM_|\\bNC_|\\bAC_|CP\\d\\d\\d\\d\\d\\d|^X$|^Y$|^Z$|^W$|^MT$|^PLTD$|^CHL$",
CRegexp::fCompile_ignore_case);
1907 return ctre::search<suspicious_id_re>(
id);
1915 bool report =
false;
1916 for (
const auto&
id : bioseq.
GetId()) {
1917 if (id->IsLocal()) {
1918 if (id->GetLocal().IsStr() &&
SuspiciousId(id->GetLocal().GetStr())) {
1923 else if (id->IsGeneral()) {
1924 if (id->GetGeneral().IsSetDb() &&
SuspiciousId(id->GetGeneral().GetDb())) {
1928 if (id->GetGeneral().IsSetTag() && id->GetGeneral().GetTag().IsStr() &&
SuspiciousId(id->GetGeneral().GetTag().GetStr())) {
1935 m_Objs[
"[n] sequence[s] [has] suspicious identifiers"].Add(*
context.BioseqSetObjRef());
1943 m_ReportItems = m_Objs.Export(*
this,
false)->GetSubitems();
1982 if (
set.IsSetSeq_set()) {
1983 for (
const auto& se :
set.GetSeq_set()) {
1984 if (!se->IsSetDescr()) {
1988 for (
const auto& descr : se->GetDescr().Get()) {
1989 if (!descr->IsSource()) {
1992 const CBioSource& bio_src = descr->GetSource();
2004 for (
const auto& subtype : bio_src.
GetSubtype()) {
2006 if (subtype->IsSetSubtype()) {
2010 m_Objs[
"one or more chromosomes are present"];
2015 m_Objs[
"one or more chromosomes are present"];
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
const string & GetTaxname(void) const
bool IsSetOrgMod(void) const
const COrgName & GetOrgname(void) const
bool IsSetTaxname(void) const
TSeqPos GetLength(void) const
bool IsSetLength(void) const
void Search(const char *input, VoidCall1 found_callback) const
virtual vector< CRef< CReportItem > > GetSubitems() const =0
static void Add(TReportObjectList &list, TReportObjectSet &hash, CReportObj &obj, bool unique=true)
TReportObjectList & GetObjects()
CReportNode & Severity(CReportItem::ESeverity s)
CRef< CReportItem > Export(CDiscrepancyCore &test, bool unique=true) const
static bool Exist(TReportObjectSet &hash, CReportObj &obj)
static EFeatureLocationAllowed AllowedFeatureLocation(ESubtype subtype)
@ eFeatureLocationAllowed_NucOnly
@ eFeatureLocationAllowed_ProtOnly
@ eFeatureLocationAllowed_Any
ESubtype GetSubtype(void) const
@Seq_descr.hpp User-defined methods of the data storage class.
CSeq_feat_EditHandle –.
namespace ncbi::objects::
static bool IsAa(EMol mol)
static bool IsNa(EMol mol)
Base class for all serializable objects.
Template class for iteration on objects of class C (non-modifiable version)
Template class for iteration on objects of class C.
@ eObjectType_StructuredComment
EObjectType GetObjectType() const
container_type::iterator iterator
API (CDeflineGenerator) for computing sequences' titles ("definitions").
vector< CRef< CReportObj > > TReportObjectList
#define DISCREPANCY_AUTOFIX(name)
#define DISCREPANCY_CASE1(name, type, group, descr,...)
#define DISCREPANCY_CASE0(name, sname, type, group, descr)
#define DISCREPANCY_CASE(name, type, group, descr)
#define DISCREPANCY_SUMMARIZE(name)
vector< CConstRef< CObject > > GetObjects(CSeq_entry_Handle seh, const string &field, CFieldNamePanel::EFieldType field_type, int subtype, const string &ncRNA_class, CConstRef< objects::CSeq_submit > submit, CRef< CEditingActionConstraint > constraint, vector< CSeq_entry_Handle > *descr_context=nullptr)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static const char * str(char *buf, int n)
void ReverseComplement(const BidirectionalIterator &first, const BidirectionalIterator &last)
constexpr size_t ArraySize(const Element(&)[Size])
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
static EAccessionInfo IdentifyAccession(const CTempString &accession, TParseFlags flags=fParse_AnyRaw)
Deduces information from a bare accession a la WHICH_db_accession; may report false negatives on prop...
EAccessionInfo
For IdentifyAccession (below)
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
void SetDescr(TDescr &v) const
void SetInst_Mol(TInst_Mol v) const
const CSeqFeatData & GetData(void) const
void Remove(void) const
Remove the feature from Seq-annot.
void SetInst(TInst &v) const
void Remove(ERemoveMode mode=eRemoveSeq_entry) const
bool IsSetData(void) const
const TInst & GetInst(void) const
const CSeq_feat & GetMappedFeature(void) const
Feature mapped to the master sequence.
CConstRef< CSeq_feat > GetSeq_feat(void) const
Get current seq-feat.
bool IsMatch(CTempString str, TMatch flags=fMatch_default)
Check existence substring which match a specified pattern.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
NCBI_NS_STD::string::size_type SIZE_TYPE
static SIZE_TYPE FindNoCase(const CTempString str, const CTempString pattern, SIZE_TYPE start, SIZE_TYPE end, EOccurrence which=eFirst)
Find the pattern in the specified range of a string using a case insensitive search.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool EqualNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive equality of a substring with another string.
static bool Equal(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2, ECase use_case=eCase)
Test for equality of a substring with another string.
static string & ReplaceInPlace(string &src, const string &search, const string &replace, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
const char *const kEmptyCStr
Empty "C" string (points to a '\0').
static string & ToLower(string &str)
Convert string to lower case – string& version.
static int CompareCase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-sensitive compare of a substring with another string.
@ eNocase
Case insensitive compare.
static const char label[]
const TSubtype & GetSubtype(void) const
Get the Subtype member data.
TGenome GetGenome(void) const
Get the Genome member data.
bool IsSetOrg(void) const
Check if a value has been assigned to Org data member.
bool IsSetSubtype(void) const
Check if a value has been assigned to Subtype data member.
void SetGenome(TGenome value)
Assign a value to Genome data member.
bool IsSetGenome(void) const
Check if a value has been assigned to Genome data member.
EGenome
biological context
const TStr & GetStr(void) const
Get the variant data.
bool IsSetData(void) const
The object itself. Check if a value has been assigned to Data data member.
bool IsStr(void) const
Check if variant Str is selected.
bool IsSetType(void) const
Type of object within class. Check if a value has been assigned to Type data member.
bool IsStrs(void) const
Check if variant Strs is selected.
const TStrs & GetStrs(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
bool IsStr(void) const
Check if variant Str is selected.
const TStr & GetStr(void) const
Get the variant data.
const TData & GetData(void) const
Get the Data member data.
const TType & GetType(void) const
Get the Type member data.
vector< CStringUTF8 > TStrs
const TMod & GetMod(void) const
Get the Mod member data.
bool IsSetData(void) const
The specific data. Check if a value has been assigned to Data data member.
bool IsSetQual(void) const
Qualifiers. Check if a value has been assigned to Qual data member.
const TQual & GetQual(void) const
Get the Qual member data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetAccession(void) const
Check if a value has been assigned to Accession data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ eClass_pop_set
population study
@ eClass_phy_set
phylogenetic study
@ eClass_mut_set
set of mutations
@ eClass_eco_set
ecological sample study
@ eClass_small_genome_set
viral segments or mitochondrial minicircles
TRepr GetRepr(void) const
Get the Repr member data.
bool IsSetCompleteness(void) const
Check if a value has been assigned to Completeness data member.
const TUser & GetUser(void) const
Get the variant data.
const TInst & GetInst(void) const
Get the Inst member data.
TTopology GetTopology(void) const
Get the Topology member data.
bool IsSetAnnot(void) const
Check if a value has been assigned to Annot data member.
bool IsSetRepr(void) const
Check if a value has been assigned to Repr data member.
bool IsSetMol(void) const
Check if a value has been assigned to Mol data member.
bool CanGetTopology(void) const
Check if it is safe to call GetTopology method.
const TTitle & GetTitle(void) const
Get the variant data.
const TSource & GetSource(void) const
Get the variant data.
bool IsSetBiomol(void) const
Check if a value has been assigned to Biomol data member.
const TAnnot & GetAnnot(void) const
Get the Annot member data.
const TId & GetId(void) const
Get the Id member data.
TTech GetTech(void) const
Get the Tech member data.
const Tdata & Get(void) const
Get the member data.
bool IsSetInst(void) const
The sequence data. Check if a value has been assigned to Inst data member.
TLength GetLength(void) const
Get the Length member data.
TMol GetMol(void) const
Get the Mol member data.
bool IsSetLength(void) const
Length of sequence in residues. Check if a value has been assigned to Length data member.
TSource & SetSource(void)
Select the variant.
bool IsSetDescr(void) const
Descriptors. Check if a value has been assigned to Descr data member.
bool IsSet(void) const
Check if a value has been assigned to data member.
TBiomol GetBiomol(void) const
Get the Biomol member data.
void SetBiomol(TBiomol value)
Assign a value to Biomol data member.
bool CanGetId(void) const
Check if it is safe to call GetId method.
bool IsSetTech(void) const
Check if a value has been assigned to Tech data member.
const TFtable & GetFtable(void) const
Get the variant data.
TCompleteness GetCompleteness(void) const
Get the Completeness member data.
const TData & GetData(void) const
Get the Data member data.
bool IsSetId(void) const
Equivalent identifiers. Check if a value has been assigned to Id data member.
Tdata & Set(void)
Assign a value to data member.
const TDescr & GetDescr(void) const
Get the Descr member data.
const TMolinfo & GetMolinfo(void) const
Get the variant data.
TMolinfo & SetMolinfo(void)
Select the variant.
bool CanGetInst(void) const
Check if it is safe to call GetInst method.
@ eRepr_delta
sequence made by changes (delta) to others
@ eCompleteness_complete
complete biological entity
@ eTech_targeted
targeted locus sets/studies
@ eTech_tsa
transcriptome shotgun assembly
@ eTech_wgs
whole genome shotgun sequencing
@ eBiomol_pre_RNA
precursor RNA of any sort really
@ e_Source
source of materials, includes Org-ref
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is smart and slim</td> n<td> orig</td> n</tr> n<tr> n<td> last_modified</td> n<td> optional</td> n<td> Integer</td> n<td class=\"description\"> The blob last modification If provided then the exact match will be requested with n the Cassandra storage corresponding field value</td> n<td> Positive integer Not provided means that the most recent match will be selected</td> n<td></td> n</tr> n<tr> n<td> use_cache</td> n<td> optional</td> n<td> String</td> n<td class=\"description\"> The option controls if the Cassandra LMDB cache and or database should be used It n affects the seq id resolution step and the blob properties lookup step The following n options are BIOSEQ_INFO and BLOB_PROP at all
void ReverseComplementFeature(CSeq_feat &feat, CScope &scope)
Simultaneous search of multiple RegEx patterns in the input string.
const struct ncbi::grid::netcache::search::fields::KEY key
const GenericPointer< typename T::ValueType > T2 value
const CharType(& source)[N]
static const string kFixable
const string kStructuredCommentReport
const string & kPreviouslySeenFields
static const string kMrnaSequenceMinusStrandFeatures
static bool IsSegmentSubtype(const CBioSource &bio_src)
static bool s_areCompatible(CBioSource::EGenome Location, CSubSource::ESubtype Qualifier)
void UnitTest_FLATFILE_FIND()
Checking that FLATFILE_FIND.inc is in sync with kSpellFixes. If the array is changed,...
string AdjustDBLinkFieldName(const string &orig_field_name)
static const CSubSource::ESubtype eSubtype_unknown
const string kMissingDBLink
const string kStructuredCommentObservedPrefixes
const string kSomeIdenticalDeflines
const string &(CBioSource::* FnGet)() const
string GetFieldValueAsString(const CUser_field &field)
const string & kPreviouslySeenObjects
static const string kNonFixable
static bool SuspiciousId(const string &s)
static const string kInconsistentMolinfoTech
void AddUserObjectFieldItems(const CSeqdesc *desc, CReportObj &rep_seq, CReportNode &collector, CReportNode &previously_seen, CDiscrepancyContext &context, const string &object_name, const string &field_prefix=kEmptyStr)
const string kDBLinkObjectList
static const string kInconsistentMolinfoTechSummary
const string kStructuredCommentObservedPrefixesThis
static bool IsATGC(char ch)
string GetSummaryLabel(bool all_present, bool all_same)
static const size_t MIN_SEQUENCE_LEN
const string & kPreviouslySeenFieldsThis
static bool EndsWithSequence(const string &title)
static bool FixTextInObject(CSerialObject *obj, size_t misspell_idx)
void AnalyzeFieldReport(CReportNode &node, bool &all_present, bool &all_same)
const string kSequencesWithGaps
const string kIdenticalDeflines
const string kDeflineExists
static void FindFlatfileText(const char *str, bool *result)
static bool IsMolProd(int biomol)
void AnalyzeField(CReportNode &node, bool &all_present, bool &all_same)
static bool CompareOrGetString(const CBioSource &bio_src, FnIsSet is_set_fn, FnGet get_fn, string &val)
void CopyNode(CReportNode &new_home, CReportNode &original)
const string kStructuredCommentPrevious
const string kNoTaxnameInDefline
static SpellFixData kSpellFixes[]
static bool CompareOrgModValue(const CBioSource &bio_src, COrgMod::TSubtype subtype, string &val)
static constexpr auto suspicious_id_re
const string kStructuredCommentFieldPrefix
static const size_t kSpellFixesSize
const string kDBLinkFieldCountTop
static string GetProjectID(const CUser_object &user)
const string kUniqueDeflines
const string kAllUniqueDeflines
static bool IsMicroSatellite(const CSeq_feat &feat)
const string kDBLinkCollect
bool(CBioSource::* FnIsSet)() const
const string kStructuredCommentsSeqs
C++ wrappers for the Perl-compatible regular expression (PCRE) library.
static CS_CONTEXT * context