142 #define NCBI_USE_ERRCODE_X Objtools_Validator
147 using namespace sequence;
171 shared_ptr<SValidatorContext> pContext,
175 m_ErrRepository{errs},
178 x_Init(options, pContext->CumulativeInferenceCount, pContext->NotJustLocalOrGeneral, pContext->HasRefSeq);
184 Reset(initialInferenceCount, notJustLocalOrGeneral, hasRefSeq);
220 return context.PreprocessHugeFile ||
236 return edit::CHugeAsnReader::IsHugeSet(setClass);
385 if (type_info == CSeqdesc::GetTypeInfo()) {
389 }
else if (type_info == CSeq_feat::GetTypeInfo()) {
392 }
else if (type_info == CBioseq::GetTypeInfo()) {
395 }
else if (type_info == CBioseq_set::GetTypeInfo()) {
398 }
else if (type_info == CSeq_annot::GetTypeInfo()) {
401 }
else if (type_info == CSeq_graph::GetTypeInfo()) {
404 }
else if (type_info == CSeq_align::GetTypeInfo()) {
407 }
else if (type_info == CSeq_entry::GetTypeInfo()) {
410 }
else if (type_info == CBioSource::GetTypeInfo()) {
413 }
else if (type_info == COrg_ref::GetTypeInfo()) {
416 }
else if (type_info == CPubdesc::GetTypeInfo()) {
419 }
else if (type_info == CSeq_submit::GetTypeInfo()) {
627 if (sc_GenomeRaiseExceptEmblDdbjRefSeqArray.find(et) != sc_GenomeRaiseExceptEmblDdbjRefSeqArray.end()) {
634 if (sc_GenomeRaiseExceptEmblDdbjArray.find(et) != sc_GenomeRaiseExceptEmblDdbjArray.end()) {
641 if (sc_GenomeRaiseArray.find (et) != sc_GenomeRaiseArray.end()) {
666 item->SetErrIndex(et);
676 item->SetObj_content(content_label);
680 item->SetFeatureId(feature_id);
685 item->SetBioseq(bioseq_label);
695 item->SetLocation(loc_label);
697 item->SetSeqOffset(
offset);
703 item->SetProduct_loc(product_label);
711 item->SetAccession(accession);
716 item->SetAccnver(accession);
737 item->SetFeatureObjDescFromFields();
794 const auto isSetClass =
st.IsSetClass();
796 if (isSetClass &&
GetContext().PreprocessHugeFile) {
797 if (
auto setClass =
st.GetClass();
IsHugeSet(setClass)) {
838 ctx.IsSet() &&
ctx.GetSet().IsSetClass()) {
839 if (
auto setClass =
ctx.GetSet().GetClass();
IsHugeSet(setClass)) {
840 string desc{
"DESCRIPTOR: "};
842 desc +=
"BIOSEQ-SET: ";
919 string desc =
"ANNOTATION: ";
951 string desc =
"GRAPH: ";
989 string desc(
"GRAPH: ");
1035 string desc =
"ALIGNMENT: ";
1037 desc += align.ENUM_METHOD_NAME(EType)()->FindName(align.
GetType(),
true);
1043 desc +=
", dim=UNASSIGNED";
1077 if (entry.
IsSeq()) {
1079 }
else if (entry.
IsSet()) {
1082 string desc =
"SEQ-ENTRY: ";
1113 string desc =
"BioSource: ";
1139 string desc =
"Org-ref: ";
1164 string desc =
"Pubdesc: ";
1189 string desc =
"Seq-submit: ";
1200 const string& accession,
1239 reasons =
msg +
" - " + reasons;
1276 bool has_mult =
false;
1282 desc_ci && !has_mult;
1284 if (desc_ci->GetSource().IsSetOrg()) {
1285 const COrg_ref& org = desc_ci->GetSource().GetOrg();
1289 (*it)->IsSetTag() && (*it)->GetTag().IsId()) {
1290 int this_id = (*it)->GetTag().GetId();
1294 }
else if (first_id == 0) {
1296 }
else if (first_id != this_id) {
1306 if (has_mult || (phage_id > 0 && first_id > 0)) {
1308 "There are multiple taxonIDs in this RefSeq record.",
1373 "Non-ascii chars in input ASN.1 strings", *seq);
1381 bool has_gi =
false;
1383 bool has_nucleotide_sequence =
false;
1386 bi && (!
IsINSDInSep() || !has_gi || !has_nucleotide_sequence);
1389 if ((*it)->IsGi()) {
1393 if (bi->IsSetInst_Mol() && bi->IsNa()) {
1394 has_nucleotide_sequence =
true;
1403 "INSD and RefSeq records should not be present in the same set", *
m_TSE);
1409 vector<string> id_strings;
1414 if (!IsNCBIFILESeqId(**it)) {
1416 (*it)->GetLabel(&
label);
1417 id_strings.push_back(
label);
1421 stable_sort (id_strings.begin(), id_strings.end());
1422 for (vector<string>::iterator id_str_it = id_strings.begin();
1423 id_str_it != id_strings.end();
1425 string pattern = (*id_str_it).substr(0, 30);
1426 string first_id = *id_str_it;
1427 vector<string>::iterator cmp_it = id_str_it;
1433 "First 30 characters of " + first_id +
" and " +
1442 vector < int > feature_ids;
1444 const CSeq_feat& sf = fi->GetOriginalFeature();
1450 if (feature_ids.size() > 0) {
1452 stable_sort (feature_ids.begin(), feature_ids.end());
1453 vector <int>::iterator it = feature_ids.begin();
1456 while (it != feature_ids.end()) {
1459 ITERATE( vector<CSeq_feat_Handle>, feat_it, handles ) {
1463 while (it != feature_ids.end() && *it ==
id) {
1466 if (it != feature_ids.end()) {
1478 bool has_nongps =
false;
1479 bool has_gps =
false;
1482 if (
si->IsSetClass()) {
1496 if (has_nongps && has_gps) {
1498 "Genomic product set and mut/pop/phy/eco set records should not be present in the same set",
1520 "Skipping validation of remaining /inference qualifiers",
1525 string prefix, remainder;
1527 size_t num_accessions = 0;
1529 for (
size_t i = 0;
i < accessions.size();
i++) {
1531 string acc_prefix, accession;
1537 if (num_accessions > 0) {
1545 "Skipping validation of remaining /inference qualifiers",
1562 }
catch (
const exception& e ) {
1564 string(
"Exception while validating bioseq. EXCEPTION: ") +
1568 }
else if (seh.
IsSet()) {
1575 }
catch (
const exception& e ) {
1577 string(
"Exception while validating bioseq set. EXCEPTION: ") +
1594 " TPAs with history and " +
1596 " without history in this record.", *seq);
1602 " TPAs without history in this record, but the record has a gi number assignment.", *
m_TSE);
1607 call_once(
SetContext().ProteinHaveGeneralIDOnceFlag,
1610 "INDEXER_ONLY - Protein bioseqs have general seq-id.",
1624 "There is 1 mispackaged feature in this record.",
1633 "There is 1 mispackaged feature in this small genome set record.",
1640 " gene xrefs and no gene features in this record.", *
m_TSE);
1667 "Far fetch failures caused some validator tests to be bypassed",
1693 "Record release date has already passed", ss);
1710 "Bad last name '" +
last +
"'", ss);
1717 "Bad first name '" +
first +
"'", ss);
1722 "Bad first and last name", ss);
1731 if (
names.IsStd()) {
1733 if ( (*name)->GetName().IsName() ) {
1734 const CName_std& nstd = (*name)->GetName().GetName();
1741 "Bad last name '" +
last +
"'", ss);
1748 "Bad first name '" +
first +
"'", ss);
1753 "Bad first and last name", ss);
1795 if(
set.IsSetClass() &&
1802 call_once(
SetContext().WgsSetInSeqSubmitOnceFlag,
1805 "File was created as a wgs-set, but should be a batch submission instead.",
1813 "File was created as a wgs-set, but should be a batch submission instead.",
1833 switch (sah.
Which()) {
1837 for (
CFeat_CI fi (sah); fi; ++fi) {
1838 const CSeq_feat& sf = fi->GetOriginalFeature();
1850 const CSeq_align& sa = ai.GetOriginalSeq_align();
1862 const CSeq_graph& sg = gi->GetOriginalGraph();
1983 "dbxref value " + xref.
GetTag().
GetStr() +
" has SGML",
1988 "dbxref value " + xref.
GetTag().
GetStr() +
" contains space character",
1993 "dbxref database " + db +
" has SGML",
2008 "Illegal db_xref type " + db +
" (" + dbv +
")", obj,
ctx);
2012 bool refseq_db =
false, src_db =
false;
2013 string correct_caps;
2014 xref.
GetDBFlags(refseq_db, src_db, correct_caps);
2015 string message =
"Illegal db_xref type " + db +
" (" + dbv +
"), legal capitalization is " + correct_caps;
2017 message +=
", but should not be used on an OrgRef";
2019 message +=
", but should only be used on an OrgRef";
2027 "RefSeq-specific db_xref type " + db +
" (" + dbv +
") should not be used on a non-RefSeq OrgRef",
2031 "db_xref type " + db +
" (" + dbv +
") is only legal for RefSeq",
2037 "RefSeq-specific db_xref type " + db +
" (" + dbv +
") should not be used on an OrgRef",
2041 "db_xref type " + db +
" (" + dbv +
") should not be used on an OrgRef",
2046 "db_xref type " + db +
" (" + dbv +
") should only be used on an OrgRef",
2051 if (isStr && db ==
"GeneID") {
2053 "db_xref type " + db +
" (" + dbv +
") is required to be an integer",
2069 && (*xref)->IsSetDb()) {
2073 "BioSource uses db " + last_db +
" multiple times",
2076 last_db = (*xref)->GetDb();
2094 lc.id_prv =
lc.id_cur;
2095 lc.strand_prv =
lc.strand_cur;
2096 lc.int_prv =
lc.int_cur;
2108 id_cur = &int_cur->
GetId();
2122 static const string kSpaceLeftFirst =
"Should not specify 'space to left' at first position of non-circular sequence";
2123 static const string kSpaceRightLast =
"Should not specify 'space to right' at last position of non-circular sequence";
2125 static const string kSpaceLeftCircle =
"Should not specify 'circle to left' except at first position of circular sequence";
2126 static const string kSpaceRightCircle =
"Should not specify 'circle to right' except at last position of circular sequence";
2132 bool has_fuzz_from =
false;
2133 bool has_fuzz_to =
false;
2137 has_fuzz_from =
true;
2143 if (! has_fuzz_from && ! has_fuzz_to) {
2148 if (has_fuzz_from && has_fuzz_to && fuzz_from == fuzz_to) {
2152 "Should not specify 'space to left' for both ends of interval", obj);
2157 "Should not specify 'space to right' for both ends of interval", obj);
2162 "Should not specify 'origin of circle' for both ends of interval", obj);
2234 for (; lit; ++lit) {
2236 switch (loc_choice) {
2255 unsigned int num_mix = 0;
2257 for (; lit; ++lit) {
2269 lc.unmarked_strand =
false;
2270 lc.mixed_strand =
false;
2271 lc.has_other =
false;
2272 lc.has_not_other =
false;
2273 lc.id_cur =
nullptr;
2274 lc.id_prv =
nullptr;
2275 lc.int_cur =
nullptr;
2276 lc.int_prv =
nullptr;
2286 if (
lc.id_cur &&
lc.id_prv &&
2288 if (
lc.strand_prv !=
lc.strand_cur) {
2293 lc.unmarked_strand =
true;
2295 lc.mixed_strand =
true;
2301 lc.has_other =
true;
2303 lc.has_not_other =
true;
2311 switch (loc.Which()) {
2313 lc.int_cur = &loc.GetInt();
2316 lc.has_other =
true;
2318 if ((!
lc.chk) && lowerSev) {
2320 TSeqPos fr = loc.GetInt().GetFrom();
2321 TSeqPos to = loc.GetInt().GetTo();
2322 if (fr < length && to >= length) {
2331 lc.strand_cur = loc.GetPnt().IsSetStrand() ?
2334 lc.has_other =
true;
2336 lc.id_cur = &loc.GetPnt().GetId();
2338 lc.int_prv =
nullptr;
2341 lc.strand_cur = loc.GetPacked_pnt().IsSetStrand() ?
2344 lc.has_other =
true;
2346 lc.id_cur = &loc.GetPacked_pnt().GetId();
2348 lc.int_prv =
nullptr;
2356 for (
auto l : loc.GetMix().Get()) {
2363 lc.id_cur =
nullptr;
2364 lc.int_prv =
nullptr;
2374 lc.prefix +
": SeqLoc [" + lbl +
"] out of range", obj);
2380 lc.strand_prv =
lc.strand_cur;
2381 lc.id_prv =
lc.id_cur;
2383 }
catch(
const exception& e ) {
2386 "Exception caught while validating location " +
2387 label +
". Exception: " + e.what(), obj);
2390 lc.id_cur =
nullptr;
2391 lc.int_prv =
nullptr;
2396 (
const CSeq_loc& loc,
2398 bool report_abutting,
2399 const string& prefix,
2409 if (
lc.has_other &&
lc.has_not_other) {
2412 prefix +
": Inconsistent use of other strand SeqLoc [" +
label +
"]", obj);
2413 }
else if (
lc.has_other &&
NStr::Equal(prefix,
"Location")) {
2416 "Strand 'other' in location", obj);
2424 "Duplicate exons in location", obj);
2429 loc.GetLabel(&
label);
2431 prefix +
": SeqLoc [" +
label +
"] has nested SEQLOC_MIX elements",
2439 bool trans_splice =
false;
2440 bool circular_rna =
false;
2441 bool exception =
false;
2444 sfp =
dynamic_cast<const CSeq_feat*
>(&obj);
2449 lc.mixed_strand =
false;
2450 lc.unmarked_strand =
false;
2458 trans_splice =
true;
2461 circular_rna =
true;
2473 prefix +
": Adjacent intervals in SeqLoc [" +
2474 loc_lbl +
"]", obj);
2477 if (trans_splice && !
NStr::Equal(prefix,
"Product")) {
2478 CSeq_loc_CI li(loc);
2486 bool ordered =
true;
2487 bool circular =
false;
2499 loc.GetLabel(&
label);
2501 "Exception caught while validating location " +
2502 label +
". Exception: " + ex.
what(), obj);
2505 if (
lc.mixed_strand ||
lc.unmarked_strand || !ordered) {
2506 if (loc_lbl.empty()) {
2509 if (
lc.mixed_strand) {
2512 prefix +
": Mixed strands in SeqLoc ["
2513 + loc_lbl +
"] in small genome set - set trans-splicing exception if appropriate", obj);
2520 prefix +
": Mixed strands in SeqLoc ["
2521 + loc_lbl +
"]", obj);
2523 }
else if (
lc.unmarked_strand) {
2525 prefix +
": Mixed plus and unknown strands in SeqLoc ["
2526 + loc_lbl +
"]", obj);
2528 if (!ordered && !circular_rna) {
2531 prefix +
": Intervals out of order in SeqLoc [" +
2532 loc_lbl +
"]", obj);
2535 prefix +
": Intervals out of order in SeqLoc [" +
2536 loc_lbl +
"]", obj);
2550 if (loc_lbl.empty()) {
2551 loc.GetLabel(&loc_lbl);
2554 prefix +
"Intervals out of order in SeqLoc [" +
2555 loc_lbl +
"]", obj);
2560 if (loc_lbl.empty()) {
2561 loc.GetLabel(&loc_lbl);
2564 prefix +
": Mixed strands in SeqLoc [" +
2565 loc_lbl +
"]", obj);
2592 if ((*it)->IsMolinfo() && (*it)->GetMolinfo().IsSetTech()
2602 bool is_other =
false;
2603 bool has_gi =
false;
2606 if ((*it)->IsOther()) {
2609 }
else if ((*it)->IsGi()) {
2614 if (!is_other || has_gi) {
2627 if ((*it)->IsMolinfo() && (*it)->GetMolinfo().IsSetTech()
2637 bool is_other =
false;
2638 bool has_gi =
false;
2641 if ((*it)->IsOther()) {
2644 }
else if ((*it)->IsGi()) {
2649 if (!is_other || has_gi) {
2666 "No source information included on this record.", se);
2675 for (
size_t i = 0;
i < num_no_source; ++
i ) {
2677 "No organism name included in the source. Other qualifiers may exist.",
2769 if ( parent->
IsSet() ) {
2771 if (
set.IsSetClass() &&
set.GetClass() == clss ) {
2782 size_t pos = comment.find(
'[', 0);
2783 while ( pos != string::npos ) {
2786 if (
isdigit((
unsigned char) comment[pos]) ) {
2788 if (comment[pos] ==
'0') {
2791 while (
isdigit((
unsigned char) comment[pos]) ) {
2794 if ( comment[pos] ==
']' && okay ) {
2799 pos = comment.find(
'[', pos);
2808 if ( sid && sid->
IsOther() ) {
2814 if (
GetTSE().IsSeq() ) {
2827 vector<TEntrezId>& pmids, vector<TEntrezId>& muids, vector<int>& serials,
2828 vector<string>& published_labels, vector<string>& unpublished_labels)
2831 if ((*it)->IsPub()) {
2846 vector<TEntrezId> pmids;
2847 vector<TEntrezId> muids;
2848 vector<int> serials;
2849 vector<string> published_labels;
2850 vector<string> unpublished_labels;
2864 if (
f->IsSetCit() &&
f->GetCit().IsPub()) {
2868 if ((*cit_it)->IsPmid()) {
2869 vector<TEntrezId>::iterator it = pmids.begin();
2870 while (it != pmids.end() && !found) {
2871 if (*it == (*cit_it)->GetPmid()) {
2878 "Citation on feature refers to uid ["
2880 +
"] not on a publication in the record",
2881 f->GetOriginalFeature());
2883 }
else if ((*cit_it)->IsMuid()) {
2884 vector<TEntrezId>::iterator it = muids.begin();
2885 while (it != muids.end() && !found) {
2886 if (*it == (*cit_it)->GetMuid()) {
2893 "Citation on feature refers to uid ["
2895 +
"] not on a publication in the record",
2896 f->GetOriginalFeature());
2898 }
else if ((*cit_it)->IsEquiv()) {
2914 vector<string>::iterator unpub_it = unpublished_labels.begin();
2915 while (unpub_it != unpublished_labels.end() && !found) {
2916 size_t it_len =(*unpub_it).length();
2922 vector<string>::iterator pub_it = published_labels.begin();
2924 while (pub_it != published_labels.end() && !found) {
2925 size_t it_len =(*pub_it).length();
2928 "Citation on feature needs to be updated to published uid",
2929 f->GetOriginalFeature());
2936 "Citation on feature refers to a publication not in the record",
2937 f->GetOriginalFeature());
2957 const string&
str = *it;
2959 const char& ch = *c_it;
2960 unsigned char chu = ch;
2961 if (ch > 127 || (ch < 32 && ch !=
'\t' && ch !=
'\r' && ch !=
'\n')) {
2973 class CScriptTagTextFsm :
public CTextFsm<int>
2976 CScriptTagTextFsm() {
2977 const char * script_tags[] = {
2978 "<script",
"<object",
"<applet",
"<embed",
"<form",
2979 "javascript:",
"vbscript:"};
2981 AddWord(script_tags[idx],
true);
2988 bool DoesStrHaveFsmHits(
const string &
str) {
2989 int state = GetInitialState();
2992 if( IsMatchFound(
state) ) {
3000 static CScriptTagTextFsm s_ScriptTagFsm;
3005 if (s_ScriptTagFsm.DoesStrHaveFsmHits(*it)) {
3007 "Script tag found in item", obj);
3020 CSeq_loc_CI curr(loc);
3024 CSeq_loc_CI
prev = curr;
3050 for ( CSeq_loc_CI it(loc); it && !rval; ++it ) {
3051 if (it.GetSeq_id().IsGi()) {
3081 }
else if (!se.
IsSet()) {
3116 while (pub && !pub->IsSub()) {
3127 if (
si->IsSetClass ()) {
3140 const CSeq_id& sid = **sid_itr;
3178 }
else if (acc ==
"NG_") {
3180 }
else if (acc ==
"NM_") {
3182 }
else if (acc ==
"NP_") {
3184 }
else if (acc ==
"NR_") {
3186 }
else if (acc ==
"NZ_") {
3188 }
else if (acc ==
"NS_") {
3190 }
else if (acc ==
"NT_") {
3192 }
else if (acc ==
"NW_") {
3194 }
else if (acc ==
"WP_") {
3196 }
else if (acc ==
"XR_") {
3249 if (desc_ci->GetSource().IsSetGenome()
3259 if ( desc_ci->GetUser().IsSetType() ) {
3262 if ( ! oi.
IsStr() )
continue;
3267 if ((*field)->IsSetLabel() && (*field)->GetLabel().IsStr()) {
3269 if (
NStr::EqualNocase((*field)->GetData().GetStr(),
"NCBI eukaryotic genome annotation pipeline")) {
3286 if (feat_ci->IsSetProduct() &&
s_SeqLocHasGI(feat_ci->GetProduct())) {
3289 if (feat_ci->IsSetData() && feat_ci->GetData().IsGene()
3290 && feat_ci->GetData().GetGene().IsSetLocus_tag()
3291 && !
NStr::IsBlank (feat_ci->GetData().GetGene().GetLocus_tag())) {
3373 (
const CSeq_loc& loc,
3376 for ( CSeq_loc_CI lit(loc); lit; ++lit ) {
3377 const CSeq_id& id1 = lit.GetSeq_id();
3378 CSeq_loc_CI lit2 = lit;
3379 for ( ++lit2; lit2; ++lit2 ) {
3380 const CSeq_id& id2 = lit2.GetSeq_id();
3384 "Two ids refer to the same bioseq but are of "
3385 "different type", obj);
3390 "Feature locations should not use Seq-ids that will be stripped during ID load", obj);
3395 "Feature location intervals should all be on the same sequence", obj);
3508 #define ADD_BARCODE_ERR(TestName) \
3509 PostErr(eDiag_Warning, eErr_GENERIC_Barcode##TestName, k##TestName, sq); \
3510 if (!msg.empty()) { \
3519 const CBioseq& sq = *(
r.bsh.GetCompleteBioseq());
3534 if (!
r.percent_n.empty()) {
3541 if (
r.collection_date) {
3544 if (
r.order_assignment) {
3550 if (
r.frame_shift) {
3553 if (!
r.structured_voucher) {
3740 if (!parent || !parent.
IsSet()) {
3767 }
else if (seh.
IsSeq()) {
3778 appropriate_parent = gps;
3783 appropriate_parent = gp;
3785 appropriate_parent = np;
3788 appropriate_parent = seh;
3790 return appropriate_parent;
3802 return *find_iter->second;
3807 *pub, pInfo->m_pmids, pInfo->m_muids,
3808 pInfo->m_serials, pInfo->m_published_labels,
3809 pInfo->m_unpublished_labels);
3842 return find_iter->second;
3862 for( ; feat_ci; ++feat_ci ) {
3871 SFeatKey any_type_key = inner_feat_key;
3875 SFeatKey any_subtype_key = inner_feat_key;
3880 SFeatKey any_type_or_subtype_key = inner_feat_key;
3883 m_featCache[any_type_or_subtype_key].push_back(*feat_ci);
3895 const vector<SFeatKey> &featKeys)
3897 if( featKeys.empty() ) {
3903 ITERATE(vector<SFeatKey>, feat_it, featKeys) {
3904 if( feat_it->bioseq_h != bioseq_h ) {
3905 throw runtime_error(
"GetFeatFromCacheMulti must be called with only 1 bioseq in its args");
3913 ITERATE(vector<SFeatKey>, key_it, featKeys ) {
3916 set_of_feats, set_of_feats.
begin()));
3928 if( set_of_feats.
find(*feat_it) != set_of_feats.
end() ) {
3929 answer->push_back(*feat_it);
3972 _ASSERT(search_bsh || tse_arg);
3990 for( ; gene_ci; ++gene_ci ) {
4005 const string & locus_tag = (
4021 return find_iter->second;
4037 for( ; bioseq_ci; ++bioseq_ci ) {
4039 for( ; feat_ci; ++feat_ci ) {
4053 return find_iter->second;
4056 return kEmptyFeatToBioseqCache;
4071 for( ; bioseq_ci; ++bioseq_ci ) {
4085 return find_iter->second;
4088 return s_EmptyResult;
4103 for ( CSeq_loc_CI citer (loc); citer; ++citer) {
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eErr_SEQ_FEAT_WrongQualOnImpFeat
@ eErr_SEQ_DESCR_ObsoleteSourceQual
@ eErr_SEQ_DESCR_ObsoleteSourceLocation
@ eErr_SEQ_INST_FarFetchFailure
@ eErr_SEQ_FEAT_WholeLocation
@ eErr_GENERIC_MissingPubRequirement
@ eErr_SEQ_FEAT_EcNumberProblem
@ eErr_SEQ_FEAT_DuplicateAnticodonInterval
@ eErr_SEQ_INST_CompleteGenomeHasGaps
@ eErr_SEQ_FEAT_CDShasTooManyXs
@ eErr_SEQ_FEAT_TranslExceptPhase
@ eErr_SEQ_FEAT_MinusStrandProtein
@ eErr_SEQ_INST_CompleteTitleProblem
@ eErr_SEQ_DESCR_UnwantedCompleteFlag
@ eErr_SEQ_FEAT_GeneXrefWithoutLocus
@ eErr_SEQ_FEAT_BadLocation
@ eErr_SEQ_FEAT_GenesInconsistent
@ eErr_SEQ_INST_HighNContentStretch
@ eErr_SEQ_PKG_NoBioseqFound
@ eErr_SEQ_FEAT_PseudoRnaHasProduct
@ eErr_SEQ_DESCR_InconsistentBioSources
@ eErr_GENERIC_PastReleaseDate
@ eErr_SEQ_DESCR_BioSourceDbTagConflict
@ eErr_SEQ_FEAT_UnknownImpFeatQual
@ eErr_SEQ_FEAT_DuplicateExonInterval
@ eErr_GENERIC_UnnecessaryPubEquiv
@ eErr_SEQ_DESCR_BioSourceOnProtein
@ eErr_SEQ_DESCR_LatLonRange
@ eErr_SEQ_FEAT_UnnecessaryTranslExcept
@ eErr_SEQ_GRAPH_GraphBioseqId
@ eErr_SEQ_FEAT_MixedStrand
@ eErr_SEQ_FEAT_BadRRNAcomponentOrder
@ eErr_SEQ_DESCR_DuplicatePCRPrimerSequence
@ eErr_SEQ_FEAT_BadGeneOntologyFormat
@ eErr_SEQ_DESCR_LatLonCountry
@ eErr_SEQ_PKG_NucProtSetHasTitle
@ eErr_SEQ_FEAT_IllegalDbXref
@ eErr_GENERIC_SgmlPresentInText
@ eErr_SEQ_FEAT_BadAnticodonAA
@ eErr_SEQ_FEAT_MissingCDSproduct
@ eErr_SEQ_FEAT_FeatureBeginsOrEndsInGap
@ eErr_SEQ_FEAT_TranslExceptAndRnaEditing
@ eErr_GENERIC_BarcodeTooManyNs
@ eErr_SEQ_PKG_BioseqSetClassNotSet
@ eErr_SEQ_DESCR_NoOrgFound
@ eErr_SEQ_FEAT_MissingProteinName
@ eErr_SEQ_DESCR_BadPCRPrimerSequence
@ eErr_SEQ_FEAT_GeneXrefWithoutGene
@ eErr_SEQ_DESCR_TransgenicProblem
@ eErr_SEQ_PKG_MissingSetTitle
@ eErr_SEQ_FEAT_InvalidQualifierValue
@ eErr_SEQ_FEAT_GeneOntologyTermMissingGOID
@ eErr_SEQ_FEAT_ProtRefHasNoData
@ eErr_SEQ_GRAPH_GraphSeqLocLen
@ eErr_SEQ_DESCR_InvalidForType
@ eErr_SEQ_DESCR_LatLonValue
@ eErr_SEQ_FEAT_FeatureCitationProblem
@ eErr_SEQ_DESCR_IdenticalInstitutionCode
@ eErr_SEQ_PKG_ImproperlyNestedSets
@ eErr_SEQ_INST_UnknownLengthGapNot100
@ eErr_SEQ_FEAT_WrongQualOnFeature
@ eErr_SEQ_FEAT_MultipleProtRefs
@ eErr_SEQ_FEAT_MultipleEquivPublications
@ eErr_SEQ_PKG_SeqSubmitWithWgsSet
@ eErr_SEQ_PKG_InconsistentMoltypeSet
@ eErr_SEQ_INST_ConflictingBiomolTech
@ eErr_SEQ_FEAT_MissingQualOnImpFeat
@ eErr_SEQ_PKG_INSDRefSeqPackaging
@ eErr_SEQ_FEAT_LocusCollidesWithLocusTag
@ eErr_SEQ_PKG_GPSnonGPSPackaging
@ eErr_SEQ_DESCR_BadCollectionDate
@ eErr_SEQ_FEAT_MultipleEquivBioSources
@ eErr_SEQ_FEAT_CDSwithNoMRNAOverlap
@ eErr_SEQ_DESCR_BadInstitutionCode
@ eErr_SEQ_FEAT_PeptideFeatOutOfFrame
@ eErr_SEQ_FEAT_ProteinNameHasPMID
@ eErr_SEQ_FEAT_RepeatRegionNeedsNote
@ eErr_SEQ_DESCR_BadAltitude
@ eErr_SEQ_FEAT_GeneXrefStrandProblem
@ eErr_SEQ_FEAT_MissingTrnaAA
@ eErr_GENERIC_NonAsciiAsn
@ eErr_SEQ_FEAT_CDSwithMultipleMRNAs
@ eErr_SEQ_FEAT_CollidingFeatureIDs
@ eErr_SEQ_DESCR_IncorrectlyFormattedVoucherID
@ eErr_SEQ_FEAT_OrfCdsHasProduct
@ eErr_SEQ_FEAT_ImproperBondLocation
@ eErr_SEQ_PKG_GraphPackagingProblem
@ eErr_SEQ_INST_OverlappingDeltaRange
@ eErr_SEQ_FEAT_BadTranssplicedInterval
@ eErr_SEQ_INST_SeqLocLength
@ eErr_SEQ_DESCR_MultipleTaxonIDs
@ eErr_SEQ_DESCR_BadKeyword
@ eErr_SEQ_FEAT_UnknownImpFeatKey
@ eErr_SEQ_DESCR_Inconsistent
@ eErr_SEQ_PKG_ArchaicFeatureLocation
@ eErr_GENERIC_BarcodeTestFails
@ eErr_SEQ_FEAT_NestedSeqLocMix
@ eErr_SEQ_FEAT_ShortIntron
@ eErr_SEQ_FEAT_UnknownFeatureQual
@ eErr_SEQ_DESCR_MultipleChromosomes
@ eErr_SEQ_FEAT_InconsistentGeneOntologyTermAndId
@ eErr_SEQ_PKG_MisplacedMolInfo
@ eErr_GENERIC_EmbeddedScript
@ eErr_GENERIC_BarcodeTestPasses
@ eErr_SEQ_GRAPH_GraphAbove
@ eErr_SEQ_FEAT_FeatureInsideGap
@ eErr_SEQ_FEAT_DifferntIdTypesInSeqLoc
@ eErr_SEQ_FEAT_BadFullLengthFeature
@ eErr_SEQ_FEAT_BadCharInAuthorName
@ eErr_SEQ_FEAT_FarLocation
@ eErr_SEQ_INST_BadHTGSeq
@ eErr_SEQ_FEAT_InvalidFuzz
@ eErr_SEQ_FEAT_InvalidInferenceValue
@ eErr_SEQ_FEAT_GeneXrefNeeded
@ eErr_SEQ_INST_UnexpectedIdentifierChange
@ eErr_SEQ_FEAT_InconsistentRRNAstrands
@ eErr_SEQ_PKG_ArchaicFeatureProduct
@ eErr_SEQ_DESCR_MultipleSourceQualifiers
@ eErr_SEQ_FEAT_BadRRNAcomponentOverlap
@ eErr_SEQ_FEAT_BadTrailingCharacter
@ eErr_SEQ_DESCR_WrongVoucherType
@ eErr_SEQ_INST_ProteinsHaveGeneralID
@ eErr_SEQ_GRAPH_GraphOutOfOrder
@ eErr_SEQ_FEAT_BadInternalCharacter
@ eErr_SEQ_DESCR_NoSourceDescriptor
@ eErr_SEQ_DESCR_BadCollectionCode
@ eErr_SEQ_FEAT_BadProteinName
@ eErr_SEQ_FEAT_FeatureProductInconsistency
@ eErr_GENERIC_PublicationInconsistency
@ eErr_GENERIC_BadSubmissionAuthorName
@ eErr_GENERIC_CollidingSerialNumbers
@ eErr_SEQ_PKG_ComponentMissingTitle
@ eErr_SEQ_DESCR_DBLinkMissingUserObject
@ eErr_SEQ_PKG_InternalGenBankSet
@ eErr_SEQ_DESCR_BioSourceMissing
@ eErr_SEQ_FEAT_BadAnticodonCodon
@ eErr_SEQ_FEAT_BadTrailingHyphen
@ eErr_SEQ_FEAT_OldLocusTagMismtach
@ eErr_SEQ_DESCR_MolInfoConflictsWithBioSource
@ eErr_SEQ_FEAT_UTRdoesNotAbutCDS
@ eErr_SEQ_FEAT_PseudoRnaViaGeneHasProduct
@ eErr_SEQ_FEAT_ConflictFlagSet
@ eErr_SEQ_FEAT_StrandOther
@ eErr_SEQ_PKG_FeaturePackagingProblem
@ eErr_SEQ_DESCR_MultipleNames
@ eErr_SEQ_INST_BadSeqIdFormat
@ eErr_SEQ_PKG_GenomicProductPackagingProblem
@ eErr_INTERNAL_Exception
@ eErr_SEQ_FEAT_BadEcNumberFormat
@ eErr_SEQ_FEAT_CDSproductPackagingProblem
@ eErr_SEQ_FEAT_RedundantFields
@ eErr_SEQ_INST_InternalNsInSeqRaw
@ eErr_SEQ_DESCR_BadOrgMod
@ eErr_SEQ_INST_TerminalNs
@ eErr_SEQ_DESCR_BadOrganelleLocation
@ eErr_SEQ_FEAT_NoNameForProtein
@ eErr_SEQ_FEAT_RptUnitRangeProblem
@ eErr_SEQ_FEAT_SeqLocOrder
@ eErr_SEQ_DESCR_TaxonomyIsSpeciesProblem
@ eErr_SEQ_FEAT_CDSmRNAXrefLocationProblem
@ eErr_SEQ_PKG_SingleItemSet
@ eErr_SEQ_DESCR_BioSourceNeedsChromosome
@ eErr_SEQ_FEAT_VectorContamination
@ eErr_SEQ_FEAT_AbuttingIntervals
@ eErr_SEQ_FEAT_LocusTagProblem
@ eErr_SEQ_DESCR_BioSourceInconsistency
@ eErr_SEQ_FEAT_OnlyGeneXrefs
@ eErr_SEQ_FEAT_TranslExcept
@ eErr_SEQ_INST_InternalGapsInSeqRaw
@ eErr_SEQ_FEAT_GeneRefHasNoData
@ eErr_SEQ_INST_DuplicateSegmentReferences
@ eErr_SEQ_FEAT_TooManyInferenceAccessions
@ eErr_SEQ_FEAT_TerminalXDiscrepancy
@ eErr_SEQ_FEAT_MiscFeatureNeedsNote
@ eErr_SEQ_DESCR_CollidingPublications
@ eErr_SEQ_FEAT_GenomeSetMixedStrand
@ eErr_SEQ_FEAT_BadCharInAuthorLastName
@ eErr_SEQ_FEAT_HypotheticalProteinMismatch
@ eErr_SEQ_INST_TpaAssemblyProblem
@ eErr_SEQ_FEAT_MissingGeneXref
@Auth_list.hpp User-defined methods of the data storage class.
CSeq_entry * GetParentEntry(void) const
static void GetPubdescLabels(const CPubdesc &pd, vector< TEntrezId > &pmids, vector< TEntrezId > &muids, vector< int > &serials, vector< string > &published_labels, vector< string > &unpublished_labels)
For Publication Citations Get labels for a pubdesc.
bool GetDBFlags(bool &is_refseq, bool &is_src, string &correct_caps) const
bool IsSkippable(void) const
CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)
void GetLabel(string *label) const
@Name_std.hpp User-defined methods of the data storage class.
static CNcbiApplication * Instance(void)
Singleton method.
const string & GetDivision(void) const
bool IsSetDivision(void) const
@Pubdesc.hpp User-defined methods of the data storage class.
ESubtype GetSubtype(void) const
static bool RequireLocationIntervalsInBiologicalOrder(ESubtype subtype)
static bool AllowAdjacentIntervals(ESubtype subtype)
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
void GetLabel(string *label, ELabelType type) const
CSeq_entry * GetParentEntry(void) const
namespace ncbi::objects::
Base class for all serializable objects.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
CScope & GetScope(void) const
Returns scope.
Template class for iteration on objects of class C (non-medifiable version)
CTypeInfo class contains all information about C++ types (both basic and classes): members and layout...
Thrown on an attempt to write unassigned data member.
void ValidateSeqAlign(const CSeq_align &align, int order=-1)
void ValidateSeqAnnot(const CSeq_annot_Handle &annot)
virtual ~CValidError_base()
static CSeq_entry_Handle GetAppropriateXrefParent(CSeq_entry_Handle seh)
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
CValidError_base(CValidError_imp &imp)
void ValidateBioseq(const CBioseq &seq)
bool GetTSAConflictingBiomolTechErrors(const CBioseq &seq)
bool GetTSANStretchErrors(const CBioseq &seq)
void ValidateBioseqSet(const CBioseq_set &seqset)
void ValidateSeqDesc(const CSeqdesc &desc, const CSeq_entry &ctx)
Validate descriptors as stand alone objects (no context)
void SetScope(CScope &scope)
void SetTSE(CSeq_entry_Handle seh)
bool GetTSACDSOnMinusStrandErrors(const CSeq_feat &feat, const CBioseq &seq)
static bool GetPrefixAndAccessionFromInferenceAccession(string inf_accession, string &prefix, string &accession)
void ValidateSeqFeat(const CSeq_feat &feat)
static vector< string > GetAccessionsFromInferenceString(string inference, string &prefix, string &remainder, bool &same_species)
void ValidateSeqGraph(const CSeq_graph &graph)
void x_ReportInvalidFuzz(const CPacked_seqint &packed_int, const CSerialObject &obj)
CRef< CObjectManager > m_ObjMgr
void SetScope(const CSeq_entry &se)
void FindCollidingSerialNumbers(const CSerialObject &obj)
const CSeq_entry_Handle & GetTSEH()
static bool BadMultipleSequenceLocation(const CSeq_loc &loc, CScope &scope)
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
static bool IsTSAIntermediate(const CBioseq &seq)
void x_CheckPackedInt(const CPacked_seqint &packed_int, SLocCheck &lc, const CSerialObject &obj)
static bool IsInOrganelleSmallGenomeSet(const CSeq_id &id, CScope &scope)
const CBioSourceKind & BioSourceKind() const
bool m_ValidateAlignments
bool HasGiOrAccnVer() const
void SetTSE(const CSeq_entry_Handle &seh)
bool m_FarFetchCDSproducts
const SValidatorContext & GetContext() const
CValidator::TProgressCallback m_PrgCallback
bool m_GenerateGoldenFile
IValidError * m_ErrRepository
size_t m_NumMisplacedGraphs
bool m_NotJustLocalOrGeneral
CConstRef< CSeq_feat > GetmRNAGivenProduct(const CBioseq &seq)
bool IsValidateAlignments() const
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id)
size_t m_NumTopSetSiblings
void ValidateCitations(const CSeq_entry_Handle &seh)
bool DoesAnyFeatLocHaveGI() const
void FindNonAsciiText(const CSerialObject &obj)
void AddBioseqWithNoBiosource(const CBioseq &seq)
void ValidateSeqLocIds(const CSeq_loc &loc, const CSerialObject &obj)
bool GenerateGoldenFile() const
bool IsStandaloneAnnot() const
void x_DoBarcodeTests(CSeq_entry_Handle seh)
CConstRef< CSeq_annot > m_SeqAnnot
TSuppressed & SetSuppressed()
bool DoesAnyProductLocHaveGI() const
bool GetTSAConflictingBiomolTechErrors(const CSeq_entry_Handle &se)
bool x_IsSuppressed(CValidErrItem::TErrIndex errType) const
void x_AddValidErrItem(EDiagSev sev, EErrType type, const string &msg, const string &desc, const CSerialObject &obj, const string &accession, const int version)
unique_ptr< CValidatorEntryInfo > m_pEntryInfo
void x_Init(Uint4 options, size_t initialInferenceCount, bool notJustLocalOrGeneral, bool hasRefSeq)
void PostObjErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
TSuppressed m_SuppressedErrors
void Setup(const CSeq_entry_Handle &seh)
bool Validate(const CSeq_entry &se, const CCit_sub *cs=nullptr, CScope *scope=nullptr)
void InitializeSourceQualTags()
static bool IsWGSIntermediate(const CBioseq &seq)
CValidator::CProgressInfo m_PrgInfo
void ValidateDbxref(const CDbtag &xref, const CSerialObject &obj, bool biosource=false, const CSeq_entry *ctx=nullptr)
bool IsSerialNumberInComment(const string &comment)
void ValidateTaxonomy(const CSeq_entry &se)
bool IsFarSequence(const CSeq_id &id)
const CTSE_Handle & GetTSE_Handle()
size_t m_NumMisplacedFeatures