141 #define NCBI_USE_ERRCODE_X Objtools_Validator
146 using namespace sequence;
170 shared_ptr<SValidatorContext> pContext,
174 m_ErrRepository{errs},
214 return context.PreprocessHugeFile ||
215 context.PostprocessHugeFile;
230 return edit::CHugeAsnReader::IsHugeSet(setClass);
364 if (type_info == CSeqdesc::GetTypeInfo()) {
368 }
else if (type_info == CSeq_feat::GetTypeInfo()) {
371 }
else if (type_info == CBioseq::GetTypeInfo()) {
374 }
else if (type_info == CBioseq_set::GetTypeInfo()) {
377 }
else if (type_info == CSeq_annot::GetTypeInfo()) {
380 }
else if (type_info == CSeq_graph::GetTypeInfo()) {
383 }
else if (type_info == CSeq_align::GetTypeInfo()) {
386 }
else if (type_info == CSeq_entry::GetTypeInfo()) {
389 }
else if (type_info == CBioSource::GetTypeInfo()) {
392 }
else if (type_info == COrg_ref::GetTypeInfo()) {
395 }
else if (type_info == CPubdesc::GetTypeInfo()) {
398 }
else if (type_info == CSeq_submit::GetTypeInfo()) {
606 if (sc_GenomeRaiseExceptEmblDdbjRefSeqArray.find(et) != sc_GenomeRaiseExceptEmblDdbjRefSeqArray.end()) {
613 if (sc_GenomeRaiseExceptEmblDdbjArray.find(et) != sc_GenomeRaiseExceptEmblDdbjArray.end()) {
620 if (sc_GenomeRaiseArray.find (et) != sc_GenomeRaiseArray.end()) {
640 item->SetErrIndex(et);
650 item->SetObj_content(content_label);
654 item->SetFeatureId(feature_id);
659 item->SetBioseq(bioseq_label);
669 item->SetLocation(loc_label);
671 item->SetSeqOffset(
offset);
677 item->SetProduct_loc(product_label);
685 item->SetAccession(accession);
690 item->SetAccnver(accession);
711 item->SetFeatureObjDescFromFields();
762 if (isSetClass &&
GetContext().PreprocessHugeFile) {
800 ctx.IsSet() &&
ctx.GetSet().IsSetClass()) {
801 if (
auto setClass =
ctx.GetSet().GetClass();
IsHugeSet(setClass)) {
802 string desc{
"DESCRIPTOR: "};
804 desc +=
"BIOSEQ-SET: ";
877 string desc =
"ANNOTATION: ";
904 string desc =
"GRAPH: ";
937 string desc(
"GRAPH: ");
978 string desc =
"ALIGNMENT: ";
980 desc += align.ENUM_METHOD_NAME(EType)()->FindName(align.
GetType(),
true);
986 desc +=
", dim=UNASSIGNED";
1016 if (entry.
IsSeq()) {
1018 }
else if (entry.
IsSet()) {
1021 string desc =
"SEQ-ENTRY: ";
1047 string desc =
"BioSource: ";
1068 string desc =
"Org-ref: ";
1089 string desc =
"Pubdesc: ";
1110 string desc =
"Seq-submit: ";
1121 const string& accession,
1160 reasons = msg +
" - " + reasons;
1197 bool has_mult =
false;
1203 desc_ci && !has_mult;
1205 if (desc_ci->GetSource().IsSetOrg()) {
1206 const COrg_ref& org = desc_ci->GetSource().GetOrg();
1210 (*it)->IsSetTag() && (*it)->GetTag().IsId()) {
1211 int this_id = (*it)->GetTag().GetId();
1215 }
else if (first_id == 0) {
1217 }
else if (first_id != this_id) {
1227 if (has_mult || (phage_id > 0 && first_id > 0)) {
1229 "There are multiple taxonIDs in this RefSeq record.",
1294 "Non-ascii chars in input ASN.1 strings", *seq);
1302 bool has_gi =
false;
1304 bool has_nucleotide_sequence =
false;
1307 bi && (!
IsINSDInSep() || !has_gi || !has_nucleotide_sequence);
1310 if ((*it)->IsGi()) {
1314 if (bi->IsSetInst_Mol() && bi->IsNa()) {
1315 has_nucleotide_sequence =
true;
1324 "INSD and RefSeq records should not be present in the same set", *
m_TSE);
1330 vector<string> id_strings;
1335 if (!IsNCBIFILESeqId(**it)) {
1337 (*it)->GetLabel(&
label);
1338 id_strings.push_back(
label);
1342 stable_sort (id_strings.begin(), id_strings.end());
1343 for (vector<string>::iterator id_str_it = id_strings.begin();
1344 id_str_it != id_strings.end();
1346 string pattern = (*id_str_it).substr(0, 30);
1347 string first_id = *id_str_it;
1348 vector<string>::iterator cmp_it = id_str_it;
1354 "First 30 characters of " + first_id +
" and " +
1363 vector < int > feature_ids;
1371 if (feature_ids.size() > 0) {
1373 stable_sort (feature_ids.begin(), feature_ids.end());
1374 vector <int>::iterator it = feature_ids.begin();
1377 while (it != feature_ids.end()) {
1380 ITERATE( vector<CSeq_feat_Handle>, feat_it, handles ) {
1384 while (it != feature_ids.end() && *it ==
id) {
1387 if (it != feature_ids.end()) {
1399 bool has_nongps =
false;
1400 bool has_gps =
false;
1403 if (
si->IsSetClass()) {
1417 if (has_nongps && has_gps) {
1419 "Genomic product set and mut/pop/phy/eco set records should not be present in the same set",
1426 size_t num_inferences = 0, num_accessions = 0;
1430 if ((*qual)->IsSetQual() && (*qual)->IsSetVal() &&
NStr::Equal((*qual)->GetQual(),
"inference")) {
1432 string prefix, remainder;
1435 for (
size_t i = 0;
i < accessions.size();
i++) {
1437 string acc_prefix, accession;
1448 if ( num_accessions > 1000) {
1451 "Skipping validation of " +
NStr::SizetToString (num_inferences) +
" /inference qualifiers with "
1466 }
catch (
const exception& e ) {
1468 string(
"Exception while validating bioseq. EXCEPTION: ") +
1472 }
else if (seh.
IsSet()) {
1479 }
catch (
const exception& e ) {
1481 string(
"Exception while validating bioseq set. EXCEPTION: ") +
1497 " TPAs with history and " +
1499 " without history in this record.", *seq);
1505 " TPAs without history in this record, but the record has a gi number assignment.", *
m_TSE);
1508 call_once(
SetContext().ProteinHaveGeneralIDOnceFlag,
1511 "INDEXER_ONLY - Protein bioseqs have general seq-id.",
1525 "There is 1 mispackaged feature in this record.",
1534 "There is 1 mispackaged feature in this small genome set record.",
1541 " gene xrefs and no gene features in this record.", *
m_TSE);
1567 "Far fetch failures caused some validator tests to be bypassed",
1593 "Record release date has already passed", ss);
1636 if(
set.IsSetClass() &&
1643 call_once(
SetContext().WgsSetInSeqSubmitOnceFlag,
1646 "File was created as a wgs-set, but should be a batch submission instead.",
1654 "File was created as a wgs-set, but should be a batch submission instead.",
1674 switch (sah.
Which()) {
1691 const CSeq_align& sa = ai.GetOriginalSeq_align();
1703 const CSeq_graph& sg = gi->GetOriginalGraph();
1824 "dbxref value " + xref.
GetTag().
GetStr() +
" has SGML",
1829 "dbxref value " + xref.
GetTag().
GetStr() +
" contains space character",
1834 "dbxref database " + db +
" has SGML",
1849 "Illegal db_xref type " + db +
" (" + dbv +
")", obj,
ctx);
1853 bool refseq_db =
false, src_db =
false;
1854 string correct_caps;
1855 xref.
GetDBFlags(refseq_db, src_db, correct_caps);
1856 string message =
"Illegal db_xref type " + db +
" (" + dbv +
"), legal capitalization is " + correct_caps;
1858 message +=
", but should not be used on an OrgRef";
1860 message +=
", but should only be used on an OrgRef";
1868 "RefSeq-specific db_xref type " + db +
" (" + dbv +
") should not be used on a non-RefSeq OrgRef",
1872 "db_xref type " + db +
" (" + dbv +
") is only legal for RefSeq",
1878 "RefSeq-specific db_xref type " + db +
" (" + dbv +
") should not be used on an OrgRef",
1882 "db_xref type " + db +
" (" + dbv +
") should not be used on an OrgRef",
1887 "db_xref type " + db +
" (" + dbv +
") should only be used on an OrgRef",
1892 if (isStr && db ==
"GeneID") {
1894 "db_xref type " + db +
" (" + dbv +
") is required to be an integer",
1910 && (*xref)->IsSetDb()) {
1914 "BioSource uses db " + last_db +
" multiple times",
1917 last_db = (*xref)->GetDb();
1935 lc.id_prv =
lc.id_cur;
1936 lc.strand_prv =
lc.strand_cur;
1937 lc.int_prv =
lc.int_cur;
1950 id_cur = &int_cur->
GetId();
1964 static const string kSpaceLeftFirst =
"Should not specify 'space to left' at first position of non-circular sequence";
1965 static const string kSpaceRightLast =
"Should not specify 'space to right' at last position of non-circular sequence";
1967 static const string kSpaceLeftCircle =
"Should not specify 'circle to left' except at first position of circular sequence";
1968 static const string kSpaceRightCircle =
"Should not specify 'circle to right' except at last position of circular sequence";
1974 bool has_fuzz_from =
false;
1975 bool has_fuzz_to =
false;
1979 has_fuzz_from =
true;
1985 if (! has_fuzz_from && ! has_fuzz_to) {
1990 if (has_fuzz_from && has_fuzz_to && fuzz_from == fuzz_to) {
1994 "Should not specify 'space to left' for both ends of interval", obj);
1999 "Should not specify 'space to right' for both ends of interval", obj);
2004 "Should not specify 'origin of circle' for both ends of interval", obj);
2076 for (; lit; ++lit) {
2078 switch (loc_choice) {
2097 unsigned int num_mix = 0;
2099 for (; lit; ++lit) {
2111 lc.unmarked_strand =
false;
2112 lc.mixed_strand =
false;
2113 lc.has_other =
false;
2114 lc.has_not_other =
false;
2115 lc.id_cur =
nullptr;
2116 lc.id_prv =
nullptr;
2117 lc.int_cur =
nullptr;
2118 lc.int_prv =
nullptr;
2128 if (
lc.id_cur &&
lc.id_prv &&
2130 if (
lc.strand_prv !=
lc.strand_cur) {
2135 lc.unmarked_strand =
true;
2137 lc.mixed_strand =
true;
2143 lc.has_other =
true;
2145 lc.has_not_other =
true;
2153 switch (loc.Which()) {
2155 lc.int_cur = &loc.GetInt();
2158 lc.has_other =
true;
2160 if ((!
lc.chk) && lowerSev) {
2162 TSeqPos fr = loc.GetInt().GetFrom();
2163 TSeqPos to = loc.GetInt().GetTo();
2164 if (fr < length && to >= length) {
2173 lc.strand_cur = loc.GetPnt().IsSetStrand() ?
2176 lc.has_other =
true;
2178 lc.id_cur = &loc.GetPnt().GetId();
2180 lc.int_prv =
nullptr;
2183 lc.strand_cur = loc.GetPacked_pnt().IsSetStrand() ?
2186 lc.has_other =
true;
2188 lc.id_cur = &loc.GetPacked_pnt().GetId();
2190 lc.int_prv =
nullptr;
2198 for (
auto l : loc.GetMix().Get()) {
2205 lc.id_cur =
nullptr;
2206 lc.int_prv =
nullptr;
2216 lc.prefix +
": SeqLoc [" + lbl +
"] out of range", obj);
2222 lc.strand_prv =
lc.strand_cur;
2223 lc.id_prv =
lc.id_cur;
2225 }
catch(
const exception& e ) {
2228 "Exception caught while validating location " +
2229 label +
". Exception: " + e.what(), obj);
2232 lc.id_cur =
nullptr;
2233 lc.int_prv =
nullptr;
2238 (
const CSeq_loc& loc,
2240 bool report_abutting,
2251 if (
lc.has_other &&
lc.has_not_other) {
2254 prefix +
": Inconsistent use of other strand SeqLoc [" +
label +
"]", obj);
2258 "Strand 'other' in location", obj);
2266 "Duplicate exons in location", obj);
2271 loc.GetLabel(&
label);
2273 prefix +
": SeqLoc [" +
label +
"] has nested SEQLOC_MIX elements",
2281 bool trans_splice =
false;
2282 bool circular_rna =
false;
2283 bool exception =
false;
2286 sfp =
dynamic_cast<const CSeq_feat*
>(&obj);
2291 lc.mixed_strand =
false;
2292 lc.unmarked_strand =
false;
2300 trans_splice =
true;
2303 circular_rna =
true;
2315 prefix +
": Adjacent intervals in SeqLoc [" +
2316 loc_lbl +
"]", obj);
2320 CSeq_loc_CI li(loc);
2328 bool ordered =
true;
2329 bool circular =
false;
2341 loc.GetLabel(&
label);
2343 "Exception caught while validating location " +
2344 label +
". Exception: " + ex.
what(), obj);
2347 if (
lc.mixed_strand ||
lc.unmarked_strand || !ordered) {
2348 if (loc_lbl.empty()) {
2351 if (
lc.mixed_strand) {
2354 prefix +
": Mixed strands in SeqLoc ["
2355 + loc_lbl +
"] in small genome set - set trans-splicing exception if appropriate", obj);
2362 prefix +
": Mixed strands in SeqLoc ["
2363 + loc_lbl +
"]", obj);
2365 }
else if (
lc.unmarked_strand) {
2367 prefix +
": Mixed plus and unknown strands in SeqLoc ["
2368 + loc_lbl +
"]", obj);
2370 if (!ordered && !circular_rna) {
2373 prefix +
": Intervals out of order in SeqLoc [" +
2374 loc_lbl +
"]", obj);
2377 prefix +
": Intervals out of order in SeqLoc [" +
2378 loc_lbl +
"]", obj);
2392 if (loc_lbl.empty()) {
2393 loc.GetLabel(&loc_lbl);
2396 prefix +
"Intervals out of order in SeqLoc [" +
2397 loc_lbl +
"]", obj);
2402 if (loc_lbl.empty()) {
2403 loc.GetLabel(&loc_lbl);
2406 prefix +
": Mixed strands in SeqLoc [" +
2407 loc_lbl +
"]", obj);
2434 if ((*it)->IsMolinfo() && (*it)->GetMolinfo().IsSetTech()
2444 bool is_other =
false;
2445 bool has_gi =
false;
2448 if ((*it)->IsOther()) {
2451 }
else if ((*it)->IsGi()) {
2456 if (!is_other || has_gi) {
2469 if ((*it)->IsMolinfo() && (*it)->GetMolinfo().IsSetTech()
2479 bool is_other =
false;
2480 bool has_gi =
false;
2483 if ((*it)->IsOther()) {
2486 }
else if ((*it)->IsGi()) {
2491 if (!is_other || has_gi) {
2508 "No source information included on this record.", se);
2517 for (
size_t i = 0;
i < num_no_source; ++
i ) {
2519 "No organism name included in the source. Other qualifiers may exist.",
2542 feat.
Reset(&(
fi->GetOriginalFeature()));
2551 feat.
Reset(&(
fi->GetOriginalFeature()));
2585 feat.
Reset(&(
fi->GetOriginalFeature()));
2594 feat.
Reset(&(
fi->GetOriginalFeature()));
2611 if ( parent->
IsSet() ) {
2613 if (
set.IsSetClass() &&
set.GetClass() == clss ) {
2624 size_t pos = comment.find(
'[', 0);
2625 while ( pos != string::npos ) {
2628 if (
isdigit((
unsigned char) comment[pos]) ) {
2630 if (comment[pos] ==
'0') {
2633 while (
isdigit((
unsigned char) comment[pos]) ) {
2636 if ( comment[pos] ==
']' && okay ) {
2641 pos = comment.find(
'[', pos);
2650 if ( sid && sid->
IsOther() ) {
2656 if (
GetTSE().IsSeq() ) {
2669 vector<TEntrezId>& pmids, vector<TEntrezId>& muids, vector<int>& serials,
2670 vector<string>& published_labels, vector<string>& unpublished_labels)
2673 if ((*it)->IsPub()) {
2688 vector<TEntrezId> pmids;
2689 vector<TEntrezId> muids;
2690 vector<int> serials;
2691 vector<string> published_labels;
2692 vector<string> unpublished_labels;
2706 if (
f->IsSetCit() &&
f->GetCit().IsPub()) {
2710 if ((*cit_it)->IsPmid()) {
2711 vector<TEntrezId>::iterator it = pmids.begin();
2712 while (it != pmids.end() && !found) {
2713 if (*it == (*cit_it)->GetPmid()) {
2720 "Citation on feature refers to uid ["
2722 +
"] not on a publication in the record",
2723 f->GetOriginalFeature());
2725 }
else if ((*cit_it)->IsMuid()) {
2726 vector<TEntrezId>::iterator it = muids.begin();
2727 while (it != muids.end() && !found) {
2728 if (*it == (*cit_it)->GetMuid()) {
2735 "Citation on feature refers to uid ["
2737 +
"] not on a publication in the record",
2738 f->GetOriginalFeature());
2740 }
else if ((*cit_it)->IsEquiv()) {
2756 vector<string>::iterator unpub_it = unpublished_labels.begin();
2757 while (unpub_it != unpublished_labels.end() && !found) {
2758 size_t it_len =(*unpub_it).length();
2764 vector<string>::iterator pub_it = published_labels.begin();
2766 while (pub_it != published_labels.end() && !found) {
2767 size_t it_len =(*pub_it).length();
2770 "Citation on feature needs to be updated to published uid",
2771 f->GetOriginalFeature());
2778 "Citation on feature refers to a publication not in the record",
2779 f->GetOriginalFeature());
2799 const string&
str = *it;
2801 const char& ch = *c_it;
2802 unsigned char chu = ch;
2803 if (ch > 127 || (ch < 32 && ch !=
'\t' && ch !=
'\r' && ch !=
'\n')) {
2815 class CScriptTagTextFsm :
public CTextFsm<int>
2818 CScriptTagTextFsm() {
2819 const char * script_tags[] = {
2820 "<script",
"<object",
"<applet",
"<embed",
"<form",
2821 "javascript:",
"vbscript:"};
2823 AddWord(script_tags[idx],
true);
2830 bool DoesStrHaveFsmHits(
const string &
str) {
2831 int state = GetInitialState();
2834 if( IsMatchFound(
state) ) {
2842 static CScriptTagTextFsm s_ScriptTagFsm;
2847 if (s_ScriptTagFsm.DoesStrHaveFsmHits(*it)) {
2849 "Script tag found in item", obj);
2862 CSeq_loc_CI curr(loc);
2866 CSeq_loc_CI
prev = curr;
2892 for ( CSeq_loc_CI it(loc); it && !rval; ++it ) {
2893 if (it.GetSeq_id().IsGi()) {
2923 }
else if (!se.
IsSet()) {
2958 while (pub && !pub->IsSub()) {
2969 if (
si->IsSetClass ()) {
2982 const CSeq_id& sid = **sid_itr;
3020 }
else if (acc ==
"NG_") {
3022 }
else if (acc ==
"NM_") {
3024 }
else if (acc ==
"NP_") {
3026 }
else if (acc ==
"NR_") {
3028 }
else if (acc ==
"NZ_") {
3030 }
else if (acc ==
"NS_") {
3032 }
else if (acc ==
"NT_") {
3034 }
else if (acc ==
"NW_") {
3036 }
else if (acc ==
"WP_") {
3038 }
else if (acc ==
"XR_") {
3091 if (desc_ci->GetSource().IsSetGenome()
3101 if ( desc_ci->GetUser().IsSetType() ) {
3104 if ( ! oi.
IsStr() )
continue;
3109 if ((*field)->IsSetLabel() && (*field)->GetLabel().IsStr()) {
3111 if (
NStr::EqualNocase((*field)->GetData().GetStr(),
"NCBI eukaryotic genome annotation pipeline")) {
3128 if (feat_ci->IsSetProduct() &&
s_SeqLocHasGI(feat_ci->GetProduct())) {
3131 if (feat_ci->IsSetData() && feat_ci->GetData().IsGene()
3132 && feat_ci->GetData().GetGene().IsSetLocus_tag()
3133 && !
NStr::IsBlank (feat_ci->GetData().GetGene().GetLocus_tag())) {
3215 (
const CSeq_loc& loc,
3218 for ( CSeq_loc_CI lit(loc); lit; ++lit ) {
3219 const CSeq_id& id1 = lit.GetSeq_id();
3220 CSeq_loc_CI lit2 = lit;
3221 for ( ++lit2; lit2; ++lit2 ) {
3222 const CSeq_id& id2 = lit2.GetSeq_id();
3226 "Two ids refer to the same bioseq but are of "
3227 "different type", obj);
3232 "Feature locations should not use Seq-ids that will be stripped during ID load", obj);
3237 "Feature location intervals should all be on the same sequence", obj);
3350 #define ADD_BARCODE_ERR(TestName) \
3351 PostErr(eDiag_Warning, eErr_GENERIC_Barcode##TestName, k##TestName, sq); \
3352 if (!msg.empty()) { \
3360 for (
auto r : results) {
3361 const CBioseq& sq = *(
r.bsh.GetCompleteBioseq());
3376 if (!
r.percent_n.empty()) {
3383 if (
r.collection_date) {
3386 if (
r.order_assignment) {
3392 if (
r.frame_shift) {
3395 if (!
r.structured_voucher) {
3582 if (!parent || !parent.
IsSet()) {
3609 }
else if (seh.
IsSeq()) {
3620 appropriate_parent = gps;
3625 appropriate_parent = gp;
3627 appropriate_parent = np;
3630 appropriate_parent = seh;
3632 return appropriate_parent;
3644 return *find_iter->second;
3649 *pub, pInfo->m_pmids, pInfo->m_muids,
3650 pInfo->m_serials, pInfo->m_published_labels,
3651 pInfo->m_unpublished_labels);
3684 return find_iter->second;
3705 for( ; feat_ci; ++feat_ci ) {
3714 SFeatKey any_type_key = inner_feat_key;
3718 SFeatKey any_subtype_key = inner_feat_key;
3723 SFeatKey any_type_or_subtype_key = inner_feat_key;
3726 m_featCache[any_type_or_subtype_key].push_back(*feat_ci);
3738 const vector<SFeatKey> &featKeys)
3740 if( featKeys.empty() ) {
3746 ITERATE(vector<SFeatKey>, feat_it, featKeys) {
3747 if( feat_it->bioseq_h != bioseq_h ) {
3748 throw runtime_error(
"GetFeatFromCacheMulti must be called with only 1 bioseq in its args");
3756 ITERATE(vector<SFeatKey>, key_it, featKeys ) {
3759 set_of_feats, set_of_feats.
begin()));
3771 if( set_of_feats.
find(*feat_it) != set_of_feats.
end() ) {
3772 answer->push_back(*feat_it);
3815 _ASSERT(search_bsh || tse_arg);
3833 for( ; gene_ci; ++gene_ci ) {
3848 const string & locus_tag = (
3864 return find_iter->second;
3880 for( ; bioseq_ci; ++bioseq_ci ) {
3882 for( ; feat_ci; ++feat_ci ) {
3896 return find_iter->second;
3899 return kEmptyFeatToBioseqCache;
3914 for( ; bioseq_ci; ++bioseq_ci ) {
3928 return find_iter->second;
3931 return s_EmptyResult;
3946 for ( CSeq_loc_CI citer (loc); citer; ++citer) {
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eExtreme_Positional
numerical value
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
@ eErr_SEQ_FEAT_WrongQualOnImpFeat
@ eErr_SEQ_DESCR_ObsoleteSourceQual
@ eErr_SEQ_DESCR_ObsoleteSourceLocation
@ eErr_SEQ_INST_FarFetchFailure
@ eErr_SEQ_FEAT_WholeLocation
@ eErr_GENERIC_MissingPubRequirement
@ eErr_SEQ_FEAT_EcNumberProblem
@ eErr_SEQ_FEAT_DuplicateAnticodonInterval
@ eErr_SEQ_INST_CompleteGenomeHasGaps
@ eErr_SEQ_FEAT_CDShasTooManyXs
@ eErr_SEQ_FEAT_TranslExceptPhase
@ eErr_SEQ_FEAT_MinusStrandProtein
@ eErr_SEQ_INST_CompleteTitleProblem
@ eErr_SEQ_DESCR_UnwantedCompleteFlag
@ eErr_SEQ_FEAT_GeneXrefWithoutLocus
@ eErr_SEQ_FEAT_BadLocation
@ eErr_SEQ_FEAT_GenesInconsistent
@ eErr_SEQ_INST_HighNContentStretch
@ eErr_SEQ_PKG_NoBioseqFound
@ eErr_SEQ_FEAT_PseudoRnaHasProduct
@ eErr_SEQ_DESCR_InconsistentBioSources
@ eErr_GENERIC_PastReleaseDate
@ eErr_SEQ_DESCR_BioSourceDbTagConflict
@ eErr_SEQ_FEAT_UnknownImpFeatQual
@ eErr_SEQ_FEAT_DuplicateExonInterval
@ eErr_GENERIC_UnnecessaryPubEquiv
@ eErr_SEQ_DESCR_BioSourceOnProtein
@ eErr_SEQ_DESCR_LatLonRange
@ eErr_SEQ_FEAT_UnnecessaryTranslExcept
@ eErr_SEQ_GRAPH_GraphBioseqId
@ eErr_SEQ_FEAT_MixedStrand
@ eErr_SEQ_FEAT_BadRRNAcomponentOrder
@ eErr_SEQ_DESCR_DuplicatePCRPrimerSequence
@ eErr_SEQ_FEAT_BadGeneOntologyFormat
@ eErr_SEQ_DESCR_LatLonCountry
@ eErr_SEQ_PKG_NucProtSetHasTitle
@ eErr_SEQ_FEAT_IllegalDbXref
@ eErr_GENERIC_SgmlPresentInText
@ eErr_SEQ_FEAT_BadAnticodonAA
@ eErr_SEQ_FEAT_MissingCDSproduct
@ eErr_SEQ_FEAT_FeatureBeginsOrEndsInGap
@ eErr_SEQ_FEAT_TranslExceptAndRnaEditing
@ eErr_GENERIC_BarcodeTooManyNs
@ eErr_SEQ_PKG_BioseqSetClassNotSet
@ eErr_SEQ_DESCR_NoOrgFound
@ eErr_SEQ_FEAT_MissingProteinName
@ eErr_SEQ_DESCR_BadPCRPrimerSequence
@ eErr_SEQ_FEAT_GeneXrefWithoutGene
@ eErr_SEQ_DESCR_TransgenicProblem
@ eErr_SEQ_PKG_MissingSetTitle
@ eErr_SEQ_FEAT_InvalidQualifierValue
@ eErr_SEQ_FEAT_GeneOntologyTermMissingGOID
@ eErr_SEQ_FEAT_ProtRefHasNoData
@ eErr_SEQ_GRAPH_GraphSeqLocLen
@ eErr_SEQ_DESCR_InvalidForType
@ eErr_SEQ_DESCR_LatLonValue
@ eErr_SEQ_FEAT_FeatureCitationProblem
@ eErr_SEQ_DESCR_IdenticalInstitutionCode
@ eErr_SEQ_PKG_ImproperlyNestedSets
@ eErr_SEQ_INST_UnknownLengthGapNot100
@ eErr_SEQ_FEAT_WrongQualOnFeature
@ eErr_SEQ_FEAT_MultipleProtRefs
@ eErr_SEQ_FEAT_MultipleEquivPublications
@ eErr_SEQ_PKG_SeqSubmitWithWgsSet
@ eErr_SEQ_PKG_InconsistentMoltypeSet
@ eErr_SEQ_INST_ConflictingBiomolTech
@ eErr_SEQ_FEAT_MissingQualOnImpFeat
@ eErr_SEQ_PKG_INSDRefSeqPackaging
@ eErr_SEQ_FEAT_LocusCollidesWithLocusTag
@ eErr_SEQ_PKG_GPSnonGPSPackaging
@ eErr_SEQ_DESCR_BadCollectionDate
@ eErr_SEQ_FEAT_MultipleEquivBioSources
@ eErr_SEQ_FEAT_CDSwithNoMRNAOverlap
@ eErr_SEQ_DESCR_BadInstitutionCode
@ eErr_SEQ_FEAT_PeptideFeatOutOfFrame
@ eErr_SEQ_FEAT_ProteinNameHasPMID
@ eErr_SEQ_FEAT_RepeatRegionNeedsNote
@ eErr_SEQ_DESCR_BadAltitude
@ eErr_SEQ_FEAT_GeneXrefStrandProblem
@ eErr_SEQ_FEAT_MissingTrnaAA
@ eErr_GENERIC_NonAsciiAsn
@ eErr_SEQ_FEAT_CDSwithMultipleMRNAs
@ eErr_SEQ_FEAT_CollidingFeatureIDs
@ eErr_SEQ_DESCR_IncorrectlyFormattedVoucherID
@ eErr_SEQ_FEAT_OrfCdsHasProduct
@ eErr_SEQ_FEAT_ImproperBondLocation
@ eErr_SEQ_PKG_GraphPackagingProblem
@ eErr_SEQ_INST_OverlappingDeltaRange
@ eErr_SEQ_FEAT_BadTranssplicedInterval
@ eErr_SEQ_INST_SeqLocLength
@ eErr_SEQ_DESCR_MultipleTaxonIDs
@ eErr_SEQ_DESCR_BadKeyword
@ eErr_SEQ_FEAT_UnknownImpFeatKey
@ eErr_SEQ_DESCR_Inconsistent
@ eErr_SEQ_PKG_ArchaicFeatureLocation
@ eErr_GENERIC_BarcodeTestFails
@ eErr_SEQ_FEAT_NestedSeqLocMix
@ eErr_SEQ_FEAT_ShortIntron
@ eErr_SEQ_FEAT_UnknownFeatureQual
@ eErr_SEQ_DESCR_MultipleChromosomes
@ eErr_SEQ_FEAT_InconsistentGeneOntologyTermAndId
@ eErr_SEQ_PKG_MisplacedMolInfo
@ eErr_GENERIC_EmbeddedScript
@ eErr_GENERIC_BarcodeTestPasses
@ eErr_SEQ_GRAPH_GraphAbove
@ eErr_SEQ_FEAT_FeatureInsideGap
@ eErr_SEQ_FEAT_DifferntIdTypesInSeqLoc
@ eErr_SEQ_FEAT_BadFullLengthFeature
@ eErr_SEQ_FEAT_BadCharInAuthorName
@ eErr_SEQ_FEAT_FarLocation
@ eErr_SEQ_INST_BadHTGSeq
@ eErr_SEQ_FEAT_InvalidFuzz
@ eErr_SEQ_FEAT_InvalidInferenceValue
@ eErr_SEQ_FEAT_GeneXrefNeeded
@ eErr_SEQ_INST_UnexpectedIdentifierChange
@ eErr_SEQ_FEAT_InconsistentRRNAstrands
@ eErr_SEQ_PKG_ArchaicFeatureProduct
@ eErr_SEQ_DESCR_MultipleSourceQualifiers
@ eErr_SEQ_FEAT_BadRRNAcomponentOverlap
@ eErr_SEQ_FEAT_BadTrailingCharacter
@ eErr_SEQ_DESCR_WrongVoucherType
@ eErr_SEQ_INST_ProteinsHaveGeneralID
@ eErr_SEQ_GRAPH_GraphOutOfOrder
@ eErr_SEQ_FEAT_BadInternalCharacter
@ eErr_SEQ_DESCR_NoSourceDescriptor
@ eErr_SEQ_DESCR_BadCollectionCode
@ eErr_SEQ_FEAT_BadProteinName
@ eErr_SEQ_FEAT_FeatureProductInconsistency
@ eErr_GENERIC_PublicationInconsistency
@ eErr_GENERIC_CollidingSerialNumbers
@ eErr_SEQ_PKG_ComponentMissingTitle
@ eErr_SEQ_DESCR_DBLinkMissingUserObject
@ eErr_SEQ_PKG_InternalGenBankSet
@ eErr_SEQ_DESCR_BioSourceMissing
@ eErr_SEQ_FEAT_BadAnticodonCodon
@ eErr_SEQ_FEAT_BadTrailingHyphen
@ eErr_SEQ_FEAT_OldLocusTagMismtach
@ eErr_SEQ_DESCR_MolInfoConflictsWithBioSource
@ eErr_SEQ_FEAT_UTRdoesNotAbutCDS
@ eErr_SEQ_FEAT_PseudoRnaViaGeneHasProduct
@ eErr_SEQ_FEAT_ConflictFlagSet
@ eErr_SEQ_FEAT_StrandOther
@ eErr_SEQ_PKG_FeaturePackagingProblem
@ eErr_SEQ_DESCR_MultipleNames
@ eErr_SEQ_INST_BadSeqIdFormat
@ eErr_SEQ_PKG_GenomicProductPackagingProblem
@ eErr_INTERNAL_Exception
@ eErr_SEQ_FEAT_BadEcNumberFormat
@ eErr_SEQ_FEAT_CDSproductPackagingProblem
@ eErr_SEQ_FEAT_RedundantFields
@ eErr_SEQ_INST_InternalNsInSeqRaw
@ eErr_SEQ_DESCR_BadOrgMod
@ eErr_SEQ_INST_TerminalNs
@ eErr_SEQ_DESCR_BadOrganelleLocation
@ eErr_SEQ_FEAT_NoNameForProtein
@ eErr_SEQ_FEAT_RptUnitRangeProblem
@ eErr_SEQ_FEAT_SeqLocOrder
@ eErr_SEQ_DESCR_TaxonomyIsSpeciesProblem
@ eErr_SEQ_FEAT_CDSmRNAXrefLocationProblem
@ eErr_SEQ_PKG_SingleItemSet
@ eErr_SEQ_DESCR_BioSourceNeedsChromosome
@ eErr_SEQ_FEAT_VectorContamination
@ eErr_SEQ_FEAT_AbuttingIntervals
@ eErr_SEQ_FEAT_LocusTagProblem
@ eErr_SEQ_DESCR_BioSourceInconsistency
@ eErr_SEQ_FEAT_OnlyGeneXrefs
@ eErr_SEQ_FEAT_TranslExcept
@ eErr_SEQ_INST_InternalGapsInSeqRaw
@ eErr_SEQ_FEAT_GeneRefHasNoData
@ eErr_SEQ_INST_DuplicateSegmentReferences
@ eErr_SEQ_FEAT_TooManyInferenceAccessions
@ eErr_SEQ_FEAT_TerminalXDiscrepancy
@ eErr_SEQ_FEAT_MiscFeatureNeedsNote
@ eErr_SEQ_DESCR_CollidingPublications
@ eErr_SEQ_FEAT_GenomeSetMixedStrand
@ eErr_SEQ_FEAT_BadCharInAuthorLastName
@ eErr_SEQ_FEAT_HypotheticalProteinMismatch
@ eErr_SEQ_INST_TpaAssemblyProblem
@ eErr_SEQ_FEAT_MissingGeneXref
CSeq_entry * GetParentEntry(void) const
static void GetPubdescLabels(const CPubdesc &pd, vector< TEntrezId > &pmids, vector< TEntrezId > &muids, vector< int > &serials, vector< string > &published_labels, vector< string > &unpublished_labels)
For Publication Citations Get labels for a pubdesc.
bool GetDBFlags(bool &is_refseq, bool &is_src, string &correct_caps) const
bool IsSkippable(void) const
CConstRef< CSeq_feat > GetGeneFromCache(const CSeq_feat *feat, CScope &scope)
void GetLabel(string *label) const
static CNcbiApplication * Instance(void)
Singleton method.
const string & GetDivision(void) const
bool IsSetDivision(void) const
@Pubdesc.hpp User-defined methods of the data storage class.
ESubtype GetSubtype(void) const
static bool RequireLocationIntervalsInBiologicalOrder(ESubtype subtype)
static bool AllowAdjacentIntervals(ESubtype subtype)
@ eSubtype_bad
These no longer need to match the FEATDEF values in the C toolkit's objfdef.h.
void GetLabel(string *label, ELabelType type) const
CSeq_entry * GetParentEntry(void) const
namespace ncbi::objects::
Base class for all serializable objects.
CSeq_entry_Handle GetTopLevelEntry(void) const
Get top level Seq-entry handle.
TSeq_feat_Handles GetFeaturesWithId(CSeqFeatData::E_Choice type, TFeatureIdInt id) const
CScope & GetScope(void) const
Returns scope.
Template class for iteration on objects of class C (non-modifiable version)
CTypeInfo class contains all information about C++ types (both basic and classes): members and layout...
Thrown on an attempt to write unassigned data member.
void ValidateSeqAlign(const CSeq_align &align, int order=-1)
void ValidateSeqAnnot(const CSeq_annot_Handle &annot)
virtual ~CValidError_base()
static CSeq_entry_Handle GetAppropriateXrefParent(CSeq_entry_Handle seh)
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
CValidError_base(CValidError_imp &imp)
void ValidateBioseq(const CBioseq &seq)
bool GetTSAConflictingBiomolTechErrors(const CBioseq &seq)
bool GetTSANStretchErrors(const CBioseq &seq)
void ValidateBioseqSet(const CBioseq_set &seqset)
void ValidateSeqDesc(const CSeqdesc &desc, const CSeq_entry &ctx)
Validate descriptors as stand alone objects (no context)
void SetScope(CScope &scope)
void SetTSE(CSeq_entry_Handle seh)
bool GetTSACDSOnMinusStrandErrors(const CSeq_feat &feat, const CBioseq &seq)
static bool GetPrefixAndAccessionFromInferenceAccession(string inf_accession, string &prefix, string &accession)
void ValidateSeqFeat(const CSeq_feat &feat)
static vector< string > GetAccessionsFromInferenceString(string inference, string &prefix, string &remainder, bool &same_species)
void ValidateSeqGraph(const CSeq_graph &graph)
void x_ReportInvalidFuzz(const CPacked_seqint &packed_int, const CSerialObject &obj)
CRef< CObjectManager > m_ObjMgr
void SetScope(const CSeq_entry &se)
void FindCollidingSerialNumbers(const CSerialObject &obj)
const CSeq_entry_Handle & GetTSEH()
static bool BadMultipleSequenceLocation(const CSeq_loc &loc, CScope &scope)
void PostErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj)
static bool IsTSAIntermediate(const CBioseq &seq)
void x_CheckPackedInt(const CPacked_seqint &packed_int, SLocCheck &lc, const CSerialObject &obj)
static bool IsInOrganelleSmallGenomeSet(const CSeq_id &id, CScope &scope)
const CBioSourceKind & BioSourceKind() const
bool m_ValidateAlignments
SIZE_TYPE m_NumPseudogene
bool HasGiOrAccnVer() const
SIZE_TYPE m_NumTpaWithHistory
void SetTSE(const CSeq_entry_Handle &seh)
bool m_FarFetchCDSproducts
const SValidatorContext & GetContext() const
CValidator::TProgressCallback m_PrgCallback
bool m_GenerateGoldenFile
CValidError * m_ErrRepository
CConstRef< CSeq_feat > GetmRNAGivenProduct(const CBioseq &seq)
bool IsValidateAlignments() const
CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id &id)
size_t m_NumTopSetSiblings
void ValidateCitations(const CSeq_entry_Handle &seh)
bool DoesAnyFeatLocHaveGI() const
void FindNonAsciiText(const CSerialObject &obj)
void AddBioseqWithNoBiosource(const CBioseq &seq)
void ValidateSeqLocIds(const CSeq_loc &loc, const CSerialObject &obj)
bool GenerateGoldenFile() const
bool IsStandaloneAnnot() const
void x_DoBarcodeTests(CSeq_entry_Handle seh)
CConstRef< CSeq_annot > m_SeqAnnot
bool DoesAnyProductLocHaveGI() const
bool GetTSAConflictingBiomolTechErrors(const CSeq_entry_Handle &se)
void x_AddValidErrItem(EDiagSev sev, EErrType type, const string &msg, const string &desc, const CSerialObject &obj, const string &accession, const int version)
unique_ptr< CValidatorEntryInfo > m_pEntryInfo
SIZE_TYPE m_NumMisplacedGraphs
void PostObjErr(EDiagSev sv, EErrType et, const string &msg, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void Setup(const CSeq_entry_Handle &seh)
bool Validate(const CSeq_entry &se, const CCit_sub *cs=nullptr, CScope *scope=nullptr)
void InitializeSourceQualTags()
SIZE_TYPE m_NumTpaWithoutHistory
static bool IsWGSIntermediate(const CBioseq &seq)
CValidator::CProgressInfo m_PrgInfo
void ValidateDbxref(const CDbtag &xref, const CSerialObject &obj, bool biosource=false, const CSeq_entry *ctx=nullptr)
bool IsSerialNumberInComment(const string &comment)
void ValidateTaxonomy(const CSeq_entry &se)
bool IsFarSequence(const CSeq_id &id)
const CTSE_Handle & GetTSE_Handle()
void FindEmbeddedScript(const CSerialObject &obj)
bool IsHugeFileMode() const
SIZE_TYPE m_NumSmallGenomeSetMisplaced
void ValidateCitSub(const CCit_sub &cs, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void SetOptions(Uint4 options)
bool m_ValidateInferenceAccessions
bool m_LocusTagGeneralMatch
void ValidateSubmitBlock(const CSubmit_block &block, const CSeq_submit &ss)
bool IsNoCitSubPubs() const
void SetErrorRepository(CValidError *errors)
vector< CConstRef< CBioseq > > m_BioseqWithNoSource
void ValidateAffil(const CAffil::TStd &std, const CSerialObject &obj, const CSeq_entry *ctx)
CConstRef< CSeq_feat > GetCDSGivenProduct(const CBioseq &seq)
CBioseq_Handle GetLocalBioseqHandle(const CSeq_id &id)
const CSeq_entry * GetAncestor(const CBioseq &seq, CBioseq_set::EClass clss)
bool x_IsFarFetchFailure(const CSeq_loc &loc)
CValidError_imp(CObjectManager &objmgr, shared_ptr< SValidatorContext > pContext, CValidError *errors, Uint4 options=0)
void PostBadDateError(EDiagSev sv, const string &msg, int flags, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void AddProtWithoutFullRef(const CBioseq_Handle &seq)
void ValidateBioSource(const CBioSource &bsrc, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
const CValidatorEntryInfo & GetEntryInfo() const
bool RaiseGenomeSeverity(EErrType et)
SIZE_TYPE m_NumBioseq_set
bool x_CheckSeqInt(CConstRef< CSeq_id > &id_cur, const CSeq_interval *int_cur, ENa_strand &strand_cur, const CSerialObject &obj)
bool RequireLocalProduct(const CSeq_id *sid) const
bool m_ReportSpliceAsError
bool IsFarFetchCDSproducts() const
bool DoesAnyProteinHaveGeneralID() const
virtual ~CValidError_imp()
void x_Init(Uint4 options)
void ReportMissingPubs(const CSeq_entry &se, const CCit_sub *cs)
bool IsNoBioSource() const
bool IsMixedStrands(const CSeq_loc &loc)
bool m_FarFetchMRNAproducts
bool IsLocalGeneralOnly() const
CBioSourceKind m_biosource_kind
CConstRef< CSeq_entry > m_TSE
void x_InitLocCheck(SLocCheck &lc, const string &prefix)
SValidatorContext & SetContext()
bool IsIndexerVersion() const
CGeneCache & GetGeneCache()
bool IsSmallGenomeSet() const
void SetProgressCallback(CValidator::TProgressCallback callback, void *user_data)
bool GetTSACDSOnMinusStrandErrors(const CSeq_entry_Handle &se)
void ValidateMultipleTaxIds(const CSeq_entry_Handle &seh)
bool IsHugeSet(const CBioseq_set &bioseqSet) const
void ValidateSeqLoc(const CSeq_loc &loc, const CBioseq_Handle &seq, bool report_abutting, const string &prefix, const CSerialObject &obj, bool lowerSev=false)
bool GetTSANStretchErrors(const CSeq_entry_Handle &se)
shared_ptr< SValidatorContext > m_pContext
bool DoesAnyGeneHaveLocusTag() const
void x_CheckLoc(const CSeq_loc &loc, const CSerialObject &obj, SLocCheck &lc, bool lowerSev=false)
const CSeq_entry & GetTSE() const
bool IsFarFetchMRNAproducts() const
CValidatorEntryInfo & x_SetEntryInfo()
SIZE_TYPE m_NumMisplacedFeatures
void x_CheckForStrandChange(SLocCheck &lc)
void ReportMissingBiosource(const CSeq_entry &se)
void ValidatePubdesc(const CPubdesc &pub, const CSerialObject &obj, const CSeq_entry *ctx=nullptr)
void AddValidErrItem(EDiagSev sev, unsigned int ec, const string &msg, const string &desc, const CSerialObject &obj, const string &acc, const int ver, const string &location=kEmptyStr, const int seq_offset=0)
void SetNoBioSource(bool val=true)
void SetPatent(bool val=true)
void SetGpipe(bool val=true)
void SetGenomic(bool val=true)
void SetProductLocHasGI(bool val=true)
bool DoesAnyGeneHaveLocusTag() const
void SetPDB(bool val=true)