NCBI C++ ToolKit
validerror_imp.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: validerror_imp.hpp 103135 2024-09-12 15:25:15Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jonathan Kans, Clifford Clausen, Aaron Ucko......
27  *
28  * File Description:
29  * Private classes and definitions for the validator
30  * .......
31  *
32  */
33 
34 #ifndef VALIDATOR___VALIDERROR_IMP__HPP
35 #define VALIDATOR___VALIDERROR_IMP__HPP
36 
37 #include <corelib/ncbistd.hpp>
39 
40 #include <objmgr/scope.hpp>
41 #include <objmgr/feat_ci.hpp> // for CMappedFeat
44 #include <objects/seq/GIBB_mol.hpp>
54 
61 
63 
65 
66 #include <objmgr/util/feature.hpp>
67 #include <memory>
68 
71 
72 class CSeq_entry;
73 class CCit_sub;
74 class CCit_art;
75 class CCit_gen;
76 class CSeq_feat;
77 class CBioseq;
78 class CSeqdesc;
79 class CSeq_annot;
80 class CTrna_ext;
81 class CProt_ref;
82 class CSeq_loc;
83 class CFeat_CI;
84 class CPub_set;
85 class CAuth_list;
86 class CTitle;
87 class CMolInfo;
88 class CUser_object;
89 class CSeqdesc_CI;
90 class CSeq_graph;
91 class CMappedGraph;
92 class CDense_diag;
93 class CDense_seg;
94 class CSeq_align_set;
95 class CPubdesc;
96 class CBioSource;
97 class COrg_ref;
98 class CByte_graph;
99 class CDelta_seq;
100 class CGene_ref;
101 class CCdregion;
102 class CRNA_ref;
103 class CImp_feat;
104 class CSeq_literal;
105 class CBioseq_Handle;
106 class CSeq_feat_Handle;
107 class CCountries;
109 class CComment_set;
110 class CTaxon3_reply;
111 class ITaxon3;
112 class CT3Error;
113 
114 BEGIN_SCOPE(validator)
115 
116 
117 struct SValidatorContext;
118 class CValidError_desc;
119 class CValidError_descr;
121 
122 
123 // =========================== Central Validation ==========================
124 
125 // CValidError_imp provides the entry point to the validation process.
126 // It calls upon the various validation classes to perform validation of
127 // each part.
128 // The class holds all the data for the validation process.
130 {
131 public:
133 
135  shared_ptr<SValidatorContext> pContext,
136  IValidError* errors,
137  Uint4 options=0);
138 
139  // Destructor
140  virtual ~CValidError_imp();
141 
142  void SetOptions(Uint4 options);
143  void SetErrorRepository(IValidError* errors);
144  void Reset(size_t initialInferenceCount, bool notJustLocalOrGeneral, bool hasRefSeq);
145 
146  // Validation methods
147  bool Validate(const CSeq_entry& se, const CCit_sub* cs = nullptr,
148  CScope* scope = nullptr);
149  bool Validate(
150  const CSeq_entry_Handle& seh, const CCit_sub* cs = nullptr);
151  void Validate(
152  const CSeq_submit& ss, CScope* scope = nullptr);
153  void Validate(const CSeq_annot_Handle& sa);
154 
155  void Validate(const CSeq_feat& feat, CScope* scope = nullptr);
156  void Validate(const CBioSource& src, CScope* scope = nullptr);
157  void Validate(const CPubdesc& pubdesc, CScope* scope = nullptr);
158  void Validate(const CSeqdesc& desc, const CSeq_entry& ctx);
159  void ValidateSubAffil(const CAffil::TStd& std, const CSerialObject& obj, const CSeq_entry *ctx);
160  void ValidateAffil(const CAffil::TStd& std, const CSerialObject& obj, const CSeq_entry *ctx);
161 
162  bool GetTSANStretchErrors(const CSeq_entry_Handle& se);
163  bool GetTSACDSOnMinusStrandErrors (const CSeq_entry_Handle& se);
164  bool GetTSAConflictingBiomolTechErrors (const CSeq_entry_Handle& se);
165  bool GetTSANStretchErrors(const CBioseq& seq);
166  bool GetTSACDSOnMinusStrandErrors (const CSeq_feat& f, const CBioseq& seq);
167  bool GetTSAConflictingBiomolTechErrors (const CBioseq& seq);
168 
169 
170  void SetProgressCallback(CValidator::TProgressCallback callback,
171  void* user_data);
172 
173  void SetTSE(const CSeq_entry_Handle& seh);
174 
175  bool ShouldSubdivide() const { if (m_NumTopSetSiblings > 1000) return true; else return false; }
176 
177  SValidatorContext& SetContext();
178  const SValidatorContext& GetContext() const;
179 
180  bool IsHugeFileMode() const;
181  bool IsHugeSet(const CBioseq_set& bioseqSet) const;
182  bool IsHugeSet(CBioseq_set::TClass setClass) const;
183 
184 public:
185  // interface to be used by the various validation classes
186 
187  // typedefs:
188  typedef const CSeq_feat& TFeat;
189  typedef const CBioseq& TBioseq;
190  typedef const CBioseq_set& TSet;
191  typedef const CSeqdesc& TDesc;
192  typedef const CSeq_annot& TAnnot;
193  typedef const CSeq_graph& TGraph;
194  typedef const CSeq_align& TAlign;
195  typedef const CSeq_entry& TEntry;
197 
198 
199  const CValidatorEntryInfo& GetEntryInfo() const;
200 
201  // Posts errors.
202  void PostErr(EDiagSev sv, EErrType et, const string& msg,
203  const CSerialObject& obj);
204  void PostErr(EDiagSev sv, EErrType et, const string& msg, TDesc ds);
205  void PostErr(EDiagSev sv, EErrType et, const string& msg, TFeat ft);
206  void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq);
207  void PostErr(EDiagSev sv, EErrType et, const string& msg, TEntry ctx,
208  TDesc ds);
209  void PostErr(EDiagSev sv, EErrType et, const string& msg, TSet set);
210  void PostErr(EDiagSev sv, EErrType et, const string& msg, TAnnot annot);
211  void PostErr(EDiagSev sv, EErrType et, const string& msg, TGraph graph);
212  void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq,
213  TGraph graph);
214  void PostErr(EDiagSev sv, EErrType et, const string& msg, TAlign align);
215  void PostErr(EDiagSev sv, EErrType et, const string& msg, TEntry entry);
216  void PostErr(EDiagSev sv, EErrType et, const string& msg, const CBioSource& src);
217  void PostErr(EDiagSev sv, EErrType et, const string& msg, const COrg_ref& org);
218  void PostErr(EDiagSev sv, EErrType et, const string& msg, const CPubdesc& src);
219  void PostErr(EDiagSev sv, EErrType et, const string& msg, const CSeq_submit& ss);
220  void PostObjErr (EDiagSev sv, EErrType et, const string& msg, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
221  void PostBadDateError (EDiagSev sv, const string& msg, int flags, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
222 
223  void HandleTaxonomyError(const CT3Error& error, const string& host, const COrg_ref& orf);
224  void HandleTaxonomyError(const CT3Error& error, const EErrType type, const CSeq_feat& feat);
225  void HandleTaxonomyError(const CT3Error& error, const EErrType type, const CSeqdesc& desc, const CSeq_entry* entry);
226 
227  bool RaiseGenomeSeverity(EErrType et);
228 
229  // General use validation methods
230  void ValidatePubdesc(const CPubdesc& pub, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
231  void ValidateBioSource(const CBioSource& bsrc, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
232  void ValidatePCRReactionSet(const CPCRReactionSet& pcrset, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
233  void ValidateSubSource(const CSubSource& subsrc, const CSerialObject& obj, const CSeq_entry *ctx = nullptr, const bool isViral = false, const bool isInfluenzaOrSars2 = false);
234  void ValidateOrgRef(const COrg_ref& orgref, const CSerialObject& obj, const CSeq_entry *ctx, const bool checkForUndefinedSpecies = false, const bool is_single_cell_amplification = false);
235  void ValidateTaxNameOrgname(const string& taxname, const COrgName& orgname, const CSerialObject& obj, const CSeq_entry *ctx);
236  void ValidateOrgName(const COrgName& orgname, const bool has_taxon, const CSerialObject& obj, const CSeq_entry *ctx);
237  void ValidateOrgModVoucher(const COrgMod& orgmod, const CSerialObject& obj, const CSeq_entry *ctx);
238  void ValidateBioSourceForSeq(const CBioSource& bsrc, const CSerialObject& obj, const CSeq_entry *ctx, const CBioseq_Handle& bsh);
239 
240  void ValidateLatLonCountry(string countryname, string lat_lon, const CSerialObject& obj, const CSeq_entry *ctx);
241 
242  bool IsSyntheticConstruct (const CBioSource& src);
243  bool IsArtificial (const CBioSource& src);
244  bool IsOtherDNA(const CBioseq_Handle& bsh) const;
245  void ValidateSeqLoc(const CSeq_loc& loc, const CBioseq_Handle& seq, bool report_abutting,
246  const string& prefix, const CSerialObject& obj, bool lowerSev = false);
247 
248  void ValidateSeqLocIds(const CSeq_loc& loc, const CSerialObject& obj);
249  NCBI_STD_DEPRECATED("Please use corresponding function in objtools/validator/utilities.hpp")
251  NCBI_STD_DEPRECATED("Please use corresponding function in objtools/validator/utilities.hpp")
253  void CheckMultipleIds(const CSeq_loc& loc, const CSerialObject& obj);
254  void ValidateDbxref(const CDbtag& xref, const CSerialObject& obj,
255  bool biosource = false, const CSeq_entry *ctx = nullptr);
256  void ValidateDbxref(TDbtags& xref_list, const CSerialObject& obj,
257  bool biosource = false, const CSeq_entry *ctx = nullptr);
258  void ValidateCitSub(const CCit_sub& cs, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
259  void ValidateTaxonomy(const CSeq_entry& se);
260  void ValidateOrgRefs(CTaxValidationAndCleanup& tval);
261  void ValidateSpecificHost(CTaxValidationAndCleanup& tval);
262  void ValidateStrain(CTaxValidationAndCleanup& tval, TTaxId descTaxID = ZERO_TAX_ID);
263  void ValidateSpecificHost (const CSeq_entry& se);
264  void ValidateTentativeName(const CSeq_entry& se);
265  void ValidateTaxonomy(const COrg_ref& org, CBioSource::TGenome genome = CBioSource::eGenome_unknown);
266  void ValidateMultipleTaxIds(const CSeq_entry_Handle& seh);
267  void ValidateCitations (const CSeq_entry_Handle& seh);
268  bool x_IsFarFetchFailure (const CSeq_loc& loc);
269 
270  // getters
271  inline CScope* GetScope() { return m_Scope; }
272  inline CCacheImpl& GetCache() { return m_cache; }
273 
274  inline CConstRef<CSeq_feat> GetCachedGene(const CSeq_feat* f) { return m_GeneCache.GetGeneFromCache(f, *m_Scope); }
275  inline CGeneCache& GetGeneCache() { return m_GeneCache; }
276 
277  // flags derived from options parameter
278  bool IsNonASCII() const { return m_NonASCII; }
279  bool IsSuppressContext() const { return m_SuppressContext; }
280  bool IsValidateAlignments() const { return m_ValidateAlignments; }
281  bool IsValidateExons() const { return m_ValidateExons; }
282  bool IsOvlPepErr() const { return m_OvlPepErr; }
283  bool IsRequireTaxonID() const { return !m_SeqSubmitParent; }
284  bool IsSeqSubmitParent() const { return m_SeqSubmitParent; }
285  bool IsRequireISOJTA() const { return m_RequireISOJTA; }
286  bool IsValidateIdSet() const { return m_ValidateIdSet; }
287  bool IsRemoteFetch() const { return m_RemoteFetch; }
288  bool IsFarFetchMRNAproducts() const { return m_FarFetchMRNAproducts; }
289  bool IsFarFetchCDSproducts() const { return m_FarFetchCDSproducts; }
290  bool IsLocusTagGeneralMatch() const { return m_LocusTagGeneralMatch; }
291  bool DoRubiscoTest() const { return m_DoRubiscoText; }
292  bool IsIndexerVersion() const { return m_IndexerVersion; }
293  bool IsGenomeSubmission() const { return m_genomeSubmission; }
294  bool UseEntrez() const { return m_UseEntrez; }
295  bool DoTaxLookup() const { return m_DoTaxLookup; }
296  bool ValidateInferenceAccessions() const { return m_ValidateInferenceAccessions; }
297  bool IgnoreExceptions() const { return m_IgnoreExceptions; }
298  bool ReportSpliceAsError() const { return m_ReportSpliceAsError; }
299  bool IsLatLonCheckState() const { return m_LatLonCheckState; }
300  bool IsLatLonIgnoreWater() const { return m_LatLonIgnoreWater; }
301  bool IsRefSeqConventions() const { return m_RefSeqConventions; }
302  bool GenerateGoldenFile() const { return m_GenerateGoldenFile; }
303  bool DoCompareVDJCtoCDS() const { return m_CompareVDJCtoCDS; }
304  bool IgnoreInferences() const { return m_IgnoreInferences; }
305 
306 
307  // flags calculated by examining data in record
308  bool IsStandaloneAnnot() const { return m_IsStandaloneAnnot; }
309  bool IsNoPubs() const;
310  bool IsNoCitSubPubs() const;
311  bool IsNoBioSource() const;
312  bool IsGPS() const;
313  bool IsGED() const;
314  bool IsPDB() const;
315  bool IsPatent() const;
316  bool IsRefSeq() const;
317  bool IsEmbl() const;
318  bool IsDdbj() const;
319  bool IsTPE() const;
320  NCBI_DEPRECATED bool IsNC() const;
321  NCBI_DEPRECATED bool IsNG() const;
322  NCBI_DEPRECATED bool IsNM() const;
323  NCBI_DEPRECATED bool IsNP() const;
324  NCBI_DEPRECATED bool IsNR() const;
325  NCBI_DEPRECATED bool IsNZ() const;
326  NCBI_DEPRECATED bool IsNS() const;
327  bool IsNT() const;
328  NCBI_DEPRECATED bool IsNW() const;
329  bool IsWP() const;
330  bool IsXR() const;
331  bool IsGI() const;
332  bool IsGpipe() const;
333  bool IsHtg() const;
334  bool IsLocalGeneralOnly() const;
335  bool HasGiOrAccnVer() const;
336  bool IsGenomic() const;
337  bool IsSeqSubmit() const;
338  bool IsSmallGenomeSet() const;
339  bool IsNoncuratedRefSeq(const CBioseq& seq, EDiagSev& sev);
340  bool IsGenbank() const;
341  bool DoesAnyFeatLocHaveGI() const;
342  bool DoesAnyProductLocHaveGI() const;
343  bool DoesAnyGeneHaveLocusTag() const;
344  bool DoesAnyProteinHaveGeneralID() const;
345  bool IsINSDInSep() const;
346  bool IsGeneious() const;
347  const CBioSourceKind& BioSourceKind() const;
348  inline bool HasRefSeq(void) const { return m_HasRefSeq; }
349 
350  // counting number of misplaced features
351  inline void ResetMisplacedFeatureCount() { m_NumMisplacedFeatures = 0; }
352  inline void IncrementMisplacedFeatureCount() { m_NumMisplacedFeatures++; }
353  inline void AddToMisplacedFeatureCount(size_t num) { m_NumMisplacedFeatures += num; }
354 
355  // counting number of small genome set misplaced features
356  inline void ResetSmallGenomeSetMisplacedCount() { m_NumSmallGenomeSetMisplaced = 0; }
357  inline void IncrementSmallGenomeSetMisplacedCount() { m_NumSmallGenomeSetMisplaced++; }
358  inline void AddToSmallGenomeSetMisplacedCount(size_t num) { m_NumSmallGenomeSetMisplaced += num; }
359 
360  // counting number of misplaced graphs
361  inline void ResetMisplacedGraphCount() { m_NumMisplacedGraphs = 0; }
362  inline void IncrementMisplacedGraphCount() { m_NumMisplacedGraphs++; }
363  inline void AddToMisplacedGraphCount(size_t num) { m_NumMisplacedGraphs += num; }
364 
365  // counting number of genes and gene xrefs
366  inline void ResetGeneCount() { m_NumGenes = 0; }
367  inline void IncrementGeneCount() { m_NumGenes++; }
368  inline void AddToGeneCount(size_t num) { m_NumGenes += num; }
369  inline size_t GetGeneCount(void) const { return m_NumGenes; }
370  inline void ResetGeneXrefCount() { m_NumGeneXrefs = 0; }
371  inline void IncrementGeneXrefCount() { m_NumGeneXrefs++; }
372  inline void AddToGeneXrefCount(size_t num) { m_NumGeneXrefs += num; }
373  inline size_t GetGeneXrefCount(void) const { return m_NumGeneXrefs; }
374 
375  // counting cumulative number of inference qualifiers with accessions
376  inline void ResetCumulativeInferenceCount() { m_CumulativeInferenceCount = 0; }
377  inline void IncrementCumulativeInferenceCount() { m_CumulativeInferenceCount++; }
378  inline void AddToCumulativeInferenceCount(size_t num) { m_CumulativeInferenceCount += num; }
379  inline size_t GetCumulativeInferenceCount(void) const { return m_CumulativeInferenceCount; }
380 
381  // counting sequences with and without TPA history
382  inline void ResetTpaWithHistoryCount() { m_NumTpaWithHistory = 0; }
383  inline void IncrementTpaWithHistoryCount() { m_NumTpaWithHistory++; }
384  inline void AddToTpaWithHistoryCount(size_t num) { m_NumTpaWithHistory += num; }
385  inline void ResetTpaWithoutHistoryCount() { m_NumTpaWithoutHistory = 0; }
386  inline void IncrementTpaWithoutHistoryCount() { m_NumTpaWithoutHistory++; }
387  inline void AddToTpaWithoutHistoryCount(size_t num) { m_NumTpaWithoutHistory += num; }
388 
389  // counting number of Pseudos and Pseudogenes
390  inline void ResetPseudoCount() { m_NumPseudo = 0; }
391  inline void IncrementPseudoCount() { m_NumPseudo++; }
392  inline void AddToPseudoCount(size_t num) { m_NumPseudo += num; }
393  inline void ResetPseudogeneCount() { m_NumPseudogene = 0; }
394  inline void IncrementPseudogeneCount() { m_NumPseudogene++; }
395  inline void AddToPseudogeneCount(size_t num) { m_NumPseudogene += num; }
396 
397  // set flag for farfetchfailure
398  inline void SetFarFetchFailure() { m_FarFetchFailure = true; }
399 
400  bool IsFarSequence(const CSeq_id& id); // const;
401 
402  const CSeq_entry& GetTSE() const { return *m_TSE; };
403  const CSeq_entry_Handle& GetTSEH() { return m_TSEH; }
404  const CTSE_Handle& GetTSE_Handle() { return
405  (m_TSEH ? m_TSEH.GetTSE_Handle() : CCacheImpl::kEmptyTSEHandle); }
406 
407  CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id& id);
408  CBioseq_Handle GetLocalBioseqHandle(const CSeq_id& id); // Local here means not far
409 
410  const CConstRef<CSeq_annot>& GetSeqAnnot() { return m_SeqAnnot; }
411 
412  void AddBioseqWithNoPub(const CBioseq& seq);
413  void AddBioseqWithNoBiosource(const CBioseq& seq);
414  void AddProtWithoutFullRef(const CBioseq_Handle& seq);
415  static bool IsWGSIntermediate(const CBioseq& seq);
416  static bool IsTSAIntermediate(const CBioseq& seq);
417  void ReportMissingPubs(const CSeq_entry& se, const CCit_sub* cs);
418  void ReportMissingBiosource(const CSeq_entry& se);
419 
420  CConstRef<CSeq_feat> GetCDSGivenProduct(const CBioseq& seq);
421  NCBI_DEPRECATED CConstRef<CSeq_feat> GetmRNAGivenProduct(const CBioseq& seq);
422  CConstRef<CSeq_feat> GetmRNAGivenProduct(const CBioseq_Handle& seq);
423  const CSeq_entry* GetAncestor(const CBioseq& seq, CBioseq_set::EClass clss);
424  bool IsSerialNumberInComment(const string& comment);
425 
426  bool IsTransgenic(const CBioSource& bsrc);
427 
428  bool RequireLocalProduct(const CSeq_id* sid) const;
429 
431  TSuppressed& SetSuppressed();
432 
433 private:
434 
435  // Set up common options during construction.
436  void x_Init(Uint4 options, size_t initialInferenceCount, bool notJustLocalOrGeneral, bool hasRefSeq);
437 
438  // This is so we can temporarily set m_Scope in a function
439  // and be sure that it will be set to its old value when we're done
441  public:
443  m_scopeToRestore(scope), m_scopeOriginalValue(scope) { }
444 
445  ~CScopeRestorer() { m_scopeToRestore = m_scopeOriginalValue; }
446  private:
449  };
450 
451  // Prohibit copy constructor & assignment operator
453  CValidError_imp& operator= (const CValidError_imp&);
454 
455  void Setup(const CSeq_entry_Handle& seh);
456  void Setup(const CSeq_annot_Handle& sa);
457  CSeq_entry_Handle Setup(const CBioseq& seq);
458  void SetScope(const CSeq_entry& se);
459 
460  CValidatorEntryInfo& x_SetEntryInfo();
461 
462  void x_AddValidErrItem(EDiagSev sev,
463  EErrType type,
464  const string& msg,
465  const string& desc,
466  const CSerialObject& obj,
467  const string& accession,
468  const int version);
469 
470  void ValidateSubmitBlock(const CSubmit_block& block, const CSeq_submit& ss);
471 
472  void InitializeSourceQualTags();
473  void ValidateSourceQualTags(const string& str, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
474 
475  bool IsMixedStrands(const CSeq_loc& loc);
476 
477  void ValidatePubGen(const CCit_gen& gen, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
478  void ValidatePubArticle(const CCit_art& art, TEntrezId uid, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
479  void ValidatePubArticleNoPMID(const CCit_art& art, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
480  void x_ValidatePages(const string& pages, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
481  void ValidateAuthorList(const CAuth_list::C_Names& names, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
482  void ValidateAuthorsInPubequiv (const CPub_equiv& pe, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
483  void ValidatePubHasAuthor(const CPubdesc& pubdesc, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
484 
485  bool HasName(const CAuth_list& authors);
486  bool HasTitle(const CTitle& title);
487  bool HasIsoJTA(const CTitle& title);
488 
489  void FindEmbeddedScript(const CSerialObject& obj);
490  void FindNonAsciiText (const CSerialObject& obj);
491  void FindCollidingSerialNumbers (const CSerialObject& obj);
492 
493 
494  void GatherTentativeName (const CSeq_entry& se, vector<CConstRef<CSeqdesc> >& usr_descs, vector<CConstRef<CSeq_entry> >& desc_ctxs, vector<CConstRef<CSeq_feat> >& usr_feats);
495 
496  static bool s_IsSalmonellaGenus(const string& taxname);
497  EDiagSev x_SalmonellaErrorLevel();
498 
499  typedef struct tagSLocCheck {
500  bool chk;
503  bool has_other;
507  const CSeq_interval *int_cur = nullptr;
508  const CSeq_interval *int_prv = nullptr;
511  string prefix;
512  } SLocCheck;
513 
514  void x_InitLocCheck(SLocCheck& lc, const string& prefix);
515  void x_CheckForStrandChange(SLocCheck& lc);
516  void x_CheckLoc(const CSeq_loc& loc, const CSerialObject& obj, SLocCheck& lc, bool lowerSev = false);
517  void x_CheckPackedInt(const CPacked_seqint& packed_int,
518  SLocCheck& lc,
519  const CSerialObject& obj);
520  bool x_CheckSeqInt(CConstRef<CSeq_id>& id_cur,
521  const CSeq_interval * int_cur,
522  ENa_strand& strand_cur);
523  void x_ReportInvalidFuzz(const CPacked_seqint& packed_int, const CSerialObject& obj);
524  void x_ReportInvalidFuzz(const CSeq_interval& interval, const CSerialObject& obj);
525  void x_ReportInvalidFuzz(const CSeq_point& point, const CSerialObject& obj);
526  void x_ReportInvalidFuzz(const CSeq_loc& loc, const CSerialObject& obj);
527  void x_ReportPCRSeqProblem(const string& primer_kind,
528  char badch,
529  const CSerialObject& obj,
530  const CSeq_entry *ctx);
531  void x_CheckPCRPrimer(const CPCRPrimer& primer,
532  const string& primer_kind,
533  const CSerialObject& obj,
534  const CSeq_entry *ctx);
535 
536  void x_DoBarcodeTests(CSeq_entry_Handle seh);
537 
538  bool x_DowngradeForMissingAffil(const CCit_sub& cs);
539 
540  bool x_IsSuppressed(CValidErrItem::TErrIndex errType) const;
541 
547 
550 
551  // error repository
553 
554  // flags derived from options parameter
555  bool m_NonASCII; // User sets if Non ASCII char found
556  bool m_SuppressContext; // Include context in errors if true
557  bool m_ValidateAlignments; // Validate Alignments if true
558  bool m_ValidateExons; // Check exon feature splice sites
559  bool m_OvlPepErr; // Peptide overlap error if true, else warn
560  bool m_RequireISOJTA; // Journal requires ISO JTA
561  bool m_ValidateIdSet; // validate update against ID set in database
562  bool m_RemoteFetch; // Remote fetch enabled?
563  bool m_FarFetchMRNAproducts; // Remote fetch mRNA products
564  bool m_FarFetchCDSproducts; // Remote fetch proteins
572  bool m_IgnoreExceptions; // ignore exceptions when validating translation
573  bool m_ValidateInferenceAccessions; // check that accessions in inferences are valid
578  bool m_CollectLocusTags; // collect locus tags for use in special formatted reports
579  bool m_SeqSubmitParent; // some errors are suppressed if this is run on a newly created submission
583 
584  // flags calculated by examining data in record
586 
587  bool m_IsNC=false;
588  bool m_IsNG=false;
589  bool m_IsNM=false;
590  bool m_IsNP=false;
591  bool m_IsNR=false;
592  bool m_IsNZ=false;
593  bool m_IsNS=false;
594  bool m_IsNT=false;
595  bool m_IsNW=false;
596  bool m_IsWP=false;
597  bool m_IsXR=false;
598 
601 
603 
604  // seq ids contained within the original seq entry.
605  // (used to check for far location)
606  vector< CConstRef<CSeq_id> > m_InitialSeqIds;
607  // Bioseqs without source (should be considered only if m_NoSource is false)
608  vector< CConstRef<CBioseq> > m_BioseqWithNoSource;
609 
610  // list of publication serial numbers
611  vector< int > m_PubSerialNumbers;
612 
615  size_t m_NumAlign;
616  size_t m_NumAnnot;
617  size_t m_NumBioseq;
619  size_t m_NumDesc;
620  size_t m_NumDescr;
621  size_t m_NumFeat;
622  size_t m_NumGraph;
623 
627  size_t m_NumGenes;
629 
633 
636 
637  size_t m_NumPseudo;
639 
641 
642  // Taxonomy service interface.
643  unique_ptr<CTaxValidationAndCleanup> x_CreateTaxValidator() const;
644 
645  shared_ptr<SValidatorContext> m_pContext;
646  unique_ptr<CValidatorEntryInfo> m_pEntryInfo = make_unique<CValidatorEntryInfo>();
647 
649 };
650 
651 
652 END_SCOPE(validator)
655 
656 #endif /* VALIDATOR___VALIDERROR_IMP__HPP */
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define static
User-defined methods of the data storage class.
EErrType
std representation
Definition: Affil_.hpp:91
C_Names –.
Definition: Auth_list_.hpp:98
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
CBioseq_Handle –.
CByte_graph –.
Definition: Byte_graph.hpp:66
CCdregion –.
Definition: Cdregion.hpp:66
Definition: Dbtag.hpp:53
CDelta_seq –.
Definition: Delta_seq.hpp:66
CFeat_CI –.
Definition: feat_ci.hpp:64
@Imp_feat.hpp User-defined methods of the data storage class.
Definition: Imp_feat.hpp:54
CMappedGraph –.
Definition: graph_ci.hpp:61
CObjectManager –.
@OrgMod.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
CPCRPrimer –.
Definition: PCRPrimer.hpp:66
CPCRReactionSet –.
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CScope –.
Definition: scope.hpp:92
CSeq_annot_Handle –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Base class for all serializable objects.
Definition: serialbase.hpp:150
CSubmit_block –.
CTaxon3_reply –.
Definition: Title.hpp:51
CScopeRestorer(CRef< CScope > &scope)
CRef< CObjectManager > m_ObjMgr
bool DoRubiscoTest() const
void PostErr(EDiagSev sv, EErrType et, const string &msg, TDesc ds)
const CSeq_entry_Handle & GetTSEH()
bool ReportSpliceAsError() const
void AddToPseudoCount(size_t num)
bool IsLatLonCheckState() const
size_t GetGeneCount(void) const
CRef< CScope > m_Scope
bool IsRemoteFetch() const
void AddToTpaWithHistoryCount(size_t num)
bool IsSuppressContext() const
CValidator::TProgressCallback m_PrgCallback
IValidError * m_ErrRepository
void AddToMisplacedFeatureCount(size_t num)
bool DoTaxLookup() const
void IncrementMisplacedFeatureCount()
bool IsValidateAlignments() const
const CConstRef< CSeq_annot > & GetSeqAnnot()
void ResetSmallGenomeSetMisplacedCount()
void ResetTpaWithoutHistoryCount()
bool GenerateGoldenFile() const
bool IsStandaloneAnnot() const
CConstRef< CSeq_feat > GetCachedGene(const CSeq_feat *f)
CConstRef< CSeq_annot > m_SeqAnnot
bool IsValidateIdSet() const
const CSeq_feat & TFeat
TSuppressed m_SuppressedErrors
bool IsLocusTagGeneralMatch() const
bool IgnoreInferences() const
CValidator::CProgressInfo m_PrgInfo
void AddToGeneXrefCount(size_t num)
const CTSE_Handle & GetTSE_Handle()
bool ValidateInferenceAccessions() const
size_t m_NumMisplacedFeatures
void IncrementMisplacedGraphCount()
void IncrementPseudogeneCount()
bool m_ValidateInferenceAccessions
vector< CConstRef< CBioseq > > m_BioseqWithNoSource
const CSeq_annot & TAnnot
bool IsRequireTaxonID() const
const CBioseq_set & TSet
bool IsSeqSubmitParent() const
const CSeq_graph & TGraph
bool IsOvlPepErr() const
void AddToMisplacedGraphCount(size_t num)
size_t m_NumTpaWithoutHistory
bool IsNonASCII() const
bool IsGenomeSubmission() const
CSeq_entry_Handle m_TSEH
void AddToGeneCount(size_t num)
void IncrementGeneXrefCount()
bool UseEntrez() const
void AddToSmallGenomeSetMisplacedCount(size_t num)
bool IsLatLonIgnoreWater() const
bool IsFarFetchCDSproducts() const
void ResetTpaWithHistoryCount()
void AddToTpaWithoutHistoryCount(size_t num)
void IncrementTpaWithHistoryCount()
bool IsValidateExons() const
size_t GetCumulativeInferenceCount(void) const
size_t m_NumSmallGenomeSetMisplaced
map< CSubSource::TSubtype, int > TCount
bool IgnoreExceptions() const
vector< CConstRef< CSeq_id > > m_InitialSeqIds
void ResetCumulativeInferenceCount()
CBioSourceKind m_biosource_kind
CConstRef< CSeq_entry > m_TSE
CGeneCache m_GeneCache
void IncrementTpaWithoutHistoryCount()
bool IsRefSeqConventions() const
bool IsIndexerVersion() const
bool IsRequireISOJTA() const
CGeneCache & GetGeneCache()
map< const CSeq_feat *, const CSeq_annot * > & TFeatAnnotMap
void AddToCumulativeInferenceCount(size_t num)
void IncrementSmallGenomeSetMisplacedCount()
size_t m_CumulativeInferenceCount
const CSeq_align & TAlign
vector< int > m_PubSerialNumbers
void AddToPseudogeneCount(size_t num)
bool HasRefSeq(void) const
void ResetMisplacedGraphCount()
const CSeq_entry & TEntry
shared_ptr< SValidatorContext > m_pContext
const CSeq_entry & GetTSE() const
bool IsFarFetchMRNAproducts() const
bool DoCompareVDJCtoCDS() const
const CSeqdesc & TDesc
CCacheImpl & GetCache()
bool ShouldSubdivide() const
void IncrementCumulativeInferenceCount()
void ResetMisplacedFeatureCount()
size_t GetGeneXrefCount(void) const
CValidError_imp(const CValidError_imp &)
const CBioseq & TBioseq
Cache various information for one validation run.
Definition: cache_impl.hpp:126
static const CTSE_Handle kEmptyTSEHandle
Definition: cache_impl.hpp:237
bool(* TProgressCallback)(CProgressInfo *)
Definition: validator.hpp:287
Definition: map.hpp:338
Definition: set.hpp:45
Include a standard set of the NCBI C++ Toolkit most basic headers.
API (CDeflineGenerator) for computing sequences' titles ("definitions").
static uch flags
CS_CONTEXT * ctx
Definition: t0006.c:12
static const struct name_t names[]
#define false
Definition: bool.h:36
static int lc
Definition: getdata.c:30
static const char * str(char *buf, int n)
Definition: stats.c:84
static FILE * f
Definition: readconf.c:23
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
#define NCBI_DEPRECATED
#define NCBI_STD_DEPRECATED(message)
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_VALIDATOR_EXPORT
Definition: ncbi_export.h:913
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
unsigned int TErrIndex
const string version
version string
Definition: variables.hpp:66
std::istream & in(std::istream &in_, double &x_)
static wxString GetContext(const wxString &str, int pos)
bool IsInOrganelleSmallGenomeSet(const CSeq_id &id, CScope &scope)
Definition: utilities.cpp:3050
bool BadMultipleSequenceLocation(const CSeq_loc &loc, CScope &scope)
Definition: utilities.cpp:3080
bool IsNG(const CSeq_id &id)
Definition: utilities.cpp:2790
const CSeq_feat::TDbxref TDbtags
Definition: utilities.hpp:199
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Definition: type.c:6
CScope & GetScope()
#define const
Definition: zconf.h:232
Modified on Fri Sep 20 14:57:44 2024 by modify_doxy.py rev. 669887