NCBI C++ ToolKit
validerror_imp.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: validerror_imp.hpp 102121 2024-04-03 21:59:37Z kans $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jonathan Kans, Clifford Clausen, Aaron Ucko......
27  *
28  * File Description:
29  * Private classes and definitions for the validator
30  * .......
31  *
32  */
33 
34 #ifndef VALIDATOR___VALIDERROR_IMP__HPP
35 #define VALIDATOR___VALIDERROR_IMP__HPP
36 
37 #include <corelib/ncbistd.hpp>
39 
40 #include <objmgr/scope.hpp>
41 #include <objmgr/feat_ci.hpp> // for CMappedFeat
44 #include <objects/seq/GIBB_mol.hpp>
54 
61 
63 
65 
66 #include <objmgr/util/feature.hpp>
67 #include <memory>
68 
71 
72 class CSeq_entry;
73 class CCit_sub;
74 class CCit_art;
75 class CCit_gen;
76 class CSeq_feat;
77 class CBioseq;
78 class CSeqdesc;
79 class CSeq_annot;
80 class CTrna_ext;
81 class CProt_ref;
82 class CSeq_loc;
83 class CFeat_CI;
84 class CPub_set;
85 class CAuth_list;
86 class CTitle;
87 class CMolInfo;
88 class CUser_object;
89 class CSeqdesc_CI;
90 class CSeq_graph;
91 class CMappedGraph;
92 class CDense_diag;
93 class CDense_seg;
94 class CSeq_align_set;
95 class CPubdesc;
96 class CBioSource;
97 class COrg_ref;
98 class CByte_graph;
99 class CDelta_seq;
100 class CGene_ref;
101 class CCdregion;
102 class CRNA_ref;
103 class CImp_feat;
104 class CSeq_literal;
105 class CBioseq_Handle;
106 class CSeq_feat_Handle;
107 class CCountries;
109 class CComment_set;
110 class CTaxon3_reply;
111 class ITaxon3;
112 class CT3Error;
113 
114 BEGIN_SCOPE(validator)
115 
116 
117 struct SValidatorContext;
118 class CValidError_desc;
119 class CValidError_descr;
121 
122 
123 // =========================== Central Validation ==========================
124 
125 // CValidError_imp provides the entry point to the validation process.
126 // It calls upon the various validation classes to perform validation of
127 // each part.
128 // The class holds all the data for the validation process.
130 {
131 public:
133 
135  shared_ptr<SValidatorContext> pContext,
136  CValidError* errors,
137  Uint4 options=0);
138 
139  // Destructor
140  virtual ~CValidError_imp();
141 
142  void SetOptions (Uint4 options);
143  void SetErrorRepository (CValidError* errors);
144  void Reset();
145 
146  // Validation methods
147  bool Validate(const CSeq_entry& se, const CCit_sub* cs = nullptr,
148  CScope* scope = nullptr);
149  bool Validate(
150  const CSeq_entry_Handle& seh, const CCit_sub* cs = nullptr);
151  void Validate(
152  const CSeq_submit& ss, CScope* scope = nullptr);
153  void Validate(const CSeq_annot_Handle& sa);
154 
155  void Validate(const CSeq_feat& feat, CScope* scope = nullptr);
156  void Validate(const CBioSource& src, CScope* scope = nullptr);
157  void Validate(const CPubdesc& pubdesc, CScope* scope = nullptr);
158  void Validate(const CSeqdesc& desc, const CSeq_entry& ctx);
159  void ValidateSubAffil(const CAffil::TStd& std, const CSerialObject& obj, const CSeq_entry *ctx);
160  void ValidateAffil(const CAffil::TStd& std, const CSerialObject& obj, const CSeq_entry *ctx);
161 
162  bool GetTSANStretchErrors(const CSeq_entry_Handle& se);
163  bool GetTSACDSOnMinusStrandErrors (const CSeq_entry_Handle& se);
164  bool GetTSAConflictingBiomolTechErrors (const CSeq_entry_Handle& se);
165  bool GetTSANStretchErrors(const CBioseq& seq);
166  bool GetTSACDSOnMinusStrandErrors (const CSeq_feat& f, const CBioseq& seq);
167  bool GetTSAConflictingBiomolTechErrors (const CBioseq& seq);
168 
169 
170  void SetProgressCallback(CValidator::TProgressCallback callback,
171  void* user_data);
172 
173  void SetTSE(const CSeq_entry_Handle& seh);
174 
175  bool ShouldSubdivide() const { return m_NumTopSetSiblings > 1000; } // true only when m_NumTopSetSiblings exceeds 1000
176 
177  SValidatorContext& SetContext();
178  const SValidatorContext& GetContext() const;
179 
180  bool IsHugeFileMode() const;
181  bool IsHugeSet(const CBioseq_set& bioseqSet) const;
182  bool IsHugeSet(CBioseq_set::TClass setClass) const;
183 
184 public:
185  // interface to be used by the various validation classes
186 
187  // typedefs:
188  typedef const CSeq_feat& TFeat;
189  typedef const CBioseq& TBioseq;
190  typedef const CBioseq_set& TSet;
191  typedef const CSeqdesc& TDesc;
192  typedef const CSeq_annot& TAnnot;
193  typedef const CSeq_graph& TGraph;
194  typedef const CSeq_align& TAlign;
195  typedef const CSeq_entry& TEntry;
197 
198 
199  const CValidatorEntryInfo& GetEntryInfo() const;
200 
201  // Posts errors.
202  void PostErr(EDiagSev sv, EErrType et, const string& msg,
203  const CSerialObject& obj);
204  void PostErr(EDiagSev sv, EErrType et, const string& msg, TDesc ds);
205  void PostErr(EDiagSev sv, EErrType et, const string& msg, TFeat ft);
206  void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq);
207  void PostErr(EDiagSev sv, EErrType et, const string& msg, TEntry ctx,
208  TDesc ds);
209  void PostErr(EDiagSev sv, EErrType et, const string& msg, TSet set);
210  void PostErr(EDiagSev sv, EErrType et, const string& msg, TAnnot annot);
211  void PostErr(EDiagSev sv, EErrType et, const string& msg, TGraph graph);
212  void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq,
213  TGraph graph);
214  void PostErr(EDiagSev sv, EErrType et, const string& msg, TAlign align);
215  void PostErr(EDiagSev sv, EErrType et, const string& msg, TEntry entry);
216  void PostErr(EDiagSev sv, EErrType et, const string& msg, const CBioSource& src);
217  void PostErr(EDiagSev sv, EErrType et, const string& msg, const COrg_ref& org);
218  void PostErr(EDiagSev sv, EErrType et, const string& msg, const CPubdesc& src);
219  void PostErr(EDiagSev sv, EErrType et, const string& msg, const CSeq_submit& ss);
220  void PostObjErr (EDiagSev sv, EErrType et, const string& msg, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
221  void PostBadDateError (EDiagSev sv, const string& msg, int flags, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
222 
223  void HandleTaxonomyError(const CT3Error& error, const string& host, const COrg_ref& orf);
224  void HandleTaxonomyError(const CT3Error& error, const EErrType type, const CSeq_feat& feat);
225  void HandleTaxonomyError(const CT3Error& error, const EErrType type, const CSeqdesc& desc, const CSeq_entry* entry);
226 
227  bool RaiseGenomeSeverity(EErrType et);
228 
229  // General use validation methods
230  void ValidatePubdesc(const CPubdesc& pub, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
231  void ValidateBioSource(const CBioSource& bsrc, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
232  void ValidatePCRReactionSet(const CPCRReactionSet& pcrset, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
233  void ValidateSubSource(const CSubSource& subsrc, const CSerialObject& obj, const CSeq_entry *ctx = nullptr, const bool isViral = false);
234  void ValidateOrgRef(const COrg_ref& orgref, const CSerialObject& obj, const CSeq_entry *ctx, const bool checkForUndefinedSpecies = false, const bool is_single_cell_amplification = false);
235  void ValidateTaxNameOrgname(const string& taxname, const COrgName& orgname, const CSerialObject& obj, const CSeq_entry *ctx);
236  void ValidateOrgName(const COrgName& orgname, const bool has_taxon, const CSerialObject& obj, const CSeq_entry *ctx);
237  void ValidateOrgModVoucher(const COrgMod& orgmod, const CSerialObject& obj, const CSeq_entry *ctx);
238  void ValidateBioSourceForSeq(const CBioSource& bsrc, const CSerialObject& obj, const CSeq_entry *ctx, const CBioseq_Handle& bsh);
239 
240  void ValidateLatLonCountry(string countryname, string lat_lon, const CSerialObject& obj, const CSeq_entry *ctx);
241 
242  bool IsSyntheticConstruct (const CBioSource& src);
243  bool IsArtificial (const CBioSource& src);
244  bool IsOtherDNA(const CBioseq_Handle& bsh) const;
245  void ValidateSeqLoc(const CSeq_loc& loc, const CBioseq_Handle& seq, bool report_abutting,
246  const string& prefix, const CSerialObject& obj, bool lowerSev = false);
247 
248  void ValidateSeqLocIds(const CSeq_loc& loc, const CSerialObject& obj);
249  NCBI_STD_DEPRECATED("Please use corresponding function in objtools/validator/utilities.hpp")
251  NCBI_STD_DEPRECATED("Please use corresponding function in objtools/validator/utilities.hpp")
253  void CheckMultipleIds(const CSeq_loc& loc, const CSerialObject& obj);
254  void ValidateDbxref(const CDbtag& xref, const CSerialObject& obj,
255  bool biosource = false, const CSeq_entry *ctx = nullptr);
256  void ValidateDbxref(TDbtags& xref_list, const CSerialObject& obj,
257  bool biosource = false, const CSeq_entry *ctx = nullptr);
258  void ValidateCitSub(const CCit_sub& cs, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
259  void ValidateTaxonomy(const CSeq_entry& se);
260  void ValidateOrgRefs(CTaxValidationAndCleanup& tval);
261  void ValidateSpecificHost(CTaxValidationAndCleanup& tval);
262  void ValidateStrain(CTaxValidationAndCleanup& tval, TTaxId descTaxID = ZERO_TAX_ID);
263  void ValidateSpecificHost (const CSeq_entry& se);
264  void ValidateTentativeName(const CSeq_entry& se);
265  void ValidateTaxonomy(const COrg_ref& org, CBioSource::TGenome genome = CBioSource::eGenome_unknown);
266  void ValidateMultipleTaxIds(const CSeq_entry_Handle& seh);
267  void ValidateCitations (const CSeq_entry_Handle& seh);
268  bool x_IsFarFetchFailure (const CSeq_loc& loc);
269 
270  // getters
271  inline CScope* GetScope() { return m_Scope; }
272  inline CCacheImpl& GetCache() { return m_cache; }
273 
274  inline CConstRef<CSeq_feat> GetCachedGene(const CSeq_feat* f) { return m_GeneCache.GetGeneFromCache(f, *m_Scope); }
275  inline CGeneCache& GetGeneCache() { return m_GeneCache; }
276 
277  // flags derived from options parameter
278  bool IsNonASCII() const { return m_NonASCII; }
279  bool IsSuppressContext() const { return m_SuppressContext; }
280  bool IsValidateAlignments() const { return m_ValidateAlignments; }
281  bool IsValidateExons() const { return m_ValidateExons; }
282  bool IsOvlPepErr() const { return m_OvlPepErr; }
283  bool IsRequireTaxonID() const { return !m_SeqSubmitParent; }
284  bool IsSeqSubmitParent() const { return m_SeqSubmitParent; }
285  bool IsRequireISOJTA() const { return m_RequireISOJTA; }
286  bool IsValidateIdSet() const { return m_ValidateIdSet; }
287  bool IsRemoteFetch() const { return m_RemoteFetch; }
288  bool IsFarFetchMRNAproducts() const { return m_FarFetchMRNAproducts; }
289  bool IsFarFetchCDSproducts() const { return m_FarFetchCDSproducts; }
290  bool IsLocusTagGeneralMatch() const { return m_LocusTagGeneralMatch; }
291  bool DoRubiscoTest() const { return m_DoRubiscoText; }
292  bool IsIndexerVersion() const { return m_IndexerVersion; }
293  bool IsGenomeSubmission() const { return m_genomeSubmission; }
294  bool UseEntrez() const { return m_UseEntrez; }
295  bool DoTaxLookup() const { return m_DoTaxLookup; }
296  bool ValidateInferenceAccessions() const { return m_ValidateInferenceAccessions; }
297  bool IgnoreExceptions() const { return m_IgnoreExceptions; }
298  bool ReportSpliceAsError() const { return m_ReportSpliceAsError; }
299  bool IsLatLonCheckState() const { return m_LatLonCheckState; }
300  bool IsLatLonIgnoreWater() const { return m_LatLonIgnoreWater; }
301  bool IsRefSeqConventions() const { return m_RefSeqConventions; }
302  bool GenerateGoldenFile() const { return m_GenerateGoldenFile; }
303  bool DoCompareVDJCtoCDS() const { return m_CompareVDJCtoCDS; }
304  bool IgnoreInferences() const { return m_IgnoreInferences; }
305 
306 
307  // flags calculated by examining data in record
308  bool IsStandaloneAnnot() const { return m_IsStandaloneAnnot; }
309  bool IsNoPubs() const;
310  bool IsNoCitSubPubs() const;
311  bool IsNoBioSource() const;
312  bool IsGPS() const;
313  bool IsGED() const;
314  bool IsPDB() const;
315  bool IsPatent() const;
316  bool IsRefSeq() const;
317  bool IsEmbl() const;
318  bool IsDdbj() const;
319  bool IsTPE() const;
320  NCBI_DEPRECATED bool IsNC() const;
321  NCBI_DEPRECATED bool IsNG() const;
322  NCBI_DEPRECATED bool IsNM() const;
323  NCBI_DEPRECATED bool IsNP() const;
324  NCBI_DEPRECATED bool IsNR() const;
325  NCBI_DEPRECATED bool IsNZ() const;
326  NCBI_DEPRECATED bool IsNS() const;
327  bool IsNT() const;
328  NCBI_DEPRECATED bool IsNW() const;
329  bool IsWP() const;
330  bool IsXR() const;
331  bool IsGI() const;
332  bool IsGpipe() const;
333  bool IsHtg() const;
334  bool IsLocalGeneralOnly() const;
335  bool HasGiOrAccnVer() const;
336  bool IsGenomic() const;
337  bool IsSeqSubmit() const;
338  bool IsSmallGenomeSet() const;
339  bool IsNoncuratedRefSeq(const CBioseq& seq, EDiagSev& sev);
340  bool IsGenbank() const;
341  bool DoesAnyFeatLocHaveGI() const;
342  bool DoesAnyProductLocHaveGI() const;
343  bool DoesAnyGeneHaveLocusTag() const;
344  bool DoesAnyProteinHaveGeneralID() const;
345  bool IsINSDInSep() const;
346  bool IsGeneious() const;
347  const CBioSourceKind& BioSourceKind() const;
348 
349  // counting number of misplaced features
350  inline void ResetMisplacedFeatureCount() { m_NumMisplacedFeatures = 0; }
351  inline void IncrementMisplacedFeatureCount() { m_NumMisplacedFeatures++; }
352  inline void AddToMisplacedFeatureCount(SIZE_TYPE num) { m_NumMisplacedFeatures += num; }
353 
354  // counting number of small genome set misplaced features
355  inline void ResetSmallGenomeSetMisplacedCount() { m_NumSmallGenomeSetMisplaced = 0; }
356  inline void IncrementSmallGenomeSetMisplacedCount() { m_NumSmallGenomeSetMisplaced++; }
357  inline void AddToSmallGenomeSetMisplacedCount(SIZE_TYPE num) { m_NumSmallGenomeSetMisplaced += num; }
358 
359  // counting number of misplaced graphs
360  inline void ResetMisplacedGraphCount() { m_NumMisplacedGraphs = 0; }
361  inline void IncrementMisplacedGraphCount() { m_NumMisplacedGraphs++; }
362  inline void AddToMisplacedGraphCount(SIZE_TYPE num) { m_NumMisplacedGraphs += num; }
363 
364  // counting number of genes and gene xrefs
365  inline void ResetGeneCount() { m_NumGenes = 0; }
366  inline void IncrementGeneCount() { m_NumGenes++; }
367  inline void AddToGeneCount(SIZE_TYPE num) { m_NumGenes += num; }
368  inline SIZE_TYPE GetGeneCount(void) const { return m_NumGenes; }
369  inline void ResetGeneXrefCount() { m_NumGeneXrefs = 0; }
370  inline void IncrementGeneXrefCount() { m_NumGeneXrefs++; }
371  inline void AddToGeneXrefCount(SIZE_TYPE num) { m_NumGeneXrefs += num; }
372  inline SIZE_TYPE GetGeneXrefCount(void) const { return m_NumGeneXrefs; }
373 
374  // counting sequences with and without TPA history
375  inline void ResetTpaWithHistoryCount() { m_NumTpaWithHistory = 0; }
376  inline void IncrementTpaWithHistoryCount() { m_NumTpaWithHistory++; }
377  inline void AddToTpaWithHistoryCount(SIZE_TYPE num) { m_NumTpaWithHistory += num; }
378  inline void ResetTpaWithoutHistoryCount() { m_NumTpaWithoutHistory = 0; }
379  inline void IncrementTpaWithoutHistoryCount() { m_NumTpaWithoutHistory++; }
380  inline void AddToTpaWithoutHistoryCount(SIZE_TYPE num) { m_NumTpaWithoutHistory += num; }
381 
382  // counting number of Pseudos and Pseudogenes
383  inline void ResetPseudoCount() { m_NumPseudo = 0; }
384  inline void IncrementPseudoCount() { m_NumPseudo++; }
385  inline void AddToPseudoCount(SIZE_TYPE num) { m_NumPseudo += num; }
386  inline void ResetPseudogeneCount() { m_NumPseudogene = 0; }
387  inline void IncrementPseudogeneCount() { m_NumPseudogene++; }
388  inline void AddToPseudogeneCount(SIZE_TYPE num) { m_NumPseudogene += num; }
389 
390  // set flag for farfetchfailure
391  inline void SetFarFetchFailure() { m_FarFetchFailure = true; }
392 
393  bool IsFarSequence(const CSeq_id& id); // const;
394 
395  const CSeq_entry& GetTSE() const { return *m_TSE; } // dereferences m_TSE; NOTE(review): presumably m_TSE must already be set (e.g. via Setup) — confirm
396  const CSeq_entry_Handle& GetTSEH() { return m_TSEH; }
397  const CTSE_Handle& GetTSE_Handle() { // TSE handle of m_TSEH, or the shared empty handle when m_TSEH tests false
398  if (m_TSEH) { return m_TSEH.GetTSE_Handle(); } return CCacheImpl::kEmptyTSEHandle; }
399 
400  CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id& id);
401  CBioseq_Handle GetLocalBioseqHandle(const CSeq_id& id); // Local here means not far
402 
403  const CConstRef<CSeq_annot>& GetSeqAnnot() { return m_SeqAnnot; }
404 
405  void AddBioseqWithNoPub(const CBioseq& seq);
406  void AddBioseqWithNoBiosource(const CBioseq& seq);
407  void AddProtWithoutFullRef(const CBioseq_Handle& seq);
408  static bool IsWGSIntermediate(const CBioseq& seq);
409  static bool IsTSAIntermediate(const CBioseq& seq);
410  void ReportMissingPubs(const CSeq_entry& se, const CCit_sub* cs);
411  void ReportMissingBiosource(const CSeq_entry& se);
412 
413  CConstRef<CSeq_feat> GetCDSGivenProduct(const CBioseq& seq);
414  NCBI_DEPRECATED CConstRef<CSeq_feat> GetmRNAGivenProduct(const CBioseq& seq);
415  CConstRef<CSeq_feat> GetmRNAGivenProduct(const CBioseq_Handle& seq);
416  const CSeq_entry* GetAncestor(const CBioseq& seq, CBioseq_set::EClass clss);
417  bool IsSerialNumberInComment(const string& comment);
418 
419  bool IsTransgenic(const CBioSource& bsrc);
420 
421  bool RequireLocalProduct(const CSeq_id* sid) const;
422 
423 private:
424 
425  // Set up common options during construction.
426  void x_Init(Uint4 options);
427 
428  // This is so we can temporarily set m_Scope in a function
429  // and be sure that it will be set to its old value when we're done
431  public:
433  m_scopeToRestore(scope), m_scopeOriginalValue(scope) { }
434 
435  ~CScopeRestorer() { m_scopeToRestore = m_scopeOriginalValue; }
436  private:
439  };
440 
441  // Prohibit copy constructor & assignment operator
443  CValidError_imp& operator= (const CValidError_imp&);
444 
445  void Setup(const CSeq_entry_Handle& seh);
446  void Setup(const CSeq_annot_Handle& sa);
447  CSeq_entry_Handle Setup(const CBioseq& seq);
448  void SetScope(const CSeq_entry& se);
449 
450  CValidatorEntryInfo& x_SetEntryInfo();
451 
452  void x_AddValidErrItem(EDiagSev sev,
453  EErrType type,
454  const string& msg,
455  const string& desc,
456  const CSerialObject& obj,
457  const string& accession,
458  const int version);
459 
460  void ValidateSubmitBlock(const CSubmit_block& block, const CSeq_submit& ss);
461 
462  void InitializeSourceQualTags();
463  void ValidateSourceQualTags(const string& str, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
464 
465  bool IsMixedStrands(const CSeq_loc& loc);
466 
467  void ValidatePubGen(const CCit_gen& gen, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
468  void ValidatePubArticle(const CCit_art& art, TEntrezId uid, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
469  void ValidatePubArticleNoPMID(const CCit_art& art, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
470  void x_ValidatePages(const string& pages, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
471  void ValidateAuthorList(const CAuth_list::C_Names& names, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
472  void ValidateAuthorsInPubequiv (const CPub_equiv& pe, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
473  void ValidatePubHasAuthor(const CPubdesc& pubdesc, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
474 
475  bool HasName(const CAuth_list& authors);
476  bool HasTitle(const CTitle& title);
477  bool HasIsoJTA(const CTitle& title);
478 
479  void FindEmbeddedScript(const CSerialObject& obj);
480  void FindNonAsciiText (const CSerialObject& obj);
481  void FindCollidingSerialNumbers (const CSerialObject& obj);
482 
483 
484  void GatherTentativeName (const CSeq_entry& se, vector<CConstRef<CSeqdesc> >& usr_descs, vector<CConstRef<CSeq_entry> >& desc_ctxs, vector<CConstRef<CSeq_feat> >& usr_feats);
485 
486  static bool s_IsSalmonellaGenus(const string& taxname);
487  EDiagSev x_SalmonellaErrorLevel();
488 
489  typedef struct tagSLocCheck {
490  bool chk;
493  bool has_other;
497  const CSeq_interval *int_cur = nullptr;
498  const CSeq_interval *int_prv = nullptr;
501  string prefix;
502  } SLocCheck;
503 
504  void x_InitLocCheck(SLocCheck& lc, const string& prefix);
505  void x_CheckForStrandChange(SLocCheck& lc);
506  void x_CheckLoc(const CSeq_loc& loc, const CSerialObject& obj, SLocCheck& lc, bool lowerSev = false);
507  void x_CheckPackedInt(const CPacked_seqint& packed_int,
508  SLocCheck& lc,
509  const CSerialObject& obj);
510  bool x_CheckSeqInt(CConstRef<CSeq_id>& id_cur,
511  const CSeq_interval * int_cur,
512  ENa_strand& strand_cur);
513  void x_ReportInvalidFuzz(const CPacked_seqint& packed_int, const CSerialObject& obj);
514  void x_ReportInvalidFuzz(const CSeq_interval& interval, const CSerialObject& obj);
515  void x_ReportInvalidFuzz(const CSeq_point& point, const CSerialObject& obj);
516  void x_ReportInvalidFuzz(const CSeq_loc& loc, const CSerialObject& obj);
517  void x_ReportPCRSeqProblem(const string& primer_kind,
518  char badch,
519  const CSerialObject& obj,
520  const CSeq_entry *ctx);
521  void x_CheckPCRPrimer(const CPCRPrimer& primer,
522  const string& primer_kind,
523  const CSerialObject& obj,
524  const CSeq_entry *ctx);
525 
526  void x_DoBarcodeTests(CSeq_entry_Handle seh);
527 
528  bool x_DowngradeForMissingAffil(const CCit_sub& cs);
529 
535 
538 
539  // error repository
541 
542  // flags derived from options parameter
543  bool m_NonASCII; // User sets if Non ASCII char found
544  bool m_SuppressContext; // Suppress context in errors if true (per the flag name and IsSuppressContext(); NOTE(review): confirm against x_Init)
545  bool m_ValidateAlignments; // Validate Alignments if true
546  bool m_ValidateExons; // Check exon feature splice sites
547  bool m_OvlPepErr; // Peptide overlap error if true, else warn
548  bool m_RequireISOJTA; // Journal requires ISO JTA
549  bool m_ValidateIdSet; // validate update against ID set in database
550  bool m_RemoteFetch; // Remote fetch enabled?
551  bool m_FarFetchMRNAproducts; // Remote fetch mRNA products
552  bool m_FarFetchCDSproducts; // Remote fetch proteins
560  bool m_IgnoreExceptions; // ignore exceptions when validating translation
561  bool m_ValidateInferenceAccessions; // check that accessions in inferences are valid
566  bool m_CollectLocusTags; // collect locus tags for use in special formatted reports
567  bool m_SeqSubmitParent; // some errors are suppressed if this is run on a newly created submission
571 
572  // flags calculated by examining data in record
574 
575  bool m_IsNC=false;
576  bool m_IsNG=false;
577  bool m_IsNM=false;
578  bool m_IsNP=false;
579  bool m_IsNR=false;
580  bool m_IsNZ=false;
581  bool m_IsNS=false;
582  bool m_IsNT=false;
583  bool m_IsNW=false;
584  bool m_IsWP=false;
585  bool m_IsXR=false;
586 
589 
591 
592  // seq ids contained within the original seq entry.
593  // (used to check for far location)
594  vector< CConstRef<CSeq_id> > m_InitialSeqIds;
595  // Bioseqs without source (should be considered only if m_NoSource is false)
596  vector< CConstRef<CBioseq> > m_BioseqWithNoSource;
597 
598  // list of publication serial numbers
599  vector< int > m_PubSerialNumbers;
600 
611 
617 
620 
623 
625 
626  // Taxonomy service interface.
627  unique_ptr<CTaxValidationAndCleanup> x_CreateTaxValidator() const;
628 
629  shared_ptr<SValidatorContext> m_pContext;
630  unique_ptr<CValidatorEntryInfo> m_pEntryInfo = make_unique<CValidatorEntryInfo>();
631 };
632 
633 
634 END_SCOPE(validator)
637 
638 #endif /* VALIDATOR___VALIDERROR_IMP__HPP */
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define static
User-defined methods of the data storage class.
EErrType
std representation
Definition: Affil_.hpp:91
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
CBioseq_Handle –.
CByte_graph –.
Definition: Byte_graph.hpp:66
CCdregion –.
Definition: Cdregion.hpp:66
Definition: Dbtag.hpp:53
CDelta_seq –.
Definition: Delta_seq.hpp:66
CFeat_CI –.
Definition: feat_ci.hpp:64
@Imp_feat.hpp User-defined methods of the data storage class.
Definition: Imp_feat.hpp:54
CMappedGraph –.
Definition: graph_ci.hpp:61
CObjectManager –.
@OrgMod.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
CPCRPrimer –.
Definition: PCRPrimer.hpp:66
CPCRReactionSet –.
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CScope –.
Definition: scope.hpp:92
CSeq_annot_Handle –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Base class for all serializable objects.
Definition: serialbase.hpp:150
CSubmit_block –.
CTaxon3_reply –.
Definition: Title.hpp:51
CScopeRestorer(CRef< CScope > &scope)
CRef< CObjectManager > m_ObjMgr
bool DoRubiscoTest() const
SIZE_TYPE GetGeneCount(void) const
void PostErr(EDiagSev sv, EErrType et, const string &msg, TDesc ds)
const CSeq_entry_Handle & GetTSEH()
bool ReportSpliceAsError() const
void AddToPseudoCount(SIZE_TYPE num)
bool IsLatLonCheckState() const
SIZE_TYPE m_NumPseudogene
CRef< CScope > m_Scope
bool IsRemoteFetch() const
SIZE_TYPE m_NumTpaWithHistory
bool IsSuppressContext() const
CValidator::TProgressCallback m_PrgCallback
void AddToGeneCount(SIZE_TYPE num)
bool DoTaxLookup() const
CValidError * m_ErrRepository
void IncrementMisplacedFeatureCount()
bool IsValidateAlignments() const
const CConstRef< CSeq_annot > & GetSeqAnnot()
void ResetSmallGenomeSetMisplacedCount()
void ResetTpaWithoutHistoryCount()
void AddToGeneXrefCount(SIZE_TYPE num)
bool GenerateGoldenFile() const
bool IsStandaloneAnnot() const
void AddToTpaWithoutHistoryCount(SIZE_TYPE num)
CConstRef< CSeq_feat > GetCachedGene(const CSeq_feat *f)
CConstRef< CSeq_annot > m_SeqAnnot
bool IsValidateIdSet() const
const CSeq_feat & TFeat
SIZE_TYPE m_NumMisplacedGraphs
void AddToMisplacedGraphCount(SIZE_TYPE num)
bool IsLocusTagGeneralMatch() const
void AddToTpaWithHistoryCount(SIZE_TYPE num)
bool IgnoreInferences() const
SIZE_TYPE m_NumTpaWithoutHistory
CValidator::CProgressInfo m_PrgInfo
const CTSE_Handle & GetTSE_Handle()
bool ValidateInferenceAccessions() const
void IncrementMisplacedGraphCount()
void IncrementPseudogeneCount()
SIZE_TYPE m_NumSmallGenomeSetMisplaced
bool m_ValidateInferenceAccessions
vector< CConstRef< CBioseq > > m_BioseqWithNoSource
SIZE_TYPE GetGeneXrefCount(void) const
const CSeq_annot & TAnnot
bool IsRequireTaxonID() const
const CBioseq_set & TSet
bool IsSeqSubmitParent() const
void AddToPseudogeneCount(SIZE_TYPE num)
SIZE_TYPE m_NumGeneXrefs
const CSeq_graph & TGraph
bool IsOvlPepErr() const
bool IsNonASCII() const
bool IsGenomeSubmission() const
CSeq_entry_Handle m_TSEH
SIZE_TYPE m_NumBioseq_set
void IncrementGeneXrefCount()
bool UseEntrez() const
bool IsLatLonIgnoreWater() const
bool IsFarFetchCDSproducts() const
void ResetTpaWithHistoryCount()
void AddToSmallGenomeSetMisplacedCount(SIZE_TYPE num)
void AddToMisplacedFeatureCount(SIZE_TYPE num)
void IncrementTpaWithHistoryCount()
bool IsValidateExons() const
map< CSubSource::TSubtype, int > TCount
bool IgnoreExceptions() const
vector< CConstRef< CSeq_id > > m_InitialSeqIds
CBioSourceKind m_biosource_kind
CConstRef< CSeq_entry > m_TSE
CGeneCache m_GeneCache
void IncrementTpaWithoutHistoryCount()
bool IsRefSeqConventions() const
bool IsIndexerVersion() const
bool IsRequireISOJTA() const
CGeneCache & GetGeneCache()
map< const CSeq_feat *, const CSeq_annot * > & TFeatAnnotMap
void IncrementSmallGenomeSetMisplacedCount()
const CSeq_align & TAlign
vector< int > m_PubSerialNumbers
void ResetMisplacedGraphCount()
const CSeq_entry & TEntry
shared_ptr< SValidatorContext > m_pContext
const CSeq_entry & GetTSE() const
bool IsFarFetchMRNAproducts() const
bool DoCompareVDJCtoCDS() const
SIZE_TYPE m_NumMisplacedFeatures
const CSeqdesc & TDesc
CCacheImpl & GetCache()
bool ShouldSubdivide() const
void ResetMisplacedFeatureCount()
CValidError_imp(const CValidError_imp &)
const CBioseq & TBioseq
Cache various information for one validation run.
Definition: cache_impl.hpp:126
static const CTSE_Handle kEmptyTSEHandle
Definition: cache_impl.hpp:237
bool(* TProgressCallback)(CProgressInfo *)
Definition: validator.hpp:249
Definition: map.hpp:338
Definition: set.hpp:45
Include a standard set of the NCBI C++ Toolkit most basic headers.
API (CDeflineGenerator) for computing sequences' titles ("definitions").
static uch flags
CS_CONTEXT * ctx
Definition: t0006.c:12
static const struct name_t names[]
#define false
Definition: bool.h:36
static int lc
Definition: getdata.c:30
static const char * str(char *buf, int n)
Definition: stats.c:84
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
#define ZERO_TAX_ID
Definition: ncbimisc.hpp:1115
SStrictId_Tax::TId TTaxId
Taxon id type.
Definition: ncbimisc.hpp:1048
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
#define NCBI_DEPRECATED
#define NCBI_STD_DEPRECATED(message)
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define NCBI_VALIDATOR_EXPORT
Definition: ncbi_export.h:913
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
static int version
Definition: mdb_load.c:29
std::istream & in(std::istream &in_, double &x_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static wxString GetContext(const wxString &str, int pos)
bool IsInOrganelleSmallGenomeSet(const CSeq_id &id, CScope &scope)
Definition: utilities.cpp:3050
bool BadMultipleSequenceLocation(const CSeq_loc &loc, CScope &scope)
Definition: utilities.cpp:3080
bool IsNG(const CSeq_id &id)
Definition: utilities.cpp:2790
const CSeq_feat::TDbxref TDbtags
Definition: utilities.hpp:199
static const char * prefix[]
Definition: pcregrep.c:405
Definition: type.c:6
CScope & GetScope()
#define const
Definition: zconf.h:232
Modified on Mon Apr 22 04:00:12 2024 by modify_doxy.py rev. 669887