NCBI C++ ToolKit
validerror_imp.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: validerror_imp.hpp 101299 2023-11-28 18:18:38Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jonathan Kans, Clifford Clausen, Aaron Ucko......
27  *
28  * File Description:
29  * Private classes and definitions for the validator
30  * .......
31  *
32  */
33 
34 #ifndef VALIDATOR___VALIDERROR_IMP__HPP
35 #define VALIDATOR___VALIDERROR_IMP__HPP
36 
37 #include <corelib/ncbistd.hpp>
39 
40 #include <objmgr/scope.hpp>
41 #include <objmgr/feat_ci.hpp> // for CMappedFeat
44 #include <objects/seq/GIBB_mol.hpp>
54 
61 
63 
65 
66 #include <objmgr/util/feature.hpp>
67 #include <memory>
68 
71 
72 class CSeq_entry;
73 class CCit_sub;
74 class CCit_art;
75 class CCit_gen;
76 class CSeq_feat;
77 class CBioseq;
78 class CSeqdesc;
79 class CSeq_annot;
80 class CTrna_ext;
81 class CProt_ref;
82 class CSeq_loc;
83 class CFeat_CI;
84 class CPub_set;
85 class CAuth_list;
86 class CTitle;
87 class CMolInfo;
88 class CUser_object;
89 class CSeqdesc_CI;
90 class CSeq_graph;
91 class CMappedGraph;
92 class CDense_diag;
93 class CDense_seg;
94 class CSeq_align_set;
95 class CPubdesc;
96 class CBioSource;
97 class COrg_ref;
98 class CByte_graph;
99 class CDelta_seq;
100 class CGene_ref;
101 class CCdregion;
102 class CRNA_ref;
103 class CImp_feat;
104 class CSeq_literal;
105 class CBioseq_Handle;
106 class CSeq_feat_Handle;
107 class CCountries;
109 class CComment_set;
110 class CTaxon3_reply;
111 class ITaxon3;
112 class CT3Error;
113 
114 BEGIN_SCOPE(validator)
115 
116 
117 struct SValidatorContext;
118 class CValidError_desc;
119 class CValidError_descr;
121 
122 
123 // =========================== Central Validation ==========================
124 
125 // CValidError_imp provides the entry point to the validation process.
126 // It calls upon the various validation classes to perform validation of
127 // each part.
128 // The class holds all the data for the validation process.
130 {
131 public:
133 
135  shared_ptr<SValidatorContext> pContext,
136  CValidError* errors,
137  Uint4 options=0);
138 
139  // Destructor
140  virtual ~CValidError_imp();
141 
142  void SetOptions (Uint4 options);
143  void SetErrorRepository (CValidError* errors);
144  void Reset();
145 
146  // Validation methods
147  bool Validate(const CSeq_entry& se, const CCit_sub* cs = nullptr,
148  CScope* scope = nullptr);
149  bool Validate(
150  const CSeq_entry_Handle& seh, const CCit_sub* cs = nullptr);
151  void Validate(
152  const CSeq_submit& ss, CScope* scope = nullptr);
153  void Validate(const CSeq_annot_Handle& sa);
154 
155  void Validate(const CSeq_feat& feat, CScope* scope = nullptr);
156  void Validate(const CBioSource& src, CScope* scope = nullptr);
157  void Validate(const CPubdesc& pubdesc, CScope* scope = nullptr);
158  void Validate(const CSeqdesc& desc, const CSeq_entry& ctx);
159  void ValidateSubAffil(const CAffil::TStd& std, const CSerialObject& obj, const CSeq_entry *ctx);
160  void ValidateAffil(const CAffil::TStd& std, const CSerialObject& obj, const CSeq_entry *ctx);
161 
162  bool GetTSANStretchErrors(const CSeq_entry_Handle& se);
163  bool GetTSACDSOnMinusStrandErrors (const CSeq_entry_Handle& se);
164  bool GetTSAConflictingBiomolTechErrors (const CSeq_entry_Handle& se);
165  bool GetTSANStretchErrors(const CBioseq& seq);
166  bool GetTSACDSOnMinusStrandErrors (const CSeq_feat& f, const CBioseq& seq);
167  bool GetTSAConflictingBiomolTechErrors (const CBioseq& seq);
168 
169 
170  void SetProgressCallback(CValidator::TProgressCallback callback,
171  void* user_data);
172 
173  void SetTSE(const CSeq_entry_Handle& seh);
174 
175  bool ShouldSubdivide() const { return m_NumTopSetSiblings > 1000; } // true only when m_NumTopSetSiblings exceeds 1000
176 
177  SValidatorContext& SetContext();
178  const SValidatorContext& GetContext() const;
179 
180  bool IsHugeFileMode() const;
181  bool IsHugeSet(const CBioseq_set& bioseqSet) const;
182  bool IsHugeSet(CBioseq_set::TClass setClass) const;
183 
184 public:
185  // interface to be used by the various validation classes
186 
187  // const-reference aliases for the object kinds accepted by the PostErr overloads
188  using TFeat = const CSeq_feat&;
189  using TBioseq = const CBioseq&;
190  using TSet = const CBioseq_set&;
191  using TDesc = const CSeqdesc&;
192  using TAnnot = const CSeq_annot&;
193  using TGraph = const CSeq_graph&;
194  using TAlign = const CSeq_align&;
195  using TEntry = const CSeq_entry&;
197 
198 
199  const CValidatorEntryInfo& GetEntryInfo() const;
200 
201  // Posts errors.
202  void PostErr(EDiagSev sv, EErrType et, const string& msg,
203  const CSerialObject& obj);
204  void PostErr(EDiagSev sv, EErrType et, const string& msg, TDesc ds);
205  void PostErr(EDiagSev sv, EErrType et, const string& msg, TFeat ft);
206  void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq);
207  void PostErr(EDiagSev sv, EErrType et, const string& msg, TEntry ctx,
208  TDesc ds);
209  void PostErr(EDiagSev sv, EErrType et, const string& msg, TSet set);
210  void PostErr(EDiagSev sv, EErrType et, const string& msg, TAnnot annot);
211  void PostErr(EDiagSev sv, EErrType et, const string& msg, TGraph graph);
212  void PostErr(EDiagSev sv, EErrType et, const string& msg, TBioseq sq,
213  TGraph graph);
214  void PostErr(EDiagSev sv, EErrType et, const string& msg, TAlign align);
215  void PostErr(EDiagSev sv, EErrType et, const string& msg, TEntry entry);
216  void PostErr(EDiagSev sv, EErrType et, const string& msg, const CBioSource& src);
217  void PostErr(EDiagSev sv, EErrType et, const string& msg, const COrg_ref& org);
218  void PostErr(EDiagSev sv, EErrType et, const string& msg, const CPubdesc& src);
219  void PostErr(EDiagSev sv, EErrType et, const string& msg, const CSeq_submit& ss);
220  void PostObjErr (EDiagSev sv, EErrType et, const string& msg, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
221  void PostBadDateError (EDiagSev sv, const string& msg, int flags, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
222 
223  void HandleTaxonomyError(const CT3Error& error, const string& host, const COrg_ref& orf);
224  void HandleTaxonomyError(const CT3Error& error, const EErrType type, const CSeq_feat& feat);
225  void HandleTaxonomyError(const CT3Error& error, const EErrType type, const CSeqdesc& desc, const CSeq_entry* entry);
226 
227  bool RaiseGenomeSeverity(EErrType et);
228 
229  // General use validation methods
230  void ValidatePubdesc(const CPubdesc& pub, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
231  void ValidateBioSource(const CBioSource& bsrc, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
232  void ValidatePCRReactionSet(const CPCRReactionSet& pcrset, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
233  void ValidateSubSource(const CSubSource& subsrc, const CSerialObject& obj, const CSeq_entry *ctx = nullptr, const bool isViral = false);
234  void ValidateOrgRef(const COrg_ref& orgref, const CSerialObject& obj, const CSeq_entry *ctx, const bool checkForUndefinedSpecies = false, const bool is_single_cell_amplification = false);
235  void ValidateTaxNameOrgname(const string& taxname, const COrgName& orgname, const CSerialObject& obj, const CSeq_entry *ctx);
236  void ValidateOrgName(const COrgName& orgname, const bool has_taxon, const CSerialObject& obj, const CSeq_entry *ctx);
237  void ValidateOrgModVoucher(const COrgMod& orgmod, const CSerialObject& obj, const CSeq_entry *ctx);
238  void ValidateBioSourceForSeq(const CBioSource& bsrc, const CSerialObject& obj, const CSeq_entry *ctx, const CBioseq_Handle& bsh);
239 
240  void ValidateLatLonCountry(string countryname, string lat_lon, const CSerialObject& obj, const CSeq_entry *ctx);
241 
242  static bool IsSyntheticConstruct (const CBioSource& src);
243  bool IsArtificial (const CBioSource& src);
244  bool IsOtherDNA(const CBioseq_Handle& bsh) const;
245  void ValidateSeqLoc(const CSeq_loc& loc, const CBioseq_Handle& seq, bool report_abutting,
246  const string& prefix, const CSerialObject& obj, bool lowerSev = false);
247 
248  void ValidateSeqLocIds(const CSeq_loc& loc, const CSerialObject& obj);
249  NCBI_STD_DEPRECATED("Please use corresponding function in objtools/validator/utilities.hpp")
251  NCBI_STD_DEPRECATED("Please use corresponding function in objtools/validator/utilities.hpp")
253  void CheckMultipleIds(const CSeq_loc& loc, const CSerialObject& obj);
254  void ValidateDbxref(const CDbtag& xref, const CSerialObject& obj,
255  bool biosource = false, const CSeq_entry *ctx = nullptr);
256  void ValidateDbxref(TDbtags& xref_list, const CSerialObject& obj,
257  bool biosource = false, const CSeq_entry *ctx = nullptr);
258  void ValidateCitSub(const CCit_sub& cs, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
259  void ValidateTaxonomy(const CSeq_entry& se);
260  void ValidateOrgRefs(CTaxValidationAndCleanup& tval);
261  void ValidateSpecificHost(CTaxValidationAndCleanup& tval);
262  void ValidateStrain(CTaxValidationAndCleanup& tval);
263  void ValidateSpecificHost (const CSeq_entry& se);
264  void ValidateTentativeName(const CSeq_entry& se);
265  void ValidateTaxonomy(const COrg_ref& org, int genome = CBioSource::eGenome_unknown);
266  void ValidateMultipleTaxIds(const CSeq_entry_Handle& seh);
267  void ValidateCitations (const CSeq_entry_Handle& seh);
268  bool x_IsFarFetchFailure (const CSeq_loc& loc);
269 
270  // accessors for the scope, the per-run cache and the gene cache
271  CScope* GetScope() { return m_Scope; }
272  CCacheImpl& GetCache() { return m_cache; }
273 
274  CConstRef<CSeq_feat> GetCachedGene(const CSeq_feat* f) { return m_GeneCache.GetGeneFromCache(f, *m_Scope); } // looks the gene up through m_GeneCache using the current scope
275  CGeneCache& GetGeneCache() { return m_GeneCache; }
276 
277  // flags derived from options parameter; each getter returns its backing member unchanged unless noted
278  bool IsNonASCII() const { return m_NonASCII; }
279  bool IsSuppressContext() const { return m_SuppressContext; }
280  bool IsValidateAlignments() const { return m_ValidateAlignments; }
281  bool IsValidateExons() const { return m_ValidateExons; }
282  bool IsOvlPepErr() const { return m_OvlPepErr; }
283  bool IsRequireTaxonID() const { return !m_SeqSubmitParent; } // negation of m_SeqSubmitParent, i.e. the inverse of IsSeqSubmitParent()
284  bool IsSeqSubmitParent() const { return m_SeqSubmitParent; }
285  bool IsRequireISOJTA() const { return m_RequireISOJTA; }
286  bool IsValidateIdSet() const { return m_ValidateIdSet; }
287  bool IsRemoteFetch() const { return m_RemoteFetch; }
288  bool IsFarFetchMRNAproducts() const { return m_FarFetchMRNAproducts; }
289  bool IsFarFetchCDSproducts() const { return m_FarFetchCDSproducts; }
290  bool IsLocusTagGeneralMatch() const { return m_LocusTagGeneralMatch; }
291  bool DoRubiscoTest() const { return m_DoRubiscoText; } // NOTE(review): backing member is spelled m_DoRubiscoText (Text, not Test)
292  bool IsIndexerVersion() const { return m_IndexerVersion; }
293  bool IsGenomeSubmission() const { return m_genomeSubmission; }
294  bool UseEntrez() const { return m_UseEntrez; }
295  bool DoTaxLookup() const { return m_DoTaxLookup; }
296  bool ValidateInferenceAccessions() const { return m_ValidateInferenceAccessions; }
297  bool IgnoreExceptions() const { return m_IgnoreExceptions; }
298  bool ReportSpliceAsError() const { return m_ReportSpliceAsError; }
299  bool IsLatLonCheckState() const { return m_LatLonCheckState; }
300  bool IsLatLonIgnoreWater() const { return m_LatLonIgnoreWater; }
301  bool IsRefSeqConventions() const { return m_RefSeqConventions; }
302  bool GenerateGoldenFile() const { return m_GenerateGoldenFile; }
303  bool DoCompareVDJCtoCDS() const { return m_CompareVDJCtoCDS; }
304 
305 
306  // flags calculated by examining data in record
307  bool IsStandaloneAnnot() const { return m_IsStandaloneAnnot; }
308  bool IsNoPubs() const;
309  bool IsNoCitSubPubs() const;
310  bool IsNoBioSource() const;
311  bool IsGPS() const;
312  bool IsGED() const;
313  bool IsPDB() const;
314  bool IsPatent() const;
315  bool IsRefSeq() const;
316  bool IsEmbl() const;
317  bool IsDdbj() const;
318  bool IsTPE() const;
319  NCBI_DEPRECATED bool IsNC() const;
320  NCBI_DEPRECATED bool IsNG() const;
321  NCBI_DEPRECATED bool IsNM() const;
322  NCBI_DEPRECATED bool IsNP() const;
323  NCBI_DEPRECATED bool IsNR() const;
324  NCBI_DEPRECATED bool IsNZ() const;
325  NCBI_DEPRECATED bool IsNS() const;
326  bool IsNT() const;
327  NCBI_DEPRECATED bool IsNW() const;
328  bool IsWP() const;
329  bool IsXR() const;
330  bool IsGI() const;
331  bool IsGpipe() const;
332  bool IsHtg() const;
333  bool IsLocalGeneralOnly() const;
334  bool HasGiOrAccnVer() const;
335  bool IsGenomic() const;
336  bool IsSeqSubmit() const;
337  bool IsSmallGenomeSet() const;
338  bool IsNoncuratedRefSeq(const CBioseq& seq, EDiagSev& sev);
339  bool IsGenbank() const;
340  bool DoesAnyFeatLocHaveGI() const;
341  bool DoesAnyProductLocHaveGI() const;
342  bool DoesAnyGeneHaveLocusTag() const;
343  bool DoesAnyProteinHaveGeneralID() const;
344  bool IsINSDInSep() const;
345  bool IsGeneious() const;
346  const CBioSourceKind& BioSourceKind() const;
347 
348  // running tally of misplaced features (m_NumMisplacedFeatures)
349  void ResetMisplacedFeatureCount() { m_NumMisplacedFeatures = 0; }
350  void IncrementMisplacedFeatureCount() { ++m_NumMisplacedFeatures; }
351  void AddToMisplacedFeatureCount(SIZE_TYPE num) { m_NumMisplacedFeatures += num; }
352 
353  // running tally of features misplaced within a small genome set (m_NumSmallGenomeSetMisplaced)
354  void ResetSmallGenomeSetMisplacedCount() { m_NumSmallGenomeSetMisplaced = 0; }
355  void IncrementSmallGenomeSetMisplacedCount() { ++m_NumSmallGenomeSetMisplaced; }
356  void AddToSmallGenomeSetMisplacedCount(SIZE_TYPE num) { m_NumSmallGenomeSetMisplaced += num; }
357 
358  // running tally of misplaced graphs (m_NumMisplacedGraphs)
359  void ResetMisplacedGraphCount() { m_NumMisplacedGraphs = 0; }
360  void IncrementMisplacedGraphCount() { ++m_NumMisplacedGraphs; }
361  void AddToMisplacedGraphCount(SIZE_TYPE num) { m_NumMisplacedGraphs += num; }
362 
363  // running tallies of genes (m_NumGenes) and gene xrefs (m_NumGeneXrefs), each with a read accessor
364  void ResetGeneCount() { m_NumGenes = 0; }
365  void IncrementGeneCount() { ++m_NumGenes; }
366  void AddToGeneCount(SIZE_TYPE num) { m_NumGenes += num; }
367  SIZE_TYPE GetGeneCount() const { return m_NumGenes; }
368  void ResetGeneXrefCount() { m_NumGeneXrefs = 0; }
369  void IncrementGeneXrefCount() { ++m_NumGeneXrefs; }
370  void AddToGeneXrefCount(SIZE_TYPE num) { m_NumGeneXrefs += num; }
371  SIZE_TYPE GetGeneXrefCount() const { return m_NumGeneXrefs; }
372 
373  // running tallies of sequences with TPA history (m_NumTpaWithHistory) and without it (m_NumTpaWithoutHistory)
374  void ResetTpaWithHistoryCount() { m_NumTpaWithHistory = 0; }
375  void IncrementTpaWithHistoryCount() { ++m_NumTpaWithHistory; }
376  void AddToTpaWithHistoryCount(SIZE_TYPE num) { m_NumTpaWithHistory += num; }
377  void ResetTpaWithoutHistoryCount() { m_NumTpaWithoutHistory = 0; }
378  void IncrementTpaWithoutHistoryCount() { ++m_NumTpaWithoutHistory; }
379  void AddToTpaWithoutHistoryCount(SIZE_TYPE num) { m_NumTpaWithoutHistory += num; }
380 
381  // running tallies of pseudos (m_NumPseudo) and pseudogenes (m_NumPseudogene)
382  void ResetPseudoCount() { m_NumPseudo = 0; }
383  void IncrementPseudoCount() { ++m_NumPseudo; }
384  void AddToPseudoCount(SIZE_TYPE num) { m_NumPseudo += num; }
385  void ResetPseudogeneCount() { m_NumPseudogene = 0; }
386  void IncrementPseudogeneCount() { ++m_NumPseudogene; }
387  void AddToPseudogeneCount(SIZE_TYPE num) { m_NumPseudogene += num; }
388 
389  // record that a far-fetch failure occurred (sets m_FarFetchFailure; nothing here clears it)
390  void SetFarFetchFailure() { m_FarFetchFailure = true; }
391 
392  bool IsFarSequence(const CSeq_id& id); // const;
393 
394  const CSeq_entry& GetTSE() const { return *m_TSE; } // dereferences m_TSE as-is; no emptiness check is made here
395  const CSeq_entry_Handle& GetTSEH() { return m_TSEH; }
396  const CTSE_Handle& GetTSE_Handle() { return
397  (m_TSEH ? m_TSEH.GetTSE_Handle() : CCacheImpl::kEmptyTSEHandle); } // falls back to CCacheImpl::kEmptyTSEHandle when m_TSEH tests false
398 
399  CBioseq_Handle GetBioseqHandleFromTSE(const CSeq_id& id);
400  CBioseq_Handle GetLocalBioseqHandle(const CSeq_id& id); // Local here means not far
401 
402  const CConstRef<CSeq_annot>& GetSeqAnnot() { return m_SeqAnnot; }
403 
404  void AddBioseqWithNoPub(const CBioseq& seq);
405  void AddBioseqWithNoBiosource(const CBioseq& seq);
406  void AddProtWithoutFullRef(const CBioseq_Handle& seq);
407  static bool IsWGSIntermediate(const CBioseq& seq);
408  static bool IsTSAIntermediate(const CBioseq& seq);
409  void ReportMissingPubs(const CSeq_entry& se, const CCit_sub* cs);
410  void ReportMissingBiosource(const CSeq_entry& se);
411 
412  CConstRef<CSeq_feat> GetCDSGivenProduct(const CBioseq& seq);
413  NCBI_DEPRECATED CConstRef<CSeq_feat> GetmRNAGivenProduct(const CBioseq& seq);
414  CConstRef<CSeq_feat> GetmRNAGivenProduct(const CBioseq_Handle& seq);
415  const CSeq_entry* GetAncestor(const CBioseq& seq, CBioseq_set::EClass clss);
416  bool IsSerialNumberInComment(const string& comment);
417 
418  bool IsTransgenic(const CBioSource& bsrc);
419 
420  bool RequireLocalProduct(const CSeq_id* sid) const;
421 
422 private:
423 
424  // Set up common options during construction.
425  void x_Init(Uint4 options);
426 
427  // This is so we can temporarily set m_Scope in a function
428  // and be sure that it will be set to its old value when we're done
430  public:
432  m_scopeToRestore(scope), m_scopeOriginalValue(scope) { }
433 
434  ~CScopeRestorer() { m_scopeToRestore = m_scopeOriginalValue; }
435  private:
438  };
439 
440  // Prohibit copy constructor & assignment operator
442  CValidError_imp& operator= (const CValidError_imp&) = delete; // explicitly deleted so any copy-assignment is rejected at compile time rather than at link time
443 
444  void Setup(const CSeq_entry_Handle& seh);
445  void Setup(const CSeq_annot_Handle& sa);
446  CSeq_entry_Handle Setup(const CBioseq& seq);
447  void SetScope(const CSeq_entry& se);
448 
449  CValidatorEntryInfo& x_SetEntryInfo();
450 
451  void x_AddValidErrItem(EDiagSev sev,
452  EErrType type,
453  const string& msg,
454  const string& desc,
455  const CSerialObject& obj,
456  const string& accession,
457  const int version);
458 
459  void ValidateSubmitBlock(const CSubmit_block& block, const CSeq_submit& ss);
460 
461  void InitializeSourceQualTags();
462  void ValidateSourceQualTags(const string& str, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
463 
464  bool IsMixedStrands(const CSeq_loc& loc);
465 
466  void ValidatePubGen(const CCit_gen& gen, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
467  void ValidatePubArticle(const CCit_art& art, TEntrezId uid, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
468  void ValidatePubArticleNoPMID(const CCit_art& art, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
469  void x_ValidatePages(const string& pages, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
470  void ValidateAuthorList(const CAuth_list::C_Names& names, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
471  void ValidateAuthorsInPubequiv (const CPub_equiv& pe, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
472  void ValidatePubHasAuthor(const CPubdesc& pubdesc, const CSerialObject& obj, const CSeq_entry *ctx = nullptr);
473 
474  bool HasName(const CAuth_list& authors);
475  bool HasTitle(const CTitle& title);
476  bool HasIsoJTA(const CTitle& title);
477 
478  void FindEmbeddedScript(const CSerialObject& obj);
479  void FindNonAsciiText (const CSerialObject& obj);
480  void FindCollidingSerialNumbers (const CSerialObject& obj);
481 
482 
483  void GatherTentativeName (const CSeq_entry& se, vector<CConstRef<CSeqdesc> >& usr_descs, vector<CConstRef<CSeq_entry> >& desc_ctxs, vector<CConstRef<CSeq_feat> >& usr_feats);
484 
485  static bool s_IsSalmonellaGenus(const string& taxname);
486  EDiagSev x_SalmonellaErrorLevel();
487 
488  typedef struct tagSLocCheck { // state bundle passed by reference to x_InitLocCheck, x_CheckForStrandChange, x_CheckLoc and x_CheckPackedInt
489  bool chk; // no default initializer here; presumably set by x_InitLocCheck -- TODO confirm
492  bool has_other; // no default initializer here; presumably set by x_InitLocCheck -- TODO confirm
496  const CSeq_interval *int_cur = nullptr; // presumably the interval currently being examined -- TODO confirm
497  const CSeq_interval *int_prv = nullptr; // presumably the interval examined before int_cur -- TODO confirm
500  string prefix; // presumably the prefix handed to x_InitLocCheck -- TODO confirm
501  } SLocCheck;
502 
503  void x_InitLocCheck(SLocCheck& lc, const string& prefix);
504  void x_CheckForStrandChange(SLocCheck& lc);
505  void x_CheckLoc(const CSeq_loc& loc, const CSerialObject& obj, SLocCheck& lc, bool lowerSev = false);
506  void x_CheckPackedInt(const CPacked_seqint& packed_int,
507  SLocCheck& lc,
508  const CSerialObject& obj);
509  bool x_CheckSeqInt(CConstRef<CSeq_id>& id_cur,
510  const CSeq_interval * int_cur,
511  ENa_strand& strand_cur);
512  void x_ReportInvalidFuzz(const CPacked_seqint& packed_int, const CSerialObject& obj);
513  void x_ReportInvalidFuzz(const CSeq_interval& interval, const CSerialObject& obj);
514  void x_ReportInvalidFuzz(const CSeq_point& point, const CSerialObject& obj);
515  void x_ReportInvalidFuzz(const CSeq_loc& loc, const CSerialObject& obj);
516  void x_ReportPCRSeqProblem(const string& primer_kind,
517  char badch,
518  const CSerialObject& obj,
519  const CSeq_entry *ctx);
520  void x_CheckPCRPrimer(const CPCRPrimer& primer,
521  const string& primer_kind,
522  const CSerialObject& obj,
523  const CSeq_entry *ctx);
524 
525  void x_DoBarcodeTests(CSeq_entry_Handle seh);
526 
527  bool x_DowngradeForMissingAffil(const CCit_sub& cs);
528 
534 
537 
538  // error repository
540 
541  // flags derived from options parameter
542  bool m_NonASCII; // User sets if Non ASCII char found
543  bool m_SuppressContext; // NOTE(review): name implies context is suppressed when true, yet this note previously said context is included if true -- confirm polarity
544  bool m_ValidateAlignments; // Validate Alignments if true
545  bool m_ValidateExons; // Check exon feature splice sites
546  bool m_OvlPepErr; // Peptide overlap error if true, else warn
547  bool m_RequireISOJTA; // Journal requires ISO JTA
548  bool m_ValidateIdSet; // validate update against ID set in database
549  bool m_RemoteFetch; // Remote fetch enabled?
550  bool m_FarFetchMRNAproducts; // Remote fetch mRNA products
551  bool m_FarFetchCDSproducts; // Remote fetch proteins
559  bool m_IgnoreExceptions; // ignore exceptions when validating translation
560  bool m_ValidateInferenceAccessions; // check that accessions in inferences are valid
565  bool m_CollectLocusTags; // collect locus tags for use in special formatted reports
566  bool m_SeqSubmitParent; // some errors are suppressed if this is run on a newly created submission; IsRequireTaxonID() returns its negation
569 
570  // flags calculated by examining data in record
572 
573  bool m_IsNC=false;
574  bool m_IsNG=false;
575  bool m_IsNM=false;
576  bool m_IsNP=false;
577  bool m_IsNR=false;
578  bool m_IsNZ=false;
579  bool m_IsNS=false;
580  bool m_IsNT=false;
581  bool m_IsNW=false;
582  bool m_IsWP=false;
583  bool m_IsXR=false;
584 
587 
589 
590  // seq ids contained within the original seq entry.
591  // (used to check for far location)
592  vector< CConstRef<CSeq_id> > m_InitialSeqIds;
593  // Bioseqs without source (should be considered only if m_NoSource is false)
594  vector< CConstRef<CBioseq> > m_BioseqWithNoSource;
595 
596  // list of publication serial numbers
597  vector< int > m_PubSerialNumbers;
598 
609 
615 
618 
621 
623 
624  // Taxonomy service interface.
625  unique_ptr<CTaxValidationAndCleanup> x_CreateTaxValidator() const;
626 
627  shared_ptr<SValidatorContext> m_pContext;
628  unique_ptr<CValidatorEntryInfo> m_pEntryInfo = make_unique<CValidatorEntryInfo>();
629 };
630 
631 
632 END_SCOPE(validator)
635 
636 #endif /* VALIDATOR___VALIDERROR_IMP__HPP */
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
#define static
User-defined methods of the data storage class.
EErrType
#define false
Definition: bool.h:36
std representation
Definition: Affil_.hpp:91
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
CBioseq_Handle –.
CByte_graph –.
Definition: Byte_graph.hpp:66
CCdregion –.
Definition: Cdregion.hpp:66
Definition: Dbtag.hpp:53
CDelta_seq –.
Definition: Delta_seq.hpp:66
CFeat_CI –.
Definition: feat_ci.hpp:64
@Imp_feat.hpp User-defined methods of the data storage class.
Definition: Imp_feat.hpp:54
CMappedGraph –.
Definition: graph_ci.hpp:61
CObjectManager –.
@OrgMod.hpp User-defined methods of the data storage class.
Definition: OrgMod.hpp:54
CPCRPrimer –.
Definition: PCRPrimer.hpp:66
CPCRReactionSet –.
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CScope –.
Definition: scope.hpp:92
CSeq_annot_Handle –.
CSeq_entry_Handle –.
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
Base class for all serializable objects.
Definition: serialbase.hpp:150
CSubmit_block –.
CTaxon3_reply –.
Definition: Title.hpp:51
CScopeRestorer(CRef< CScope > &scope)
CRef< CObjectManager > m_ObjMgr
bool DoRubiscoTest() const
SIZE_TYPE GetGeneCount(void) const
void PostErr(EDiagSev sv, EErrType et, const string &msg, TDesc ds)
const CSeq_entry_Handle & GetTSEH()
bool ReportSpliceAsError() const
void AddToPseudoCount(SIZE_TYPE num)
bool IsLatLonCheckState() const
SIZE_TYPE m_NumPseudogene
CRef< CScope > m_Scope
bool IsRemoteFetch() const
SIZE_TYPE m_NumTpaWithHistory
bool IsSuppressContext() const
CValidator::TProgressCallback m_PrgCallback
void AddToGeneCount(SIZE_TYPE num)
bool DoTaxLookup() const
CValidError * m_ErrRepository
void IncrementMisplacedFeatureCount()
bool IsValidateAlignments() const
const CConstRef< CSeq_annot > & GetSeqAnnot()
void ResetSmallGenomeSetMisplacedCount()
void ResetTpaWithoutHistoryCount()
void AddToGeneXrefCount(SIZE_TYPE num)
map< int, int > TCount
bool GenerateGoldenFile() const
bool IsStandaloneAnnot() const
void AddToTpaWithoutHistoryCount(SIZE_TYPE num)
CConstRef< CSeq_feat > GetCachedGene(const CSeq_feat *f)
CConstRef< CSeq_annot > m_SeqAnnot
bool IsValidateIdSet() const
const CSeq_feat & TFeat
SIZE_TYPE m_NumMisplacedGraphs
void AddToMisplacedGraphCount(SIZE_TYPE num)
bool IsLocusTagGeneralMatch() const
void AddToTpaWithHistoryCount(SIZE_TYPE num)
SIZE_TYPE m_NumTpaWithoutHistory
CValidator::CProgressInfo m_PrgInfo
const CTSE_Handle & GetTSE_Handle()
bool ValidateInferenceAccessions() const
void IncrementMisplacedGraphCount()
void IncrementPseudogeneCount()
SIZE_TYPE m_NumSmallGenomeSetMisplaced
bool m_ValidateInferenceAccessions
vector< CConstRef< CBioseq > > m_BioseqWithNoSource
SIZE_TYPE GetGeneXrefCount(void) const
const CSeq_annot & TAnnot
bool IsRequireTaxonID() const
const CBioseq_set & TSet
bool IsSeqSubmitParent() const
void AddToPseudogeneCount(SIZE_TYPE num)
SIZE_TYPE m_NumGeneXrefs
const CSeq_graph & TGraph
bool IsOvlPepErr() const
bool IsNonASCII() const
bool IsGenomeSubmission() const
CSeq_entry_Handle m_TSEH
SIZE_TYPE m_NumBioseq_set
void IncrementGeneXrefCount()
bool UseEntrez() const
bool IsLatLonIgnoreWater() const
bool IsFarFetchCDSproducts() const
void ResetTpaWithHistoryCount()
void AddToSmallGenomeSetMisplacedCount(SIZE_TYPE num)
void AddToMisplacedFeatureCount(SIZE_TYPE num)
void IncrementTpaWithHistoryCount()
bool IsValidateExons() const
bool IgnoreExceptions() const
vector< CConstRef< CSeq_id > > m_InitialSeqIds
CBioSourceKind m_biosource_kind
CConstRef< CSeq_entry > m_TSE
CGeneCache m_GeneCache
void IncrementTpaWithoutHistoryCount()
bool IsRefSeqConventions() const
bool IsIndexerVersion() const
bool IsRequireISOJTA() const
CGeneCache & GetGeneCache()
map< const CSeq_feat *, const CSeq_annot * > & TFeatAnnotMap
void IncrementSmallGenomeSetMisplacedCount()
const CSeq_align & TAlign
vector< int > m_PubSerialNumbers
void ResetMisplacedGraphCount()
const CSeq_entry & TEntry
shared_ptr< SValidatorContext > m_pContext
const CSeq_entry & GetTSE() const
bool IsFarFetchMRNAproducts() const
bool DoCompareVDJCtoCDS() const
SIZE_TYPE m_NumMisplacedFeatures
const CSeqdesc & TDesc
CCacheImpl & GetCache()
bool ShouldSubdivide() const
void ResetMisplacedFeatureCount()
CValidError_imp(const CValidError_imp &)
const CBioseq & TBioseq
Cache various information for one validation run.
Definition: cache_impl.hpp:126
static const CTSE_Handle kEmptyTSEHandle
Definition: cache_impl.hpp:237
bool(* TProgressCallback)(CProgressInfo *)
Definition: validator.hpp:248
Definition: set.hpp:45
Include a standard set of the NCBI C++ Toolkit most basic headers.
API (CDeflineGenerator) for computing sequences' titles ("definitions").
static uch flags
CS_CONTEXT * ctx
Definition: t0006.c:12
static const struct name_t names[]
static int lc
Definition: getdata.c:30
SStrictId_Entrez::TId TEntrezId
TEntrezId type for entrez ids which require the same strictness as TGi.
Definition: ncbimisc.hpp:1041
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
#define NCBI_DEPRECATED
#define NCBI_STD_DEPRECATED(message)
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
NCBI_NS_STD::string::size_type SIZE_TYPE
Definition: ncbistr.hpp:132
#define NCBI_VALIDATOR_EXPORT
Definition: ncbi_export.h:913
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
static int version
Definition: mdb_load.c:29
std::istream & in(std::istream &in_, double &x_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
static wxString GetContext(const wxString &str, int pos)
bool IsInOrganelleSmallGenomeSet(const CSeq_id &id, CScope &scope)
Definition: utilities.cpp:3050
bool BadMultipleSequenceLocation(const CSeq_loc &loc, CScope &scope)
Definition: utilities.cpp:3080
bool IsNG(const CSeq_id &id)
Definition: utilities.cpp:2790
const CSeq_feat::TDbxref TDbtags
Definition: utilities.hpp:199
static const char * prefix[]
Definition: pcregrep.c:405
static const char * str(char *buf, int n)
Definition: stats.c:84
Definition: type.c:6
CScope & GetScope()
#define const
Definition: zconf.h:230
Modified on Fri Dec 01 04:43:42 2023 by modify_doxy.py rev. 669887