NCBI C++ ToolKit
single_feat_validator.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: single_feat_validator.hpp 101216 2023-11-16 17:16:13Z stakhovv $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Colleen Bollin, Jonathan Kans, Clifford Clausen, Aaron Ucko, et al.
27  *
28  * File Description:
29  * For validating individual features
30  * .......
31  *
32  */
33 
34 #ifndef VALIDATOR___SINGLE_FEAT_VALIDATOR__HPP
35 #define VALIDATOR___SINGLE_FEAT_VALIDATOR__HPP
36 
37 #include <corelib/ncbistd.hpp>
39 
40 #include <objmgr/scope.hpp>
41 #include <objmgr/feat_ci.hpp> // for CMappedFeat
44 
50 
51 #include <objmgr/util/feature.hpp>
52 
55 
// Forward declarations of object-model types. Several of them (for example
// CSeq_feat, CSeq_loc, CGene_ref and CBioseq_Handle) appear in the member
// signatures of the validator classes declared later in this header.
// NOTE(review): the namespace-opening lines that normally precede these
// declarations are not present in this listing.
56 class CSeq_entry;
57 class CCit_sub;
58 class CCit_art;
59 class CCit_gen;
60 class CSeq_feat;
61 class CBioseq;
62 class CSeqdesc;
63 class CSeq_annot;
64 class CTrna_ext;
65 class CProt_ref;
66 class CSeq_loc;
67 class CFeat_CI;
68 class CPub_set;
69 class CAuth_list;
70 class CTitle;
71 class CMolInfo;
72 class CUser_object;
73 class CSeqdesc_CI;
// The three alignment-related declarations below are intentionally disabled.
74 //class CDense_diag;
75 //class CDense_seg;
76 //class CSeq_align_set;
77 class CPubdesc;
78 class CBioSource;
79 class COrg_ref;
80 class CDelta_seq;
81 class CGene_ref;
82 class CCdregion;
83 class CRNA_ref;
84 class CImp_feat;
85 class CSeq_literal;
86 class CBioseq_Handle;
87 class CSeq_feat_Handle;
88 class CCountries;
90 class CComment_set;
91 
92 BEGIN_SCOPE(validator)
93 
94 class CValidError_imp;
95 class CValidError_base;
96 
97 // ============================= Validate SeqFeat ============================
98 
99 
// Body of CSingleFeatValidator (the class-head line is missing from this
// listing; the name is taken from the constructor below).
// This is the base of the per-feature-type validators in this header:
// Validate() and the x_Validate*/x_Report* members marked `virtual` here are
// the ones re-declared with `override` in the classes that follow.
101 {
102 public:
     // Construct a validator from a feature, a scope and the error-collecting
     // implementation object. All three are taken by reference.
103  CSingleFeatValidator(const CSeq_feat& feat, CScope& scope, CValidError_imp& imp);
105 
     // Virtual entry point; each specialised validator overrides it.
106  virtual void Validate();
107 
     // Static predicates: callable without a validator instance.
     // s_IsPseudo is overloaded for a whole feature and for a gene reference.
108  static bool x_HasSeqLocBond(const CSeq_feat& feat);
109  static bool s_IsPseudo(const CSeq_feat& feat);
110  static bool s_IsPseudo(const CGene_ref& ref);
     // `label` is a non-const reference, i.e. an output parameter.
112  static bool s_GeneRefsAreEquivalent(const CGene_ref& g1, const CGene_ref& g2, string& label);
115 protected:
122 
     // Post one diagnostic with severity `sv`, error type `et` and text `msg`.
123  void PostErr(EDiagSev sv, EErrType et, const string& msg);
124 
127  void x_ValidateBothStrands();
     // Results are returned through the `both` / `both_rev` reference parameters.
128  static void x_LocHasStrandBoth(const CSeq_loc& feat, bool& both, bool& both_rev);
129  void x_ValidateGeneId();
130  void x_ValidateFeatCit();
     // Virtual hook: overridden by CCdregionValidator.
131  virtual void x_ValidateFeatComment();
132  void x_ValidateGbQual(const CGb_qual& qual);
134 
136 
137  bool x_HasNamedQual(const string& qual_name);
138 
     // Virtual hook: overridden by several derived validators.
140  virtual void x_ValidateSeqFeatLoc();
142 
     // NOTE(review): the enumerator list and closing of this typedef are not
     // present in this listing (rendered lines 144-150 are missing).
143  typedef enum {
151 
153 
     // Static gap helpers. x_CalculateLocationGaps reports gap start
     // positions through the `gap_starts` output vector.
154  static size_t x_CalculateLocationGaps(CBioseq_Handle bsh, const CSeq_loc& loc, vector<TSeqPos>& gap_starts);
155  static bool x_IsMostlyNs(const CSeq_loc& loc, CBioseq_Handle bsh);
156  static TSeqPos x_FindStartOfGap(CBioseq_Handle bsh, TSeqPos pos, CScope* scope);
157 
158  void x_ValidateExcept();
     // Virtual hook: overridden by CCdregionValidator and one other class.
159  virtual void x_ValidateExceptText(const string& text);
160 
161  void x_ValidateGbquals();
     // Virtual hook: overridden by CCdregionValidator.
162  virtual bool x_ReportOrigProteinId();
     // Per-qualifier value checks; each takes the qualifier value as a string.
163  void x_ValidateRptUnitVal(const string& val, const string& key);
164  void x_ValidateRptUnitSeqVal(const string& val, const string& key);
165  void x_ValidateRptUnitRangeVal(const string& val);
166  void x_ValidateLabelVal(const string& val);
167  void x_ValidateCompareVal(const string& val);
168  void x_ValidateReplaceQual(const string& key, const string& qual_str, const string& val);
169 
     // Two overloads; `is_far` is an output parameter in both.
170  CBioseq_Handle x_GetFeatureProduct(bool look_far, bool& is_far);
171  CBioseq_Handle x_GetFeatureProduct(bool& is_far);
172 
     // NOTE(review): both strings are passed by value here, unlike the
     // const-reference convention used elsewhere in this class.
173  void ValidateCharactersInField (string value, string field_name);
174  void ValidateSplice(bool gene_pseudo, bool check_all);
176  void x_ReportSpliceProblems(const CSpliceProblems& problems, const string& label);
177  void x_ReportDonorSpliceSiteReadErrors(const CSpliceProblems::TSpliceProblem& problem, const string& label);
179 
180  static bool x_BioseqHasNmAccession (CBioseq_Handle bsh);
181 
182  void x_ValidateNonImpFeat();
183  void x_ValidateGeneXRef();
184  void x_ValidateGeneFeaturePair(const CSeq_feat& gene);
185  void x_ValidateNonGene();
186  void x_ValidateOldLocusTag(const string& old_locus_tag);
187 
188  void x_ValidateImpFeatLoc();
189  void x_ValidateImpFeatQuals();
191 
194 
196 };
197 
// Body of CCdregionValidator (the class-head line is missing from this
// listing; the name is taken from the constructor below).
// Overrides Validate() and four protected hooks, and adds checks declared
// only in this class.
199 {
200 public:
201  CCdregionValidator(const CSeq_feat& feat, CScope& scope, CValidError_imp& imp);
202 
203  void Validate() override;
204 
205 protected:
     // Overrides of virtual hooks.
206  void x_ValidateFeatComment() override;
207  void x_ValidateExceptText(const string& text) override;
208  void x_ValidateQuals();
209  bool x_ReportOrigProteinId() override;
     // Static predicate on an integer genome code.
210  static bool IsPlastid(int genome);
211  void x_ValidateGeneticCode();
213  bool x_HasGoodParent();
214  void x_ValidateSeqFeatLoc() override;
215  void x_ValidateFarProducts();
216  void x_ValidateCDSPeptides();
217  void x_ValidateCDSPartial();
     // Const predicates: they do not modify the validator.
218  bool x_BypassCDSPartialTest() const;
219  bool x_CDS3primePartialTest() const;
220  bool x_CDS5primePartialTest() const;
221 
222  bool x_IsProductMisplaced() const;
223 
     // A short intron is represented as a pair of sequence positions.
     // NOTE(review): presumably (start, stop); confirm against the definition.
224  typedef pair<TSeqPos, TSeqPos> TShortIntron;
225  static vector<TShortIntron> x_GetShortIntrons(const CSeq_loc& loc, CScope* scope);
     // `shortlist` is a non-const reference, i.e. it is updated in place.
226  static void x_AddToIntronList(vector<TShortIntron>& shortlist, TSeqPos last_start, TSeqPos last_stop, TSeqPos this_start, TSeqPos this_stop);
227  static string x_FormatIntronInterval(const TShortIntron& interval);
228  void ReportShortIntrons();
229 
230  void x_ValidateTrans();
231  void x_ValidateCodebreak();
233  void x_ReportTranslExceptProblems(const CCDSTranslationProblems::TTranslExceptProblems& problems, bool has_exception);
235  string MapToNTCoords(TSeqPos pos);
236 
237  void x_ValidateProductId();
238  void x_ValidateConflict();
240 
242  void x_ValidateParentPartialness(const CSeq_loc& parent_loc, const string& parent_name);
244  bool x_CheckPosNOrGap(TSeqPos pos, const CSeqVector& vec);
245 
248 };
249 
250 
// Body of a specialised feature validator. Its class-head and constructor
// lines are missing from this listing, so the class name is not shown here.
// It overrides Validate() and x_ValidateExceptText() and adds an
// operon-specific check.
252 {
253 public:
255 
256  void Validate() override;
257 
258 protected:
259  void x_ValidateExceptText(const string& text) override;
260  void x_ValidateOperon();
263 };
264 
265 
// Body of a specialised feature validator. Its class-head and constructor
// lines are missing from this listing, so the class name is not shown here.
// It overrides Validate() and declares emptiness, EC-number and
// protein-name checks.
267 {
268 public:
270 
271  void Validate() override;
272 
273 protected:
274  void x_CheckForEmpty();
276  void x_ValidateECNumbers();
     // Takes the protein name to check as a string.
277  void x_ValidateProteinName(const string& prot_name);
279 };
280 
281 
// Body of CRNAValidator (the class-head line is missing from this listing;
// the name is taken from the constructor below).
283 {
284 public:
     // Inline constructor: forwards all three arguments unchanged to the
     // CSingleFeatValidator constructor and does nothing else.
285  CRNAValidator(const CSeq_feat& feat, CScope& scope, CValidError_imp& imp)
286  : CSingleFeatValidator(feat, scope, imp) {}
288 
289  void Validate() override;
290 
291 protected:
292  void x_ValidateRnaProduct(bool feat_pseudo, bool pseudo);
     // Both arguments are passed as size_t values.
294  void x_ReportRNATranslationProblems(size_t problems, size_t mismatches);
295  void x_ValidateRnaTrans();
296 
297  // for tRNAs
298  void x_ValidateAnticodon(const CSeq_loc& anticodon);
299  void x_ValidateTrnaCodons();
300  void x_ValidateTrnaType();
301  void x_ValidateTrnaData();
302  void x_ValidateTrnaOverlap();
303 
304 };
305 
306 
// Body of CMRNAValidator (the class-head line is missing from this listing;
// the name is taken from the constructor below).
// Unlike CRNAValidator, the constructor is only declared here, not defined.
308 {
309 public:
310  CMRNAValidator(const CSeq_feat& feat, CScope& scope, CValidError_imp& imp);
311 
312  void Validate() override;
313 
314 protected:
315  // for mRNAs
316  void x_ValidateMrna();
318  void x_ValidateMrnaGene();
319 
323 };
324 
325 
327 {
328 public:
330 
331  void Validate() override;
332 
333 protected:
334 };
335 
336 
338 {
339 public:
341 
342  void Validate() override;
343 
344 protected:
345 };
346 
347 
349 {
350 public:
352 
353  void x_ValidateSeqFeatLoc() override;
354 
355 protected:
356 };
357 
358 
360 {
361 public:
363 
364  void x_ValidateSeqFeatLoc() override;
365 
366 protected:
367 };
368 
369 
// Body of CPeptideValidator (the class-head line is missing from this
// listing; the name is taken from the constructor below).
// Only the constructor and the Validate() override are visible here; the
// protected section's contents are not present in this listing.
371 {
372 public:
373  CPeptideValidator(const CSeq_feat& feat, CScope& scope, CValidError_imp& imp);
374 
375  void Validate() override;
376 
377 protected:
379 
381 };
382 
383 
384 
386 {
387 public:
389 
390  void Validate() override;
391 
392 protected:
393 };
394 
395 
// Body of a specialised feature validator. Its class-head and constructor
// lines are missing from this listing, so the class name is not shown here.
// It overrides Validate() and adds one predicate about intron length.
397 {
398 public:
400 
401  void Validate() override;
402 
403 protected:
     // Takes a `pseudo` flag.
     // NOTE(review): how the flag changes the result is not visible here.
404  bool x_IsIntronShort(bool pseudo);
405 };
406 
407 
409 {
410 public:
412 
413  void Validate() override;
414 
415 protected:
416 };
417 
418 
420 {
421 public:
423 
424  void Validate() override;
425 
426 protected:
427 };
428 
429 
431 {
432 public:
434 
435  void Validate() override;
436 
437 protected:
438 };
439 
440 
442 {
443 public:
445  void Validate() override;
446 protected:
447 };
448 
450 
451 
452 END_SCOPE(validator)
455 
456 #endif /* VALIDATOR___SINGLE_FEAT_VALIDATOR__HPP */
EErrType
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
CBioseq_Handle –.
vector< STranslExceptProblem > TTranslExceptProblems
vector< STranslationMismatch > TTranslationMismatches
void x_ReportTranslationMismatches(const CCDSTranslationProblems::TTranslationMismatches &mismatches)
bool x_CDS5primePartialTest() const
void x_ValidateSeqFeatLoc() override
void Validate() override
bool x_CDS3primePartialTest() const
static string x_FormatIntronInterval(const TShortIntron &interval)
static bool IsPlastid(int genome)
CConstRef< CSeq_feat > m_Gene
bool x_IsProductMisplaced() const
static vector< TShortIntron > x_GetShortIntrons(const CSeq_loc &loc, CScope *scope)
pair< TSeqPos, TSeqPos > TShortIntron
CCdregionValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
bool x_BypassCDSPartialTest() const
void x_ReportTranslationProblems(const CCDSTranslationProblems &problems)
bool x_CheckPosNOrGap(TSeqPos pos, const CSeqVector &vec)
string MapToNTCoords(TSeqPos pos)
static void x_AddToIntronList(vector< TShortIntron > &shortlist, TSeqPos last_start, TSeqPos last_stop, TSeqPos this_start, TSeqPos this_stop)
void x_ValidateFeatComment() override
void x_ValidateExceptText(const string &text) override
bool x_ReportOrigProteinId() override
void x_ReportTranslExceptProblems(const CCDSTranslationProblems::TTranslExceptProblems &problems, bool has_exception)
CCdregion –.
Definition: Cdregion.hpp:66
CDelta_seq –.
Definition: Delta_seq.hpp:66
void Validate() override
CFeat_CI –.
Definition: feat_ci.hpp:64
@Gb_qual.hpp User-defined methods of the data storage class.
Definition: Gb_qual.hpp:61
void Validate() override
bool x_AllIntervalGapsAreMobileElements()
void x_ValidateMultiIntervalGene()
void x_ValidateExceptText(const string &text) override
@Imp_feat.hpp User-defined methods of the data storage class.
Definition: Imp_feat.hpp:54
bool x_IsIntronShort(bool pseudo)
CMRNAValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
CConstRef< CSeq_feat > m_Gene
void Validate() override
CPeptideValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
CConstRef< CSeq_feat > m_CDS
void x_ValidateSeqFeatLoc() override
void Validate() override
void x_ValidateProteinName(const string &prot_name)
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
void Validate() override
CRNAValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
void x_ReportRNATranslationProblems(size_t problems, size_t mismatches)
void x_ValidateAnticodon(const CSeq_loc &anticodon)
void x_ValidateRnaProduct(bool feat_pseudo, bool pseudo)
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
static bool x_IsMostlyNs(const CSeq_loc &loc, CBioseq_Handle bsh)
CBioseq_Handle x_GetFeatureProduct(bool look_far, bool &is_far)
void ValidateCharactersInField(string value, string field_name)
void PostErr(EDiagSev sv, EErrType et, const string &msg)
void x_ValidateLabelVal(const string &val)
CSingleFeatValidator(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
void x_ReportAcceptorSpliceSiteReadErrors(const CSpliceProblems::TSpliceProblem &problem, const string &label)
void x_ValidateRptUnitVal(const string &val, const string &key)
void x_ValidateReplaceQual(const string &key, const string &qual_str, const string &val)
CBioseq_Handle x_GetBioseqByLocation(const CSeq_loc &loc)
static bool s_IsPseudo(const CSeq_feat &feat)
static void s_RemoveDuplicateGoTerms(CSeq_feat &feat)
void x_ValidateCompareVal(const string &val)
void x_ValidateRptUnitSeqVal(const string &val, const string &key)
static void s_RemoveDuplicateGoTerms(CUser_object::TData &field_list)
void x_ReportDonorSpliceSiteReadErrors(const CSpliceProblems::TSpliceProblem &problem, const string &label)
static bool s_GeneRefsAreEquivalent(const CGene_ref &g1, const CGene_ref &g2, string &label)
void x_ValidateGbQual(const CGb_qual &qual)
void x_ValidateGeneFeaturePair(const CSeq_feat &gene)
void x_ValidateOldLocusTag(const string &old_locus_tag)
virtual void x_ValidateFeatComment()
bool x_HasNamedQual(const string &qual_name)
void x_ValidateLocusTagGeneralMatch(CConstRef< CSeq_feat > gene)
static bool x_HasSeqLocBond(const CSeq_feat &feat)
void x_ReportPseudogeneConflict(CConstRef< CSeq_feat > gene)
void x_ValidateRptUnitRangeVal(const string &val)
static bool x_BioseqHasNmAccession(CBioseq_Handle bsh)
void ValidateSplice(bool gene_pseudo, bool check_all)
void x_ReportSpliceProblems(const CSpliceProblems &problems, const string &label)
static TSeqPos x_FindStartOfGap(CBioseq_Handle bsh, TSeqPos pos, CScope *scope)
static size_t x_CalculateLocationGaps(CBioseq_Handle bsh, const CSeq_loc &loc, vector< TSeqPos > &gap_starts)
virtual void x_ValidateExceptText(const string &text)
static bool s_BioseqHasRefSeqThatStartsWithPrefix(CBioseq_Handle bsh, string prefix)
static void x_LocHasStrandBoth(const CSeq_loc &feat, bool &both, bool &both_rev)
pair< size_t, TSeqPos > TSpliceProblem
Definition: Title.hpp:51
char value[7]
Definition: config.c:431
Include a standard set of the NCBI C++ Toolkit most basic headers.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
EDiagSev
Severity level for the posted diagnostics.
Definition: ncbidiag.hpp:650
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static const char label[]
vector< CRef< CUser_field > > TData
static void text(MDB_val *v)
Definition: mdb_dump.c:62
const struct ncbi::grid::netcache::search::fields::KEY key
static const char * prefix[]
Definition: pcregrep.c:405
CSingleFeatValidator * FeatValidatorFactory(const CSeq_feat &feat, CScope &scope, CValidError_imp &imp)
Modified on Mon Feb 26 04:02:24 2024 by modify_doxy.py rev. 669887