NCBI C++ ToolKit
splice_problems.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: splice_problems.hpp 82806 2018-07-05 10:48:05Z bollin $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Colleen Bollin
27  *
28  * File Description:
29  * Declares CSpliceProblems and free helper functions for validating
30  * the donor and acceptor splice sites of sequence features.
31  *
32  */
33 
34 #ifndef VALIDATOR___SPLICE_PROBLEMS__HPP
35 #define VALIDATOR___SPLICE_PROBLEMS__HPP
36 
37 #include <corelib/ncbistd.hpp>
39 
40 #include <objmgr/scope.hpp>
43 
44 #include <objmgr/util/feature.hpp>
45 
48 
49 class CSeq_entry;
50 class CCit_sub;
51 class CCit_art;
52 class CCit_gen;
53 class CSeq_feat;
54 class CBioseq;
55 class CSeqdesc;
56 class CSeq_annot;
57 class CTrna_ext;
58 class CProt_ref;
59 class CSeq_loc;
60 class CFeat_CI;
61 class CPub_set;
62 class CAuth_list;
63 class CTitle;
64 class CMolInfo;
65 class CUser_object;
66 class CSeqdesc_CI;
67 class CDense_diag;
68 class CDense_seg;
69 class CSeq_align_set;
70 class CPubdesc;
71 class CBioSource;
72 class COrg_ref;
73 class CDelta_seq;
74 class CGene_ref;
75 class CCdregion;
76 class CRNA_ref;
77 class CImp_feat;
78 class CSeq_literal;
79 class CBioseq_Handle;
80 class CSeq_feat_Handle;
81 class CCountries;
83 class CComment_set;
84 class CTaxon3_reply;
85 class ITaxon3;
86 class CT3Error;
87 
88 BEGIN_SCOPE(validator)
89 
90 class CValidError_imp;
92 class CGeneCache;
93 class CValidError_base;
94 
// Reference to a modifiable array of exactly two Char elements. Used below as the
// type of the `site` parameter of the ReadDonorSpliceSite / ReadAcceptorSpliceSite
// overloads.
95 typedef Char(&TSpliceSite)[2];
96 
97 // ============================= Validate SeqFeat ============================
98 
99 
100 
102 public:
// Default constructor: both status flags (m_ExceptionUnnecessary and
// m_ErrorsNotExpected) start out false.
103  CSpliceProblems() : m_ExceptionUnnecessary(false), m_ErrorsNotExpected(false) {};
105 
// Entry point of the class; declared here, defined elsewhere.
// NOTE(review): presumably fills the donor/acceptor problem lists and the two flags
// that the accessors below expose -- confirm against the implementation file.
// `loc_handle` is taken by value, unlike the const-reference handles of the private
// helpers further down.
106  void CalculateSpliceProblems(const CSeq_feat& feat, bool check_all, bool pseudo, CBioseq_Handle loc_handle);
107 
108  // One recorded problem: .first holds the problem flags, .second the sequence position (TSeqPos)
109  typedef pair<size_t, TSeqPos> TSpliceProblem;
110  typedef vector<TSpliceProblem> TSpliceProblemList; // sequence of such records
111 
// Return type of the ReadDonorSpliceSite / ReadAcceptorSpliceSite overloads below.
// NOTE(review): this listing jumps from original line 113 to 117, so any enumerators
// declared between eSpliceSiteRead_OK and eSpliceSiteRead_WrongNT are not shown here.
112  typedef enum {
113  eSpliceSiteRead_OK = 0,
117  eSpliceSiteRead_WrongNT
118  } ESpliceSiteRead;
119 
// Declared only (non-const); the definition is not part of this header.
// NOTE(review): presumably reports whether any donor/acceptor problem was recorded -- confirm.
120  bool SpliceSitesHaveErrors();
// Returns the current value of the m_ExceptionUnnecessary flag.
121  bool IsExceptionUnnecessary() const { return m_ExceptionUnnecessary; }
// Returns the current value of the m_ErrorsNotExpected flag.
122  bool AreErrorsUnexpected() const { return m_ErrorsNotExpected; }
// Read-only access to the stored donor problem list (no copy is made).
123  const TSpliceProblemList& GetDonorProblems() const { return m_DonorProblems; }
// Read-only access to the stored acceptor problem list (no copy is made).
124  const TSpliceProblemList& GetAcceptorProblems() const { return m_AcceptorProblems; }
125 
126 private:
131 
// Private helpers, declared only. The three ValidateSplice* functions share one
// signature: the feature, a handle to a bioseq, and the strand.
// NOTE(review): by their names they presumably handle coding-region, mRNA and exon
// features respectively -- confirm against the implementation file.
132  void ValidateSpliceCdregion(const CSeq_feat& feat, const CBioseq_Handle& bsh, ENa_strand strand);
133  void ValidateSpliceMrna(const CSeq_feat& feat, const CBioseq_Handle& bsh, ENa_strand strand);
134  void ValidateSpliceExon(const CSeq_feat& feat, const CBioseq_Handle& bsh, ENa_strand strand);
// Takes a separate sequence vector and length for the donor side (with `stop`)
// and for the acceptor side (with `start`).
135  void ValidateDonorAcceptorPair(ENa_strand strand, TSeqPos stop, const CSeqVector& vec_donor, TSeqPos seq_len_donor,
136  TSeqPos start, const CSeqVector& vec_acceptor, TSeqPos seq_len_acceptor);
137 
138 
// Two overloads each for the donor and the acceptor side, all returning an
// ESpliceSiteRead status. The donor versions take a `stop` position, the acceptor
// versions a `start` position. One overload of each pair additionally takes a
// non-const reference to a two-Char array (`site`).
// NOTE(review): presumably that overload stores the two bases it read into `site`
// -- confirm against the implementation file.
139  ESpliceSiteRead ReadDonorSpliceSite(ENa_strand strand, TSeqPos stop, const CSeqVector& vec, TSeqPos seq_len, TSpliceSite& site);
140  ESpliceSiteRead ReadDonorSpliceSite(ENa_strand strand, TSeqPos stop, const CSeqVector& vec, TSeqPos seq_len);
141  ESpliceSiteRead ReadAcceptorSpliceSite(ENa_strand strand, TSeqPos start, const CSeqVector& vec, TSeqPos seq_len, TSpliceSite& site);
142  ESpliceSiteRead ReadAcceptorSpliceSite(ENa_strand strand, TSeqPos start, const CSeqVector& vec, TSeqPos seq_len);
143 };
144 
145 
// Splice-site signature strings. The first three are five characters long: two
// dinucleotides joined by '-'. The last three are bare dinucleotides.
// NOTE(review): presumably these are the values passed as the `signature` argument of
// CheckAdjacentSpliceSites / CheckSpliceSite below -- confirm at the call sites.
// These are namespace-scope `const` objects defined in a header, so they have
// internal linkage: every translation unit that includes this file gets its own copy.
146 const string kSpliceSiteGTAG = "GT-AG";
147 const string kSpliceSiteGCAG = "GC-AG";
148 const string kSpliceSiteATAC = "AT-AC";
149 const string kSpliceSiteGT = "GT";
150 const string kSpliceSiteGC = "GC";
151 const string kSpliceSiteAG = "AG";
152 
// Reference to a read-only array of exactly two Char elements; the parameter type
// used for splice sites by the free Check* functions declared below.
153 typedef Char const (&TConstSpliceSite)[2];
154 
// Free predicates, declared only. Each takes the strand plus one or two two-Char
// splice sites; the first two also take a signature string to check against.
// NOTE(review): presumably each returns true when the site(s) match -- confirm
// against the implementation file.
// NOTE(review): this listing skips original line 157. The symbol index at the bottom
// of the page lists CheckIntronSpliceSites(strand, donor, acceptor), which is
// presumably the declaration missing here.
155 bool CheckAdjacentSpliceSites(const string& signature, ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor);
156 bool CheckSpliceSite(const string& signature, ENa_strand strand, TConstSpliceSite site);
158 bool CheckIntronDonor(ENa_strand strand, TConstSpliceSite donor);
159 bool CheckIntronAcceptor(ENa_strand strand, TConstSpliceSite acceptor);
160 
161 
162 
163 
164 END_SCOPE(validator)
167 
168 #endif /* VALIDATOR___SPLICE_PROBLEMS__HPP */
@Auth_list.hpp User-defined methods of the data storage class.
Definition: Auth_list.hpp:57
CBioseq_Handle –.
CCdregion –.
Definition: Cdregion.hpp:66
CDelta_seq –.
Definition: Delta_seq.hpp:66
CFeat_CI –.
Definition: feat_ci.hpp:64
@Imp_feat.hpp User-defined methods of the data storage class.
Definition: Imp_feat.hpp:54
@Pubdesc.hpp User-defined methods of the data storage class.
Definition: Pubdesc.hpp:54
@RNA_ref.hpp User-defined methods of the data storage class.
Definition: RNA_ref.hpp:54
CSeqVector –.
Definition: seq_vector.hpp:65
Definition: Seq_entry.hpp:56
CSeq_feat_Handle –.
namespace ncbi::objects::
Definition: Seq_feat.hpp:58
CSeqdesc_CI –.
Definition: seqdesc_ci.hpp:65
const TSpliceProblemList & GetDonorProblems() const
vector< TSpliceProblem > TSpliceProblemList
bool IsExceptionUnnecessary() const
TSpliceProblemList m_DonorProblems
bool AreErrorsUnexpected() const
pair< size_t, TSeqPos > TSpliceProblem
TSpliceProblemList m_AcceptorProblems
const TSpliceProblemList & GetAcceptorProblems() const
CTaxon3_reply –.
Definition: Title.hpp:51
Include a standard set of the NCBI C++ Toolkit most basic headers.
#define false
Definition: bool.h:36
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define Char
Definition: ncbistd.hpp:124
char Char
Alias for char.
Definition: ncbitype.h:93
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_VALIDATOR_EXPORT
Definition: ncbi_export.h:913
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
ESERV_Site site
bool CheckIntronAcceptor(ENa_strand strand, TConstSpliceSite acceptor)
const string kSpliceSiteGT
const string kSpliceSiteGTAG
bool CheckIntronSpliceSites(ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor)
Char const (& TConstSpliceSite)[2]
const string kSpliceSiteGC
bool CheckAdjacentSpliceSites(const string &signature, ENa_strand strand, TConstSpliceSite donor, TConstSpliceSite acceptor)
const string kSpliceSiteAG
bool CheckIntronDonor(ENa_strand strand, TConstSpliceSite donor)
Char(& TSpliceSite)[2]
const string kSpliceSiteATAC
bool CheckSpliceSite(const string &signature, ENa_strand strand, TConstSpliceSite site)
const string kSpliceSiteGCAG
Modified on Fri Sep 20 14:57:57 2024 by modify_doxy.py rev. 669887