NCBI C++ ToolKit
score_builder_base.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJMGR_UTIL___SCORE_BUILDER_BASE__HPP
2 #define OBJMGR_UTIL___SCORE_BUILDER_BASE__HPP
3 
4 /* $Id: score_builder_base.hpp 84759 2018-12-07 14:44:03Z evgeniev $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Mike DiCuccio
30  *
31  * File Description:
32  *
33  */
34 
35 #include <corelib/ncbiobj.hpp>
36 #include <util/range_coll.hpp>
38 
41 
42 class CScope;
43 
45 {
46 public:
47 
49  virtual ~CScoreBuilderBase();
50 
/// Kinds of score identified by this class. In the one fully visible case
/// (eScore_PercentCoverage) the comment precedes the enumerator it describes.
/// NOTE(review): the other enumerator lines are absent from this listing;
/// presumably each comment group below likewise precedes its enumerator.
51  enum EScoreType {
52  //< typical blast 'score'
53  //< NOTE: implemented in a derived class!!
55 
56  //< blast 'bit_score' score
57  //< NOTE: implemented in a derived class!!
59 
60  //< blast 'e_value' score
61  //< NOTE: implemented in a derived class!!
63 
64  //< count of ungapped identities as 'num_ident'
66 
67  //< count of ungapped mismatches as 'num_mismatch'
69 
70  //< percent identity as defined in CSeq_align, range 0.0-100.0
71  //< this will also create 'num_ident' and 'num_mismatch'
72  //< NOTE: see Seq_align.hpp for definitions
74 
75  //< percent coverage of query as 'pct_coverage', range 0.0-100.0
76  eScore_PercentCoverage
77  };
78 
79  /// Error handling while adding scores that are not implemented
80  /// or unsupported (cannot be defined) for certain types
81  /// of alignments.
82  ///
83  /// Transient errors, such as problems retrieving sequence
84  /// data, will always throw.
///
/// The mode in effect is read with GetErrorMode() and changed with
/// SetErrorMode().
85  enum EErrorMode {
86  eError_Silent, ///< Try to ignore errors, continue adding scores.
87  eError_Report, ///< Print error messages, but do not fail.
88  eError_Throw ///< Throw exceptions on errors.
89  };
90 
91  /// @name Functions to add scores directly to Seq-aligns
92  /// @{
93 
/// Return the error-handling mode currently stored in m_ErrorMode.
94  EErrorMode GetErrorMode(void) const { return m_ErrorMode; }
/// Replace the stored error-handling mode with @a mode.
95  void SetErrorMode(EErrorMode mode) { m_ErrorMode = mode; }
96 
/// Add the score identified by a CSeq_align::EScoreType value directly to a
/// Seq-align (first overload) or to a list of Seq-aligns (second overload).
/// NOTE(review): the final parameter line of the first overload is absent
/// from this listing; presumably it matches the second overload - confirm
/// against the real header.
97  void AddScore(CScope& scope, CSeq_align& align,
99  void AddScore(CScope& scope, list< CRef<CSeq_align> >& aligns,
100  CSeq_align::EScoreType score);
101 
102  /// @}
103 
104  /// @name Functions to compute scores without adding
105  /// @{
106 
/// Compute the score selected by @a score for @a align and return it as a
/// double. The alignment is taken by const reference, so the value is
/// returned rather than stored on the alignment.
///
/// Three overloads are declared:
///  - no range argument (presumably the whole alignment - confirm);
///  - restricted to a single TSeqRange;
///  - restricted to a CRangeCollection<TSeqPos>; this is the only overload
///    declared virtual.
107  double ComputeScore(CScope& scope, const CSeq_align& align,
108  CSeq_align::EScoreType score);
109  double ComputeScore(CScope& scope, const CSeq_align& align,
110  const TSeqRange &range,
111  CSeq_align::EScoreType score);
112  virtual double ComputeScore(CScope& scope, const CSeq_align& align,
113  const CRangeCollection<TSeqPos> &ranges,
114  CSeq_align::EScoreType score);
115 
116  /// Compute the six splign scores. Add them to scores object, or, if it is
117  /// not provided, to alignment itself
118  void AddSplignScores(const CSeq_align& align, CSeq_align::TScore &scores);
119 
/// Inline overload without a separate scores object: it forwards to the
/// two-argument overload above, passing the alignment's own score container
/// obtained from align.SetScore().
/// NOTE(review): the signature line of this overload is absent from this
/// listing; the cross-reference index shows
/// "void AddSplignScores(CSeq_align &align)".
121  {
122  AddSplignScores(align, align.SetScore());
123  }
124 
125 
126  /// Compute percent identity (range 0-100)
/// NOTE(review): the enum header line is absent from this listing; the
/// cross-reference index names this enum EPercentIdentityType. Its
/// enumerators select how gaps are treated in the identity computation.
128  eGapped, //< count gaps as mismatches
129  eUngapped, //< ignore gaps; only count aligned bases
130  eGBDNA //< each gap counts as a 1nt mismatch
131  };
/// Percent identity of @a align (range 0-100, as stated above).
/// @param type  gap-handling policy; defaults to eGapped.
132  double GetPercentIdentity(CScope& scope, const CSeq_align& align,
133  EPercentIdentityType type = eGapped);
134 
135  /// Compute percent coverage of the query (sequence 0) (range 0-100)
/// @param query  defaults to 0; presumably the index of the sequence in the
///               alignment that is treated as the query - confirm.
136  double GetPercentCoverage(CScope& scope, const CSeq_align& align, unsigned query = 0);
137 
138  /// Compute percent identity or coverage of the query within specified range
/// (single TSeqRange overloads; defaults match the unrestricted versions:
/// type = eGapped, query = 0)
139  double GetPercentIdentity(CScope& scope, const CSeq_align& align,
140  const TSeqRange &range,
141  EPercentIdentityType type = eGapped);
142  double GetPercentCoverage(CScope& scope, const CSeq_align& align,
143  const TSeqRange &range, unsigned query = 0);
144 
145  /// Compute percent identity or coverage of the query within specified
146  /// collection of ranges
/// (CRangeCollection<TSeqPos> overloads; same defaults as above)
147  double GetPercentIdentity(CScope& scope, const CSeq_align& align,
148  const CRangeCollection<TSeqPos> &ranges,
149  EPercentIdentityType type = eGapped);
150  double GetPercentCoverage(CScope& scope, const CSeq_align& align,
151  const CRangeCollection<TSeqPos> &ranges,
152  unsigned query = 0);
153 
154  /// Compute the number of identities in the alignment
155  int GetIdentityCount (CScope& scope, const CSeq_align& align);
156 
157  /// Compute the number of mismatches in the alignment
158  int GetMismatchCount (CScope& scope, const CSeq_align& align);
/// Variant reporting both counts through the reference parameters
/// @a identities and @a mismatches instead of a return value.
/// NOTE(review): whether the referenced ints are overwritten or added to is
/// not visible in this header - confirm in the implementation before
/// passing non-zero values.
159  void GetMismatchCount (CScope& scope, const CSeq_align& align,
160  int& identities, int& mismatches);
161 
162  /// Compute identity and/or mismatch counts within specified range
163  int GetIdentityCount (CScope& scope, const CSeq_align& align,
164  const TSeqRange &range);
165  int GetMismatchCount (CScope& scope, const CSeq_align& align,
166  const TSeqRange &range);
167  void GetMismatchCount (CScope& scope, const CSeq_align& align,
168  const TSeqRange &range,
169  int& identities, int& mismatches);
170 
171  /// Compute identity and/or mismatch counts within specified
172  /// collection of ranges
173  int GetIdentityCount (CScope& scope, const CSeq_align& align,
174  const CRangeCollection<TSeqPos> &ranges);
175  int GetMismatchCount (CScope& scope, const CSeq_align& align,
176  const CRangeCollection<TSeqPos> &ranges);
177  void GetMismatchCount (CScope& scope, const CSeq_align& align,
178  const CRangeCollection<TSeqPos> &ranges,
179  int& identities, int& mismatches);
180 
181  /// counts based on substitution matrix for protein alignments
/// GetPositiveCount / GetNegativeCount each return one count;
/// GetMatrixCounts reports both through the reference parameters
/// @a positives and @a negatives.
/// NOTE(review): presumably the matrix used is the one chosen with
/// SetSubstMatrix() - confirm in the implementation.
182  int GetPositiveCount (CScope& scope, const CSeq_align& align);
183  int GetNegativeCount (CScope& scope, const CSeq_align& align);
184  void GetMatrixCounts (CScope& scope, const CSeq_align& align,
185  int& positives, int& negatives);
186 
187  /// Compute the number of gaps in the alignment
/// Overloads: no range argument, a single TSeqRange, or a
/// CRangeCollection<TSeqPos>. None of them takes a CScope.
188  int GetGapCount (const CSeq_align& align);
189  int GetGapCount (const CSeq_align& align,
190  const TSeqRange &range);
191  int GetGapCount (const CSeq_align& align,
192  const CRangeCollection<TSeqPos> &ranges);
193 
194  /// Compute the number of gap bases in the alignment (= length of all gap
195  /// segments)
/// Same three overload shapes as GetGapCount.
196  int GetGapBaseCount (const CSeq_align& align);
197  int GetGapBaseCount (const CSeq_align& align,
198  const TSeqRange &range);
199  int GetGapBaseCount (const CSeq_align& align,
200  const CRangeCollection<TSeqPos> &ranges);
201 
202  /// Compute the length of the alignment (= length of all segments, gaps +
203  /// aligned)
/// @param ungapped  defaults to false (the gaps + aligned length described
///                  above); presumably true excludes gap segments - confirm.
/// Overloads: no range argument, a single TSeqRange, or a
/// CRangeCollection<TSeqPos>.
204  TSeqPos GetAlignLength(const CSeq_align& align, bool ungapped=false);
205  TSeqPos GetAlignLength(const CSeq_align& align,
206  const TSeqRange &range, bool ungapped=false);
207  TSeqPos GetAlignLength(const CSeq_align& align,
208  const CRangeCollection<TSeqPos> &ranges,
209  bool ungapped=false);
210 
/// Select a substitution matrix by @a name.
/// NOTE(review): presumably this is the matrix used by the positive/negative
/// counts for protein alignments; accepted names are not visible in this
/// header - confirm in the implementation.
211  void SetSubstMatrix(const string &name);
212 
213  /// @}
214 
215 private:
218 
/// Private helper taking the positive/negative counters as pointers.
/// NOTE(review): presumably the shared implementation behind
/// GetPositiveCount / GetNegativeCount / GetMatrixCounts, with a null
/// pointer meaning "count not requested" - confirm in the implementation.
219  void x_GetMatrixCounts(CScope& scope,
220  const CSeq_align& align,
221  int* positives, int* negatives);
222 };
223 
224 
225 
228 
229 #endif // OBJMGR_UTIL___SCORE_BUILDER_BASE__HPP
CScope –.
Definition: scope.hpp:92
EErrorMode
Error handling while adding scores that are not implemented or unsupported (cannot be defined) for certain types of alignments.
@ eError_Report
Print error messages, but do not fail.
@ eError_Silent
Try to ignore errors, continue adding scores.
EPercentIdentityType
Compute percent identity (range 0-100)
void SetErrorMode(EErrorMode mode)
void AddSplignScores(CSeq_align &align)
EErrorMode GetErrorMode(void) const
EScoreType
enum controlling known named scores
Definition: Seq_align.hpp:128
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XALNMGR_EXPORT
Definition: ncbi_export.h:1065
TScore & SetScore(void)
Assign a value to Score data member.
Definition: Seq_align_.hpp:902
vector< CRef< CScore > > TScore
Definition: Seq_align_.hpp:398
range(_Ty, _Ty) -> range< _Ty >
mdb_mode_t mode
Definition: lmdb++.h:38
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
static string query
Definition: type.c:6
Modified on Fri Sep 20 14:58:09 2024 by modify_doxy.py rev. 669887