NCBI C++ ToolKit
cuFlexiDm.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef CU_FLEXIDM_HPP
2 #define CU_FLEXIDM_HPP
3 
4 /* $Id: cuFlexiDm.hpp 42015 2009-05-28 19:23:37Z lanczyck $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Charlie Liu
30 *
31 * File Description:
32 *
33 * Concrete distance matrix class.
34 * Distance is computed based on pure percent pairwise AA identity in
35 * aligned blocks, with or without a correction for multiple AA
36 * substitutions as per Kimura.
37 *
38 */
39 
42 
45 BEGIN_SCOPE(cd_utils)
46 
48 {
49  ResidueCell(char aa, bool isaligned) : residue(aa), aligned(isaligned){}
50  char residue;
51  bool aligned;
52 };
53 
55 {
56 public:
57  ResidueMatrix(unsigned numRows);
58  void read(ColumnResidueProfile& crp);
59  bool getAlignedPair(unsigned row1, unsigned row2, pair< string, string >& seqPair);
60  typedef vector< ResidueCell > RowContent;
61  RowContent& getRow(int row) {return m_rows[row];}
62 private:
63 
64  vector< RowContent > m_rows;
65  int m_numRows;
66 };
67 
68 // This class (derived from DistanceMatrix) simply uses the number of AA
69 // identities in the specified region to define the distance between two sequences:
70 //
71 // d[i][j] = 1 - (n_matched/n_tested); d is in [0, 1]
72 
73 class FlexiDm : public DistanceMatrix {
74  // Class-level constants; they are only declared here (no initializer), so their values come from their definitions elsewhere.
75  static const double MAX_DISTANCE;
76  static const EDistMethod DIST_METHOD;
77 
78 public:
79  // Both arguments are optional; uniformLength defaults to -1, which falls in the "ignored" (zero or negative) range described for the uniform length below.
80  FlexiDm(EScoreMatrixType type = GLOBAL_DEFAULT_SCORE_MATRIX, int uniformLength = -1);
81 
82  ~FlexiDm();
83  bool ComputeMatrix(pProgressFunction pFunc);  // pFunc is a progress callback; NOTE(review): the meaning of the bool result is not visible in this header (presumably success) -- confirm in the implementation.
84  // Uniform-length accessors.  SetUniformLength stores -1 when passed 0; any other value (including negatives) is stored as given.
85  // Allow the user to define a single length to use in GetPercentIdentities.
86  // This length will *not* be used if (# of identities/uniformLength) > 1.
87  void SetUniformLength(int uniformLength) {m_uniformLength = (uniformLength != 0) ? uniformLength : -1;}
88  int GetUniformLength() const {return m_uniformLength;}
89 
90  // Distance is 1 - (fraction of identical residues); static, so it can be called without a FlexiDm instance.
91  static double GetDistance(int identities, int alignment_length);
92 
93 private:
94 
95  // When positive, always use this as the length when computing distances
96  // to force normalization to a common size alignment. E.g., when there
97  // are normal and pending rows, one might wish to force to measure the
98  // number of identities between pending rows relative to the length of the
99  // normal alignment.
100  // When zero or negative, this value is ignored.
101  // In addition, this length will *not* be used if (# of identities/uniformLength) > 1.
103  // NOTE(review): the comment above presumably documents m_uniformLength (the member read/written by the accessors); its declaration is not shown in this listing -- confirm against the real header.
105  void initDMIdentities(EScoreMatrixType type, int nExt=0, int cExt=0);  // nExt and cExt default to 0; presumably N-/C-terminal extensions -- TODO confirm in the implementation.
106 };
107 
108 END_SCOPE(cd_utils)
110 
111 #endif /* CU_FLEXIDM_HPP */
virtual void read(ColumnResidueProfile &crp)=0
static const EDistMethod DIST_METHOD
Definition: cuFlexiDm.hpp:76
~FlexiDm()
Definition: cuFlexiDm.cpp:93
int GetUniformLength() const
Definition: cuFlexiDm.hpp:88
bool ComputeMatrix(pProgressFunction pFunc)
Definition: cuFlexiDm.cpp:112
FlexiDm(EScoreMatrixType type=GLOBAL_DEFAULT_SCORE_MATRIX, int uniformLength=-1)
Definition: cuFlexiDm.cpp:96
static const double MAX_DISTANCE
Definition: cuFlexiDm.hpp:75
static double GetDistance(int identities, int alignment_length)
Definition: cuFlexiDm.cpp:186
int m_uniformLength
Definition: cuFlexiDm.hpp:102
void initDMIdentities(EScoreMatrixType type, int nExt=0, int cExt=0)
Definition: cuFlexiDm.cpp:100
void GetPercentIdentities(pProgressFunction pFunc)
Definition: cuFlexiDm.cpp:124
void SetUniformLength(int uniformLength)
Definition: cuFlexiDm.hpp:87
vector< RowContent > m_rows
Definition: cuFlexiDm.hpp:64
RowContent & getRow(int row)
Definition: cuFlexiDm.hpp:61
vector< ResidueCell > RowContent
Definition: cuFlexiDm.hpp:60
EDistMethod
Definition: cuDistmat.hpp:60
void(* pProgressFunction)(int Num, int Total)
Definition: cuDistmat.hpp:47
USING_SCOPE(objects)
const EScoreMatrixType GLOBAL_DEFAULT_SCORE_MATRIX
EScoreMatrixType
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_CDUTILS_EXPORT
Definition: ncbi_export.h:376
#define row(bind, expected)
Definition: string_bind.c:73
ResidueCell(char aa, bool isaligned)
Definition: cuFlexiDm.hpp:49
char residue
Definition: cuFlexiDm.hpp:50
bool aligned
Definition: cuFlexiDm.hpp:51
Definition: type.c:6
Modified on Fri Sep 20 14:57:35 2024 by modify_doxy.py rev. 669887