NCBI C++ ToolKit
cuDmBlastscore.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef CU_DM_BLASTSCORE__HPP
2 #define CU_DM_BLASTSCORE__HPP
3 
4 /* $Id: cuDmBlastscore.hpp 33815 2007-05-04 17:18:18Z kazimird $
5 * ===========================================================================
6 *
7 * PUBLIC DOMAIN NOTICE
8 * National Center for Biotechnology Information
9 *
10 * This software/database is a "United States Government Work" under the
11 * terms of the United States Copyright Act. It was written as part of
12 * the author's official duties as a United States Government employee and
13 * thus cannot be copyrighted. This software/database is freely available
14 * to the public for use. The National Library of Medicine and the U.S.
15 * Government have not placed any restriction on its use or reproduction.
16 *
17 * Although all reasonable efforts have been taken to ensure the accuracy
18 * and reliability of the software and data, the NLM and the U.S.
19 * Government do not and cannot warrant the performance or results that
20 * may be obtained by using this software or data. The NLM and the U.S.
21 * Government disclaim all warranties, express or implied, including
22 * warranties of performance, merchantability or fitness for any particular
23 * purpose.
24 *
25 * Please cite the author in any work or product based on this material.
26 *
27 * ===========================================================================
28 *
29 * Author: Chris Lanczycki
30 *
31 * File Description: cuDmBlastscore.hpp
32 *
33 * Concrete distance matrix class.
34 * Distance is computed based on a scoring matrix, where the
35 * score is derived from a pairwise BLAST of sequences in the CD.
36 * There is an option to extend an alignment at the N-terminal and C-terminal
37 * end of an existing alignment by a specified amount. One can also specify
38 * to do an unrestricted BLAST of the complete sequences.
39 * (See cdt_scoring_matrices.hpp for the supported scoring matrices).
40 *
41 */
42 
45 BEGIN_SCOPE(cd_utils)
46 
47 // (BEGIN_NCBI_SCOPE must be followed by END_NCBI_SCOPE later in this file)
48 //BEGIN_NCBI_SCOPE
49 
50 
51 // The distance between two rows in an alignment is derived from the pairwise
52 // BLAST score of the specified region of the sequences:
53 //
54 // d[i][j] = offset - pairwise_BlastScore(i, j)
55 //
56 // where 'offset' is a CD-dependent constant that allows transformation of the
57 // largest Blast scores to the shortest distances. Note that each row will
58 // in general have a different score for a Blast against itself, making d=0 ambiguous.
59 
60 class DM_BlastScore : public DistanceMatrix {
61  // Class-level constants; declared here without initializers.
62  static const bool USE_FULL_SEQUENCE_DEFAULT;
63  static const double E_VAL_ON_BLAST_FAILURE; // presumably the E-value used when a BLAST fails -- confirm in the implementation
64 public:
65  static const double SCORE_ON_BLAST_FAILURE; // presumably the score used when a BLAST fails -- confirm in the implementation
66 // static EDistMethod DIST_METHOD;
67 
68 public:
69  // Constructor: takes a scoring-matrix type plus N-terminal and C-terminal extension amounts (both default to 0).
71  DM_BlastScore(EScoreMatrixType type = GLOBAL_DEFAULT_SCORE_MATRIX, int nTermExt=0, int cTermExt=0);
72  //DM_BlastScore(const CCd* cd, EScoreMatrixType type = GLOBAL_DEFAULT_SCORE_MATRIX, int ext=NO_EXTENSION);
73  //DM_BlastScore(const CCd* cd, EScoreMatrixType type, int nTermExt, int cTermExt);
74 
75  bool useFullSequence() { return m_useFullSequence;} // returns the current value of m_useFullSequence
76  void SetUseFullSequence(bool value); // defined out of line; presumably updates m_useFullSequence -- confirm
77 
78  //double GetBlastScore(int id1, int id2, string matrix_name, bool unrestricted=false);
79  virtual bool ComputeMatrix(pProgressFunction pFunc); // takes a progress callback; meaning of the bool result is set by the implementation
80  virtual ~DM_BlastScore();
81 
82 private:
83 
84  // m_useFullSequence tells whether all residues of each sequence are used,
85  // or only a subset. It differs from m_useAligned in the parent class: even
86  // when the full sequence is not used, the BLAST is not necessarily limited
87  // to the aligned residues. Within the footprint (first aligned to last
88  // aligned residue on a sequence) the non-aligned intervening residues are
89  // still used in the BLAST here. To exclude even those, use eScoreAligned.
91  //CCd::AlignmentUsage m_alignUse;
92  vector<CRef <CSeq_entry> > m_sequences; // sequence entries held through CRef handles
93  // Initialization helper; takes the same arguments as the constructor.
94  void initDMBlastScore(EScoreMatrixType type, int nTermExt, int cTermExt);
95 // void ExtractScoreFromSeqAlign(const CSeq_align* seqAlign, double& result, bool eval);
96 // int ExtractScoreFromScoreList(const CSeq_align::TScore& scores, double& result, bool eval);
97 
98  // Distance is the shifted BLAST score over a specified region.
99  //void CalcPairwiseScores(pProgressFunction pFunc); // slow way; set up for each blast
100  bool CalcPairwiseScoresOnTheFly(pProgressFunction pFunc); // faster way; sets up all BLASTs with row i at once
101 
102 };
103 
104 END_SCOPE(cd_utils)
106 
107 #endif
bool useFullSequence()
vector< CRef< CSeq_entry > > m_sequences
static const double E_VAL_ON_BLAST_FAILURE
static const double SCORE_ON_BLAST_FAILURE
static const bool USE_FULL_SEQUENCE_DEFAULT
void(* pProgressFunction)(int Num, int Total)
Definition: cuDistmat.hpp:47
const EScoreMatrixType GLOBAL_DEFAULT_SCORE_MATRIX
EScoreMatrixType
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
Definition: type.c:6
Modified on Wed Jun 19 17:01:16 2024 by modify_doxy.py rev. 669887