NCBI C++ ToolKit
pssm_input.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef ALGO_BLAST_API__PSSM_INPUT__HPP
2 #define ALGO_BLAST_API__PSSM_INPUT__HPP
3 
4 /* $Id: pssm_input.hpp 51610 2011-10-18 12:21:49Z fongah2 $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Christiam Camacho
30  *
31  */
32 
33 /** @file pssm_input.hpp
34  * Defines interface for a sequence alignment processor that can populate a
35  * multiple alignment data structure used by the PSSM engine.
36  */
37 
38 #include <corelib/ncbistl.hpp>
40 #include <util/math/matrix.hpp>
41 #include <objects/seq/Bioseq.hpp>
42 
43 /** @addtogroup AlgoBlast
44  *
45  * @{
46  */
47 
48 
50 BEGIN_SCOPE(blast)
51 
52 /// Base class for the IPssmInputData and IPssmInputFreqRatios interfaces,
53 /// provided to avoid duplicating the methods that are common to both
54 /// interfaces
55 struct IPssmInput_Base : public CObject
56 {
57  /// virtual destructor
58  virtual ~IPssmInput_Base() {}
59 
60  /// Get the query sequence used as master for the multiple sequence
61  /// alignment in ncbistdaa encoding.
62  virtual unsigned char* GetQuery() = 0;
63 
64  /// Get the query's length
65  virtual unsigned int GetQueryLength() = 0;
66 
67  /// Obtain the name of the underlying matrix to use when building the PSSM
68  virtual const char* GetMatrixName() {
69  return BLAST_DEFAULT_MATRIX; // default matrix name unless overridden
70  }
71 
72  /// Obtain the gap existence value for the underlying matrix used to build the PSSM.
73  virtual int GetGapExistence() {
74  return BLAST_GAP_OPEN_PROT; // default protein gap existence cost
75  }
76 
77  /// Obtain the gap extension value for the underlying matrix used to build the PSSM.
78  virtual int GetGapExtension() {
79  return BLAST_GAP_EXTN_PROT; // default protein gap extension cost
80  }
81 
82 
83 
84  /// Get a CBioseq object for attachment into the CPssmWithParameters
85  /// that CPssmEngine produces (only attached if it's not NULL). This is
86  /// required for any PSSM which is intended to be used as a starting point
87  /// for a PSI-BLAST iteration
88  virtual CRef<objects::CBioseq> GetQueryForPssm() {
89  return CRef<objects::CBioseq>(NULL); // default: no Bioseq to attach
90  }
91 };
92 
93 /// Abstract base class to encapsulate the source(s) and pre-processing of
94 /// PSSM input data as well as options to the PSI-BLAST PSSM engine.
95 ///
96 /// This interface represents the strategy to pre-process PSSM input data and
97 /// to provide to the PSSM engine (context) the multiple sequence alignment
98 /// structure and options that it can use to build the PSSM.
99 /// This class is meant to provide a uniform interface that the PSSM engine can
100 /// use to obtain its data to create a PSSM, allowing subclasses to provide
101 /// implementations to obtain this data from disparate sources (e.g.:
102 /// Seq-aligns, Cdd models, multiple sequence alignments, etc).
103 /// @note Might need to add the PSIDiagnosticsRequest structure
104 /// @sa CPsiBlastInputData
105 struct IPssmInputData : public IPssmInput_Base
106 {
107  /// virtual destructor
108  virtual ~IPssmInputData() {}
109 
110  /// Algorithm to produce multiple sequence alignment structure should be
111  /// implemented in this method. This will be invoked by the CPssmEngine
112  /// object before calling GetData()
113  virtual void Process() = 0;
114 
115  /// Obtain the multiple sequence alignment structure
116  virtual PSIMsa* GetData() = 0;
117 
118  /// Obtain the options for the PSSM engine
119  virtual const PSIBlastOptions* GetOptions() = 0;
120 
121  /// Obtain the diagnostics data that is requested from the PSSM engine
122  /// Its results will be populated in the PssmWithParameters ASN.1 object
123  virtual const PSIDiagnosticsRequest* GetDiagnosticsRequest() {
124  return NULL; // default is not requesting any diagnostics
125  }
126 };
127 
128 /// Interface used to retrieve the PSSM frequency ratios to allow for "restart"
129 /// processing in PSI-BLAST: Given a preliminary
130 struct IPssmInputFreqRatios : public IPssmInput_Base
131 {
132  /// virtual destructor
133  virtual ~IPssmInputFreqRatios() {}
134 
135  /// Algorithm to produce the PSSM's frequency ratios should be
136  /// implemented in this method. This will be invoked by the CPssmEngine
137  /// object before calling GetData()
138  virtual void Process() = 0;
139 
140  /// Obtain a matrix of frequency ratios with this->GetQueryLength() columns
141  /// and BLASTAA_SIZE rows
142  virtual const CNcbiMatrix<double>& GetData() = 0;
143 
144  virtual double GetImpalaScaleFactor(){
145  return kPSSM_NoImpalaScaling; // default: no IMPALA-style scaling is requested
146  }
147 };
148 
149 END_SCOPE(blast)
151 
152 /* @} */
153 
154 #endif /* ALGO_BLAST_API__PSSM_INPUT__HPP */
#define BLAST_GAP_OPEN_PROT
Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
Definition: blast_options.h:84
#define BLAST_GAP_EXTN_PROT
cost to extend a gap.
Definition: blast_options.h:92
#define BLAST_DEFAULT_MATRIX
Default matrix name: BLOSUM62.
Definition: blast_options.h:77
const double kPSSM_NoImpalaScaling
Value used to indicate that no IMPALA-style scaling should be performed when scaling a PSSM.
Definition: blast_options.c:43
High level definitions and declarations for the PSSM engine of PSI-BLAST.
CObject –.
Definition: ncbiobj.hpp:180
virtual void Process()=0
Algorithm to produce multiple sequence alignment structure should be implemented in this method.
virtual const char * GetMatrixName()
Obtain the name of the underlying matrix to use when building the PSSM.
Definition: pssm_input.hpp:68
virtual ~IPssmInputData()
virtual destructor
Definition: pssm_input.hpp:108
virtual unsigned char * GetQuery()=0
Get the query sequence used as master for the multiple sequence alignment in ncbistdaa encoding.
virtual ~IPssmInputFreqRatios()
virtual destructor
Definition: pssm_input.hpp:133
virtual const PSIDiagnosticsRequest * GetDiagnosticsRequest()
Obtain the diagnostics data that is requested from the PSSM engine Its results will be populated in t...
Definition: pssm_input.hpp:123
virtual double GetImpalaScaleFactor()
Definition: pssm_input.hpp:144
virtual int GetGapExistence()
Obtain the gap existence value for the underlying matrix used to build the PSSM.
Definition: pssm_input.hpp:73
virtual unsigned int GetQueryLength()=0
Get the query's length.
virtual int GetGapExtension()
Obtain the gap extension value for the underlying matrix used to build the PSSM.
Definition: pssm_input.hpp:78
virtual const PSIBlastOptions * GetOptions()=0
Obtain the options for the PSSM engine.
virtual const CNcbiMatrix< double > & GetData()=0
Obtain a matrix of frequency ratios with this->GetQueryLength() columns and BLASTAA_SIZE rows.
virtual void Process()=0
Algorithm to produce the PSSM's frequency ratios should be implemented in this method.
virtual PSIMsa * GetData()=0
Obtain the multiple sequence alignment structure.
virtual CRef< objects::CBioseq > GetQueryForPssm()
Get a CBioseq object for attachment into the CPssmWithParameters that CPssmEngine produces (only atta...
Definition: pssm_input.hpp:88
virtual ~IPssmInput_Base()
virtual destructor
Definition: pssm_input.hpp:58
#define NULL
Definition: ncbistd.hpp:225
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
The NCBI C++/STL use hints.
Abstract base class to encapsulate the source(s) and pre-processing of PSSM input data as well as opt...
Definition: pssm_input.hpp:106
Interface used to retrieve the PSSM frequency ratios to allow for "restart" processing in PSI-BLAST: ...
Definition: pssm_input.hpp:131
Base class for the IPssmInputData and IPssmInputFreqRatios interfaces, provided to avoid duplicating ...
Definition: pssm_input.hpp:56
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
Structure to allow requesting various diagnostics data to be collected by PSSM engine.
Definition: blast_psi.h:181
Multiple sequence alignment (msa) data structure containing the raw data needed by the PSSM engine to...
Definition: blast_psi.h:75
Modified on Wed May 22 11:29:46 2024 by modify_doxy.py rev. 669887