NCBI C++ ToolKit
data4xmlformat.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: data4xmlformat.hpp 100101 2023-06-15 14:10:29Z merezhuk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Authors: Jason Papadopoulos, Christiam Camacho
27 *
28 */
29 
30 /** @file data4xmlformat.hpp
31  * Implementation of interface class to produce data required for generating
32  * BLAST XML output
33  */
34 
35 #ifndef APP___DATA4XMLFORMAT__HPP
36 #define APP___DATA4XMLFORMAT__HPP
37 
39 
43 
46 
48 
49 /// Strategy class to gather the data for generating BLAST XML output
51 {
52 public:
53  /// Constructor
54  /// @param queries Query sequences [in]
55  /// @param results results set containing one query per element or one
56  /// iteration per element in the case of PSI-BLAST [in]
57  /// @param opts Blast options container [in]
58  /// @param dbname Name of database to search ("" if none) [in]
59  /// @param db_is_aa true if database contains protein sequences [in]
60  /// @param qgencode Genetic code used to translate query sequences
61  /// (if applicable) [in]
62  /// @param dbgencode Genetic code used to translate database sequences
63  /// (if applicable) [in]
64  /// @param dbfilt_algorithm DB Filtering algorithm to use, -1 means not
65  /// applicable FIXME: this is not being reported
67  const blast::CSearchResultSet& results,
68  const blast::CBlastOptions& opts,
69  const string& dbname, bool db_is_aa,
70  int qgencode = BLAST_GENETIC_CODE,
71  int dbgencode = BLAST_GENETIC_CODE,
72  bool is_remote = false,
73  int dbfilt_algorithm = -1);
74 
75 
76  /// Constructor
77  /// @param queries Query sequences [in]
78  /// @param results results set containing one query per element or one
79  /// iteration per element in the case of PSI-BLAST [in]
80  /// @param opts Blast options container [in]
81  /// @param dbInfo vector of SDbInfo containing db names and type [in]
82  /// @param qgencode Genetic code used to translate query sequences
83  /// (if applicable) [in]
84  /// @param dbgencode Genetic code used to translate database sequences
85  /// (if applicable) [in]
86  /// @param dbfilt_algorithm DB Filtering algorithm to use, -1 means not
87  /// applicable FIXME: this is not being reported
89  const blast::CSearchResultSet& results,
90  const blast::CBlastOptions& opts,
91  const vector<align_format::CAlignFormatUtil::SDbInfo> & dbInfo,
92  int qgencode = BLAST_GENETIC_CODE,
93  int dbgencode = BLAST_GENETIC_CODE,
94  bool is_remote = false,
95  int dbfilt_algorithm = -1);
96 
97  /// Destructor
99 
100  //------------ callbacks needed by IBlastXMLReportData ---------
101 
102  /// @inheritDoc
103  string GetBlastProgramName(void) const {
104  // Program type for deltablast is eBlastTypePsiBlast, because the
105  // sequence search is done by CPsiBlast
106  if (m_Options.GetProgram() == blast::eDeltaBlast) {
107  return "deltablast";
108  }
109 
110  return blast::Blast_ProgramNameFromType(m_Options.GetProgramType());
111  }
112 
113  /// @inheritDoc
115  return m_Options.GetProgram();
116  }
117 
118  /// @inheritDoc
119  string GetDatabaseName(void) const { return m_DbName; }
120 
121  /// @inheritDoc
122  double GetEvalueThreshold(void) const {
123  return m_Options.GetEvalueThreshold();
124  }
125 
126  /// @inheritDoc
127  int GetGapOpeningCost(void) const {
128  return m_Options.GetGapOpeningCost();
129  }
130 
131  /// @inheritDoc
132  int GetGapExtensionCost(void) const {
133  return m_Options.GetGapExtensionCost();
134  }
135 
136  /// @inheritDoc
137  int GetMatchReward(void) const {
138  return m_Options.GetMatchReward();
139  }
140 
141  /// @inheritDoc
142  int GetMismatchPenalty(void) const {
143  return m_Options.GetMismatchPenalty();
144  }
145 
146  /// @inheritDoc
147  string GetPHIPattern(void) const {
148  const char *tmp = m_Options.GetPHIPattern();
149  return tmp == NULL ? string() : string(tmp);
150  }
151 
152  /// @inheritDoc
153  string GetFilterString(void) const {
155  m_Options.GetFilterString(); /* NCBI_FAKE_WARNING */
156  return tmp.get() == NULL ? NcbiEmptyString : string(tmp.get());
157  }
158 
159  /// @inheritDoc
160  string GetMatrixName(void) const {
161  const char *tmp = m_Options.GetMatrixName();
162  return tmp == NULL ? string() : string(tmp);
163  }
164 
165  /// @inheritDoc
166  CBlastFormattingMatrix* GetMatrix(void) const;
167 
168  /// @inheritDoc
169  unsigned int GetNumQueries(void) const { return static_cast<unsigned int>(m_Queries->Size()); }
170 
171  /// @inheritDoc
172  const TMaskedQueryRegions*
173  GetMaskLocations(int query_index) const {
174  _ASSERT(query_index < (int)m_Masks.size());
175  if (m_NoHitsFound) {
176  return NULL;
177  }
178  return &m_Masks[query_index];
179  }
180 
181  /// @inheritDoc
182  int GetDbNumSeqs(void) const {
183  return m_NumSequences;
184  }
185 
186  /// @inheritDoc
187  Int8 GetDbLength(void) const {
188  return m_NumBases;
189  }
190 
191  /// @inheritDoc
192  int GetLengthAdjustment(int /*query_index*/) const;
193 
194  /// @inheritDoc
195  Int8 GetEffectiveSearchSpace(int query_index) const {
196  _ASSERT(query_index < (int)m_AncillaryData.size());
197  if (m_NoHitsFound) {
198  return 0;
199  }
200  return m_AncillaryData[query_index]->GetSearchSpace();
201  }
202 
203  /// @inheritDoc
204  double GetLambda(int query_index) const;
205 
206  /// @inheritDoc
207  double GetKappa(int query_index) const;
208 
209  /// @inheritDoc
210  double GetEntropy(int query_index) const;
211 
212  /// @inheritDoc
213  const objects::CSeq_loc* GetQuery(int query_index) const {
214  _ASSERT(query_index < (int)m_Queries->Size());
215  return m_Queries->GetQuerySeqLoc(query_index);
216  }
217 
218  /// @inheritDoc
219  objects::CScope* GetScope(int query_index) const {
220  _ASSERT(query_index < (int)m_Queries->Size());
221  return m_Queries->GetScope(query_index);
222  }
223 
224  /// @inheritDoc
225  const CSeq_align_set* GetAlignment(int query_index) const {
226  _ASSERT(query_index < (int)m_Alignments.size());
227  if (m_NoHitsFound) {
228  return NULL;
229  }
230  return m_Alignments[query_index].GetPointer();
231  }
232 
233  /// @inheritDoc
234  bool GetGappedMode(void) const {
235  return m_Options.GetGappedMode();
236  }
237 
238  /// @inheritDoc
239  int GetMasterGeneticCode() const { return m_QueryGeneticCode; }
240 
241  /// @inheritDoc
242  int GetSlaveGeneticCode() const { return m_DbGeneticCode; }
243 
244  /// @inheritDoc
245  vector<string> GetMessages() const { return m_Errors; }
246 
247 private:
248  /// Query sequences
250  /// BLAST algorithm options
251  const blast::CBlastOptions& m_Options;
252  string m_DbName; ///< name of blast database
253  /// genetic code for the query
255  /// genetic code for the database
257 
258  /// ancillary results data
259  vector<CRef<blast::CBlastAncillaryData> > m_AncillaryData;
260  /// the alignments
261  vector<CConstRef<CSeq_align_set> > m_Alignments;
262  /// masks for the queries
264  /// True if results did not find any hits
266  /// Error messages (one element per query)
267  vector<string> m_Errors;
268 
269  /// Number of columns used in score matrices
270  static const unsigned int kMatrixCols = 28;
271 
272  /// Score matrix used to determine neighboring protein residues
274 
275  /// Number of sequences in all BLAST databases involved in this search
277  /// Number of bases in all BLAST databases involved in this search
279 
280  /// Initialize the score matrix to be used for formatting
281  /// (if applicable)
282  /// @param matrix_name Name of score matrix. NULL defaults to
283  /// BLOSUM62 [in]
284  ///
285  void x_FillScoreMatrix(const char *matrix_name = BLAST_DEFAULT_MATRIX);
286 
287  // Used in constructors to facilitate initialization
289  const blast::CSearchResultSet& results,
290  const blast::CBlastOptions& opts,
291  const vector<align_format::CAlignFormatUtil::SDbInfo> & dbInfo,
292  int qgencode,
293  int dbgencode,
294  bool is_remote,
295  int dbfilt_algorith);
296 };
297 
299 
300 #endif /* !APP___DATA4XMLFORMAT__HPP */
301 
BLAST formatter utilities.
Formatting of pairwise sequence alignments in XML form.
#define BLAST_DEFAULT_MATRIX
Default matrix name: BLOSUM62.
Definition: blast_options.h:77
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
@ eDeltaBlast
Delta Blast.
Definition: blast_types.hpp:71
256x256 matrix used for calculating positives etc.
Strategy class to gather the data for generating BLAST XML output.
string GetMatrixName(void) const
@inheritDoc
CRef< blast::CBlastQueryVector > m_Queries
Query sequences.
vector< CRef< blast::CBlastAncillaryData > > m_AncillaryData
ancillary results data
int m_NumSequences
Number of sequences in all BLAST databases involved in this search.
objects::CScope * GetScope(int query_index) const
@inheritDoc
bool m_NoHitsFound
True if results did not find any hits.
vector< string > GetMessages() const
@inheritDoc
Int8 GetEffectiveSearchSpace(int query_index) const
@inheritDoc
blast::EProgram GetBlastTask(void) const
@inheritDoc
int GetSlaveGeneticCode() const
@inheritDoc
int m_QueryGeneticCode
genetic code for the query
int * m_Matrix[kMatrixCols]
Score matrix used to determine neighboring protein residues.
TSeqLocInfoVector m_Masks
masks for the queries
unsigned int GetNumQueries(void) const
@inheritDoc
const TMaskedQueryRegions * GetMaskLocations(int query_index) const
@inheritDoc
const blast::CBlastOptions & m_Options
BLAST algorithm options.
Int8 m_NumBases
Number of bases in all BLAST databases involved in this search.
int GetMatchReward(void) const
@inheritDoc
Int8 GetDbLength(void) const
@inheritDoc
string GetBlastProgramName(void) const
@inheritDoc
const CSeq_align_set * GetAlignment(int query_index) const
@inheritDoc
void x_FillScoreMatrix(const char *matrix_name=BLAST_DEFAULT_MATRIX)
Initialize the score matrix to be used for formatting (if applicable)
string m_DbName
name of blast database
int GetLengthAdjustment(int) const
@inheritDoc
string GetPHIPattern(void) const
@inheritDoc
int GetMismatchPenalty(void) const
@inheritDoc
CCmdLineBlastXMLReportData(CRef< blast::CBlastQueryVector > queries, const blast::CSearchResultSet &results, const blast::CBlastOptions &opts, const string &dbname, bool db_is_aa, int qgencode=BLAST_GENETIC_CODE, int dbgencode=BLAST_GENETIC_CODE, bool is_remote=false, int dbfilt_algorithm=-1)
Constructor.
CCmdLineBlastXMLReportData(CRef< blast::CBlastQueryVector > queries, const blast::CSearchResultSet &results, const blast::CBlastOptions &opts, const vector< align_format::CAlignFormatUtil::SDbInfo > &dbInfo, int qgencode=BLAST_GENETIC_CODE, int dbgencode=BLAST_GENETIC_CODE, bool is_remote=false, int dbfilt_algorithm=-1)
Constructor.
double GetEvalueThreshold(void) const
@inheritDoc
double GetEntropy(int query_index) const
@inheritDoc
vector< string > m_Errors
Error messages (one element per query)
void x_Init(CRef< blast::CBlastQueryVector > queries, const blast::CSearchResultSet &results, const blast::CBlastOptions &opts, const vector< align_format::CAlignFormatUtil::SDbInfo > &dbInfo, int qgencode, int dbgencode, bool is_remote, int dbfilt_algorith)
int GetGapOpeningCost(void) const
@inheritDoc
const objects::CSeq_loc * GetQuery(int query_index) const
@inheritDoc
vector< CConstRef< CSeq_align_set > > m_Alignments
the alignments
int GetGapExtensionCost(void) const
@inheritDoc
int GetMasterGeneticCode() const
@inheritDoc
double GetLambda(int query_index) const
@inheritDoc
int m_DbGeneticCode
genetic code for the database
string GetDatabaseName(void) const
@inheritDoc
bool GetGappedMode(void) const
@inheritDoc
string GetFilterString(void) const
@inheritDoc
int GetDbNumSeqs(void) const
@inheritDoc
~CCmdLineBlastXMLReportData()
Destructor.
double GetKappa(int query_index) const
@inheritDoc
static const unsigned int kMatrixCols
Number of columns used in score matrices.
CBlastFormattingMatrix * GetMatrix(void) const
@inheritDoc
Interface for filling the top layer of the XML report.
Collection of masked regions for a single query sequence.
Definition: seqlocinfo.hpp:113
AutoPtr< Char, CDeleter< Char > > TAutoCharPtr
Declares TAutoCharPtr (for Char arrays allocated with malloc/calloc)
Definition: blast_aux.hpp:100
string Blast_ProgramNameFromType(EBlastProgramType program)
Returns a string program name, given a blast::EBlastProgramType enumeration.
Definition: blast_aux.cpp:813
string
Definition: cgiapp.hpp:687
#define NULL
Definition: ncbistd.hpp:225
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NcbiEmptyString
Definition: ncbistr.hpp:122
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
static char tmp[2048]
Definition: utf8.c:42
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
Definition: seqlocinfo.hpp:139
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Definition of SSeqLoc structure.
#define _ASSERT
Uniform BLAST Search Interface.
Modified on Sat Dec 09 04:48:22 2023 by modify_doxy.py rev. 669887