NCBI C++ ToolKit
blast_setup.h
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_setup.h 36361 2007-12-20 22:31:20Z camacho $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Tom Madden
27  *
28  */
29 
30 /** @file blast_setup.h
31  * Utilities to initialize and set up BLAST.
32  */
33 
34 #ifndef __BLAST_SETUP__
35 #define __BLAST_SETUP__
36 
47 
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51 
52 /** "Main" setup routine for BLAST. Calculates all information for BLAST search
53  * that is dependent on the ASN.1 structures.
54  * @todo FIXME: this function only filters query and sets up score block structure
55  * @param program_number Type of BLAST program (0=blastn, ...). [in]
56  * @param qsup_options options for query setup. [in]
57  * @param scoring_options options for scoring. [in]
58  * @param query_blk BLAST_SequenceBlk* for the query. [in]
59  * @param query_info The query information block [in]
60  * @param scale_factor Multiplier for cutoff and dropoff scores [in]
61  * @param lookup_segments Start/stop locations for non-masked query
62  * segments [out]
63  * @param mask masking locations. [out]
64  * @param sbpp Contains scoring information. [out]
65  * @param blast_message error or warning [out]
66  * @param get_path callback function to get matrix path [in]
67  */
70  const QuerySetUpOptions* qsup_options,
71  const BlastScoringOptions* scoring_options,
72  BLAST_SequenceBlk* query_blk,
73  const BlastQueryInfo* query_info,
74  double scale_factor,
75  BlastSeqLoc* *lookup_segments,
77  BlastScoreBlk* *sbpp,
78  Blast_Message* *blast_message,
79  GET_MATRIX_PATH get_path);
80 
81 /** Blast_ScoreBlkKbpGappedCalc, fills the ScoreBlkPtr for a gapped search.
82  * Should be moved to blast_stat.c in the future.
83  * @param sbp Contains fields to be set, should not be NULL. [out]
84  * @param scoring_options Scoring options [in]
85  * @param program Used to set fields on sbp [in]
86  * @param query_info Query information containing context information [in]
87  * @param error_return Pointer to structure for returning errors. [in][out]
88  * @return Status.
89  */
92  const BlastScoringOptions * scoring_options,
93  EBlastProgramType program,
94  const BlastQueryInfo * query_info,
95  Blast_Message** error_return);
96 
97 /** Function to calculate effective query length and db length as well as
98  * effective search space.
99  * @param program_number blastn, blastp, blastx, etc. [in]
100  * @param scoring_options options for scoring. [in]
101  * @param eff_len_params Used to calculate effective lengths [in]
102  * @param sbp Karlin-Altschul parameters [out]
103  * @param query_info The query information block, which stores the effective
104  * search spaces for all queries [in] [out]
105  * @param blast_message Error message [out]
106 */
109  const BlastScoringOptions* scoring_options,
110  const BlastEffectiveLengthsParameters* eff_len_params,
111  const BlastScoreBlk* sbp, BlastQueryInfo* query_info,
112  Blast_Message **blast_message);
113 
114 /** Set up the auxiliary structures for gapped alignment / traceback only
115  * @param program_number blastn, blastp, blastx, etc. [in]
116  * @param seq_src Sequence source information, with callbacks to get
117  * sequences, their lengths, etc. [in]
118  * @param scoring_options options for scoring. [in]
119  * @param eff_len_options Options overriding real database sizes for
120  * calculating effective lengths [in]
121  * @param ext_options options for gapped extension. [in]
122  * @param hit_options options for saving hits. [in]
123  * @param query_info The query information block [in]
124  * @param sbp Contains scoring information. [in]
125  * @param score_params Parameters for scoring [out]
126  * @param ext_params Parameters for gapped extension [out]
127  * @param hit_params Parameters for saving hits [out]
128  * @param eff_len_params Parameters for search space calculations [out]
129  * @param gap_align Gapped alignment information and allocated memory [out]
130  */
133  const BlastSeqSrc* seq_src,
134  const BlastScoringOptions* scoring_options,
135  const BlastEffectiveLengthsOptions* eff_len_options,
136  const BlastExtensionOptions* ext_options,
137  const BlastHitSavingOptions* hit_options,
138  BlastQueryInfo* query_info,
139  BlastScoreBlk* sbp,
140  BlastScoringParameters** score_params,
141  BlastExtensionParameters** ext_params,
142  BlastHitSavingParameters** hit_params,
143  BlastEffectiveLengthsParameters** eff_len_params,
144  BlastGapAlignStruct** gap_align);
145 
146 /** Recalculates the parameters that depend on an individual sequence, if
147  * this is not a database search.
148  * @param program_number BLAST program [in]
149  * @param subject_length Length of the current subject sequence [in]
150  * @param scoring_options Scoring options [in]
151  * @param query_info The query information structure. Effective lengths
152  * are recalculated here. [in] [out]
153  * @param sbp Scoring statistical parameters [in]
154  * @param hit_params Parameters for saving hits. Score cutoffs are recalculated
155  * here [in] [out]
156  * @param word_params Parameters for ungapped extension. Score cutoffs are
157  * recalculated here [in] [out]
158  * @param eff_len_params Parameters for effective lengths calculation. Reset
159  * with the current sequence data [in] [out]
160  */
163  Uint4 subject_length,
164  const BlastScoringOptions* scoring_options,
165  BlastQueryInfo* query_info,
166  const BlastScoreBlk* sbp,
167  BlastHitSavingParameters* hit_params,
168  BlastInitialWordParameters* word_params,
169  BlastEffectiveLengthsParameters* eff_len_params);
170 
171 /** Initializes the substitution matrix in the BlastScoreBlk according to the
172  * scoring options specified.
173  * @todo Should be moved to blast_stat.c in the future.
174  * @param program_number Used to set fields on sbp [in]
175  * @param scoring_options Scoring options [in]
176  * @param sbp Contains fields to be set, should not be NULL. [out]
177  * @param get_path callback function to get matrix path [in]
178  *
179 */
182  const BlastScoringOptions* scoring_options,
183  BlastScoreBlk* sbp,
184  GET_MATRIX_PATH get_path);
185 
186 /** Initializes the score block structure.
187  * @param query_blk Query sequence(s) [in]
188  * @param query_info Additional query information [in]
189  * @param scoring_options Scoring options [in]
190  * @param program_number BLAST program type [in]
191  * @param sbpp Initialized score block [out]
192  * @param scale_factor Matrix scaling factor for this search [in]
193  * @param blast_message Error message [out]
194  * @param get_path callback function to get matrix path [in]
195  */
198  const BlastQueryInfo* query_info,
199  const BlastScoringOptions* scoring_options,
200  EBlastProgramType program_number,
201  BlastScoreBlk* *sbpp,
202  double scale_factor,
203  Blast_Message* *blast_message,
204  GET_MATRIX_PATH get_path);
205 
206 
207 /** Adjusts the mask location coordinates to a sequence interval. Removes those
208  * mask locations that do not intersect the interval. Can do this either for all
209  * queries or only for the first one.
210  * @param mask Structure containing a mask location. [in] [out]
211  * @param from Starting offset of a sequence interval [in]
212  * @param to Ending offset of a sequence interval [in]
213  */
215 void
217 
218 
219 /** In a PHI BLAST search, adds pattern information to the BlastQueryInfo
220  * structure.
221  * @param program Type of PHI BLAST program [in]
222  * @param pattern_blk Auxiliary pattern items structure [in]
223  * @param query Query sequence [in]
224  * @param lookup_segments Locations on query sequence to find pattern on [in]
225  * @param query_info Query information structure, where pattern occurrences
226  * will be saved. [in][out]
227  * @param blast_message will be filled in if pattern not found on query [in][out]
228  * @return Status, 0 on success, -1 on error.
229  */
231 Int2
233  const SPHIPatternSearchBlk * pattern_blk,
234  const BLAST_SequenceBlk * query,
235  const BlastSeqLoc * lookup_segments,
236  BlastQueryInfo * query_info,
237  Blast_Message** blast_message);
238 
239 /** Auxiliary function to retrieve the subject's number of sequences and total
240  * length.
     * @param seqsrc Sequence source to retrieve the totals from [in]
     * @param total_length Set to the subject's total length [out]
     * @param num_seqs Set to the subject's number of sequences [out]
241  * @note In the case of a Blast2Sequences search, this function assumes a
242  * single sequence and returns the length of the first sequence only
243  */
245 void
246 BLAST_GetSubjectTotals(const BlastSeqSrc* seqsrc,
247  Int8* total_length,
248  Int4* num_seqs);
249 
250 /** Validation function for the setup of queries for the BLAST search.
251  * @param query_info Properly set up BlastQueryInfo structure [in]
252  * @param score_blk Optional, properly set up BlastScoreBlk structure (may be
253  * NULL) [in]
254  * @return 1 if no valid queries are found, 0 otherwise.
255  */
257 Int2
258 BlastSetup_Validate(const BlastQueryInfo* query_info,
259  const BlastScoreBlk* score_blk);
260 
261 #ifdef __cplusplus
262 }
263 #endif
264 #endif /* !__BLAST_SETUP__ */
Definitions used throughout BLAST.
Defines to provide correct exporting from BLAST DLL in Windows.
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
Structures and functions prototypes used for BLAST gapped extension.
Structures for BLAST messages.
The structures and functions in blast_options.
Structure and function definitions for BLAST parameter structures, which are internal to the CORE of ...
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
Definitions and functions associated with the BlastQueryInfo structure.
Int2 BlastSetup_Validate(const BlastQueryInfo *query_info, const BlastScoreBlk *score_blk)
Validation function for the setup of queries for the BLAST search.
Definition: blast_setup.c:535
void BLAST_GetSubjectTotals(const BlastSeqSrc *seqsrc, Int8 *total_length, Int4 *num_seqs)
Auxiliary function to retrieve the subject's number of sequences and total length.
Definition: blast_setup.c:853
Int2 BlastSetup_ScoreBlkInit(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const BlastScoringOptions *scoring_options, EBlastProgramType program_number, BlastScoreBlk **sbpp, double scale_factor, Blast_Message **blast_message, GET_MATRIX_PATH get_path)
Initializes the score block structure.
Definition: blast_setup.c:456
Int2 Blast_SetPHIPatternInfo(EBlastProgramType program, const SPHIPatternSearchBlk *pattern_blk, const BLAST_SequenceBlk *query, const BlastSeqLoc *lookup_segments, BlastQueryInfo *query_info, Blast_Message **blast_message)
In a PHI BLAST search, adds pattern information to the BlastQueryInfo structure.
Definition: blast_setup.c:1065
Int2 BLAST_MainSetUp(EBlastProgramType program_number, const QuerySetUpOptions *qsup_options, const BlastScoringOptions *scoring_options, BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, double scale_factor, BlastSeqLoc **lookup_segments, BlastMaskLoc **mask, BlastScoreBlk **sbpp, Blast_Message **blast_message, GET_MATRIX_PATH get_path)
"Main" setup routine for BLAST.
Definition: blast_setup.c:563
Int2 Blast_ScoreBlkKbpGappedCalc(BlastScoreBlk *sbp, const BlastScoringOptions *scoring_options, EBlastProgramType program, const BlastQueryInfo *query_info, Blast_Message **error_return)
Blast_ScoreBlkKbpGappedCalc, fills the ScoreBlkPtr for a gapped search.
Definition: blast_setup.c:41
Int2 Blast_ScoreBlkMatrixInit(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)
Initializes the substitution matrix in the BlastScoreBlk according to the scoring options specified.
Definition: blast_setup.c:330
Int2 BLAST_OneSubjectUpdateParameters(EBlastProgramType program_number, Uint4 subject_length, const BlastScoringOptions *scoring_options, BlastQueryInfo *query_info, const BlastScoreBlk *sbp, BlastHitSavingParameters *hit_params, BlastInitialWordParameters *word_params, BlastEffectiveLengthsParameters *eff_len_params)
Recalculates the parameters that depend on an individual sequence, if this is not a database search.
Definition: blast_setup.c:1001
void BlastSeqLoc_RestrictToInterval(BlastSeqLoc **mask, Int4 from, Int4 to)
Adjusts the mask locations coordinates to a sequence interval.
Definition: blast_setup.c:1030
Int2 BLAST_GapAlignSetUp(EBlastProgramType program_number, const BlastSeqSrc *seq_src, const BlastScoringOptions *scoring_options, const BlastEffectiveLengthsOptions *eff_len_options, const BlastExtensionOptions *ext_options, const BlastHitSavingOptions *hit_options, BlastQueryInfo *query_info, BlastScoreBlk *sbp, BlastScoringParameters **score_params, BlastExtensionParameters **ext_params, BlastHitSavingParameters **hit_params, BlastEffectiveLengthsParameters **eff_len_params, BlastGapAlignStruct **gap_align)
Set up the auxiliary structures for gapped alignment / traceback only.
Definition: blast_setup.c:888
Int2 BLAST_CalcEffLengths(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, const BlastEffectiveLengthsParameters *eff_len_params, const BlastScoreBlk *sbp, BlastQueryInfo *query_info, Blast_Message **blast_message)
Function to calculate effective query length and db length as well as effective search space.
Definition: blast_setup.c:699
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
char *(* GET_MATRIX_PATH)(const char *, Boolean)
callback to resolve the path to blast score matrices
Definition: blast_stat.h:61
ncbi::TMaskedQueryRegions mask
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
Type and macro definitions from C toolkit that are not defined in C++ toolkit.
Functions for finding pattern matches in sequence (PHI-BLAST).
Structure to hold a sequence.
Definition: blast_def.h:242
Options for setting up effective lengths and search spaces.
Parameters for setting up effective lengths and search spaces.
Options used for gapped extension These include: a.
Computed values used as parameters for gapped alignments.
Structure supporting the gapped alignment.
Options used when evaluating and saving hits These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it.
Structure for keeping the query masking information.
Definition: blast_def.h:210
The query related information.
Structure used for scoring calculations.
Definition: blast_stat.h:177
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Structure to hold a message from the core of the BLAST engine.
Definition: blast_message.h:70
Options required for setting up the query sequence.
Structure containing all auxiliary information needed in a pattern search.
Definition: pattern.h:155
static string query
Modified on Fri Sep 20 14:58:30 2024 by modify_doxy.py rev. 669887