NCBI C++ ToolKit
blast_gapalign.h
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_gapalign.h 73100 2016-06-20 15:45:40Z boratyng $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Ilya Dondoshansky
27  *
28  */
29 
30 /** @file blast_gapalign.h
31  * Structures and functions prototypes used for BLAST gapped extension
32  * @todo FIXME: elaborate on contents.
33  */
34 
35 #ifndef ALGO_BLAST_CORE__BLAST_GAPALIGN__H
36 #define ALGO_BLAST_CORE__BLAST_GAPALIGN__H
37 
48 
49 #ifdef __cplusplus
50 extern "C" {
51 #endif
52 
53 /** Split subject sequences if longer than this */
54 #define MAX_DBSEQ_LEN 5000000
55 
56 /** Auxiliary structure for dynamic programming gapped extension */
57 typedef struct {
58  Int4 best; /**< score of best path that ends in a match
59  at this position */
60  Int4 best_gap; /**< score of best path that ends in a gap
61  at this position */
62 } BlastGapDP;
63 
64 
65 typedef struct JumperGapAlign JumperGapAlign;
66 
67 
68 /** Structure supporting the gapped alignment */
69 typedef struct BlastGapAlignStruct {
70  Boolean positionBased; /**< Is this PSI-BLAST? */
71  GapStateArrayStruct* state_struct; /**< Structure to keep extension
72  state information */
73  GapEditScript* edit_script; /**< The traceback (gap) information */
74  GapPrelimEditBlock *fwd_prelim_tback; /**< traceback from right extensions */
75  GapPrelimEditBlock *rev_prelim_tback; /**< traceback from left extensions */
76  SGreedyAlignMem* greedy_align_mem;/**< Preallocated memory for the greedy
77  gapped extension */
78  BlastGapDP* dp_mem; /**< scratch structures for dynamic programming */
79  Int4 dp_mem_alloc; /**< current number of structures allocated */
80  BlastScoreBlk* sbp; /**< Pointer to the scoring information block */
81  Int4 gap_x_dropoff; /**< X-dropoff parameter to use */
82  Int4 max_mismatches; /**< Max number of mismatches for jumper */
83  Int4 mismatch_window; /**< Window sie for mismatches for jumper */
84  Int4 query_start; /**< query start offset of current alignment */
85  Int4 query_stop; /**< query end offseet of current alignment */
86  Int4 subject_start; /**< subject start offset current alignment */
87  Int4 subject_stop; /**< subject end offset of current alignment */
88  Int4 greedy_query_seed_start; /**< for greedy alignments, the query
89  offset of the gapped start point */
90  Int4 greedy_subject_seed_start; /**< for greedy alignments, the subject
91  offset of the gapped start point */
92  Int4 score; /**< Return value: alignment score */
93 
94  JumperGapAlign* jumper; /**< data for jumper alignment */
96 
97 /** Initializes the BlastGapAlignStruct structure
98  * @param score_params Parameters related to scoring alignments [in]
99  * @param ext_params parameters related to gapped extension [in]
100  * @param max_subject_length Maximum length of any subject sequence (needed
101  * for greedy extension allocation only) [in]
102  * @param sbp The scoring information block [in]
103  * @param gap_align_ptr The BlastGapAlignStruct structure [out]
104 */
106 Int2
108  const BlastExtensionParameters* ext_params,
109  Uint4 max_subject_length, BlastScoreBlk* sbp,
110  BlastGapAlignStruct** gap_align_ptr);
111 
112 /** Deallocates memory in the BlastGapAlignStruct structure */
116 
117 /** Performs gapped extension for all non-Mega BLAST programs, given
118  * that ungapped extension has been done earlier.
119  * Sorts initial HSPs by score (from ungapped extension);
120  * Deletes HSPs that are included in already extended HSPs;
121  * Performs gapped extension;
122  * Saves HSPs into an HSP list.
123  * @param program_number Type of BLAST program [in]
124  * @param query The query sequence block [in]
125  * @param query_info Query information structure, containing offsets into
126  * the concatenated sequence [in]
127  * @param subject The subject sequence block [in]
128  * @param gap_align The auxiliary structure for gapped alignment [in]
129  * @param score_params Options and parameters related to scoring [in]
130  * @param ext_params Options and parameters related to extensions [in]
131  * @param hit_params Options related to saving hits [in]
132  * @param init_hitlist List of initial HSPs (offset pairs with additional
133  * information from the ungapped alignment performed earlier) [in]
134  * @param hsp_list_ptr Structure containing all saved HSPs [out]
135  * @param gapped_stats Return statistics (not filled if NULL) [out]
136  * @param fence_hit True is returned here if overrun is detected. [in]
137  */
140  BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
142  BlastGapAlignStruct* gap_align,
143  const BlastScoringParameters* score_params,
144  const BlastExtensionParameters* ext_params,
145  const BlastHitSavingParameters* hit_params,
146  BlastInitHitList* init_hitlist,
147  BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats,
148  Boolean * fence_hit);
149 
150 /** Perform a gapped alignment with traceback
151  * @param program Type of BLAST program [in]
152  * @param query The query sequence [in]
153  * @param subject The subject sequence [in]
154  * @param gap_align The gapped alignment structure [in] [out]
155  * @param score_params Scoring parameters [in]
156  * @param q_start Offset in query where to start alignment [in]
157  * @param s_start Offset in subject where to start alignment [in]
158  * @param query_length Maximal allowed extension in query [in]
159  * @param subject_length Maximal allowed extension in subject [in]
160  * @param fence_hit True is returned here if overrun is detected. [in]
161  */
164  const Uint1* query, const Uint1* subject,
165  BlastGapAlignStruct* gap_align,
166  const BlastScoringParameters* score_params,
167  Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length,
168  Boolean * fence_hit);
169 
170 /** Greedy gapped alignment, with or without traceback.
171  * Given two sequences, relevant options and an offset pair, fills the
172  * gap_align structure with alignment endpoints and, if traceback is
173  * performed, gap information.
174  * @param query The query sequence [in]
175  * @param subject The subject sequence [in]
176  * @param query_length The query sequence length [in]
177  * @param subject_length The subject sequence length [in]
178  * @param gap_align The structure holding various information and memory
179  * needed for gapped alignment [in] [out]
180  * @param score_params Parameters related to scoring alignments [in]
181  * @param q_off Starting offset in query [in]
182  * @param s_off Starting offset in subject [in]
183  * @param compressed_subject Is subject sequence compressed? [in]
184  * @param do_traceback Should traceback be saved? [in]
185  * @param fence_hit True is returned here if overrun is detected. [in]
186  */
188 Int2
190  Int4 query_length, Int4 subject_length, BlastGapAlignStruct* gap_align,
191  const BlastScoringParameters* score_params,
192  Int4 q_off, Int4 s_off, Boolean compressed_subject, Boolean do_traceback,
193  Boolean * fence_hit);
194 
195 /** Convert initial HSP list to an HSP list: to be used in ungapped search.
196  * Ungapped data must be available in the initial HSP list for this function
197  * to work.
198  * @param init_hitlist List of initial HSPs with ungapped extension
199  * information [in]
200  * @param query_info Query information structure, containing offsets into
201  * the concatenated queries/strands/frames [in]
202  * @param subject Subject sequence block containing frame information [in]
203  * @param hit_options Hit saving options [in]
204  * @param hsp_list_ptr HSPs in the final form [out]
205  */
209  const BlastHitSavingOptions* hit_options,
210  BlastHSPList** hsp_list_ptr);
211 
212 /** Adjusts range of subject sequence to be passed for gapped extension,
213  * taking into account the length and starting position of the alignment in
214  * query.
215  * @param subject_offset_ptr Start of the subject range [out]
216  * @param subject_length_ptr Length of the subject range [out]
217  * @param query_offset Offset in query from which alignment starts [in]
218  * @param query_length Length of the query sequence [in]
219  * @param start_shift The offset by which the output range is shifted with
220  * respect to the full subject sequence [out]
221  */
223 void
224 AdjustSubjectRange(Int4* subject_offset_ptr, Int4* subject_length_ptr,
225  Int4 query_offset, Int4 query_length, Int4* start_shift);
226 
227 /** Function to look for the highest scoring window (of size HSP_MAX_WINDOW)
228  * in an HSP and return the middle of this. Used by the gapped-alignment
229  * functions to start the gapped alignments.
230  * @param query The query sequence [in]
231  * @param subject The subject sequence [in]
232  * @param sbp Scoring block, containing matrix [in]
233  * @param q_start Starting offset in query [in]
234  * @param q_length Length of HSP in query [in]
235  * @param s_start Starting offset in subject [in]
236  * @param s_length Length of HSP in subject [in]
237  * @return The offset at which alignment should be started [out]
238 */
240 Int4
242  const BlastScoreBlk* sbp, Uint4 q_start, Uint4 q_length,
243  Uint4 s_start, Uint4 s_length);
244 
245 /** Function to look for the longest identity match run (up to size HSP_MAX_IDENT_RUN)
246  * in an HSP and return the middle of this. Used by the gapped-alignment
247  * functions to start the gapped alignments.
248  * @param query The query sequence [in]
249  * @param subject The subject sequence [in]
250  * @param hsp On return, the gapped_start will be filled. [in][out]
251 */
253 void
255  BlastHSP* hsp);
256 
257 /** Function to look for the highest scoring window (of size HSP_MAX_WINDOW)
258  * in an HSP and return the middle of this. Used by the gapped-alignment
259  * functions to start the gapped alignments.
260  * Should be used instead of BlastGetStartForGappedAlignment
261  * @param query The query sequence [in]
262  * @param subject The subject sequence [in]
263  * @param sbp Scoring block, containing matrix [in]
264  * @param hsp start and stops of HSP [in]
265  * @param q_retval query offset to use [out]
266  * @param s_retval subject offset to use [out]
267  *
268 */
270 Boolean
272  const BlastScoreBlk* sbp, BlastHSP* hsp, Int4* q_retval, Int4* s_retval);
273 
274 #ifdef __cplusplus
275 }
276 #endif
277 #endif /* !ALGO_BLAST_CORE__BLAST_GAPALIGN__H */
Definitions used throughout BLAST.
Various diagnostics (hit counts, etc.) returned from the BLAST engine.
Defines to provide correct exporting from BLAST DLL in Windows.
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
Ungapped extension structures that are common to nucleotide and protein extension routines.
Boolean BlastGetOffsetsForGappedAlignment(const Uint1 *query, const Uint1 *subject, const BlastScoreBlk *sbp, BlastHSP *hsp, Int4 *q_retval, Int4 *s_retval)
Function to look for the highest scoring window (of size HSP_MAX_WINDOW) in an HSP and return the mid...
Int2 BLAST_GappedAlignmentWithTraceback(EBlastProgramType program, const Uint1 *query, const Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Boolean *fence_hit)
Perform a gapped alignment with traceback.
Int4 BlastGetStartForGappedAlignment(const Uint1 *query, const Uint1 *subject, const BlastScoreBlk *sbp, Uint4 q_start, Uint4 q_length, Uint4 s_start, Uint4 s_length)
Function to look for the highest scoring window (of size HSP_MAX_WINDOW) in an HSP and return the mid...
Int2 BLAST_GetUngappedHSPList(BlastInitHitList *init_hitlist, BlastQueryInfo *query_info, BLAST_SequenceBlk *subject, const BlastHitSavingOptions *hit_options, BlastHSPList **hsp_list_ptr)
Convert initial HSP list to an HSP list: to be used in ungapped search.
Int2 BLAST_GetGappedScore(EBlastProgramType program_number, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, BLAST_SequenceBlk *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, const BlastHitSavingParameters *hit_params, BlastInitHitList *init_hitlist, BlastHSPList **hsp_list_ptr, BlastGappedStats *gapped_stats, Boolean *fence_hit)
Performs gapped extension for all non-Mega BLAST programs, given that ungapped extension has been don...
struct BlastGapAlignStruct BlastGapAlignStruct
Structure supporting the gapped alignment.
Int2 BLAST_GreedyGappedAlignment(const Uint1 *query, const Uint1 *subject, Int4 query_length, Int4 subject_length, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_off, Int4 s_off, Boolean compressed_subject, Boolean do_traceback, Boolean *fence_hit)
Greedy gapped alignment, with or without traceback.
void AdjustSubjectRange(Int4 *subject_offset_ptr, Int4 *subject_length_ptr, Int4 query_offset, Int4 query_length, Int4 *start_shift)
Adjusts range of subject sequence to be passed for gapped extension, taking into account the length a...
void BlastGetStartForGappedAlignmentNucl(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp)
Function to look for the longest identity match run (up to size HSP_MAX_IDENT_RUN) in an HSP and retu...
Int2 BLAST_GapAlignStructNew(const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, Uint4 max_subject_length, BlastScoreBlk *sbp, BlastGapAlignStruct **gap_align_ptr)
Initializes the BlastGapAlignStruct structure.
BlastGapAlignStruct * BLAST_GapAlignStructFree(BlastGapAlignStruct *gap_align)
Deallocates memory in the BlastGapAlignStruct structure.
Structures and API used for saving BLAST hits.
Structure and function definitions for BLAST parameter structures, which are internal to the CORE of ...
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
Definitions and functions associated with the BlastQueryInfo structure.
Definitions of structures used for saving traceback information.
Prototypes and structures for greedy gapped alignment.
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
Type and macro definitions from C toolkit that are not defined in C++ toolkit.
Uint1 Boolean
bool replacment for C
Definition: ncbi_std.h:94
Structure to hold a sequence.
Definition: blast_def.h:242
Computed values used as parameters for gapped alignments.
Structure supporting the gapped alignment.
GapPrelimEditBlock * fwd_prelim_tback
traceback from right extensions
Int4 gap_x_dropoff
X-dropoff parameter to use.
GapPrelimEditBlock * rev_prelim_tback
traceback from left extensions
Boolean positionBased
Is this PSI-BLAST?
Int4 query_stop
query end offseet of current alignment
Int4 dp_mem_alloc
current number of structures allocated
Int4 subject_start
subject start offset current alignment
Int4 greedy_subject_seed_start
for greedy alignments, the subject offset of the gapped start point
BlastScoreBlk * sbp
Pointer to the scoring information block.
SGreedyAlignMem * greedy_align_mem
Preallocated memory for the greedy gapped extension.
GapStateArrayStruct * state_struct
Structure to keep extension state information.
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
Int4 max_mismatches
Max number of mismatches for jumper.
Int4 greedy_query_seed_start
for greedy alignments, the query offset of the gapped start point
Int4 mismatch_window
Window sie for mismatches for jumper.
BlastGapDP * dp_mem
scratch structures for dynamic programming
JumperGapAlign * jumper
data for jumper alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
Auxiliary structure for dynamic programming gapped extension.
Int4 best_gap
score of best path that ends in a gap at this position
Int4 best
score of best path that ends in a match at this position
Structure containing hit counts from the gapped stage of a BLAST search.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Structure holding all information about an HSP.
Definition: blast_hits.h:126
Options used when evaluating and saving hits These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Structure to hold all initial HSPs for a given subject sequence.
Definition: blast_extend.h:158
The query related information.
Structure used for scoring calculations.
Definition: blast_stat.h:177
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
Edit script: linked list of correspondencies between two sequences.
Definition: gapinfo.h:57
Preliminary version of GapEditBlock, used directly by the low- level dynamic programming routines.
Definition: gapinfo.h:73
Structure to keep memory for state structure.
Definition: gapinfo.h:81
Gapped alignment data needed for jumper.
Definition: jumper.h:72
All auxiliary memory needed for the greedy extension algorithm.
Definition: greedy_align.h:89
static string subject
static string query
Modified on Mon Apr 22 04:02:15 2024 by modify_doxy.py rev. 669887