39 #define SWAP_SEQS(A, B) {const Uint1 *tmp = (A); (A) = (B); (B) = tmp; }
42 #define SWAP_INT(A, B) {Int4 tmp = (A); (A) = (B); (B) = tmp; }
66 Int4 final_best_score;
73 Int4 gap_open_extend = gap_open + gap_extend;
85 if (a_size < b_size) {
100 scores = gap_align->
dp_mem;
101 memset(scores, 0, (b_size + 1) *
sizeof(
BlastGapDP));
102 final_best_score = 0;
105 for (
i = 1;
i <= a_size;
i++) {
108 matrix_row = matrix[
i-1];
110 matrix_row = matrix[
A[
i-1]];
115 for (j = 1; j <= b_size; j++) {
118 best_score = scores[j].
best_gap - gap_extend;
119 if (scores[j].best - gap_open_extend > best_score)
120 best_score = scores[j].
best - gap_open_extend;
124 best_score = insert_score - gap_extend;
125 if (row_score - gap_open_extend > best_score)
126 best_score = row_score - gap_open_extend;
127 insert_score = best_score;
130 best_score =
MAX(scores[j-1].best + matrix_row[
B[j-1]], 0);
131 if (insert_score > best_score)
132 best_score = insert_score;
133 if (scores[j].best_gap > best_score)
136 if (best_score > final_best_score)
137 final_best_score = best_score;
139 scores[j-1].
best = row_score;
140 row_score = best_score;
143 scores[j-1].
best = row_score;
146 return final_best_score;
173 Int4 final_best_score;
179 Int4 gap_open_extend = gap_open + gap_extend;
192 scores = gap_align->
dp_mem;
193 memset(scores, 0, (a_size + 1) *
sizeof(
BlastGapDP));
194 final_best_score = 0;
197 for (
i = 1;
i <= b_size;
i++) {
203 matrix_row = matrix[base_pair];
207 for (j = 1; j <= a_size; j++) {
210 best_score = scores[j].
best_gap - gap_extend;
211 if (scores[j].best - gap_open_extend > best_score)
212 best_score = scores[j].
best - gap_open_extend;
216 best_score = insert_score - gap_extend;
217 if (row_score - gap_open_extend > best_score)
218 best_score = row_score - gap_open_extend;
219 insert_score = best_score;
222 best_score =
MAX(scores[j-1].best + matrix_row[
A[j-1]], 0);
223 if (insert_score > best_score)
224 best_score = insert_score;
225 if (scores[j].best_gap > best_score)
228 if (best_score > final_best_score)
229 final_best_score = best_score;
231 scores[j-1].
best = row_score;
232 row_score = best_score;
235 scores[j-1].
best = row_score;
238 return final_best_score;
292 Uint1 *traceback_row;
293 Int4 a_start, b_start;
295 Int4 curr_score = -best_score;
303 traceback_row =
trace +
i * (b_size + 1);
319 while (curr_score != 0) {
321 next_action = traceback_row[j];
327 curr_score += matrix[
i-1][
B[j-1]];
329 curr_score += matrix[
A[
i-1]][
B[j-1]];
332 traceback_row -= b_size + 1;
339 curr_score -= gap_open;
341 curr_score -= gap_extend;
345 traceback_row -= b_size + 1;
348 curr_score -= gap_open;
350 curr_score -= gap_extend;
361 for (
i = prelim_tback->
num_ops - 1, j = 0;
i >= 0;
i--, j++) {
363 final_tback->
num[j] = p->
num;
382 a_start, b_start, template_hsp->
context,
385 &final_tback, &new_hsp);
388 score_options, hit_options)) {
433 Int4 row_path_stop_i;
434 Int4 row_path_stop_j;
436 Int4 new_path_stop_i;
437 Int4 new_path_stop_j;
443 Int4 gap_open_extend = gap_open + gap_extend;
445 Uint1 *traceback_array;
446 Uint1 *traceback_row;
459 if (a_size < b_size) {
469 traceback_array = (
Uint1 *)
malloc((a_size + 1) * (b_size + 1) *
471 traceback_row = traceback_array;
472 for (
i = 0;
i <= b_size;
i++)
474 traceback_row += b_size + 1;
476 for (
i = 1;
i <= a_size;
i++) {
479 matrix_row = matrix[
i-1];
481 matrix_row = matrix[
A[
i-1]];
490 for (j = 1; j <= b_size; j++) {
493 best_score = scores[j].
best_gap - gap_extend;
495 if (scores[j].best - gap_open_extend > best_score) {
497 best_score = scores[j].
best - gap_open_extend;
502 best_score = insert_score - gap_extend;
503 if (row_score - gap_open_extend > best_score) {
505 best_score = row_score - gap_open_extend;
507 insert_score = best_score;
516 best_score =
MAX(scores[j-1].best + matrix_row[
B[j-1]], 0);
517 traceback_row[j] = script |
EDIT_SUB;
524 if (insert_score > best_score) {
525 best_score = insert_score;
528 new_path_score = row_path_score;
529 new_path_stop_i = row_path_stop_i;
530 new_path_stop_j = row_path_stop_j;
532 if (scores[j].best_gap >= best_score) {
541 if (best_score == 0) {
550 if (new_path_score >= cutoff) {
553 gap_open, gap_extend,
554 gap_align, new_path_stop_i,
555 new_path_stop_j, new_path_score,
556 hsp_list, swapped, template_hsp,
558 hit_params->
options, start_shift);
565 if (best_score > new_path_score) {
566 new_path_score = best_score;
572 scores[j-1].
best = row_score;
577 row_score = best_score;
578 row_path_score = new_path_score;
579 row_path_stop_i = new_path_stop_i;
580 row_path_stop_j = new_path_stop_j;
584 scores[j-1].
best = row_score;
593 if (scores[j-1].path_score >= cutoff) {
596 gap_open, gap_extend,
597 gap_align, scores[j-1].path_stop_i,
598 scores[j-1].path_stop_j,
599 scores[j-1].path_score,
600 hsp_list, swapped, template_hsp,
602 hit_params->
options, start_shift);
604 traceback_row += b_size + 1;
609 for (
i = 0;
i < b_size;
i++) {
610 if (scores[
i].best && scores[
i].path_score >= cutoff) {
613 gap_open, gap_extend,
614 gap_align, scores[
i].path_stop_i,
615 scores[
i].path_stop_j,
616 scores[
i].path_score,
617 hsp_list, swapped, template_hsp,
619 hit_params->
options, start_shift);
624 free(traceback_array);
644 Int4 cutoff_score = 0;
650 if (!
query || !
subject || !gap_align || !score_params || !ext_params ||
651 !hit_params || !init_hitlist || !hsp_list_ptr)
669 if (*hsp_list_ptr ==
NULL)
672 hsp_list = *hsp_list_ptr;
678 context <= query_info->last_context;
context++) {
687 if (rpsblast_pssms) {
715 if (score >= cutoff_score) {
729 *hsp_list_ptr = hsp_list;
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
Int4 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions *options)
Calculated the number of HSPs that should be saved.
Boolean Blast_HSPTestIdentityAndLength(EBlastProgramType program_number, BlastHSP *hsp, const Uint1 *query, const Uint1 *subject, const BlastScoringOptions *score_options, const BlastHitSavingOptions *hit_options)
Calculates number of identities and alignment lengths of an HSP via Blast_HSPGetNumIdentities and det...
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
void Blast_HSPAdjustSubjectOffset(BlastHSP *hsp, Int4 start_shift)
Adjusts offsets if partial sequence was used for extension.
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST (i.e.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
static Int4 s_NuclSmithWaterman(const Uint1 *B, Int4 b_size, const Uint1 *A, Int4 a_size, Int4 gap_open, Int4 gap_extend, BlastGapAlignStruct *gap_align)
Compute the score of the best local alignment between two nucleotide sequences.
static Int4 s_SmithWatermanScoreOnly(const Uint1 *A, Int4 a_size, const Uint1 *B, Int4 b_size, Int4 gap_open, Int4 gap_extend, BlastGapAlignStruct *gap_align)
Compute the score of the best local alignment between two protein sequences.
@ EDIT_START_GAP_B
open a gap in B
@ EDIT_GAP_IN_B
Insertion.
@ EDIT_START_GAP_A
open a gap in A
@ EDIT_OP_MASK
Mask for edit script operations.
#define SWAP_INT(A, B)
swap two integers
#define SWAP_SEQS(A, B)
swap (pointers to) a pair of sequences
struct BlastGapSW BlastGapSW
Auxiliary structures for Smith-Waterman alignment with traceback.
static void s_GetTraceback(EBlastProgramType program_number, Uint1 *trace, const Uint1 *A, const Uint1 *B, Int4 b_size, Int4 gap_open, Int4 gap_extend, BlastGapAlignStruct *gap_align, Int4 a_end, Int4 b_end, Int4 best_score, BlastHSPList *hsp_list, Boolean swapped, BlastHSP *template_hsp, const BlastScoringOptions *score_options, const BlastHitSavingOptions *hit_options, Int4 start_shift)
Generate the traceback for a single local alignment between two (unpacked) sequences,...
Smith-Waterman alignment for use within the infrastructure of BLAST.
Various auxiliary BLAST utility functions.
Int4 BLAST_FrameToContext(Int2 frame, EBlastProgramType program)
Convert translation frame or strand into a context number suitable for indexing into the BlastQueryIn...
#define NCBI2NA_UNPACK_BASE(x, N)
Macro to extract base N from a byte x (N >= 0, N < 4)
void GapPrelimEditBlockAdd(GapPrelimEditBlock *edit_block, EGapAlignOpType op_type, Int4 num_ops)
Add a new operation to a preliminary edit block, possibly combining it with the last operation if the...
EGapAlignOpType
Operation types within the edit script.
@ eGapAlignIns
Insertion: a gap in subject.
@ eGapAlignSub
Substitution.
@ eGapAlignDel
Deletion: a gap in query.
GapEditScript * GapEditScriptNew(Int4 size)
Initialize the edit script structure.
void GapPrelimEditBlockReset(GapPrelimEditBlock *edit_block)
Reset a preliminary edit block without freeing it.
void SmithWatermanScoreWithTraceback(EBlastProgramType program_number, const Uint1 *A, Int4 a_size, const Uint1 *B, Int4 b_size, BlastHSP *template_hsp, BlastHSPList *hsp_list, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, BlastGapAlignStruct *gap_align, Int4 start_shift, Int4 cutoff)
Find all local alignments between two (unpacked) sequences, using the Smith-Waterman algorithm,...
Int2 BLAST_SmithWatermanGetGappedScore(EBlastProgramType program_number, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, BLAST_SequenceBlk *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, const BlastHitSavingParameters *hit_params, const BlastInitialWordParameters *word_params, BlastInitHitList *init_hitlist, BlastHSPList **hsp_list_ptr, BlastGappedStats *gapped_stats, Boolean *fence_hit)
Performs score-only Smith-Waterman gapped alignment of the subject sequence with all contexts in the ...
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
Uint1 Boolean
bool replacment for C
#define TRUE
bool replacment for C indicating true.
#define FALSE
bool replacment for C indicating false.
#define MAX(a, b)
returns larger of a and b.
Structure to hold a sequence.
The context related information.
Int4 query_length
Length of this query, strand or frame.
Boolean is_valid
Determine if this context is valid or not.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
Computed values used as parameters for gapped alignments.
Structure supporting the gapped alignment.
GapPrelimEditBlock * fwd_prelim_tback
traceback from right extensions
Boolean positionBased
Is this PSI-BLAST?
Int4 dp_mem_alloc
current number of structures allocated
BlastScoreBlk * sbp
Pointer to the scoring information block.
BlastGapDP * dp_mem
scratch structures for dynamic programming
Auxiliary structure for dynamic programming gapped extension.
Int4 best_gap
score of best path that ends in a gap at this position
Int4 best
score of best path that ends in a match at this position
Auxiliary structures for Smith-Waterman alignment with traceback.
Int4 path_stop_j
Offset (plus one) on the second sequence where path_score occurs.
Int4 path_score
The highest score that the alignment at this position has previously achieved.
Int4 best_gap
Score of best alignment at this position that ends in a gap.
Int4 path_stop_i
Offset (plus one) on the first sequence where path_score occurs.
Int4 best
Score of best alignment at this position.
Int4 cutoff_score
Raw cutoff score corresponding to the e-value provided by the user if no sum stats,...
Structure containing hit counts from the gapped stage of a BLAST search.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Structure holding all information about an HSP.
BlastSeg query
Query sequence info.
Int4 context
Context number of query.
BlastSeg subject
Subject sequence info.
Options used when evaluating and saving hits These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
BlastGappedCutoffs * cutoffs
per-context gapped cutoff information
BlastHitSavingOptions * options
The original (unparsed) options.
Structure to hold all initial HSPs for a given subject sequence.
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it.
The query related information.
Int4 first_context
Index of the first element of the context array.
BlastContextInfo * contexts
Information per context.
SPsiBlastScoreMatrix * psi_matrix
PSSM and associated data.
SBlastScoreMatrix * matrix
scoring matrix data
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
Int4 gap_extend
Penalty for each gap residue (scaled version)
Int4 gap_open
Extra penalty for starting a gap (scaled version)
BlastScoringOptions * options
User-provided values for these params.
Int2 frame
Translation frame.
Edit script: linked list of correspondencies between two sequences.
Int4 * num
Array of number of operations.
EGapAlignOpType * op_type
Array of type of operation.
Preliminary version of GapEditBlock, used directly by the low- level dynamic programming routines.
GapPrelimEditScript * edit_ops
array of edit operations
Int4 num_ops
number of edit ops presently in use
A version of GapEditScript used to store initial results from the gapped alignment routines.
Int4 num
Number of operations.
EGapAlignOpType op_type
Type of operation.
int ** data
actual scoring matrix data, stored in row-major form
SBlastScoreMatrix * pssm
position-specific score matrix
static CS_CONTEXT * context
voidp calloc(uInt items, uInt size)