99 Int4 temp_sub_score = 0;
100 Int4 temp_indel_score = 0;
109 Int1 *editInstructions;
115 Int4 editStep, nextEditStep;
117 Int4 gapCost = gapOpen + gapExtend;
120 band = highDiag-lowDiag+1;
127 for (index1 = 1; index1 <= end1; index1++)
132 rightd = highDiag-lowDiag+1;
134 score_array[leftd].
best = 0;
135 state[0][leftd] = -1;
136 initialScore = -gapOpen;
137 for (diagIndex = leftd+1; diagIndex <= rightd; diagIndex++) {
138 score_array[diagIndex].
best = initialScore = initialScore-gapExtend;
139 score_array[diagIndex-1].
best_gap = initialScore-gapCost;
142 score_array[rightd+1].
best = kMinScore;
143 score_array[rightd].
best_gap = kMinScore;
144 score_array[leftd-1].
best_gap = -gapCost;
145 score_array[leftd-1].
best = kMinScore;
146 for (
i = 1;
i <= end1;
i++) {
147 if (
i > end2-highDiag)
151 matrixRow = matrix[seq1[
i]];
152 temp_indel_score = score_array[leftd].
best_gap;
154 if ((index2 = leftd+lowDiag-1+
i) > 0)
155 temp_sub_score = score_array[leftd].best+matrixRow[seq2[index2]];
156 if (temp_indel_score > temp_sub_score || index2 <= 0) {
157 temp_sub_score = temp_indel_score;
160 tempHorScore = temp_sub_score-gapCost;
162 if ((temp_indel_score-= gapExtend) >= tempHorScore) {
163 score_array[leftd-1].
best_gap = temp_indel_score;
166 score_array[leftd-1].
best_gap = tempHorScore;
169 stateRow = &
state[
i][leftd];
170 *stateRow++ = nextState;
171 score_array[leftd].
best = temp_sub_score;
172 for (curd=leftd+1, score_row = &score_array[curd]; curd <= rightd; curd++) {
173 temp_sub_score = score_row->
best + matrixRow[seq2[curd+lowDiag-1+
i]];
174 if ((temp_indel_score=score_row->
best_gap) > temp_sub_score) {
175 if (temp_indel_score > tempHorScore) {
176 score_row->
best = temp_indel_score;
177 tempHorScore -= gapExtend;
178 (score_row++-1)->best_gap = temp_indel_score-gapExtend;
181 score_row->
best = tempHorScore;
182 tempHorScore -= gapExtend;
183 (score_row++-1)->best_gap = temp_indel_score-gapExtend;
186 }
else if (tempHorScore > temp_sub_score) {
187 score_row->
best = tempHorScore;
188 tempHorScore -= gapExtend;
189 (score_row++-1)->best_gap = temp_indel_score-gapExtend;
192 score_row->
best = temp_sub_score;
193 if ((temp_sub_score -= gapCost) >
194 (tempHorScore-=gapExtend)) {
195 tempHorScore = temp_sub_score;
200 if (temp_sub_score > (temp_indel_score -= gapExtend)) {
201 *stateRow++= nextState;
202 (score_row++-1)->best_gap = temp_sub_score;
205 (score_row++-1)->best_gap = temp_indel_score;
212 score = (score_row-1)->best;
215 for (index1 = end1, diagIndex = rightd, editStep=0, charCounter = 0;
216 index1>=0; index1--, charCounter++) {
218 if (stateDecoder == -1)
230 editInstructions[charCounter] = editStep = nextEditStep;
232 for (charIndex = charCounter-1; charIndex >= 0; charIndex--) {
233 switch(editInstructions[charIndex]) {
245 sfree(editInstructions);
263 return (length <= 0 ? 0 : (gap_open + gap_extend*length));
287 lowDiag =
MIN(
MAX(-start1, lowDiag),
MIN(start2-start1,0));
288 highDiag =
MAX(
MIN(start2, highDiag),
MAX(start2-start1,0));
293 return -
s_GapCost(gapOpen, gapExtend, start1);
297 return -
s_GapCost(gapOpen, gapExtend, start2);
300 if ((highDiag-lowDiag+1) <= 1) {
302 for (
i = 1;
i <= start1;
i++) {
304 score += matrix[seq1[
i]][seq2[
i]];
309 score =
s_Align(seq1, seq2, start1, start2, lowDiag, highDiag, matrix,
310 gapOpen, gapExtend, alignScript);
331 Int4 prefixMatchedBitPattern = 0;
342 maskShiftPlus1 = (
mask << 1) +1;
343 for (
i = 0, prefixMatchedBitPattern= 0;
i <
len;
i++) {
344 prefixMatchedBitPattern =
345 ((prefixMatchedBitPattern << 1) | maskShiftPlus1) &
350 &rightOne, &rightMaskOnly);
352 *start = rightMaskOnly + 1;
372 Int4 *prefixMatchedBitPattern;
382 prefixMatchedBitPattern = (
Int4 *)
384 for (wordIndex = 0; wordIndex < num_words; wordIndex++) {
386 prefixMatchedBitPattern[wordIndex] = 0;
389 for (
i = 0;
i <
len;
i++) {
393 prefixMatchedBitPattern,
400 for (wordIndex = 0; (wordIndex < num_words) && (!found);
403 if ((prefixMatchedBitPattern[wordIndex]>>j) % 2 == 1)
405 else if ((multiword_items->
match_maskL[wordIndex] >> j)%2 == 1)
413 sfree(prefixMatchedBitPattern);
415 *start = rightMaskOnly+1;
420 #define MAX_HITS_IN_WORD 64
434 Int4 wordIndex, wordIndex2;
435 Int4 twiceHitsOneWord;
450 for (wordIndex = 1; wordIndex < num_words; wordIndex++) {
454 for (j = 0; j <
i; j += wordIndex) {
455 Int4 lastOffset = hitArray[j+wordIndex-1];
462 for (hitIndex = 0; hitIndex < twiceHitsOneWord;
463 hitIndex+= 2, pos+= wordIndex+1) {
464 for (wordIndex2 = 0; wordIndex2 < wordIndex; wordIndex2++)
465 hitArray1[pos+wordIndex2] = hitArray[j+wordIndex2];
466 hitArray1[pos+wordIndex2] = hitArray1[pos+wordIndex2-1] +
467 oneWordHitArray[hitIndex] + 1;
471 for (j = 0; j < pos; j++)
472 hitArray[j] = hitArray1[j];
474 for (j = 0; j < pos; j+= num_words) {
475 if (hitArray[j+num_words-1] ==
len) {
476 for (
i = 0;
i < num_words;
i++)
477 hitArray[
i] = hitArray[
i+j];
503 const int kBandLow = -5;
504 const int kBandHigh = 5;
506 Int4 startQueryMatch, endQueryMatch;
508 Int4 startDbMatch, endDbMatch;
511 Int4 queryMatchOffset, dbMatchOffset;
514 Int4 patternPosQuery, patternPosDb;
528 endQueryMatch = lenQuerySeq - 1;
530 endDbMatch = lenDbSeq - 1;
535 &endQueryMatch, pattern_blk);
537 &endDbMatch, pattern_blk);
540 &endQueryMatch, pattern_blk);
542 &endDbMatch, pattern_blk);
544 Int4 QueryWord, DbWord;
545 Int4 QueryVarSize, DbVarSize;
556 queryMatchOffset = dbMatchOffset = 0;
557 QueryWord = DbWord = 0;
558 QueryVarSize = DbVarSize = 0;
566 if (patternWord < 0) {
567 QueryVarSize += hitArray1[QueryWord] -
568 hitArray1[QueryWord-1] -
570 DbVarSize += hitArray2[DbWord] -
571 hitArray2[DbWord-1] -
579 if (QueryVarSize || DbVarSize) {
580 if (QueryVarSize == DbVarSize) {
586 QueryVarSize, DbVarSize,
587 kBandLow, kBandHigh, matrix,
588 gap_open, gap_extend, alignScript);
590 queryMatchOffset += QueryVarSize;
591 querySeq += QueryVarSize;
592 dbMatchOffset += DbVarSize;
601 QueryVarSize = DbVarSize = 0;
604 if (queryMatchOffset + QueryVarSize >= hitArray1[QueryWord])
606 if (dbMatchOffset + DbVarSize >= hitArray2[DbWord])
616 for (patternPosQuery = startQueryMatch, patternPosDb = startDbMatch;
617 patternPosQuery <= endQueryMatch || patternPosDb <= endDbMatch; ) {
628 for (queryMatchOffset =0;
631 patternPosQuery <= endQueryMatch;
632 patternPosQuery++, queryMatchOffset++) ;
633 for (dbMatchOffset = 0;
636 patternPosDb <= endDbMatch;
637 patternPosDb++, dbMatchOffset++) ;
638 if (queryMatchOffset == dbMatchOffset) {
644 }
while (queryMatchOffset > 0);
647 s_BandedAlign(querySeq-1, dbSeq-1, queryMatchOffset, dbMatchOffset,
648 kBandLow, kBandHigh, matrix, gap_open, gap_extend,
650 querySeq+=queryMatchOffset;
651 dbSeq+=dbMatchOffset;
675 Int4 query_offset,
Int4 subject_offset,
676 Int4 query_length,
Int4 subject_length)
678 Boolean found_start, found_end;
679 Int4 q_length=0, s_length=0, score_right, score_left;
680 Int4 private_q_start, private_s_start;
683 if (gap_align ==
NULL)
686 q_length = query_offset;
687 s_length = subject_offset;
696 if (q_length != 0 && s_length != 0) {
700 &private_q_start, &private_s_start,
TRUE,
NULL, gap_align,
703 gap_align->
query_start = q_length - private_q_start + 1;
709 q_length += query_length - 1;
710 s_length += subject_length - 1;
713 if (q_length < query_blk->length && s_length < subject_blk->length) {
724 if (found_start ==
FALSE) {
729 if (found_end ==
FALSE) {
730 gap_align->
query_stop = query_offset + query_length;
731 gap_align->
subject_stop = subject_offset + subject_length;
734 gap_align->
score = score_right+score_left;
763 if (!
query || !
subject || !gap_align || !score_params ||
764 !hit_params || !init_hitlist || !hsp_list_ptr)
767 if (init_hitlist->
total == 0)
770 hit_options = hit_params->
options;
773 if (*hsp_list_ptr ==
NULL)
776 hsp_list = *hsp_list_ptr;
780 for (pattern_index = 0; pattern_index < num_patterns; ++pattern_index) {
786 for (index=0; index<init_hitlist->
total; index++) {
788 Int4 s_pat_offset, s_pat_length;
800 q_pat_offset, s_pat_offset, q_pat_length,
812 q_pat_offset, s_pat_offset,
832 *hsp_list_ptr = hsp_list;
840 Int4 q_pat_length,
Int4 s_pat_length,
844 Int4 score_right, score_left, private_q_length, private_s_length;
850 if (!gap_align || !score_params || !pattern_blk)
862 &private_q_length, &private_s_length,
FALSE, rev_prelim_tback,
864 gap_align->
query_start = q_start - private_q_length;
868 s_pat_length, pat_prelim_tback, score_params->
options,
879 q_start += q_pat_length - 1;
880 s_start += s_pat_length - 1;
882 if ((q_start < query_length) && (s_start < subject_length)) {
886 query_length-q_start-1, subject_length-s_start-1,
887 &private_q_length, &private_s_length,
FALSE, fwd_prelim_tback,
890 gap_align->
query_stop = q_start + private_q_length + 1;
891 gap_align->
subject_stop = s_start + private_s_length + 1;
894 if (found_end ==
FALSE) {
903 gap_align->
score = score_right + score_left;
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Int4 Blast_SemiGappedAlign(const Uint1 *A, const Uint1 *B, Int4 M, Int4 N, Int4 *a_offset, Int4 *b_offset, Boolean score_only, GapPrelimEditBlock *edit_block, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 query_offset, Boolean reversed, Boolean reverse_sequence, Boolean *fence_hit)
Low level function to perform gapped extension in one direction with or without traceback.
GapEditScript * Blast_PrelimEditBlockToGapEditScript(GapPrelimEditBlock *rev_prelim_tback, GapPrelimEditBlock *fwd_prelim_tback)
Convert the initial list of traceback actions from a non-OOF gapped alignment into a blast edit scrip...
Structures and functions prototypes used for BLAST gapped extension.
Private interface for blast_gapalign.c.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
Int4 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions *options)
Calculated the number of HSPs that should be saved.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
void GapPrelimEditBlockAdd(GapPrelimEditBlock *edit_block, EGapAlignOpType op_type, Int4 num_ops)
Add a new operation to a preliminary edit block, possibly combining it with the last operation if the...
GapPrelimEditBlock * GapPrelimEditBlockFree(GapPrelimEditBlock *edit_block)
Frees a preliminary edit block structure.
@ eGapAlignIns
Insertion: a gap in subject.
@ eGapAlignSub
Substitution.
@ eGapAlignDel
Deletion: a gap in query.
void GapPrelimEditBlockAppend(GapPrelimEditBlock *edit_block1, GapPrelimEditBlock *edit_block2)
Append one GapPrelimEditBlock to the end of the other.
GapPrelimEditBlock * GapPrelimEditBlockNew(void)
Allocates a preliminary edit block structure.
void GapPrelimEditBlockReset(GapPrelimEditBlock *edit_block)
Reset a preliminary edit block without freeing it.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int8_t Int1
1-byte (8-bit) signed integer
#define MIN(a, b)
returns smaller of a and b.
Uint1 Boolean
bool replacment for C
#define TRUE
bool replacment for C indicating true.
#define FALSE
bool replacment for C indicating false.
#define ASSERT
macro for assert.
#define INT4_MIN
Smallest (most negative) number represented by signed int.
#define MAX(a, b)
returns larger of a and b.
Int4 _PHIBlastFindHitsShort(Int4 *hitArray, const Uint1 *seq, Int4 len1, const SPHIPatternSearchBlk *pattern_blk)
Routine to find hits of pattern to sequence when sequence is proteins.
void _PHIPatternWordsLeftShift(Int4 *a, Uint1 b, Int4 numWords)
Shift each word in the array left by 1 bit and add bit b.
Int4 _PHIPatternWordsBitwiseAnd(Int4 *result, Int4 *a, Int4 *b, Int4 numWords)
Do a word-by-word bit-wise and of two integer arrays and put the result in a new array.
void _PHIGetRightOneBits(Int4 s, Int4 mask, Int4 *rightOne, Int4 *rightMaskOnly)
Looks for 1 bits in the same position of s and mask Let R be the rightmost position where s and mask ...
void _PHIPatternWordsBitwiseOr(Int4 *a, Int4 *b, Int4 numWords)
Do a word-by-word bit-wise or of two integer arrays and put the result back in the first array.
@ eMultiWord
Does pattern consist of a multiple words?
@ eOneWord
Does pattern consist of a single word?
#define PHI_BITS_PACKED_PER_WORD
Number of bits packed in a word.
#define PHI_MAX_HIT
Maximal size of an array of pattern hits.
Auxiliary functions for finding pattern matches in sequence (PHI-BLAST), that are used in multiple so...
const int kMaskAaAlphabetBits
Masks all bits corresponding to the aminoacid alphabet, i.e.
#define MAX_HITS_IN_WORD
Maximal possible size of the hits array for one word of pattern.
static Int4 s_Align(Uint1 *seq1, Uint1 *seq2, Int4 end1, Int4 end2, Int4 lowDiag, Int4 highDiag, Int4 **matrix, Int4 gapOpen, Int4 gapExtend, GapPrelimEditBlock *alignScript)
Returns the cost of an optimum conversion within highDiag and lowDiag between two sequence segments a...
static Int4 s_BandedAlign(Uint1 *seq1, Uint1 *seq2, Int4 start1, Int4 start2, Int4 lowDiag, Int4 highDiag, Int4 **matrix, Int4 gapOpen, Int4 gapExtend, GapPrelimEditBlock *alignScript)
Do a banded gapped alignment of two sequences.
Int2 PHIGappedAlignmentWithTraceback(Uint1 *query, Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Int4 q_pat_length, Int4 s_pat_length, SPHIPatternSearchBlk *pattern_blk)
Perform a gapped alignment with traceback for PHI BLAST.
static Int4 s_PHIBlastAlignPatterns(Uint1 *querySeq, Uint1 *dbSeq, Int4 lenQuerySeq, Int4 lenDbSeq, GapPrelimEditBlock *alignScript, const BlastScoringOptions *score_options, SBlastScoreMatrix *score_matrix, SPHIPatternSearchBlk *pattern_blk)
Align pattern occurrences of the query and subject sequences.
Int2 PHIGetGappedScore(EBlastProgramType program_number, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, BLAST_SequenceBlk *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, const BlastHitSavingParameters *hit_params, BlastInitHitList *init_hitlist, BlastHSPList **hsp_list_ptr, BlastGappedStats *gapped_stats, Boolean *fence_hit)
Preliminary gapped alignment for PHI BLAST.
static Int2 s_PHIGetShortPattern(Uint1 *seq, Int4 len, Int4 *start, Int4 *end, SPHIPatternSearchBlk *pattern_blk)
Finds the position of the first pattern match in an input sequence, if pattern consists of a single w...
static Int2 s_PHIGetExtraLongPattern(Uint1 *seq, Int4 len, Int4 *hitArray, SPHIPatternSearchBlk *pattern_blk)
Find pattern occurrences in seq when the pattern description is extra long, report the results back i...
static Int4 s_GapCost(Int4 gap_open, Int4 gap_extend, Int4 length)
k-symbol indel cost.
static void s_PHIGetLongPattern(Uint1 *seq, Int4 len, Int4 *start, Int4 *end, SPHIPatternSearchBlk *pattern_blk)
Finds the position of the first pattern match in an input sequence, if pattern consists of a more tha...
static Int2 s_PHIGappedAlignment(BLAST_SequenceBlk *query_blk, BLAST_SequenceBlk *subject_blk, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 query_offset, Int4 subject_offset, Int4 query_length, Int4 subject_length)
Performs gapped extension for PHI BLAST, given two sequence blocks, scoring and extension options,...
Function prototypes used for PHI BLAST gapped extension and gapped extension with traceback.
Structure to hold a sequence.
Int4 length
Length of sequence.
Uint1 * sequence
Sequence used for search (could be translation).
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
Computed values used as parameters for gapped alignments.
Structure supporting the gapped alignment.
GapPrelimEditBlock * fwd_prelim_tback
traceback from right extensions
GapPrelimEditBlock * rev_prelim_tback
traceback from left extensions
Int4 query_stop
query end offseet of current alignment
Int4 subject_start
subject start offset current alignment
BlastScoreBlk * sbp
Pointer to the scoring information block.
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
Auxiliary structure for dynamic programming gapped extension.
Int4 best_gap
score of best path that ends in a gap at this position
Int4 best
score of best path that ends in a match at this position
Structure containing hit counts from the gapped stage of a BLAST search.
Int4 extensions
Total number of gapped extensions performed.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Structure holding all information about an HSP.
SPHIHspInfo * pat_info
In PHI BLAST, information about this pattern match.
Options used when evaluating and saving hits These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Int4 cutoff_score_min
smallest cutoff score across all contexts
BlastHitSavingOptions * options
The original (unparsed) options.
Structure to hold the initial HSP information.
BlastOffsetPair offsets
Offsets in query and subject, or, in PHI BLAST, start and end of pattern in subject.
Structure to hold all initial HSPs for a given subject sequence.
Int4 total
Total number of hits currently saved.
BlastInitHSP * init_hsp_array
Array of offset pairs, possibly with scores.
The query related information.
BlastContextInfo * contexts
Information per context.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
SBlastScoreMatrix * matrix
scoring matrix data
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Int4 gap_open
Extra penalty for starting a gap.
Int4 gap_extend
Penalty for each gap residue.
Boolean gapped_calculation
gap-free search if FALSE
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
BlastScoringOptions * options
User-provided values for these params.
Preliminary version of GapEditBlock, used directly by the low- level dynamic programming routines.
Scoring matrix used in BLAST.
int ** data
actual scoring matrix data, stored in row-major form
Auxiliary items needed for a PHI BLAST search with pattern containing multiple words.
Int4 match_maskL[100]
Bit mask representation of input pattern for long patterns.
SExtraLongPatternItems * extra_long_items
Additional items necessary if pattern contains pieces longer than a word.
Int4 SLL[100][256]
For each letter in the alphabet and each word in the masked pattern representation,...
Int4 inputPatternMasked[(30 *11)]
Masked input pattern.
Int4 bitPatternByLetter[256][11]
Which positions can a character occur in for long patterns.
Int4 numWords
Number of words need to hold bit representation of pattern.
In PHI BLAST: information about pattern match in a given HSP.
Int4 index
Index of query pattern occurrence for this HSP.
Int4 length
Length of this pattern occurrence in subject.
Information about a single pattern occurence in the query.
Int4 length
Length of this pattern occurrence.
Int4 offset
Starting offset of this pattern occurrence.
Structure containing all auxiliary information needed in a pattern search.
SShortPatternItems * one_word_items
Items necessary when pattern fits in one word.
EPatternType flagPatternLength
Indicates if the whole pattern fits in 1 word, each of several parts of the pattern fit in a word,...
SLongPatternItems * multi_word_items
Additional items, when pattern requires multiple words.
Int4 num_patterns
Number of pattern occurrences in query.
SPHIPatternInfo * occurrences
Array of pattern occurrence information structures.
Auxiliary items needed for a PHI BLAST search with a pattern that fits in a single word.
Int4 * whichPositionPtr
Array of positions where pattern lettern should match, for a single word of the pattern.
Int4 match_mask
Bit mask representation of input pattern for patterns that fit in a word.
Uint4 s_start
Start offset of pattern in subject.
Uint4 s_end
End offset of pattern in subject.
struct BlastOffsetPair::@7 phi_offsets
Pattern offsets in subject (PHI BLAST only)
voidp calloc(uInt items, uInt size)