NCBI C++ ToolKit
phi_gapalign.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: phi_gapalign.c 73329 2016-06-30 12:29:20Z madden $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Ilya Dondoshansky
27  *
28  */
29 
30 /** @file phi_gapalign.c
31  * Functions to perform gapped alignment in PHI BLAST.
32  * Pattern alignment does not contribute to the score.
33  * The main function calls are described below in pseudo code.
34  *
35  * <pre>
36  * Preliminary gapped alignment (does not align patterns). The following
37  * function is called once per query/subject sequence pair.
38  *
39  * PHIGetGappedScore
40  * for each pattern occurrence in query and in subject {
41  * s_PHIGappedAlignment
42  * Left score = Blast_SemiGappedAlign(...)
43  * Right score = Blast_SemiGappedAlign(...)
44  * }
45  *
46  * Alignment with traceback, including alignment of patterns, called for each
47  * HSP saved in the preliminary stage.
48  *
49  * PHIGappedAlignmentWithTraceback
50  * Left score = Blast_SemiGappedAlign(...)
51  * s_PHIBlastAlignPatterns(...)
52  * Right score = Blast_SemiGappedAlign(...)
53  * </pre>
54  */
55 
59 #include "blast_gapalign_priv.h"
60 #include "pattern_priv.h"
61 
62 /** @todo FIXME Figure out what these mean and document. */
63 typedef enum {
67  eDeleteCode = 20
69 
70 /** Returns the cost of an optimum conversion within highDiag and lowDiag
71  * between two sequence segments and appends such a conversion to the current
72  * script.
73  * @param seq1 Points 1 byte before the start of the first sequence segment [in]
74  * @param seq2 Points 1 byte before the start of the second sequence segment [in]
75  * @param end1 Length of the first sequence segment [in]
76  * @param end2 Length of the second sequence segment [in]
77  * @param lowDiag Low diagonal for the alignment [in]
78  * @param highDiag High diagonal for the alignment [in]
79  * @param matrix Scoring matrix [in]
80  * @param gapOpen Gap opening cost [in]
81  * @param gapExtend Gap extension cost [in]
82  * @param alignScript Stores traceback information. [out]
83  */
84 static Int4
85 s_Align(Uint1 * seq1, Uint1 * seq2, Int4 end1, Int4 end2, Int4 lowDiag,
86  Int4 highDiag, Int4 **matrix, Int4 gapOpen, Int4 gapExtend,
87  GapPrelimEditBlock* alignScript)
88 {
89  Int4 nextState; /*stores code for next entry in state*/
90  Int4 score; /*score to return*/
91  Int4 band; /*number of diagonals between highDiag and lowDiag
92  inclusive*/
93  Int4 diagIndex; /*loop index over diagonals*/
94  Int4 leftd, rightd; /* diagonal indices for CC, DD, CP and DP */
95  BlastGapDP* score_array; /*array for dynamic program information*/
96  Int4 curd; /* current index for CC, DD CP and DP */
97  Int4 i; /*loop index*/
98  Int4 index1; /*index on seq1*/
99  Int4 temp_sub_score = 0; /*placeholder for a substitution score */
100  Int4 temp_indel_score = 0; /*placeholder for an indel score */
101  Int4 tempHorScore; /*dual of temp_indel_score for the case where a
102  horizontal edge (insertion) is the last step*/
103  BlastGapDP* score_row = NULL; /*points to a row of CD*/
104  Int4 stateDecoder; /*used to decode the edge information in a state*/
105  Int4 initialScore; /*score to initialize dynamic program entries*/
106  Int4 *matrixRow; /*row of score matrix*/
107  Int1 **state; /*stores dynamic program information*/
108  Int1 *stateRow; /*holds one row of state*/
109  Int1 *editInstructions; /*holds instruction for string-to-string edit*/
110  Int4 index2; /*index on seq2*/
111  Int4 charCounter; /*counts number of characters involved in
112  optimal edit sequence*/
113  Int4 charIndex; /*index over character positions in optimal
114  edit sequence*/
115  Int4 editStep, nextEditStep; /*steps in string conversion sequence*/
116  const Int4 kMinScore = INT4_MIN/2;
117  Int4 gapCost = gapOpen + gapExtend;
118 
119  /* Boundary cases: end1 <= 0 , end2 <= 0, or highDiag-lowDiag <= 0 */
120  band = highDiag-lowDiag+1;
121 
122  /* Allocate array of scores. */
123  score_array = (BlastGapDP*) calloc(band+2, sizeof(BlastGapDP));
124 
125  state = (Int1 **) malloc(sizeof(Int1 *)*(end1+1));
126  state[0] = (Int1 *) malloc((end1+1)*(band+2));
127  for (index1 = 1; index1 <= end1; index1++)
128  state[index1] = state[index1-1]+band+2;
129 
130  /* Initialization */
131  leftd = 1-lowDiag;
132  rightd = highDiag-lowDiag+1;
133 
134  score_array[leftd].best = 0;
135  state[0][leftd] = -1;
136  initialScore = -gapOpen;
137  for (diagIndex = leftd+1; diagIndex <= rightd; diagIndex++) {
138  score_array[diagIndex].best = initialScore = initialScore-gapExtend;
139  score_array[diagIndex-1].best_gap = initialScore-gapCost;
140  state[0][diagIndex] = eDiagonalInsert;
141  }
142  score_array[rightd+1].best = kMinScore;
143  score_array[rightd].best_gap = kMinScore;
144  score_array[leftd-1].best_gap = -gapCost;
145  score_array[leftd-1].best = kMinScore;
146  for (i = 1; i <= end1; i++) {
147  if (i > end2-highDiag)
148  rightd--;
149  if (leftd > 1)
150  leftd--;
151  matrixRow = matrix[seq1[i]];
152  temp_indel_score = score_array[leftd].best_gap;
153  nextState = 0;
154  if ((index2 = leftd+lowDiag-1+i) > 0)
155  temp_sub_score = score_array[leftd].best+matrixRow[seq2[index2]];
156  if (temp_indel_score > temp_sub_score || index2 <= 0) {
157  temp_sub_score = temp_indel_score;
158  nextState = eDiagonalDelete;
159  }
160  tempHorScore = temp_sub_score-gapCost;
161  if (leftd >= 1) {
162  if ((temp_indel_score-= gapExtend) >= tempHorScore) {
163  score_array[leftd-1].best_gap = temp_indel_score;
164  nextState += eDeleteCode;
165  } else {
166  score_array[leftd-1].best_gap = tempHorScore;
167  }
168  }
169  stateRow = &state[i][leftd];
170  *stateRow++ = nextState;
171  score_array[leftd].best = temp_sub_score;
172  for (curd=leftd+1, score_row = &score_array[curd]; curd <= rightd; curd++) {
173  temp_sub_score = score_row->best + matrixRow[seq2[curd+lowDiag-1+i]];
174  if ((temp_indel_score=score_row->best_gap) > temp_sub_score) {
175  if (temp_indel_score > tempHorScore) {
176  score_row->best = temp_indel_score;
177  tempHorScore -= gapExtend;
178  (score_row++-1)->best_gap = temp_indel_score-gapExtend;
179  *stateRow++=eDeleteCode + eInsertCode + eDiagonalDelete;
180  } else {
181  score_row->best = tempHorScore;
182  tempHorScore -= gapExtend;
183  (score_row++-1)->best_gap = temp_indel_score-gapExtend;
184  *stateRow++=eDeleteCode + eInsertCode + eDiagonalInsert;
185  }
186  } else if (tempHorScore > temp_sub_score) {
187  score_row->best = tempHorScore;
188  tempHorScore -= gapExtend;
189  (score_row++-1)->best_gap = temp_indel_score-gapExtend;
190  *stateRow++=eDeleteCode + eInsertCode + eDiagonalInsert;;
191  } else {
192  score_row->best = temp_sub_score;
193  if ((temp_sub_score -= gapCost) >
194  (tempHorScore-=gapExtend)) {
195  tempHorScore = temp_sub_score;
196  nextState = 0;
197  } else {
198  nextState = eInsertCode;
199  }
200  if (temp_sub_score > (temp_indel_score -= gapExtend)) {
201  *stateRow++= nextState;
202  (score_row++-1)->best_gap = temp_sub_score;
203  } else {
204  *stateRow++ = nextState+eDeleteCode;
205  (score_row++-1)->best_gap = temp_indel_score;
206  }
207  }
208  }
209  }
210 
211  /* decide which path to be traced back */
212  score = (score_row-1)->best;
213 
214  editInstructions = (Int1*) malloc(end1+end2);
215  for (index1 = end1, diagIndex = rightd, editStep=0, charCounter = 0;
216  index1>=0; index1--, charCounter++) {
217  nextEditStep = (stateDecoder=state[index1][diagIndex]) % eInsertCode;
218  if (stateDecoder == -1)
219  break;
220  if (editStep == eDiagonalInsert && ((stateDecoder/eInsertCode)%2) == 1)
221  nextEditStep = eDiagonalInsert;
222  if (editStep == eDiagonalDelete && (stateDecoder/eDeleteCode)== 1)
223  nextEditStep = eDiagonalDelete;
224  if (nextEditStep == eDiagonalInsert) {
225  diagIndex--;
226  index1++;
227  } else if (nextEditStep == eDiagonalDelete) {
228  diagIndex++;
229  }
230  editInstructions[charCounter] = editStep = nextEditStep;
231  }
232  for (charIndex = charCounter-1; charIndex >= 0; charIndex--) {
233  switch(editInstructions[charIndex]) {
234  case 0:
235  GapPrelimEditBlockAdd(alignScript, eGapAlignSub, 1);
236  break;
237  case eDiagonalInsert:
238  GapPrelimEditBlockAdd(alignScript, eGapAlignDel, 1);
239  break;
240  case eDiagonalDelete:
241  GapPrelimEditBlockAdd(alignScript, eGapAlignIns, 1);
242  break;
243  }
244  }
245  sfree(editInstructions);
246  sfree(state[0]);
247  sfree(state);
248  sfree(score_array);
249 
250  return(score);
251 }
252 
253 
254 /** k-symbol indel cost.
255  * @param gap_open Gap opening cost [in]
256  * @param gap_extend Gap extension cost [in]
257  * @param length Length of a gap [in]
258  * @return Total cost of this gap = gap open + (gap extend)*length.
259  */
260 static Int4
261 s_GapCost(Int4 gap_open, Int4 gap_extend, Int4 length)
262 {
263  return (length <= 0 ? 0 : (gap_open + gap_extend*length));
264 }
265 
266 /** Do a banded gapped alignment of two sequences.
267  * @param seq1 First sequence [in]
268  * @param seq2 Second sequence [in]
269  * @param start1 Starting position in seq1 [in]
270  * @param start2 Starting position in seq2 [in]
271  * @param lowDiag Low diagonal in the banded alignment [in]
272  * @param highDiag High diagonal in the banded alignment [in]
273  * @param matrix Scoring matrix [in]
274  * @param gapOpen Gap opening penalty [in]
275  * @param gapExtend Gap extension penalty [in]
276  * @param alignScript Stores traceback information [in] [out]
277  * @return Alignment score.
278  */
279 static Int4
280 s_BandedAlign(Uint1 *seq1, Uint1 *seq2,Int4 start1, Int4 start2,
281  Int4 lowDiag, Int4 highDiag, Int4 **matrix, Int4 gapOpen,
282  Int4 gapExtend, GapPrelimEditBlock* alignScript)
283 {
284  Int4 score; /*score to return*/
285  Int4 i; /*index over sequences*/
286 
287  lowDiag = MIN(MAX(-start1, lowDiag),MIN(start2-start1,0));
288  highDiag = MAX(MIN(start2, highDiag),MAX(start2-start1,0));
289 
290  if (start2 <= 0) {
291  if (start1 > 0)
292  GapPrelimEditBlockAdd(alignScript, eGapAlignIns, start1);
293  return -s_GapCost(gapOpen, gapExtend, start1);
294  }
295  if (start1 <= 0) {
296  GapPrelimEditBlockAdd(alignScript, eGapAlignDel, start2);
297  return -s_GapCost(gapOpen, gapExtend, start2);
298  }
299 
300  if ((highDiag-lowDiag+1) <= 1) {
301  score = 0;
302  for (i = 1; i <= start1; i++) {
303  GapPrelimEditBlockAdd(alignScript, eGapAlignSub, 1);
304  score += matrix[seq1[i]][seq2[i]];
305  }
306  return score;
307  }
308 
309  score = s_Align(seq1, seq2, start1, start2, lowDiag, highDiag, matrix,
310  gapOpen, gapExtend, alignScript);
311 
312  return score;
313 }
314 
315 /** Finds the position of the first pattern match in an input sequence, if
316  * pattern consists of a single word.
317  * The existence of the pattern in sequence must be already established.
318  * @param seq Sequence to find pattern in [in]
319  * @param len Length of seq [in]
320  * @param start Start of pattern [out]
321  * @param end End of pattern [out]
322  * @param pattern_blk Pattern information [in]
323  */
324 static Int2
326  SPHIPatternSearchBlk *pattern_blk)
327 {
328  Int4 mask; /*mask of input pattern positions after which
329  a match can be declared*/
330  Int4 maskShiftPlus1; /*mask shifted left plus 1*/
331  Int4 prefixMatchedBitPattern = 0; /*indicates where pattern aligns
332  with seq; e.g., if value is 9 = 0101 then
333  last 3 chars of seq match first 3 positions in pattern
334  and last 1 char of seq matches 1 position of pattern*/
335  Int4 i; /*index over seq */
336  Int4 rightOne; /*loop index looking for 1 in both mask and
337  prefixMatchedBitPattern*/
338  Int4 rightMaskOnly; /*rightmost bit that is 1 in the mask only*/
339  SShortPatternItems* word_items = pattern_blk->one_word_items;
340 
341  mask = word_items->match_mask;
342  maskShiftPlus1 = (mask << 1) +1;
343  for (i = 0, prefixMatchedBitPattern= 0; i < len; i++) {
344  prefixMatchedBitPattern =
345  ((prefixMatchedBitPattern << 1) | maskShiftPlus1) &
346  word_items->whichPositionPtr[seq[i]];
347  }
348 
349  _PHIGetRightOneBits(prefixMatchedBitPattern, mask,
350  &rightOne, &rightMaskOnly);
351 
352  *start = rightMaskOnly + 1;
353  *end = rightOne;
354  return 0;
355 }
356 
357 /** Finds the position of the first pattern match in an input sequence, if
358  * pattern consists of a more than one word. The existence of the pattern in
359  * sequence must be already established.
360  * @param seq Sequence to find pattern in [in]
361  * @param len Length of seq [in]
362  * @param start Start of pattern [out]
363  * @param end End of pattern [out]
364  * @param pattern_blk Pattern information [in]
365  */
366 static void
368  SPHIPatternSearchBlk *pattern_blk)
369 {
370  Int4 *mask; /*mask of input pattern positions after which
371  a match can be declared*/
372  Int4 *prefixMatchedBitPattern; /*indicates where pattern aligns with seq*/
373  Int4 wordIndex; /*index over words in pattern*/
374  Int4 i; /*index over seq*/
375  Int4 rightMaskOnly; /*rightmost bit that is 1 in the mask only*/
376  Int4 j = 0; /*index over bits in a word*/
377  Boolean found = FALSE; /*found match position yet*/
378  SLongPatternItems* multiword_items = pattern_blk->multi_word_items;
379  Int4 num_words = multiword_items->numWords;
380 
381  mask = (Int4 *) calloc(num_words, sizeof(Int4));
382  prefixMatchedBitPattern = (Int4 *)
383  calloc(num_words, sizeof(Int4));
384  for (wordIndex = 0; wordIndex < num_words; wordIndex++) {
385  mask[wordIndex] = multiword_items->match_maskL[wordIndex];
386  prefixMatchedBitPattern[wordIndex] = 0;
387  }
388  _PHIPatternWordsLeftShift(mask, 1, num_words);
389  for (i = 0; i < len; i++) {
390  _PHIPatternWordsLeftShift(prefixMatchedBitPattern, 0, num_words);
391  _PHIPatternWordsBitwiseOr(prefixMatchedBitPattern, mask, num_words);
392  _PHIPatternWordsBitwiseAnd(prefixMatchedBitPattern,
393  prefixMatchedBitPattern,
394  multiword_items->bitPatternByLetter[seq[i]],
395  num_words);
396  }
397  _PHIPatternWordsBitwiseAnd(prefixMatchedBitPattern, prefixMatchedBitPattern,
398  multiword_items->match_maskL, num_words);
399  rightMaskOnly = -1;
400  for (wordIndex = 0; (wordIndex < num_words) && (!found);
401  wordIndex++) {
402  for (j = 0; j < PHI_BITS_PACKED_PER_WORD && (!found); j++) {
403  if ((prefixMatchedBitPattern[wordIndex]>>j) % 2 == 1)
404  found = TRUE;
405  else if ((multiword_items->match_maskL[wordIndex] >> j)%2 == 1)
406  rightMaskOnly = wordIndex*PHI_BITS_PACKED_PER_WORD+j;
407  }
408  }
409  if (found) {
410  wordIndex--;
411  j --;
412  }
413  sfree(prefixMatchedBitPattern);
414  sfree(mask);
415  *start = rightMaskOnly+1;
416  *end = wordIndex*PHI_BITS_PACKED_PER_WORD+j;
417 }
418 
419 /** Maximal possible size of the hits array for one word of pattern. */
420 #define MAX_HITS_IN_WORD 64
421 
422 /** Find pattern occurrences in seq when the pattern description is
423  * extra long, report the results back in hitArray
424  * @param seq Sequence to find pattern in [in]
425  * @param len Length of seq [in]
426  * @param hitArray Stores pairs of length/position for matches [out]
427  * @param pattern_blk All necessary information about pattern [in]
428  */
429 static Int2
431  SPHIPatternSearchBlk *pattern_blk)
432 {
433  Int4 i, j; /*indices on one word hits*/
434  Int4 wordIndex, wordIndex2; /*indices on words in pattern representation*/
435  Int4 twiceHitsOneWord; /*Twice the number of hits against one
436  word of the pattern*/
437  Int4 hitIndex; /*index over hits against one word*/
438  Int4 pos = 0; /*keeps track of how many intermediate hits*/
439  Int4 hitArray1[PHI_MAX_HIT];
440  Int4 oneWordHitArray[MAX_HITS_IN_WORD]; /*hits for one word of
441  pattern representation*/
442  SShortPatternItems* one_word_items = pattern_blk->one_word_items;
443  SLongPatternItems* multiword_items = pattern_blk->multi_word_items;
444  SExtraLongPatternItems* extra_items = multiword_items->extra_long_items;
445  Int4 num_words = multiword_items->numWords;
446 
447  i = 1;
448 
449  hitArray[0] = extra_items->numPlacesInWord[0];
450  for (wordIndex = 1; wordIndex < num_words; wordIndex++) {
451  one_word_items->whichPositionPtr = multiword_items->SLL[wordIndex];
452  one_word_items->match_mask = multiword_items->match_maskL[wordIndex];
453  pos = 0;
454  for (j = 0; j < i; j += wordIndex) {
455  Int4 lastOffset = hitArray[j+wordIndex-1];
456  twiceHitsOneWord =
457  _PHIBlastFindHitsShort(oneWordHitArray,
458  seq + lastOffset,
459  MIN(len-lastOffset, extra_items->spacing[wordIndex-1] +
460  extra_items->numPlacesInWord[wordIndex]),
461  pattern_blk);
462  for (hitIndex = 0; hitIndex < twiceHitsOneWord;
463  hitIndex+= 2, pos+= wordIndex+1) {
464  for (wordIndex2 = 0; wordIndex2 < wordIndex; wordIndex2++)
465  hitArray1[pos+wordIndex2] = hitArray[j+wordIndex2];
466  hitArray1[pos+wordIndex2] = hitArray1[pos+wordIndex2-1] +
467  oneWordHitArray[hitIndex] + 1;
468  }
469  }
470  i = pos;
471  for (j = 0; j < pos; j++)
472  hitArray[j] = hitArray1[j];
473  }
474  for (j = 0; j < pos; j+= num_words) {
475  if (hitArray[j+num_words-1] == len) {
476  for (i = 0; i < num_words; i++)
477  hitArray[i] = hitArray[i+j];
478  return 0;
479  }
480  }
481  /* This point should never be reached. */
482  return -1;
483 }
484 
485 /** Align pattern occurrences of the query and subject sequences.
486  * @param querySeq Pointer to start of pattern in query [in]
487  * @param dbSeq Pointer to start of pattern in subject [in]
488  * @param lenQuerySeq Length of pattern occurrence in query [in]
489  * @param lenDbSeq Length of pattern occurrence in subject [in]
490  * @param alignScript Traceback script [out]
491  * @param score_options Scoring options [in]
492  * @param score_matrix Scoring matrix [in]
493  * @param pattern_blk Structure with information about pattern [in]
494  * @return Score for this alignment.
495  */
496 static Int4
497 s_PHIBlastAlignPatterns(Uint1 *querySeq, Uint1 *dbSeq, Int4 lenQuerySeq,
498  Int4 lenDbSeq, GapPrelimEditBlock *alignScript,
499  const BlastScoringOptions* score_options,
500  SBlastScoreMatrix* score_matrix,
501  SPHIPatternSearchBlk *pattern_blk)
502 {
503  const int kBandLow = -5;
504  const int kBandHigh = 5;
505 
506  Int4 startQueryMatch, endQueryMatch; /*positions delimiting
507  where query matches pattern first */
508  Int4 startDbMatch, endDbMatch; /*positions delimiting where
509  database sequence matches pattern first*/
510  Int4 local_score; /*score for return*/
511  Int4 queryMatchOffset, dbMatchOffset; /*offset from sequence start where
512  pattern character matches,
513  used as indices*/
514  Int4 patternPosQuery, patternPosDb; /*positions in pattern
515  for matches to query and database sequences*/
516 
517  Int4 placeIndex; /*index over places in pattern*/
518  Int4 *hitArray1=NULL, *hitArray2=NULL;
519  Int4 gap_open; /*gap opening penalty*/
520  Int4 gap_extend; /*gap extension penalty*/
521  Int4** matrix = score_matrix->data;
522  SLongPatternItems* multiword_items = pattern_blk->multi_word_items;
523 
524  gap_open = score_options->gap_open;
525  gap_extend = score_options->gap_extend;
526 
527  startQueryMatch = 0;
528  endQueryMatch = lenQuerySeq - 1;
529  startDbMatch = 0;
530  endDbMatch = lenDbSeq - 1;
531  local_score = 0;
532 
533  if (pattern_blk->flagPatternLength == eOneWord) {
534  s_PHIGetShortPattern(querySeq, lenQuerySeq, &startQueryMatch,
535  &endQueryMatch, pattern_blk);
536  s_PHIGetShortPattern(dbSeq, lenDbSeq, &startDbMatch,
537  &endDbMatch, pattern_blk);
538  } else if (pattern_blk->flagPatternLength == eMultiWord) {
539  s_PHIGetLongPattern(querySeq, lenQuerySeq, &startQueryMatch,
540  &endQueryMatch, pattern_blk);
541  s_PHIGetLongPattern(dbSeq, lenDbSeq, &startDbMatch,
542  &endDbMatch, pattern_blk);
543  } else {
544  Int4 QueryWord, DbWord;
545  Int4 QueryVarSize, DbVarSize;
546  SExtraLongPatternItems* extra_items = multiword_items->extra_long_items;
547 
548  hitArray1 = calloc(PHI_MAX_HIT, sizeof(Int4));
549  hitArray2 = calloc(PHI_MAX_HIT, sizeof(Int4));
550  /* Populate the hit arrays by matching the pattern to the query and
551  subject sequence segments. */
552  s_PHIGetExtraLongPattern(querySeq, lenQuerySeq, hitArray1,
553  pattern_blk);
554  s_PHIGetExtraLongPattern(dbSeq, lenDbSeq, hitArray2, pattern_blk);
555 
556  queryMatchOffset = dbMatchOffset = 0;
557  QueryWord = DbWord = 0;
558  QueryVarSize = DbVarSize = 0;
559 
560  for (placeIndex = 0;
561  placeIndex < extra_items->highestPlace; placeIndex++) {
562 
563  Int4 patternWord =
564  multiword_items->inputPatternMasked[placeIndex];
565 
566  if (patternWord < 0) {
567  QueryVarSize += hitArray1[QueryWord] -
568  hitArray1[QueryWord-1] -
569  extra_items->numPlacesInWord[QueryWord];
570  DbVarSize += hitArray2[DbWord] -
571  hitArray2[DbWord-1] -
572  extra_items->numPlacesInWord[DbWord];
573  }
574  else if (patternWord == kMaskAaAlphabetBits) {
575  QueryVarSize++;
576  DbVarSize++;
577  }
578  else {
579  if (QueryVarSize || DbVarSize) {
580  if (QueryVarSize == DbVarSize) {
581  GapPrelimEditBlockAdd(alignScript, eGapAlignSub,
582  QueryVarSize);
583  }
584  else {
585  local_score += s_BandedAlign(querySeq-1, dbSeq-1,
586  QueryVarSize, DbVarSize,
587  kBandLow, kBandHigh, matrix,
588  gap_open, gap_extend, alignScript);
589  }
590  queryMatchOffset += QueryVarSize;
591  querySeq += QueryVarSize;
592  dbMatchOffset += DbVarSize;
593  dbSeq += DbVarSize;
594  }
595 
596  GapPrelimEditBlockAdd(alignScript, eGapAlignSub, 1);
597  queryMatchOffset++;
598  querySeq++;
599  dbMatchOffset++;
600  dbSeq++;
601  QueryVarSize = DbVarSize = 0;
602  }
603 
604  if (queryMatchOffset + QueryVarSize >= hitArray1[QueryWord])
605  QueryWord++;
606  if (dbMatchOffset + DbVarSize >= hitArray2[DbWord])
607  DbWord++;
608  }
609 
610  sfree(hitArray1);
611  sfree(hitArray2);
612 
613  return local_score;
614  }
615 
616  for (patternPosQuery = startQueryMatch, patternPosDb = startDbMatch;
617  patternPosQuery <= endQueryMatch || patternPosDb <= endDbMatch; ) {
618  if (multiword_items->inputPatternMasked[patternPosQuery]
619  != kMaskAaAlphabetBits &&
620  multiword_items->inputPatternMasked[patternPosDb] !=
622  GapPrelimEditBlockAdd(alignScript, eGapAlignSub, 1);
623  patternPosQuery++;
624  patternPosDb++;
625  querySeq++;
626  dbSeq++;
627  } else {
628  for (queryMatchOffset =0;
629  multiword_items->inputPatternMasked[patternPosQuery] ==
631  patternPosQuery <= endQueryMatch;
632  patternPosQuery++, queryMatchOffset++) ;
633  for (dbMatchOffset = 0;
634  multiword_items->inputPatternMasked[patternPosDb] ==
636  patternPosDb <= endDbMatch;
637  patternPosDb++, dbMatchOffset++) ;
638  if (queryMatchOffset == dbMatchOffset) {
639  do {
640  GapPrelimEditBlockAdd(alignScript, eGapAlignSub, 1);
641  querySeq++;
642  dbSeq++;
643  queryMatchOffset--;
644  } while (queryMatchOffset > 0);
645  } else {
646  local_score +=
647  s_BandedAlign(querySeq-1, dbSeq-1, queryMatchOffset, dbMatchOffset,
648  kBandLow, kBandHigh, matrix, gap_open, gap_extend,
649  alignScript);
650  querySeq+=queryMatchOffset;
651  dbSeq+=dbMatchOffset;
652  }
653  }
654  }
655  return local_score;
656 }
657 
658 /** Performs gapped extension for PHI BLAST, given two
659  * sequence blocks, scoring and extension options, and an initial HSP
660  * with information from the previously performed ungapped extension
661  * @param query_blk The query sequence block [in]
662  * @param subject_blk The subject sequence block [in]
663  * @param gap_align The auxiliary structure for gapped alignment [in]
664  * @param score_params Parameters related to scoring [in]
665  * @param query_offset Start of pattern in query [in]
666  * @param subject_offset Start of pattern in subject [in]
667  * @param query_length Length of pattern in query [in]
668  * @param subject_length Length of pattern in subject [in]
669  */
670 static Int2
672  BLAST_SequenceBlk* subject_blk,
673  BlastGapAlignStruct* gap_align,
674  const BlastScoringParameters* score_params,
675  Int4 query_offset, Int4 subject_offset,
676  Int4 query_length, Int4 subject_length)
677 {
678  Boolean found_start, found_end;
679  Int4 q_length=0, s_length=0, score_right, score_left;
680  Int4 private_q_start, private_s_start;
681  Uint1* query,* subject;
682 
683  if (gap_align == NULL)
684  return -1;
685 
686  q_length = query_offset;
687  s_length = subject_offset;
688  query = query_blk->sequence;
689  subject = subject_blk->sequence;
690 
691  found_start = FALSE;
692  found_end = FALSE;
693 
694  /* Looking for "left" score */
695  score_left = 0;
696  if (q_length != 0 && s_length != 0) {
697  found_start = TRUE;
698  score_left =
699  Blast_SemiGappedAlign(query, subject, q_length, s_length,
700  &private_q_start, &private_s_start, TRUE, NULL, gap_align,
701  score_params, query_offset, FALSE, TRUE, NULL);
702 
703  gap_align->query_start = q_length - private_q_start + 1;
704  gap_align->subject_start = s_length - private_s_start + 1;
705 
706  }
707 
708  /* Pattern itself is not included in the gapped alignment */
709  q_length += query_length - 1;
710  s_length += subject_length - 1;
711 
712  score_right = 0;
713  if (q_length < query_blk->length && s_length < subject_blk->length) {
714  found_end = TRUE;
715  score_right = Blast_SemiGappedAlign(query+q_length,
716  subject+s_length, query_blk->length-q_length-1,
717  subject_blk->length-s_length-1, &(gap_align->query_stop),
718  &(gap_align->subject_stop), TRUE, NULL, gap_align,
719  score_params, q_length, FALSE, FALSE, NULL);
720  gap_align->query_stop += q_length;
721  gap_align->subject_stop += s_length;
722  }
723 
724  if (found_start == FALSE) { /* Start never found */
725  gap_align->query_start = query_offset;
726  gap_align->subject_start = subject_offset;
727  }
728 
729  if (found_end == FALSE) {
730  gap_align->query_stop = query_offset + query_length;
731  gap_align->subject_stop = subject_offset + subject_length;
732  }
733 
734  gap_align->score = score_right+score_left;
735 
736  return 0;
737 }
738 
740  BLAST_SequenceBlk* query, BlastQueryInfo* query_info,
742  BlastGapAlignStruct* gap_align,
743  const BlastScoringParameters* score_params,
744  const BlastExtensionParameters* ext_params,
745  const BlastHitSavingParameters* hit_params,
746  BlastInitHitList* init_hitlist,
747  BlastHSPList** hsp_list_ptr, BlastGappedStats* gapped_stats,
748  Boolean * fence_hit)
749 
750 {
751  BlastHSPList* hsp_list;
752  BlastInitHSP* init_hsp;
753  Int4 index;
754  Int2 status = 0;
755  BlastHitSavingOptions* hit_options;
756  Int4 pattern_index;
757  Int4 num_patterns;
758  Int4 HspNumMax=0;
759 
760  /* PHI does not support partial fetching at the moment. */
761  ASSERT(! fence_hit);
762 
763  if (!query || !subject || !gap_align || !score_params ||
764  !hit_params || !init_hitlist || !hsp_list_ptr)
765  return -1;
766 
767  if (init_hitlist->total == 0)
768  return 0;
769 
770  hit_options = hit_params->options;
771  HspNumMax = BlastHspNumMax(score_params->options->gapped_calculation, hit_options);
772 
773  if (*hsp_list_ptr == NULL)
774  *hsp_list_ptr = hsp_list = Blast_HSPListNew(HspNumMax);
775  else
776  hsp_list = *hsp_list_ptr;
777 
778  num_patterns = query_info->pattern_info->num_patterns;
779 
780  for (pattern_index = 0; pattern_index < num_patterns; ++pattern_index) {
781  SPHIPatternInfo* query_pattern =
782  &query_info->pattern_info->occurrences[pattern_index];
783  Uint4 q_pat_offset = query_pattern->offset;
784  Uint4 q_pat_length = query_pattern->length;
785 
786  for (index=0; index<init_hitlist->total; index++) {
787  BlastHSP* new_hsp;
788  Int4 s_pat_offset, s_pat_length;
789 
790  init_hsp = &init_hitlist->init_hsp_array[index];
791  s_pat_offset = init_hsp->offsets.phi_offsets.s_start;
792  s_pat_length = init_hsp->offsets.phi_offsets.s_end -
793  init_hsp->offsets.phi_offsets.s_start + 1;
794 
795  if (gapped_stats)
796  ++gapped_stats->extensions;
797 
798  status =
799  s_PHIGappedAlignment(query, subject, gap_align, score_params,
800  q_pat_offset, s_pat_offset, q_pat_length,
801  s_pat_length);
802 
803  if (status) {
804  return status;
805  }
806 
807  /* PHI BLAST does not support query concatenation, so context is
808  always 0. */
809  if (gap_align->score >= hit_params->cutoff_score_min) {
810  Blast_HSPInit(gap_align->query_start, gap_align->query_stop,
811  gap_align->subject_start, gap_align->subject_stop,
812  q_pat_offset, s_pat_offset,
813  0, query_info->contexts[0].frame,
814  subject->frame, gap_align->score,
815  &(gap_align->edit_script), &new_hsp);
816 
817  /* Save pattern index and subject length in the HSP structure. */
818  new_hsp->pat_info =
819  (SPHIHspInfo*) malloc(sizeof(SPHIHspInfo));
820 
821  new_hsp->pat_info->index = pattern_index;
822  new_hsp->pat_info->length = s_pat_length;
823 
824  Blast_HSPListSaveHSP(hsp_list, new_hsp);
825  }
826  }
827  }
828 
829  /* Sort the HSP array by score */
830  Blast_HSPListSortByScore(hsp_list);
831 
832  *hsp_list_ptr = hsp_list;
833  return status;
834 }
835 
837  BlastGapAlignStruct* gap_align,
838  const BlastScoringParameters* score_params,
839  Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length,
840  Int4 q_pat_length, Int4 s_pat_length,
841  SPHIPatternSearchBlk *pattern_blk)
842 {
843  Boolean found_end;
844  Int4 score_right, score_left, private_q_length, private_s_length;
845  Int2 status = 0;
846  GapPrelimEditBlock *fwd_prelim_tback;
847  GapPrelimEditBlock *rev_prelim_tback;
848  GapPrelimEditBlock *pat_prelim_tback = GapPrelimEditBlockNew();
849 
850  if (!gap_align || !score_params || !pattern_blk)
851  return -1;
852 
853  fwd_prelim_tback = gap_align->fwd_prelim_tback;
854  rev_prelim_tback = gap_align->rev_prelim_tback;
855  GapPrelimEditBlockReset(fwd_prelim_tback);
856  GapPrelimEditBlockReset(rev_prelim_tback);
857 
858  found_end = FALSE;
859 
860  score_left =
861  Blast_SemiGappedAlign(query, subject, q_start, s_start,
862  &private_q_length, &private_s_length, FALSE, rev_prelim_tback,
863  gap_align, score_params, q_start, FALSE, TRUE, NULL);
864  gap_align->query_start = q_start - private_q_length;
865  gap_align->subject_start = s_start - private_s_length;
866 
867  s_PHIBlastAlignPatterns(query+q_start, subject+s_start, q_pat_length,
868  s_pat_length, pat_prelim_tback, score_params->options,
869  gap_align->sbp->matrix, pattern_blk);
870 
871  /* Pattern traceback and left alignment traceback are both going in forward
872  direction, so the former can be simply appended to the end of the
873  latter. */
874  GapPrelimEditBlockAppend(rev_prelim_tback, pat_prelim_tback);
875  GapPrelimEditBlockFree(pat_prelim_tback);
876 
877  score_right = 0;
878 
879  q_start += q_pat_length - 1;
880  s_start += s_pat_length - 1;
881 
882  if ((q_start < query_length) && (s_start < subject_length)) {
883  found_end = TRUE;
884  score_right =
885  Blast_SemiGappedAlign(query+q_start, subject+s_start,
886  query_length-q_start-1, subject_length-s_start-1,
887  &private_q_length, &private_s_length, FALSE, fwd_prelim_tback,
888  gap_align, score_params, q_start, FALSE, FALSE, NULL);
889 
890  gap_align->query_stop = q_start + private_q_length + 1;
891  gap_align->subject_stop = s_start + private_s_length + 1;
892  }
893 
894  if (found_end == FALSE) {
895  gap_align->query_stop = q_start;
896  gap_align->subject_stop = s_start;
897  }
898 
899  gap_align->edit_script =
900  Blast_PrelimEditBlockToGapEditScript(rev_prelim_tback,
901  fwd_prelim_tback);
902 
903  gap_align->score = score_right + score_left;
904  return status;
905 }
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Int4 Blast_SemiGappedAlign(const Uint1 *A, const Uint1 *B, Int4 M, Int4 N, Int4 *a_offset, Int4 *b_offset, Boolean score_only, GapPrelimEditBlock *edit_block, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 query_offset, Boolean reversed, Boolean reverse_sequence, Boolean *fence_hit)
Low level function to perform gapped extension in one direction with or without traceback.
GapEditScript * Blast_PrelimEditBlockToGapEditScript(GapPrelimEditBlock *rev_prelim_tback, GapPrelimEditBlock *fwd_prelim_tback)
Convert the initial list of traceback actions from a non-OOF gapped alignment into a blast edit scrip...
Structures and functions prototypes used for BLAST gapped extension.
Private interface for blast_gapalign.c.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
Definition: blast_hits.c:151
Int4 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions *options)
Calculated the number of HSPs that should be saved.
Definition: blast_hits.c:213
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Definition: blast_hits.c:1558
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
Definition: blast_hits.c:1754
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Definition: blast_hits.c:1374
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
ncbi::TMaskedQueryRegions mask
void GapPrelimEditBlockAdd(GapPrelimEditBlock *edit_block, EGapAlignOpType op_type, Int4 num_ops)
Add a new operation to a preliminary edit block, possibly combining it with the last operation if the...
Definition: gapinfo.c:175
GapPrelimEditBlock * GapPrelimEditBlockFree(GapPrelimEditBlock *edit_block)
Frees a preliminary edit block structure.
Definition: gapinfo.c:202
@ eGapAlignIns
Insertion: a gap in subject.
Definition: gapinfo.h:51
@ eGapAlignSub
Substitution.
Definition: gapinfo.h:48
@ eGapAlignDel
Deletion: a gap in query.
Definition: gapinfo.h:45
void GapPrelimEditBlockAppend(GapPrelimEditBlock *edit_block1, GapPrelimEditBlock *edit_block2)
Append one GapPrelimEditBlock to the end of the other.
Definition: gapinfo.c:222
GapPrelimEditBlock * GapPrelimEditBlockNew(void)
Allocates a preliminary edit block structure.
Definition: gapinfo.c:188
void GapPrelimEditBlockReset(GapPrelimEditBlock *edit_block)
Reset a preliminary edit block without freeing it.
Definition: gapinfo.c:213
#define NULL
Definition: ncbistd.hpp:225
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int8_t Int1
1-byte (8-bit) signed integer
Definition: ncbitype.h:98
int i
int len
#define MIN(a, b)
returns smaller of a and b.
Definition: ncbi_std.h:112
Uint1 Boolean
bool replacment for C
Definition: ncbi_std.h:94
#define TRUE
bool replacment for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacment for C indicating false.
Definition: ncbi_std.h:101
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
#define INT4_MIN
Smallest (most negative) number represented by signed int.
Definition: ncbi_std.h:146
#define MAX(a, b)
returns larger of a and b.
Definition: ncbi_std.h:117
Int4 _PHIBlastFindHitsShort(Int4 *hitArray, const Uint1 *seq, Int4 len1, const SPHIPatternSearchBlk *pattern_blk)
Routine to find hits of pattern to sequence when sequence is proteins.
Definition: pattern.c:104
void _PHIPatternWordsLeftShift(Int4 *a, Uint1 b, Int4 numWords)
Shift each word in the array left by 1 bit and add bit b.
Definition: pattern.c:236
Int4 _PHIPatternWordsBitwiseAnd(Int4 *result, Int4 *a, Int4 *b, Int4 numWords)
Do a word-by-word bit-wise and of two integer arrays and put the result in a new array.
Definition: pattern.c:265
void _PHIGetRightOneBits(Int4 s, Int4 mask, Int4 *rightOne, Int4 *rightMaskOnly)
Looks for 1 bits in the same position of s and mask Let R be the rightmost position where s and mask ...
Definition: pattern.c:61
void _PHIPatternWordsBitwiseOr(Int4 *a, Int4 *b, Int4 numWords)
Do a word-by-word bit-wise or of two integer arrays and put the result back in the first array.
Definition: pattern.c:257
@ eMultiWord
Does pattern consist of a multiple words?
Definition: pattern.h:75
@ eOneWord
Does pattern consist of a single word?
Definition: pattern.h:74
#define PHI_BITS_PACKED_PER_WORD
Number of bits packed in a word.
Definition: pattern.h:52
#define PHI_MAX_HIT
Maximal size of an array of pattern hits.
Definition: pattern.h:57
Auxiliary functions for finding pattern matches in sequence (PHI-BLAST), that are used in multiple so...
const int kMaskAaAlphabetBits
Masks all bits corresponding to the aminoacid alphabet, i.e.
Definition: phi_lookup.c:41
#define MAX_HITS_IN_WORD
Maximal possible size of the hits array for one word of pattern.
Definition: phi_gapalign.c:420
static Int4 s_Align(Uint1 *seq1, Uint1 *seq2, Int4 end1, Int4 end2, Int4 lowDiag, Int4 highDiag, Int4 **matrix, Int4 gapOpen, Int4 gapExtend, GapPrelimEditBlock *alignScript)
Returns the cost of an optimum conversion within highDiag and lowDiag between two sequence segments a...
Definition: phi_gapalign.c:85
static Int4 s_BandedAlign(Uint1 *seq1, Uint1 *seq2, Int4 start1, Int4 start2, Int4 lowDiag, Int4 highDiag, Int4 **matrix, Int4 gapOpen, Int4 gapExtend, GapPrelimEditBlock *alignScript)
Do a banded gapped alignment of two sequences.
Definition: phi_gapalign.c:280
Int2 PHIGappedAlignmentWithTraceback(Uint1 *query, Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Int4 q_pat_length, Int4 s_pat_length, SPHIPatternSearchBlk *pattern_blk)
Perform a gapped alignment with traceback for PHI BLAST.
Definition: phi_gapalign.c:836
static Int4 s_PHIBlastAlignPatterns(Uint1 *querySeq, Uint1 *dbSeq, Int4 lenQuerySeq, Int4 lenDbSeq, GapPrelimEditBlock *alignScript, const BlastScoringOptions *score_options, SBlastScoreMatrix *score_matrix, SPHIPatternSearchBlk *pattern_blk)
Align pattern occurrences of the query and subject sequences.
Definition: phi_gapalign.c:497
Int2 PHIGetGappedScore(EBlastProgramType program_number, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, BLAST_SequenceBlk *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, const BlastHitSavingParameters *hit_params, BlastInitHitList *init_hitlist, BlastHSPList **hsp_list_ptr, BlastGappedStats *gapped_stats, Boolean *fence_hit)
Preliminary gapped alignment for PHI BLAST.
Definition: phi_gapalign.c:739
EDiagonalState
Definition: phi_gapalign.c:63
@ eDiagonalDelete
Definition: phi_gapalign.c:65
@ eDiagonalInsert
Definition: phi_gapalign.c:64
@ eInsertCode
Definition: phi_gapalign.c:66
@ eDeleteCode
Definition: phi_gapalign.c:67
static Int2 s_PHIGetShortPattern(Uint1 *seq, Int4 len, Int4 *start, Int4 *end, SPHIPatternSearchBlk *pattern_blk)
Finds the position of the first pattern match in an input sequence, if pattern consists of a single w...
Definition: phi_gapalign.c:325
static Int2 s_PHIGetExtraLongPattern(Uint1 *seq, Int4 len, Int4 *hitArray, SPHIPatternSearchBlk *pattern_blk)
Find pattern occurrences in seq when the pattern description is extra long, report the results back i...
Definition: phi_gapalign.c:430
static Int4 s_GapCost(Int4 gap_open, Int4 gap_extend, Int4 length)
k-symbol indel cost.
Definition: phi_gapalign.c:261
static void s_PHIGetLongPattern(Uint1 *seq, Int4 len, Int4 *start, Int4 *end, SPHIPatternSearchBlk *pattern_blk)
Finds the position of the first pattern match in an input sequence, if pattern consists of a more tha...
Definition: phi_gapalign.c:367
static Int2 s_PHIGappedAlignment(BLAST_SequenceBlk *query_blk, BLAST_SequenceBlk *subject_blk, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 query_offset, Int4 subject_offset, Int4 query_length, Int4 subject_length)
Performs gapped extension for PHI BLAST, given two sequence blocks, scoring and extension options,...
Definition: phi_gapalign.c:671
Function prototypes used for PHI BLAST gapped extension and gapped extension with traceback.
Structure to hold a sequence.
Definition: blast_def.h:242
Int4 length
Length of sequence.
Definition: blast_def.h:246
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
Computed values used as parameters for gapped alignments.
Structure supporting the gapped alignment.
GapPrelimEditBlock * fwd_prelim_tback
traceback from right extensions
GapPrelimEditBlock * rev_prelim_tback
traceback from left extensions
Int4 query_stop
query end offseet of current alignment
Int4 subject_start
subject start offset current alignment
BlastScoreBlk * sbp
Pointer to the scoring information block.
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
Auxiliary structure for dynamic programming gapped extension.
Int4 best_gap
score of best path that ends in a gap at this position
Int4 best
score of best path that ends in a match at this position
Structure containing hit counts from the gapped stage of a BLAST search.
Int4 extensions
Total number of gapped extensions performed.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Structure holding all information about an HSP.
Definition: blast_hits.h:126
SPHIHspInfo * pat_info
In PHI BLAST, information about this pattern match.
Definition: blast_hits.h:142
Options used when evaluating and saving hits These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Int4 cutoff_score_min
smallest cutoff score across all contexts
BlastHitSavingOptions * options
The original (unparsed) options.
Structure to hold the initial HSP information.
Definition: blast_extend.h:150
BlastOffsetPair offsets
Offsets in query and subject, or, in PHI BLAST, start and end of pattern in subject.
Definition: blast_extend.h:151
Structure to hold all initial HSPs for a given subject sequence.
Definition: blast_extend.h:158
Int4 total
Total number of hits currently saved.
Definition: blast_extend.h:159
BlastInitHSP * init_hsp_array
Array of offset pairs, possibly with scores.
Definition: blast_extend.h:161
The query related information.
BlastContextInfo * contexts
Information per context.
struct SPHIQueryInfo * pattern_info
Counts of PHI BLAST pattern occurrences, used in PHI BLAST only.
SBlastScoreMatrix * matrix
scoring matrix data
Definition: blast_stat.h:185
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Int4 gap_open
Extra penalty for starting a gap.
Int4 gap_extend
Penalty for each gap residue.
Boolean gapped_calculation
gap-free search if FALSE
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
BlastScoringOptions * options
User-provided values for these params.
Preliminary version of GapEditBlock, used directly by the low- level dynamic programming routines.
Definition: gapinfo.h:73
Scoring matrix used in BLAST.
Definition: blast_stat.h:139
int ** data
actual scoring matrix data, stored in row-major form
Definition: blast_stat.h:140
Auxiliary items needed for a PHI BLAST search with pattern that contains pieces longer than a word.
Definition: pattern.h:105
Int4 numPlacesInWord[100]
When pattern has more than 7 words, keep track of how many places of pattern in each word of the repr...
Definition: pattern.h:108
Int4 highestPlace
Number of places in pattern representation as computed in input_pattern.
Definition: pattern.h:111
Int4 spacing[100]
Spaces until next word due to wildcard.
Definition: pattern.h:110
Auxiliary items needed for a PHI BLAST search with pattern containing multiple words.
Definition: pattern.h:131
Int4 match_maskL[100]
Bit mask representation of input pattern for long patterns.
Definition: pattern.h:134
SExtraLongPatternItems * extra_long_items
Additional items necessary if pattern contains pieces longer than a word.
Definition: pattern.h:147
Int4 SLL[100][256]
For each letter in the alphabet and each word in the masked pattern representation,...
Definition: pattern.h:142
Int4 inputPatternMasked[(30 *11)]
Masked input pattern.
Definition: pattern.h:143
Int4 bitPatternByLetter[256][11]
Which positions can a character occur in for long patterns.
Definition: pattern.h:137
Int4 numWords
Number of words need to hold bit representation of pattern.
Definition: pattern.h:132
In PHI BLAST: information about pattern match in a given HSP.
Definition: blast_hits.h:104
Int4 index
Index of query pattern occurrence for this HSP.
Definition: blast_hits.h:105
Int4 length
Length of this pattern occurrence in subject.
Definition: blast_hits.h:106
Information about a single pattern occurence in the query.
Definition: blast_def.h:292
Int4 length
Length of this pattern occurrence.
Definition: blast_def.h:294
Int4 offset
Starting offset of this pattern occurrence.
Definition: blast_def.h:293
Structure containing all auxiliary information needed in a pattern search.
Definition: pattern.h:155
SShortPatternItems * one_word_items
Items necessary when pattern fits in one word.
Definition: pattern.h:163
EPatternType flagPatternLength
Indicates if the whole pattern fits in 1 word, each of several parts of the pattern fit in a word,...
Definition: pattern.h:159
SLongPatternItems * multi_word_items
Additional items, when pattern requires multiple words.
Definition: pattern.h:166
Int4 num_patterns
Number of pattern occurrences in query.
Definition: blast_def.h:301
SPHIPatternInfo * occurrences
Array of pattern occurrence information structures.
Definition: blast_def.h:302
Auxiliary items needed for a PHI BLAST search with a pattern that fits in a single word.
Definition: pattern.h:94
Int4 * whichPositionPtr
Array of positions where pattern lettern should match, for a single word of the pattern.
Definition: pattern.h:97
Int4 match_mask
Bit mask representation of input pattern for patterns that fit in a word.
Definition: pattern.h:95
static string subject
static string query
Uint4 s_start
Start offset of pattern in subject.
Definition: blast_def.h:147
Uint4 s_end
End offset of pattern in subject.
Definition: blast_def.h:148
struct BlastOffsetPair::@7 phi_offsets
Pattern offsets in subject (PHI BLAST only)
voidp malloc(uInt size)
voidp calloc(uInt items, uInt size)
Modified on Tue Apr 30 06:41:23 2024 by modify_doxy.py rev. 669887