NCBI C++ ToolKit
greedy_align.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: greedy_align.c 85701 2019-03-04 21:01:49Z boratyng $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Functions to perform greedy affine and non-affine gapped alignment
27  *
28  */
29 
30 /** @file greedy_align.c
31  * Functions to perform greedy affine and non-affine gapped alignment.
32  * Reference:
33  * Zhang et. al., "A Greedy Algorithm for Aligning DNA Sequences"
34  * Journal of Computational Biology vol 7 pp 203-214
35  */
36 
39 #include <algo/blast/core/blast_util.h> /* for NCBI2NA_UNPACK_BASE macros */
40 
41 /** see greedy_align.h for description */
42 SMBSpace*
43 MBSpaceNew(int num_space_arrays)
44 {
45  SMBSpace* new_space;
46  const Int4 kMinSpace = 1000000;
47 
48  num_space_arrays = MAX(kMinSpace, num_space_arrays);
49 
50  new_space = (SMBSpace*)malloc(sizeof(SMBSpace));
51  if (new_space == NULL)
52  return NULL;
53 
54  new_space->space_array = (SGreedyOffset*)malloc(
55  num_space_arrays * sizeof(SGreedyOffset));
56  if (new_space->space_array == NULL) {
57  sfree(new_space);
58  return NULL;
59  }
60  new_space->space_used = 0;
61  new_space->space_allocated = num_space_arrays;
62  new_space->next = NULL;
63 
64  return new_space;
65 }
66 
67 /** Mark the input collection of space structures as unused
68  @param space The space to mark
69 */
70 static void
72 {
73  while (space != NULL) {
74  space->space_used = 0;
75  space = space->next;
76  }
77 }
78 
79 /** see greedy_align.h for description */
80 void MBSpaceFree(SMBSpace* space)
81 {
82  SMBSpace* next_space;
83 
84  while (space != NULL) {
85  next_space = space->next;
86  sfree(space->space_array);
87  sfree(space);
88  space = next_space;
89  }
90 }
91 
92 /** Allocate a specified number of SGreedyOffset structures from
93  a memory pool
94 
95  @param pool The memory pool [in]
96  @param num_alloc The number of structures to allocate [in]
97  @return Pointer to the allocated memory, or NULL in case of error
98 */
99 static SGreedyOffset*
100 s_GetMBSpace(SMBSpace* pool, Int4 num_alloc)
101 {
102  SGreedyOffset* out_ptr;
103  if (num_alloc < 0)
104  return NULL;
105 
106  /** @todo FIXME: Calling code must never ask for more
107  than kMinSpace structures (defined in MBSpaceNew()) */
108 
109  while (pool->space_used + num_alloc > pool->space_allocated) {
110  if (pool->next == NULL) {
111  pool->next = MBSpaceNew(num_alloc);
112  if (pool->next == NULL) {
113 #ifdef ERR_POST_EX_DEFINED
114  ErrPostEx(SEV_WARNING, 0, 0, "Cannot get new space for greedy extension");
115 #endif
116  return NULL;
117  }
118  }
119  pool = pool->next;
120  }
121 
122  out_ptr = pool->space_array + pool->space_used;
123  pool->space_used += num_alloc;
124 
125  return out_ptr;
126 }
127 
128 /** Signal that a diagonal is invalid */
129 static const Int4 kInvalidOffset = -2;
130 
131 /** During the traceback for a greedy alignment with affine
132  gap penalties, determine the next state of the traceback after
133  moving upwards in the traceback array from a substitution
134 
135  @param last_seq2_off Array of offsets into the second sequence;
136  last_seq2_off[d][k] gives the largest offset into
137  the second sequence that lies on diagonal k and
138  has distance d [in]
139  @param diag_lower Array of lower bounds on diagonal index [in]
140  @param diag_upper Array of upper bounds on diagonal index [in]
141  @param d Starting distance [in][out]
142  @param diag The diagonal of the current traceback position [in]
143  @param op_cost The sum of the match and mismatch scores [in]
144  @param seq2_index The offset into the second sequence after the traceback
145  operation has completed [out]
146  @return The state for the next traceback operation
147 */
148 static EGapAlignOpType
149 s_GetNextAffineTbackFromMatch(SGreedyOffset** last_seq2_off, Int4* diag_lower,
150  Int4* diag_upper, Int4* d, Int4 diag, Int4 op_cost,
151  Int4* seq2_index)
152 {
153  Int4 new_seq2_index;
154 
155  /* the goal here is to choose the largest seq2 offset
156  that leads to the current distance. There are three
157  choices possible and each is checked in turn */
158 
159  if (diag >= diag_lower[(*d) - op_cost] &&
160  diag <= diag_upper[(*d) - op_cost]) {
161 
162  new_seq2_index = last_seq2_off[(*d) - op_cost][diag].match_off;
163  if (new_seq2_index >= MAX(last_seq2_off[*d][diag].insert_off,
164  last_seq2_off[*d][diag].delete_off)) {
165  *d -= op_cost;
166  *seq2_index = new_seq2_index;
167  return eGapAlignSub;
168  }
169  }
170  if (last_seq2_off[*d][diag].insert_off >
171  last_seq2_off[*d][diag].delete_off) {
172  *seq2_index = last_seq2_off[*d][diag].insert_off;
173  return eGapAlignIns;
174  }
175  else {
176  *seq2_index = last_seq2_off[*d][diag].delete_off;
177  return eGapAlignDel;
178  }
179 }
180 
181 /** During the traceback for a greedy alignment with affine
182  gap penalties, determine the next state of the traceback after
183  moving upwards in the traceback array from an insertion or deletion
184 
185  @param last_seq2_off Array of offsets into the second sequence;
186  last_seq2_off[d][k] gives the largest offset into
187  the second sequence that lies on diagonal k and
188  has distance d [in]
189  @param diag_lower Array of lower bounds on diagonal index [in]
190  @param diag_upper Array of upper bounds on diagonal index [in]
191  @param d Starting distance [in][out]
192  @param diag The diagonal of the current traceback position [in]
193  @param gap_open open a gap [in]
194  @param gap_extend (Modified) cost to extend a gap [in]
195  @param IorD The state of the traceback at present [in]
196  @return The state for the next traceback operation
197 */
198 static EGapAlignOpType
199 s_GetNextAffineTbackFromIndel(SGreedyOffset** last_seq2_off, Int4* diag_lower,
200  Int4* diag_upper, Int4* d, Int4 diag, Int4 gap_open,
201  Int4 gap_extend, EGapAlignOpType IorD)
202 {
203  Int4 new_diag;
204  Int4 new_seq2_index;
205  Int4 gap_open_extend = gap_open + gap_extend;
206  Int4 last_d;
207 
208  /* as with the previous routine, the traceback operation
209  that leads to the current one must be determined. Either
210  a gap is opened from a previous run of matches, or a
211  gap is extended. If several traceback choices are
212  available, choose the one that generates the largest
213  seq2 offset */
214 
215  /* if the previous traceback operation is a gap, then it
216  starts one diagonal away from the current diagonal... */
217 
218  if (IorD == eGapAlignIns)
219  new_diag = diag - 1;
220  else
221  new_diag = diag + 1;
222 
223  /* ...and its seq2 offset is fixed */
224 
225  last_d = (*d) - gap_extend;
226  if (new_diag >= diag_lower[last_d] &&
227  new_diag <= diag_upper[last_d]) {
228 
229  if (IorD == eGapAlignIns)
230  new_seq2_index =
231  last_seq2_off[last_d][new_diag].insert_off;
232  else
233  new_seq2_index =
234  last_seq2_off[last_d][new_diag].delete_off;
235  }
236  else {
237  /* signal that no gap can be extended
238  to achieve distance d */
239  new_seq2_index = kInvalidOffset;
240  }
241 
242  /* make the next traceback operation a match if it is
243  valid to do so and the resulting seq2 offset exceeds the
244  one derived from extending a gap */
245 
246  last_d = (*d) - gap_open_extend;
247  if (new_diag >= diag_lower[last_d] &&
248  new_diag <= diag_upper[last_d] &&
249  new_seq2_index < last_seq2_off[last_d][new_diag].match_off) {
250 
251  *d -= gap_open_extend;
252  return eGapAlignSub;
253  }
254 
255  ASSERT(new_seq2_index != kInvalidOffset);
256  *d -= gap_extend;
257  return IorD;
258 }
259 
260 /** During the traceback for a non-affine greedy alignment,
261  compute the diagonal that will result from the next
262  traceback operation
263 
264  @param last_seq2_off Array of offsets into the second sequence;
265  last_seq2_off[d][k] gives the largest offset into
266  the second sequence that lies on diagonal k and
267  has distance d [in]
268  @param d Starting distance [in]
269  @param diag Index of diagonal that produced the starting distance [in]
270  @param seq2_index The offset into the second sequence after the traceback
271  operation has completed [out]
272  @return The diagonal resulting from the next traceback operation
273  being applied
274 */
275 static Int4
276 s_GetNextNonAffineTback(Int4 **last_seq2_off, Int4 d,
277  Int4 diag, Int4 *seq2_index)
278 {
279  /* choose the traceback operation that results in the
280  largest seq2 offset at this point, then compute the
281  new diagonal that is implied by the operation */
282 
283  if (last_seq2_off[d-1][diag-1] >
284  MAX(last_seq2_off[d-1][diag], last_seq2_off[d-1][diag+1])) {
285  *seq2_index = last_seq2_off[d-1][diag-1];
286  return diag - 1; /* gap in seq2 */
287  }
288  if (last_seq2_off[d-1][diag] > last_seq2_off[d-1][diag+1]) {
289  *seq2_index = last_seq2_off[d-1][diag];
290  return diag; /* match */
291  }
292  *seq2_index = last_seq2_off[d-1][diag+1];
293  return diag + 1; /* gap in seq1 */
294 }
295 
296 
297 /** Find the first mismatch in a pair of sequences
298  * @param seq1 First sequence (always uncompressed) [in]
299  * @param seq2 Second sequence (compressed or uncompressed) [in]
300  * @param len1 Length of seq1 [in]
301  * @param len2 Length of seq2 [in]
302  * @param seq1_index Starting offset in seq1 [in]
303  * @param seq2_index Starting offset in seq2 [in]
304  * @param fence_hit Set to TRUE if an end-of-initialized-data sentinel
305  * is encountered in seq2 [out]
306  * @param reverse If TRUE, comparison proceeds backwards from the end
307  * of seq1 and seq2 [in]
308  * @param rem When seq2 is part of a larger compressed sequence, this is
309  * the offset within a byte of seq2[0]. Set to 4 if seq2
310  * is uncompressed [in]
311  * @return Number of exact matches found before a mismatch was encountered
312  */
313 static NCBI_INLINE Int4 s_FindFirstMismatch(const Uint1 *seq1, const Uint1 *seq2,
314  Int4 len1, Int4 len2,
315  Int4 seq1_index, Int4 seq2_index,
316  Boolean *fence_hit,
317  Boolean reverse, Uint1 rem)
318 {
319  Int4 tmp = seq1_index;
320 
321  /* Sentry detection here should be relatively inexpensive: The
322  sentry value cannot appear in the query, so detection only
323  needs to be done at exit from the subject-query matching loop.
324  For uncompressed sequences, ambiguities in the query (i.e. seq1)
325  always count as mismatches */
326 
327  if (reverse) {
328  if (rem == 4) {
329  while (seq1_index < len1 && seq2_index < len2 &&
330  seq1[len1-1 - seq1_index] < 4 &&
331  seq1[len1-1 - seq1_index] == seq2[len2-1 - seq2_index]) {
332  ++seq1_index;
333  ++seq2_index;
334  }
335 
336  if (seq2_index < len2 && seq2[len2-1-seq2_index] == FENCE_SENTRY) {
337  ASSERT(fence_hit);
338  *fence_hit = TRUE;
339  }
340  } else {
341  while (seq1_index < len1 && seq2_index < len2 &&
342  seq1[len1-1 - seq1_index] ==
343  NCBI2NA_UNPACK_BASE(seq2[(len2-1-seq2_index) / 4],
344  3 - (len2-1-seq2_index) % 4)) {
345  ++seq1_index;
346  ++seq2_index;
347  }
348  }
349  }
350  else {
351  if (rem == 4) {
352  while (seq1_index < len1 && seq2_index < len2 &&
353  seq1[seq1_index] < 4 &&
354  seq1[seq1_index] == seq2[seq2_index]) {
355  ++seq1_index;
356  ++seq2_index;
357  }
358 
359  if (seq2_index < len2 && seq2[seq2_index] == FENCE_SENTRY) {
360  ASSERT(fence_hit);
361  *fence_hit = TRUE;
362  }
363  }
364  else {
365  while (seq1_index < len1 && seq2_index < len2 &&
366  seq1[seq1_index] ==
367  NCBI2NA_UNPACK_BASE(seq2[(seq2_index + rem) / 4],
368  3 - (seq2_index + rem) % 4)) {
369  ++seq1_index;
370  ++seq2_index;
371  }
372  }
373  }
374 
375  return seq1_index - tmp;
376 }
377 
378 
379 /** see greedy_align.h for description */
380 Int4 BLAST_GreedyAlign(const Uint1* seq1, Int4 len1,
381  const Uint1* seq2, Int4 len2,
382  Boolean reverse, Int4 xdrop_threshold,
383  Int4 match_cost, Int4 mismatch_cost,
384  Int4* seq1_align_len, Int4* seq2_align_len,
385  SGreedyAlignMem* aux_data,
386  GapPrelimEditBlock *edit_block, Uint1 rem,
387  Boolean * fence_hit, SGreedySeed *seed)
388 {
389  Int4 seq1_index;
390  Int4 seq2_index;
391  Int4 index;
392  Int4 d;
393  Int4 k;
394  Int4 diag_lower, diag_upper;
395  Int4 max_dist;
396  Int4 diag_origin;
397  Int4 best_dist;
398  Int4 best_diag;
399  Int4** last_seq2_off;
400  Int4* max_score;
401  Int4 xdrop_offset;
402  Int4 longest_match_run;
403  Boolean end1_reached, end2_reached;
404  SMBSpace* mem_pool;
405  Boolean converged = FALSE;
406 
407  /* ordinary dynamic programming alignment, for each offset
408  in seq1, walks through offsets in seq2 until an X-dropoff
409  test fails, saving the best score encountered along
410  the way. Instead of score, this code tracks the 'distance'
411  (number of mismatches plus number of gaps) between seq1
412  and seq2. Instead of walking through sequence offsets, it
413  walks through diagonals that can achieve a given distance.
414 
415  Note that in what follows, the numbering of diagonals implies
416  a dot matrix where increasing seq1 offsets go to the right on
417  the x axis, and increasing seq2 offsets go up the y axis.
418  The gapped alignment thus proceeds up and to the right in
419  the graph, and diagonals are numbered increasing to the right */
420 
421  best_dist = 0;
422  best_diag = 0;
423 
424  /* set the number of distinct distances the algorithm will
425  examine in the search for an optimal alignment. The
426  heuristic worst-case running time of the algorithm is
427  O(max_dist**2 + (len1+len2)); for sequences which are
428  very similar, the average running time will be sig-
429  nificantly better than this */
430 
431  max_dist = aux_data->max_dist;
432 
433  /* the main loop assumes that the index of all diagonals is
434  biased to lie in the middle of allocated bookkeeping
435  structures */
436 
437  diag_origin = max_dist + 2;
438 
439  /* last_seq2_off[d][k] is the largest offset into seq2 that
440  lies on diagonal k and has distance d */
441 
442  last_seq2_off = aux_data->last_seq2_off;
443 
444  /* Instead of tracking the best alignment score and using
445  xdrop_theshold directly, track the best score for each
446  unique distance and use the best score for some previously
447  computed distance to implement the X-dropoff test.
448 
449  xdrop_offset gives the distance backwards in the score
450  array to look */
451 
452  xdrop_offset = (xdrop_threshold + match_cost / 2) /
453  (match_cost + mismatch_cost) + 1;
454 
455  /* find the offset of the first mismatch between seq1 and seq2 */
456 
457  index = s_FindFirstMismatch(seq1, seq2, len1, len2, 0, 0,
458  fence_hit, reverse, rem);
459 
460  /* update the extents of the alignment, and bail out
461  early if no further work is needed */
462 
463  *seq1_align_len = index;
464  *seq2_align_len = index;
465  seq1_index = index;
466 
467  seed->start_q = 0;
468  seed->start_s = 0;
469  seed->match_length = longest_match_run = index;
470 
471  if (index == len1 || index == len2) {
472  /* Return the number of differences, which is zero here */
473 
474  if (edit_block != NULL)
475  GapPrelimEditBlockAdd(edit_block, eGapAlignSub, index);
476  return 0;
477  }
478 
479  /* set up the memory pool */
480 
481  mem_pool = aux_data->space;
482  if (edit_block == NULL) {
483  mem_pool = NULL;
484  }
485  else if (mem_pool == NULL) {
486  aux_data->space = mem_pool = MBSpaceNew(0);
487  }
488  else {
489  s_RefreshMBSpace(mem_pool);
490  }
491 
492  /* set up the array of per-distance maximum scores. There
493  are max_diags + xdrop_offset distances to track, the first
494  xdrop_offset of which are 0 */
495 
496  max_score = aux_data->max_score + xdrop_offset;
497  for (index = 0; index < xdrop_offset; index++)
498  aux_data->max_score[index] = 0;
499 
500  /* fill in the initial offsets of the distance matrix */
501 
502  last_seq2_off[0][diag_origin] = seq1_index;
503  max_score[0] = seq1_index * match_cost;
504  diag_lower = diag_origin - 1;
505  diag_upper = diag_origin + 1;
506  end1_reached = end2_reached = FALSE;
507 
508  /* for each distance */
509  for (d = 1; d <= max_dist; d++) {
510  Int4 xdrop_score;
511  Int4 curr_score;
512  Int4 curr_extent = 0;
513  Int4 curr_seq2_index = 0;
514  Int4 curr_diag = 0;
515  Int4 tmp_diag_lower = diag_lower;
516  Int4 tmp_diag_upper = diag_upper;
517 
518  /* assign impossible seq2 offsets to any diagonals that
519  are not in the range (diag_lower,diag_upper).
520  These will serve as sentinel values for the
521  inner loop */
522 
523  last_seq2_off[d - 1][diag_lower-1] = kInvalidOffset;
524  last_seq2_off[d - 1][diag_lower] = kInvalidOffset;
525  last_seq2_off[d - 1][diag_upper] = kInvalidOffset;
526  last_seq2_off[d - 1][diag_upper+1] = kInvalidOffset;
527 
528  /* compute the score for distance d that corresponds to
529  the X-dropoff criterion */
530 
531  xdrop_score = max_score[d - xdrop_offset] +
532  (match_cost + mismatch_cost) * d - xdrop_threshold;
533  xdrop_score = (Int4)ceil((double)xdrop_score / (match_cost / 2));
534 
535  /* for each diagonal of interest */
536 
537  for (k = tmp_diag_lower; k <= tmp_diag_upper; k++) {
538 
539  /* find the largest offset into seq2 that increases
540  the distance from d-1 to d (i.e. keeps the alignment
541  from getting worse for as long as possible), then
542  choose the offset into seq1 that will keep the
543  resulting diagonal fixed at k
544 
545  Note that this requires kInvalidOffset+1 to be smaller
546  than any valid offset into seq2, i.e. to be negative */
547 
548  seq2_index = MAX(last_seq2_off[d - 1][k + 1],
549  last_seq2_off[d - 1][k ]) + 1;
550  seq2_index = MAX(seq2_index, last_seq2_off[d - 1][k - 1]);
551  seq1_index = seq2_index + k - diag_origin;
552 
553  if (seq2_index < 0 || seq1_index + seq2_index < xdrop_score) {
554 
555  /* if no valid diagonal can reach distance d, or the
556  X-dropoff test fails, narrow the range of diagonals
557  to test and skip to the next diagonal */
558 
559  if (k == diag_lower)
560  diag_lower++;
561  else
562  last_seq2_off[d][k] = kInvalidOffset;
563  continue;
564  }
565  diag_upper = k;
566 
567  /* slide down diagonal k until a mismatch
568  occurs. As long as only matches are encountered,
569  the current distance d will not change */
570 
571  index = s_FindFirstMismatch(seq1, seq2, len1, len2,
572  seq1_index, seq2_index,
573  fence_hit, reverse, rem);
574  if(fence_hit && *fence_hit){
575  return 0;
576  }
577 
578  if (index > longest_match_run) {
579  seed->start_q = seq1_index;
580  seed->start_s = seq2_index;
581  seed->match_length = longest_match_run = index;
582  }
583  seq1_index += index;
584  seq2_index += index;
585 
586  /* set the new largest seq2 offset that achieves
587  distance d on diagonal k */
588 
589  last_seq2_off[d][k] = seq2_index;
590 
591  /* since all values of k are constrained to have the
592  same distance d, the value of k which maximizes the
593  alignment score is the one that covers the most
594  of seq1 and seq2 */
595 
596  if (seq1_index + seq2_index > curr_extent) {
597  curr_extent = seq1_index + seq2_index;
598  curr_seq2_index = seq2_index;
599  curr_diag = k;
600  }
601 
602  /* clamp the bounds on diagonals to avoid walking off
603  either sequence. Because the bounds increase by at
604  most one for each distance, diag_lower and diag_upper
605  can each be of size at most max_diags+2 */
606 
607  if (seq2_index == len2) {
608  diag_lower = k + 1;
609  end2_reached = TRUE;
610  }
611  if (seq1_index == len1) {
612  diag_upper = k - 1;
613  end1_reached = TRUE;
614  }
615  } /* end loop over diagonals */
616 
617  /* compute the maximum score possible for distance d */
618 
619  curr_score = curr_extent * (match_cost / 2) -
620  d * (match_cost + mismatch_cost);
621 
622  /* if this is the best score seen so far, update the
623  statistics of the best alignment */
624 
625  if (curr_score >= max_score[d - 1]) {
626  max_score[d] = curr_score;
627  best_dist = d;
628  best_diag = curr_diag;
629  *seq2_align_len = curr_seq2_index;
630  *seq1_align_len = curr_seq2_index + best_diag - diag_origin;
631  }
632  else {
633  max_score[d] = max_score[d - 1];
634  }
635 
636  /* alignment has finished if the lower and upper bounds
637  on diagonals to check have converged to each other */
638 
639  if (diag_lower > diag_upper) {
640  converged = TRUE;
641  break;
642  }
643 
644  /* set up for the next distance to examine. Because the
645  bounds increase by at most one for each distance,
646  diag_lower and diag_upper can each be of size at
647  most max_diags+2 */
648 
649  if (!end2_reached)
650  diag_lower--;
651  if (!end1_reached)
652  diag_upper++;
653 
654  /* if no traceback is specified, the next row of
655  last_seq2_off can reuse previously allocated memory */
656 
657  if (edit_block == NULL) {
658 
659  /** @todo FIXME The following assumes two arrays of
660  at least max_dist+4 Int4's have already been allocated */
661 
662  last_seq2_off[d + 1] = last_seq2_off[d - 1];
663  }
664  else {
665 
666  /* traceback requires all rows of last_seq2_off to be saved,
667  so a new row must be allocated. The allocator provides
668  SThreeVal structures which are 3 times larger than Int4,
669  so divide requested amount by 3 */
670 
671  /** @todo FIXME Should make allocator more general */
672 
673  last_seq2_off[d + 1] = (Int4*) s_GetMBSpace(mem_pool,
674  (diag_upper - diag_lower + 7) / 3);
675 
676  /* move the origin for this row backwards */
677 
678  last_seq2_off[d + 1] = last_seq2_off[d + 1] - diag_lower + 2;
679  }
680  } /* end loop over distinct distances */
681 
682  if (!converged)
683  return -1;
684 
685  if (edit_block == NULL)
686  return best_dist;
687 
688  /* perform traceback */
689 
690  d = best_dist;
691  seq2_index = *seq2_align_len;
692 
693  /* for all positive distances */
694 
695  if (fence_hit && *fence_hit)
696  goto done;
697 
698  while (d > 0) {
699  Int4 new_diag;
700  Int4 new_seq2_index;
701 
702  /* retrieve the value of the diagonal after the next
703  traceback operation. best_diag starts off with the
704  value computed during the alignment process */
705 
706  new_diag = s_GetNextNonAffineTback(last_seq2_off, d,
707  best_diag, &new_seq2_index);
708 
709  if (new_diag == best_diag) {
710 
711  /* same diagonal: issue a group of substitutions */
712 
713  if (seq2_index - new_seq2_index > 0) {
714  GapPrelimEditBlockAdd(edit_block, eGapAlignSub,
715  seq2_index - new_seq2_index);
716  }
717  }
718  else if (new_diag < best_diag) {
719 
720  /* smaller diagonal: issue a group of substitutions
721  and then a gap in seq2 */
722 
723  if (seq2_index - new_seq2_index > 0) {
724  GapPrelimEditBlockAdd(edit_block, eGapAlignSub,
725  seq2_index - new_seq2_index);
726  }
727  GapPrelimEditBlockAdd(edit_block, eGapAlignIns, 1);
728  }
729  else {
730  /* larger diagonal: issue a group of substitutions
731  and then a gap in seq1 */
732 
733  if (seq2_index - new_seq2_index - 1 > 0) {
735  seq2_index - new_seq2_index - 1);
736  }
737  GapPrelimEditBlockAdd(edit_block, eGapAlignDel, 1);
738  }
739  d--;
740  best_diag = new_diag;
741  seq2_index = new_seq2_index;
742  }
743 
744 done:
745  /* handle the final group of substitutions back to distance zero,
746  i.e. back to offset zero of seq1 and seq2 */
747 
748  GapPrelimEditBlockAdd(edit_block, eGapAlignSub,
749  last_seq2_off[0][diag_origin]);
750 
751  return best_dist;
752 }
753 
754 /** See greedy_align.h for description */
756  const Uint1* seq2, Int4 len2,
757  Boolean reverse, Int4 xdrop_threshold,
758  Int4 match_score, Int4 mismatch_score,
759  Int4 in_gap_open, Int4 in_gap_extend,
760  Int4* seq1_align_len, Int4* seq2_align_len,
761  SGreedyAlignMem* aux_data,
762  GapPrelimEditBlock *edit_block, Uint1 rem,
763  Boolean * fence_hit, SGreedySeed *seed)
764 {
765  Int4 seq1_index;
766  Int4 seq2_index;
767  Int4 index;
768  Int4 d;
769  Int4 k;
770  Int4 max_dist;
771  Int4 scaled_max_dist;
772  Int4 diag_origin;
773  Int4 best_dist;
774  Int4 best_diag;
775  Int4 longest_match_run;
776  SGreedyOffset** last_seq2_off;
777  Int4* max_score;
778  Int4 xdrop_offset;
779  Int4 end1_diag, end2_diag;
780  SMBSpace* mem_pool;
781 
782  Int4 op_cost;
783  Int4 gap_open;
784  Int4 gap_extend;
785  Int4 gap_open_extend;
786  Int4 max_penalty;
787  Int4 score_common_factor;
788  Int4 match_score_half;
789 
790  Int4 *diag_lower;
791  Int4 *diag_upper;
792  Int4 curr_diag_lower;
793  Int4 curr_diag_upper;
794 
795  Int4 num_nonempty_dist;
796  const Int4 kInvalidDiag = 100000000; /* larger than any valid diag. index */
797  Boolean converged = FALSE;
798 
799  /* make sure bits of match_score don't disappear if it
800  is divided by 2 */
801 
802  if (match_score % 2 == 1) {
803  match_score *= 2;
804  mismatch_score *= 2;
805  xdrop_threshold *= 2;
806  in_gap_open *= 2;
807  in_gap_extend *= 2;
808  }
809 
810  if (in_gap_open == 0 && in_gap_extend == 0) {
811  return BLAST_GreedyAlign(seq1, len1, seq2, len2, reverse,
812  xdrop_threshold, match_score,
813  mismatch_score, seq1_align_len,
814  seq2_align_len, aux_data, edit_block,
815  rem, fence_hit, seed);
816  }
817 
818  /* ordinary dynamic programming alignment, for each offset
819  in seq1, walks through offsets in seq2 until an X-dropoff
820  test fails, saving the best score encountered along
821  the way. Instead of score, this code tracks the 'distance'
822  (number of mismatches plus number of gaps) between seq1
823  and seq2. Instead of walking through sequence offsets, it
824  walks through diagonals that can achieve a given distance
825 
826  Note that in what follows, the numbering of diagonals implies
827  a dot matrix where increasing seq1 offsets go to the right on
828  the x axis, and increasing seq2 offsets go up the y axis.
829  The gapped alignment thus proceeds up and to the right in
830  the graph, and diagonals are numbered increasing to the right */
831 
832  best_dist = 0;
833  best_diag = 0;
834 
835  /* fill in derived scores and penalties */
836 
837  match_score_half = match_score / 2;
838  op_cost = match_score + mismatch_score;
839  gap_open = in_gap_open;
840  gap_extend = in_gap_extend + match_score_half;
841  score_common_factor = BLAST_Gdb3(&op_cost, &gap_open, &gap_extend);
842  gap_open_extend = gap_open + gap_extend;
843  max_penalty = MAX(op_cost, gap_open_extend);
844 
845  /* set the number of distinct distances the algorithm will
846  examine in the search for an optimal alignment */
847 
848  max_dist = aux_data->max_dist;
849  scaled_max_dist = max_dist * gap_extend;
850 
851  /* the main loop assumes that the index of all diagonals is
852  biased to lie in the middle of allocated bookkeeping structures */
853 
854  diag_origin = max_dist + 2;
855 
856  /* last_seq2_off[d][k] is the largest offset into seq2 that
857  lies on diagonal k and has distance d. Unlike the non-affine
858  case, the largest offset for paths ending in an insertion,
859  deletion, and match must all be separately saved for
860  each d and k */
861 
862  last_seq2_off = aux_data->last_seq2_off_affine;
863 
864  /* Instead of tracking the best alignment score and using
865  xdrop_theshold directly, track the best score for each
866  unique distance and use the best score for some previously
867  computed distance to implement the X-dropoff test.
868 
869  xdrop_offset gives the distance backwards in the score
870  array to look */
871 
872  xdrop_offset = (xdrop_threshold + match_score_half) /
873  score_common_factor + 1;
874 
875  /* find the offset of the first mismatch between seq1 and seq2 */
876 
877  index = s_FindFirstMismatch(seq1, seq2, len1, len2, 0, 0,
878  fence_hit, reverse, rem);
879 
880  if (fence_hit && *fence_hit) {
881  return -1;
882  }
883 
884 
885  /* update the extents of the alignment, and bail out
886  early if no further work is needed */
887 
888  *seq1_align_len = index;
889  *seq2_align_len = index;
890  seq1_index = index;
891 
892  seed->start_q = 0;
893  seed->start_s = 0;
894  seed->match_length = longest_match_run = index;
895 
896  if (index == len1 || index == len2) {
897  /* return the score of the run of matches */
898  if (edit_block != NULL)
899  GapPrelimEditBlockAdd(edit_block, eGapAlignSub, index);
900  return index * match_score;
901  }
902 
903  /* set up the memory pool */
904 
905  mem_pool = aux_data->space;
906  if (edit_block == NULL) {
907  mem_pool = NULL;
908  }
909  else if (!mem_pool) {
910  aux_data->space = mem_pool = MBSpaceNew(0);
911  }
912  else {
913  s_RefreshMBSpace(mem_pool);
914  }
915 
916  /* set up the array of per-distance maximum scores. There
917  are scaled_max_dist + xdrop_offset distances to track,
918  the first xdrop_offset of which are 0 */
919 
920  max_score = aux_data->max_score + xdrop_offset;
921  for (index = 0; index < xdrop_offset; index++)
922  aux_data->max_score[index] = 0;
923 
924  /* For affine greedy alignment, contributions to distance d
925  can come from distances further back than d-1 (which is
926  sufficient for non-affine alignment). Where non-affine
927  alignment only needs to track the current bounds on diagonals
928  to test, the present code must also track the upper and
929  lower bounds on diagonals for max_penalty previous distances.
930  These share the same preallocated array */
931 
932  diag_lower = aux_data->diag_bounds;
933  diag_upper = aux_data->diag_bounds +
934  scaled_max_dist + 1 + max_penalty;
935 
936  /* the first max_penalty elements correspond to negative
937  distances; initialize with an empty range of diagonals */
938 
939  for (index = 0; index < max_penalty; index++) {
940  diag_lower[index] = kInvalidDiag;
941  diag_upper[index] = -kInvalidDiag;
942  }
943  diag_lower += max_penalty;
944  diag_upper += max_penalty;
945 
946  /* fill in the statistics for distance zero, i.e. the initial
947  run of exact matches */
948 
949  last_seq2_off[0][diag_origin].match_off = seq1_index;
950  last_seq2_off[0][diag_origin].insert_off = kInvalidOffset;
951  last_seq2_off[0][diag_origin].delete_off = kInvalidOffset;
952  max_score[0] = seq1_index * match_score;
953  diag_lower[0] = diag_origin;
954  diag_upper[0] = diag_origin;
955 
956  /* set up for distance 1 */
957 
958  curr_diag_lower = diag_origin - 1;
959  curr_diag_upper = diag_origin + 1;
960  end1_diag = 0;
961  end2_diag = 0;
962  num_nonempty_dist = 1;
963  d = 1;
964 
965  /* for each distance */
966 
967  while (d <= scaled_max_dist) {
968  Int4 xdrop_score;
969  Int4 curr_score;
970  Int4 curr_extent = 0;
971  Int4 curr_seq2_index = 0;
972  Int4 curr_diag = 0;
973  Int4 tmp_diag_lower = curr_diag_lower;
974  Int4 tmp_diag_upper = curr_diag_upper;
975 
976  /* compute the score for distance d that corresponds to
977  the X-dropoff criterion */
978 
979  xdrop_score = max_score[d - xdrop_offset] +
980  score_common_factor * d - xdrop_threshold;
981  xdrop_score = (Int4)ceil((double)xdrop_score / match_score_half);
982  if (xdrop_score < 0)
983  xdrop_score = 0;
984 
985  /* for each valid diagonal */
986 
987  for (k = tmp_diag_lower; k <= tmp_diag_upper; k++) {
988 
989  /* As with the non-affine algorithm, the object is
990  to find the largest offset into seq2 that can
991  achieve distance d from diagonal k. Here, however,
992  contributions are possible from distances < d-1 */
993 
994  /* begin by assuming the best offset comes from opening
995  a gap in seq1. Since opening a gap costs gap_open_extend,
996  use the offset associated with a match from that
997  far back in the table. Do not use diagonal k+1 if
998  it was not valid back then */
999 
1000  seq2_index = kInvalidOffset;
1001  if (k + 1 <= diag_upper[d - gap_open_extend] &&
1002  k + 1 >= diag_lower[d - gap_open_extend]) {
1003  seq2_index = last_seq2_off[d - gap_open_extend][k+1].match_off;
1004  }
1005 
1006  /* Replace with the offset derived from extending a gap
1007  in seq1, if that is larger */
1008 
1009  if (k + 1 <= diag_upper[d - gap_extend] &&
1010  k + 1 >= diag_lower[d - gap_extend] &&
1011  seq2_index < last_seq2_off[d - gap_extend][k+1].delete_off) {
1012  seq2_index = last_seq2_off[d - gap_extend][k+1].delete_off;
1013  }
1014 
1015  /* save the index; if it was valid, a deletion
1016  (= gap in seq1) means seq2 offset slips by one */
1017 
1018  if (seq2_index == kInvalidOffset)
1019  last_seq2_off[d][k].delete_off = kInvalidOffset;
1020  else
1021  last_seq2_off[d][k].delete_off = seq2_index + 1;
1022 
1023  /* repeat the process assuming a gap is opened or
1024  extended in seq2. Gaps in seq2 do not change seq2_index */
1025 
1026  seq2_index = kInvalidOffset;
1027  if (k - 1 <= diag_upper[d - gap_open_extend] &&
1028  k - 1 >= diag_lower[d - gap_open_extend]) {
1029  seq2_index = last_seq2_off[d - gap_open_extend][k-1].match_off;
1030  }
1031  if (k - 1 <= diag_upper[d - gap_extend] &&
1032  k - 1 >= diag_lower[d - gap_extend] &&
1033  seq2_index < last_seq2_off[d - gap_extend][k-1].insert_off) {
1034  seq2_index = last_seq2_off[d - gap_extend][k-1].insert_off;
1035  }
1036  last_seq2_off[d][k].insert_off = seq2_index;
1037 
1038  /* Compare the greater of the two previous answers with
1039  the offset associated with a match on diagonal k. */
1040 
1041  seq2_index = MAX(last_seq2_off[d][k].insert_off,
1042  last_seq2_off[d][k].delete_off);
1043  if (k <= diag_upper[d - op_cost] &&
1044  k >= diag_lower[d - op_cost]) {
1045  seq2_index = MAX(seq2_index,
1046  last_seq2_off[d - op_cost][k].match_off + 1);
1047  }
1048 
1049  /* choose the offset into seq1 so as to remain on diagonal k */
1050 
1051  seq1_index = seq2_index + k - diag_origin;
1052 
1053  /* perform the X-dropoff test; if it fails, or no
1054  previous cell can contribute to the current one,
1055  give up and try the next diagonal, adjusting the
1056  bounds on diagonals for distance d */
1057 
1058  if (seq2_index < 0 || seq1_index + seq2_index < xdrop_score) {
1059  if (k == curr_diag_lower)
1060  curr_diag_lower++;
1061  else
1062  last_seq2_off[d][k].match_off = kInvalidOffset;
1063  continue;
1064  }
1065  curr_diag_upper = k;
1066 
1067  /* slide down diagonal k until a mismatch
1068  occurs. As long as only matches are encountered,
1069  the current distance d will not change */
1070 
1071  index = s_FindFirstMismatch(seq1, seq2, len1, len2,
1072  seq1_index, seq2_index,
1073  fence_hit, reverse, rem);
1074 
1075  if (fence_hit && *fence_hit) {
1076  return -1;
1077  }
1078 
1079 
1080  if (index > longest_match_run) {
1081  seed->start_q = seq1_index;
1082  seed->start_s = seq2_index;
1083  seed->match_length = longest_match_run = index;
1084  }
1085  seq1_index += index;
1086  seq2_index += index;
1087 
1088  /* since all values of k are constrained to have the
1089  same distance d, the value of k which maximizes the
1090  alignment score is the one that covers the most
1091  of seq1 and seq2 */
1092 
1093  last_seq2_off[d][k].match_off = seq2_index;
1094  if (seq1_index + seq2_index > curr_extent) {
1095  curr_extent = seq1_index + seq2_index;
1096  curr_seq2_index = seq2_index;
1097  curr_diag = k;
1098  }
1099 
1100  /* clamp the bounds on diagonals to avoid walking off
1101  either sequence */
1102 
1103  if (seq1_index == len1) {
1104  curr_diag_upper = k;
1105  end1_diag = k - 1;
1106  }
1107  if (seq2_index == len2) {
1108  curr_diag_lower = k;
1109  end2_diag = k + 1;
1110  }
1111  } /* end loop over diagonals */
1112 
1113  /* compute the maximum score possible for distance d */
1114 
1115  curr_score = curr_extent * match_score_half - d * score_common_factor;
1116 
1117  /* if this is the best score seen so far, update the
1118  statistics of the best alignment */
1119 
1120  if (curr_score > max_score[d - 1]) {
1121  max_score[d] = curr_score;
1122  best_dist = d;
1123  best_diag = curr_diag;
1124  *seq2_align_len = curr_seq2_index;
1125  *seq1_align_len = curr_seq2_index + best_diag - diag_origin;
1126  }
1127  else {
1128  max_score[d] = max_score[d - 1];
1129  }
1130 
1131  /* save the bounds on diagonals to examine for distance d.
1132  Note that in the non-affine case the alignment could stop
1133  if these bounds converged to each other. Here, however,
1134  it's possible for distances less than d to continue the
1135  alignment even if no diagonals are available at distance d.
1136  Hence we can only stop if max_penalty consecutive ranges
1137  of diagonals are empty */
1138 
1139  if (curr_diag_lower <= curr_diag_upper) {
1140  num_nonempty_dist++;
1141  diag_lower[d] = curr_diag_lower;
1142  diag_upper[d] = curr_diag_upper;
1143  }
1144  else {
1145  diag_lower[d] = kInvalidDiag;
1146  diag_upper[d] = -kInvalidDiag;
1147  }
1148 
1149  if (diag_lower[d - max_penalty] <= diag_upper[d - max_penalty])
1150  num_nonempty_dist--;
1151 
1152  if (num_nonempty_dist == 0) {
1153  converged = TRUE;
1154  break;
1155  }
1156 
1157  /* compute the range of diagonals to test for the next
1158  value of d. These must be conservative, in that any
1159  diagonal that could possibly contribute must be allowed.
1160  curr_diag_lower and curr_diag_upper can each be of size at
1161  most scaled_max_diags+2; they can also represent an
1162  empty range, in which case the next value of d will never
1163  improve the best score */
1164 
1165  d++;
1166  curr_diag_lower = MIN(diag_lower[d - gap_open_extend],
1167  diag_lower[d - gap_extend]) - 1;
1168  curr_diag_lower = MIN(curr_diag_lower, diag_lower[d - op_cost]);
1169 
1170  if (end2_diag > 0)
1171  curr_diag_lower = MAX(curr_diag_lower, end2_diag);
1172 
1173  curr_diag_upper = MAX(diag_upper[d - gap_open_extend],
1174  diag_upper[d - gap_extend]) + 1;
1175  curr_diag_upper = MAX(curr_diag_upper,
1176  diag_upper[d - op_cost]);
1177 
1178  if (end1_diag > 0)
1179  curr_diag_upper = MIN(curr_diag_upper, end1_diag);
1180 
1181  if (d > max_penalty) {
1182  if (edit_block == NULL) {
1183 
1184  /* if no traceback is required, the next row of
1185  last_seq2_off can reuse previously allocated memory */
1186 
1187  last_seq2_off[d] = last_seq2_off[d - max_penalty - 1];
1188  }
1189  else {
1190 
1191  /* traceback requires all rows of last_seq2_off to be saved,
1192  so a new row must be allocated */
1193 
1194  last_seq2_off[d] = s_GetMBSpace(mem_pool,
1195  curr_diag_upper - curr_diag_lower + 1) -
1196  curr_diag_lower;
1197  }
1198  }
1199  } /* end loop over distances */
1200 
1201  if (! converged) return -1;
1202 
1203  /* compute the traceback if necessary */
1204 
1205  if (edit_block != NULL) {
1207 
1208  d = best_dist;
1209  seq2_index = *seq2_align_len;
1210  state = eGapAlignSub;
1211 
1212  while (d > 0) {
1213  if (state == eGapAlignSub) {
1214  /* substitution */
1215  Int4 new_seq2_index;
1216  state = s_GetNextAffineTbackFromMatch(last_seq2_off,
1217  diag_lower, diag_upper, &d, best_diag,
1218  op_cost, &new_seq2_index);
1219 
1220  ASSERT(seq2_index > new_seq2_index);
1221  GapPrelimEditBlockAdd(edit_block, eGapAlignSub,
1222  seq2_index - new_seq2_index);
1223  seq2_index = new_seq2_index;
1224  }
1225  else if (state == eGapAlignIns) {
1226  /* gap in seq2 */
1227  GapPrelimEditBlockAdd(edit_block, eGapAlignIns, 1);
1228  state = s_GetNextAffineTbackFromIndel(last_seq2_off,
1229  diag_lower, diag_upper, &d, best_diag,
1230  gap_open, gap_extend, eGapAlignIns);
1231  best_diag--;
1232  }
1233  else {
1234  /* gap in seq1 */
1235  GapPrelimEditBlockAdd(edit_block, eGapAlignDel, 1);
1236  state = s_GetNextAffineTbackFromIndel(last_seq2_off,
1237  diag_lower, diag_upper, &d, best_diag,
1238  gap_open, gap_extend, eGapAlignDel);
1239  best_diag++;
1240  seq2_index--;
1241  }
1242  }
1243 
1244  /* write the last group of matches */
1245  GapPrelimEditBlockAdd(edit_block, eGapAlignSub,
1246  last_seq2_off[0][diag_origin].match_off);
1247  }
1248 
1249  return max_score[best_dist];
1250 }
1251 
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
Various auxiliary BLAST utility functions.
#define FENCE_SENTRY
This sentry value is used as a 'fence' around the valid portions of partially decoded sequences.
Definition: blast_util.h:364
#define NCBI2NA_UNPACK_BASE(x, N)
Macro to extract base N from a byte x (N >= 0, N < 4)
Definition: blast_util.h:55
static char tmp[3200]
Definition: utf8.c:42
void GapPrelimEditBlockAdd(GapPrelimEditBlock *edit_block, EGapAlignOpType op_type, Int4 num_ops)
Add a new operation to a preliminary edit block, possibly combining it with the last operation if the...
Definition: gapinfo.c:175
EGapAlignOpType
Operation types within the edit script.
Definition: gapinfo.h:44
@ eGapAlignIns
Insertion: a gap in subject.
Definition: gapinfo.h:51
@ eGapAlignSub
Substitution.
Definition: gapinfo.h:48
@ eGapAlignDel
Deletion: a gap in query.
Definition: gapinfo.h:45
#define SEV_WARNING
Definition: gicache.c:90
static Int4 s_GetNextNonAffineTback(Int4 **last_seq2_off, Int4 d, Int4 diag, Int4 *seq2_index)
During the traceback for a non-affine greedy alignment, compute the diagonal that will result from th...
Definition: greedy_align.c:276
static SGreedyOffset * s_GetMBSpace(SMBSpace *pool, Int4 num_alloc)
Allocate a specified number of SGreedyOffset structures from a memory pool.
Definition: greedy_align.c:100
Int4 BLAST_AffineGreedyAlign(const Uint1 *seq1, Int4 len1, const Uint1 *seq2, Int4 len2, Boolean reverse, Int4 xdrop_threshold, Int4 match_score, Int4 mismatch_score, Int4 in_gap_open, Int4 in_gap_extend, Int4 *seq1_align_len, Int4 *seq2_align_len, SGreedyAlignMem *aux_data, GapPrelimEditBlock *edit_block, Uint1 rem, Boolean *fence_hit, SGreedySeed *seed)
See greedy_align.h for description.
Definition: greedy_align.c:755
Int4 BLAST_GreedyAlign(const Uint1 *seq1, Int4 len1, const Uint1 *seq2, Int4 len2, Boolean reverse, Int4 xdrop_threshold, Int4 match_cost, Int4 mismatch_cost, Int4 *seq1_align_len, Int4 *seq2_align_len, SGreedyAlignMem *aux_data, GapPrelimEditBlock *edit_block, Uint1 rem, Boolean *fence_hit, SGreedySeed *seed)
see greedy_align.h for description
Definition: greedy_align.c:380
static const Int4 kInvalidOffset
Signal that a diagonal is invalid.
Definition: greedy_align.c:129
SMBSpace * MBSpaceNew(int num_space_arrays)
see greedy_align.h for description
Definition: greedy_align.c:43
static NCBI_INLINE Int4 s_FindFirstMismatch(const Uint1 *seq1, const Uint1 *seq2, Int4 len1, Int4 len2, Int4 seq1_index, Int4 seq2_index, Boolean *fence_hit, Boolean reverse, Uint1 rem)
Find the first mismatch in a pair of sequences.
Definition: greedy_align.c:313
static EGapAlignOpType s_GetNextAffineTbackFromMatch(SGreedyOffset **last_seq2_off, Int4 *diag_lower, Int4 *diag_upper, Int4 *d, Int4 diag, Int4 op_cost, Int4 *seq2_index)
During the traceback for a greedy alignment with affine gap penalties, determine the next state of th...
Definition: greedy_align.c:149
static EGapAlignOpType s_GetNextAffineTbackFromIndel(SGreedyOffset **last_seq2_off, Int4 *diag_lower, Int4 *diag_upper, Int4 *d, Int4 diag, Int4 gap_open, Int4 gap_extend, EGapAlignOpType IorD)
During the traceback for a greedy alignment with affine gap penalties, determine the next state of th...
Definition: greedy_align.c:199
void MBSpaceFree(SMBSpace *space)
see greedy_align.h for description
Definition: greedy_align.c:80
static void s_RefreshMBSpace(SMBSpace *space)
Mark the input collection of space structures as unused.
Definition: greedy_align.c:71
Prototypes and structures for greedy gapped alignment.
#define NULL
Definition: ncbistd.hpp:225
#define ErrPostEx(sev, err_code,...)
Definition: ncbierr.hpp:78
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
for(len=0;yy_str[len];++len)
Prototypes for portable math library (ported from C Toolkit)
Int4 BLAST_Gdb3(Int4 *a, Int4 *b, Int4 *c)
Divide 3 numbers by their greatest common divisor.
Definition: ncbi_math.c:422
#define MIN(a, b)
returns smaller of a and b.
Definition: ncbi_std.h:112
#define NCBI_INLINE
"inline" seems to work on our remaining in-house compilers (WorkShop, Compaq, ICC,...
Definition: ncbi_std.h:81
Uint1 Boolean
bool replacment for C
Definition: ncbi_std.h:94
#define TRUE
bool replacment for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacment for C indicating false.
Definition: ncbi_std.h:101
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
#define MAX(a, b)
returns larger of a and b.
Definition: ncbi_std.h:117
Preliminary version of GapEditBlock, used directly by the low- level dynamic programming routines.
Definition: gapinfo.h:73
All auxiliary memory needed for the greedy extension algorithm.
Definition: greedy_align.h:89
SGreedyOffset ** last_seq2_off_affine
Like last_seq2_off but for affine searches.
Definition: greedy_align.h:94
Int4 * max_score
array of maximum scores
Definition: greedy_align.h:93
SMBSpace * space
local memory pool for SGreedyOffset structs
Definition: greedy_align.h:97
Int4 max_dist
max distance to search
Definition: greedy_align.h:90
Int4 * diag_bounds
bounds on ranges of diagonals
Definition: greedy_align.h:96
Int4 ** last_seq2_off
2-D array of distances
Definition: greedy_align.h:92
Bookkeeping structure for greedy alignment.
Definition: greedy_align.h:58
Int4 delete_off
Best offset for a path ending in a deletion.
Definition: greedy_align.h:61
Int4 insert_off
Best offset for a path ending in an insertion.
Definition: greedy_align.h:59
Int4 match_off
Best offset for a path ending in a match.
Definition: greedy_align.h:60
Structure for locating high-scoring seeds for greedy alignment.
Definition: greedy_align.h:102
Space structure for greedy alignment algorithm.
Definition: greedy_align.h:65
struct SMBSpace * next
pointer to next structure in list
Definition: greedy_align.h:69
Int4 space_allocated
number of structures allocated
Definition: greedy_align.h:67
Int4 space_used
number of structures actually in use
Definition: greedy_align.h:68
SGreedyOffset * space_array
array of bookkeeping structures
Definition: greedy_align.h:66
static int seed
Definition: test_table.cpp:132
done
Definition: token1.c:1
voidp malloc(uInt size)
Modified on Fri Apr 12 17:22:08 2024 by modify_doxy.py rev. 669887