NCBI C++ ToolKit
link_hsps.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 
2 /* $Id: link_hsps.c 93234 2021-03-20 19:41:33Z vakatov $
3  * ===========================================================================
4  *
5  * PUBLIC DOMAIN NOTICE
6  * National Center for Biotechnology Information
7  *
8  * This software/database is a "United States Government Work" under the
9  * terms of the United States Copyright Act. It was written as part of
10  * the author's official duties as a United States Government employee and
11  * thus cannot be copyrighted. This software/database is freely available
12  * to the public for use. The National Library of Medicine and the U.S.
13  * Government have not placed any restriction on its use or reproduction.
14  *
15  * Although all reasonable efforts have been taken to ensure the accuracy
16  * and reliability of the software and data, the NLM and the U.S.
17  * Government do not and cannot warrant the performance or results that
18  * may be obtained by using this software or data. The NLM and the U.S.
19  * Government disclaim all warranties, express or implied, including
20  * warranties of performance, merchantability or fitness for any particular
21  * purpose.
22  *
23  * Please cite the author in any work or product based on this material.
24  *
25  * ===========================================================================
26  *
27  * Author: Ilya Dondoshansky
28  *
29  */
30 
31 /** @file link_hsps.c
32  * Functions to link with use of sum statistics
33  */
34 
37 #include "blast_hits_priv.h"
38 
39 /******************************************************************************
40  * Structures and functions used only in even (small or large) gap linking *
41  * methods. *
42  ******************************************************************************/
43 
44 /** Describes the method for ordering HSPs. Note that these values
45  * are used to index an array, so their values must linearly increase
46  */
47 typedef enum ELinkOrderingMethod {
48  eLinkSmallGaps = 0, /**< favor small gaps when linking an HSP */
49  eLinkLargeGaps = 1, /**< favor large gaps when linking an HSP */
50  eOrderingMethods = 2 /**< number of methods (last in list) */
52 
53 /* Forward declaration */
54 struct LinkHSPStruct;
55 
56 /** The following structure is used in "link_hsps" to decide between
57  * two different "gapping" models. Here link is used to hook up
58  * a chain of HSP's, num is the number of links, and sum is the sum score.
59  * Once the best gapping model has been found, this information is
60  * transferred up to the LinkHSPStruct. This structure should not be
61  * used outside of the function Blast_EvenGapLinkHSPs.
62  */
63 typedef struct BlastHSPLink {
64  struct LinkHSPStruct* link[eOrderingMethods]; /**< Best
65  choice of HSP to link with */
66  Int2 num[eOrderingMethods]; /**< number of HSP in the ordering. */
67  Int4 sum[eOrderingMethods]; /**< Sum-Score of HSP. */
68  double xsum[eOrderingMethods]; /**< Sum-Score of HSP,
69  multiplied by the appropriate Lambda. */
70  Int4 changed; /**< Has the link been changed since previous access? */
72 
73 /** Structure containing all information internal to the process of linking
74  * HSPs.
75  */
76 typedef struct LinkHSPStruct {
77  BlastHSP* hsp; /**< Specific HSP this structure corresponds to */
78  struct LinkHSPStruct* prev; /**< Previous HSP in a set, if any */
79  struct LinkHSPStruct* next; /**< Next HSP in a set, if any */
80  BlastHSPLink hsp_link; /**< Auxiliary structure for keeping track of sum
81  scores, etc. */
82  Boolean linked_set; /**< Is this HSp part of a linked set? */
83  Boolean start_of_chain; /**< If TRUE, this HSP starts a chain along the
84  "link" pointer. */
85  Int4 linked_to; /**< Where this HSP is linked to? */
86  double xsum; /**< Normalized score of a set of HSPs */
87  ELinkOrderingMethod ordering_method; /**< Which method (max or
88  no max for gaps) was
89  used for linking HSPs? */
90  Int4 q_offset_trim; /**< Start of trimmed hsp in query */
91  Int4 q_end_trim; /**< End of trimmed HSP in query */
92  Int4 s_offset_trim; /**< Start of trimmed hsp in subject */
93  Int4 s_end_trim; /**< End of trimmed HSP in subject */
95 
96 /** The helper array contains the info used frequently in the inner
97  * for loops of the HSP linking algorithm.
98  * One array of helpers will be allocated for each thread.
99  */
100 typedef struct LinkHelpStruct {
101  LinkHSPStruct* ptr; /**< The HSP to which the info belongs */
102  Int4 q_off_trim; /**< query start of trimmed HSP */
103  Int4 s_off_trim; /**< subject start of trimmed HSP */
104  Int4 sum[eOrderingMethods]; /**< raw score of linked set containing HSP(?) */
105  Int4 maxsum1; /**< threshold for stopping link attempts (?) */
106  Int4 next_larger; /**< offset into array of HelpStructs containing
107  HSP with higher score, used in bailout
108  calculations */
110 
111 /** Callback used by qsort to sort a list of HSPs, encapsulated in
112  * LinkHSPStruct structures, in order of increasing query start offset.
113  * The subject start offset of HSPs is used as a tiebreaker, and no HSPs
114  * may be NULL.
115  * @param v1 first HSP in list [in]
116  * @param v2 second HSP in list [in]
117  * @return -1, 0, or 1 depending on HSPs
118 */
119 static int
120 s_FwdCompareHSPs(const void* v1, const void* v2)
121 {
122  LinkHSPStruct** hp1,** hp2;
123  BlastHSP* h1,* h2;
124 
125  hp1 = (LinkHSPStruct**) v1;
126  hp2 = (LinkHSPStruct**) v2;
127 
128  h1 = (*hp1)->hsp;
129  h2 = (*hp2)->hsp;
130 
131  if (h1->context < h2->context)
132  return -1;
133  if (h1->context > h2->context)
134  return 1;
135 
136  if (h1->query.offset < h2->query.offset)
137  return -1;
138  if (h1->query.offset > h2->query.offset)
139  return 1;
140  /* Necessary in case both HSP's have the same query offset. */
141  if (h1->subject.offset < h2->subject.offset)
142  return -1;
143  if (h1->subject.offset > h2->subject.offset)
144  return 1;
145 
146  return 0;
147 }
148 
149 /** Like s_FwdCompareHSPs, except with additional logic to
150  * distinguish HSPs that lie within different strands of
151  * a single translated query sequence
152  * @param v1 first HSP in list [in]
153  * @param v2 second HSP in list [in]
154  * @return -1, 0, or 1 depending on HSPs
155 */
156 static int
157 s_FwdCompareHSPsTransl(const void* v1, const void* v2)
158 {
159  BlastHSP* h1,* h2;
160  LinkHSPStruct** hp1,** hp2;
161  Int4 context1, context2;
162 
163  hp1 = (LinkHSPStruct**) v1;
164  hp2 = (LinkHSPStruct**) v2;
165  h1 = (*hp1)->hsp;
166  h2 = (*hp2)->hsp;
167 
168  context1 = h1->context/(NUM_FRAMES / 2);
169  context2 = h2->context/(NUM_FRAMES / 2);
170 
171  if (context1 < context2)
172  return -1;
173  else if (context1 > context2)
174  return 1;
175 
176  if (h1->query.offset < h2->query.offset)
177  return -1;
178  if (h1->query.offset > h2->query.offset)
179  return 1;
180  /* Necessary in case both HSP's have the same query offset. */
181  if (h1->subject.offset < h2->subject.offset)
182  return -1;
183  if (h1->subject.offset > h2->subject.offset)
184  return 1;
185 
186  return 0;
187 }
188 
189 /** Callback used by qsort to sort a list of HSPs (encapsulated in
190  * LinkHSPStruct structures) in order of decreasing query start offset.
191  * The subject start offset of HSPs is used as a tiebreaker, and no HSPs
192  * may be NULL
193  * @param v1 first HSP in list [in]
194  * @param v2 second HSP in list [in]
195  * @return -1, 0, or 1 depending on HSPs
196 */
197 static int
198 s_RevCompareHSPs(const void *v1, const void *v2)
199 
200 {
201  BlastHSP* h1,* h2;
202  LinkHSPStruct** hp1,** hp2;
203 
204  hp1 = (LinkHSPStruct**) v1;
205  hp2 = (LinkHSPStruct**) v2;
206  h1 = (*hp1)->hsp;
207  h2 = (*hp2)->hsp;
208 
209  if (h1->context < h2->context)
210  return -1;
211  else if (h1->context > h2->context)
212  return 1;
213 
214  if (h1->query.offset < h2->query.offset)
215  return 1;
216  if (h1->query.offset > h2->query.offset)
217  return -1;
218  /* Necessary in case both HSP's have the same query offset. */
219  if (h1->subject.offset < h2->subject.offset)
220  return 1;
221  if (h1->subject.offset > h2->subject.offset)
222  return -1;
223  return 0;
224 }
225 
226 /** Like s_RevCompareHSPs, except with additional logic to
227  * distinguish HSPs that lie within different strands of
228  * a single translated query sequence
229  * @param v1 first HSP in list [in]
230  * @param v2 second HSP in list [in]
231  * @return -1, 0, or 1 depending on HSPs
232 */
233 static int
234 s_RevCompareHSPsTransl(const void *v1, const void *v2)
235 
236 {
237  BlastHSP* h1,* h2;
238  LinkHSPStruct** hp1,** hp2;
239  Int4 context1, context2;
240 
241  hp1 = (LinkHSPStruct**) v1;
242  hp2 = (LinkHSPStruct**) v2;
243  h1 = (*hp1)->hsp;
244  h2 = (*hp2)->hsp;
245 
246  context1 = h1->context/(NUM_FRAMES / 2);
247  context2 = h2->context/(NUM_FRAMES / 2);
248 
249  if (context1 < context2)
250  return -1;
251  else if (context1 > context2)
252  return 1;
253 
254  if (h1->query.offset < h2->query.offset)
255  return 1;
256  if (h1->query.offset > h2->query.offset)
257  return -1;
258  /* Necessary in case both HSP's have the same query offset. */
259  if (h1->subject.offset < h2->subject.offset)
260  return 1;
261  if (h1->subject.offset > h2->subject.offset)
262  return -1;
263  return 0;
264 }
265 
266 /** Callback used by qsort to sort a list of HSPs (encapsulated in
267  * LinkHSPStruct structures) in order of decreasing query start offset
268  * (suitable for use with tblastn). HSPs are first separated by frame
269  * of a translated subject sequence, and tiebreaking is by decreasing
270  * query end offset, then subject start offset, then subject end offset.
271  * HSPs may not be NULL
272  * @param v1 first HSP in list [in]
273  * @param v2 second HSP in list [in]
274  * @return -1, 0, or 1 depending on HSPs
275 */
276 static int
277 s_RevCompareHSPsTbn(const void *v1, const void *v2)
278 
279 {
280  BlastHSP* h1,* h2;
281  LinkHSPStruct** hp1,** hp2;
282 
283  hp1 = (LinkHSPStruct**) v1;
284  hp2 = (LinkHSPStruct**) v2;
285  h1 = (*hp1)->hsp;
286  h2 = (*hp2)->hsp;
287 
288  if (h1->context < h2->context)
289  return -1;
290  else if (h1->context > h2->context)
291  return 1;
292 
293  if (SIGN(h1->subject.frame) != SIGN(h2->subject.frame))
294  {
295  if (h1->subject.frame > h2->subject.frame)
296  return 1;
297  else
298  return -1;
299  }
300 
301  if (h1->query.offset < h2->query.offset)
302  return 1;
303  if (h1->query.offset > h2->query.offset)
304  return -1;
305  if (h1->query.end < h2->query.end)
306  return 1;
307  if (h1->query.end > h2->query.end)
308  return -1;
309  if (h1->subject.offset < h2->subject.offset)
310  return 1;
311  if (h1->subject.offset > h2->subject.offset)
312  return -1;
313  if (h1->subject.end < h2->subject.end)
314  return 1;
315  if (h1->subject.end > h2->subject.end)
316  return -1;
317  return 0;
318 }
319 
320 /** Callback used by qsort to sort a list of HSPs (encapsulated in
321  * LinkHSPStruct structures) in order of decreasing query start offset
322  * (suitable for use with tblastx). HSPs are first separated by frame
323  * of a translated query sequence and then by frame of a translated
324  * subject sequence. Tiebreaking is by decreasing query end offset,
325  * then subject start offset, then subject end offset. HSPs may not be NULL
326  * @param v1 first HSP in list [in]
327  * @param v2 second HSP in list [in]
328  * @return -1, 0, or 1 depending on HSPs
329 */
330 static int
331 s_RevCompareHSPsTbx(const void *v1, const void *v2)
332 
333 {
334  BlastHSP* h1,* h2;
335  LinkHSPStruct** hp1,** hp2;
336  Int4 context1, context2;
337 
338  hp1 = (LinkHSPStruct**) v1;
339  hp2 = (LinkHSPStruct**) v2;
340  h1 = (*hp1)->hsp;
341  h2 = (*hp2)->hsp;
342 
343  context1 = h1->context/(NUM_FRAMES / 2);
344  context2 = h2->context/(NUM_FRAMES / 2);
345 
346  if (context1 < context2)
347  return -1;
348  else if (context1 > context2)
349  return 1;
350 
351  if (SIGN(h1->subject.frame) != SIGN(h2->subject.frame))
352  {
353  if (h1->subject.frame > h2->subject.frame)
354  return 1;
355  else
356  return -1;
357  }
358 
359  if (h1->query.offset < h2->query.offset)
360  return 1;
361  if (h1->query.offset > h2->query.offset)
362  return -1;
363  if (h1->query.end < h2->query.end)
364  return 1;
365  if (h1->query.end > h2->query.end)
366  return -1;
367  if (h1->subject.offset < h2->subject.offset)
368  return 1;
369  if (h1->subject.offset > h2->subject.offset)
370  return -1;
371  if (h1->subject.end < h2->subject.end)
372  return 1;
373  if (h1->subject.end > h2->subject.end)
374  return -1;
375  return 0;
376 }
377 
378 /** Initialize a LinkHSPStruct
379  * @param lhsp Pointer to struct to initialize. If NULL, struct gets
380  * allocated and then initialized [in/modified]
381  * @return Pointer to initialized struct
382  */
383 static LinkHSPStruct*
385 {
386  BlastHSP* hsp;
387 
388  if (!lhsp) {
389  lhsp = (LinkHSPStruct*) calloc(1, sizeof(LinkHSPStruct));
390  lhsp->hsp = (BlastHSP*) calloc(1, sizeof(BlastHSP));
391  } else {
392  if (!lhsp->hsp) {
393  hsp = (BlastHSP*) calloc(1, sizeof(BlastHSP));
394  } else {
395  hsp = lhsp->hsp;
396  memset(hsp, 0, sizeof(BlastHSP));
397  }
398  memset(lhsp, 0, sizeof(LinkHSPStruct));
399  lhsp->hsp = hsp;
400  }
401  return lhsp;
402 }
403 
404 /** Perform even gap linking on a list of HSPs
405  * @param program_number The blast program that generated the HSPs [in]
406  * @param hsp_list List of HSPs to link [in/modified]
407  * @param query_info List of structures describing all query sequences [in]
408  * @param subject_length Number of letters in the subject sequence [in]
409  * @param sbp Score block [in]
410  * @param link_hsp_params Configuration information for the linking process [in]
411  * @param gapped_calculation TRUE if the HSPs are from a gapped search [in]
412  * @return 0 if linking succeeded, nonzero otherwise
413  */
414 static Int2
416  const BlastQueryInfo* query_info, Int4 subject_length,
417  const BlastScoreBlk* sbp, const BlastLinkHSPParameters* link_hsp_params,
418  Boolean gapped_calculation)
419 {
420  LinkHSPStruct* H,* H2,* best[2],* first_hsp,* last_hsp,** hp_frame_start;
421  LinkHSPStruct* hp_start = NULL;
422  BlastHSP* hsp;
423  BlastHSP** hsp_array;
424  Blast_KarlinBlk** kbp;
425  Int4 maxscore, cutoff[2];
426  Boolean linked_set, ignore_small_gaps;
427  double gap_decay_rate, gap_prob, prob[2];
428  Int4 index, index1, num_links, frame_index;
429  ELinkOrderingMethod ordering_method;
430  Int4 num_query_frames, num_subject_frames;
431  Int4 *hp_frame_number;
432  Int4 window_size, trim_size;
433  Int4 number_of_hsps, total_number_of_hsps;
434  Int4 query_length, length_adjustment;
435  Int4 subject_length_orig = subject_length;
436  LinkHSPStruct* link;
437  Int4 H2_index,H_index;
438  Int4 i;
439  Int4 path_changed; /* will be set if an element is removed that may change an existing path */
440  Int4 first_pass, use_current_max;
441  LinkHelpStruct *lh_helper=0;
442  Int4 lh_helper_size;
443  Int4 query_context; /* AM: to support query concatenation. */
444  const Boolean kTranslatedQuery = Blast_QueryIsTranslated(program_number);
445  LinkHSPStruct** link_hsp_array;
446 
447  if (hsp_list == NULL)
448  return -1;
449 
450  hsp_array = hsp_list->hsp_array;
451 
452  lh_helper_size = MAX(1024,hsp_list->hspcnt+5);
453  lh_helper = (LinkHelpStruct *)
454  calloc(lh_helper_size, sizeof(LinkHelpStruct));
455 
456  if (gapped_calculation)
457  kbp = sbp->kbp_gap;
458  else
459  kbp = sbp->kbp;
460 
461  total_number_of_hsps = hsp_list->hspcnt;
462 
463  number_of_hsps = total_number_of_hsps;
464 
465  /* For convenience, include overlap size into the gap size */
466  window_size = link_hsp_params->gap_size + link_hsp_params->overlap_size + 1;
467  trim_size = (link_hsp_params->overlap_size + 1) / 2;
468  gap_prob = link_hsp_params->gap_prob;
469  gap_decay_rate = link_hsp_params->gap_decay_rate;
470 
471  if (Blast_SubjectIsTranslated(program_number))
472  num_subject_frames = NUM_STRANDS;
473  else
474  num_subject_frames = 1;
475 
476  link_hsp_array =
477  (LinkHSPStruct**) malloc(total_number_of_hsps*sizeof(LinkHSPStruct*));
478  for (index = 0; index < total_number_of_hsps; ++index) {
479  link_hsp_array[index] = (LinkHSPStruct*) calloc(1, sizeof(LinkHSPStruct));
480  link_hsp_array[index]->hsp = hsp_array[index];
481  }
482 
483  /* Sort by (reverse) position. */
484  if (kTranslatedQuery) {
485  qsort(link_hsp_array,total_number_of_hsps,sizeof(LinkHSPStruct*),
487  } else {
488  qsort(link_hsp_array,total_number_of_hsps,sizeof(LinkHSPStruct*),
490  }
491 
492  cutoff[0] = link_hsp_params->cutoff_small_gap;
493  cutoff[1] = link_hsp_params->cutoff_big_gap;
494 
495  ignore_small_gaps = (cutoff[0] == 0);
496 
497  /* If query is nucleotide, it has 2 strands that should be separated. */
498  if (Blast_QueryIsNucleotide(program_number))
499  num_query_frames = NUM_STRANDS*query_info->num_queries;
500  else
501  num_query_frames = query_info->num_queries;
502 
503  hp_frame_start =
504  calloc(num_subject_frames*num_query_frames, sizeof(LinkHSPStruct*));
505  hp_frame_number = calloc(num_subject_frames*num_query_frames, sizeof(Int4));
506 
507  /* hook up the HSP's */
508  hp_frame_start[0] = link_hsp_array[0];
509 
510  /* Put entries from different strands into separate 'query_frame's. */
511  {
512  Int4 cur_frame=0;
513  Int4 strand_factor = (kTranslatedQuery ? 3 : 1);
514  for (index=0;index<number_of_hsps;index++)
515  {
516  H=link_hsp_array[index];
517  H->start_of_chain = FALSE;
518  hp_frame_number[cur_frame]++;
519 
520  H->prev= index ? link_hsp_array[index-1] : NULL;
521  H->next= index<(number_of_hsps-1) ? link_hsp_array[index+1] : NULL;
522  if (H->prev != NULL &&
523  ((H->hsp->context/strand_factor) !=
524  (H->prev->hsp->context/strand_factor) ||
525  (SIGN(H->hsp->subject.frame) != SIGN(H->prev->hsp->subject.frame))))
526  { /* If frame switches, then start new list. */
527  hp_frame_number[cur_frame]--;
528  hp_frame_number[++cur_frame]++;
529  hp_frame_start[cur_frame] = H;
530  H->prev->next = NULL;
531  H->prev = NULL;
532  }
533  }
534  num_query_frames = cur_frame+1;
535  }
536 
537  /* trim_size is the maximum amount q.offset can differ from
538  q.offset_trim */
539  /* This is used to break out of H2 loop early */
540  for (index=0;index<number_of_hsps;index++)
541  {
542  Int4 q_length, s_length;
543  H = link_hsp_array[index];
544  hsp = H->hsp;
545  q_length = (hsp->query.end - hsp->query.offset) / 4;
546  s_length = (hsp->subject.end - hsp->subject.offset) / 4;
547  H->q_offset_trim = hsp->query.offset + MIN(q_length, trim_size);
548  H->q_end_trim = hsp->query.end - MIN(q_length, trim_size);
549  H->s_offset_trim = hsp->subject.offset + MIN(s_length, trim_size);
550  H->s_end_trim = hsp->subject.end - MIN(s_length, trim_size);
551  }
552 
553  for (frame_index=0; frame_index<num_query_frames; frame_index++)
554  {
555  hp_start = s_LinkHSPStructReset(hp_start);
556  hp_start->next = hp_frame_start[frame_index];
557  hp_frame_start[frame_index]->prev = hp_start;
558  number_of_hsps = hp_frame_number[frame_index];
559  query_context = hp_start->next->hsp->context;
560  length_adjustment = query_info->contexts[query_context].length_adjustment;
561  query_length = query_info->contexts[query_context].query_length;
562  query_length = MAX(query_length - length_adjustment, 1);
563  subject_length = subject_length_orig; /* in nucleotides even for tblast[nx] */
564  /* If subject is translated, length adjustment is given in nucleotide
565  scale. */
566  if (Blast_SubjectIsTranslated(program_number))
567  {
568  length_adjustment /= CODON_LENGTH;
569  subject_length /= CODON_LENGTH;
570  }
571  subject_length = MAX(subject_length - length_adjustment, 1);
572 
573  lh_helper[0].ptr = hp_start;
574  lh_helper[0].q_off_trim = 0;
575  lh_helper[0].s_off_trim = 0;
576  lh_helper[0].maxsum1 = -10000;
577  lh_helper[0].next_larger = 0;
578 
579  /* lh_helper[0] = empty = additional end marker
580  * lh_helper[1] = hsp_start = empty entry used in original code
581  * lh_helper[2] = hsp_array->next = hsp_array[0]
582  * lh_helper[i] = ... = hsp_array[i-2] (for i>=2)
583  */
584  first_pass=1; /* do full search */
585  path_changed=1;
586  for (H=hp_start->next; H!=NULL; H=H->next)
587  H->hsp_link.changed=1;
588 
589  while (number_of_hsps > 0)
590  {
591  Int4 max[3];
592  max[0]=max[1]=max[2]=-10000;
593  /* Initialize the 'best' parameter */
594  best[0] = best[1] = NULL;
595 
596 
597  /* See if we can avoid recomputing all scores:
598  * - Find the max paths (based on old scores).
599  * - If no paths were changed by removal of nodes (ie research==0)
600  * then these max paths are still the best.
601  * - else if these max paths were unchanged, then they are still the best.
602  */
603  use_current_max=0;
604  if (!first_pass){
605  Int4 max0,max1;
606  /* Find the current max sums */
607  if(!ignore_small_gaps){
608  max0 = -cutoff[0];
609  max1 = -cutoff[1];
610  for (H=hp_start->next; H!=NULL; H=H->next) {
611  Int4 sum0=H->hsp_link.sum[0];
612  Int4 sum1=H->hsp_link.sum[1];
613  if(sum0>=max0)
614  {
615  max0=sum0;
616  best[0]=H;
617  }
618  if(sum1>=max1)
619  {
620  max1=sum1;
621  best[1]=H;
622  }
623  }
624  } else {
625  maxscore = -cutoff[1];
626  for (H=hp_start->next; H!=NULL; H=H->next) {
627  Int4 sum=H->hsp_link.sum[1];
628  if(sum>=maxscore)
629  {
630  maxscore=sum;
631  best[1]=H;
632  }
633  }
634  }
635  if(path_changed==0){
636  /* No path was changed, use these max sums. */
637  use_current_max=1;
638  }
639  else{
640  /* If max path hasn't chaged, we can use it */
641  /* Walk down best, give up if we find a removed item in path */
642  use_current_max=1;
643  if(!ignore_small_gaps){
644  for (H=best[0]; H!=NULL; H=H->hsp_link.link[0])
645  if (H->linked_to==-1000) {use_current_max=0; break;}
646  }
647  if(use_current_max)
648  for (H=best[1]; H!=NULL; H=H->hsp_link.link[1])
649  if (H->linked_to==-1000) {use_current_max=0; break;}
650 
651  }
652  }
653 
654  /* reset helper_info */
655  /* Inside this while loop, the linked list order never changes
656  * So here we initialize an array of commonly used info,
657  * and in this loop we access these arrays instead of the actual list
658  */
659  if(!use_current_max){
660  for (H=hp_start,H_index=1; H!=NULL; H=H->next,H_index++) {
661  Int4 s_frame = H->hsp->subject.frame;
662  Int4 s_off_t = H->s_offset_trim;
663  Int4 q_off_t = H->q_offset_trim;
664  lh_helper[H_index].ptr = H;
665  lh_helper[H_index].q_off_trim = q_off_t;
666  lh_helper[H_index].s_off_trim = s_off_t;
667  for(i=0;i<eOrderingMethods;i++)
668  lh_helper[H_index].sum[i] = H->hsp_link.sum[i];
669  max[SIGN(s_frame)+1]=
670  MAX(max[SIGN(s_frame)+1],H->hsp_link.sum[1]);
671  lh_helper[H_index].maxsum1 =max[SIGN(s_frame)+1];
672 
673  /* set next_larger to link back to closest entry with a sum1
674  larger than this */
675  {
676  Int4 cur_sum=lh_helper[H_index].sum[1];
677  Int4 prev = H_index-1;
678  Int4 prev_sum = lh_helper[prev].sum[1];
679  while((cur_sum>=prev_sum) && (prev>0)){
680  prev=lh_helper[prev].next_larger;
681  prev_sum = lh_helper[prev].sum[1];
682  }
683  lh_helper[H_index].next_larger = prev;
684  }
685  H->linked_to = 0;
686  }
687 
688  lh_helper[1].maxsum1 = -10000;
689 
690  /****** loop iter for index = 0 **************************/
691  if(!ignore_small_gaps)
692  {
693  index=0;
694  maxscore = -cutoff[index];
695  H_index = 2;
696  for (H=hp_start->next; H!=NULL; H=H->next,H_index++)
697  {
698  Int4 H_hsp_num=0;
699  Int4 H_hsp_sum=0;
700  double H_hsp_xsum=0.0;
701  LinkHSPStruct* H_hsp_link=NULL;
702  if (H->hsp->score > cutoff[index]) {
703  Int4 H_query_etrim = H->q_end_trim;
704  Int4 H_sub_etrim = H->s_end_trim;
705  Int4 H_q_et_gap = H_query_etrim+window_size;
706  Int4 H_s_et_gap = H_sub_etrim+window_size;
707 
708  /* We only walk down hits with the same frame sign */
709  /* for (H2=H->prev; H2!=NULL; H2=H2->prev,H2_index--) */
710  for (H2_index=H_index-1; H2_index>1; H2_index=H2_index-1)
711  {
712  Int4 b1,b2,b4,b5;
713  Int4 q_off_t,s_off_t,sum;
714 
715  /* s_frame = lh_helper[H2_index].s_frame; */
716  q_off_t = lh_helper[H2_index].q_off_trim;
717  s_off_t = lh_helper[H2_index].s_off_trim;
718 
719  /* combine tests to reduce mispredicts -cfj */
720  b1 = q_off_t <= H_query_etrim;
721  b2 = s_off_t <= H_sub_etrim;
722  sum = lh_helper[H2_index].sum[index];
723 
724 
725  b4 = ( q_off_t > H_q_et_gap ) ;
726  b5 = ( s_off_t > H_s_et_gap ) ;
727 
728  /* list is sorted by q_off, so q_off should only increase.
729  * q_off_t can only differ from q_off by trim_size
730  * So once q_off_t is large enough (ie it exceeds limit
731  * by trim_size), we can stop. -cfj
732  */
733  if(q_off_t > (H_q_et_gap+trim_size))
734  break;
735 
736  if (b1|b2|b5|b4) continue;
737 
738  if (sum>H_hsp_sum)
739  {
740  H2=lh_helper[H2_index].ptr;
741  H_hsp_num=H2->hsp_link.num[index];
742  H_hsp_sum=H2->hsp_link.sum[index];
743  H_hsp_xsum=H2->hsp_link.xsum[index];
744  H_hsp_link=H2;
745  }
746  } /* end for H2... */
747  }
748  {
749  Int4 score=H->hsp->score;
750  double new_xsum =
751  H_hsp_xsum + score*kbp[H->hsp->context]->Lambda -
752  kbp[H->hsp->context]->logK;
753  Int4 new_sum = H_hsp_sum + (score - cutoff[index]);
754 
755  H->hsp_link.sum[index] = new_sum;
756  H->hsp_link.num[index] = H_hsp_num+1;
757  H->hsp_link.link[index] = H_hsp_link;
758  lh_helper[H_index].sum[index] = new_sum;
759  if (new_sum >= maxscore)
760  {
761  maxscore=new_sum;
762  best[index]=H;
763  }
764  H->hsp_link.xsum[index] = new_xsum;
765  if(H_hsp_link)
766  ((LinkHSPStruct*)H_hsp_link)->linked_to++;
767  }
768  } /* end for H=... */
769  }
770  /****** loop iter for index = 1 **************************/
771  index=1;
772  maxscore = -cutoff[index];
773  H_index = 2;
774  for (H=hp_start->next; H!=NULL; H=H->next,H_index++)
775  {
776  Int4 H_hsp_num=0;
777  Int4 H_hsp_sum=0;
778  double H_hsp_xsum=0.0;
779  LinkHSPStruct* H_hsp_link=NULL;
780 
781  H->hsp_link.changed=1;
782  H2 = H->hsp_link.link[index];
783  if ((!first_pass) && ((H2==0) || (H2->hsp_link.changed==0)))
784  {
785  /* If the best choice last time has not been changed, then
786  it is still the best choice, so no need to walk down list.
787  */
788  if(H2){
789  H_hsp_num=H2->hsp_link.num[index];
790  H_hsp_sum=H2->hsp_link.sum[index];
791  H_hsp_xsum=H2->hsp_link.xsum[index];
792  }
793  H_hsp_link=H2;
794  H->hsp_link.changed=0;
795  } else if (H->hsp->score > cutoff[index]) {
796  Int4 H_query_etrim = H->q_end_trim;
797  Int4 H_sub_etrim = H->s_end_trim;
798 
799 
800  /* Here we look at what was the best choice last time, if it's
801  * still around, and set this to the initial choice. By
802  * setting the best score to a (potentially) large value
803  * initially, we can reduce the number of hsps checked.
804  */
805 
806  /* Currently we set the best score to a value just less than
807  * the real value. This is not really necessary, but doing
808  * this ensures that in the case of a tie, we make the same
809  * selection the original code did.
810  */
811 
812  if(!first_pass&&H2&&H2->linked_to>=0){
813  if(1){
814  /* We set this to less than the real value to keep the
815  original ordering in case of ties. */
816  H_hsp_sum=H2->hsp_link.sum[index]-1;
817  }else{
818  H_hsp_num=H2->hsp_link.num[index];
819  H_hsp_sum=H2->hsp_link.sum[index];
820  H_hsp_xsum=H2->hsp_link.xsum[index];
821  H_hsp_link=H2;
822  }
823  }
824 
825  /* We now only walk down hits with the same frame sign */
826  /* for (H2=H->prev; H2!=NULL; H2=H2->prev,H2_index--) */
827  for (H2_index=H_index-1; H2_index>1;)
828  {
829  Int4 b0,b1,b2;
830  Int4 q_off_t,s_off_t,sum,next_larger;
831  LinkHelpStruct * H2_helper=&lh_helper[H2_index];
832  sum = H2_helper->sum[index];
833  next_larger = H2_helper->next_larger;
834 
835  s_off_t = H2_helper->s_off_trim;
836  q_off_t = H2_helper->q_off_trim;
837 
838  b0 = sum <= H_hsp_sum;
839 
840  /* Compute the next H2_index */
841  H2_index--;
842  if(b0){ /* If this sum is too small to beat H_hsp_sum, advance to a larger sum */
843  H2_index=next_larger;
844  }
845 
846  /* combine tests to reduce mispredicts -cfj */
847  b1 = q_off_t <= H_query_etrim;
848  b2 = s_off_t <= H_sub_etrim;
849 
850  if(0) if(H2_helper->maxsum1<=H_hsp_sum)break;
851 
852  if (!(b0|b1|b2) )
853  {
854  H2 = H2_helper->ptr;
855 
856  H_hsp_num=H2->hsp_link.num[index];
857  H_hsp_sum=H2->hsp_link.sum[index];
858  H_hsp_xsum=H2->hsp_link.xsum[index];
859  H_hsp_link=H2;
860  }
861  } /* end for H2_index... */
862  } /* end if(H->score>cuttof[]) */
863  {
864  Int4 score=H->hsp->score;
865  double new_xsum =
866  H_hsp_xsum + score*kbp[H->hsp->context]->Lambda -
867  kbp[H->hsp->context]->logK;
868  Int4 new_sum = H_hsp_sum + (score - cutoff[index]);
869 
870  H->hsp_link.sum[index] = new_sum;
871  H->hsp_link.num[index] = H_hsp_num+1;
872  H->hsp_link.link[index] = H_hsp_link;
873  lh_helper[H_index].sum[index] = new_sum;
874  lh_helper[H_index].maxsum1 = MAX(lh_helper[H_index-1].maxsum1, new_sum);
875  /* Update this entry's 'next_larger' field */
876  {
877  Int4 cur_sum=lh_helper[H_index].sum[1];
878  Int4 prev = H_index-1;
879  Int4 prev_sum = lh_helper[prev].sum[1];
880  while((cur_sum>=prev_sum) && (prev>0)){
881  prev=lh_helper[prev].next_larger;
882  prev_sum = lh_helper[prev].sum[1];
883  }
884  lh_helper[H_index].next_larger = prev;
885  }
886 
887  if (new_sum >= maxscore)
888  {
889  maxscore=new_sum;
890  best[index]=H;
891  }
892  H->hsp_link.xsum[index] = new_xsum;
893  if(H_hsp_link)
894  ((LinkHSPStruct*)H_hsp_link)->linked_to++;
895  }
896  }
897  path_changed=0;
898  first_pass=0;
899  }
900  /********************************/
901  if (!ignore_small_gaps)
902  {
903  /* Select the best ordering method.
904  First we add back in the value cutoff[index] * the number
905  of links, as this was subtracted out for purposes of the
906  comparison above. */
907  best[0]->hsp_link.sum[0] +=
908  (best[0]->hsp_link.num[0])*cutoff[0];
909 
911  best[0]->hsp_link.num[0], best[0]->hsp_link.xsum[0],
912  query_length, subject_length,
913  query_info->contexts[query_context].eff_searchsp,
914  BLAST_GapDecayDivisor(gap_decay_rate,
915  best[0]->hsp_link.num[0]) );
916 
917  /* Adjust the e-value because we are performing multiple tests */
918  if( best[0]->hsp_link.num[0] > 1 ) {
919  if( gap_prob == 0 || (prob[0] /= gap_prob) > INT4_MAX ) {
920  prob[0] = INT4_MAX;
921  }
922  }
923 
924  prob[1] = BLAST_LargeGapSumE(best[1]->hsp_link.num[1],
925  best[1]->hsp_link.xsum[1],
926  query_length, subject_length,
927  query_info->contexts[query_context].eff_searchsp,
928  BLAST_GapDecayDivisor(gap_decay_rate,
929  best[1]->hsp_link.num[1]));
930 
931  if( best[1]->hsp_link.num[1] > 1 ) {
932  if( 1 - gap_prob == 0 || (prob[1] /= 1 - gap_prob) > INT4_MAX ) {
933  prob[1] = INT4_MAX;
934  }
935  }
936  ordering_method =
938  }
939  else
940  {
941  /* We only consider the case of big gaps. */
942  best[1]->hsp_link.sum[1] +=
943  (best[1]->hsp_link.num[1])*cutoff[1];
944 
946  best[1]->hsp_link.num[1],
947  best[1]->hsp_link.xsum[1],
948  query_length, subject_length,
949  query_info->contexts[query_context].eff_searchsp,
950  BLAST_GapDecayDivisor(gap_decay_rate,
951  best[1]->hsp_link.num[1]));
952  ordering_method = eLinkLargeGaps;
953  }
954 
955  best[ordering_method]->start_of_chain = TRUE;
956  best[ordering_method]->hsp->evalue = prob[ordering_method];
957 
958  /* remove the links that have been ordered already. */
959  if (best[ordering_method]->hsp_link.link[ordering_method])
960  linked_set = TRUE;
961  else
962  linked_set = FALSE;
963 
964  if (best[ordering_method]->linked_to>0) path_changed=1;
965  for (H=best[ordering_method]; H!=NULL;
966  H=H->hsp_link.link[ordering_method])
967  {
968  if (H->linked_to>1) path_changed=1;
969  H->linked_to=-1000;
970  H->hsp_link.changed=1;
971  /* record whether this is part of a linked set. */
972  H->linked_set = linked_set;
973  H->ordering_method = ordering_method;
974  H->hsp->evalue = prob[ordering_method];
975  if (H->next)
976  (H->next)->prev=H->prev;
977  if (H->prev)
978  (H->prev)->next=H->next;
979  number_of_hsps--;
980  }
981 
982  } /* end while num_hsps... */
983  } /* end for frame_index ... */
984 
985  sfree(hp_frame_start);
986  sfree(hp_frame_number);
987  sfree(hp_start->hsp);
988  sfree(hp_start);
989 
990  if (kTranslatedQuery) {
991  qsort(link_hsp_array,total_number_of_hsps,sizeof(LinkHSPStruct*),
993  qsort(link_hsp_array, total_number_of_hsps,sizeof(LinkHSPStruct*),
995  } else {
996  qsort(link_hsp_array,total_number_of_hsps,sizeof(LinkHSPStruct*),
998  qsort(link_hsp_array, total_number_of_hsps,sizeof(LinkHSPStruct*),
1000  }
1001 
1002  /* Sort by starting position. */
1003 
1004 
1005  for (index=0, last_hsp=NULL;index<total_number_of_hsps; index++)
1006  {
1007  H = link_hsp_array[index];
1008  H->prev = NULL;
1009  H->next = NULL;
1010  }
1011 
1012  /* hook up the HSP's. */
1013  first_hsp = NULL;
1014  for (index=0, last_hsp=NULL;index<total_number_of_hsps; index++)
1015  {
1016  H = link_hsp_array[index];
1017 
1018  /* If this is not a single piece or the start of a chain, then Skip it. */
1019  if (H->linked_set == TRUE && H->start_of_chain == FALSE)
1020  continue;
1021 
1022  /* If the HSP has no "link" connect the "next", otherwise follow the "link"
1023  chain down, connecting them with "next" and "prev". */
1024  if (last_hsp == NULL)
1025  first_hsp = H;
1026  H->prev = last_hsp;
1027  ordering_method = H->ordering_method;
1028  if (H->hsp_link.link[ordering_method] == NULL)
1029  {
1030  /* Grab the next HSP that is not part of a chain or the start of a chain */
1031  /* The "next" pointers are not hooked up yet in HSP's further down array. */
1032  index1=index;
1033  H2 = index1<(total_number_of_hsps-1) ? link_hsp_array[index1+1] : NULL;
1034  while (H2 && H2->linked_set == TRUE &&
1035  H2->start_of_chain == FALSE)
1036  {
1037  index1++;
1038  H2 = index1<(total_number_of_hsps-1) ? link_hsp_array[index1+1] : NULL;
1039  }
1040  H->next= H2;
1041  }
1042  else
1043  {
1044  /* The first one has the number of links correct. */
1045  num_links = H->hsp_link.num[ordering_method];
1046  link = H->hsp_link.link[ordering_method];
1047  while (link)
1048  {
1049  H->hsp->num = num_links;
1050  H->xsum = H->hsp_link.xsum[ordering_method];
1051  H->next = (LinkHSPStruct*) link;
1052  H->prev = last_hsp;
1053  last_hsp = H;
1054  H = H->next;
1055  if (H != NULL)
1056  link = H->hsp_link.link[ordering_method];
1057  else
1058  break;
1059  }
1060  /* Set these for last link in chain. */
1061  H->hsp->num = num_links;
1062  H->xsum = H->hsp_link.xsum[ordering_method];
1063  /* Grab the next HSP that is not part of a chain or the start of a chain */
1064  index1=index;
1065  H2 = index1<(total_number_of_hsps-1) ? link_hsp_array[index1+1] : NULL;
1066  while (H2 && H2->linked_set == TRUE &&
1067  H2->start_of_chain == FALSE)
1068  {
1069  index1++;
1070  H2 = index1<(total_number_of_hsps-1) ? link_hsp_array[index1+1] : NULL;
1071  }
1072  H->next= H2;
1073  H->prev = last_hsp;
1074  }
1075  last_hsp = H;
1076  }
1077 
1078  /* The HSP's may be in a different order than they were before,
1079  but first_hsp contains the first one. */
1080  for (index = 0, H = first_hsp; index < hsp_list->hspcnt; index++) {
1081  hsp_list->hsp_array[index] = H->hsp;
1082  /* Free the wrapper structure */
1083  H2 = H->next;
1084  sfree(H);
1085  H = H2;
1086  }
1087  sfree(link_hsp_array);
1088  sfree(lh_helper);
1089 
1090  return 0;
1091 }
1092 
1093 /******************************************************************************
1094  * Structures and functions used only in uneven gap linking method. *
1095  ******************************************************************************/
1096 
1097 /** Simple doubly linked list of HSPs, used for calculating sum statistics. */
1098 typedef struct BlastLinkedHSPSet {
1099  BlastHSP* hsp; /**< HSP for the current link in the chain. */
1100  Uint4 queryId; /**< Used for support of OOF linking */
1101  struct BlastLinkedHSPSet* next;/**< Next link in the chain. */
1102  struct BlastLinkedHSPSet* prev;/**< Previous link in the chain. */
1103  double sum_score; /**< Sum bit score for the linked set. */
1105 
1106 /** Calculates e-value of a set of HSPs with sum statistics.
1107  * @param program_number Type of BLAST program [in]
1108  * @param query_info Query information structure [in]
1109  * @param subject_length Subject sequence length [in]
1110  * @param link_hsp_params Parameters for linking HSPs [in]
1111  * @param head_hsp Set of HSPs with previously calculated sum score/evalue [in]
1112  * @param new_hsp New HSP candidate to join the set [in]
1113  * @param sum_score Normalized score for the collection if HSPs[out]
1114  * @return E-value of all the HSPs together
1115  */
1116 static double
1118  const BlastQueryInfo* query_info, Int4 subject_length,
1119  const BlastLinkHSPParameters* link_hsp_params,
1120  BlastLinkedHSPSet* head_hsp, BlastLinkedHSPSet* new_hsp, double* sum_score)
1121 {
1122  double gap_decay_rate, sum_evalue;
1123  Int2 num;
1124  Int4 subject_eff_length, query_eff_length, len_adj;
1125  Int4 context = head_hsp->hsp->context;
1126  Int4 query_window_size;
1127  Int4 subject_window_size;
1128 
1129  ASSERT(program_number != eBlastTypeTblastx);
1130 
1131  subject_eff_length = (Blast_SubjectIsTranslated(program_number)) ?
1132  subject_length/3 : subject_length;
1133 
1134  gap_decay_rate = link_hsp_params->gap_decay_rate;
1135 
1136  num = head_hsp->hsp->num + new_hsp->hsp->num;
1137 
1138  len_adj = query_info->contexts[context].length_adjustment;
1139 
1140  query_eff_length = MAX(query_info->contexts[context].query_length - len_adj, 1);
1141 
1142  subject_eff_length = MAX(subject_eff_length - len_adj, 1);
1143 
1144  *sum_score = new_hsp->sum_score + head_hsp->sum_score;
1145 
1146  query_window_size =
1147  link_hsp_params->overlap_size + link_hsp_params->gap_size + 1;
1148  subject_window_size =
1149  link_hsp_params->overlap_size + link_hsp_params->longest_intron + 1;
1150 
1151  sum_evalue =
1152  BLAST_UnevenGapSumE(query_window_size, subject_window_size,
1153  num, *sum_score, query_eff_length, subject_eff_length,
1154  query_info->contexts[context].eff_searchsp,
1155  BLAST_GapDecayDivisor(gap_decay_rate, num));
1156 
1157  return sum_evalue;
1158 }
1159 
1160 /** Callback for sorting an array of HSPs, encapsulated in BlastLinkedHSPSet
1161  * structures, in order of increasing query starting offset.
1162  * The subject end offset of HSPs is used as a tiebreaker, and no HSPs may be
1163  * NULL. The comparison is applied only to HSPs from the same context.
1164  * Otherwise, the sorting is in increasing order of contexts.
1165  * @param v1 first HSP in list [in]
1166  * @param v2 second HSP in list [in]
1167  * @return -1, 0, or 1 depending on HSPs
1168  */
1169 static int
1170 s_FwdCompareLinkedHSPSets(const void* v1, const void* v2)
1171 {
1172  BlastLinkedHSPSet** hp1,** hp2;
1173  BlastHSP* hsp1,* hsp2;
1174 
1175  hp1 = (BlastLinkedHSPSet**) v1;
1176  hp2 = (BlastLinkedHSPSet**) v2;
1177 
1178  /* check to see if hsp are within the same context */
1179  if ((*hp1)->queryId != (*hp2)->queryId)
1180  return (*hp1)->queryId - (*hp2)->queryId;
1181 
1182  hsp1 = (*hp1)->hsp;
1183  hsp2 = (*hp2)->hsp;
1184 
1185  if (hsp1->query.offset < hsp2->query.offset)
1186  return -1;
1187  if (hsp1->query.offset > hsp2->query.offset)
1188  return 1;
1189  /* Necessary in case both HSP's have the same query offset. */
1190  if (hsp1->subject.offset < hsp2->subject.offset)
1191  return -1;
1192  if (hsp1->subject.offset > hsp2->subject.offset)
1193  return 1;
1194 
1195  return 0;
1196 }
1197 
1198 /** Callback used by qsort to sort a list of BlastLinkedHSPSet structures
1199  * in order of decreasing sum score. Entries in the list may be NULL
1200  * @param v1 first HSP in list [in]
1201  * @param v2 second HSP in list [in]
1202  * @return -1, 0, or 1 depending on HSPs
1203 */
1204 static int
1205 s_SumScoreCompareLinkedHSPSets(const void* v1, const void* v2)
1206 {
1207  BlastLinkedHSPSet* h1,* h2;
1208  BlastLinkedHSPSet** hp1,** hp2;
1209 
1210  hp1 = (BlastLinkedHSPSet**) v1;
1211  hp2 = (BlastLinkedHSPSet**) v2;
1212  h1 = *hp1;
1213  h2 = *hp2;
1214 
1215  if (!h1 && !h2)
1216  return 0;
1217  else if (!h1)
1218  return 1;
1219  else if (!h2)
1220  return -1;
1221 
1222  if (h1->sum_score < h2->sum_score)
1223  return 1;
1224  if (h1->sum_score > h2->sum_score)
1225  return -1;
1226 
1227  return ScoreCompareHSPs(&h1->hsp, &h2->hsp);
1228 }
1229 
1230 /** Find an HSP on the same queryId as the one given, with closest start offset
1231  * that is greater than a specified value. The list of HSPs to search must
1232  * be sorted by query offset and in increasing order of queryId.
1233  * @param hsp_array List of pointers to HSPs, encapsulated within
1234  * BlastLinkedHSPSet structures [in]
1235  * @param size Number of elements in the array [in]
1236  * @param queryId Context of the target HSP [in]
1237  * @param offset The target offset to search for [in]
1238  * @return The index in the array of the HSP whose start/end offset
1239  * is closest to but >= the value 'offset'
1240  */
1241 static Int4
1244 {
1245  Int4 index, begin, end;
1246 
1247  begin = 0;
1248  end = size;
1249  while (begin < end) {
1250  index = (begin + end) / 2;
1251 
1252  if (hsp_array[index]->queryId < queryId)
1253  begin = index + 1;
1254  else if (hsp_array[index]->queryId > queryId)
1255  end = index;
1256  else {
1257  if (hsp_array[index]->hsp->query.offset >= offset)
1258  end = index;
1259  else
1260  begin = index + 1;
1261  }
1262  }
1263 
1264  return end;
1265 }
1266 
1267 /** Find an HSP in an array sorted in increasing order of query offsets and
1268  * increasing order of queryId, with the smallest index such that its query end
1269  * is >= to a given offset.
1270  * @param hsp_array Array of pointers to HSPs, encapsulated within
1271  * BlastLinkedHSPSet structures. Must be sorted by queryId and
1272  * query offsets. [in]
1273  * @param size Number of elements in the array [in]
1274  * @param qend_index_array Array indexing query ends in the hsp_array [in]
1275  * @param queryId Context of the target HSP [in]
1276  * @param offset The target offset to search for [in]
1277  * @return The found index in the hsp_array.
1278  */
1279 static Int4
1281  Int4* qend_index_array, Uint4 queryId, Int4 offset)
1282 {
1283  Int4 begin, end;
1284 
1285  begin = 0;
1286  end = size;
1287  while (begin < end) {
1288  Int4 right_index = (begin + end) / 2;
1289  Int4 left_index = qend_index_array[right_index];
1290 
1291  if (hsp_array[right_index]->queryId < queryId)
1292  begin = right_index + 1;
1293  else if (hsp_array[right_index]->queryId > queryId)
1294  end = left_index;
1295  else {
1296  if (hsp_array[left_index]->hsp->query.end >= offset)
1297  end = left_index;
1298  else
1299  begin = right_index + 1;
1300  }
1301  }
1302 
1303  return end;
1304 }
1305 
1306 /** Merges HSPs from two linked HSP sets into an array of HSPs, sorted in
1307  * increasing order of contexts and increasing order of query offsets.
1308  * @param hsp_set1 First linked set. [in]
1309  * @param hsp_set2 Second linked set. [in]
1310  * @param merged_size The total number of HSPs in two sets. [out]
1311  * @return The array of pointers to HSPs representing a merged set.
1312  */
1313 static BlastLinkedHSPSet**
1315  Int4* merged_size)
1316 {
1317  Int4 index;
1318  Int4 length;
1319  BlastLinkedHSPSet** merged_hsps;
1320 
1321  /* Find the first link of the old HSP chain. */
1322  while (hsp_set1->prev)
1323  hsp_set1 = hsp_set1->prev;
1324  /* Find first and last link in the new HSP chain. */
1325  while (hsp_set2->prev)
1326  hsp_set2 = hsp_set2->prev;
1327 
1328  *merged_size = length = hsp_set1->hsp->num + hsp_set2->hsp->num;
1329 
1330  merged_hsps = (BlastLinkedHSPSet**)
1331  malloc(length*sizeof(BlastLinkedHSPSet*));
1332 
1333  index = 0;
1334  while (hsp_set1 || hsp_set2) {
1335  /* NB: HSP sets for which some HSPs have identical query offsets cannot
1336  possibly be admissible, so it doesn't matter how to deal with equal
1337  offsets. */
1338  if (!hsp_set2 || (hsp_set1 &&
1339  hsp_set1->hsp->query.offset < hsp_set2->hsp->query.offset)) {
1340  merged_hsps[index] = hsp_set1;
1341  hsp_set1 = hsp_set1->next;
1342  } else {
1343  merged_hsps[index] = hsp_set2;
1344  hsp_set2 = hsp_set2->next;
1345  }
1346  ++index;
1347  }
1348  return merged_hsps;
1349 }
1350 
1351 /** Combines two linked sets of HSPs into a single set.
1352  * @param hsp_set1 First set of HSPs [in]
1353  * @param hsp_set2 Second set of HSPs [in]
1354  * @param sum_score The sum score of the combined linked set
1355  * @param evalue The E-value of the combined linked set
1356  * @return Combined linked set.
1357  */
1358 static BlastLinkedHSPSet*
1360  double sum_score, double evalue)
1361 {
1362  BlastLinkedHSPSet** merged_hsps;
1363  BlastLinkedHSPSet* head_hsp;
1364  Int4 index, new_num;
1365 
1366  if (!hsp_set2)
1367  return hsp_set1;
1368  else if (!hsp_set1)
1369  return hsp_set2;
1370 
1371  merged_hsps = s_MergeLinkedHSPSets(hsp_set1, hsp_set2, &new_num);
1372 
1373  head_hsp = merged_hsps[0];
1374  head_hsp->prev = NULL;
1375  for (index = 0; index < new_num; ++index) {
1376  BlastLinkedHSPSet* link = merged_hsps[index];
1377  if (index < new_num - 1) {
1378  BlastLinkedHSPSet* next_link = merged_hsps[index+1];
1379  link->next = next_link;
1380  next_link->prev = link;
1381  } else {
1382  link->next = NULL;
1383  }
1384  link->sum_score = sum_score;
1385  link->hsp->evalue = evalue;
1386  link->hsp->num = new_num;
1387  }
1388 
1389  sfree(merged_hsps);
1390  return head_hsp;
1391 }
1392 
1393 /** Checks if new candidate HSP is admissible to be linked to a set of HSPs on
1394  * the left. The new HSP must start strictly before the parent HSP in both query
1395  * and subject, and its end must lie within an interval from the parent HSP's
1396  * start, determined by the allowed gap and overlap sizes in query and subject.
1397  * This function also indicates whether parent is already too far to the right
1398  * of the candidate HSP, via a boolean pointer.
1399  * @param hsp_set1 First linked set of HSPs. [in]
1400  * @param hsp_set2 Second linked set of HSPs. [in]
1401  * @param link_hsp_params Parameters for linking HSPs. [in]
1402  * @param program Type of BLAST program (blastx or tblastn) [in]
1403  * @return Do the two sets satisfy the admissibility criteria to form a
1404  * combined set?
1405  */
1406 static Boolean
1408  BlastLinkedHSPSet* hsp_set2,
1409  const BlastLinkHSPParameters* link_hsp_params,
1410  EBlastProgramType program)
1411 {
1412  Int4 gap_s, gap_q, overlap;
1413  BlastLinkedHSPSet** merged_hsps;
1414  Int4 combined_size = 0;
1415  Int4 index;
1416 
1417  if (!hsp_set1 || !hsp_set2 || !link_hsp_params)
1418  return FALSE;
1419 
1420  /* The first input HSP must be the head of its set. */
1421  if (hsp_set1->prev)
1422  return FALSE;
1423 
1424  /* The second input HSP may not be the head of its set. Hence follow the
1425  previous pointers to get to the head. */
1426  for ( ; hsp_set2->prev; hsp_set2 = hsp_set2->prev);
1427 
1428  /* If left and right HSP are the same, return inadmissible status. */
1429  if (hsp_set1 == hsp_set2)
1430  return FALSE;
1431 
1432  /* Check if these HSPs are for the same protein sequence (same queryId) */
1433  if (hsp_set1->queryId != hsp_set2->queryId)
1434  return FALSE;
1435 
1436  /* Check if new HSP and hsp_set2 are on the same nucleotide sequence strand.
1437  (same sign of subject frame) */
1438  if (SIGN(hsp_set1->hsp->subject.frame) !=
1439  SIGN(hsp_set2->hsp->subject.frame))
1440  return FALSE;
1441 
1442  /* Merge the two sets into an array with increasing order of query
1443  offsets. */
1444  merged_hsps = s_MergeLinkedHSPSets(hsp_set1, hsp_set2, &combined_size);
1445 
1446  gap_s = link_hsp_params->longest_intron; /* Maximal gap size in
1447  subject */
1448  gap_q = link_hsp_params->gap_size; /* Maximal gap size in query */
1449 
1450  overlap = link_hsp_params->overlap_size; /* Maximal overlap size in
1451  query or subject */
1452 
1453  /* swap gap_s and gap_q if blastx */
1454  if (program == eBlastTypeBlastx) {
1455  gap_s = link_hsp_params->gap_size;
1456  gap_q = link_hsp_params->longest_intron;
1457  }
1458 
1459  for (index = 0; index < combined_size - 1; ++index) {
1460  BlastLinkedHSPSet* left_hsp = merged_hsps[index];
1461  BlastLinkedHSPSet* right_hsp = merged_hsps[index+1];
1462 
1463 
1464  /* If the new HSP is too far to the left from the right_hsp, indicate this by
1465  setting the boolean output value to TRUE. */
1466  if (left_hsp->hsp->query.end < right_hsp->hsp->query.offset - gap_q)
1467  break;
1468 
1469  /* Check if the left HSP's query offset is to the right of the right HSP's
1470  offset, i.e. they came in wrong order. */
1471  if (left_hsp->hsp->query.offset >= right_hsp->hsp->query.offset)
1472  break;
1473 
1474  /* Check the remaining condition for query offsets: left HSP cannot end
1475  further than the maximal allowed overlap from the right HSP's offset;
1476  and left HSP must end before the right HSP. */
1477  if (left_hsp->hsp->query.end > right_hsp->hsp->query.offset + overlap ||
1478  left_hsp->hsp->query.end >= right_hsp->hsp->query.end)
1479  break;
1480 
1481  /* Check the subject offsets conditions. */
1482  if (left_hsp->hsp->subject.end >
1483  right_hsp->hsp->subject.offset + overlap ||
1484  left_hsp->hsp->subject.end <
1485  right_hsp->hsp->subject.offset - gap_s ||
1486  left_hsp->hsp->subject.offset >= right_hsp->hsp->subject.offset ||
1487  left_hsp->hsp->subject.end >= right_hsp->hsp->subject.end)
1488  break;
1489  }
1490 
1491  sfree(merged_hsps);
1492 
1493  if (index < combined_size - 1)
1494  return FALSE;
1495 
1496  return TRUE;
1497 }
1498 
1499 /** Sets up an array of wrapper structures for an array of BlastHSP's.
1500  * @param hsp_array Original array of HSP structures. [in]
1501  * @param hspcnt Size of hsp_array. [in]
1502  * @param kbp_array Array of Karlin blocks - structures containing
1503  * Karlin-Altschul parameters. [in]
1504  * @param program BLAST program (tblastn or blastx) [in]
1505  * @return Array of wrapper structures, used for linking HSPs.
1506  */
1507 static BlastLinkedHSPSet**
1509  Blast_KarlinBlk ** kbp_array,
1510  EBlastProgramType program)
1511 {
1512  Int4 index;
1513  BlastLinkedHSPSet** link_hsp_array =
1514  (BlastLinkedHSPSet**) malloc(hspcnt*sizeof(BlastLinkedHSPSet*));
1515 
1516  for (index = 0; index < hspcnt; ++index) {
1517  BlastHSP * hsp = hsp_array[index];
1518  link_hsp_array[index] =
1520 
1521  link_hsp_array[index]->hsp = hsp;
1522  link_hsp_array[index]->sum_score =
1523  kbp_array[hsp->context]->Lambda * hsp->score -
1524  kbp_array[hsp->context]->logK;
1525  link_hsp_array[index]->queryId =
1526  (program == eBlastTypeBlastx) ?
1527  hsp->context / 3 : hsp->context;
1528 
1529  hsp_array[index]->num = 1;
1530  }
1531 
1532  return link_hsp_array;
1533 }
1534 
1535 /** Frees the array of special structures, used for linking HSPs and restores
1536  * the original contexts and subject/query order in BlastHSP structures, when
1537  * necessary.
1538  * @param link_hsp_array Array of wrapper HSP structures, used for linking. [in]
1539  * @param hspcnt Size of the array. [in]
1540  * @return NULL.
1541  */
1542 static BlastLinkedHSPSet**
1544 {
1545  Int4 index;
1546 
1547  /* Free the BlastLinkedHSPSet wrapper structures. */
1548  for (index = 0; index < hspcnt; ++index)
1549  sfree(link_hsp_array[index]);
1550 
1551  sfree(link_hsp_array);
1552  return NULL;
1553 }
1554 
1555 /** Given an array of HSPs (H), sorted in increasing order of query offsets,
1556  * fills an array of indices into array H such that for each i, the index is the
1557  * smallest HSP index, for which query ending offset is >= than query ending
1558  * offset of H[i]. This indexing is performed before any of the HSPs in H are
1559  * linked.
1560  * @param hsp_array Array of wrapper HSP structures. [in]
1561  * @param hspcnt Size of the hsp_array. [in]
1562  * @param qend_index_ptr Pointer to an array of special structures indexing the
1563  * largest query ends in an HSP array sorting by query
1564  * offset.
1565  */
1566 static Int2
1568  Int4** qend_index_ptr)
1569 {
1570  Int4 index;
1571  Int4* qend_index_array = NULL;
1572  BlastLinkedHSPSet* link;
1573  Int4 current_end = 0;
1574  Int4 current_index = 0;
1575 
1576  /* Allocate the array. */
1577  *qend_index_ptr = qend_index_array = (Int4*) calloc(hspcnt, sizeof(Int4));
1578  if (!qend_index_array)
1579  return -1;
1580 
1581  current_end = hsp_array[0]->hsp->query.end;
1582 
1583  for (index = 1; index < hspcnt; ++index) {
1584  link = hsp_array[index];
1585  if (link->queryId > hsp_array[current_index]->queryId ||
1586  link->hsp->query.end > current_end) {
1587  current_index = index;
1588  current_end = link->hsp->query.end;
1589  }
1590  qend_index_array[index] = current_index;
1591  }
1592  return 0;
1593 }
1594 
1595 
1596 /** Greedy algorithm to link HSPs with uneven gaps.
1597  * Sorts HSPs by score. Starting with the highest scoring HSP, finds
1598  * an HSP that produces the best sum e-value when added to the HSP set under
1599  * consideration. The neighboring HSPs in a set must have endpoints within a
1600  * window of each other on the protein axis, and within the longest allowed
1601  * intron length on the nucleotide axis. When no more HSPs can be added to the
1602  * highest scoring set, the next highest scoring HSP is considered that is not
1603  * yet part of any set.
1604  * @param program Type of BLAST program (blastx or tblastn) [in]
1605  * @param hsp_list Structure containing all HSPs for a given subject [in] [out]
1606  * @param query_info Query information, including effective lengths [in]
1607  * @param subject_length Subject sequence length [in]
1608  * @param sbp Scoring and statistical parameters [in]
1609  * @param link_hsp_params Parameters for linking HSPs [in]
1610  * @param gapped_calculation TRUE if input HSPs are from a gapped search [in]
1611  */
1612 static Int2
1614  const BlastQueryInfo* query_info, Int4 subject_length, const BlastScoreBlk* sbp,
1615  const BlastLinkHSPParameters* link_hsp_params, Boolean gapped_calculation)
1616 {
1617  BlastHSP** hsp_array;
1618  BlastLinkedHSPSet** link_hsp_array;
1619  BlastLinkedHSPSet** score_hsp_array; /* an array of HSPs sorted by decreasing
1620  score */
1621  BlastLinkedHSPSet** offset_hsp_array; /* an array of HSPs sorted by increasing
1622  query offset */
1623  BlastLinkedHSPSet* head_hsp;
1624  Int4 hspcnt, index, index1;
1625  Int4 gap_size;
1626  Blast_KarlinBlk ** kbp_array;
1627  Int4* qend_index_array = NULL;
1628 
1629  /* Check input arguments. */
1630  if (!link_hsp_params || !sbp || !query_info)
1631  return -1;
1632 
1633  /* If HSP list is not available or has <= 1 HSPs, there is nothing to do. */
1634  if (!hsp_list || hsp_list->hspcnt <= 1)
1635  return 0;
1636 
1637  if(gapped_calculation) {
1638  kbp_array = sbp->kbp_gap;
1639  } else {
1640  kbp_array = sbp->kbp;
1641  }
1642 
1643  /* max gap size in query */
1644  gap_size = (program == eBlastTypeBlastx) ?
1645  link_hsp_params->longest_intron :
1646  link_hsp_params->gap_size;
1647 
1648  hspcnt = hsp_list->hspcnt;
1649  hsp_array = hsp_list->hsp_array;
1650 
1651  /* Set up an array of HSP structure wrappers. */
1652  link_hsp_array =
1653  s_LinkedHSPSetArraySetUp(hsp_array, hspcnt, kbp_array, program);
1654 
1655  /* Allocate, fill and sort the auxiliary arrays. */
1656  score_hsp_array =
1657  (BlastLinkedHSPSet**) malloc(hspcnt*sizeof(BlastLinkedHSPSet*));
1658  memcpy(score_hsp_array, link_hsp_array, hspcnt*sizeof(BlastLinkedHSPSet*));
1659  qsort(score_hsp_array, hspcnt, sizeof(BlastLinkedHSPSet*),
1661  offset_hsp_array =
1662  (BlastLinkedHSPSet**) malloc(hspcnt*sizeof(BlastLinkedHSPSet*));
1663  memcpy(offset_hsp_array, link_hsp_array, hspcnt*sizeof(BlastLinkedHSPSet*));
1664  qsort(offset_hsp_array, hspcnt, sizeof(BlastLinkedHSPSet*),
1666 
1667  s_LinkedHSPSetArrayIndexQueryEnds(offset_hsp_array, hspcnt, &qend_index_array);
1668 
1669  /* head_hsp is set to NULL whenever there is no current linked set that is
1670  being worked on. */
1671  head_hsp = NULL;
1672  for (index = 0; index < hspcnt && score_hsp_array[index]; ) {
1673  double best_evalue, best_sum_score = 0;
1674  BlastLinkedHSPSet* best_hsp = NULL;
1675  BlastLinkedHSPSet* tail_hsp = NULL;
1676  Int4 hsp_index_left, hsp_index_right;
1677  Int4 left_offset;
1678 
1679  if (!head_hsp) {
1680  /* Find the highest scoring HSP that is not yet part of a linked set.
1681  An HSP is part of a linked set if and only if either prev or next
1682  pointer is not NULL. */
1683  while (index<hspcnt && score_hsp_array[index] &&
1684  (score_hsp_array[index]->next ||
1685  score_hsp_array[index]->prev))
1686  index++;
1687  if (index==hspcnt)
1688  break;
1689  head_hsp = score_hsp_array[index];
1690  }
1691  /* Find the last link in the current HSP set. */
1692  for (tail_hsp = head_hsp; tail_hsp->next; tail_hsp = tail_hsp->next);
1693 
1694  best_evalue = head_hsp->hsp->evalue;
1695  best_sum_score = head_hsp->sum_score;
1696  /* left_offset is the leftmost point where an HSP can end to be
1697  admissible for linking with head_hsp. */
1698  left_offset = head_hsp->hsp->query.offset - gap_size;
1699 
1700  /* Find the smallest index in the offset array, for which an HSP can
1701  possibly be added to the set currently being explored. */
1702  hsp_index_left =
1703  s_HSPOffsetEndBinarySearch(offset_hsp_array, hspcnt, qend_index_array,
1704  head_hsp->queryId, left_offset);
1705 
1706  /* Find the largest index in the offset array, for which an HSP can be
1707  possibly added to the currently explored set. */
1708  hsp_index_right =
1709  s_HSPOffsetBinarySearch(offset_hsp_array, hspcnt,
1710  tail_hsp->queryId,
1711  tail_hsp->hsp->query.end + gap_size);
1712 
1713  for (index1 = hsp_index_left; index1 < hsp_index_right; ++index1) {
1714  BlastLinkedHSPSet* lhsp = offset_hsp_array[index1];
1715 
1716  /* From each previously linked HSP set consider only one
1717  representative - the leftmost HSP whose query end is
1718  >= left_offset. */
1719  if (lhsp->prev && lhsp->prev->hsp->query.end >= left_offset)
1720  continue;
1721 
1722  if (s_LinkedHSPSetsAdmissible(head_hsp, lhsp,
1723  link_hsp_params, program)) {
1724  double evalue, sum_score;
1725  /* Check if the e-value for the new combined HSP set is better
1726  than for the previously obtained set. */
1727  if ((evalue = s_SumHSPEvalue(program, query_info, subject_length,
1728  link_hsp_params, head_hsp, lhsp,
1729  &sum_score)) <
1730  MIN(best_evalue, lhsp->hsp->evalue)) {
1731  best_hsp = lhsp;
1732  best_evalue = evalue;
1733  best_sum_score = sum_score;
1734  }
1735  }
1736  }
1737 
1738  /* Link the new HSP to the set, if it qualified. */
1739  if (best_hsp) {
1740  head_hsp = s_CombineLinkedHSPSets(head_hsp, best_hsp, best_sum_score,
1741  best_evalue);
1742  } else {
1743  head_hsp = NULL;
1744  ++index;
1745  }
1746  }
1747 
1748  /* Free the auxiliary arrays. */
1749  sfree(score_hsp_array);
1750  sfree(offset_hsp_array);
1751  sfree(qend_index_array);
1752 
1753  /* Do the final clean up. */
1754  s_LinkedHSPSetArrayCleanUp(link_hsp_array, hspcnt);
1755 
1756  return 0;
1757 }
1758 
1759 /* see description in link_hsps.h */
1760 Int2
1761 BLAST_LinkHsps(EBlastProgramType program_number, BlastHSPList* hsp_list,
1762  const BlastQueryInfo* query_info, Int4 subject_length,
1763  const BlastScoreBlk* sbp, const BlastLinkHSPParameters* link_hsp_params,
1764  Boolean gapped_calculation)
1765 {
1766  Int4 index;
1767 
1768  if (!hsp_list || hsp_list->hspcnt == 0)
1769  return 0;
1770 
1771  ASSERT(link_hsp_params);
1772 
1773  /* Remove any information on number of linked HSPs from previous
1774  linking. */
1775  for (index = 0; index < hsp_list->hspcnt; ++index)
1776  hsp_list->hsp_array[index]->num = 1;
1777 
1778  /* Link up the HSP's for this hsp_list. */
1779  if (link_hsp_params->longest_intron <= 0) {
1780  s_BlastEvenGapLinkHSPs(program_number, hsp_list, query_info,
1781  subject_length, sbp, link_hsp_params,
1782  gapped_calculation);
1783  /* The HSP's may be in a different order than they were before,
1784  but hsp contains the first one. */
1785  } else {
1786  Blast_HSPListAdjustOddBlastnScores(hsp_list, gapped_calculation, sbp);
1787  /* Calculate individual HSP e-values first - they'll be needed to
1788  compare with sum e-values. Use decay rate to compensate for
1789  multiple tests. */
1790 
1791  Blast_HSPListGetEvalues(program_number, query_info,
1792  Blast_SubjectIsTranslated(program_number) ?
1793  subject_length / CODON_LENGTH : subject_length,
1794  hsp_list, gapped_calculation, FALSE,sbp,
1795  link_hsp_params->gap_decay_rate, 1.0);
1796 
1797  s_BlastUnevenGapLinkHSPs(program_number, hsp_list, query_info,
1798  subject_length, sbp, link_hsp_params,
1799  gapped_calculation);
1800  }
1801 
1802  /* Sort the HSP array by score */
1803  Blast_HSPListSortByScore(hsp_list);
1804 
1805  /* Find and fill the best e-value */
1806  hsp_list->best_evalue = hsp_list->hsp_array[0]->evalue;
1807  for (index = 1; index < hsp_list->hspcnt; ++index) {
1808  if (hsp_list->hsp_array[index]->evalue < hsp_list->best_evalue)
1809  hsp_list->best_evalue = hsp_list->hsp_array[index]->evalue;
1810  }
1811 
1812  return 0;
1813 }
1814 
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
#define CODON_LENGTH
Codons are always of length 3.
Definition: blast_def.h:63
#define NUM_STRANDS
Number of frames in a nucleotide sequence.
Definition: blast_def.h:93
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
Definition: blast_def.h:88
int ScoreCompareHSPs(const void *h1, const void *h2)
Comparison callback function for sorting HSPs, first by score in descending order,...
Definition: blast_hits.c:1330
Int2 Blast_HSPListGetEvalues(EBlastProgramType program_number, const BlastQueryInfo *query_info, Int4 subject_length, BlastHSPList *hsp_list, Boolean gapped_calculation, Boolean RPS_prelim, const BlastScoreBlk *sbp, double gap_decay_rate, double scaling_factor)
Calculate the expected values for all HSPs in a hit list, without using the sum statistics.
Definition: blast_hits.c:1811
void Blast_HSPListAdjustOddBlastnScores(BlastHSPList *hsp_list, Boolean gapped_calculation, const BlastScoreBlk *sbp)
For nucleotide BLAST, if the match reward score is equal to 2, random alignments are dominated by run...
Definition: blast_hits.c:3051
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Definition: blast_hits.c:1374
Utilities for dealing with BLAST HSPs in the core of BLAST.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Definition: blast_program.c:60
Boolean Blast_QueryIsNucleotide(EBlastProgramType p)
Returns true if the query is nucleotide.
Definition: blast_program.c:43
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastx
Definition: blast_program.h:75
@ eBlastTypeTblastx
Definition: blast_program.h:79
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Definition: blast_program.c:63
double BLAST_GapDecayDivisor(double decayrate, unsigned nsegs)
Compute a divisor used to weight the evalue of a collection of "nsegs" distinct alignments.
Definition: blast_stat.c:4063
double BLAST_LargeGapSumE(Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
Calculates the e-value if a collection of distinct alignments with arbitrarily large gaps between the...
Definition: blast_stat.c:4516
double BLAST_UnevenGapSumE(Int4 query_start_points, Int4 subject_start_points, Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
Calculates the e-value of a collection multiple distinct alignments with asymmetric gaps between the ...
Definition: blast_stat.c:4475
double BLAST_SmallGapSumE(Int4 start_points, Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
Calculates the e-value for alignments with "small" gaps (typically under fifty residues/basepairs) fo...
Definition: blast_stat.c:4402
Various auxiliary BLAST utility functions.
static ulg window_size
static DLIST_TYPE *DLIST_NAME() prev(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:61
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
int offset
Definition: replacements.h:160
#define H(x, y, z)
Definition: md4.c:180
#define NULL
Definition: ncbistd.hpp:225
const CVect2< U > & v2
Definition: globals.hpp:440
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int i
if(yy_accept[yy_current_state])
const struct ncbi::grid::netcache::search::fields::SIZE size
#define MIN(a, b)
returns smaller of a and b.
Definition: ncbi_std.h:112
#define INT4_MAX
largest nubmer represented by signed int
Definition: ncbi_std.h:141
#define SIGN(a)
return +1 for a > 0, -1 for a < 0
Definition: ncbi_std.h:127
Uint1 Boolean
bool replacment for C
Definition: ncbi_std.h:94
#define TRUE
bool replacment for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacment for C indicating false.
Definition: ncbi_std.h:101
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
#define MAX(a, b)
returns larger of a and b.
Definition: ncbi_std.h:117
T max(T x_, T y_)
T prob(T x_)
Int4 query_length
Length of this query, strand or frame.
Int4 length_adjustment
Length adjustment for boundary conditions.
Int8 eff_searchsp
Effective search space for this context.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Int4 hspcnt
Number of HSPs saved.
Definition: blast_hits.h:158
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Definition: blast_hits.h:157
double best_evalue
Smallest e-value for HSPs in this list.
Definition: blast_hits.h:162
Structure holding all information about an HSP.
Definition: blast_hits.h:126
double evalue
This HSP's e-value.
Definition: blast_hits.h:130
BlastSeg query
Query sequence info.
Definition: blast_hits.h:131
Int4 context
Context number of query.
Definition: blast_hits.h:133
Int4 num
How many HSP's are linked together for sum statistics evaluation? If unset (0), this HSP is not part ...
Definition: blast_hits.h:135
BlastSeg subject
Subject sequence info.
Definition: blast_hits.h:132
Int4 score
This HSP's raw score.
Definition: blast_hits.h:127
Parameter block for linking HSPs with sum statistics.
double gap_decay_rate
Decay rate for linking HSPs and calculating cutoff scores.
double gap_prob
Probability of decay for linking HSPs.
Int4 cutoff_big_gap
Cutoff sum score for linked HSPs with big gaps.
Int4 cutoff_small_gap
Cutoff sum score for linked HSPs with small gaps.
Int4 overlap_size
Maximal overlap allowed in successive linked HSPs.
Int4 longest_intron
Length of a longest intron for uneven gap linking of HSPs.
Int4 gap_size
Small gap size for linking HSPs.
Simple doubly linked list of HSPs, used for calculating sum statistics.
Definition: link_hsps.c:1098
Uint4 queryId
Used for support of OOF linking.
Definition: link_hsps.c:1100
BlastHSP * hsp
HSP for the current link in the chain.
Definition: link_hsps.c:1099
struct BlastLinkedHSPSet * next
Next link in the chain.
Definition: link_hsps.c:1101
double sum_score
Sum bit score for the linked set.
Definition: link_hsps.c:1103
struct BlastLinkedHSPSet * prev
Previous link in the chain.
Definition: link_hsps.c:1102
The query related information.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Structure used for scoring calculations.
Definition: blast_stat.h:177
Blast_KarlinBlk ** kbp
Karlin-Altschul parameters.
Definition: blast_stat.h:207
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
Definition: blast_stat.h:208
Int4 end
End of hsp.
Definition: blast_hits.h:99
Int2 frame
Translation frame.
Definition: blast_hits.h:97
Int4 offset
Start of hsp.
Definition: blast_hits.h:98
Structure to hold the Karlin-Altschul parameters.
Definition: blast_stat.h:66
double Lambda
Lambda value used in statistics.
Definition: blast_stat.h:67
double logK
natural log of K value used in statistics
Definition: blast_stat.h:69
Structure containing all information internal to the process of linking HSPs.
Definition: link_hsps.c:76
Boolean start_of_chain
If TRUE, this HSP starts a chain along the "link" pointer.
Definition: link_hsps.c:83
Int4 q_offset_trim
Start of trimmed hsp in query.
Definition: link_hsps.c:90
BlastHSP * hsp
Specific HSP this structure corresponds to.
Definition: link_hsps.c:77
Int4 s_offset_trim
Start of trimmed hsp in subject.
Definition: link_hsps.c:92
struct LinkHSPStruct * next
Next HSP in a set, if any.
Definition: link_hsps.c:79
Int4 linked_to
Where this HSP is linked to?
Definition: link_hsps.c:85
Int4 s_end_trim
End of trimmed HSP in subject.
Definition: link_hsps.c:93
Int4 q_end_trim
End of trimmed HSP in query.
Definition: link_hsps.c:91
struct LinkHSPStruct * prev
Previous HSP in a set, if any.
Definition: link_hsps.c:78
double xsum
Normalized score of a set of HSPs.
Definition: link_hsps.c:86
ELinkOrderingMethod ordering_method
Which method (max or no max for gaps) was used for linking HSPs?
Definition: link_hsps.c:87
BlastHSPLink hsp_link
Auxiliary structure for keeping track of sum scores, etc.
Definition: link_hsps.c:80
Boolean linked_set
Is this HSp part of a linked set?
Definition: link_hsps.c:82
The helper array contains the info used frequently in the inner for loops of the HSP linking algorith...
Definition: link_hsps.c:100
Int4 next_larger
offset into array of HelpStructs containing HSP with higher score, used in bailout calculations
Definition: link_hsps.c:106
Int4 q_off_trim
query start of trimmed HSP
Definition: link_hsps.c:102
Int4 maxsum1
threshold for stopping link attempts (?)
Definition: link_hsps.c:105
LinkHSPStruct * ptr
The HSP to which the info belongs.
Definition: link_hsps.c:101
Int4 s_off_trim
subject start of trimmed HSP
Definition: link_hsps.c:103
Int4 sum[eOrderingMethods]
raw score of linked set containing HSP(?)
Definition: link_hsps.c:104
static CS_CONTEXT * context
Definition: will_convert.c:21
voidp malloc(uInt size)
voidp calloc(uInt items, uInt size)
Modified on Fri Sep 20 14:57:21 2024 by modify_doxy.py rev. 669887