NCBI C++ ToolKit
hspfilter_besthit.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: hspfilter_besthit.c 87828 2019-10-09 11:00:47Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Ning Ma
27  *
28  */
29 
30 /** @file hspfilter_besthit.c
31  * Implementation of the BlastHSPWriter interface to save only best hits from
32  * a BLAST search, and subsequently return them in sorted order.
33  */
34 
35 
38 #include "blast_hits_priv.h"
39 
40 /** linked list of HSPs
41  * used to keep best hits for each query.
42  */
43 typedef struct LinkedHSP_BH {
45  Int4 sid; /* OID for hsp*/
46  Int4 begin; /* query offset in plus strand - overhang */
47  Int4 end; /* query end in plus strand + overhang */
48  Int4 len; /* actual length */
49  struct LinkedHSP_BH *next;
51 
52 typedef struct BlastHSPBestHitData {
53  BlastHSPBestHitParams* params; /**< parameters to control overhang */
54  BlastQueryInfo* query_info; /**< query info */
55  LinkedHSP_BH** best_list; /**< buffer to store best hits */
56  Int4* num_hsps; /**< field to record the number of hsps in each list */
57  Int4* max_hsps; /**< max number of hsps to hold before pruning */
59 
60 /*************************************************************/
61 /** The following are implementations for BlastHSPWriter ADT */
62 
63 /** Perform pre-run stage-specific initialization
64  * @param data The internal data structure [in][out]
65  * @param results The HSP results to operate on [in]
66  */
67 static int
68 s_BlastHSPBestHitInit(void* data, void* hsp_results)
69 {
70  int i;
71  BlastHSPBestHitData * bh_data = data;
72  BlastHSPResults* results = (BlastHSPResults*)hsp_results;
73  bh_data->best_list = calloc(results->num_queries, sizeof(LinkedHSP_BH *));
74  bh_data->num_hsps = calloc(results->num_queries, sizeof(Int4));
75  bh_data->max_hsps = calloc(results->num_queries, sizeof(Int4));
76  for (i=0; i<results->num_queries; ++i)
77  /* initially set this to 5 times num_seqs to keep */
78  /* the max hsps to keep will eventually be determined adaptively */
79  bh_data->max_hsps[i] = bh_data->params->prelim_hitlist_size * 2;
80  return 0;
81 }
82 
83 /** Export best_list to hitlist
84  * @param qid The query index [in]
85  * @param bh_data The buffered data structure [in][out]
86  * @param hit_list The hitlist to be populated [in][out]
87  */
88 static int
90  BlastHSPBestHitData *bh_data,
91  BlastHitList * hit_list)
92 {
93  int sid;
94  Boolean allocated;
95  LinkedHSP_BH *best_list = bh_data->best_list[qid], *p;
96  BlastHSPList *list;
97  BlastHitList *tmp_hit_list = Blast_HitListNew(bh_data->num_hsps[qid]);
98  tmp_hit_list->hsplist_current = bh_data->num_hsps[qid];
99  tmp_hit_list->hsplist_array = calloc(tmp_hit_list->hsplist_current,
100  sizeof(BlastHSPList *));
101 
102  while (best_list) {
103 
104  p = best_list;
105  allocated = FALSE;
106  for (sid = 0; sid < tmp_hit_list->hsplist_count; ++sid) {
107  list = tmp_hit_list->hsplist_array[sid];
108  if (p->sid == list->oid) {
109  allocated = TRUE;
110  break;
111  }
112  }
113 
114  if (! allocated) {
115  list = Blast_HSPListNew(bh_data->params->hsp_num_max);
116  list->oid = p->sid;
117  list->query_index = qid;
118  ASSERT(sid < tmp_hit_list->hsplist_current);
119  tmp_hit_list->hsplist_array[sid] = list;
120  tmp_hit_list->hsplist_count++;
121  }
122 
123  Blast_HSPListSaveHSP(list, p->hsp);
124  best_list = p->next;
125  free(p);
126  }
127 
128  bh_data->best_list[qid] = NULL;
129  bh_data->num_hsps[qid] = 0;
130 
131  for (sid = 0; sid < tmp_hit_list->hsplist_count; ++sid) {
132  Blast_HitListUpdate(hit_list, tmp_hit_list->hsplist_array[sid]);
133  tmp_hit_list->hsplist_array[sid] = NULL;
134  }
135 
136  Blast_HitListFree(tmp_hit_list);
137  return 0;
138 }
139 
140 /** Import hitlist to best_list (assuming all hsps are besthits)
141  * @param qid The query index [in]
142  * @param bh_data The buffered data structure [in][out]
143  * @param hit_list The hitlist whose HSPs are moved into best_list; its HSP lists are freed [in][out]
144  */
145 static int
147  BlastHSPBestHitData *bh_data,
148  BlastHitList * hit_list)
149 {
150  int sid, id;
151  LinkedHSP_BH *best_list = bh_data->best_list[qid], *p, *q, *r;
152  BlastHSPList *list;
153  BlastHSP *hsp;
154  int qlen = BlastQueryInfoGetQueryLength(bh_data->query_info,
155  bh_data->params->program, qid);
156 
157  for (sid=0; sid < hit_list->hsplist_count; ++sid) {
158 
159  list = hit_list->hsplist_array[sid];
160  for (id =0; id < list->hspcnt; ++id) {
161 
162  hsp = list->hsp_array[id];
163  r = malloc(sizeof(LinkedHSP_BH));
164  r->hsp = hsp;
165  r->sid = list->oid;
166  r->begin = (bh_data->query_info->contexts[hsp->context].frame < 0 ) ?
167  qlen - hsp->query.end : hsp->query.offset;
168  r->len = hsp->query.end - hsp->query.offset;
169  r->end = r->begin + r->len;
170  for (q=NULL, p=best_list; p && p->begin < r->begin; q=p, p=p->next);
171  r->next = p;
172  list->hsp_array[id] = NULL; /* remove it from hsp_list */
173  if (q) {
174  q->next = r;
175  } else {
176  best_list = bh_data->best_list[qid] = r;
177  }
178  ++(bh_data->num_hsps[qid]);
179  }
180  hit_list->hsplist_array[sid] = Blast_HSPListFree(list);
181  }
182 
183  bh_data->max_hsps[qid] = bh_data->num_hsps[qid] * 2;
184 
185  return 0;
186 }
187 
188 
189 /** Perform post-run clean-ups: move the buffered best hits into the results
190  * and release the per-query buffers.
191  * @param data The buffered data structure [in]
192  * @param results The HSP results to propagate [in][out]
193  */
194 static int
195 s_BlastHSPBestHitFinal(void* data, void* hsp_results)
196 {
197  int qid, sid;
198  BlastHSPBestHitData *bh_data = data;
199  BlastHSPResults* results = (BlastHSPResults*)hsp_results;
200  LinkedHSP_BH **best_list = bh_data->best_list;
201  BlastHitList* hitlist;
202 
203  /* rip best hits off the best_list and put them to results */
204  for (qid=0; qid<results->num_queries; ++qid) {
205  if (best_list[qid]) {
206 
207  if (!results->hitlist_array[qid]) {
208  results->hitlist_array[qid] = Blast_HitListNew(bh_data->params->prelim_hitlist_size);
209  }
210  hitlist = Blast_HitListNew(bh_data->num_hsps[qid]);
211 
212  s_ExportToHitlist(qid, bh_data, hitlist);
213  /* sort hsplists */
214  for (sid=0; sid < hitlist->hsplist_count; ++sid) {
216  }
217  Blast_HitListSortByEvalue(hitlist);
218  for (sid=0; sid < hitlist->hsplist_count; ++sid) {
219  Blast_HitListUpdate(results->hitlist_array[qid], hitlist->hsplist_array[sid] );
220  hitlist->hsplist_array[sid] = NULL;
221  }
222  Blast_HitListFree(hitlist);
223  }
224  }
225 
226  sfree(bh_data->best_list);
227  sfree(bh_data->num_hsps);
228  sfree(bh_data->max_hsps);
229  bh_data->best_list = NULL;
230  return 0;
231 }
232 
233 /** Perform writing task, will save best hits to best_list
 *
 * For every HSP ("hit A") in hsp_list this function
 *   1. derives the query index and A's begin/end on the query (for a
 *      negative frame, begin is qlen - query.end);
 *   2. discards A if an already stored hit covers A's range, has an e-value
 *      no worse than A's, and has a higher score per query base than A's
 *      (A's density is divided by 1 - score_edge for this test);
 *   3. otherwise removes and frees stored hits that A covers, whose e-value
 *      is no better than A's, and whose score per base is lower than A's
 *      (A's density is multiplied by 1 - score_edge for this test);
 *   4. inserts A into the per-query list, which is kept ordered by begin,
 *      and clears A's slot in hsp_list so that it is not freed with it;
 *   5. when the list grows beyond max_hsps[qid], rebuilds it through
 *      s_ExportToHitlist / s_ImportFromHitlist.
 * hsp_list itself is freed before returning, so the caller must not use it
 * afterwards (a NULL hsp_list is accepted and ignored).
 *
234  * @param data To store results to [in][out]
235  * @param hsp_list Pointer to the HSP list to save in the collector. [in]
 * @return Always 0.
236  */
237 static int
238 s_BlastHSPBestHitRun(void* data, BlastHSPList* hsp_list)
239 {
240  Int4 i, qid, qlen, begin, end, lenA, lenB, scoreA, scoreB, overhang;
241  Int4 allowed_begin, allowed_end;
242  double denA, evalueA, evalueB, param_overhang, param_s;
243  BlastHSP *hsp;
244  LinkedHSP_BH *p, *q, *r;
245  Boolean bad;
246 
247  BlastHSPBestHitData* bh_data = data;
248  BlastHSPBestHitParams* params = bh_data->params;
249  EBlastProgramType program = params->program;
250  LinkedHSP_BH **best_list = bh_data->best_list;
251 
252  if (!hsp_list) return 0;
253  param_overhang = params->overhang;
  /* scaling factor applied to A's score density in the two tests below */
254  param_s = 1.0 - params->score_edge;
255 
256  for (i=0; i<hsp_list->hspcnt; ++i) {
257 
258  hsp = hsp_list->hsp_array[i];
259  qid = Blast_GetQueryIndexFromContext(hsp->context, program);
260  qlen = BlastQueryInfoGetQueryLength(bh_data->query_info, program, qid);
261 
  /* for a negative frame the begin coordinate is mirrored: qlen - query.end */
262  begin = (bh_data->query_info->contexts[hsp->context].frame < 0 ) ?
263  qlen - hsp->query.end
264  : hsp->query.offset;
265  lenA = hsp->query.end - hsp->query.offset;
266  end = begin + lenA;
267  scoreA = hsp->score;
268  evalueA = hsp->evalue;
269  denA = 1.0 * scoreA / lenA / param_s;
270 
271  /* See if new hit A is bad */
  /* A is tested with its unpadded begin/end; nodes inserted by this
   * function store begin/end already widened by their own overhang.
   * The first loop skips the leading nodes that end before A ends, the
   * second one examines nodes that start at or before A's begin. */
272  bad = FALSE;
273  for (p=best_list[qid]; p && p->end < end; p=p->next);
274  for ( ; p && p->begin <= begin; p=p->next) {
275  /* check conditions */
276  lenB = p->len;
277  scoreB = p->hsp->score;
278  evalueB= p->hsp->evalue;
279  if ( p->end >= end /* condition 1 */
280  && evalueB <= evalueA /* condition 2 */
281  && 1.0 * scoreB / lenB > denA) /* condition 3 */
282  {
283  /* the new hit is bad, do nothing */
284  bad = TRUE;
285  break;
286  }
287  }
  /* a rejected A stays in hsp_list and is freed with it at the end */
288  if (bad) continue;
289 
290  /* See if new hit A makes some old hits bad */
  /* allowed_begin / allowed_end bound the stretch of the list scanned
   * below; begin / end are then widened by lenA * overhang on each side
   * and remain widened for the insertion further down. */
291  overhang = 2.0 * lenA * param_overhang / (1.0 - 2.0 * param_overhang);
292  allowed_begin = begin - overhang;
293  allowed_end = end + overhang;
294  overhang = lenA * param_overhang;
295  begin -= overhang;
296  end += overhang;
297  denA = 1.0 * scoreA / lenA * param_s;
298  /* use q to remember node before p */
299  for (q=NULL, p=best_list[qid]; p && p->begin < allowed_begin; q=p, p=p->next);
300  for (; p && p->begin < allowed_end; ) {
301  /* check conditions */
302  lenB = p->len;
303  scoreB = p->hsp->score;
  /* per-side padding stored on node B: (stored span - actual length) / 2 */
304  overhang = (p->end - p->begin - lenB)/2;
305  evalueB= p->hsp->evalue;
306  if ( p->begin + overhang >= begin
307  && p->end - overhang <= end /* condition 1 */
308  && evalueB >= evalueA /* condition 2 */
309  && 1.0 * scoreB / lenB < denA) /* condition 3 */
310  { /* remove it from best list */
  /* unlink B, free its HSP and its node, and keep the count in step */
311  r = p;
312  if (q) q->next = p->next;
313  else best_list[qid] = p->next;
314  p = p->next;
315  r->hsp = Blast_HSPFree(r->hsp);
316  free(r);
317  --(bh_data->num_hsps[qid]);
318  } else {
319  q = p;
320  p = p->next;
321  }
322  }
323 
324  /* Insert hit A into the best_list and hit_list */
  /* the insertion point keeps the list ordered by (widened) begin */
  /* NOTE(review): the malloc result below is used without a NULL check */
325  for (q=NULL, p=best_list[qid]; p && p->begin < begin; q=p, p=p->next);
326  r = malloc(sizeof(LinkedHSP_BH));
327  r->hsp = hsp;
328  r->sid = hsp_list->oid;
329  r->begin = begin;
330  r->end = end;
331  r->len = lenA;
332  r->next = p;
333  hsp_list->hsp_array[i] = NULL; /* remove it from hsp_list */
334  if (q) {
335  q->next = r;
336  } else {
337  best_list[qid] = r;
338  }
339 
  /* Once the list outgrows max_hsps[qid] it is round-tripped through a
   * temporary hit list: s_ExportToHitlist empties best_list[qid] into
   * hitlist, and s_ImportFromHitlist rebuilds best_list[qid] from it and
   * sets max_hsps[qid] to twice the resulting count.
   * NOTE(review): any actual trimming presumably happens inside
   * Blast_HitListUpdate - confirm against blast_hits.c. */
340  if ( ++(bh_data->num_hsps[qid]) > bh_data->max_hsps[qid]) {
341  BlastHitList *hitlist = Blast_HitListNew(bh_data->num_hsps[qid]);
342  s_ExportToHitlist(qid, bh_data, hitlist);
343  s_ImportFromHitlist(qid, bh_data, hitlist);
344  Blast_HitListFree(hitlist);
345  }
346  }
347 
348  /* now all qualified hits have been moved to best_list, we can remove hsp_list */
349  Blast_HSPListFree(hsp_list);
350 
351  return 0;
352 }
353 
354 /** Perform writing task for RPS blast, will save best hits to best_list
355  * @param data To store results to [in][out]
356  * @param hsp_list Pointer to the HSP list to save in the collector. [in]
357  */
358 static int
360 {
361  Int4 i, qid, begin, end, lenA, lenB, scoreA, scoreB, overhang;
362  Int4 allowed_begin, allowed_end;
363  double denA, evalueA, evalueB, param_overhang, param_s;
364  BlastHSP *hsp;
365  LinkedHSP_BH *p, *q, *r;
366  Boolean bad;
367 
368  BlastHSPBestHitData* bh_data = data;
369  BlastHSPBestHitParams* params = bh_data->params;
370  LinkedHSP_BH **best_list = bh_data->best_list;
371 
372  if (!hsp_list) return 0;
373  param_overhang = params->overhang;
374  param_s = 1.0 - params->score_edge;
375 
376  for (i=0; i<hsp_list->hspcnt; ++i) {
377 
378  hsp = hsp_list->hsp_array[i];
379  qid = hsp_list->query_index;
380  begin = hsp->query.offset;
381  lenA = hsp->query.end - hsp->query.offset;
382  end = begin + lenA;
383  scoreA = hsp->score;
384  evalueA = hsp->evalue;
385  denA = 1.0 * scoreA / lenA / param_s;
386 
387  /* See if new hit A is bad */
388  bad = FALSE;
389  for (p=best_list[qid]; p && p->end < end; p=p->next);
390  for ( ; p && p->begin < begin; p=p->next) {
391  /* check conditions */
392  lenB = p->len;
393  scoreB = p->hsp->score;
394  evalueB= p->hsp->evalue;
395  if ( p->end >= end /* condition 1 */
396  && evalueB <= evalueA /* condition 2 */
397  && 1.0 * scoreB / lenB > denA) /* condition 3 */
398  {
399  /* the new hit is bad, do nothing */
400  bad = TRUE;
401  break;
402  }
403  }
404  if (bad) continue;
405 
406  /* See if new hit A makes some old hits bad */
407  overhang = 2.0 * lenA * param_overhang / (1.0 - 2.0 * param_overhang);
408  allowed_begin = begin - overhang;
409  allowed_end = end + overhang;
410  overhang = lenA * param_overhang;
411  begin -= overhang;
412  end += overhang;
413  denA = 1.0 * scoreA / lenA * param_s;
414  /* use q to remember node before p */
415  for (q=NULL, p=best_list[qid]; p && p->begin < allowed_begin; q=p, p=p->next);
416  for (; p && p->begin < allowed_end; ) {
417  /* check conditions */
418  lenB = p->len;
419  scoreB = p->hsp->score;
420  overhang = (p->end - p->begin - lenB)/2;
421  evalueB= p->hsp->evalue;
422  if ( p->begin + overhang >= begin
423  && p->end - overhang <= end /* condition 1 */
424  && evalueB >= evalueA /* condition 2 */
425  && 1.0 * scoreB / lenB < denA) /* condition 3 */
426  { /* remove it from best list */
427  r = p;
428  if (q) q->next = p->next;
429  else best_list[qid] = p->next;
430  p = p->next;
431  r->hsp = Blast_HSPFree(r->hsp);
432  free(r);
433  } else {
434  q = p;
435  p = p->next;
436  }
437  }
438 
439  /* Insert hit A into the best_list and hit_list */
440  for (q=NULL, p=best_list[qid]; p && p->begin < begin; q=p, p=p->next);
441  r = malloc(sizeof(LinkedHSP_BH));
442  r->hsp = hsp;
443  r->sid = hsp->context;
444  hsp->context = qid;
445  r->begin = begin;
446  r->end = end;
447  r->len = lenA;
448  r->next = p;
449  hsp_list->hsp_array[i] = NULL; /* remove it from hsp_list */
450  if (q) {
451  q->next = r;
452  } else {
453  best_list[qid] = r;
454  }
455  }
456 
457  /* now all qualified hits have been moved to best_list, we can remove hsp_list */
458  Blast_HSPListFree(hsp_list);
459 
460  return 0;
461 }
462 
463 /** Free the writer
464  * @param writer The writer to free [in]
465  * @return NULL.
466  */
467 static
470 {
471  BlastHSPBestHitData *data = writer->data;
472  sfree(data->params);
473  sfree(writer->data);
474  sfree(writer);
475  return NULL;
476 }
477 
478 /** create the writer
479  * @param params Pointer to the besthit parameter [in]
480  * @param query_info BlastQueryInfo [in]
481  * @return writer
482  */
483 static
485 s_BlastHSPBestHitNew(void* params, BlastQueryInfo* query_info,
486  BLAST_SequenceBlk* sequence)
487 {
488  BlastHSPWriter * writer = NULL;
489  BlastHSPBestHitData data;
490  BlastHSPBestHitParams * bh_param = params;
491 
492  /* best hit algo needs query_info */
493  if (! query_info) return NULL;
494 
495  /* allocate space for writer */
496  writer = malloc(sizeof(BlastHSPWriter));
497 
498  /* fill up the function pointers */
499  writer->InitFnPtr = &s_BlastHSPBestHitInit;
501  writer->FreeFnPtr = &s_BlastHSPBestHitFree;
502  writer->RunFnPtr = (Blast_ProgramIsRpsBlast(bh_param->program))
505 
506  /* allocate for data structure */
507  writer->data = malloc(sizeof(BlastHSPBestHitData));
508  data.params = params;
509  data.query_info = query_info;
510  memcpy(writer->data, &data, sizeof(data));
511 
512  return writer;
513 }
514 
515 /** The pipe version of best-hit writer.
516  * @param data To store results to [in][out]
517  * @param results The HSP results to filter; its hit lists are consumed and rebuilt [in][out]
518  */
519 static int
521 {
522  int qid, sid, num_list;
523  s_BlastHSPBestHitInit(data, results);
525  for (qid = 0; qid < results->num_queries; ++qid) {
526  if (!(results->hitlist_array[qid])) continue;
527  num_list = results->hitlist_array[qid]->hsplist_count;
528  for (sid = 0; sid < num_list; ++sid) {
529  s_BlastHSPBestHitRun(data,
530  results->hitlist_array[qid]->hsplist_array[sid]);
531  results->hitlist_array[qid]->hsplist_array[sid] = NULL;
532  }
533  results->hitlist_array[qid]->hsplist_count = 0;
534  Blast_HitListFree(results->hitlist_array[qid]);
535  results->hitlist_array[qid] = NULL;
536  }
537  s_BlastHSPBestHitFinal(data, results);
538  return 0;
539 }
540 
541 /** Free the pipe
542  * @param pipe The pipe to free [in]
543  * @return NULL.
544  */
545 static
548 {
549  BlastHSPBestHitData *data = pipe->data;
550  sfree(data->params);
551  sfree(pipe->data);
552  sfree(pipe);
553  return NULL;
554 }
555 
556 /** create the pipe
557  * @param params Pointer to the besthit parameter [in]
558  * @param query_info BlastQueryInfo [in]
559  * @return pipe
560  */
561 static
562 BlastHSPPipe*
563 s_BlastHSPBestHitPipeNew(void* params, BlastQueryInfo* query_info)
564 {
565  BlastHSPPipe * pipe = NULL;
566  BlastHSPBestHitData data;
567 
568  /* best hit algo needs query_info */
569  if (! query_info) return NULL;
570 
571  /* allocate space for pipe */
572  pipe = malloc(sizeof(BlastHSPPipe));
573 
574  /* fill up the function pointers */
577 
578  /* allocate for data structure */
579  pipe->data = malloc(sizeof(BlastHSPBestHitData));
580  data.params = params;
581  data.query_info = query_info;
582  memcpy(pipe->data, &data, sizeof(data));
583  pipe->next = NULL;
584 
585  return pipe;
586 }
587 
588 /**************************************************************/
589 /** The following are exported functions to be used by APP */
590 
593  const BlastHSPBestHitOptions* best_hit_opts,
594  Int4 compositionBasedStats,
595  Boolean gapped_calculation)
596 {
597  BlastHSPBestHitParams* retval = NULL;
598 
601  compositionBasedStats, gapped_calculation);
602  retval->hsp_num_max = BlastHspNumMax(gapped_calculation, hit_options);
603  retval->program = hit_options->program_number;
604  retval->overhang = best_hit_opts->overhang;
605  retval->score_edge = best_hit_opts->score_edge;
606  return retval;
607 }
608 
611 {
612  if ( !opts )
613  return NULL;
614  sfree(opts);
615  return NULL;
616 }
617 
620  BlastHSPWriterInfo * writer_info =
621  malloc(sizeof(BlastHSPWriterInfo));
622  writer_info->NewFnPtr = &s_BlastHSPBestHitNew;
623  writer_info->params = params;
624  return writer_info;
625 }
626 
629  BlastHSPPipeInfo * pipe_info =
630  malloc(sizeof(BlastHSPPipeInfo));
631  pipe_info->NewFnPtr = &s_BlastHSPBestHitPipeNew;
632  pipe_info->params = params;
633  pipe_info->next = NULL;
634  return pipe_info;
635 }
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
BlastHitList * Blast_HitListFree(BlastHitList *hitlist)
Deallocate memory for the hit list.
Definition: blast_hits.c:3131
Int4 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions *options)
Calculated the number of HSPs that should be saved.
Definition: blast_hits.c:213
Int2 Blast_HSPResultsSortByEvalue(BlastHSPResults *results)
Sort each hit list in the BLAST results by best e-value.
Definition: blast_hits.c:3375
BlastHitList * Blast_HitListNew(Int4 hitlist_size)
Allocate memory for a hit list of a given size.
Definition: blast_hits.c:3117
Int2 Blast_HitListSortByEvalue(BlastHitList *hit_list)
Sort BlastHitList by e-value.
Definition: blast_hits.c:3323
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Definition: blast_hits.c:1558
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
Definition: blast_hits.c:130
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
Definition: blast_hits.c:1754
Int4 GetPrelimHitlistSize(Int4 hitlist_size, Int4 compositionBasedStats, Boolean gapped_calculation)
Definition: blast_hits.c:44
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
Definition: blast_hits.c:1542
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Definition: blast_hits.c:1374
Int2 Blast_HitListUpdate(BlastHitList *hit_list, BlastHSPList *hsp_list)
Insert a new HSP list into the hit list.
Definition: blast_hits.c:3235
Utilities for dealing with BLAST HSPs in the core of BLAST.
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST (i.e.
Definition: blast_program.c:73
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
Int4 Blast_GetQueryIndexFromContext(Int4 context, EBlastProgramType program)
Given a context from BLAST engine core, return the query index.
Int4 BlastQueryInfoGetQueryLength(const BlastQueryInfo *qinfo, EBlastProgramType program, Int4 query_index)
Obtains the sequence length for a given query in the query, without taking into consideration any app...
Various auxiliary BLAST utility functions.
#define NULL
Definition: ncbistd.hpp:225
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
static int s_BlastHSPBestHitFinal(void *data, void *hsp_results)
Perform post-run clean-ups.
static BlastHSPWriter * s_BlastHSPBestHitFree(BlastHSPWriter *writer)
Free the writer.
static BlastHSPWriter * s_BlastHSPBestHitNew(void *params, BlastQueryInfo *query_info, BLAST_SequenceBlk *sequence)
create the writer
static int s_BlastHSPBestHitRun(void *data, BlastHSPList *hsp_list)
Perform writing task, will save best hits to best_list.
BlastHSPPipeInfo * BlastHSPBestHitPipeInfoNew(BlastHSPBestHitParams *params)
static int s_ExportToHitlist(int qid, BlastHSPBestHitData *bh_data, BlastHitList *hit_list)
Export best_list to hitlist.
static BlastHSPPipe * s_BlastHSPBestHitPipeNew(void *params, BlastQueryInfo *query_info)
create the pipe
static int s_BlastHSPBestHitPipeRun(void *data, BlastHSPResults *results)
The pipe version of best-hit writer.
BlastHSPWriterInfo * BlastHSPBestHitInfoNew(BlastHSPBestHitParams *params)
WriterInfo and PipeInfo to create a best hit writer/pipe.
struct BlastHSPBestHitData BlastHSPBestHitData
static int s_BlastHSPBestHitInit(void *data, void *hsp_results)
The following are implementations for BlastHSPWriter ADT.
static BlastHSPPipe * s_BlastHSPBestHitPipeFree(BlastHSPPipe *pipe)
Free the pipe.
struct LinkedHSP_BH LinkedHSP_BH
linked list of HSPs used to keep best hits for each query.
static int s_BlastHSPBestHitRun_RPS(void *data, BlastHSPList *hsp_list)
Perform writing task for RPS blast, will save best hits to best_list.
BlastHSPBestHitParams * BlastHSPBestHitParamsFree(BlastHSPBestHitParams *opts)
Deallocates the BlastHSPBestHitParams structure passed in.
BlastHSPBestHitParams * BlastHSPBestHitParamsNew(const BlastHitSavingOptions *hit_options, const BlastHSPBestHitOptions *best_hit_opts, Int4 compositionBasedStats, Boolean gapped_calculation)
The following are exported functions to be used by APP.
static int s_ImportFromHitlist(int qid, BlastHSPBestHitData *bh_data, BlastHitList *hit_list)
Import hitlist to best_list (assuming all hsps are besthits)
Implementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
for(len=0;yy_str[len];++len)
int i
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Structure to hold a sequence.
Definition: blast_def.h:242
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
Int4 * max_hsps
max number of hsps to hold before pruning
BlastQueryInfo * query_info
query info
Int4 * num_hsps
field to record the number of hsps in each list
BlastHSPBestHitParams * params
parameters to control overhang
LinkedHSP_BH ** best_list
buffer to store best hits
Options for the Best Hit HSP collection algorithm.
Keeps parameters used in best hit algorithm.
double score_edge
fraction of score margin in condition 4
double overhang
overhang used in condition 1.
Int4 hsp_num_max
number of HSPs to save per db sequence.
Int4 prelim_hitlist_size
number of hits saved during preliminary part of search.
EBlastProgramType program
program type.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Definition: blast_hits.h:154
Int4 hspcnt
Number of HSPs saved.
Definition: blast_hits.h:158
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Definition: blast_hits.h:157
Int4 query_index
Index of the query which this HSPList corresponds to.
Definition: blast_hits.h:155
A wrap of data structure used to create a pipe.
BlastHSPPipeNewFn NewFnPtr
struct BlastHSPPipeInfo * next
the next pipe info in chain
ADT definition of BlastHSPPipe.
void * data
data structure
BlastHSPPipe * next
the next pipe in chain
BlastHSPPipeRunFn RunFnPtr
BlastHSPPipeFreeFn FreeFnPtr
The structure to contain all BLAST results, for multiple queries.
Definition: blast_hits.h:183
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Definition: blast_hits.h:185
Int4 num_queries
Number of query sequences.
Definition: blast_hits.h:184
A wrap of data structure used to create a writer.
BlastHSPWriterNewFn NewFnPtr
ADT definition of BlastHSPWriter.
void * data
data structure
BlastHSPWriterFinalFn FinalFnPtr
BlastHSPWriterFreeFn FreeFnPtr
BlastHSPWriterRunFn RunFnPtr
BlastHSPWriterInitFn InitFnPtr
Structure holding all information about an HSP.
Definition: blast_hits.h:126
double evalue
This HSP's e-value.
Definition: blast_hits.h:130
BlastSeg query
Query sequence info.
Definition: blast_hits.h:131
Int4 context
Context number of query.
Definition: blast_hits.h:133
Int4 score
This HSP's raw score.
Definition: blast_hits.h:127
The structure to contain all BLAST results for one query sequence.
Definition: blast_hits.h:169
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Definition: blast_hits.h:176
Int4 hsplist_count
Filled size of the HSP lists array.
Definition: blast_hits.h:170
Int4 hsplist_current
Number of allocated HSP list arrays.
Definition: blast_hits.h:178
Options used when evaluating and saving hits These include: a.
EBlastProgramType program_number
indicates blastn, blastp, etc.
Int4 hitlist_size
Maximal number of database sequences to return results for.
The query related information.
BlastContextInfo * contexts
Information per context.
Int4 end
End of hsp.
Definition: blast_hits.h:99
Int4 offset
Start of hsp.
Definition: blast_hits.h:98
linked list of HSPs used to keep best hits for each query.
struct LinkedHSP_BH * next
BlastHSP * hsp
@ FALSE
Definition: testodbc.c:27
@ TRUE
Definition: testodbc.c:27
void free(voidpf ptr)
voidp malloc(uInt size)
voidp calloc(uInt items, uInt size)
Modified on Fri Mar 01 10:06:17 2024 by modify_doxy.py rev. 669887