NCBI C++ ToolKit
blast_hspstream.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_hspstream.c 100164 2023-06-28 13:36:01Z merezhuk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Ilya Dondoshansky
27  *
28  */
29 
30 /** @file blast_hspstream.c
31  * BlastHSPStream is used to save hits from preliminary stage and
32  * pass on to the traceback stage.
33  */
34 
35 
39 
40 /** Default hit saving stream methods */
41 
42 /** Free the BlastHSPStream with its HSP list collector data structure.
43  * @param hsp_stream The HSP stream to free [in]
44  * @return NULL.
45  */
47 {
48  int index=0;
49  BlastHSPPipe *p;
50 
51  if (!hsp_stream) {
52  return NULL;
53  }
54 
55  hsp_stream->x_lock = MT_LOCK_Delete(hsp_stream->x_lock);
56  Blast_HSPResultsFree(hsp_stream->results);
57  for (index=0; index < hsp_stream->num_hsplists; index++)
58  {
59  hsp_stream->sorted_hsplists[index] =
60  Blast_HSPListFree(hsp_stream->sorted_hsplists[index]);
61  }
62  sfree(hsp_stream->sort_by_score);
63  sfree(hsp_stream->sorted_hsplists);
64 
65  if (hsp_stream->writer) {
66  (hsp_stream->writer->FreeFnPtr) (hsp_stream->writer);
67  hsp_stream->writer = NULL;
68  }
69 
70  /* free un-used pipes */
71  while (hsp_stream->pre_pipe) {
72  p = hsp_stream->pre_pipe;
73  hsp_stream->pre_pipe = p->next;
74  sfree(p);
75  }
76  while (hsp_stream->tback_pipe) {
77  p = hsp_stream->tback_pipe;
78  hsp_stream->tback_pipe = p->next;
79  sfree(p);
80  }
81 
82  sfree(hsp_stream);
83  return NULL;
84 }
85 
86 /** callback used to sort HSP lists in order of decreasing OID
87  * @param x First HSP list [in]
88  * @param y Second HSP list [in]
89  * @return compare result
90  */
91 static int s_SortHSPListByOid(const void *x, const void *y)
92 {
93  BlastHSPList **xx = (BlastHSPList **)x;
94  BlastHSPList **yy = (BlastHSPList **)y;
95  return (*yy)->oid - (*xx)->oid;
96 }
97 
98 /** certain hspstreams (such as besthit and culling) uses its own data structure
99  * and therefore must be finalized before reading/merging
100  */
101 static void s_FinalizeWriter(BlastHSPStream* hsp_stream)
102 {
103  BlastHSPPipe *pipe;
104  if (!hsp_stream || !hsp_stream->results || hsp_stream->writer_finalized)
105  return;
106 
107  /* perform post-writer clean ups */
108  if (hsp_stream->writer) {
109  if (!hsp_stream->writer_initialized) {
110  /* some filter (e.g. hsp_queue) always needs finalization */
111  (hsp_stream->writer->InitFnPtr)
112  (hsp_stream->writer->data, hsp_stream->results);
113  }
114  (hsp_stream->writer->FinalFnPtr)
115  (hsp_stream->writer->data, hsp_stream->results);
116  }
117 
118  /* apply preliminary stage pipes */
119  while (hsp_stream->pre_pipe) {
120  pipe = hsp_stream->pre_pipe;
121  hsp_stream->pre_pipe = pipe->next;
122  (pipe->RunFnPtr) (pipe->data, hsp_stream->results);
123  (pipe->FreeFnPtr) (pipe);
124  }
125 
126  hsp_stream->writer_finalized = TRUE;
127 }
128 
129 /** Prohibit any future writing to the HSP stream when all results are written.
130  * Also perform sorting of results here to prepare them for reading.
131  * @param hsp_stream The HSP stream to close [in] [out]
132  */
134 {
135  Int4 i, j, k;
136  Int4 num_hsplists;
137  BlastHSPResults *results;
138 
139  if (!hsp_stream || !hsp_stream->results || hsp_stream->results_sorted)
140  return;
141 
142  s_FinalizeWriter(hsp_stream);
143 
144  if (hsp_stream->sort_by_score) {
145  if (hsp_stream->sort_by_score->sort_on_read) {
147  } else {
148  /* Reverse the order of HSP lists, because they will be returned
149  starting from end, for the sake of convenience */
151  }
152  hsp_stream->results_sorted = TRUE;
153  hsp_stream->x_lock = MT_LOCK_Delete(hsp_stream->x_lock);
154  return;
155  }
156 
157  results = hsp_stream->results;
158  num_hsplists = hsp_stream->num_hsplists;
159 
160  /* concatenate all the HSPLists from 'results' */
161 
162  for (i = 0; i < results->num_queries; i++) {
163 
164  BlastHitList *hitlist = results->hitlist_array[i];
165  if (hitlist == NULL)
166  continue;
167 
168  /* grow the list if necessary */
169 
170  if (num_hsplists + hitlist->hsplist_count >
171  hsp_stream->num_hsplists_alloc) {
172 
173  Int4 alloc = MAX(num_hsplists + hitlist->hsplist_count + 100,
174  2 * hsp_stream->num_hsplists_alloc);
175  hsp_stream->num_hsplists_alloc = alloc;
176  hsp_stream->sorted_hsplists = (BlastHSPList **)realloc(
177  hsp_stream->sorted_hsplists,
178  alloc * sizeof(BlastHSPList *));
179  }
180 
181  for (j = k = 0; j < hitlist->hsplist_count; j++) {
182 
183  BlastHSPList *hsplist = hitlist->hsplist_array[j];
184  if (hsplist == NULL)
185  continue;
186 
187  hsplist->query_index = i;
188  hsp_stream->sorted_hsplists[num_hsplists + k] = hsplist;
189  k++;
190  }
191 
192  hitlist->hsplist_count = 0;
193  num_hsplists += k;
194  }
195 
196  /* sort in order of decreasing subject OID. HSPLists will be
197  read out from the end of hsplist_array later */
198 
199  hsp_stream->num_hsplists = num_hsplists;
200  if (num_hsplists > 1) {
201  qsort(hsp_stream->sorted_hsplists, num_hsplists,
202  sizeof(BlastHSPList *), s_SortHSPListByOid);
203  }
204 
205  hsp_stream->results_sorted = TRUE;
206  hsp_stream->x_lock = MT_LOCK_Delete(hsp_stream->x_lock);
207 }
208 
210  BlastMappingResults* results)
211 {
212  if (!hsp_stream || !hsp_stream->writer)
213  return;
214 
215  if (!hsp_stream->writer_initialized) {
216  (hsp_stream->writer->InitFnPtr)
217  (hsp_stream->writer->data, hsp_stream->results);
218  }
219  (hsp_stream->writer->FinalFnPtr)
220  (hsp_stream->writer->data, results);
221 
222  hsp_stream->writer_finalized = TRUE;
223  hsp_stream->x_lock = MT_LOCK_Delete(hsp_stream->x_lock);
224 }
225 
227 {
228  if (!hsp_stream)
229  return;
230 
231  s_FinalizeWriter(hsp_stream);
232 
233  hsp_stream->x_lock = MT_LOCK_Delete(hsp_stream->x_lock);
234 }
235 
236 /** Closing the HSP after traceback is done.
237  * This is mainly to provide a chance to apply post-traceback pipes.
238  * @param hsp_stream The HSP stream to close [in] [out]
239  * @param results The traceback results [in] [out]
240  */
242  BlastHSPResults* results)
243 {
244  BlastHSPPipe *pipe;
245 
246  if (!hsp_stream || !results) {
247  return;
248  }
249 
250  /* apply traceback stage pipes */
251  while (hsp_stream->tback_pipe) {
252  pipe = hsp_stream->tback_pipe;
253  hsp_stream->tback_pipe = pipe->next;
254  (pipe->RunFnPtr) (pipe->data, results);
255  (pipe->FreeFnPtr) (pipe);
256  }
257  return;
258 }
259 
/** Standard error return value for BlastHSPStream methods. */
const int kBlastHSPStream_Error = -1;
/** Standard success return value for BlastHSPStream methods. */
const int kBlastHSPStream_Success = 0;
/** Return value when the end of the stream is reached (applicable to read
 * method only). */
const int kBlastHSPStream_Eof = 1;
263 
264 /** Read one HSP list from the results saved in an HSP list collector. Once an
265  * HSP list is read from the stream, it relinquishes ownership and removes it
266  * from the internal results data structure.
267  * @param hsp_stream The HSP stream to read from [in]
268  * @param hsp_list_out The read HSP list. [out]
269  * @return Success, error, or end of reading, when nothing left to read.
270  */
271 int BlastHSPStreamRead(BlastHSPStream* hsp_stream, BlastHSPList** hsp_list_out)
272 {
273  *hsp_list_out = NULL;
274 
275  if (!hsp_stream)
276  return kBlastHSPStream_Error;
277 
278  if (!hsp_stream->results)
279  return kBlastHSPStream_Eof;
280 
281  /* If this stream is not yet closed for writing, close it. In particular,
282  this includes sorting of results.
283  NB: to lift the prohibition on write after the first read, the
284  following 2 lines should be removed, and stream closure for writing
285  should be done outside of the read function. */
286  if (!hsp_stream->results_sorted)
287  BlastHSPStreamClose(hsp_stream);
288 
289  if (hsp_stream->sort_by_score) {
290  Int4 last_hsplist_index = -1, index = 0;
291  BlastHitList* hit_list = NULL;
292  BlastHSPResults* results = hsp_stream->results;
293 
294  /* Find index of the first query that has results. */
295  for (index = hsp_stream->sort_by_score->first_query_index;
296  index < results->num_queries; ++index) {
297  if (results->hitlist_array[index] &&
298  results->hitlist_array[index]->hsplist_count > 0)
299  break;
300  }
301  if (index >= results->num_queries)
302  return kBlastHSPStream_Eof;
303 
304  hsp_stream->sort_by_score->first_query_index = index;
305 
306  hit_list = results->hitlist_array[index];
307  last_hsplist_index = hit_list->hsplist_count - 1;
308 
309  *hsp_list_out = hit_list->hsplist_array[last_hsplist_index];
310  /* Assign the query index here so the caller knows which query this HSP
311  list comes from */
312  (*hsp_list_out)->query_index = index;
313  /* Dequeue this HSP list by decrementing the HSPList count */
314  --hit_list->hsplist_count;
315  if (hit_list->hsplist_count == 0) {
316  /* Advance the first query index, without checking that the next
317  * query has results - that will be done on the next call. */
318  ++hsp_stream->sort_by_score->first_query_index;
319  }
320  } else {
321  /* return the next HSPlist out of the collection stored */
322 
323  if (!hsp_stream->num_hsplists)
324  return kBlastHSPStream_Eof;
325 
326  *hsp_list_out =
327  hsp_stream->sorted_hsplists[--hsp_stream->num_hsplists];
328 
329  }
331 }
332 
333 /** Write an HSP list to the collector HSP stream. The HSP stream assumes
334  * ownership of the HSP list and sets the dereferenced pointer to NULL.
335  * @param hsp_stream Stream to write to. [in] [out]
336  * @param hsp_list Pointer to the HSP list to save in the collector. [in]
337  * @return Success or error, if stream is already closed for writing.
338  */
339 int BlastHSPStreamWrite(BlastHSPStream* hsp_stream, BlastHSPList** hsp_list)
340 {
341  Int2 status = 0;
342 
343  if (!hsp_stream)
344  return kBlastHSPStream_Error;
345 
346  /** Lock the mutex, if necessary */
347  MT_LOCK_Do(hsp_stream->x_lock, eMT_Lock);
348 
349  /** Prohibit writing after reading has already started. This prohibition
350  * can be lifted later. There is no inherent problem in using read and
351  * write in any order, except that sorting would have to be done on
352  * every read after a write.
353  */
354  if (hsp_stream->results_sorted) {
355  MT_LOCK_Do(hsp_stream->x_lock, eMT_Unlock);
356  return kBlastHSPStream_Error;
357  }
358 
359  if (hsp_stream->writer) {
360  /** if writer has not been initialized, initialize it first */
361  if (!(hsp_stream->writer_initialized)) {
362  (hsp_stream->writer->InitFnPtr)
363  (hsp_stream->writer->data, hsp_stream->results);
364  hsp_stream->writer_initialized = TRUE;
365  }
366 
367  /** filtering processing */
368  status = (hsp_stream->writer->RunFnPtr)
369  (hsp_stream->writer->data, *hsp_list);
370  }
371 
372  if (status != 0) {
373  MT_LOCK_Do(hsp_stream->x_lock, eMT_Unlock);
374  return kBlastHSPStream_Error;
375  }
376  /* Results structure is no longer sorted, even if it was before.
377  The following assignment is only necessary if the logic to prohibit
378  writing after the first read is removed. */
379  hsp_stream->results_sorted = FALSE;
380 
381  /* Free the caller from this pointer's ownership. */
382  *hsp_list = NULL;
383 
384  /** Unlock the mutex */
385  MT_LOCK_Do(hsp_stream->x_lock, eMT_Unlock);
386 
388 }
389 
390 /* #define _DEBUG_VERBOSE 1 */
391 /** Merge two HSPStreams. The HSPs from the first stream are
392  * moved to the second stream.
393  * @param squery_blk Structure controlling the merge process [in]
394  * @param chunk_num Unique integer assigned to hsp_stream [in]
395  * @param stream1 The stream to merge [in][out]
396  * @param stream2 The stream that will contain the
397  * HSPLists of the first stream [in][out]
398  */
400  Uint4 chunk_num,
401  BlastHSPStream* stream1,
402  BlastHSPStream* stream2)
403 {
404  Int4 i, j, k;
405  BlastHSPResults *results1 = NULL;
406  BlastHSPResults *results2 = NULL;
407  Int4 contexts_per_query = 0;
408 #ifdef _DEBUG
409  Int4 num_queries = 0, num_ctx = 0, num_ctx_offsets = 0;
410  Int4 max_ctx;
411 #endif
412 
413  Uint4 *query_list = NULL, *offset_list = NULL, num_contexts = 0;
414  Int4 *context_list = NULL;
415 
416 
417  if (!stream1 || !stream2)
418  return kBlastHSPStream_Error;
419 
420  s_FinalizeWriter(stream1);
421  s_FinalizeWriter(stream2);
422 
423  results1 = stream1->results;
424  results2 = stream2->results;
425 
426  contexts_per_query = BLAST_GetNumberOfContexts(stream2->program);
427 
428  SplitQueryBlk_GetQueryIndicesForChunk(squery_blk, chunk_num, &query_list);
429  SplitQueryBlk_GetQueryContextsForChunk(squery_blk, chunk_num,
430  &context_list, &num_contexts);
431  SplitQueryBlk_GetContextOffsetsForChunk(squery_blk, chunk_num, &offset_list);
432 
433 #if defined(_DEBUG_VERBOSE)
434  fprintf(stderr, "Chunk %d\n", chunk_num);
435  fprintf(stderr, "Queries : ");
436  for (num_queries = 0; query_list[num_queries] != UINT4_MAX; num_queries++)
437  fprintf(stderr, "%d ", query_list[num_queries]);
438  fprintf(stderr, "\n");
439  fprintf(stderr, "Contexts : ");
440  for (num_ctx = 0; num_ctx < num_contexts; num_ctx++)
441  fprintf(stderr, "%d ", context_list[num_ctx]);
442  fprintf(stderr, "\n");
443  fprintf(stderr, "Context starting offsets : ");
444  for (num_ctx_offsets = 0; offset_list[num_ctx_offsets] != UINT4_MAX;
445  num_ctx_offsets++)
446  fprintf(stderr, "%d ", offset_list[num_ctx_offsets]);
447  fprintf(stderr, "\n");
448 #elif defined(_DEBUG)
449  for (num_queries = 0; query_list[num_queries] != UINT4_MAX; num_queries++) ;
450  for (num_ctx = 0, max_ctx = INT4_MIN; num_ctx < (Int4)num_contexts; num_ctx++)
451  max_ctx = MAX(max_ctx, context_list[num_ctx]);
452  for (num_ctx_offsets = 0; offset_list[num_ctx_offsets] != UINT4_MAX;
453  num_ctx_offsets++) ;
454 #endif
455 
456  for (i = 0; i < results1->num_queries; i++) {
457  BlastHitList *hitlist = results1->hitlist_array[i];
458  Int4 global_query = query_list[i];
459  Int4 split_points[NUM_FRAMES];
460 #ifdef _DEBUG
461  ASSERT(i < num_queries);
462 #endif
463 
464  if (hitlist == NULL) {
465 #if defined(_DEBUG_VERBOSE)
466 fprintf(stderr, "No hits to query %d\n", global_query);
467 #endif
468  continue;
469  }
470 
471  /* we will be mapping HSPs from the local context to
472  their place on the unsplit concatenated query. Once
473  that's done, overlapping HSPs need to get merged, and
474  to do that we must know the offset within each context
475  where the last chunk ended and the current chunk begins */
476  for (j = 0; j < contexts_per_query; j++) {
477  split_points[j] = -1;
478  }
479 
480  for (j = 0; j < contexts_per_query; j++) {
481  Int4 local_context = i * contexts_per_query + j;
482  if (context_list[local_context] >= 0) {
483  split_points[context_list[local_context] % contexts_per_query] =
484  offset_list[local_context];
485  }
486  }
487 
488 #if defined(_DEBUG_VERBOSE)
489  fprintf(stderr, "query %d split points: ", i);
490  for (j = 0; j < contexts_per_query; j++) {
491  fprintf(stderr, "%d ", split_points[j]);
492  }
493  fprintf(stderr, "\n");
494 #endif
495 
496  for (j = 0; j < hitlist->hsplist_count; j++) {
497  BlastHSPList *hsplist = hitlist->hsplist_array[j];
498 
499  for (k = 0; k < hsplist->hspcnt; k++) {
500  BlastHSP *hsp = hsplist->hsp_array[k];
501  Int4 local_context = hsp->context;
502 #ifdef _DEBUG
503  ASSERT(local_context <= max_ctx);
504  ASSERT(local_context < num_ctx);
505  ASSERT(local_context < num_ctx_offsets);
506 #endif
507 
508  hsp->context = context_list[local_context];
509  hsp->query.offset += offset_list[local_context];
510  hsp->query.end += offset_list[local_context];
511  hsp->query.gapped_start += offset_list[local_context];
512  hsp->query.frame = BLAST_ContextToFrame(stream2->program,
513  hsp->context);
514  }
515 
516  hsplist->query_index = global_query;
517  }
518 
519  Blast_HitListMerge(results1->hitlist_array + i,
520  results2->hitlist_array + global_query,
521  contexts_per_query, split_points,
523  SplitQueryBlk_AllowGap(squery_blk));
524  }
525 
526  /* Sort to the canonical order, which the merge may not have done. */
527  for (i = 0; i < results2->num_queries; i++) {
528  BlastHitList *hitlist = results2->hitlist_array[i];
529  if (hitlist == NULL)
530  continue;
531 
532  for (j = 0; j < hitlist->hsplist_count; j++)
534  }
535 
536  stream2->results_sorted = FALSE;
537 
538 #if _DEBUG_VERBOSE
539  fprintf(stderr, "new results: %d queries\n", results2->num_queries);
540  for (i = 0; i < results2->num_queries; i++) {
541  BlastHitList *hitlist = results2->hitlist_array[i];
542  if (hitlist == NULL)
543  continue;
544 
545  for (j = 0; j < hitlist->hsplist_count; j++) {
546  BlastHSPList *hsplist = hitlist->hsplist_array[j];
547  fprintf(stderr,
548  "query %d OID %d\n", hsplist->query_index, hsplist->oid);
549 
550  for (k = 0; k < hsplist->hspcnt; k++) {
551  BlastHSP *hsp = hsplist->hsp_array[k];
552  fprintf(stderr, "c %d q %d-%d s %d-%d score %d\n", hsp->context,
553  hsp->query.offset, hsp->query.end,
554  hsp->subject.offset, hsp->subject.end,
555  hsp->score);
556  }
557  }
558  }
559 #endif
560 
561  sfree(query_list);
562  sfree(context_list);
563  sfree(offset_list);
564 
566 }
567 
570 {
571  Int4 i;
572  Int4 num_hsplists;
573  Int4 target_oid;
574  BlastHSPList *hsplist;
575 
576  if (!hsp_stream || !batch)
577  return kBlastHSPStream_Error;
578 
579  /* If this stream is not yet closed for writing, close it. In particular,
580  this includes sorting of results.
581  NB: to lift the prohibition on write after the first read, the
582  following 2 lines should be removed, and stream closure for writing
583  should be done outside of the read function. */
584  if (!hsp_stream->results_sorted)
585  BlastHSPStreamClose(hsp_stream);
586 
587  batch->num_hsplists = 0;
588  if (!hsp_stream->results)
589  return kBlastHSPStream_Eof;
590 
591  /* return all the HSPlists with the same subject OID as the
592  last HSPList in the collection stored. We assume there is
593  at most one HSPList per query sequence */
594 
595  num_hsplists = hsp_stream->num_hsplists;
596  if (num_hsplists == 0)
597  return kBlastHSPStream_Eof;
598 
599  hsplist = hsp_stream->sorted_hsplists[num_hsplists - 1];
600  target_oid = hsplist->oid;
601 
602  for (i = 0; i < num_hsplists; i++) {
603  hsplist = hsp_stream->sorted_hsplists[num_hsplists - 1 - i];
604  if (hsplist->oid != target_oid)
605  break;
606 
607  batch->hsplist_array[i] = hsplist;
608  }
609 
610  hsp_stream->num_hsplists = num_hsplists - i;
611  batch->num_hsplists = i;
612 
614 }
615 
618 {
620  calloc(1, sizeof(BlastHSPStreamResultBatch));
621 
622  retval->hsplist_array = (BlastHSPList **)calloc((size_t)num_hsplists,
623  sizeof(BlastHSPList *));
624  return retval;
625 }
626 
629 {
630  if (batch != NULL) {
631  if (batch->hsplist_array) {
632  sfree(batch->hsplist_array);
633  }
634  sfree(batch);
635  }
636  return NULL;
637 }
638 
640 {
641  Int4 i;
642  if (batch != NULL) {
643  for (i = 0; i < batch->num_hsplists; i++) {
644  batch->hsplist_array[i] =
646  }
647  batch->num_hsplists = 0;
648  }
649  return batch;
650 }
651 
654  const BlastExtensionOptions* extn_opts,
655  Boolean sort_on_read,
656  Int4 num_queries,
657  BlastHSPWriter *writer)
658 {
659  BlastHSPStream* hsp_stream =
661 
662  hsp_stream->program = program;
663 
664  hsp_stream->num_hsplists = 0;
665  hsp_stream->num_hsplists_alloc = 100;
666  hsp_stream->sorted_hsplists = (BlastHSPList **)malloc(
667  hsp_stream->num_hsplists_alloc *
668  sizeof(BlastHSPList *));
669 
670  /* FIXME: This will not be needed for mapper when the new mapping
671  results structure is implemented */
672  hsp_stream->results = Blast_HSPResultsNew(num_queries);
673 
674  hsp_stream->results_sorted = FALSE;
675 
676  /* This is needed to meet a pre-condition of the composition-based
677  * statistics code */
678  if ((Blast_QueryIsProtein(program) || Blast_QueryIsPssm(program)) &&
679  extn_opts->compositionBasedStats != 0) {
680  hsp_stream->sort_by_score =
682  hsp_stream->sort_by_score->sort_on_read = sort_on_read;
683  hsp_stream->sort_by_score->first_query_index = 0;
684  } else {
685  hsp_stream->sort_by_score = NULL;
686  }
687  hsp_stream->x_lock = NULL;
688  hsp_stream->writer = writer;
689  hsp_stream->writer_initialized = FALSE;
690  hsp_stream->writer_finalized = FALSE;
691  hsp_stream->pre_pipe = NULL;
692  hsp_stream->tback_pipe = NULL;
693 
694  return hsp_stream;
695 }
696 
698  MT_LOCK lock)
699 {
700  /* only one lock can be registered */
701  if (!hsp_stream || (hsp_stream->x_lock && lock)) {
702  MT_LOCK_Delete(lock);
703  return -1;
704  }
705  hsp_stream->x_lock = lock;
706  return 0;
707 }
708 
710  BlastHSPPipe* pipe,
711  EBlastStage stage)
712 {
713  BlastHSPPipe *p;
714 
715  if (!hsp_stream || !pipe) {
716  return -1;
717  }
718 
719  pipe->next = NULL;
720 
721  switch(stage) {
722  case ePrelimSearch:
723  p = hsp_stream->pre_pipe;
724  if (!p) {
725  hsp_stream->pre_pipe = pipe;
726  return 0;
727  }
728  break;
729  case eTracebackSearch:
730  p = hsp_stream->tback_pipe;
731  if (!p) {
732  hsp_stream->tback_pipe = pipe;
733  return 0;
734  }
735  break;
736  default:
737  return -1;
738  }
739 
740  /* insert the pipe at the end */
741  for (; p && p->next; p = p->next);
742  p->next = pipe;
743 
744  return 0;
745 }
746 
749  BlastQueryInfo* query_info,
751 {
752  BlastHSPWriter * writer = NULL;
753  if(writer_info && *writer_info) {
754  writer = ((*writer_info)->NewFnPtr) ((*writer_info)->params, query_info,
755  query);
756  sfree(*writer_info);
757  }
758  ASSERT(writer_info && *writer_info == NULL);
759  return writer;
760 }
761 
764  BlastHSPPipeInfo* node)
765 {
766  if (head) {
767  if (*head) {
769  while (tmp->next) {
770  tmp = tmp->next;
771  }
772  tmp->next = node;
773  } else {
774  *head = node;
775  }
776  }
777  return node;
778 }
779 
782  BlastQueryInfo* query_info)
783 {
784  BlastHSPPipe *pipe = NULL;
785  BlastHSPPipe *p = pipe;
786  BlastHSPPipeInfo *info = *pipe_info;
787  BlastHSPPipeInfo *q = info;
788 
789  while(info) {
790  if (p) {
791  p->next = (info->NewFnPtr) (info->params, query_info);
792  p = p->next;
793  } else {
794  pipe = (info->NewFnPtr) (info->params, query_info);
795  p = pipe;
796  }
797  p->next = NULL;
798  q = info;
799  info = info->next;
800  sfree(q);
801  }
802  *pipe_info = NULL;
803  return pipe;
804 }
805 
806 void s_TrimHitList(BlastHitList* hitlist, int count)
807 {
808  int old_count = hitlist->hsplist_count;
809  int index =0;
810  for (index = count; index < old_count; index++) {
811  hitlist->hsplist_array[index] = Blast_HSPListFree(hitlist->hsplist_array[index]);
812  }
813  hitlist->hsplist_count = count;
814 }
815 
816 void BlastHSPCBSStreamClose(BlastHSPStream* hsp_stream, int hitlist_size)
817 {
818  BlastHitList* hit_list;
819  BlastHSPResults * results;
820  int index =0;
821 
822  if (!hsp_stream || !hsp_stream->results || hsp_stream->results_sorted)
823  return;
824 
825  s_FinalizeWriter(hsp_stream);
826 
827  results = hsp_stream->results;
828 
829  for (index = 0; index < results->num_queries; index++) {
830  hit_list = results->hitlist_array[index];
831  if (hit_list) {
832  const int REF_HIT_NUM = MAX(500, hitlist_size);
833  const int MIN_BUF_SZ = REF_HIT_NUM + 600;
834  if (MIN_BUF_SZ + 100 < hit_list->hsplist_count) {
835  int max_index = hit_list->hsplist_count -1;
836  double best_evalue = 0, evalue_limit = 0;
837  int mag = -180, pct = 90, i = 0;
838  Blast_HitListSortByEvalue(hit_list);
839  best_evalue = hit_list->hsplist_array[REF_HIT_NUM]->best_evalue;
840  if (best_evalue != 0 ){
841  mag = log10(hit_list->hsplist_array[REF_HIT_NUM]->best_evalue);
842  }
843  if (mag < -20){
844  mag = MAX(mag*pct/100, mag +10);
845  }
846  else {
847  mag = mag/2;
848  }
849  evalue_limit = (mag >= 0)? best_evalue*3: 9.9* pow(10, mag);
850 
851  i = MIN_BUF_SZ;
852  for(; i < max_index; i +=100) {
853  if((hit_list->hsplist_array[i]->best_evalue != 0) &&
854  (evalue_limit < hit_list->hsplist_array[i]->best_evalue)){
855  s_TrimHitList(hit_list, i);
856  break;
857  }
858  }
859  }
860  }
861  }
862 
863  BlastHSPStreamClose(hsp_stream);
864  return;
865 
866 }
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
Definition: blast_def.h:88
EBlastStage
Enumeration for the stages in the BLAST search.
Definition: blast_def.h:324
@ eTracebackSearch
Traceback stage.
Definition: blast_def.h:330
@ ePrelimSearch
Preliminary stage.
Definition: blast_def.h:328
Int2 Blast_HSPResultsReverseSort(BlastHSPResults *results)
Sort each hit list in the BLAST results by best e-value, in reverse order.
Definition: blast_hits.c:3396
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
Definition: blast_hits.c:3358
Int2 Blast_HitListMerge(BlastHitList **old_hit_list_ptr, BlastHitList **combined_hit_list_ptr, Int4 contexts_per_query, Int4 *split_offsets, Int4 chunk_overlap_size, Boolean allow_gap)
Combine two hitlists; both HitLists must contain HSPs that represent alignments to the same query seq...
Definition: blast_hits.c:2113
Int2 Blast_HSPResultsReverseOrder(BlastHSPResults *results)
Reverse order of HSP lists in each hit list in the BLAST results.
Definition: blast_hits.c:3412
Int2 Blast_HitListSortByEvalue(BlastHitList *hit_list)
Sort BlastHitList by e-value.
Definition: blast_hits.c:3323
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
Definition: blast_hits.c:3338
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
Definition: blast_hits.c:1542
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Definition: blast_hits.c:1374
BlastHSPWriter * BlastHSPWriterNew(BlastHSPWriterInfo **writer_info, BlastQueryInfo *query_info, BLAST_SequenceBlk *query)
A generic function to create writer.
void BlastHSPStreamSimpleClose(BlastHSPStream *hsp_stream)
Closes the BlastHSPStream structure for writing without any sorting as done in BlastHSPStreamClose.
void BlastHSPStreamClose(BlastHSPStream *hsp_stream)
Prohibit any future writing to the HSP stream when all results are written.
BlastHSPPipe * BlastHSPPipeNew(BlastHSPPipeInfo **pipe_info, BlastQueryInfo *query_info)
A generic function to create pipe.
static void s_FinalizeWriter(BlastHSPStream *hsp_stream)
certain hspstreams (such as besthit and culling) uses its own data structure and therefore must be fi...
void s_TrimHitList(BlastHitList *hitlist, int count)
int BlastHSPStreamMerge(SSplitQueryBlk *squery_blk, Uint4 chunk_num, BlastHSPStream *stream1, BlastHSPStream *stream2)
Merge two HSPStreams.
static int s_SortHSPListByOid(const void *x, const void *y)
callback used to sort HSP lists in order of decreasing OID
int BlastHSPStreamWrite(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list)
Write an HSP list to the collector HSP stream.
BlastHSPStreamResultBatch * Blast_HSPStreamResultBatchReset(BlastHSPStreamResultBatch *batch)
free the list of HSPLists within a batch
BlastHSPStream * BlastHSPStreamFree(BlastHSPStream *hsp_stream)
Default hit saving stream methods.
void BlastHSPStreamMappingClose(BlastHSPStream *hsp_stream, BlastMappingResults *results)
Closes BlastHSPStream structure for mapping and produces BlastMappingResults.
BlastHSPStream * BlastHSPStreamNew(EBlastProgramType program, const BlastExtensionOptions *extn_opts, Boolean sort_on_read, Int4 num_queries, BlastHSPWriter *writer)
Initialize the HSP stream.
const int kBlastHSPStream_Eof
Return value when the end of the stream is reached (applicable to read method only)
int BlastHSPStreamRegisterMTLock(BlastHSPStream *hsp_stream, MT_LOCK lock)
Attach a mutex lock to a stream to protect multiple access during writing.
const int kBlastHSPStream_Success
Standard success return value for BlastHSPStream methods.
void BlastHSPCBSStreamClose(BlastHSPStream *hsp_stream, int hitlist_size)
BlastHSPPipeInfo * BlastHSPPipeInfo_Add(BlastHSPPipeInfo **head, BlastHSPPipeInfo *node)
Adds node to the linked list starting a head, which should be NULL when initializing the linked list,...
int BlastHSPStreamRegisterPipe(BlastHSPStream *hsp_stream, BlastHSPPipe *pipe, EBlastStage stage)
Insert the user-specified pipe to the *end* of the pipeline.
void BlastHSPStreamTBackClose(BlastHSPStream *hsp_stream, BlastHSPResults *results)
Closing the HSP after traceback is done.
BlastHSPStreamResultBatch * Blast_HSPStreamResultBatchInit(Int4 num_hsplists)
create a new batch to hold HSP results
int BlastHSPStreamRead(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list_out)
Read one HSP list from the results saved in an HSP list collector.
const int kBlastHSPStream_Error
Standard error return value for BlastHSPStream methods.
BlastHSPStreamResultBatch * Blast_HSPStreamResultBatchFree(BlastHSPStreamResultBatch *batch)
free a batch of HSP results.
int BlastHSPStreamBatchRead(BlastHSPStream *hsp_stream, BlastHSPStreamResultBatch *batch)
Batch read function for this BlastHSPStream implementation.
Declaration of ADT to save and retrieve lists of HSPs in the BLAST engine.
Private interfaces to support the multi-threaded traceback in conjunction with the BlastHSPStream.
Boolean Blast_QueryIsPssm(EBlastProgramType p)
Returns true if the query is PSSM.
Definition: blast_program.c:46
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
Definition: blast_program.c:40
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
Various auxiliary BLAST utility functions.
Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)
This function translates the context number of a context into the frame of the sequence.
Definition: blast_util.c:839
unsigned int BLAST_GetNumberOfContexts(EBlastProgramType program)
Get the number of contexts for a given program.
Definition: blast_util.c:1373
#define head
Definition: ct_nlmzip_i.h:138
static char tmp[3200]
Definition: utf8.c:42
#define NULL
Definition: ncbistd.hpp:225
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
MT_LOCK MT_LOCK_Delete(MT_LOCK lk)
Decrement internal reference count by 1, and if it reaches 0, then destroy the handle,...
Definition: ncbi_core.c:234
#define MT_LOCK_Do(lk, how)
Call "lk->handler(lk->data, how)".
Definition: ncbi_core.h:270
@ eMT_Unlock
unlock critical section
Definition: ncbi_core.h:183
@ eMT_Lock
lock critical section
Definition: ncbi_core.h:181
int i
static MDB_envinfo info
Definition: mdb_load.c:37
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
#define UINT4_MAX
largest number represented by unsigned int.
Definition: ncbi_std.h:136
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
#define INT4_MIN
Smallest (most negative) number represented by signed int.
Definition: ncbi_std.h:146
#define MAX(a, b)
returns larger of a and b.
Definition: ncbi_std.h:117
T log10(T x_)
Boolean SplitQueryBlk_AllowGap(SSplitQueryBlk *squery_blk)
Determines whether HSPs on different diagonals may be merged.
Definition: split_query.c:154
Int2 SplitQueryBlk_GetQueryIndicesForChunk(const SSplitQueryBlk *squery_blk, Uint4 chunk_num, Uint4 **query_indices)
Retrieve an array of query indices for the requested chunk.
Definition: split_query.c:235
Int2 SplitQueryBlk_GetContextOffsetsForChunk(const SSplitQueryBlk *squery_blk, Uint4 chunk_num, Uint4 **context_offsets)
Retrieve an array of context offsets for the requested chunk.
Definition: split_query.c:293
Int2 SplitQueryBlk_GetQueryContextsForChunk(const SSplitQueryBlk *squery_blk, Uint4 chunk_num, Int4 **query_contexts, Uint4 *num_query_contexts)
Retrieve an array of query contexts for the requested chunk.
Definition: split_query.c:262
size_t SplitQueryBlk_GetChunkOverlapSize(const SSplitQueryBlk *squery_blk)
Returns the query chunk overlap size.
Definition: split_query.c:332
Structure to hold a sequence.
Definition: blast_def.h:242
Options used for gapped extension.
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Definition: blast_hits.h:154
Int4 hspcnt
Number of HSPs saved.
Definition: blast_hits.h:158
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Definition: blast_hits.h:157
double best_evalue
Smallest e-value for HSPs in this list.
Definition: blast_hits.h:162
Int4 query_index
Index of the query which this HSPList corresponds to.
Definition: blast_hits.h:155
A wrap of data structure used to create a pipe.
ADT definition of BlastHSPPipe.
void * data
data structure
BlastHSPPipe * next
the next pipe in chain
BlastHSPPipeRunFn RunFnPtr
BlastHSPPipeFreeFn FreeFnPtr
The structure to contain all BLAST results, for multiple queries.
Definition: blast_hits.h:183
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Definition: blast_hits.h:185
Int4 num_queries
Number of query sequences.
Definition: blast_hits.h:184
structure used to hold a collection of hits retrieved from the HSPStream
Int4 num_hsplists
number of lists of HSPs returned
BlastHSPList ** hsplist_array
array of HSP lists returned
Default implementation of BlastHSPStream.
BlastHSPWriter * writer
writer to be applied when writing
Int4 num_hsplists_alloc
number of entries in sorted_hsplists
Boolean writer_finalized
Is writer ever finalized?
Int4 num_hsplists
number of HSPlists saved
BlastHSPList ** sorted_hsplists
list of all HSPlists from 'results' combined, sorted in order of decreasing subject OID
BlastHSPResults * results
Structure for saving HSP lists.
EBlastProgramType program
BLAST program type.
Boolean results_sorted
Have the results already been sorted? Set to true after the first read call.
MT_LOCK x_lock
Mutex for writing and reading results.
BlastHSPPipe * tback_pipe
registered traceback pipeline
Boolean writer_initialized
Is writer already initialized?
SSortByScoreStruct * sort_by_score
BlastHSPPipe * pre_pipe
registered preliminary pipeline (unused for now)
A wrap of data structure used to create a writer.
ADT definition of BlastHSPWriter.
void * data
data structure
BlastHSPWriterFinalFn FinalFnPtr
BlastHSPWriterFreeFn FreeFnPtr
BlastHSPWriterRunFn RunFnPtr
BlastHSPWriterInitFn InitFnPtr
Structure holding all information about an HSP.
Definition: blast_hits.h:126
BlastSeg query
Query sequence info.
Definition: blast_hits.h:131
Int4 context
Context number of query.
Definition: blast_hits.h:133
BlastSeg subject
Subject sequence info.
Definition: blast_hits.h:132
Int4 score
This HSP's raw score.
Definition: blast_hits.h:127
The structure to contain all BLAST results for one query sequence.
Definition: blast_hits.h:169
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Definition: blast_hits.h:176
Int4 hsplist_count
Filled size of the HSP lists array.
Definition: blast_hits.h:170
Structure that contains BLAST mapping results.
Definition: spliced_hits.h:91
The query related information.
Int4 end
End of hsp.
Definition: blast_hits.h:99
Int4 gapped_start
Where the gapped extension started.
Definition: blast_hits.h:100
Int2 frame
Translation frame.
Definition: blast_hits.h:97
Int4 offset
Start of hsp.
Definition: blast_hits.h:98
Auxiliary structure to allow sorting the results by score for the composition-based statistics code.
Int4 first_query_index
Index of the first query to try getting results from.
Boolean sort_on_read
Should the results be sorted on the first read call?
Structure to keep track of which query sequences are allocated to each query chunk.
Definition: split_query.h:54
static string query
voidp malloc(uInt size)
voidp calloc(uInt items, uInt size)
Modified on Sun Apr 14 05:27:21 2024 by modify_doxy.py rev. 669887