NCBI C++ ToolKit
blast_hits.h
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_hits.h 87828 2019-10-09 11:00:47Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Ilya Dondoshansky
27  *
28  */
29 
30 /** @file blast_hits.h
31  * Structures and API used for saving BLAST hits
32  */
33 
34 #ifndef ALGO_BLAST_CORE__BLAST_HITS__H
35 #define ALGO_BLAST_CORE__BLAST_HITS__H
36 
47 
48 #ifdef __cplusplus
49 extern "C" {
50 #endif
51 
52 
54 
55 /** Keeps prelim_hitlist_size and HitSavingOptions
56  together, mostly for use by hspstream. */
57 typedef struct SBlastHitsParameters {
58  Int4 prelim_hitlist_size; /**< number of hits saved during preliminary
59  part of search. */
60  Int4 hsp_num_max; /**< number of HSPs to save per db sequence. */
62 
63 /** Sets up small structures used by blast_hit.c for saving HSPs.
64  * @param hit_options field hitlist_size and hsp_num_max needed, a pointer to
65  * this structure will be stored on resulting structure.[in]
66  * @param ext_options field compositionBasedStats needed here. [in]
67  * @param scoring_options gapped_calculation needed here. [in]
68  * @param retval the allocated SBlastHitsParameters*
69  * @return zero on success, 1 on NULL parameter, 2 if calloc fails.
70  */
73  const BlastExtensionOptions* ext_options,
74  const BlastScoringOptions* scoring_options,
75  SBlastHitsParameters* *retval);
76 
77 /** Make a deep copy of the SBlastHitsParameters structure passed in
78  * @param hit_params source hit parameters structure [in]
79  * @return NULL if out of memory, otherwise deep copy of first argument
80  */
84 
85 /** Deallocates SBlastHitsParameters.
86  * @param param object to be freed.
87  * @return NULL pointer.
88  */
91 
92 
93 
94 
95 /** One sequence segment within an HSP */
96 typedef struct BlastSeg {
97  Int2 frame; /**< Translation frame */
98  Int4 offset; /**< Start of hsp */
99  Int4 end; /**< End of hsp */
100  Int4 gapped_start;/**< Where the gapped extension started. */
102 
103 /** In PHI BLAST: information about pattern match in a given HSP. */
104 typedef struct SPHIHspInfo {
105  Int4 index; /**< Index of query pattern occurrence for this HSP. */
106  Int4 length; /**< Length of this pattern occurrence in subject. */
108 
109 typedef struct JumperEditsBlock JumperEditsBlock;
110 
111 /** Mapping information for an HSP */
112 typedef struct BlastHSPMappingInfo
113 {
114  JumperEditsBlock* edits; /**< Information about mismatches and gaps, used
115  for mapping short reads */
116  Uint1 left_edge; /**< Two subject bases before the alignment in the four
117  least significant bits and flags in most significant
118  bits (for RNA-seq mapping) */
119  Uint1 right_edge; /**< Same as above for subject bases after the alignment
120  (for RNA-seq mapping) */
121  Int4 flags; /**< Additional information about this HSP */
122  SequenceOverhangs* subject_overhangs; /**< Unaligned subject subsequence */
124 
125 /** Structure holding all information about an HSP */
126 typedef struct BlastHSP {
127  Int4 score; /**< This HSP's raw score */
128  Int4 num_ident; /**< Number of identical base pairs in this HSP */
129  double bit_score; /**< Bit score, calculated from score */
130  double evalue; /**< This HSP's e-value */
131  BlastSeg query; /**< Query sequence info. */
132  BlastSeg subject; /**< Subject sequence info. */
133  Int4 context; /**< Context number of query */
134  GapEditScript* gap_info;/**< ALL gapped alignment is here */
135  Int4 num; /**< How many HSP's are linked together for sum
136  statistics evaluation? If unset (0), this HSP is
137  not part of a linked set, i.e. value 0 is treated
138  the same way as 1. */
139  Int2 comp_adjustment_method; /**< which mode of composition
140  adjustment was used; relevant
141  only for blastp and tblastn */
142  SPHIHspInfo* pat_info; /**< In PHI BLAST, information about this pattern
143  match. */
145 
147 
149 
150 /** The structure to hold all HSPs for a given sequence after the gapped
151  * alignment.
152  */
153 typedef struct BlastHSPList {
154  Int4 oid;/**< The ordinal id of the subject sequence this HSP list is for */
155  Int4 query_index; /**< Index of the query which this HSPList corresponds to.
156  Set to 0 if not applicable */
157  BlastHSP** hsp_array; /**< Array of pointers to individual HSPs */
158  Int4 hspcnt; /**< Number of HSPs saved */
159  Int4 allocated; /**< The allocated size of the hsp_array */
160  Int4 hsp_max; /**< The maximal number of HSPs allowed to be saved */
161  Boolean do_not_reallocate; /**< Is reallocation of the hsp_array allowed? */
162  double best_evalue; /**< Smallest e-value for HSPs in this list. Filled after
163  e-values are calculated. Necessary because HSPs are
164  sorted by score, but highest scoring HSP may not have
165  the lowest e-value if sum statistics is used. */
167 
168 /** The structure to contain all BLAST results for one query sequence */
169 typedef struct BlastHitList {
170  Int4 hsplist_count; /**< Filled size of the HSP lists array */
171  Int4 hsplist_max; /**< Maximal allowed size of the HSP lists array */
172  double worst_evalue; /**< Highest of the best e-values among the HSP
173  lists */
174  Int4 low_score; /**< The lowest of the best scores among the HSP lists */
175  Boolean heapified; /**< Is this hit list already heapified? */
176  BlastHSPList** hsplist_array; /**< Array of HSP lists for individual
177  database hits */
178  Int4 hsplist_current; /**< Number of allocated HSP list arrays. */
179  Int4 num_hits; /**< Number of similar hits for the query (for mapping) */
181 
182 /** The structure to contain all BLAST results, for multiple queries */
183 typedef struct BlastHSPResults {
184  Int4 num_queries; /**< Number of query sequences */
185  BlastHitList** hitlist_array; /**< Array of results for individual
186  query sequences */
188 
189 
190 /** By how much should the chunks of a subject sequence overlap if it is
191  too long and has to be split */
192 #define DBSEQ_CHUNK_OVERLAP 100
193 
194 /********************************************************************************
195 
196 The following section has four sets of functions (or "APIs") that manipulate
197 the following structures:
198 1. BlastHSP, which is the basic unit to record one alignment.
199 2. BlastHSPList, which is a list of BlastHSP's for one database sequence.
200 3. BlastHitList, which contains all HSPList's for a given query.
201 4. BlastHSPResults, which is a list of BlastHitList's for multiple queries.
202 
203  The naming conventions for the functions are the following:
204 
205 1.) All routines start with "Blast_"
206 
207 2.) After "Blast_" comes the structure being manipulated, that should be either
208  HSP (all capitals all the time!), HSPList (exactly this capitalization),
209  HitList (capital H and L, all others lower-case), or HSPResults.
210 
211 3.) Finally, the task being done, e.g., "Free", "New", "Init".
212 
213 ********************************************************************************/
214 /********************************************************************************
215  HSP API
216 ********************************************************************************/
217 
218 /** Deallocate memory for an HSP structure */
221 
222 /** Allocates and zeroes out memory for an HSP structure */
224 BlastHSP* Blast_HSPNew(void);
225 
226 /** Allocates a BlastHSP and initializes it with the information passed in
227  * through the arguments.
228  * @param query_start Start of query alignment [in]
229  * @param query_end End of query alignment [in]
230  * @param subject_start Start of subject alignment [in]
231  * @param subject_end End of subject alignment [in]
232  * @param query_gapped_start Where gapped alignment started on query [in]
233  * @param subject_gapped_start Where gapped alignment started on subject [in]
234  * @param query_context The index of the query containing this HSP [in]
235  * @param query_frame Query frame: -3..3 for translated sequence,
236  * 1 or -1 for blastn, 0 for blastp [in]
237  * @param subject_frame Subject frame: -3..3 for translated sequence,
238  * 1 for blastn, 0 for blastp [in]
239  * @param score score of alignment [in]
240  * @param gap_edit Will be transferred to the HSP and nulled out;
241  * may be NULL if a traceback was not calculated [in] [out]
242  * @param ret_hsp allocated and filled in BlastHSP [out]
243  */
245 Int2
246 Blast_HSPInit(Int4 query_start, Int4 query_end,
247  Int4 subject_start, Int4 subject_end,
248  Int4 query_gapped_start, Int4 subject_gapped_start,
249  Int4 query_context, Int2 query_frame, Int2 subject_frame,
250  Int4 score, GapEditScript* *gap_edit, BlastHSP** ret_hsp);
251 
252 /** Make a deep copy of an HSP */
254 BlastHSP*
255 Blast_HSPClone(const BlastHSP* hsp);
256 
257 /** Deallocate memory for an HSP's additional data structure */
260 
261 /** Allocate memory for an HSP's additional data structure */
264 
265 
266 
267 /** Reevaluate the HSP's score and percent identity after taking
268  * into account the ambiguity information. Used only for blastn after a greedy
269  * gapped extension with traceback. This function can remove part of the
270  * alignment at either end, if its score becomes negative after reevaluation.
271  * Traceback is also adjusted in that case.
272  * @param hsp The HSP structure [in] [out]
273  * @param query_start Pointer to the start of the query sequence [in]
274  * @param query_length Length of the query sequence [in]
275  * @param subject_start Pointer to the start of the subject sequence [in]
276  * @param subject_length Length of the subject sequence [in]
277  * @param hit_params Hit saving parameters containing score cut-off [in]
278  * @param score_params Scoring parameters [in]
279  * @param sbp Score block with Karlin-Altschul parameters [in]
280  * @return Should this HSP be deleted after the score reevaluation?
281  */
283 Boolean
285  const Uint1* query_start, const Int4 query_length,
286  const Uint1* subject_start, const Int4 subject_length,
287  const BlastHitSavingParameters* hit_params,
288  const BlastScoringParameters* score_params, const BlastScoreBlk* sbp);
289 
290 /** Reevaluate the HSP's score and percent identity after taking into
291  * account the ambiguity information. Used for ungapped searches with
292  * nucleotide database (blastn, tblastn, tblastx).
293  * @param hsp The HSP structure [in] [out]
294  * @param query_start Pointer to the start of the query sequence [in]
295  * @param subject_start Pointer to the start of the subject sequence [in]
296  * @param word_params Initial word parameters with ungapped cutoff score [in]
297  * @param sbp Score block with Karlin-Altschul parameters [in]
298  * @param translated Are sequences protein (with a translated subject)? [in]
299  * @return Should this HSP be deleted after the score reevaluation?
300  */
302 Boolean
304  const Uint1* query_start, const Uint1* subject_start,
305  const BlastInitialWordParameters* word_params,
306  BlastScoreBlk* sbp, Boolean translated);
307 
308 /** Calculate number of identities in an HSP and set the BlastHSP::num_ident
309  * field (unconditionally)
310  * @param query The query sequence [in]
311  * @param subject The uncompressed subject sequence [in]
312  * @param hsp All information about the HSP, the output of this function will
313  * be stored in its num_ident field [in|out]
314  * @param score_options Scoring options [in]
315  * @param align_length_ptr The alignment length, including gaps (optional) [out]
316  * @return 0 on success, -1 on invalid parameters or error
317  */
319 Int2
321  const Uint1* subject,
322  BlastHSP* hsp,
323  const BlastScoringOptions* score_options,
324  Int4* align_length_ptr);
325 
326 /** Calculate number of identities and positives in an HSP and set the
327  * BlastHSP::num_ident and BlastHSP::num_positives fields
328  * @param query The query sequence [in]
329  * @param subject The uncompressed subject sequence [in]
330  * @param hsp All information about the HSP, the output of this function will
331  * be stored in its num_ident field [in|out]
332  * @param score_options Scoring options [in]
333  * @param align_length_ptr The alignment length, including gaps (optional) [out]
334  * @param sbp Score blk containing the matrix for counting positives [in]
335  * @return 0 on success, -1 on invalid parameters or error
336  */
338 Int2
340  const Uint1* subject,
341  BlastHSP* hsp,
342  const BlastScoringOptions* score_options,
343  Int4* align_length_ptr,
344  const BlastScoreBlk* sbp);
345 
346 /** Determines whether this HSP should be kept or
347  * deleted.
348  * @param hsp An HSP structure [in] [out]
349  * @param hit_options Hit saving options containing percent identity and
350  * HSP length thresholds.
351  * @param align_length alignment length including gaps
352  * @return FALSE if HSP passes the test, TRUE if it should be deleted.
353  */
355 Boolean
357  const BlastHitSavingOptions* hit_options,
358  Int4 align_length);
359 
360 /** Calculates number of identities and alignment lengths of an HSP via
361  * Blast_HSPGetNumIdentities and determines whether this HSP should be kept or
362  * deleted.
363  * @param program_number Type of BLAST program [in]
364  * @param hsp An HSP structure [in] [out]
365  * @param query Query sequence [in]
366  * @param subject Subject sequence [in]
367  * @param score_options Scoring options, needed to distinguish the
368  * out-of-frame case. [in]
369  * @param hit_options Hit saving options containing percent identity and
370  * HSP length thresholds.
371  * @return FALSE if HSP passes the test, TRUE if it should be deleted.
372  */
374 Boolean
376  BlastHSP* hsp, const Uint1* query, const Uint1* subject,
377  const BlastScoringOptions* score_options,
378  const BlastHitSavingOptions* hit_options);
379 
380 /** Calculate query coverage percentage of an hsp
381  * @param hsp An HSP structure [in]
382  * @param query_length Length of query [in]
383  * @return percentage query coverage of the input hsp
384  */
386 double
387 Blast_HSPGetQueryCoverage(const BlastHSP* hsp, Int4 query_length);
388 
389 /** Check whether an hsp's query coverage percentage is below a minimum
390  * @param hsp An HSP structure [in]
391  * @param min_query_coverage_pct Min query coverage pct for saving the hsp[in]
392  * @param query_length Length of query [in]
393  * @return true if hsp's query coverage pct < min_query_coverage_pct (delete hsp)
394  */
397  double min_query_coverage_pct,
398  Int4 query_length);
399 
400 
403  const BlastHitSavingOptions* hit_options);
404 
405 /** Calculates the number of HSPs that should be saved.
406  * @param gapped_calculation ungapped if false [in]
407  * @param options BlastHitSavingOptions object [in]
408  * @return number of HSPs to save.
409  */
411 Int4
412 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions* options);
413 
414 /** Calculate length of an HSP as length in query plus length of gaps in
415  * query. If gap information is unavailable, return maximum between length in
416  * query and in subject.
417  * @param hsp An HSP structure [in]
418  * @param length Length of this HSP [out]
419  * @param gaps Total number of gaps in this HSP [out]
420  * @param gap_opens Number of gap openings in this HSP [out]
421  */
423 void Blast_HSPCalcLengthAndGaps(const BlastHSP* hsp, Int4* length,
424  Int4* gaps, Int4* gap_opens);
425 
426 /** Adjust HSP endpoint offsets according to strand/frame; return values in
427  * 1-offset coordinates instead of internal 0-offset.
428  * @param program Type of BLAST program [in]
429  * @param hsp An HSP structure [in]
430  * @param query_length Length of query [in]
431  * @param subject_length Length of subject [in]
432  * @param q_start Start of alignment in query [out]
433  * @param q_end End of alignment in query [out]
434  * @param s_start Start of alignment in subject [out]
435  * @param s_end End of alignment in subject [out]
436  */
438 void
440  Int4 query_length, Int4 subject_length,
441  Int4* q_start, Int4* q_end,
442  Int4* s_start, Int4* s_end);
443 
444 /** Performs the translation and coordinates adjustment, if only part of the
445  * subject sequence is translated for gapped alignment.
446  * @param subject_blk Subject sequence structure [in]
447  * @param hsp The HSP information [in] [out]
448  * @param is_ooframe Return a mixed-frame sequence if TRUE [in]
449  * @param gen_code_string Database genetic code [in]
450  * @param translation_buffer_ptr Pointer to buffer holding the translation [out]
451  * @param subject_ptr Pointer to sequence to be passed to the gapped
452  * alignment [out]
453  * @param subject_length_ptr Length of the translated sequence [out]
454  * @param start_shift_ptr How far is the partial sequence shifted w.r.t. the
455  * full sequence. [out]
456  */
458 Int2
460  BlastHSP* hsp, Boolean is_ooframe, const Uint1* gen_code_string,
461  Uint1** translation_buffer_ptr, Uint1** subject_ptr,
462  Int4* subject_length_ptr, Int4* start_shift_ptr);
463 
464 /** Adjusts offsets if partial sequence was used for extension.
465  * @param hsp The hit to work on [in][out]
466  * @param start_shift amount of database sequence not used for extension. [in]
467 */
469 void
470 Blast_HSPAdjustSubjectOffset(BlastHSP* hsp, Int4 start_shift);
471 
472 
473 /** Returns a buffer with a protein translated from nucleotide.
474  * @param target_t SBlastTargetTranslation* with information about translation [in]
475  * @param hsp The hit to work on [in]
476  * @param translated_length length of the protein sequence (NOTE(review): passed as Int4*, so this is presumably an [out] parameter -- confirm against the implementation) [in]
477 */
479 const Uint1*
480 Blast_HSPGetTargetTranslation(SBlastTargetTranslation* target_t, const BlastHSP* hsp, Int4* translated_length);
481 
482 /********************************************************************************
483  HSPList API
484 ********************************************************************************/
485 
486 /** Deallocate memory for an HSP list structure
487  * as well as all its components.
488  * @param hsp_list the BlastHSPList to be freed [in].
489 */
492 
493 /** Creates HSP list structure with a default size HSP array
494  * @param hsp_max the maximum number of HSP's that can ever be
495  * saved at once [in].
496 */
499 
500 /** Returns true if the BlastHSPList contains no HSPs
501  * @param hsp_list list of HSPs to examine [in]
502  */
504 Boolean
505 Blast_HSPList_IsEmpty(const BlastHSPList* hsp_list);
506 
507 /** Returns a duplicate (deep copy) of the given hsp list. */
509 BlastHSPList* BlastHSPListDup(const BlastHSPList* hsp_list);
510 
511 /** Swaps the two HSP lists via structure assignment */
513 void Blast_HSPListSwap(BlastHSPList* list1, BlastHSPList* list2);
514 
515 /** Saves HSP information into a BlastHSPList structure
516  * @param hsp_list Structure holding all HSPs with full gapped alignment
517  * information [in] [out]
518  * @param hsp The new HSP to be inserted into the HSPList [in]
519 */
521 Int2
523 
524 /** Calculate the expected values for all HSPs in a hit list, without using
525  * the sum statistics. In case of multiple queries, the offsets are assumed
526  * to be already adjusted to individual query coordinates, and the contexts
527  * are set for each HSP.
528  * @param program_number Type of BLAST program [in]
529  * @param query_info Auxiliary query information - needed only for effective
530  * search space calculation if it is not provided [in]
531  * @param subject_length Subject length - needed for Spouge's new FSC [in]
532  * @param hsp_list List of HSPs for one subject sequence [in] [out]
533  * @param gapped_calculation Is this for a gapped or ungapped search? [in]
534  * @param RPS_prelim Is this for a RPS preliminary search? [in]
535  * @param sbp Structure containing statistical information [in]
536  * @param gap_decay_rate Adjustment parameter to compensate for the effects of
537  * performing multiple tests when linking HSPs. No adjustment is made if 0. [in]
538  * @param scaling_factor Scaling factor by which Lambda should be divided. Used in
539  * RPS BLAST only; should be set to 1.0 in other cases. [in]
540  *
541  */
544  const BlastQueryInfo* query_info,
545  Int4 subject_length,
546  BlastHSPList* hsp_list,
547  Boolean gapped_calculation,
548  Boolean RPS_prelim,
549  const BlastScoreBlk* sbp, double gap_decay_rate,
550  double scaling_factor);
551 
552 /** Calculate e-values for a PHI BLAST HSP list.
553  * @param hsp_list HSP list found by PHI BLAST [in] [out]
554  * @param sbp Scoring block with statistical parameters [in]
555  * @param query_info Structure containing information about pattern counts [in]
556  * @param pattern_blk Structure containing information about pattern hits in db [in]
557  */
560  const BlastQueryInfo* query_info,
561  const SPHIPatternSearchBlk* pattern_blk);
562 
563 /** Calculate bit scores from raw scores in an HSP list.
564  * @param hsp_list List of HSPs [in] [out]
565  * @param gapped_calculation Is this a gapped search? [in]
566  * @param sbp Scoring block with statistical parameters [in]
567  */
570  Boolean gapped_calculation,
571  const BlastScoreBlk* sbp);
572 
573 /** Calculate bit scores from raw scores in an HSP list for a PHI BLAST search.
574  * @param hsp_list List of HSPs [in] [out]
575  * @param sbp Scoring block with statistical parameters [in]
576  */
579 
580 /** Discard the HSPs above the e-value threshold from the HSP list
581  * @param hsp_list List of HSPs for one subject sequence [in] [out]
582  * @param hit_options Options block containing the e-value cut-off [in]
583 */
586  const BlastHitSavingOptions* hit_options);
587 
588 /** Discard the HSPs above the raw threshold from the HSP list
589  * @param hsp_list List of HSPs for one subject sequence [in] [out]
590  * @param hit_options Options block containing the e-value cut-off [in]
591  * -RMH-
592  */
595  const BlastHitSavingOptions* hit_options);
596 
597 /** Discard the HSPs below the min query coverage pct from the HSP list
598  * @param hsp_list List of HSPs for one subject sequence [in] [out]
599  * @param hit_options Options block containing the min query coverage pct [in]
600  * @param query_info Structure containing information about the queries [in]
601  * @param program_number Type of BLAST program.
602 */
605  const BlastHitSavingOptions* hit_options,
606  const BlastQueryInfo* query_info,
607  EBlastProgramType program_number);
608 
609 /** Cleans out the NULLed out HSP's from the HSP array that
610  * is part of the BlastHSPList.
611  * @param hsp_list Contains array of pointers to HSP structures [in]
612  * @return status of function call.
613 */
615 Int2
617 
618 /** Check for an overlap of two different alignments and remove redundant HSPs.
619  * A sufficient overlap is when two alignments have the same start or end values.
620  * If an overlap is found the HSP with the lowest score is removed, if both scores
621  * are the same then the first is removed.
622  * @param program Type of BLAST program. For some programs (PHI BLAST), the
623  * purge should not be performed. [in]
624  * @param hsp_list Contains array of pointers to HSPs to purge [in]
625  * @param purge Should the hsp be purged? [in]
626  * @return The number of valid alignments remaining.
627 */
629 Int4
631  BlastHSPList* hsp_list,
632  Boolean purge);
633 
635 Int4
637  const BlastHSPSubjectBestHitOptions* subject_besthit_opts,
638  const BlastQueryInfo *query_info,
639  BlastHSPList* hsp_list);
640 
641 /** Reevaluate all ungapped HSPs in an HSP list.
642  * This is only done for an ungapped search, or if traceback is
643  * already available.
644  * Subject sequence is uncompressed and saved here (for nucleotide sequences).
645  * The number of identities is calculated for each HSP along the way,
646  * hence this function is called for all programs.
647  * @param program Type of BLAST program [in]
648  * @param hsp_list The list of HSPs for one subject sequence [in] [out]
649  * @param query_blk The query sequence [in]
650  * @param subject_blk The subject sequence [in] [out]
651  * @param word_params Initial word parameters, containing ungapped cutoff
652  * score [in]
653  * @param hit_params Hit saving parameters, including cutoff score [in]
654  * @param query_info Auxiliary query information [in]
655  * @param sbp The statistical information [in]
656  * @param score_params Parameters related to scoring [in]
657  * @param seq_src The BLAST database structure (for retrieving uncompressed
658  * sequence) [in]
659  * @param gen_code_string Genetic code string in case of a translated
660  * database search. [in]
661  */
663 Int2
665  BlastHSPList* hsp_list, BLAST_SequenceBlk* query_blk,
666  BLAST_SequenceBlk* subject_blk,
667  const BlastInitialWordParameters* word_params,
668  const BlastHitSavingParameters* hit_params, const BlastQueryInfo* query_info,
669  BlastScoreBlk* sbp, const BlastScoringParameters* score_params,
670  const BlastSeqSrc* seq_src, const Uint1* gen_code_string);
671 
672 /** Append one HSP list to the other. Discard lower scoring HSPs if there is
673  * not enough space to keep all.
674  * @param old_hsp_list_ptr list of HSPs, will be NULLed out on return [in|out]
675  * @param combined_hsp_list_ptr Pointer to the combined list of HSPs, possibly
676  * containing previously saved HSPs [in] [out]
677  * @param hsp_num_max Maximal allowed number of HSPs to save (unlimited if INT4_MAX) [in]
678  * @return Status: 0 on success, -1 on failure.
679  */
681 Int2 Blast_HSPListAppend(BlastHSPList** old_hsp_list_ptr,
682  BlastHSPList** combined_hsp_list_ptr, Int4 hsp_num_max);
683 
684 /** Merge an HSP list from a chunk of the subject sequence into a previously
685  * computed HSP list.
686  * @param hsp_list Contains HSPs from the new chunk [in]
687  * @param combined_hsp_list_ptr Contains HSPs from previous chunks [in] [out]
688  * @param hsp_num_max Maximal allowed number of HSPs to save (unlimited if INT4_MAX) [in]
689  * @param split_points The sequence offset (query or subject) that is
690  * the boundary between HSPs in combined_hsp_list and hsp_list. [in]
691  * @param contexts_per_query If positive, the number of query contexts
692  * that hits can contain. If negative, the (one) split
693  * point occurs on the subject sequence [in]
694  * @param chunk_overlap_size The length of the overlap region between the
695  * sequence region containing hsp_list and that
696  * containing combined_hsp_list [in]
697  * @param allow_gap Allow merging HSPs at different diagonals [in]
698  * @param short_reads Assume that queries are shorter than the database
699  * overlap region [in]
700  * @return 0 if HSP lists have been merged successfully, -1 otherwise.
701  */
704  BlastHSPList** combined_hsp_list_ptr,
705  Int4 hsp_num_max, Int4* split_points,
706  Int4 contexts_per_query,
707  Int4 chunk_overlap_size,
708  Boolean allow_gap,
709  Boolean short_reads);
710 
711 /** Adjust subject offsets in an HSP list if only part of the subject sequence
712  * was searched. Used when long subject sequence is split into more manageable
713  * chunks.
714  * @param hsp_list List of HSPs from a chunk of a subject sequence [in]
715  * @param offset Offset where the chunk starts [in]
716  */
719 
720 /** For nucleotide BLAST, if the match reward score is equal to 2,
721  * random alignments are dominated by runs of exact matches, which all have even
722  * scores. This makes it impossible to estimate statistical parameters correctly
723  * for odd scores. Hence the raw score formula is adjusted - all scores are
724  * rounded down to the nearest even value in order to provide a conservative estimate.
725  * @param hsp_list HSP list structure to adjust scores for. [in] [out]
726  * @param gapped_calculation not an ungapped alignment [in]
727  * @param sbp used for round_down Boolean
728  */
731  Boolean gapped_calculation,
732  const BlastScoreBlk* sbp);
733 
734 /** Check if HSP list is sorted by score.
735  * @param hsp_list The list to check [in]
736  * @return TRUE if sorted, FALSE if not.
737  */
740 
741 /** Sort the HSPs in an HSP list by score. This type of sorting is done before
742  * the e-values are calculated, and also at the beginning of the traceback stage,
743  * where it is needed to eliminate the effects of wrong score order because of
744  * application of sum statistics.
745  * Checks if the HSP array is already sorted before proceeding with quicksort.
746  * @param hsp_list Structure containing array of HSPs to be sorted. [in] [out]
747  */
749 void Blast_HSPListSortByScore(BlastHSPList* hsp_list);
750 
751 /** Sort the HSPs in an HSP list by e-value, with scores and other criteria
752  * used to resolve ties. Checks if the HSP array is already sorted before
753  * proceeding with quicksort.
754  * @param hsp_list Structure containing array of HSPs to be sorted. [in] [out]
755  */
758 
759 /********************************************************************************
760  HitList API.
761 ********************************************************************************/
762 
763 /** Allocate memory for a hit list of a given size.
764  * @param hitlist_size Size of the hit list (number of HSP lists) [in]
765  */
767 BlastHitList* Blast_HitListNew(Int4 hitlist_size);
768 
769 /** Deallocate memory for the hit list */
772 
773 /** Deallocate memory for every HSP list on BlastHitList,
774  * as well as all their components.
775  * @param hitlist contains the BlastHSPList array to be freed [in/out].
776 */
779 
780 /** Insert a new HSP list into the hit list.
781  * Before capacity of the hit list is reached, just add to the end;
782  * After that, store in a heap, to ensure efficient insertion and deletion.
783  * The heap order is reverse, with worst e-value on top, for convenience
784  * of deletion.
785  * @param hit_list Contains all HSP lists saved so far [in] [out]
786  * @param hsp_list A new HSP list to be inserted into the hit list [in]
787 */
789 Int2 Blast_HitListUpdate(BlastHitList* hit_list, BlastHSPList* hsp_list);
790 
791 /** Combine two hitlists; both HitLists must contain HSPs that
792  * represent alignments to the same query sequence
793  * @param old_hit_list_ptr Pointer to original HitList, will be NULLed
794  * out on return [in|out]
795  * @param combined_hit_list_ptr Pointer to the combined HitList [in|out]
796  * @param contexts_per_query The number of different contexts that can
797  * occur in hits from old_hit_list and combined_hit_list [in]
798  * @param split_offsets the query offset that marks the boundary between
799  * combined_hit_list and old_hit_list. HSPs in old_hit_list
800  * that hit context i are assumed to lie to the right
801  * of split_offsets[i] [in]
802  * @param chunk_overlap_size The length of the overlap region between the
803  * sequence region containing hit_list and that
804  * containing combined_hit_list [in]
805  * @param allow_gap Allow merging HSPs at different diagonals [in]
806 */
808 Int2 Blast_HitListMerge(BlastHitList** old_hit_list_ptr,
809  BlastHitList** combined_hit_list_ptr,
810  Int4 contexts_per_query, Int4 *split_offsets,
811  Int4 chunk_overlap_size, Boolean allow_gap);
812 
813 /** Purges a BlastHitList of NULL HSP lists.
814  * @param hit_list BLAST hit list to purge. [in] [out]
815  */
817 Int2
819 
820 /** Sort BlastHitList based on evalue
821  * @param hit_list BLAST hit list to be sorted [in] [out]
822  */
824 Int2
826 
827 /********************************************************************************
828  HSPResults API.
829 ********************************************************************************/
830 
831 /** Initialize the results structure.
832  * @param num_queries Number of query sequences to allocate results structure
833  * for [in]
834  */
837 
838 /** Deallocate memory for BLAST results */
841 
842 /** Sort each hit list in the BLAST results by best e-value */
845 /** Sort each hit list in the BLAST results by best e-value, in reverse
846  order. */
849 
850 /** Reverse order of HSP lists in each hit list in the BLAST results.
851  * This allows returning HSP lists from the end of the arrays when reading
852  * from a collector HSP stream.
853  */
856 
857 /** Blast_HSPResultsInsertHSPList
858  * Insert an HSP list to the appropriate place in the results structure.
859  * All HSPs in this list must be from the same query and same subject; the oid
860  * and query_index fields must be set in the BlastHSPList input structure.
861  * @param results The structure holding results for all queries [in] [out]
862  * @param hsp_list The results for one query-subject sequence pair. [in]
863  * @param hitlist_size Maximal allowed hit list size. [in]
864  */
867  BlastHSPList* hsp_list, Int4 hitlist_size);
868 
869 /* Forward declaration */
870 struct BlastHSPStream;
871 
872 /** Move all of the hits within an HSPStream into a BlastHSPResults
873  * structure.
874  * @param hsp_stream The HSPStream [in][out]
875  * @param num_queries Number of queries in the search [in]
876  * @param hit_param Hit parameters [in]
877  * @return The generated collection of HSP results
878  */
882  size_t num_queries,
883  SBlastHitsParameters* hit_param);
884 
885 /** As Blast_HSPResultsFromHSPStream, except the total number of
886  * HSPs kept for each query does not exceed an explicit limit.
887  * The database sequences with the smallest number of hits are
888  * saved first, and hits are removed from query i if the average
889  * number of hits saved threatens to exceed (max_num_hsps / (number
890  * of DB sequences with hits to query i))
891  * @param hsp_stream The HSPStream [in][out]
892  * @param num_queries Number of queries in the search [in]
893  * @param hit_param Hit parameters [in]
894  * @param max_num_hsps The limit on the number of HSPs to be
895  * kept for each query sequence [in]
896  * @param removed_hsps Set to TRUE if any hits were removed [out]
897  * @return The generated collection of HSP results
898  */
901  Uint4 num_queries,
902  SBlastHitsParameters* hit_param,
903  Uint4 max_num_hsps,
904  Boolean* removed_hsps);
905 
907 /** As Blast_HSPResultsFromHSPStreamWithLimit, except accept and return
908  * array of Boolean flags specifying which query exceeded HSP limits.
909  */
911  Uint4 num_queries,
912  SBlastHitsParameters* hit_param,
913  Uint4 max_num_hsps,
914  Boolean* removed_hsps);
915 /** Splits the BlastHSPResults structure for a PHI BLAST search into an array of
916  * BlastHSPResults structures, corresponding to different pattern occurrences in
917  * query. All HSPs are copied, so it is safe to free the returned
918  * BlastHSPResults structures independently of the input results structure.
919  * @param results All results from a PHI BLAST search, with HSPs for
920  * different query pattern occurrences mixed together. [in]
921  * @param pattern_info Information about pattern occurrences in query. [in]
922  * @return Array of pointers to BlastHSPResults structures, corresponding to
923  * different pattern occurrences.
924  */
928  const SPHIQueryInfo* pattern_info);
929 
930 
931 /** Count the number of occurrences of pattern in sequence, which
932  * do not overlap by more than half the pattern match length.
933  * @param query_info Query information structure, containing pattern info. [in]
934  */
936 Int4
938 
939 /** Apply Cross_match like masklevel to HSP list. -RMH-
940  */
943  const BlastQueryInfo *query_info,
944  Int4 masklevel, Int4 query_length);
945 
947 Int4 GetPrelimHitlistSize(Int4 hitlist_size,
948  Int4 compositionBasedStats,
949  Boolean gapped_calculation);
950 #ifdef __cplusplus
951 }
952 #endif
953 #endif /* !ALGO_BLAST_CORE__BLAST_HITS__H */
Defines to provide correct exporting from BLAST DLL in Windows.
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
struct BlastHSPMappingInfo BlastHSPMappingInfo
Mapping information for an HSP.
Int2 Blast_HSPListReapByRawScore(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Discard the HSPs above the raw threshold from the HSP list.
Definition: blast_hits.c:2076
Int2 Blast_HSPResultsReverseSort(BlastHSPResults *results)
Sort each hit list in the BLAST results by best e-value, in reverse order.
Definition: blast_hits.c:3402
Int2 Blast_HSPListAppend(BlastHSPList **old_hsp_list_ptr, BlastHSPList **combined_hsp_list_ptr, Int4 hsp_num_max)
Append one HSP list to the other.
Definition: blast_hits.c:2807
BlastHSPResults * Blast_HSPResultsFromHSPStreamWithLimitEx(struct BlastHSPStream *hsp_stream, Uint4 num_queries, SBlastHitsParameters *hit_param, Uint4 max_num_hsps, Boolean *removed_hsps)
As Blast_HSPResultsFromHSPStreamWithLimit, except accept and return array of Boolean flags specifying ...
Definition: blast_hits.c:3873
void Blast_HSPListSortByEvalue(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by e-value, with scores and other criteria used to resolve ties.
Definition: blast_hits.c:1437
Int2 Blast_HitListHSPListsFree(BlastHitList *hitlist)
Deallocate memory for every HSP list on BlastHitList, as well as all their components.
Definition: blast_hits.c:3148
BlastHSP * Blast_HSPNew(void)
Allocates and zeros out memory for an HSP structure.
Definition: blast_hits.c:141
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
Definition: blast_hits.c:3364
struct SPHIHspInfo SPHIHspInfo
In PHI BLAST: information about pattern match in a given HSP.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
Definition: blast_hits.c:151
void Blast_HSPListPHIGetEvalues(BlastHSPList *hsp_list, BlastScoreBlk *sbp, const BlastQueryInfo *query_info, const SPHIPatternSearchBlk *pattern_blk)
Calculate e-values for a PHI BLAST HSP list.
Definition: blast_hits.c:1955
Int2 Blast_HSPGetNumIdentitiesAndPositives(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr, const BlastScoreBlk *sbp)
Calculate number of identities and positives in an HSP and set the BlastHSP::num_ident and BlastHSP::...
Definition: blast_hits.c:966
BlastHitList * Blast_HitListFree(BlastHitList *hitlist)
Deallocate memory for the hit list.
Definition: blast_hits.c:3137
Int4 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions *options)
Calculates the number of HSPs that should be saved.
Definition: blast_hits.c:213
Int2 Blast_HitListMerge(BlastHitList **old_hit_list_ptr, BlastHitList **combined_hit_list_ptr, Int4 contexts_per_query, Int4 *split_offsets, Int4 chunk_overlap_size, Boolean allow_gap)
Combine two hitlists; both HitLists must contain HSPs that represent alignments to the same query seq...
Definition: blast_hits.c:2119
void Blast_HSPCalcLengthAndGaps(const BlastHSP *hsp, Int4 *length, Int4 *gaps, Int4 *gap_opens)
Calculate length of an HSP as length in query plus length of gaps in query.
Definition: blast_hits.c:1055
Int2 Blast_HSPResultsSortByEvalue(BlastHSPResults *results)
Sort each hit list in the BLAST results by best e-value.
Definition: blast_hits.c:3381
Boolean Blast_HSPTestIdentityAndLength(EBlastProgramType program_number, BlastHSP *hsp, const Uint1 *query, const Uint1 *subject, const BlastScoringOptions *score_options, const BlastHitSavingOptions *hit_options)
Calculates number of identities and alignment lengths of an HSP via Blast_HSPGetNumIdentities and det...
Definition: blast_hits.c:1004
Int4 Blast_HSPListSubjectBestHit(EBlastProgramType program, const BlastHSPSubjectBestHitOptions *subject_besthit_opts, const BlastQueryInfo *query_info, BlastHSPList *hsp_list)
Definition: blast_hits.c:2536
Int2 Blast_HSPResultsReverseOrder(BlastHSPResults *results)
Reverse order of HSP lists in each hit list in the BLAST results.
Definition: blast_hits.c:3418
Boolean Blast_HSPList_IsEmpty(const BlastHSPList *hsp_list)
Returns true if the BlastHSPList contains no HSPs.
Definition: blast_hits.c:1578
struct BlastHitList BlastHitList
The structure to contain all BLAST results for one query sequence.
BlastHitList * Blast_HitListNew(Int4 hitlist_size)
Allocate memory for a hit list of a given size.
Definition: blast_hits.c:3123
Int2 Blast_HSPGetPartialSubjectTranslation(BLAST_SequenceBlk *subject_blk, BlastHSP *hsp, Boolean is_ooframe, const Uint1 *gen_code_string, Uint1 **translation_buffer_ptr, Uint1 **subject_ptr, Int4 *subject_length_ptr, Int4 *start_shift_ptr)
Performs the translation and coordinates adjustment, if only part of the subject sequence is translat...
Definition: blast_hits.c:1239
Int2 Blast_HitListSortByEvalue(BlastHitList *hit_list)
Sort BlastHitList based on evalue.
Definition: blast_hits.c:3329
Int4 Blast_HSPListPurgeHSPsWithCommonEndpoints(EBlastProgramType program, BlastHSPList *hsp_list, Boolean purge)
Check for an overlap of two different alignments and remove redundant HSPs.
Definition: blast_hits.c:2455
Int2 Blast_HSPResultsInsertHSPList(BlastHSPResults *results, BlastHSPList *hsp_list, Int4 hitlist_size)
Blast_HSPResultsInsertHSPList Insert an HSP list to the appropriate place in the results structure.
Definition: blast_hits.c:3552
void Blast_HSPGetAdjustedOffsets(EBlastProgramType program, BlastHSP *hsp, Int4 query_length, Int4 subject_length, Int4 *q_start, Int4 *q_end, Int4 *s_start, Int4 *s_end)
Adjust HSP endpoint offsets according to strand/frame; return values in 1-offset coordinates instead ...
Definition: blast_hits.c:1109
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Definition: blast_hits.c:1558
Int2 Blast_HSPGetNumIdentities(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr)
Calculate number of identities in an HSP and set the BlastHSP::num_ident field (unconditionally)
Definition: blast_hits.c:940
Int2 Blast_HSPListPurgeNullHSPs(BlastHSPList *hsp_list)
Cleans out the NULLed out HSP's from the HSP array that is part of the BlastHSPList.
Definition: blast_hits.c:2225
BlastHSPMappingInfo * BlastHSPMappingInfoNew(void)
Allocate memory for an HSP's additional data structure.
Definition: blast_hits.c:207
BlastHSPList * BlastHSPListDup(const BlastHSPList *hsp_list)
Returns a duplicate (deep copy) of the given hsp list.
Definition: blast_hits.c:1583
Boolean Blast_HSPTest(BlastHSP *hsp, const BlastHitSavingOptions *hit_options, Int4 align_length)
Determines whether this HSP should be kept or deleted.
Definition: blast_hits.c:1027
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
Definition: blast_hits.c:3344
struct BlastHSPResults BlastHSPResults
The structure to contain all BLAST results, for multiple queries.
BlastHSPResults * Blast_HSPResultsFromHSPStreamWithLimit(struct BlastHSPStream *hsp_stream, Uint4 num_queries, SBlastHitsParameters *hit_param, Uint4 max_num_hsps, Boolean *removed_hsps)
As Blast_HSPResultsFromHSPStream, except the total number of HSPs kept for each query does not exceed...
Definition: blast_hits.c:3855
Int2 Blast_HSPListsMerge(BlastHSPList **hsp_list, BlastHSPList **combined_hsp_list_ptr, Int4 hsp_num_max, Int4 *split_points, Int4 contexts_per_query, Int4 chunk_overlap_size, Boolean allow_gap, Boolean short_reads)
Merge an HSP list from a chunk of the subject sequence into a previously computed HSP list.
Definition: blast_hits.c:2855
BlastHSPMappingInfo * BlastHSPMappingInfoFree(BlastHSPMappingInfo *info)
Deallocate memory for an HSP's additional data structure.
Definition: blast_hits.c:192
struct BlastHSPList BlastHSPList
The structure to hold all HSPs for a given sequence after the gapped alignment.
BlastHSP * Blast_HSPClone(const BlastHSP *hsp)
Make a deep copy of an HSP.
Definition: blast_hits.c:264
Int2 Blast_HitListPurgeNullHSPLists(BlastHitList *hit_list)
Purges a BlastHitList of NULL HSP lists.
Definition: blast_hits.c:3300
Int2 Blast_HSPListGetEvalues(EBlastProgramType program_number, const BlastQueryInfo *query_info, Int4 subject_length, BlastHSPList *hsp_list, Boolean gapped_calculation, Boolean RPS_prelim, const BlastScoreBlk *sbp, double gap_decay_rate, double scaling_factor)
Calculate the expected values for all HSPs in a hit list, without using the sum statistics.
Definition: blast_hits.c:1811
Int2 Blast_HSPListReevaluateUngapped(EBlastProgramType program, BlastHSPList *hsp_list, BLAST_SequenceBlk *query_blk, BLAST_SequenceBlk *subject_blk, const BlastInitialWordParameters *word_params, const BlastHitSavingParameters *hit_params, const BlastQueryInfo *query_info, BlastScoreBlk *sbp, const BlastScoringParameters *score_params, const BlastSeqSrc *seq_src, const Uint1 *gen_code_string)
Reevaluate all ungapped HSPs in an HSP list.
Definition: blast_hits.c:2607
Boolean Blast_HSPQueryCoverageTest(BlastHSP *hsp, double min_query_coverage_pct, Int4 query_length)
Calculate query coverage percentage of an hsp.
Definition: blast_hits.c:1045
Int2 Blast_TrimHSPListByMaxHsps(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Definition: blast_hits.c:2049
SBlastHitsParameters * SBlastHitsParametersFree(SBlastHitsParameters *param)
Deallocates SBlastHitsParameters.
Definition: blast_hits.c:115
void Blast_HSPListAdjustOddBlastnScores(BlastHSPList *hsp_list, Boolean gapped_calculation, const BlastScoreBlk *sbp)
For nucleotide BLAST, if the match reward score is equal to 2, random alignments are dominated by run...
Definition: blast_hits.c:3051
struct SBlastHitsParameters SBlastHitsParameters
Keeps prelim_hitlist_size and HitSavingOptions together, mostly for use by hspstream.
Int2 Blast_HSPListReapByQueryCoverage(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options, const BlastQueryInfo *query_info, EBlastProgramType program_number)
Discard the HSPs below the min query coverage pct from the HSP list.
Definition: blast_hits.c:2010
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
Definition: blast_hits.c:130
Int2 Blast_HSPResultsApplyMasklevel(BlastHSPResults *results, const BlastQueryInfo *query_info, Int4 masklevel, Int4 query_length)
Apply Cross_match like masklevel to HSP list.
Definition: blast_hits.c:3465
Boolean Blast_HSPListIsSortedByScore(const BlastHSPList *hsp_list)
Check if HSP list is sorted by score.
Definition: blast_hits.c:1358
const Uint1 * Blast_HSPGetTargetTranslation(SBlastTargetTranslation *target_t, const BlastHSP *hsp, Int4 *translated_length)
Returns a buffer with a protein translated from nucleotide.
Definition: blast_hits.c:1147
struct BlastSeg BlastSeg
One sequence segment within an HSP.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
Definition: blast_hits.c:1754
Int4 GetPrelimHitlistSize(Int4 hitlist_size, Int4 compositionBasedStats, Boolean gapped_calculation)
Definition: blast_hits.c:44
BlastHSPResults ** PHIBlast_HSPResultsSplit(const BlastHSPResults *results, const SPHIQueryInfo *pattern_info)
Splits the BlastHSPResults structure for a PHI BLAST search into an array of BlastHSPResults structur...
Definition: blast_hits.c:3570
void Blast_HSPAdjustSubjectOffset(BlastHSP *hsp, Int4 start_shift)
Adjusts offsets if partial sequence was used for extension.
Definition: blast_hits.c:1316
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
Definition: blast_hits.c:1542
double Blast_HSPGetQueryCoverage(const BlastHSP *hsp, Int4 query_length)
Calculate query coverage percentage of an hsp.
Definition: blast_hits.c:1034
Boolean Blast_HSPReevaluateWithAmbiguitiesUngapped(BlastHSP *hsp, const Uint1 *query_start, const Uint1 *subject_start, const BlastInitialWordParameters *word_params, BlastScoreBlk *sbp, Boolean translated)
Reevaluate the HSP's score and percent identity after taking into account the ambiguity information.
Definition: blast_hits.c:676
Int2 SBlastHitsParametersNew(const BlastHitSavingOptions *hit_options, const BlastExtensionOptions *ext_options, const BlastScoringOptions *scoring_options, SBlastHitsParameters **retval)
Sets up small structures used by blast_hit.c for saving HSPs.
Definition: blast_hits.c:75
Int4 PhiBlastGetEffectiveNumberOfPatterns(const BlastQueryInfo *query_info)
Count the number of occurrences of pattern in sequence, which do not overlap by more than half the pa...
Definition: blast_hits.c:360
void Blast_HSPListPHIGetBitScores(BlastHSPList *hsp_list, BlastScoreBlk *sbp)
Calculate bit scores from raw scores in an HSP list for a PHI BLAST search.
Definition: blast_hits.c:1934
void Blast_HSPListSwap(BlastHSPList *list1, BlastHSPList *list2)
Swaps the two HSP lists via structure assignment.
Definition: blast_hits.c:1614
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Definition: blast_hits.c:1374
Int2 Blast_HSPListReapByEvalue(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Discard the HSPs above the e-value threshold from the HSP list.
Definition: blast_hits.c:1976
SBlastHitsParameters * SBlastHitsParametersDup(const SBlastHitsParameters *hit_params)
Make a deep copy of the SBlastHitsParameters structure passed in.
Definition: blast_hits.c:101
Int2 Blast_HSPListGetBitScores(BlastHSPList *hsp_list, Boolean gapped_calculation, const BlastScoreBlk *sbp)
Calculate bit scores from raw scores in an HSP list.
Definition: blast_hits.c:1907
struct BlastHSP BlastHSP
Structure holding all information about an HSP.
Boolean Blast_HSPReevaluateWithAmbiguitiesGapped(BlastHSP *hsp, const Uint1 *query_start, const Int4 query_length, const Uint1 *subject_start, const Int4 subject_length, const BlastHitSavingParameters *hit_params, const BlastScoringParameters *score_params, const BlastScoreBlk *sbp)
Reevaluate the HSP's score and percent identity after taking into account the ambiguity information.
Definition: blast_hits.c:479
BlastHSPResults * Blast_HSPResultsFromHSPStream(struct BlastHSPStream *hsp_stream, size_t num_queries, SBlastHitsParameters *hit_param)
Move all of the hits within an HSPStream into a BlastHSPResults structure.
Definition: blast_hits.c:3633
Int2 Blast_HitListUpdate(BlastHitList *hit_list, BlastHSPList *hsp_list)
Insert a new HSP list into the hit list.
Definition: blast_hits.c:3241
void Blast_HSPListAdjustOffsets(BlastHSPList *hsp_list, Int4 offset)
Adjust subject offsets in an HSP list if only part of the subject sequence was searched.
Definition: blast_hits.c:3035
The structures and functions in blast_options.
Structure and function definitions for BLAST parameter structures, which are internal to the CORE of ...
Definitions for various programs supported by core BLAST.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
Definitions and functions associated with the BlastQueryInfo structure.
Declaration of ADT to retrieve sequences for the BLAST engine.
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
int offset
Definition: replacements.h:160
Definitions of structures used for saving traceback information.
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
static MDB_envinfo info
Definition: mdb_load.c:37
Type and macro definitions from C toolkit that are not defined in C++ toolkit.
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
Functions for finding pattern matches in sequence (PHI-BLAST).
static int * results[]
static int pattern_info(int what, void *where, BOOL unsetok)
Definition: pcre2test.c:4156
Structure to hold a sequence.
Definition: blast_def.h:242
Options used for gapped extension These include: a.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Boolean do_not_reallocate
Is reallocation of the hsp_array allowed?
Definition: blast_hits.h:161
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Definition: blast_hits.h:154
Int4 hspcnt
Number of HSPs saved.
Definition: blast_hits.h:158
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Definition: blast_hits.h:157
Int4 hsp_max
The maximal number of HSPs allowed to be saved.
Definition: blast_hits.h:160
double best_evalue
Smallest e-value for HSPs in this list.
Definition: blast_hits.h:162
Int4 allocated
The allocated size of the hsp_array.
Definition: blast_hits.h:159
Int4 query_index
Index of the query which this HSPList corresponds to.
Definition: blast_hits.h:155
Mapping information for an HSP.
Definition: blast_hits.h:113
Uint1 left_edge
Two subject bases before the alignment in the four least significant bits and flags in most significa...
Definition: blast_hits.h:116
Int4 flags
< Same as above for subject bases after the alignment (for RNA-seq mapping)
Definition: blast_hits.h:121
JumperEditsBlock * edits
Information about mismatches and gaps, used for mapping short reads.
Definition: blast_hits.h:114
SequenceOverhangs * subject_overhangs
Unaligned subject subsequence.
Definition: blast_hits.h:122
The structure to contain all BLAST results, for multiple queries.
Definition: blast_hits.h:183
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Definition: blast_hits.h:185
Int4 num_queries
Number of query sequences.
Definition: blast_hits.h:184
Default implementation of BlastHSPStream.
Structure holding all information about an HSP.
Definition: blast_hits.h:126
SPHIHspInfo * pat_info
In PHI BLAST, information about this pattern match.
Definition: blast_hits.h:142
double evalue
This HSP's e-value.
Definition: blast_hits.h:130
Int4 num_ident
Number of identical base pairs in this HSP.
Definition: blast_hits.h:128
BlastSeg query
Query sequence info.
Definition: blast_hits.h:131
Int4 context
Context number of query.
Definition: blast_hits.h:133
double bit_score
Bit score, calculated from score.
Definition: blast_hits.h:129
Int4 num
How many HSP's are linked together for sum statistics evaluation? If unset (0), this HSP is not part ...
Definition: blast_hits.h:135
BlastSeg subject
Subject sequence info.
Definition: blast_hits.h:132
GapEditScript * gap_info
ALL gapped alignment is here.
Definition: blast_hits.h:134
Int4 num_positives
Definition: blast_hits.h:144
Int2 comp_adjustment_method
which mode of composition adjustment was used; relevant only for blastp and tblastn
Definition: blast_hits.h:139
Int4 score
This HSP's raw score.
Definition: blast_hits.h:127
BlastHSPMappingInfo * map_info
Definition: blast_hits.h:146
The structure to contain all BLAST results for one query sequence.
Definition: blast_hits.h:169
double worst_evalue
Highest of the best e-values among the HSP lists.
Definition: blast_hits.h:172
Int4 hsplist_max
Maximal allowed size of the HSP lists array.
Definition: blast_hits.h:171
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Definition: blast_hits.h:176
Int4 hsplist_count
Filled size of the HSP lists array.
Definition: blast_hits.h:170
Int4 low_score
The lowest of the best scores among the HSP lists.
Definition: blast_hits.h:174
Int4 hsplist_current
Number of allocated HSP list arrays.
Definition: blast_hits.h:178
Boolean heapified
Is this hit list already heapified?
Definition: blast_hits.h:175
Int4 num_hits
Number of similar hits for the query (for mapping)
Definition: blast_hits.h:179
Options used when evaluating and saving hits These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it.
The query related information.
Structure used for scoring calculations.
Definition: blast_stat.h:177
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
One sequence segment within an HSP.
Definition: blast_hits.h:96
Int4 end
End of hsp.
Definition: blast_hits.h:99
Int4 gapped_start
Where the gapped extension started.
Definition: blast_hits.h:100
Int2 frame
Translation frame.
Definition: blast_hits.h:97
Int4 offset
Start of hsp.
Definition: blast_hits.h:98
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Edit script: linked list of correspondences between two sequences.
Definition: gapinfo.h:57
Alignment edit script for gapped alignment.
Definition: jumper.h:96
Keeps prelim_hitlist_size and HitSavingOptions together, mostly for use by hspstream.
Definition: blast_hits.h:57
Int4 prelim_hitlist_size
number of hits saved during preliminary part of search.
Definition: blast_hits.h:58
Int4 hsp_num_max
number of HSPs to save per db sequence.
Definition: blast_hits.h:60
Information about target translations.
Definition: blast_def.h:311
In PHI BLAST: information about pattern match in a given HSP.
Definition: blast_hits.h:104
Int4 index
Index of query pattern occurrence for this HSP.
Definition: blast_hits.h:105
Int4 length
Length of this pattern occurrence in subject.
Definition: blast_hits.h:106
Structure containing all auxiliary information needed in a pattern search.
Definition: pattern.h:155
In PHI BLAST, structure containing information about all pattern occurrences in query.
Definition: blast_def.h:300
Structure to save short unaligned subsequences outside an HSP.
Definition: jumper.h:268
static string subject
static string query
Modified on Fri Sep 20 14:58:24 2024 by modify_doxy.py rev. 669887