NCBI C++ ToolKit
|
BLAST functions for saving hits after the (preliminary) gapped alignment. More...
#include <algo/blast/core/ncbi_math.h>
#include <algo/blast/core/blast_hits.h>
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_hspstream.h>
#include "blast_hits_priv.h"
#include "blast_itree.h"
#include "jumper.h"
Go to the source code of this file.
Go to the SVN repository for this file.
Classes | |
struct | SHspWrap |
Auxiliary structure for sorting HSPs. More... | |
struct | BlastHSPwOid |
Macros | |
#define | OVERLAP_DIAG_CLOSE 10 |
Maximal diagonal distance between HSP starting offsets, within which HSPs from search of different chunks of subject sequence are considered for merging. More... | |
Typedefs | |
typedef struct SHspWrap | SHspWrap |
Auxiliary structure for sorting HSPs. More... | |
typedef struct BlastHSPwOid | BlastHSPwOid |
Functions | |
Int4 | GetPrelimHitlistSize (Int4 hitlist_size, Int4 compositionBasedStats, Boolean gapped_calculation) |
Int2 | SBlastHitsParametersNew (const BlastHitSavingOptions *hit_options, const BlastExtensionOptions *ext_options, const BlastScoringOptions *scoring_options, SBlastHitsParameters **retval) |
Sets up small structures used by blast_hit.c for saving HSPs. More... | |
SBlastHitsParameters * | SBlastHitsParametersDup (const SBlastHitsParameters *hit_params) |
Make a deep copy of the SBlastHitsParameters structure passed in. More... | |
SBlastHitsParameters * | SBlastHitsParametersFree (SBlastHitsParameters *param) |
Deallocates SBlastHitsParameters. More... | |
BlastHSP * | Blast_HSPFree (BlastHSP *hsp) |
Deallocate memory for an HSP structure. More... | |
BlastHSP * | Blast_HSPNew (void) |
Allocates and zeros out memory for an HSP structure. More... | |
Int2 | Blast_HSPInit (Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp) |
Allocates BlastHSP and inits with information from input. More... | |
BlastHSPMappingInfo * | BlastHSPMappingInfoFree (BlastHSPMappingInfo *info) |
Deallocate memory for an HSP's additional data structure. More... | |
BlastHSPMappingInfo * | BlastHSPMappingInfoNew (void) |
Allocate memory for an HSP's additional data structure. More... | |
Int4 | BlastHspNumMax (Boolean gapped_calculation, const BlastHitSavingOptions *options) |
Calculates the number of HSPs that should be saved. More... | |
static BlastHSP * | s_BlastHSPCopy (const BlastHSP *hsp) |
Copies all contents of a BlastHSP structure. More... | |
BlastHSP * | Blast_HSPClone (const BlastHSP *hsp) |
Make a deep copy of an HSP. More... | |
Int4 | PhiBlastGetEffectiveNumberOfPatterns (const BlastQueryInfo *query_info) |
Count the number of occurrences of pattern in sequence, which do not overlap by more than half the pattern match length. More... | |
static void | s_HSPPHIGetEvalue (BlastHSP *hsp, BlastScoreBlk *sbp, const BlastQueryInfo *query_info, const SPHIPatternSearchBlk *pattern_blk) |
Calculate e-value for an HSP found by PHI BLAST. More... | |
static Boolean | s_UpdateReevaluatedHSP (BlastHSP *hsp, Boolean gapped, Int4 cutoff_score, Int4 score, const Uint1 *query_start, const Uint1 *subject_start, const Uint1 *best_q_start, const Uint1 *best_q_end, const Uint1 *best_s_start, const Uint1 *best_s_end, int best_start_esp_index, int best_end_esp_index, int best_end_esp_num) |
Update HSP data after reevaluation with ambiguities. More... | |
Boolean | Blast_HSPReevaluateWithAmbiguitiesGapped (BlastHSP *hsp, const Uint1 *q, const Int4 qlen, const Uint1 *s, const Int4 slen, const BlastHitSavingParameters *hit_params, const BlastScoringParameters *score_params, const BlastScoreBlk *sbp) |
Reevaluate the HSP's score and percent identity after taking into account the ambiguity information. More... | |
static Boolean | s_UpdateReevaluatedHSPUngapped (BlastHSP *hsp, Int4 cutoff_score, Int4 score, const Uint1 *query_start, const Uint1 *subject_start, const Uint1 *best_q_start, const Uint1 *best_q_end, const Uint1 *best_s_start, const Uint1 *best_s_end) |
Update HSP data after reevaluation with ambiguities for an ungapped search. More... | |
Boolean | Blast_HSPReevaluateWithAmbiguitiesUngapped (BlastHSP *hsp, const Uint1 *query_start, const Uint1 *subject_start, const BlastInitialWordParameters *word_params, BlastScoreBlk *sbp, Boolean translated) |
Reevaluate the HSP's score and percent identity after taking into account the ambiguity information. More... | |
static Int2 | s_Blast_HSPGetNumIdentitiesAndPositives (const Uint1 *query, const Uint1 *subject, const BlastHSP *hsp, Int4 *num_ident_ptr, Int4 *align_length_ptr, const BlastScoreBlk *sbp, Int4 *num_pos_ptr) |
Calculate number of identities in a regular HSP. More... | |
static Int2 | s_Blast_HSPGetOOFNumIdentitiesAndPositives (const Uint1 *query, const Uint1 *subject, const BlastHSP *hsp, EBlastProgramType program, Int4 *num_ident_ptr, Int4 *align_length_ptr, const BlastScoreBlk *sbp, Int4 *num_pos_ptr) |
Calculate number of identities in an HSP for an out-of-frame alignment. More... | |
Int2 | Blast_HSPGetNumIdentities (const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr) |
Calculate number of identities in an HSP and set the BlastHSP::num_ident field (unconditionally) More... | |
Int2 | Blast_HSPGetNumIdentitiesAndPositives (const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr, const BlastScoreBlk *sbp) |
Calculate number of identities and positives in an HSP and set the BlastHSP::num_ident and BlastHSP::num_positives fields. More... | |
static Boolean | s_HSPTest (const BlastHSP *hsp, const BlastHitSavingOptions *hit_options, Int4 align_length) |
Boolean | Blast_HSPTestIdentityAndLength (EBlastProgramType program_number, BlastHSP *hsp, const Uint1 *query, const Uint1 *subject, const BlastScoringOptions *score_options, const BlastHitSavingOptions *hit_options) |
Calculates number of identities and alignment lengths of an HSP via Blast_HSPGetNumIdentities and determines whether this HSP should be kept or deleted. More... | |
Boolean | Blast_HSPTest (BlastHSP *hsp, const BlastHitSavingOptions *hit_options, Int4 align_length) |
Determines whether this HSP should be kept or deleted. More... | |
double | Blast_HSPGetQueryCoverage (const BlastHSP *hsp, Int4 query_length) |
Calculate query coverage percentage of an hsp. More... | |
Boolean | Blast_HSPQueryCoverageTest (BlastHSP *hsp, double min_query_coverage_pct, Int4 query_length) |
Calculate query coverage percentage of an hsp. More... | |
void | Blast_HSPCalcLengthAndGaps (const BlastHSP *hsp, Int4 *length_out, Int4 *gaps_out, Int4 *gap_opens_out) |
Calculate length of an HSP as length in query plus length of gaps in query. More... | |
static void | s_BlastSegGetTranslatedOffsets (const BlastSeg *segment, Int4 seq_length, Int4 *start, Int4 *end) |
Adjust start and end of an HSP in a translated sequence segment. More... | |
void | Blast_HSPGetAdjustedOffsets (EBlastProgramType program, BlastHSP *hsp, Int4 query_length, Int4 subject_length, Int4 *q_start, Int4 *q_end, Int4 *s_start, Int4 *s_end) |
Adjust HSP endpoint offsets according to strand/frame; return values in 1-offset coordinates instead of internal 0-offset. More... | |
const Uint1 * | Blast_HSPGetTargetTranslation (SBlastTargetTranslation *target_t, const BlastHSP *hsp, Int4 *translated_length) |
Returns a buffer with a protein translated from nucleotide. More... | |
Int2 | Blast_HSPGetPartialSubjectTranslation (BLAST_SequenceBlk *subject_blk, BlastHSP *hsp, Boolean is_ooframe, const Uint1 *gen_code_string, Uint1 **translation_buffer_ptr, Uint1 **subject_ptr, Int4 *subject_length_ptr, Int4 *start_shift_ptr) |
Performs the translation and coordinates adjustment, if only part of the subject sequence is translated for gapped alignment. More... | |
void | Blast_HSPAdjustSubjectOffset (BlastHSP *hsp, Int4 start_shift) |
Adjusts offsets if partial sequence was used for extension. More... | |
int | ScoreCompareHSPs (const void *h1, const void *h2) |
Comparison callback function for sorting HSPs, first by score in descending order, then by location. More... | |
Boolean | Blast_HSPListIsSortedByScore (const BlastHSPList *hsp_list) |
Check if HSP list is sorted by score. More... | |
void | Blast_HSPListSortByScore (BlastHSPList *hsp_list) |
Sort the HSPs in an HSP list by score. More... | |
static int | s_EvalueComp (double evalue1, double evalue2) |
Compares 2 evalues, consider them equal if both are close enough to zero. More... | |
static int | s_EvalueCompareHSPs (const void *v1, const void *v2) |
Comparison callback function for sorting HSPs by e-value and score, before saving BlastHSPList in a BlastHitList. More... | |
void | Blast_HSPListSortByEvalue (BlastHSPList *hsp_list) |
Sort the HSPs in an HSP list by e-value, with scores and other criteria used to resolve ties. More... | |
static Int4 | s_HSPStartDiag (const BlastHSP *hsp) |
Retrieve the starting diagonal of an HSP. More... | |
static Int4 | s_HSPEndDiag (const BlastHSP *hsp) |
Retrieve the ending diagonal of an HSP. More... | |
static Boolean | s_BlastMergeTwoHSPs (BlastHSP *hsp1, BlastHSP *hsp2, Boolean allow_gap) |
Given two hits, check if the hits can be merged and do the merge if so. More... | |
BlastHSPList * | Blast_HSPListFree (BlastHSPList *hsp_list) |
Deallocate memory for an HSP list structure as well as all its components. More... | |
BlastHSPList * | Blast_HSPListNew (Int4 hsp_max) |
Creates HSP list structure with a default size HSP array. More... | |
Boolean | Blast_HSPList_IsEmpty (const BlastHSPList *hsp_list) |
Returns true if the BlastHSPList contains no HSPs. More... | |
BlastHSPList * | BlastHSPListDup (const BlastHSPList *hsp_list) |
Returns a duplicate (deep copy) of the given hsp list. More... | |
void | Blast_HSPListSwap (BlastHSPList *list1, BlastHSPList *list2) |
Swaps the two HSP lists via structure assignment. More... | |
static void | s_Heapify (char *base0, char *base, char *lim, char *last, size_t width, int(*compar)(const void *, const void *)) |
This is a copy of a static function from ncbimisc.c. More... | |
static void | s_CreateHeap (void *b, size_t nel, size_t width, int(*compar)(const void *, const void *)) |
Creates a heap of elements based on a comparison function. More... | |
static void | s_BlastHSPListInsertHSPInHeap (BlastHSPList *hsp_list, BlastHSP **hsp) |
Given a BlastHSPList* with a heapified HSP array, check whether the new HSP is better than the worst scoring. More... | |
static Boolean | s_BlastCheckBestEvalue (const BlastHSPList *hsp_list) |
Verifies that the best_evalue field on the BlastHSPList is correct. More... | |
static double | s_BlastGetBestEvalue (const BlastHSPList *hsp_list) |
Gets the best (lowest) evalue from the BlastHSPList. More... | |
Int2 | Blast_HSPListSaveHSP (BlastHSPList *hsp_list, BlastHSP *new_hsp) |
Saves HSP information into a BlastHSPList structure. More... | |
Int2 | Blast_HSPListGetEvalues (EBlastProgramType program_number, const BlastQueryInfo *query_info, Int4 subject_length, BlastHSPList *hsp_list, Boolean gapped_calculation, Boolean RPS_prelim, const BlastScoreBlk *sbp, double gap_decay_rate, double scaling_factor) |
Calculate the expected values for all HSPs in a hit list, without using the sum statistics. More... | |
Int2 | Blast_HSPListGetBitScores (BlastHSPList *hsp_list, Boolean gapped_calculation, const BlastScoreBlk *sbp) |
Calculate bit scores from raw scores in an HSP list. More... | |
void | Blast_HSPListPHIGetBitScores (BlastHSPList *hsp_list, BlastScoreBlk *sbp) |
Calculate bit scores from raw scores in an HSP list for a PHI BLAST search. More... | |
void | Blast_HSPListPHIGetEvalues (BlastHSPList *hsp_list, BlastScoreBlk *sbp, const BlastQueryInfo *query_info, const SPHIPatternSearchBlk *pattern_blk) |
Calculate e-values for a PHI BLAST HSP list. More... | |
Int2 | Blast_HSPListReapByEvalue (BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options) |
Discard the HSPs above the e-value threshold from the HSP list. More... | |
Int2 | Blast_HSPListReapByQueryCoverage (BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options, const BlastQueryInfo *query_info, EBlastProgramType program_number) |
Discard the HSPs below the min query coverage pct from the HSP list. More... | |
Int2 | Blast_TrimHSPListByMaxHsps (BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options) |
Int2 | Blast_HSPListReapByRawScore (BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options) |
Same as Blast_HSPListReapByEvalue() except that it uses the raw score of the hit and the HitSavingOptions->cutoff_score to filter out hits. More... | |
static int | s_SortHSPListByOid (const void *x, const void *y) |
callback used to sort HSP lists in order of increasing OID More... | |
Int2 | Blast_HitListMerge (BlastHitList **old_hit_list_ptr, BlastHitList **combined_hit_list_ptr, Int4 contexts_per_query, Int4 *split_offsets, Int4 chunk_overlap_size, Boolean allow_gap) |
Combine two hitlists; both HitLists must contain HSPs that represent alignments to the same query sequence. More... | |
Int2 | Blast_HSPListPurgeNullHSPs (BlastHSPList *hsp_list) |
Cleans out the NULLed out HSP's from the HSP array that is part of the BlastHSPList. More... | |
static int | s_QueryOffsetCompareHSPs (const void *v1, const void *v2) |
Callback for sorting HSPs by starting offset in query. More... | |
static int | s_QueryEndCompareHSPs (const void *v1, const void *v2) |
Callback for sorting HSPs by ending offset in query. More... | |
static void | s_CutOffGapEditScript (BlastHSP *hsp, Int4 q_cut, Int4 s_cut, Boolean cut_begin) |
Int4 | Blast_HSPListPurgeHSPsWithCommonEndpoints (EBlastProgramType program, BlastHSPList *hsp_list, Boolean purge) |
Check for an overlap of two different alignments and remove redundant HSPs. More... | |
Int4 | Blast_HSPListSubjectBestHit (EBlastProgramType program, const BlastHSPSubjectBestHitOptions *subject_besthit_opts, const BlastQueryInfo *query_info, BlastHSPList *hsp_list) |
Int2 | Blast_HSPListReevaluateUngapped (EBlastProgramType program, BlastHSPList *hsp_list, BLAST_SequenceBlk *query_blk, BLAST_SequenceBlk *subject_blk, const BlastInitialWordParameters *word_params, const BlastHitSavingParameters *hit_params, const BlastQueryInfo *query_info, BlastScoreBlk *sbp, const BlastScoringParameters *score_params, const BlastSeqSrc *seq_src, const Uint1 *gen_code_string) |
Reevaluate all ungapped HSPs in an HSP list. More... | |
static void | s_BlastHSPListsCombineByScore (BlastHSPList *hsp_list, BlastHSPList *combined_hsp_list, Int4 new_hspcnt) |
Combine two HSP lists, without altering the individual HSPs, and without reallocating the HSP array. More... | |
Int2 | Blast_HSPListAppend (BlastHSPList **old_hsp_list_ptr, BlastHSPList **combined_hsp_list_ptr, Int4 hsp_num_max) |
Append one HSP list to the other. More... | |
Int2 | Blast_HSPListsMerge (BlastHSPList **hsp_list_ptr, BlastHSPList **combined_hsp_list_ptr, Int4 hsp_num_max, Int4 *split_offsets, Int4 contexts_per_query, Int4 chunk_overlap_size, Boolean allow_gap, Boolean short_reads) |
Merge an HSP list from a chunk of the subject sequence into a previously computed HSP list. More... | |
void | Blast_HSPListAdjustOffsets (BlastHSPList *hsp_list, Int4 offset) |
Adjust subject offsets in an HSP list if only part of the subject sequence was searched. More... | |
void | Blast_HSPListAdjustOddBlastnScores (BlastHSPList *hsp_list, Boolean gapped_calculation, const BlastScoreBlk *sbp) |
For nucleotide BLAST, if the match reward score is equal to 2, random alignments are dominated by runs of exact matches, which all have even scores. More... | |
static int | s_EvalueCompareHSPLists (const void *v1, const void *v2) |
Callback for sorting hsp lists by their best evalue/score; Evalues are compared with the condition that if both are close enough to zero (currently < 1.0e-180), they are considered equal. More... | |
static int | s_EvalueCompareHSPListsRev (const void *v1, const void *v2) |
Callback for sorting hsp lists by their best e-value/score, in reverse order - from higher e-value to lower (lower score to higher). More... | |
BlastHitList * | Blast_HitListNew (Int4 hitlist_size) |
Allocate memory for a hit list of a given size. More... | |
BlastHitList * | Blast_HitListFree (BlastHitList *hitlist) |
Deallocate memory for the hit list. More... | |
Int2 | Blast_HitListHSPListsFree (BlastHitList *hitlist) |
Deallocate memory for every HSP list on BlastHitList, as well as all their components. More... | |
static void | s_BlastHitListPurge (BlastHitList *hit_list) |
Purge a BlastHitList of empty HSP lists. More... | |
static void | s_BlastHitListInsertHSPListInHeap (BlastHitList *hit_list, BlastHSPList *hsp_list) |
Given a BlastHitList* with a heapified HSP list array, remove the worst scoring HSP list and insert the new HSP list in the heap. More... | |
static Int2 | s_Blast_HitListGrowHSPListArray (BlastHitList *hit_list) |
Given a BlastHitList pointer this function makes the hsplist_array larger, up to a maximum size. More... | |
Int2 | Blast_HitListUpdate (BlastHitList *hit_list, BlastHSPList *hsp_list) |
Insert a new HSP list into the hit list. More... | |
Int2 | Blast_HitListPurgeNullHSPLists (BlastHitList *hit_list) |
Purges a BlastHitList of NULL HSP lists. More... | |
Int2 | Blast_HitListSortByEvalue (BlastHitList *hit_list) |
Sort BlastHitList based on e-value. More... | |
BlastHSPResults * | Blast_HSPResultsNew (Int4 num_queries) |
Initialize the results structure. More... | |
BlastHSPResults * | Blast_HSPResultsFree (BlastHSPResults *results) |
Deallocate memory for BLAST results. More... | |
Int2 | Blast_HSPResultsSortByEvalue (BlastHSPResults *results) |
Sort each hit list in the BLAST results by best e-value. More... | |
Int2 | Blast_HSPResultsReverseSort (BlastHSPResults *results) |
Sort each hit list in the BLAST results by best e-value, in reverse order. More... | |
Int2 | Blast_HSPResultsReverseOrder (BlastHSPResults *results) |
Reverse order of HSP lists in each hit list in the BLAST results. More... | |
static int | s_SortHspWrapRawScore (const void *x, const void *y) |
callback used to sort a list of encapsulated HSP structures in order of decreasing raw score -RMH- More... | |
Int2 | Blast_HSPResultsApplyMasklevel (BlastHSPResults *results, const BlastQueryInfo *query_info, Int4 masklevel, Int4 query_length) |
Apply Cross_match like masklevel to HSP list. More... | |
Int2 | Blast_HSPResultsInsertHSPList (BlastHSPResults *results, BlastHSPList *hsp_list, Int4 hitlist_size) |
Blast_HSPResultsInsertHSPList Insert an HSP list to the appropriate place in the results structure. More... | |
BlastHSPResults ** | PHIBlast_HSPResultsSplit (const BlastHSPResults *results, const SPHIQueryInfo *pattern_info) |
Splits the BlastHSPResults structure for a PHI BLAST search into an array of BlastHSPResults structures, corresponding to different pattern occurrences in query. More... | |
BlastHSPResults * | Blast_HSPResultsFromHSPStream (BlastHSPStream *hsp_stream, size_t num_queries, SBlastHitsParameters *bhp) |
Move all of the hits within an HSPStream into a BlastHSPResults structure. More... | |
static int | s_CompareHsplistHspcnt (const void *v1, const void *v2) |
Comparison function for sorting HSP lists in increasing order of the number of HSPs in a hit. More... | |
static Boolean | s_TrimResultsByTotalHSPLimit (BlastHSPResults *results, Uint4 total_hsp_limit) |
Removes extra results if a limit is imposed on the total number of HSPs returned. More... | |
static int | s_CompareScoreHSPwOid (const void *v1, const void *v2) |
static int | s_CompareOidHSPwOid (const void *v1, const void *v2) |
static Boolean | s_TrimResultsByTotalHSPLimitEx (BlastHSPResults *results, Uint4 total_hsp_limit, Boolean *hsp_limit_exceeded) |
BlastHSPResults * | Blast_HSPResultsFromHSPStreamWithLimit (BlastHSPStream *hsp_stream, Uint4 num_queries, SBlastHitsParameters *hit_param, Uint4 max_num_hsps, Boolean *removed_hsps) |
As Blast_HSPResultsFromHSPStream, except the total number of HSPs kept for each query does not exceed an explicit limit. More... | |
BlastHSPResults * | Blast_HSPResultsFromHSPStreamWithLimitEx (BlastHSPStream *hsp_stream, Uint4 num_queries, SBlastHitsParameters *hit_param, Uint4 max_num_hsps, Boolean *removed_hsps) |
As Blast_HSPResultsFromHSPStreamWithLimit, except that it accepts and returns an array of Boolean flags specifying which queries exceeded HSP limits. More... | |
BLAST functions for saving hits after the (preliminary) gapped alignment.
Definition in file blast_hits.c.
#define OVERLAP_DIAG_CLOSE 10 |
Maximal diagonal distance between HSP starting offsets, within which HSPs from search of different chunks of subject sequence are considered for merging.
Definition at line 1537 of file blast_hits.c.
typedef struct BlastHSPwOid BlastHSPwOid |
BlastHitList* Blast_HitListFree | ( | BlastHitList * | hitlist | ) |
Deallocate memory for the hit list.
Definition at line 3137 of file blast_hits.c.
References Blast_HitListHSPListsFree(), NULL, and sfree.
Referenced by Blast_HitListMerge(), Blast_HSPResultsFree(), Blast_RedoAlignmentCore_MT(), BOOST_AUTO_TEST_CASE(), s_BlastHSPBestHitFinal(), s_BlastHSPBestHitPipeRun(), s_BlastHSPBestHitRun(), s_BlastHSPCullingPipeRun(), s_ExportToHitlist(), and s_TrimResultsByTotalHSPLimitEx().
Int2 Blast_HitListHSPListsFree | ( | BlastHitList * | hitlist | ) |
Deallocate memory for every HSP list on BlastHitList, as well as all their components.
hitlist | contains the BlastHSPList array to be freed [in/out]. |
Definition at line 3148 of file blast_hits.c.
References Blast_HSPListFree(), BlastHitList::hsplist_array, BlastHitList::hsplist_count, and sfree.
Referenced by Blast_HitListFree().
Int2 Blast_HitListMerge | ( | BlastHitList ** | old_hit_list_ptr, |
BlastHitList ** | combined_hit_list_ptr, | ||
Int4 | contexts_per_query, | ||
Int4 * | split_offsets, | ||
Int4 | chunk_overlap_size, | ||
Boolean | allow_gap | ||
) |
Combine two hitlists; both HitLists must contain HSPs that represent alignments to the same query sequence.
old_hit_list_ptr | Pointer to original HitList, will be NULLed out on return [in|out] |
combined_hit_list_ptr | Pointer to the combined HitList [in|out] |
contexts_per_query | The number of different contexts that can occur in hits from old_hit_list and combined_hit_list [in] |
split_offsets | the query offset that marks the boundary between combined_hit_list and old_hit_list. HSPs in old_hit_list that hit to context i are assumed to lie to the right of split_offsets[i] [in] |
chunk_overlap_size | The length of the overlap region between the sequence region containing hit_list and that containing combined_hit_list [in] |
allow_gap | Allow merging HSPs at different diagonals [in] |
Definition at line 2119 of file blast_hits.c.
References ASSERT, Blast_HitListFree(), Blast_HitListNew(), Blast_HitListUpdate(), Blast_HSPListAppend(), Blast_HSPListsMerge(), FALSE, BlastHSPList::hsp_max, BlastHitList::hsplist_array, BlastHitList::hsplist_count, BlastHitList::hsplist_max, i, NULL, BlastHSPList::oid, s_SortHSPListByOid(), and TRUE.
Referenced by BlastHSPStreamMerge().
BlastHitList* Blast_HitListNew | ( | Int4 | hitlist_size | ) |
Allocate memory for a hit list of a given size.
hitlist_size | Size of the hit list (number of HSP lists) [in] |
Definition at line 3123 of file blast_hits.c.
References calloc(), BlastHitList::hsplist_count, BlastHitList::hsplist_current, BlastHitList::hsplist_max, INT4_MAX, and BlastHitList::low_score.
Referenced by Blast_HitListMerge(), Blast_HSPResultsInsertHSPList(), BOOST_AUTO_TEST_CASE(), s_BlastHSPBestHitFinal(), s_BlastHSPBestHitRun(), s_BlastHSPCollectorRun(), s_BlastHSPCollectorRun_RPS(), s_BlastHSPCullingFinal(), s_ExportToHitlist(), s_FillResultsFromCompoHeaps(), and SThreadLocalDataArrayConsolidateResults().
Int2 Blast_HitListPurgeNullHSPLists | ( | BlastHitList * | hit_list | ) |
Purges a BlastHitList of NULL HSP lists.
hit_list | BLAST hit list to purge. [in] [out] |
Definition at line 3300 of file blast_hits.c.
References BlastHitList::hsplist_array, BlastHitList::hsplist_count, and NULL.
Referenced by Blast_HSPResultsApplyMasklevel(), and s_FilterBlastResults().
Int2 Blast_HitListSortByEvalue | ( | BlastHitList * | hit_list | ) |
Sort BlastHitList based on e-value.
hit_list | BLAST hit list to be sorted [in] [out] |
Definition at line 3329 of file blast_hits.c.
References BlastHitList::hsplist_array, BlastHitList::hsplist_count, s_BlastHitListPurge(), and s_EvalueCompareHSPLists().
Referenced by BlastHSPCBSStreamClose(), s_BlastHSPBestHitFinal(), and s_BlastHSPCullingPipeRun().
Int2 Blast_HitListUpdate | ( | BlastHitList * | hit_list, |
BlastHSPList * | hsp_list | ||
) |
Insert a new HSP list into the hit list.
Before capacity of the hit list is reached, just add to the end; After that, store in a heap, to ensure efficient insertion and deletion. The heap order is reverse, with worst e-value on top, for convenience of deletion.
hit_list | Contains all HSP lists saved so far [in] [out] |
hsp_list | A new HSP list to be inserted into the hit list [in] |
Definition at line 3241 of file blast_hits.c.
References ASSERT, BlastHSPList::best_evalue, Blast_HSPListFree(), Blast_HSPListSortByEvalue(), BlastHitList::heapified, BlastHSPList::hsp_array, BlastHitList::hsplist_array, BlastHitList::hsplist_count, BlastHitList::hsplist_current, BlastHitList::hsplist_max, BlastHitList::low_score, MAX, MIN, s_Blast_HitListGrowHSPListArray(), s_BlastCheckBestEvalue(), s_BlastGetBestEvalue(), s_BlastHitListInsertHSPListInHeap(), s_CreateHeap(), s_EvalueCompareHSPLists(), BlastHSP::score, TRUE, and BlastHitList::worst_evalue.
Referenced by Blast_HitListMerge(), Blast_HSPResultsInsertHSPList(), BOOST_AUTO_TEST_CASE(), s_BlastHSPBestHitFinal(), s_BlastHSPCollectorRun(), s_BlastHSPCollectorRun_RPS(), s_ExportToHitlist(), and s_FillResultsFromCompoHeaps().
Adjusts offsets if partial sequence was used for extension.
hsp | The hit to work on [in][out] |
start_shift | amount of database sequence not used for extension. [in] |
Definition at line 1316 of file blast_hits.c.
References BlastSeg::end, BlastSeg::gapped_start, BlastSeg::offset, and BlastHSP::subject.
Referenced by Blast_TracebackFromHSPList(), and s_GetTraceback().
void Blast_HSPCalcLengthAndGaps | ( | const BlastHSP * | hsp, |
Int4 * | length, | ||
Int4 * | gaps, | ||
Int4 * | gap_opens | ||
) |
Calculate length of an HSP as length in query plus length of gaps in query.
If gap information is unavailable, return maximum between length in query and in subject.
hsp | An HSP structure [in] |
length | Length of this HSP [out] |
gaps | Total number of gaps in this HSP [out] |
gap_opens | Number of gap openings in this HSP [out] |
Definition at line 1055 of file blast_hits.c.
References eGapAlignDel, eGapAlignIns, BlastSeg::end, BlastHSP::gap_info, GapEditScript::num, BlastSeg::offset, GapEditScript::op_type, BlastHSP::query, GapEditScript::size, and BlastHSP::subject.
Make a deep copy of an HSP.
Definition at line 264 of file blast_hits.c.
References BlastHSP::bit_score, Blast_HSPFree(), Blast_HSPNew(), BlastHSPMappingInfoNew(), BlastMemDup(), calloc(), BlastHSP::comp_adjustment_method, BlastHSP::context, copy(), BlastHSPMappingInfo::edits, BlastHSP::evalue, BlastHSP::gap_info, GapEditScriptDup(), JumperEditsBlockDup(), SequenceOverhangs::left, BlastHSPMappingInfo::left_edge, SequenceOverhangs::left_len, malloc(), BlastHSP::map_info, NULL, BlastHSP::num, BlastHSP::num_ident, BlastHSP::num_positives, BlastHSP::pat_info, BlastHSP::query, SequenceOverhangs::right, BlastHSPMappingInfo::right_edge, SequenceOverhangs::right_len, BlastHSP::score, SequenceOverhangsFree(), BlastHSP::subject, and BlastHSPMappingInfo::subject_overhangs.
Referenced by HSPContainerDup(), s_FindBestPath(), and s_MergeHSPs().
Deallocate memory for an HSP structure.
Definition at line 130 of file blast_hits.c.
References BlastHSPMappingInfoFree(), BlastHSP::gap_info, GapEditScriptDelete(), BlastHSP::map_info, NULL, BlastHSP::pat_info, and sfree.
Referenced by BLAST_GetGappedScore(), Blast_HSPClone(), Blast_HSPListFree(), Blast_HSPListPurgeHSPsWithCommonEndpoints(), Blast_HSPListReapByEvalue(), Blast_HSPListReapByQueryCoverage(), Blast_HSPListReapByRawScore(), Blast_HSPListReevaluateUngapped(), Blast_HSPListsMerge(), Blast_HSPListSubjectBestHit(), Blast_HSPResultsApplyMasklevel(), Blast_TracebackFromHSPList(), Blast_TrimHSPListByMaxHsps(), BOOST_AUTO_TEST_CASE(), DoAnchoredScan(), HSPContainerDup(), HSPContainerFree(), s_Blast_HSPListReapByPrelimEvalue(), s_BlastHSPBestHitRun(), s_BlastHSPBestHitRun_RPS(), s_BlastHSPListInsertHSPInHeap(), s_BlastHSPListsCombineByScore(), s_CreateHSPForWordHit(), s_FindSpliceJunctions(), s_GetTraceback(), s_HitlistReapContained(), s_HSPFree(), s_IntronToGap(), s_MergeHSPs(), s_PHITracebackFromHSPList(), s_TrimResultsByTotalHSPLimit(), and s_TrimResultsByTotalHSPLimitEx().
void Blast_HSPGetAdjustedOffsets | ( | EBlastProgramType | program, |
BlastHSP * | hsp, | ||
Int4 | query_length, | ||
Int4 | subject_length, | ||
Int4 * | q_start, | ||
Int4 * | q_end, | ||
Int4 * | s_start, | ||
Int4 * | s_end | ||
) |
Adjust HSP endpoint offsets according to strand/frame; return values in 1-offset coordinates instead of internal 0-offset.
program | Type of BLAST program [in] |
hsp | An HSP structure [in] |
query_length | Length of query [in] |
subject_length | Length of subject [in] |
q_start | Start of alignment in query [out] |
q_end | End of alignment in query [out] |
s_start | Start of alignment in subject [out] |
s_end | End of alignment in subject [out] |
Definition at line 1109 of file blast_hits.c.
References Blast_QueryIsTranslated(), Blast_SubjectIsTranslated(), BlastSeg::end, BlastSeg::frame, BlastHSP::gap_info, BlastSeg::offset, BlastHSP::query, s_BlastSegGetTranslatedOffsets(), and BlastHSP::subject.
Int2 Blast_HSPGetNumIdentities | ( | const Uint1 * | query, |
const Uint1 * | subject, | ||
BlastHSP * | hsp, | ||
const BlastScoringOptions * | score_options, | ||
Int4 * | align_length_ptr | ||
) |
Calculate number of identities in an HSP and set the BlastHSP::num_ident field (unconditionally)
query | The query sequence [in] |
subject | The uncompressed subject sequence [in] |
hsp | All information about the HSP, the output of this function will be stored in its num_ident field [in|out] |
score_options | Scoring options [in] |
align_length_ptr | The alignment length, including gaps (optional) [out] |
Definition at line 940 of file blast_hits.c.
References BlastScoringOptions::is_ooframe, NULL, BlastHSP::num_ident, BlastScoringOptions::program_number, query, s_Blast_HSPGetNumIdentitiesAndPositives(), s_Blast_HSPGetOOFNumIdentitiesAndPositives(), and subject.
Referenced by Blast_HSPTestIdentityAndLength(), and BOOST_AUTO_TEST_CASE().
Int2 Blast_HSPGetNumIdentitiesAndPositives | ( | const Uint1 * | query, |
const Uint1 * | subject, | ||
BlastHSP * | hsp, | ||
const BlastScoringOptions * | score_options, | ||
Int4 * | align_length_ptr, | ||
const BlastScoreBlk * | sbp | ||
) |
Calculate number of identities and positives in an HSP and set the BlastHSP::num_ident and BlastHSP::num_positives fields.
query | The query sequence [in] |
subject | The uncompressed subject sequence [in] |
hsp | All information about the HSP, the output of this function will be stored in its num_ident field [in|out] |
score_options | Scoring options [in] |
align_length_ptr | The alignment length, including gaps (optional) [out] |
sbp | Score blk containing the matrix for counting positives [in] |
Definition at line 966 of file blast_hits.c.
References BlastScoringOptions::is_ooframe, BlastHSP::num_ident, BlastHSP::num_positives, BlastScoringOptions::program_number, query, s_Blast_HSPGetNumIdentitiesAndPositives(), s_Blast_HSPGetOOFNumIdentitiesAndPositives(), and subject.
Referenced by Blast_HSPListReevaluateUngapped(), Blast_TracebackFromHSPList(), and s_ComputeNumIdentities().
Int2 Blast_HSPGetPartialSubjectTranslation | ( | BLAST_SequenceBlk * | subject_blk, |
BlastHSP * | hsp, | ||
Boolean | is_ooframe, | ||
const Uint1 * | gen_code_string, | ||
Uint1 ** | translation_buffer_ptr, | ||
Uint1 ** | subject_ptr, | ||
Int4 * | subject_length_ptr, | ||
Int4 * | start_shift_ptr | ||
) |
Performs the translation and coordinates adjustment, if only part of the subject sequence is translated for gapped alignment.
subject_blk | Subject sequence structure [in] |
hsp | The HSP information [in] [out] |
is_ooframe | Return a mixed-frame sequence if TRUE [in] |
gen_code_string | Database genetic code [in] |
translation_buffer_ptr | Pointer to buffer holding the translation [out] |
subject_ptr | Pointer to sequence to be passed to the gapped alignment [out] |
subject_length_ptr | Length of the translated sequence [out] |
start_shift_ptr | How far is the partial sequence shifted w.r.t. the full sequence. [out] |
Definition at line 1239 of file blast_hits.c.
References ASSERT, Blast_GetPartialTranslation(), CODON_LENGTH, BlastSeg::end, BlastSeg::frame, BlastSeg::gapped_start, BLAST_SequenceBlk::length, MAX, MAX_FULL_TRANSLATION, MIN, NULL, BlastSeg::offset, BLAST_SequenceBlk::sequence_start, sfree, BlastHSP::subject, and subject.
Calculate query coverage percentage of an hsp.
hsp | An HSP structure [in] |
query_length | Length of query [in] |
Definition at line 1034 of file blast_hits.c.
References BlastSeg::end, BlastSeg::offset, and BlastHSP::query.
Referenced by Blast_HSPQueryCoverageTest(), BOOST_AUTO_TEST_CASE(), and s_BuildScoreList().
const Uint1* Blast_HSPGetTargetTranslation | ( | SBlastTargetTranslation * | target_t, |
const BlastHSP * | hsp, | ||
Int4 * | translated_length | ||
) |
Returns a buffer with a protein translated from nucleotide.
target_t | SBlastTargetTranslation* with information about translation [in] |
hsp | The hit to work on [in] |
translated_length | length of the protein sequence [out] |
Definition at line 1147 of file blast_hits.c.
References ASSERT, BLAST_FrameToContext(), BLAST_GetTranslation(), CODON_LENGTH, context, BlastSeg::end, FENCE_SENTRY, BlastSeg::frame, SBlastTargetTranslation::gen_code_string, GetReverseNuclSequence(), BLAST_SequenceBlk::length, malloc(), MAX, MIN, NULL, BlastSeg::offset, SBlastTargetTranslation::partial, SBlastTargetTranslation::program_number, SBlastTargetTranslation::range, BLAST_SequenceBlk::sequence, sfree, BlastHSP::subject, SBlastTargetTranslation::subject_blk, and SBlastTargetTranslation::translations.
Referenced by Blast_HSPListReevaluateUngapped(), Blast_TracebackFromHSPList(), BOOST_AUTO_TEST_CASE(), and s_ComputeNumIdentities().
Int2 Blast_HSPInit | ( | Int4 | query_start, |
Int4 | query_end, | ||
Int4 | subject_start, | ||
Int4 | subject_end, | ||
Int4 | query_gapped_start, | ||
Int4 | subject_gapped_start, | ||
Int4 | query_context, | ||
Int2 | query_frame, | ||
Int2 | subject_frame, | ||
Int4 | score, | ||
GapEditScript ** | gap_edit, | ||
BlastHSP ** | ret_hsp | ||
) |
Allocates a BlastHSP structure and initializes it with information from the input.
query_start | Start of query alignment [in] |
query_end | End of query alignment [in] |
subject_start | Start of subject alignment [in] |
subject_end | End of subject alignment [in] |
query_gapped_start | Where gapped alignment started on query [in] |
subject_gapped_start | Where gapped alignment started on subject [in] |
query_context | The index of the query containing this HSP [in] |
query_frame | Query frame: -3..3 for translated sequence, 1 or -1 for blastn, 0 for blastp [in] |
subject_frame | Subject frame: -3..3 for translated sequence, 1 for blastn, 0 for blastp [in] |
score | score of alignment [in] |
gap_edit | Will be transferred to the HSP and nulled out. May be NULL if a traceback was not calculated [in] [out] |
ret_hsp | allocated and filled in BlastHSP [out] |
Definition at line 151 of file blast_hits.c.
References Blast_HSPNew(), BLASTERR_MEMORY, BlastHSP::context, BlastSeg::end, BlastSeg::frame, BlastHSP::gap_info, BlastSeg::gapped_start, NULL, BlastSeg::offset, BlastHSP::query, BlastHSP::score, and BlastHSP::subject.
Referenced by BLAST_GetGappedScore(), BLAST_GetUngappedHSPList(), BLAST_SmithWatermanGetGappedScore(), BlastNaExtendJumper(), BOOST_AUTO_TEST_CASE(), PHIGetGappedScore(), s_BlastHSPCopy(), s_CreateHSP(), s_CreateHSPForWordHit(), s_GetTraceback(), s_HSPListFromDistinctAlignments(), CRedoAlignmentTestFixture::setUpHSPList(), and ShortRead_IndexedWordFinder().
Boolean Blast_HSPList_IsEmpty | ( | const BlastHSPList * | hsp_list | ) |
Returns true if the BlastHSPList contains no HSPs.
hsp_list | list of HSPs to examine [in] |
Definition at line 1578 of file blast_hits.c.
References FALSE, BlastHSPList::hspcnt, and TRUE.
Referenced by SThreadLocalDataArrayConsolidateResults().
void Blast_HSPListAdjustOddBlastnScores | ( | BlastHSPList * | hsp_list, |
Boolean | gapped_calculation, | ||
const BlastScoreBlk * | sbp | ||
) |
For nucleotide BLAST, if the match reward score is equal to 2, random alignments are dominated by runs of exact matches, which all have even scores.
This makes it impossible to estimate statistical parameters correctly for odd scores. Hence the raw score formula is adjusted - all scores are rounded down to the nearest even value in order to provide a conservative estimate.
hsp_list | HSP list structure to adjust scores for. [in] [out] |
gapped_calculation | not an ungapped alignment [in] |
sbp | Score block, consulted for its round_down Boolean [in] |
Definition at line 3051 of file blast_hits.c.
References Blast_HSPListSortByScore(), FALSE, BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastScoreBlk::round_down, and BlastHSP::score.
Referenced by Blast_HSPListReevaluateUngapped(), BLAST_LinkHsps(), BOOST_AUTO_TEST_CASE(), s_BlastSearchEngineOneContext(), and s_HSPListPostTracebackUpdate().
void Blast_HSPListAdjustOffsets | ( | BlastHSPList * | hsp_list, |
Int4 | offset | ||
) |
Adjust subject offsets in an HSP list if only part of the subject sequence was searched.
Used when long subject sequence is split into more manageable chunks.
hsp_list | List of HSPs from a chunk of a subject sequence [in] |
offset | Offset where the chunk starts [in] |
Definition at line 3035 of file blast_hits.c.
References BlastSeg::end, BlastSeg::gapped_start, BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastSeg::offset, offset, and BlastHSP::subject.
Referenced by s_BlastSearchEngineOneContext().
Int2 Blast_HSPListAppend | ( | BlastHSPList ** | old_hsp_list_ptr, |
BlastHSPList ** | combined_hsp_list_ptr, | ||
Int4 | hsp_num_max | ||
) |
Append one HSP list to the other.
Discard lower scoring HSPs if there is not enough space to keep all.
old_hsp_list_ptr | list of HSPs, will be NULLed out on return [in|out] |
combined_hsp_list_ptr | Pointer to the combined list of HSPs, possibly containing previously saved HSPs [in] [out] |
hsp_num_max | Maximal allowed number of HSPs to save (unlimited if INT4_MAX) [in] |
Definition at line 2807 of file blast_hits.c.
References BlastHSPList::allocated, Blast_HSPListFree(), BlastHSPList::do_not_reallocate, BlastHSPList::hsp_array, BlastHSPList::hspcnt, MIN, NULL, s_BlastHSPListsCombineByScore(), and TRUE.
Referenced by Blast_HitListMerge(), and s_BlastSearchEngineCore().
BlastHSPList* Blast_HSPListFree | ( | BlastHSPList * | hsp_list | ) |
Deallocate memory for an HSP list structure as well as all its components.
hsp_list | the BlastHSPList to be freed [in]. |
Definition at line 1542 of file blast_hits.c.
References Blast_HSPFree(), BlastHSPList::hsp_array, BlastHSPList::hspcnt, NULL, and sfree.
Referenced by BLAST_ComputeTraceback_MT(), BLAST_GetGappedScore(), Blast_HitListHSPListsFree(), Blast_HitListUpdate(), Blast_HSPListAppend(), Blast_HSPListsMerge(), Blast_HSPResultsApplyMasklevel(), Blast_HSPStreamResultBatchReset(), BLAST_PreliminarySearchEngine(), Blast_RedoAlignmentCore_MT(), Blast_TracebackFromHSPList(), BlastHSPStreamFree(), BOOST_AUTO_TEST_CASE(), DoAnchoredSearch(), LinkHspTestFixture::freeStructures(), s_BlastHitListInsertHSPListInHeap(), s_BlastHitListPurge(), s_BlastHSPBestHitRun(), s_BlastHSPBestHitRun_RPS(), s_BlastHSPCollectorRun(), s_BlastHSPCollectorRun_RPS(), s_BlastHSPCullingRun(), s_BlastHSPMapperSplicedPairedRun(), s_BlastPruneExtraHits(), s_BlastSearchEngineCore(), s_BlastSearchEngineOneContext(), s_ClearHeap(), s_FilterBlastResults(), s_HSPListFromDistinctAlignments(), s_ImportFromHitlist(), s_RPSComputeTraceback(), s_RPSPreliminarySearchEngine(), s_TrimHitList(), and testHSPStream().
Int2 Blast_HSPListGetBitScores | ( | BlastHSPList * | hsp_list, |
Boolean | gapped_calculation, | ||
const BlastScoreBlk * | sbp | ||
) |
Calculate bit scores from raw scores in an HSP list.
hsp_list | List of HSPs [in] [out] |
gapped_calculation | Is this a gapped search? [in] |
sbp | Scoring block with statistical parameters [in] |
Definition at line 1907 of file blast_hits.c.
References ASSERT, BlastHSP::bit_score, BlastHSP::context, FALSE, BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastScoreBlk::kbp, BlastScoreBlk::kbp_gap, Blast_KarlinBlk::Lambda, Blast_KarlinBlk::logK, NCBIMATH_LN2, NULL, BlastScoreBlk::round_down, and BlastHSP::score.
Referenced by BLAST_ComputeTraceback_MT(), BLAST_PreliminarySearchEngine(), s_GetBitScores(), and s_HSPListPostTracebackUpdate().
Int2 Blast_HSPListGetEvalues | ( | EBlastProgramType | program_number, |
const BlastQueryInfo * | query_info, | ||
Int4 | subject_length, | ||
BlastHSPList * | hsp_list, | ||
Boolean | gapped_calculation, | ||
Boolean | RPS_prelim, | ||
const BlastScoreBlk * | sbp, | ||
double | gap_decay_rate, | ||
double | scaling_factor | ||
) |
Calculate the expected values for all HSPs in a hit list, without using the sum statistics.
In case of multiple queries, the offsets are assumed to be already adjusted to individual query coordinates, and the contexts are set for each HSP.
program_number | Type of BLAST program [in] |
query_info | Auxiliary query information - needed only for effective search space calculation if it is not provided [in] |
subject_length | Subject length - needed for Spouge's new FSC [in] |
hsp_list | List of HSPs for one subject sequence [in] [out] |
gapped_calculation | Is this for a gapped or ungapped search? [in] |
RPS_prelim | Is this for a RPS preliminary search? [in] |
sbp | Structure containing statistical information [in] |
gap_decay_rate | Adjustment parameter to compensate for the effects of performing multiple tests when linking HSPs. No adjustment is made if 0. [in] |
scaling_factor | Scaling factor by which Lambda should be divided. Used in RPS BLAST only; should be set to 1.0 in other cases. [in] |
Definition at line 1811 of file blast_hits.c.
References ASSERT, BlastHSPList::best_evalue, BLAST_GapDecayDivisor(), Blast_HSPListIsSortedByScore(), BLAST_KarlinStoE_simple(), Blast_ProgramIsRpsBlast(), BLAST_SpougeStoE(), BlastHSP::context, BlastQueryInfo::contexts, BlastContextInfo::eff_searchsp, BlastHSP::evalue, FALSE, BlastScoreBlk::gbp, BlastHSPList::hsp_array, BlastHSPList::hspcnt, i, BlastScoreBlk::kbp, BlastScoreBlk::kbp_gap, Blast_KarlinBlk::Lambda, NULL, BlastScoreBlk::number_of_contexts, BlastContextInfo::query_length, BlastScoreBlk::round_down, s_BlastGetBestEvalue(), and BlastHSP::score.
Referenced by BLAST_LinkHsps(), BLAST_PreliminarySearchEngine(), s_BlastSearchEngineCore(), s_HitlistEvaluateAndPurge(), and s_HSPListPostTracebackUpdate().
Boolean Blast_HSPListIsSortedByScore | ( | const BlastHSPList * | hsp_list | ) |
Check if HSP list is sorted by score.
hsp_list | The list to check [in] |
Definition at line 1358 of file blast_hits.c.
References FALSE, BlastHSPList::hsp_array, BlastHSPList::hspcnt, ScoreCompareHSPs(), and TRUE.
Referenced by Blast_HSPListGetEvalues(), Blast_HSPListPHIGetEvalues(), Blast_HSPListSortByScore(), Blast_TracebackFromHSPList(), BOOST_AUTO_TEST_CASE(), s_BlastHSPCollectorRun(), s_BlastHSPCollectorRun_RPS(), s_PHITracebackFromHSPList(), and CPhiblastTestFixture::x_CheckSplitResults().
BlastHSPList* Blast_HSPListNew | ( | Int4 | hsp_max | ) |
Creates HSP list structure with a default size HSP array.
hsp_max | the maximum number of HSP's that can ever be saved at once [in]. |
Definition at line 1558 of file blast_hits.c.
References BlastHSPList::allocated, calloc(), BlastHSPList::hsp_array, BlastHSPList::hsp_max, INT4_MAX, and MIN.
Referenced by BLAST_GetGappedScore(), BLAST_GetUngappedHSPList(), Blast_RedoAlignmentCore_MT(), BLAST_SmithWatermanGetGappedScore(), BOOST_AUTO_TEST_CASE(), DoAnchoredSearch(), JumperNaWordFinder(), PHIBlast_HSPResultsSplit(), PHIGetGappedScore(), s_BlastHSPCollectorRun(), s_BlastHSPCollectorRun_RPS(), s_BlastHSPCullingFinal(), s_ExportToHitlist(), s_SetupHSPListBlastn(), s_SetupHSPListForUngappedReevaluateTransl(), s_SetupHSPListTransl(), s_TrimResultsByTotalHSPLimitEx(), CRedoAlignmentTestFixture::setUpHSPList(), setupHSPList(), LinkHspTestFixture::setupHSPListForMiddleInsertTest(), LinkHspTestFixture::setupHSPListNucl(), LinkHspTestFixture::setupHSPListTransl(), ShortRead_IndexedWordFinder(), testHSPStream(), and CPhiblastTestFixture::x_SetupHSPList().
void Blast_HSPListPHIGetBitScores | ( | BlastHSPList * | hsp_list, |
BlastScoreBlk * | sbp | ||
) |
Calculate bit scores from raw scores in an HSP list for a PHI BLAST search.
hsp_list | List of HSPs [in] [out] |
sbp | Scoring block with statistical parameters [in] |
Definition at line 1934 of file blast_hits.c.
References ASSERT, BlastHSP::bit_score, BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastScoreBlk::kbp_gap, Blast_KarlinBlk::Lambda, lambda(), log, NCBIMATH_LN2, NULL, Blast_KarlinBlk::paramC, and BlastHSP::score.
Referenced by s_PHITracebackFromHSPList().
void Blast_HSPListPHIGetEvalues | ( | BlastHSPList * | hsp_list, |
BlastScoreBlk * | sbp, | ||
const BlastQueryInfo * | query_info, | ||
const SPHIPatternSearchBlk * | pattern_blk | ||
) |
Calculate e-values for a PHI BLAST HSP list.
hsp_list | HSP list found by PHI BLAST [in] [out] |
sbp | Scoring block with statistical parameters [in] |
query_info | Structure containing information about pattern counts [in] |
pattern_blk | Structure containing information about pattern hits in db [in] |
Definition at line 1955 of file blast_hits.c.
References ASSERT, BlastHSPList::best_evalue, Blast_HSPListIsSortedByScore(), BlastHSPList::hsp_array, BlastHSPList::hspcnt, s_BlastGetBestEvalue(), and s_HSPPHIGetEvalue().
Referenced by BOOST_AUTO_TEST_CASE(), and s_PHITracebackFromHSPList().
Int4 Blast_HSPListPurgeHSPsWithCommonEndpoints | ( | EBlastProgramType | program, |
BlastHSPList * | hsp_list, | ||
Boolean | purge | ||
) |
Check for an overlap of two different alignments and remove redundant HSPs.
A sufficient overlap is when two alignments have the same start or end values. If an overlap is found, the HSP with the lowest score is removed; if both scores are the same, then the first is removed.
program | Type of BLAST program. For some programs (PHI BLAST), the purge should not be performed. [in] |
hsp_list | Contains array of pointers to HSPs to purge [in] |
purge | Should the hsp be purged? [in] |
Definition at line 2455 of file blast_hits.c.
References Blast_HSPFree(), Blast_HSPListPurgeNullHSPs(), Blast_ProgramIsPhiBlast(), context, eBlastTypeBlastn, BlastSeg::end, FALSE, BlastHSPList::hsp_array, BlastHSPList::hspcnt, i, NULL, BlastSeg::offset, BlastHSP::query, query, s_CutOffGapEditScript(), s_QueryEndCompareHSPs(), s_QueryOffsetCompareHSPs(), BlastHSP::subject, and TRUE.
Referenced by Blast_TracebackFromHSPList(), BOOST_AUTO_TEST_CASE(), and s_BlastSearchEngineOneContext().
Int2 Blast_HSPListPurgeNullHSPs | ( | BlastHSPList * | hsp_list | ) |
Cleans out the NULLed out HSP's from the HSP array that is part of the BlastHSPList.
hsp_list | Contains array of pointers to HSP structures [in] |
Definition at line 2225 of file blast_hits.c.
References BlastHSPList::hsp_array, BlastHSPList::hspcnt, and NULL.
Referenced by Blast_HSPListPurgeHSPsWithCommonEndpoints(), Blast_HSPListReevaluateUngapped(), Blast_HSPListsMerge(), Blast_HSPListSubjectBestHit(), Blast_TracebackFromHSPList(), BOOST_AUTO_TEST_CASE(), and s_PHITracebackFromHSPList().
Int2 Blast_HSPListReapByEvalue | ( | BlastHSPList * | hsp_list, |
const BlastHitSavingOptions * | hit_options | ||
) |
Discard the HSPs above the e-value threshold from the HSP list.
hsp_list | List of HSPs for one subject sequence [in] [out] |
hit_options | Options block containing the e-value cut-off [in] |
Definition at line 1976 of file blast_hits.c.
References ASSERT, Blast_HSPFree(), BlastHSP::evalue, BlastHitSavingOptions::expect_value, BlastHSPList::hsp_array, BlastHSPList::hspcnt, and NULL.
Referenced by BOOST_AUTO_TEST_CASE(), s_HitlistEvaluateAndPurge(), s_HSPListPostTracebackUpdate(), s_PHITracebackFromHSPList(), and LinkHspTestFixture::testUnevenGapLinkHsps().
Int2 Blast_HSPListReapByQueryCoverage | ( | BlastHSPList * | hsp_list, |
const BlastHitSavingOptions * | hit_options, | ||
const BlastQueryInfo * | query_info, | ||
EBlastProgramType | program_number | ||
) |
Discard the HSPs below the min query coverage pct from the HSP list.
hsp_list | List of HSPs for one subject sequence [in] [out] |
hit_options | Options block containing the min query coverage pct [in] |
query_info | Structure containing information about the queries [in] |
program_number | Type of BLAST program [in] |
Definition at line 2010 of file blast_hits.c.
References ASSERT, BlastHSPList::best_evalue, Blast_HSPFree(), Blast_HSPQueryCoverageTest(), BlastHSP::context, BlastQueryInfo::contexts, FALSE, BlastHSPList::hsp_array, BlastHSPList::hspcnt, NULL, BlastHitSavingOptions::query_cov_hsp_perc, BlastContextInfo::query_length, s_BlastGetBestEvalue(), and TRUE.
Referenced by BLAST_PreliminarySearchEngine(), BOOST_AUTO_TEST_CASE(), and s_FilterBlastResults().
Int2 Blast_HSPListReapByRawScore | ( | BlastHSPList * | hsp_list, |
const BlastHitSavingOptions * | hit_options | ||
) |
Same as Blast_HSPListReapByEvalue() except that it uses the raw score of the hit and the HitSavingOptions->cutoff_score to filter out hits.
Discard the HSPs whose raw score is below the cut-off score from the HSP list.
-RMH-
Definition at line 2076 of file blast_hits.c.
References ASSERT, Blast_HSPFree(), BlastHitSavingOptions::cutoff_score, BlastHSPList::hsp_array, BlastHSPList::hspcnt, NULL, and BlastHSP::score.
Referenced by BLAST_PreliminarySearchEngine(), BOOST_AUTO_TEST_CASE(), and s_BlastSearchEngineCore().
Int2 Blast_HSPListReevaluateUngapped | ( | EBlastProgramType | program, |
BlastHSPList * | hsp_list, | ||
BLAST_SequenceBlk * | query_blk, | ||
BLAST_SequenceBlk * | subject_blk, | ||
const BlastInitialWordParameters * | word_params, | ||
const BlastHitSavingParameters * | hit_params, | ||
const BlastQueryInfo * | query_info, | ||
BlastScoreBlk * | sbp, | ||
const BlastScoringParameters * | score_params, | ||
const BlastSeqSrc * | seq_src, | ||
const Uint1 * | gen_code_string | ||
) |
Reevaluate all ungapped HSPs in an HSP list.
This is only done for an ungapped search, or if traceback is already available. Subject sequence is uncompressed and saved here (for nucleotide sequences). The number of identities is calculated for each HSP along the way, hence this function is called for all programs.
program | Type of BLAST program [in] |
hsp_list | The list of HSPs for one subject sequence [in] [out] |
query_blk | The query sequence [in] |
subject_blk | The subject sequence [in] [out] |
word_params | Initial word parameters, containing ungapped cutoff score [in] |
hit_params | Hit saving parameters, including cutoff score [in] |
query_info | Auxiliary query information [in] |
sbp | The statistical information [in] |
score_params | Parameters related to scoring [in] |
seq_src | The BLAST database structure (for retrieving uncompressed sequence) [in] |
gen_code_string | Genetic code string in case of a translated database search. [in] |
Definition at line 2607 of file blast_hits.c.
References ASSERT, Blast_HSPFree(), Blast_HSPGetNumIdentitiesAndPositives(), Blast_HSPGetTargetTranslation(), Blast_HSPListAdjustOddBlastnScores(), Blast_HSPListPurgeNullHSPs(), Blast_HSPListSortByScore(), Blast_HSPReevaluateWithAmbiguitiesUngapped(), Blast_HSPTest(), BLAST_SEQSRC_EXCLUDED, Blast_SubjectIsNucleotide(), Blast_SubjectIsTranslated(), BlastSeqSrcGetSequence(), BlastSeqSrcReleaseSequence(), BlastTargetTranslationFree(), BlastTargetTranslationNew(), BlastSeqSrcGetSeqArg::check_oid_exclusion, BlastHSP::context, context, BlastQueryInfo::contexts, eBlastEncodingNcbi4na, eBlastEncodingNucleotide, BlastSeqSrcGetSeqArg::encoding, FALSE, BlastScoringOptions::gapped_calculation, BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastScoringOptions::is_ooframe, NULL, BLAST_SequenceBlk::oid, BlastSeqSrcGetSeqArg::oid, BlastHitSavingParameters::options, BlastScoringParameters::options, BlastContextInfo::query_offset, BlastSeqSrcGetSeqArg::seq, BLAST_SequenceBlk::sequence, BLAST_SequenceBlk::sequence_nomask, BLAST_SequenceBlk::sequence_start, and TRUE.
Referenced by BLAST_PreliminarySearchEngine(), and BOOST_AUTO_TEST_CASE().
Int2 Blast_HSPListSaveHSP | ( | BlastHSPList * | hsp_list, |
BlastHSP * | hsp | ||
) |
Saves HSP information into a BlastHSPList structure.
hsp_list | Structure holding all HSPs with full gapped alignment information [in] [out] |
hsp | The new HSP to be inserted into the HSPList [in] |
Definition at line 1754 of file blast_hits.c.
References BlastHSPList::allocated, BlastHSPList::do_not_reallocate, FALSE, BlastHSPList::hsp_array, BlastHSPList::hsp_max, BlastHSPList::hspcnt, MIN, NULL, s_BlastHSPListInsertHSPInHeap(), s_CreateHeap(), ScoreCompareHSPs(), and TRUE.
Referenced by BLAST_GetGappedScore(), BLAST_GetUngappedHSPList(), Blast_HSPResultsApplyMasklevel(), BLAST_SmithWatermanGetGappedScore(), BlastNaExtendJumper(), BOOST_AUTO_TEST_CASE(), DoAnchoredScan(), DoAnchoredSearch(), PHIBlast_HSPResultsSplit(), PHIGetGappedScore(), s_AddNextHSP(), s_BlastHSPCollectorRun(), s_BlastHSPCollectorRun_RPS(), s_ExportToHitlist(), s_GetTraceback(), s_HSPListFromDistinctAlignments(), s_TrimResultsByTotalHSPLimitEx(), CRedoAlignmentTestFixture::setUpHSPList(), ShortRead_IndexedWordFinder(), and CPhiblastTestFixture::x_SetupHSPList().
Int2 Blast_HSPListsMerge | ( | BlastHSPList ** | hsp_list, |
BlastHSPList ** | combined_hsp_list_ptr, | ||
Int4 | hsp_num_max, | ||
Int4 * | split_points, | ||
Int4 | contexts_per_query, | ||
Int4 | chunk_overlap_size, | ||
Boolean | allow_gap, | ||
Boolean | short_reads | ||
) |
Merge an HSP list from a chunk of the subject sequence into a previously computed HSP list.
hsp_list | Contains HSPs from the new chunk [in] |
combined_hsp_list_ptr | Contains HSPs from previous chunks [in] [out] |
hsp_num_max | Maximal allowed number of HSPs to save (unlimited if INT4_MAX) [in] |
split_points | The sequence offset (query or subject) that is the boundary between HSPs in combined_hsp_list and hsp_list. [in] |
contexts_per_query | If positive, the number of query contexts that hits can contain. If negative, the (one) split point occurs on the subject sequence [in] |
chunk_overlap_size | The length of the overlap region between the sequence region containing hsp_list and that containing combined_hsp_list [in] |
allow_gap | Allow merging HSPs at different diagonals [in] |
short_reads | Assume that queries are shorter than the database overlap region [in] |
Definition at line 2855 of file blast_hits.c.
References ABS, BlastHSPList::allocated, Blast_HSPFree(), Blast_HSPListFree(), Blast_HSPListPurgeNullHSPs(), BlastHSP::context, BlastHSPList::do_not_reallocate, BlastSeg::end, FALSE, BlastSeg::frame, BlastHSPList::hsp_array, BlastHSPList::hspcnt, MIN, NULL, BlastSeg::offset, OVERLAP_DIAG_CLOSE, BlastHSP::query, query, s_BlastHSPListsCombineByScore(), s_BlastMergeTwoHSPs(), s_HSPEndDiag(), s_HSPStartDiag(), BlastHSP::subject, and TRUE.
Referenced by Blast_HitListMerge(), BOOST_AUTO_TEST_CASE(), and s_BlastSearchEngineOneContext().
void Blast_HSPListSortByEvalue | ( | BlastHSPList * | hsp_list | ) |
Sort the HSPs in an HSP list by e-value, with scores and other criteria used to resolve ties.
Checks if the HSP array is already sorted before proceeding with quicksort.
hsp_list | Structure containing array of HSPs to be sorted. [in] [out] |
Definition at line 1437 of file blast_hits.c.
References BlastHSPList::hsp_array, BlastHSPList::hspcnt, and s_EvalueCompareHSPs().
Referenced by Blast_HitListUpdate(), BlastHitList2SeqAlign_OMF(), BOOST_AUTO_TEST_CASE(), s_BLAST_OneSubjectResults2CSeqAlign(), and s_BlastHSPCullingPipeRun().
void Blast_HSPListSortByScore | ( | BlastHSPList * | hsp_list | ) |
Sort the HSPs in an HSP list by score.
This type of sorting is done before the e-values are calculated, and also at the beginning of the traceback stage, where it is needed to eliminate the effects of wrong score order because of application of sum statistics. Checks if the HSP array is already sorted before proceeding with quicksort.
hsp_list | Structure containing array of HSPs to be sorted. [in] [out] |
Definition at line 1374 of file blast_hits.c.
References Blast_HSPListIsSortedByScore(), BlastHSPList::hsp_array, BlastHSPList::hspcnt, and ScoreCompareHSPs().
Referenced by BLAST_GetUngappedHSPList(), Blast_HSPListAdjustOddBlastnScores(), Blast_HSPListReevaluateUngapped(), Blast_HSPResultsApplyMasklevel(), BLAST_LinkHsps(), Blast_TracebackFromHSPList(), BlastHSPStreamMerge(), BOOST_AUTO_TEST_CASE(), PHIGetGappedScore(), CRedoAlignmentTestFixture::runRedoAlignmentCoreUnitTest(), s_BlastHSPBestHitFinal(), s_BlastHSPCullingFinal(), s_BlastHSPListRPSUpdate(), s_BlastHSPListsCombineByScore(), s_BlastSearchEngineOneContext(), s_HSPListFromDistinctAlignments(), s_HSPListRescaleScores(), s_PHITracebackFromHSPList(), CTracebackSearchTestFixture::x_GetSampleHspStream(), and CTracebackSearchTestFixture::x_GetSelfHitHspStream().
Int4 Blast_HSPListSubjectBestHit | ( | EBlastProgramType | program, |
const BlastHSPSubjectBestHitOptions * | subject_besthit_opts, | ||
const BlastQueryInfo * | query_info, | ||
BlastHSPList * | hsp_list | ||
) |
Definition at line 2536 of file blast_hits.c.
References Blast_HSPFree(), Blast_HSPListPurgeNullHSPs(), Blast_ProgramIsPhiBlast(), BlastHSP::context, context, BlastQueryInfo::contexts, eBlastTypeBlastn, BlastSeg::end, BlastSeg::frame, BlastHSPList::hsp_array, BlastHSPList::hspcnt, i, BlastHSPSubjectBestHitOptions::max_range_diff, NULL, BlastSeg::offset, BlastHSP::query, query, and BlastContextInfo::query_length.
Referenced by BOOST_AUTO_TEST_CASE(), s_BlastSearchEngineOneContext(), and s_FilterBlastResults().
void Blast_HSPListSwap | ( | BlastHSPList * | list1, |
BlastHSPList * | list2 | ||
) |
Swaps the two HSP lists via structure assignment.
Definition at line 1614 of file blast_hits.c.
References tmp.
Referenced by Blast_RedoAlignmentCore_MT(), and Blast_TracebackFromHSPList().
BlastHSP* Blast_HSPNew | ( | void | ) |
Allocates and zeroes out memory for an HSP structure.
Definition at line 141 of file blast_hits.c.
References calloc().
Referenced by Blast_HSPClone(), Blast_HSPInit(), Blast_TracebackFromHSPList(), BOOST_AUTO_TEST_CASE(), s_AddNextHSP(), s_SetupHSPForGappedReevaluateTest(), s_SetupHSPForUngappedReevaluateNucl(), s_SetupHSPListBlastn(), s_SetupHSPListForUngappedReevaluateTransl(), s_SetupHSPListTransl(), setupHSPList(), and CPhiblastTestFixture::x_SetupHSPList().
Boolean Blast_HSPQueryCoverageTest | ( | BlastHSP * | hsp, |
double | min_query_coverage_pct, | ||
Int4 | query_length | ||
) |
Calculate query coverage percentage of an hsp.
hsp | An HSP structure [in] |
min_query_coverage_pct | Min query coverage pct for saving the hsp [in] |
query_length | Length of query [in] |
Definition at line 1045 of file blast_hits.c.
References Blast_HSPGetQueryCoverage().
Referenced by Blast_HSPListReapByQueryCoverage(), and BOOST_AUTO_TEST_CASE().
Boolean Blast_HSPReevaluateWithAmbiguitiesGapped | ( | BlastHSP * | hsp, |
const Uint1 * | query_start, | ||
const Int4 | query_length, | ||
const Uint1 * | subject_start, | ||
const Int4 | subject_length, | ||
const BlastHitSavingParameters * | hit_params, | ||
const BlastScoringParameters * | score_params, | ||
const BlastScoreBlk * | sbp | ||
) |
Reevaluate the HSP's score and percent identity after taking into account the ambiguity information.
Used only for blastn after a greedy gapped extension with traceback. This function can remove part of the alignment at either end, if its score becomes negative after reevaluation. Traceback is also adjusted in that case.
hsp | The HSP structure [in] [out] |
query_start | Pointer to the start of the query sequence [in] |
query_length | Length of the query sequence [in] |
subject_start | Pointer to the start of the subject sequence [in] |
subject_length | Length of the subject sequence [in] |
hit_params | Hit saving parameters containing score cut-off [in] |
score_params | Scoring parameters [in] |
sbp | Score block with Karlin-Altschul parameters [in] |
Definition at line 479 of file blast_hits.c.
References ASSERT, BlastHSP::context, BlastGappedCutoffs::cutoff_score, BlastHitSavingParameters::cutoffs, SBlastScoreMatrix::data, eGapAlignDel, eGapAlignIns, eGapAlignSub, BlastScoringParameters::gap_extend, BlastHSP::gap_info, BlastScoringParameters::gap_open, BlastScoreBlk::matrix, GapEditScript::num, BlastSeg::offset, GapEditScript::op_type, BlastScoringParameters::penalty, BlastHSP::query, query, BlastScoringParameters::reward, s_UpdateReevaluatedHSP(), GapEditScript::size, ncbi::grid::netcache::search::fields::size, BlastHSP::subject, subject, and TRUE.
Referenced by Blast_TracebackFromHSPList(), and BOOST_AUTO_TEST_CASE().
Boolean Blast_HSPReevaluateWithAmbiguitiesUngapped | ( | BlastHSP * | hsp, |
const Uint1 * | query_start, | ||
const Uint1 * | subject_start, | ||
const BlastInitialWordParameters * | word_params, | ||
BlastScoreBlk * | sbp, | ||
Boolean | translated | ||
) |
Reevaluate the HSP's score and percent identity after taking into account the ambiguity information.
Used for ungapped searches with nucleotide database (blastn, tblastn, tblastx).
hsp | The HSP structure [in] [out] |
query_start | Pointer to the start of the query sequence [in] |
subject_start | Pointer to the start of the subject sequence [in] |
word_params | Initial word parameters with ungapped cutoff score [in] |
sbp | Score block with Karlin-Altschul parameters [in] |
translated | Are sequences protein (with a translated subject)? [in] |
Definition at line 676 of file blast_hits.c.
References BlastHSP::context, BlastUngappedCutoffs::cutoff_score, BlastInitialWordParameters::cutoffs, SBlastScoreMatrix::data, BlastSeg::end, BlastScoreBlk::matrix, BlastSeg::offset, BlastHSP::query, query, s_UpdateReevaluatedHSPUngapped(), BlastHSP::subject, and subject.
Referenced by Blast_HSPListReevaluateUngapped(), and BOOST_AUTO_TEST_CASE().
Int2 Blast_HSPResultsApplyMasklevel | ( | BlastHSPResults * | results, |
const BlastQueryInfo * | query_info, | ||
Int4 | masklevel, | ||
Int4 | query_length | ||
) |
Apply a Cross_match-like masklevel to the HSP list.
-RMH-
Definition at line 3465 of file blast_hits.c.
References BlastHSPList::best_evalue, Blast_HitListPurgeNullHSPLists(), Blast_HSPFree(), Blast_HSPListFree(), Blast_HSPListSaveHSP(), Blast_HSPListSortByScore(), Blast_IntervalTreeFree(), Blast_IntervalTreeInit(), Blast_IntervalTreeReset(), BlastIntervalTreeAddHSP(), BlastIntervalTreeMasksHSP(), eQueryOnlyStrandIndifferent, BlastHSP::evalue, SHspWrap::hsp, BlastHSPList::hsp_array, BlastHSPList::hspcnt, SHspWrap::hsplist, BlastHitList::hsplist_array, BlastHitList::hsplist_count, i, malloc(), NULL, results, s_SortHspWrapRawScore(), and sfree.
Referenced by BLAST_ComputeTraceback_MT(), and BOOST_AUTO_TEST_CASE().
BlastHSPResults* Blast_HSPResultsFree | ( | BlastHSPResults * | results | ) |
Deallocate memory for BLAST results.
Definition at line 3364 of file blast_hits.c.
References Blast_HitListFree(), NULL, results, and sfree.
Referenced by BLAST_ComputeTraceback_MT(), Blast_HSPResultsNew(), Blast_RedoAlignmentCore_MT(), BlastHSPStreamFree(), BOOST_AUTO_TEST_CASE(), CBlastTracebackSearch::Run(), CRedoAlignmentTestFixture::runRedoAlignmentCoreUnitTest(), SThreadLocalDataArrayConsolidateResults(), SThreadLocalDataFree(), CBlastPrelimSearch::x_BuildStdSegList(), and CPhiblastTestFixture::x_CheckSplitResults().
BlastHSPResults* Blast_HSPResultsFromHSPStream | ( | struct BlastHSPStream * | hsp_stream, |
size_t | num_queries, | ||
SBlastHitsParameters * | hit_param | ||
) |
Move all of the hits within an HSPStream into a BlastHSPResults structure.
hsp_stream | The HSPStream [in][out] |
num_queries | Number of queries in the search [in] |
hit_param | Hit parameters [in] |
Definition at line 3633 of file blast_hits.c.
References Blast_HSPResultsInsertHSPList(), Blast_HSPResultsNew(), BlastHSPStreamRead(), kBlastHSPStream_Eof, NULL, SBlastHitsParameters::prelim_hitlist_size, and SBlastHitsParametersFree().
Referenced by Blast_HSPResultsFromHSPStreamWithLimit(), and Blast_HSPResultsFromHSPStreamWithLimitEx().
BlastHSPResults* Blast_HSPResultsFromHSPStreamWithLimit | ( | struct BlastHSPStream * | hsp_stream, |
Uint4 | num_queries, | ||
SBlastHitsParameters * | hit_param, | ||
Uint4 | max_num_hsps, | ||
Boolean * | removed_hsps | ||
) |
As Blast_HSPResultsFromHSPStream, except the total number of HSPs kept for each query does not exceed an explicit limit.
The database sequences with the smallest number of hits are saved first, and hits are removed from query i if the average number of hits saved threatens to exceed (max_num_hsps / (number of DB sequences with hits to query i))
hsp_stream | The HSPStream [in][out] |
num_queries | Number of queries in the search [in] |
hit_param | Hit parameters [in] |
max_num_hsps | The limit on the number of HSPs to be kept for each query sequence [in] |
removed_hsps | Set to TRUE if any hits were removed [out] |
Definition at line 3855 of file blast_hits.c.
References Blast_HSPResultsFromHSPStream(), FALSE, and s_TrimResultsByTotalHSPLimit().
BlastHSPResults* Blast_HSPResultsFromHSPStreamWithLimitEx | ( | BlastHSPStream * | hsp_stream, |
Uint4 | num_queries, | ||
SBlastHitsParameters * | hit_param, | ||
Uint4 | max_num_hsps, | ||
Boolean * | removed_hsps | ||
) |
As Blast_HSPResultsFromHSPStreamWithLimit, except that it accepts and returns an array of Boolean flags specifying which queries exceeded the HSP limit.
Definition at line 3873 of file blast_hits.c.
References Blast_HSPResultsFromHSPStream(), FALSE, and s_TrimResultsByTotalHSPLimitEx().
Referenced by CBlastPrelimSearch::ComputeBlastHSPResults().
Int2 Blast_HSPResultsInsertHSPList | ( | BlastHSPResults * | results, |
BlastHSPList * | hsp_list, | ||
Int4 | hitlist_size | ||
) |
Blast_HSPResultsInsertHSPList: Insert an HSP list into the appropriate place in the results structure.
All HSPs in this list must be from the same query and same subject; the oid and query_index fields must be set in the BlastHSPList input structure.
results | The structure holding results for all queries [in] [out] |
hsp_list | The results for one query-subject sequence pair. [in] |
hitlist_size | Maximal allowed hit list size. [in] |
Definition at line 3552 of file blast_hits.c.
References ASSERT, Blast_HitListNew(), Blast_HitListUpdate(), BlastHSPList::hspcnt, BlastHSPList::query_index, and results.
Referenced by BLAST_ComputeTraceback_MT(), Blast_HSPResultsFromHSPStream(), BOOST_AUTO_TEST_CASE(), PHIBlast_HSPResultsSplit(), s_RPSComputeTraceback(), s_TrimResultsByTotalHSPLimitEx(), and CPhiblastTestFixture::x_SetupResults().
BlastHSPResults* Blast_HSPResultsNew | ( | Int4 | num_queries | ) |
Initialize the results structure.
num_queries | Number of query sequences to allocate results structure for [in] |
Definition at line 3344 of file blast_hits.c.
References Blast_HSPResultsFree(), calloc(), BlastHSPResults::hitlist_array, NULL, and BlastHSPResults::num_queries.
Referenced by BLAST_ComputeTraceback_MT(), Blast_HSPResultsFromHSPStream(), Blast_RedoAlignmentCore_MT(), BlastHSPStreamNew(), BOOST_AUTO_TEST_CASE(), PHIBlast_HSPResultsSplit(), CRedoAlignmentTestFixture::runRedoAlignmentCoreUnitTest(), SThreadLocalDataArrayConsolidateResults(), SThreadLocalDataArraySetup(), and CPhiblastTestFixture::x_SetupResults().
Int2 Blast_HSPResultsReverseOrder | ( | BlastHSPResults * | results | ) |
Reverse order of HSP lists in each hit list in the BLAST results.
This allows HSP lists to be returned from the end of the arrays when reading from a collector HSP stream.
Definition at line 3418 of file blast_hits.c.
References BlastHitList::hsplist_array, BlastHitList::hsplist_count, and results.
Referenced by BlastHSPStreamClose(), and s_FillResultsFromCompoHeaps().
Int2 Blast_HSPResultsReverseSort | ( | BlastHSPResults * | results | ) |
Sort each hit list in the BLAST results by best e-value, in reverse order.
Definition at line 3402 of file blast_hits.c.
References BlastHitList::hsplist_array, BlastHitList::hsplist_count, results, s_BlastHitListPurge(), and s_EvalueCompareHSPListsRev().
Referenced by BlastHSPStreamClose().
Int2 Blast_HSPResultsSortByEvalue | ( | BlastHSPResults * | results | ) |
Sort each hit list in the BLAST results by best e-value.
Definition at line 3381 of file blast_hits.c.
References BlastHitList::hsplist_array, BlastHitList::hsplist_count, NULL, results, s_BlastHitListPurge(), and s_EvalueCompareHSPLists().
Referenced by BLAST_ComputeTraceback_MT(), CBlastPrelimSearch::ComputeBlastHSPResults(), PHIBlast_HSPResultsSplit(), s_BlastHSPBestHitPipeRun(), and s_RPSComputeTraceback().
Boolean Blast_HSPTest | ( | BlastHSP * | hsp, |
const BlastHitSavingOptions * | hit_options, | ||
Int4 | align_length | ||
) |
Determines whether this HSP should be kept or deleted.
hsp | An HSP structure [in] [out] |
hit_options | Hit saving options containing percent identity and HSP length thresholds. |
align_length | alignment length including gaps |
Definition at line 1027 of file blast_hits.c.
References s_HSPTest().
Referenced by Blast_HSPListReevaluateUngapped(), and Blast_TracebackFromHSPList().
Boolean Blast_HSPTestIdentityAndLength | ( | EBlastProgramType | program_number, |
BlastHSP * | hsp, | ||
const Uint1 * | query, | ||
const Uint1 * | subject, | ||
const BlastScoringOptions * | score_options, | ||
const BlastHitSavingOptions * | hit_options | ||
) |
Calculates number of identities and alignment lengths of an HSP via Blast_HSPGetNumIdentities and determines whether this HSP should be kept or deleted.
program_number | Type of BLAST program [in] |
hsp | An HSP structure [in] [out] |
query | Query sequence [in] |
subject | Subject sequence [in] |
score_options | Scoring options, needed to distinguish the out-of-frame case. [in] |
hit_options | Hit saving options containing percent identity and HSP length thresholds. |
Definition at line 1004 of file blast_hits.c.
References ASSERT, Blast_HSPGetNumIdentities(), FALSE, query, s_HSPTest(), and subject.
Referenced by Blast_TracebackFromHSPList(), BOOST_AUTO_TEST_CASE(), and s_GetTraceback().
Int2 Blast_TrimHSPListByMaxHsps | ( | BlastHSPList * | hsp_list, |
const BlastHitSavingOptions * | hit_options | ||
) |
Definition at line 2049 of file blast_hits.c.
References Blast_HSPFree(), BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastHitSavingOptions::max_hsps_per_subject, and NULL.
Referenced by BOOST_AUTO_TEST_CASE(), and s_FilterBlastResults().
BlastHSPList* BlastHSPListDup | ( | const BlastHSPList * | hsp_list | ) |
Returns a duplicate (deep copy) of the given hsp list.
Definition at line 1583 of file blast_hits.c.
References BlastHSPList::hsp_array, BlastHSPList::hspcnt, and malloc().
Referenced by Blast_TracebackFromHSPList().
BlastHSPMappingInfo* BlastHSPMappingInfoFree | ( | BlastHSPMappingInfo * | info | ) |
Deallocate memory for an HSP's additional data structure.
Definition at line 192 of file blast_hits.c.
References info, JumperEditsBlockFree(), NULL, SequenceOverhangsFree(), and sfree.
Referenced by Blast_HSPFree().
BlastHSPMappingInfo* BlastHSPMappingInfoNew | ( | void | ) |
Allocate memory for an HSP's additional data structure.
Definition at line 207 of file blast_hits.c.
References calloc().
Referenced by Blast_HSPClone(), BlastNaExtendJumper(), s_CreateHSP(), s_CreateHSPForWordHit(), and ShortRead_IndexedWordFinder().
Int4 BlastHspNumMax | ( | Boolean | gapped_calculation, |
const BlastHitSavingOptions * | options | ||
) |
Calculates the number of HSPs that should be saved.
gapped_calculation | ungapped if false [in] |
options | BlastHitSavingOptions object [in] |
Definition at line 213 of file blast_hits.c.
References BlastHitSavingOptions::hsp_num_max, and INT4_MAX.
Referenced by BLAST_GetGappedScore(), BLAST_GetUngappedHSPList(), BLAST_SmithWatermanGetGappedScore(), BlastHSPBestHitParamsNew(), BlastHSPCollectorParamsNew(), JumperNaWordFinder(), PHIGetGappedScore(), s_BlastSearchEngineCore(), s_BlastSearchEngineOneContext(), SBlastHitsParametersNew(), and ShortRead_IndexedWordFinder().
Int4 GetPrelimHitlistSize | ( | Int4 | hitlist_size, |
Int4 | compositionBasedStats, | ||
Boolean | gapped_calculation | ||
) |
Definition at line 44 of file blast_hits.c.
References MAX, MIN, and NULL.
Referenced by BlastHSPBestHitParamsNew(), BlastHSPCollectorParamsNew(), and SBlastHitsParametersNew().
BlastHSPResults** PHIBlast_HSPResultsSplit | ( | const BlastHSPResults * | results, |
const SPHIQueryInfo * | pattern_info | ||
) |
Splits the BlastHSPResults structure for a PHI BLAST search into an array of BlastHSPResults structures, corresponding to different pattern occurrences in query.
All HSPs are copied, so it is safe to free the returned BlastHSPResults structures independently of the input results structure.
results | All results from a PHI BLAST search, with HSPs for different query pattern occurrences mixed together. [in] |
pattern_info | Information about pattern occurrences in query. [in] |
Definition at line 3570 of file blast_hits.c.
References Blast_HSPListNew(), Blast_HSPListSaveHSP(), Blast_HSPResultsInsertHSPList(), Blast_HSPResultsNew(), Blast_HSPResultsSortByEvalue(), calloc(), BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastHitList::hsplist_array, BlastHitList::hsplist_count, BlastHitList::hsplist_max, SPHIHspInfo::index, NULL, BlastHSPList::oid, BlastHSP::pat_info, pattern_info(), results, s_BlastHSPCopy(), and sfree.
Referenced by BOOST_AUTO_TEST_CASE(), and PhiBlastResults2SeqAlign_OMF().
Int4 PhiBlastGetEffectiveNumberOfPatterns | ( | const BlastQueryInfo * | query_info | ) |
Count the number of occurrences of pattern in sequence, which do not overlap by more than half the pattern match length.
query_info | Query information structure, containing pattern info. [in] |
Definition at line 360 of file blast_hits.c.
References ASSERT, BlastQueryInfo::contexts, count, BlastContextInfo::length_adjustment, SPHIQueryInfo::num_patterns, SPHIQueryInfo::occurrences, SPHIPatternInfo::offset, and BlastQueryInfo::pattern_info.
Referenced by s_HSPPHIGetEvalue(), and s_PhiBlastCutoffScore().
|
static |
Given a BlastHitList pointer this function makes the hsplist_array larger, up to a maximum size.
These incremental increases are mostly an issue for users who put in a very large number for number of hits to save, but only save a few.
hit_list | object containing the hsplist_array to grow [in] |
Definition at line 3217 of file blast_hits.c.
References ASSERT, BLASTERR_MEMORY, BlastHitList::hsplist_array, BlastHitList::hsplist_current, BlastHitList::hsplist_max, MIN, and NULL.
Referenced by Blast_HitListUpdate().
|
static |
Calculate number of identities in a regular HSP.
query | The query sequence [in] |
subject | The uncompressed subject sequence [in] |
hsp | All information about the HSP [in] |
num_ident_ptr | Number of identities [out] |
align_length_ptr | The alignment length, including gaps [out] |
sbp | Blast score blk [in] |
num_pos_ptr | Number of Positives [out] |
Definition at line 746 of file blast_hits.c.
References SBlastScoreMatrix::data, eGapAlignDel, eGapAlignIns, eGapAlignSub, BlastSeg::end, BlastHSP::gap_info, i, BlastScoreBlk::matrix, NULL, GapEditScript::num, BlastSeg::offset, GapEditScript::op_type, BlastScoreBlk::protein_alphabet, BlastHSP::query, query, GapEditScript::size, BlastHSP::subject, and subject.
Referenced by Blast_HSPGetNumIdentities(), and Blast_HSPGetNumIdentitiesAndPositives().
|
static |
Calculate number of identities in an HSP for an out-of-frame alignment.
query | The query sequence [in] |
subject | The uncompressed subject sequence [in] |
hsp | All information about the HSP [in] |
program | BLAST program (blastx or tblastn) [in] |
num_ident_ptr | Number of identities [out] |
align_length_ptr | The alignment length, including gaps [out] |
sbp | Blast score blk [in] |
num_pos_ptr | Number of Positives [out] |
Definition at line 850 of file blast_hits.c.
References CODON_LENGTH, SBlastScoreMatrix::data, eBlastTypeRpsTblastn, eBlastTypeTblastn, eGapAlignDel, eGapAlignDel1, eGapAlignDel2, eGapAlignIns, eGapAlignIns1, eGapAlignIns2, eGapAlignSub, BlastHSP::gap_info, i, BlastScoreBlk::matrix, NULL, GapEditScript::num, BlastSeg::offset, GapEditScript::op_type, BlastScoreBlk::protein_alphabet, BlastHSP::query, query, GapEditScript::size, BlastHSP::subject, and subject.
Referenced by Blast_HSPGetNumIdentities(), and Blast_HSPGetNumIdentitiesAndPositives().
|
static |
Verifies that the best_evalue field on the BlastHSPList is correct.
hsp_list | object to check [in] |
Definition at line 1714 of file blast_hits.c.
References ABS, BlastHSPList::best_evalue, BlastHSP::evalue, FALSE, BlastHSPList::hsp_array, BlastHSPList::hspcnt, INT4_MAX, MIN, and TRUE.
Referenced by Blast_HitListUpdate().
|
static |
Gets the best (lowest) evalue from the BlastHSPList.
hsp_list | object containing the evalues [in] |
Definition at line 1740 of file blast_hits.c.
References BlastHSP::evalue, BlastHSPList::hsp_array, BlastHSPList::hspcnt, INT4_MAX, and MIN.
Referenced by Blast_HitListUpdate(), Blast_HSPListGetEvalues(), Blast_HSPListPHIGetEvalues(), and Blast_HSPListReapByQueryCoverage().
|
static |
Given a BlastHitList* with a heapified HSP list array, remove the worst scoring HSP list and insert the new HSP list in the heap.
hit_list | Contains all HSP lists for a given query [in] [out] |
hsp_list | A new HSP list to be inserted into the hit list [in] |
Definition at line 3194 of file blast_hits.c.
References BlastHSPList::best_evalue, Blast_HSPListFree(), BlastHSPList::hsp_array, BlastHitList::hsplist_array, BlastHitList::hsplist_count, BlastHitList::low_score, s_EvalueCompareHSPLists(), s_Heapify(), BlastHSP::score, and BlastHitList::worst_evalue.
Referenced by Blast_HitListUpdate().
|
static |
Purge a BlastHitList of empty HSP lists.
hit_list | BLAST hit list structure. [in] [out] |
Definition at line 3170 of file blast_hits.c.
References Blast_HSPListFree(), BlastHSPList::hspcnt, BlastHitList::hsplist_array, and BlastHitList::hsplist_count.
Referenced by Blast_HitListSortByEvalue(), Blast_HSPResultsReverseSort(), and Blast_HSPResultsSortByEvalue().
Copies all contents of a BlastHSP structure.
Used in PHI BLAST for splitting results corresponding to different pattern occurrences in query.
hsp | Original HSP [in] |
Definition at line 236 of file blast_hits.c.
References BlastHSP::bit_score, Blast_HSPInit(), BlastMemDup(), BlastHSP::comp_adjustment_method, BlastHSP::context, BlastSeg::end, BlastHSP::evalue, BlastSeg::frame, BlastHSP::gap_info, GapEditScriptDup(), BlastSeg::gapped_start, NULL, BlastHSP::num, BlastHSP::num_ident, BlastSeg::offset, BlastHSP::pat_info, BlastHSP::query, BlastHSP::score, and BlastHSP::subject.
Referenced by PHIBlast_HSPResultsSplit().
|
static |
Given a BlastHSPList* with a heapified HSP array, check whether the new HSP is better than the worst scoring.
If it is, then remove the worst scoring HSP and insert the new HSP in the heap; otherwise free the new one.
hsp_list | Contains all HSPs for a given subject. [in] [out] |
hsp | A pointer to new HSP to be inserted into the HSP list [in] [out] |
Definition at line 1687 of file blast_hits.c.
References Blast_HSPFree(), BlastHSPList::hsp_array, BlastHSPList::hspcnt, s_Heapify(), and ScoreCompareHSPs().
Referenced by Blast_HSPListSaveHSP().
|
static |
Combine two HSP lists, without altering the individual HSPs, and without reallocating the HSP array.
hsp_list | New HSP list [in] |
combined_hsp_list | Old HSP list, to which new HSPs are added [in] [out] |
new_hspcnt | How many HSPs to save in the combined list? The extra ones are freed. The best scoring HSPs are saved. This argument cannot be greater than the allocated size of the combined list's HSP array. [in] |
Definition at line 2747 of file blast_hits.c.
References BlastHSPList::allocated, ASSERT, Blast_HSPFree(), Blast_HSPListSortByScore(), BlastHSPList::hsp_array, BlastHSPList::hspcnt, malloc(), NULL, ScoreCompareHSPs(), and sfree.
Referenced by Blast_HSPListAppend(), and Blast_HSPListsMerge().
Given two hits, check if the hits can be merged and do the merge if so.
Hits must not contain traceback
hsp1 | The first hit. If merging happens, this hit is overwritten with the merged version [in][out] |
hsp2 | The second hit [in] |
Definition at line 1488 of file blast_hits.c.
References ASSERT, CONTAINED_IN_HSP, BlastSeg::end, FALSE, BlastSeg::frame, BlastHSP::gap_info, BlastSeg::gapped_start, MAX, MIN, BlastSeg::offset, BlastHSP::query, BlastHSP::score, BlastHSP::subject, and TRUE.
Referenced by Blast_HSPListsMerge().
|
static |
Adjust start and end of an HSP in a translated sequence segment.
segment | BlastSeg structure (part of BlastHSP) [in] |
seq_length | Length of the full sequence [in] |
start | Start of the alignment in this segment in nucleotide coordinates, 1-offset [out] |
end | End of the alignment in this segment in nucleotide coordinates, 1-offset [out] |
Definition at line 1093 of file blast_hits.c.
References CODON_LENGTH, BlastSeg::end, BlastSeg::frame, and BlastSeg::offset.
Referenced by Blast_HSPGetAdjustedOffsets().
Comparison function for sorting HSP lists in increasing order of the number of HSPs in a hit.
Needed for s_TrimResultsByTotalHSPLimit below.
v1 | Pointer to the first HSP list [in] |
v2 | Pointer to the second HSP list [in] |
Definition at line 3656 of file blast_hits.c.
Referenced by s_TrimResultsByTotalHSPLimit().
Definition at line 3752 of file blast_hits.c.
Referenced by s_TrimResultsByTotalHSPLimitEx().
Definition at line 3744 of file blast_hits.c.
References r1, r2, s_EvalueCompareHSPs(), and v2.
Referenced by s_TrimResultsByTotalHSPLimitEx().
|
static |
Creates a heap of elements based on a comparison function.
b | An array [in] [out] |
nel | Number of elements in b [in] |
width | The size of each element [in] |
compar | Callback to compare two heap elements [in] |
Definition at line 1660 of file blast_hits.c.
References b, i, and s_Heapify().
Referenced by Blast_HitListUpdate(), and Blast_HSPListSaveHSP().
|
static |
Definition at line 2392 of file blast_hits.c.
References ASSERT, eGapAlignDel, eGapAlignIns, eGapAlignSub, BlastSeg::end, FALSE, BlastHSP::gap_info, GapEditScript::num, BlastSeg::offset, GapEditScript::op_type, BlastHSP::query, GapEditScript::size, BlastHSP::subject, and TRUE.
Referenced by Blast_HSPListPurgeHSPsWithCommonEndpoints().
Compares 2 evalues, consider them equal if both are close enough to zero.
evalue1 | First evalue [in] |
evalue2 | Second evalue [in] |
Definition at line 1390 of file blast_hits.c.
References epsilon.
Referenced by s_EvalueCompareHSPLists(), and s_EvalueCompareHSPs().
Callback for sorting hsp lists by their best evalue/score; Evalues are compared with the condition that if both are close enough to zero (currently < 1.0e-180), they are considered equal.
It is assumed that the HSP arrays in each hit list are already sorted by e-value/score.
Definition at line 3076 of file blast_hits.c.
References BlastHSPList::best_evalue, BLAST_CMP, BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastHSPList::oid, s_EvalueComp(), BlastHSP::score, and v2.
Referenced by Blast_HitListSortByEvalue(), Blast_HitListUpdate(), Blast_HSPResultsSortByEvalue(), s_BlastHitListInsertHSPListInHeap(), and s_EvalueCompareHSPListsRev().
Callback for sorting hsp lists by their best e-value/score, in reverse order - from higher e-value to lower (lower score to higher).
Definition at line 3111 of file blast_hits.c.
References s_EvalueCompareHSPLists(), and v2.
Referenced by Blast_HSPResultsReverseSort().
Comparison callback function for sorting HSPs by e-value and score, before saving BlastHSPList in a BlastHitList.
E-value has priority over score, because lower scoring HSPs might have lower e-values, if they are linked with sum statistics. E-values are compared only up to a certain precision.
v1 | Pointer to first HSP [in] |
v2 | Pointer to second HSP [in] |
Definition at line 1415 of file blast_hits.c.
References BlastHSP::evalue, s_EvalueComp(), ScoreCompareHSPs(), and v2.
Referenced by Blast_HSPListSortByEvalue(), and s_CompareScoreHSPwOid().
|
static |
This is a copy of a static function from ncbimisc.c.
Turns array into a heap with respect to a given comparison function.
Definition at line 1627 of file blast_hits.c.
Referenced by s_BlastHitListInsertHSPListInHeap(), s_BlastHSPListInsertHSPInHeap(), and s_CreateHeap().
Retrieve the ending diagonal of an HSP.
hsp | The target HSP |
Definition at line 1475 of file blast_hits.c.
References BlastSeg::end, BlastHSP::query, and BlastHSP::subject.
Referenced by Blast_HSPListsMerge().
|
static |
Calculate e-value for an HSP found by PHI BLAST.
hsp | An HSP found by PHI BLAST [in] |
sbp | Scoring block with statistical parameters [in] |
query_info | Structure containing information about pattern counts [in] |
pattern_blk | Structure containing counts of PHI pattern hits [in] |
Definition at line 399 of file blast_hits.c.
References ASSERT, BlastHSP::evalue, BlastScoreBlk::kbp, Blast_KarlinBlk::Lambda, SPHIPatternSearchBlk::num_patterns_db, Blast_KarlinBlk::paramC, PhiBlastGetEffectiveNumberOfPatterns(), and BlastHSP::score.
Referenced by Blast_HSPListPHIGetEvalues().
Retrieve the starting diagonal of an HSP.
hsp | The target HSP |
Definition at line 1465 of file blast_hits.c.
References BlastSeg::offset, BlastHSP::query, and BlastHSP::subject.
Referenced by Blast_HSPListsMerge().
|
static |
Definition at line 993 of file blast_hits.c.
References BlastHSP::num_ident, and BlastHitSavingOptions::percent_identity.
Referenced by Blast_HSPTest(), and Blast_HSPTestIdentityAndLength().
Callback for sorting HSPs by ending offset in query.
Sorting is by increasing context, then increasing query end offset, then increasing subject end offset, then decreasing score, then decreasing query start offset, then decreasing subject start offset. Null HSPs are moved to the end of the array.
v1 | pointer to first HSP [in] |
v2 | pointer to second HSP [in] |
Definition at line 2333 of file blast_hits.c.
References BlastHSP::context, BlastSeg::end, BlastSeg::offset, BlastHSP::query, BlastHSP::score, BlastHSP::subject, and v2.
Referenced by Blast_HSPListPurgeHSPsWithCommonEndpoints().
Callback for sorting HSPs by starting offset in query.
Sorting is by increasing context, then increasing query start offset, then increasing subject start offset, then decreasing score, then increasing query end offset, then increasing subject end offset. Null HSPs are moved to the end of the array.
v1 | pointer to first HSP [in] |
v2 | pointer to second HSP [in] |
Definition at line 2268 of file blast_hits.c.
References BlastHSP::context, BlastSeg::end, BlastSeg::offset, BlastHSP::query, BlastHSP::score, BlastHSP::subject, and v2.
Referenced by Blast_HSPListPurgeHSPsWithCommonEndpoints().
callback used to sort HSP lists in order of increasing OID
x | First HSP list [in] |
y | Second HSP list [in] |
Definition at line 2112 of file blast_hits.c.
References BlastHSPList::oid.
Referenced by Blast_HitListMerge().
callback used to sort a list of encapsulated HSP structures in order of decreasing raw score -RMH-
Definition at line 3452 of file blast_hits.c.
References SHspWrap::hsp, and BlastHSP::score.
Referenced by Blast_HSPResultsApplyMasklevel().
|
static |
Removes extra results if a limit is imposed on the total number of HSPs returned.
If the search involves multiple query sequences, the total HSP limit is applied separately to each query. The trimming algorithm makes sure that at least 1 HSP is returned for each database sequence hit. Suppose results for a given query consist of HSP lists for N database sequences, and the limit is T. HSP lists are sorted in order of increasing number of HSPs in each list. Then the algorithm proceeds by leaving at most i*T/N HSPs for the first i HSP lists, for every i = 1, 2, ..., N.
results | Results after preliminary stage of a BLAST search [in|out] |
total_hsp_limit | Limit on total number of HSPs [in] |
Definition at line 3683 of file blast_hits.c.
References Blast_HSPFree(), FALSE, BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastHitList::hsplist_array, BlastHitList::hsplist_count, malloc(), MAX, NULL, results, s_CompareHsplistHspcnt(), sfree, and TRUE.
Referenced by Blast_HSPResultsFromHSPStreamWithLimit().
|
static |
Definition at line 3765 of file blast_hits.c.
References Blast_HitListFree(), Blast_HSPFree(), Blast_HSPListNew(), Blast_HSPListSaveHSP(), Blast_HSPResultsInsertHSPList(), FALSE, free(), BlastHSPwOid::hsp, BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastHitList::hsplist_array, BlastHitList::hsplist_count, BlastHitList::hsplist_max, malloc(), NULL, BlastHSPList::oid, BlastHSPwOid::oid, BlastHSPList::query_index, results, s_CompareOidHSPwOid(), s_CompareScoreHSPwOid(), and TRUE.
Referenced by Blast_HSPResultsFromHSPStreamWithLimitEx().
|
static |
Update HSP data after reevaluation with ambiguities.
In particular this function calculates number of identities and checks if the percent identity criterion is satisfied.
hsp | HSP to update [in] [out] |
gapped | Is this a gapped search? [in] |
cutoff_score | Cutoff score for saving the HSP [in] |
score | New score [in] |
query_start | Start of query sequence [in] |
subject_start | Start of subject sequence [in] |
best_q_start | Pointer to start of the new alignment in query [in] |
best_q_end | Pointer to end of the new alignment in query [in] |
best_s_start | Pointer to start of the new alignment in subject [in] |
best_s_end | Pointer to end of the new alignment in subject [in] |
best_start_esp_index | index of the edit script array where the new alignment starts. [in] |
best_end_esp_index | index in the edit script array where the new alignment ends. [in] |
best_end_esp_num | Number of edit operations in the last edit script, that are included in the alignment. [in] |
Definition at line 440 of file blast_hits.c.
References ASSERT, BlastSeg::end, FALSE, BlastHSP::gap_info, GapEditScriptDelete(), GapEditScriptNew(), GapEditScriptPartialCopy(), GapEditScript::num, BlastSeg::offset, BlastHSP::query, BlastHSP::score, GapEditScript::size, BlastHSP::subject, and TRUE.
Referenced by Blast_HSPReevaluateWithAmbiguitiesGapped(), and s_UpdateReevaluatedHSPUngapped().
|
static |
Update HSP data after reevaluation with ambiguities for an ungapped search.
In particular this function calculates number of identities and checks if the percent identity criterion is satisfied.
hsp | HSP to update [in] [out] |
cutoff_score | Cutoff score for saving the HSP [in] |
score | New score [in] |
query_start | Start of query sequence [in] |
subject_start | Start of subject sequence [in] |
best_q_start | Pointer to start of the new alignment in query [in] |
best_q_end | Pointer to end of the new alignment in query [in] |
best_s_start | Pointer to start of the new alignment in subject [in] |
best_s_end | Pointer to end of the new alignment in subject [in] |
Definition at line 664 of file blast_hits.c.
References FALSE, and s_UpdateReevaluatedHSP().
Referenced by Blast_HSPReevaluateWithAmbiguitiesUngapped().
SBlastHitsParameters* SBlastHitsParametersDup | ( | const SBlastHitsParameters * | hit_params | ) |
Make a deep copy of the SBlastHitsParameters structure passed in.
hit_params | source hit parameters structure [in] |
Definition at line 101 of file blast_hits.c.
SBlastHitsParameters* SBlastHitsParametersFree | ( | SBlastHitsParameters * | param | ) |
Deallocates SBlastHitsParameters.
param | object to be freed. |
Definition at line 115 of file blast_hits.c.
Referenced by Blast_HSPResultsFromHSPStream(), BOOST_AUTO_TEST_CASE(), CBlastTracebackSearch::Run(), and CBlastTracebackSearch::RunSimple().
Int2 SBlastHitsParametersNew | ( | const BlastHitSavingOptions * | hit_options, |
const BlastExtensionOptions * | ext_options, | ||
const BlastScoringOptions * | scoring_options, | ||
SBlastHitsParameters ** | retval | ||
) |
Sets up small structures used by blast_hits.c for saving HSPs.
hit_options | fields hitlist_size and hsp_num_max are needed; a pointer to this structure will be stored in the resulting structure. [in] |
ext_options | field compositionBasedStats needed here. [in] |
scoring_options | gapped_calculation needed here. [in] |
retval | the allocated SBlastHitsParameters* [out] |
Definition at line 75 of file blast_hits.c.
References ASSERT, BlastHspNumMax(), BlastExtensionOptions::compositionBasedStats, BlastScoringOptions::gapped_calculation, GetPrelimHitlistSize(), BlastHitSavingOptions::hitlist_size, malloc(), and NULL.
Referenced by BOOST_AUTO_TEST_CASE(), CBlastPrelimSearch::ComputeBlastHSPResults(), CBlastTracebackSearch::Run(), and CBlastTracebackSearch::RunSimple().
Comparison callback function for sorting HSPs, first by score in descending order, then by location.
Among alignments with equal score, an HSP will precede any other HSPs that are completely contained within its endpoints.
H2 is contained in H1 if and only if H1.query.offset <= H2.query.offset <= H2.query.end <= H1.query.end and H1.sbjct.offset <= H2.sbjct.offset <= H2.sbjct.end <= H1.sbjct.end
Definition at line 1330 of file blast_hits.c.
References BLAST_CMP, BlastSeg::end, BlastSeg::offset, BlastHSP::query, result, BlastHSP::score, and BlastHSP::subject.
Referenced by Blast_HSPListIsSortedByScore(), Blast_HSPListSaveHSP(), Blast_HSPListSortByScore(), s_BlastHSPListInsertHSPInHeap(), s_BlastHSPListsCombineByScore(), s_EvalueCompareHSPs(), s_ScoreCompareHSPWithContext(), and s_SumScoreCompareLinkedHSPSets().