34 #ifndef ALGO_BLAST_CORE__BLAST_HITS__H
35 #define ALGO_BLAST_CORE__BLAST_HITS__H
192 #define DBSEQ_CHUNK_OVERLAP 100
247 Int4 subject_start,
Int4 subject_end,
248 Int4 query_gapped_start,
Int4 subject_gapped_start,
249 Int4 query_context,
Int2 query_frame,
Int2 subject_frame,
285 const Uint1* query_start,
const Int4 query_length,
286 const Uint1* subject_start,
const Int4 subject_length,
304 const Uint1* query_start,
const Uint1* subject_start,
324 Int4* align_length_ptr);
343 Int4* align_length_ptr,
397 double min_query_coverage_pct,
440 Int4 query_length,
Int4 subject_length,
461 Uint1** translation_buffer_ptr,
Uint1** subject_ptr,
462 Int4* subject_length_ptr,
Int4* start_shift_ptr);
550 double scaling_factor);
705 Int4 hsp_num_max,
Int4* split_points,
706 Int4 contexts_per_query,
707 Int4 chunk_overlap_size,
810 Int4 contexts_per_query,
Int4 *split_offsets,
948 Int4 compositionBasedStats,
Defines to provide correct exporting from BLAST DLL in Windows.
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
struct BlastHSPMappingInfo BlastHSPMappingInfo
Mapping information for an HSP.
Int2 Blast_HSPListReapByRawScore(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Discard the HSPs above the raw threshold from the HSP list.
Int2 Blast_HSPResultsReverseSort(BlastHSPResults *results)
Sort each hit list in the BLAST results by best e-value, in reverse order.
Int2 Blast_HSPListAppend(BlastHSPList **old_hsp_list_ptr, BlastHSPList **combined_hsp_list_ptr, Int4 hsp_num_max)
Append one HSP list to the other.
BlastHSPResults * Blast_HSPResultsFromHSPStreamWithLimitEx(struct BlastHSPStream *hsp_stream, Uint4 num_queries, SBlastHitsParameters *hit_param, Uint4 max_num_hsps, Boolean *removed_hsps)
As Blast_HSPResultsFromHSPStreamWithLimit, except accept and return array of Boolean flags specifying ...
void Blast_HSPListSortByEvalue(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by e-value, with scores and other criteria used to resolve ties.
Int2 Blast_HitListHSPListsFree(BlastHitList *hitlist)
Deallocate memory for every HSP list on BlastHitList, as well as all their components.
BlastHSP * Blast_HSPNew(void)
Allocates and zeros out memory for an HSP structure.
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
struct SPHIHspInfo SPHIHspInfo
In PHI BLAST: information about pattern match in a given HSP.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
void Blast_HSPListPHIGetEvalues(BlastHSPList *hsp_list, BlastScoreBlk *sbp, const BlastQueryInfo *query_info, const SPHIPatternSearchBlk *pattern_blk)
Calculate e-values for a PHI BLAST HSP list.
Int2 Blast_HSPGetNumIdentitiesAndPositives(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr, const BlastScoreBlk *sbp)
Calculate number of identities and positives in an HSP and set the BlastHSP::num_ident and BlastHSP::...
BlastHitList * Blast_HitListFree(BlastHitList *hitlist)
Deallocate memory for the hit list.
Int4 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions *options)
Calculates the number of HSPs that should be saved.
Int2 Blast_HitListMerge(BlastHitList **old_hit_list_ptr, BlastHitList **combined_hit_list_ptr, Int4 contexts_per_query, Int4 *split_offsets, Int4 chunk_overlap_size, Boolean allow_gap)
Combine two hitlists; both HitLists must contain HSPs that represent alignments to the same query seq...
void Blast_HSPCalcLengthAndGaps(const BlastHSP *hsp, Int4 *length, Int4 *gaps, Int4 *gap_opens)
Calculate length of an HSP as length in query plus length of gaps in query.
Int2 Blast_HSPResultsSortByEvalue(BlastHSPResults *results)
Sort each hit list in the BLAST results by best e-value.
Boolean Blast_HSPTestIdentityAndLength(EBlastProgramType program_number, BlastHSP *hsp, const Uint1 *query, const Uint1 *subject, const BlastScoringOptions *score_options, const BlastHitSavingOptions *hit_options)
Calculates number of identities and alignment lengths of an HSP via Blast_HSPGetNumIdentities and det...
Int4 Blast_HSPListSubjectBestHit(EBlastProgramType program, const BlastHSPSubjectBestHitOptions *subject_besthit_opts, const BlastQueryInfo *query_info, BlastHSPList *hsp_list)
Int2 Blast_HSPResultsReverseOrder(BlastHSPResults *results)
Reverse order of HSP lists in each hit list in the BLAST results.
Boolean Blast_HSPList_IsEmpty(const BlastHSPList *hsp_list)
Returns true if the BlastHSPList contains no HSPs.
struct BlastHitList BlastHitList
The structure to contain all BLAST results for one query sequence.
BlastHitList * Blast_HitListNew(Int4 hitlist_size)
Allocate memory for a hit list of a given size.
Int2 Blast_HSPGetPartialSubjectTranslation(BLAST_SequenceBlk *subject_blk, BlastHSP *hsp, Boolean is_ooframe, const Uint1 *gen_code_string, Uint1 **translation_buffer_ptr, Uint1 **subject_ptr, Int4 *subject_length_ptr, Int4 *start_shift_ptr)
Performs the translation and coordinates adjustment, if only part of the subject sequence is translat...
Int2 Blast_HitListSortByEvalue(BlastHitList *hit_list)
Sort BlastHitList based on e-value.
Int4 Blast_HSPListPurgeHSPsWithCommonEndpoints(EBlastProgramType program, BlastHSPList *hsp_list, Boolean purge)
Check for an overlap of two different alignments and remove redundant HSPs.
Int2 Blast_HSPResultsInsertHSPList(BlastHSPResults *results, BlastHSPList *hsp_list, Int4 hitlist_size)
Blast_HSPResultsInsertHSPList Insert an HSP list to the appropriate place in the results structure.
void Blast_HSPGetAdjustedOffsets(EBlastProgramType program, BlastHSP *hsp, Int4 query_length, Int4 subject_length, Int4 *q_start, Int4 *q_end, Int4 *s_start, Int4 *s_end)
Adjust HSP endpoint offsets according to strand/frame; return values in 1-offset coordinates instead ...
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Int2 Blast_HSPGetNumIdentities(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr)
Calculate number of identities in an HSP and set the BlastHSP::num_ident field (unconditionally)
Int2 Blast_HSPListPurgeNullHSPs(BlastHSPList *hsp_list)
Cleans out the NULLed out HSP's from the HSP array that is part of the BlastHSPList.
BlastHSPMappingInfo * BlastHSPMappingInfoNew(void)
Allocate memory for an HSP's additional data structure.
BlastHSPList * BlastHSPListDup(const BlastHSPList *hsp_list)
Returns a duplicate (deep copy) of the given hsp list.
Boolean Blast_HSPTest(BlastHSP *hsp, const BlastHitSavingOptions *hit_options, Int4 align_length)
Determines whether this HSP should be kept or deleted.
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
struct BlastHSPResults BlastHSPResults
The structure to contain all BLAST results, for multiple queries.
BlastHSPResults * Blast_HSPResultsFromHSPStreamWithLimit(struct BlastHSPStream *hsp_stream, Uint4 num_queries, SBlastHitsParameters *hit_param, Uint4 max_num_hsps, Boolean *removed_hsps)
As Blast_HSPResultsFromHSPStream, except the total number of HSPs kept for each query does not exceed...
Int2 Blast_HSPListsMerge(BlastHSPList **hsp_list, BlastHSPList **combined_hsp_list_ptr, Int4 hsp_num_max, Int4 *split_points, Int4 contexts_per_query, Int4 chunk_overlap_size, Boolean allow_gap, Boolean short_reads)
Merge an HSP list from a chunk of the subject sequence into a previously computed HSP list.
BlastHSPMappingInfo * BlastHSPMappingInfoFree(BlastHSPMappingInfo *info)
Deallocate memory for an HSP's additional data structure.
struct BlastHSPList BlastHSPList
The structure to hold all HSPs for a given sequence after the gapped alignment.
BlastHSP * Blast_HSPClone(const BlastHSP *hsp)
Make a deep copy of an HSP.
Int2 Blast_HitListPurgeNullHSPLists(BlastHitList *hit_list)
Purges a BlastHitList of NULL HSP lists.
Int2 Blast_HSPListGetEvalues(EBlastProgramType program_number, const BlastQueryInfo *query_info, Int4 subject_length, BlastHSPList *hsp_list, Boolean gapped_calculation, Boolean RPS_prelim, const BlastScoreBlk *sbp, double gap_decay_rate, double scaling_factor)
Calculate the expected values for all HSPs in a hit list, without using the sum statistics.
Int2 Blast_HSPListReevaluateUngapped(EBlastProgramType program, BlastHSPList *hsp_list, BLAST_SequenceBlk *query_blk, BLAST_SequenceBlk *subject_blk, const BlastInitialWordParameters *word_params, const BlastHitSavingParameters *hit_params, const BlastQueryInfo *query_info, BlastScoreBlk *sbp, const BlastScoringParameters *score_params, const BlastSeqSrc *seq_src, const Uint1 *gen_code_string)
Reevaluate all ungapped HSPs in an HSP list.
Boolean Blast_HSPQueryCoverageTest(BlastHSP *hsp, double min_query_coverage_pct, Int4 query_length)
Calculate query coverage percentage of an hsp.
Int2 Blast_TrimHSPListByMaxHsps(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
SBlastHitsParameters * SBlastHitsParametersFree(SBlastHitsParameters *param)
Deallocates SBlastHitsParameters.
void Blast_HSPListAdjustOddBlastnScores(BlastHSPList *hsp_list, Boolean gapped_calculation, const BlastScoreBlk *sbp)
For nucleotide BLAST, if the match reward score is equal to 2, random alignments are dominated by run...
struct SBlastHitsParameters SBlastHitsParameters
Keeps prelim_hitlist_size and HitSavingOptions together, mostly for use by hspstream.
Int2 Blast_HSPListReapByQueryCoverage(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options, const BlastQueryInfo *query_info, EBlastProgramType program_number)
Discard the HSPs below the min query coverage pct from the HSP list.
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
Int2 Blast_HSPResultsApplyMasklevel(BlastHSPResults *results, const BlastQueryInfo *query_info, Int4 masklevel, Int4 query_length)
Apply Cross_match like masklevel to HSP list.
Boolean Blast_HSPListIsSortedByScore(const BlastHSPList *hsp_list)
Check if HSP list is sorted by score.
const Uint1 * Blast_HSPGetTargetTranslation(SBlastTargetTranslation *target_t, const BlastHSP *hsp, Int4 *translated_length)
Returns a buffer with a protein translated from nucleotide.
struct BlastSeg BlastSeg
One sequence segment within an HSP.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
Int4 GetPrelimHitlistSize(Int4 hitlist_size, Int4 compositionBasedStats, Boolean gapped_calculation)
BlastHSPResults ** PHIBlast_HSPResultsSplit(const BlastHSPResults *results, const SPHIQueryInfo *pattern_info)
Splits the BlastHSPResults structure for a PHI BLAST search into an array of BlastHSPResults structur...
void Blast_HSPAdjustSubjectOffset(BlastHSP *hsp, Int4 start_shift)
Adjusts offsets if partial sequence was used for extension.
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
double Blast_HSPGetQueryCoverage(const BlastHSP *hsp, Int4 query_length)
Calculate query coverage percentage of an hsp.
Boolean Blast_HSPReevaluateWithAmbiguitiesUngapped(BlastHSP *hsp, const Uint1 *query_start, const Uint1 *subject_start, const BlastInitialWordParameters *word_params, BlastScoreBlk *sbp, Boolean translated)
Reevaluate the HSP's score and percent identity after taking into account the ambiguity information.
Int2 SBlastHitsParametersNew(const BlastHitSavingOptions *hit_options, const BlastExtensionOptions *ext_options, const BlastScoringOptions *scoring_options, SBlastHitsParameters **retval)
Sets up small structures used by blast_hit.c for saving HSPs.
Int4 PhiBlastGetEffectiveNumberOfPatterns(const BlastQueryInfo *query_info)
Count the number of occurrences of pattern in sequence, which do not overlap by more than half the pa...
void Blast_HSPListPHIGetBitScores(BlastHSPList *hsp_list, BlastScoreBlk *sbp)
Calculate bit scores from raw scores in an HSP list for a PHI BLAST search.
void Blast_HSPListSwap(BlastHSPList *list1, BlastHSPList *list2)
Swaps the two HSP lists via structure assignment.
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Int2 Blast_HSPListReapByEvalue(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Discard the HSPs above the e-value threshold from the HSP list.
SBlastHitsParameters * SBlastHitsParametersDup(const SBlastHitsParameters *hit_params)
Make a deep copy of the SBlastHitsParameters structure passed in.
Int2 Blast_HSPListGetBitScores(BlastHSPList *hsp_list, Boolean gapped_calculation, const BlastScoreBlk *sbp)
Calculate bit scores from raw scores in an HSP list.
struct BlastHSP BlastHSP
Structure holding all information about an HSP.
Boolean Blast_HSPReevaluateWithAmbiguitiesGapped(BlastHSP *hsp, const Uint1 *query_start, const Int4 query_length, const Uint1 *subject_start, const Int4 subject_length, const BlastHitSavingParameters *hit_params, const BlastScoringParameters *score_params, const BlastScoreBlk *sbp)
Reevaluate the HSP's score and percent identity after taking into account the ambiguity information.
BlastHSPResults * Blast_HSPResultsFromHSPStream(struct BlastHSPStream *hsp_stream, size_t num_queries, SBlastHitsParameters *hit_param)
Move all of the hits within an HSPStream into a BlastHSPResults structure.
Int2 Blast_HitListUpdate(BlastHitList *hit_list, BlastHSPList *hsp_list)
Insert a new HSP list into the hit list.
void Blast_HSPListAdjustOffsets(BlastHSPList *hsp_list, Int4 offset)
Adjust subject offsets in an HSP list if only part of the subject sequence was searched.
The structures and functions in blast_options.
Structure and function definitions for BLAST parameter structures, which are internal to the CORE of ...
Definitions for various programs supported by core BLAST.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definitions and functions associated with the BlastQueryInfo structure.
Declaration of ADT to retrieve sequences for the BLAST engine.
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
Definitions of structures used for saving traceback information.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
Type and macro definitions from C toolkit that are not defined in C++ toolkit.
Uint1 Boolean
bool replacement for C
Functions for finding pattern matches in sequence (PHI-BLAST).
static int pattern_info(int what, void *where, BOOL unsetok)
Structure to hold a sequence.
Options used for gapped extension These include: a.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Boolean do_not_reallocate
Is reallocation of the hsp_array allowed?
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Int4 hsp_max
The maximal number of HSPs allowed to be saved.
double best_evalue
Smallest e-value for HSPs in this list.
Int4 allocated
The allocated size of the hsp_array.
Int4 query_index
Index of the query which this HSPList corresponds to.
Mapping information for an HSP.
Uint1 left_edge
Two subject bases before the alignment in the four least significant bits and flags in most significa...
Int4 flags
Same as above for subject bases after the alignment (for RNA-seq mapping)
JumperEditsBlock * edits
Information about mismatches and gaps, used for mapping short reads.
SequenceOverhangs * subject_overhangs
Unaligned subject subsequence.
The structure to contain all BLAST results, for multiple queries.
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Int4 num_queries
Number of query sequences.
Default implementation of BlastHSPStream.
Structure holding all information about an HSP.
SPHIHspInfo * pat_info
In PHI BLAST, information about this pattern match.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
Int4 context
Context number of query.
double bit_score
Bit score, calculated from score.
Int4 num
How many HSP's are linked together for sum statistics evaluation? If unset (0), this HSP is not part ...
BlastSeg subject
Subject sequence info.
GapEditScript * gap_info
ALL gapped alignment is here.
Int2 comp_adjustment_method
which mode of composition adjustment was used; relevant only for blastp and tblastn
Int4 score
This HSP's raw score.
BlastHSPMappingInfo * map_info
The structure to contain all BLAST results for one query sequence.
double worst_evalue
Highest of the best e-values among the HSP lists.
Int4 hsplist_max
Maximal allowed size of the HSP lists array.
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Int4 hsplist_count
Filled size of the HSP lists array.
Int4 low_score
The lowest of the best scores among the HSP lists.
Int4 hsplist_current
Number of allocated HSP list arrays.
Boolean heapified
Is this hit list already heapified?
Int4 num_hits
Number of similar hits for the query (for mapping)
Options used when evaluating and saving hits These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it.
The query related information.
Structure used for scoring calculations.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
One sequence segment within an HSP.
Int4 gapped_start
Where the gapped extension started.
Int2 frame
Translation frame.
Complete type definition of Blast Sequence Source ADT.
Edit script: linked list of correspondences between two sequences.
Alignment edit script for gapped alignment.
Keeps prelim_hitlist_size and HitSavingOptions together, mostly for use by hspstream.
Int4 prelim_hitlist_size
number of hits saved during preliminary part of search.
Int4 hsp_num_max
number of HSPs to save per db sequence.
Information about target translations.
In PHI BLAST: information about pattern match in a given HSP.
Int4 index
Index of query pattern occurrence for this HSP.
Int4 length
Length of this pattern occurrence in subject.
Structure containing all auxiliary information needed in a pattern search.
In PHI BLAST, structure containing information about all pattern occurrences in query.
Structure to save short unaligned subsequences outside an HSP.