108 if (p->sid == list->
oid) {
118 ASSERT(sid < tmp_hit_list->hsplist_current);
160 for (
id =0;
id < list->
hspcnt; ++id) {
169 r->end =
r->begin +
r->len;
170 for (q=
NULL, p=best_list; p && p->
begin <
r->begin; q=p, p=p->next);
204 for (qid=0; qid<
results->num_queries; ++qid) {
205 if (best_list[qid]) {
207 if (!
results->hitlist_array[qid]) {
240 Int4 i, qid, qlen, begin, end, lenA, lenB, scoreA, scoreB, overhang;
241 Int4 allowed_begin, allowed_end;
242 double denA, evalueA, evalueB, param_overhang, param_s;
252 if (!hsp_list)
return 0;
269 denA = 1.0 * scoreA / lenA / param_s;
273 for (p=best_list[qid]; p && p->
end < end; p=p->
next);
274 for ( ; p && p->
begin <= begin; p=p->
next) {
280 && evalueB <= evalueA
281 && 1.0 * scoreB / lenB > denA)
291 overhang = 2.0 * lenA * param_overhang / (1.0 - 2.0 * param_overhang);
292 allowed_begin = begin - overhang;
293 allowed_end = end + overhang;
294 overhang = lenA * param_overhang;
297 denA = 1.0 * scoreA / lenA * param_s;
299 for (q=
NULL, p=best_list[qid]; p && p->
begin < allowed_begin; q=p, p=p->
next);
300 for (; p && p->begin < allowed_end; ) {
303 scoreB = p->hsp->score;
304 overhang = (p->end - p->begin - lenB)/2;
305 evalueB= p->hsp->evalue;
306 if ( p->begin + overhang >= begin
307 && p->end - overhang <= end
308 && evalueB >= evalueA
309 && 1.0 * scoreB / lenB < denA)
313 else best_list[qid] = p->
next;
325 for (q=
NULL, p=best_list[qid]; p && p->
begin < begin; q=p, p=p->
next);
328 r->sid = hsp_list->
oid;
361 Int4 i, qid, begin, end, lenA, lenB, scoreA, scoreB, overhang;
362 Int4 allowed_begin, allowed_end;
363 double denA, evalueA, evalueB, param_overhang, param_s;
372 if (!hsp_list)
return 0;
385 denA = 1.0 * scoreA / lenA / param_s;
389 for (p=best_list[qid]; p && p->
end < end; p=p->
next);
390 for ( ; p && p->
begin < begin; p=p->
next) {
396 && evalueB <= evalueA
397 && 1.0 * scoreB / lenB > denA)
407 overhang = 2.0 * lenA * param_overhang / (1.0 - 2.0 * param_overhang);
408 allowed_begin = begin - overhang;
409 allowed_end = end + overhang;
410 overhang = lenA * param_overhang;
413 denA = 1.0 * scoreA / lenA * param_s;
415 for (q=
NULL, p=best_list[qid]; p && p->
begin < allowed_begin; q=p, p=p->
next);
416 for (; p && p->begin < allowed_end; ) {
419 scoreB = p->hsp->score;
420 overhang = (p->end - p->begin - lenB)/2;
421 evalueB= p->hsp->evalue;
422 if ( p->begin + overhang >= begin
423 && p->end - overhang <= end
424 && evalueB >= evalueA
425 && 1.0 * scoreB / lenB < denA)
429 else best_list[qid] = p->
next;
440 for (q=
NULL, p=best_list[qid]; p && p->
begin < begin; q=p, p=p->
next);
493 if (! query_info)
return NULL;
508 data.params = params;
509 data.query_info = query_info;
522 int qid, sid, num_list;
525 for (qid = 0; qid <
results->num_queries; ++qid) {
526 if (!(
results->hitlist_array[qid]))
continue;
527 num_list =
results->hitlist_array[qid]->hsplist_count;
528 for (sid = 0; sid < num_list; ++sid) {
530 results->hitlist_array[qid]->hsplist_array[sid]);
531 results->hitlist_array[qid]->hsplist_array[sid] =
NULL;
533 results->hitlist_array[qid]->hsplist_count = 0;
569 if (! query_info)
return NULL;
580 data.params = params;
581 data.query_info = query_info;
594 Int4 compositionBasedStats,
601 compositionBasedStats, gapped_calculation);
623 writer_info->
params = params;
632 pipe_info->
params = params;
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
BlastHitList * Blast_HitListFree(BlastHitList *hitlist)
Deallocate memory for the hit list.
Int4 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions *options)
Calculates the number of HSPs that should be saved.
Int2 Blast_HSPResultsSortByEvalue(BlastHSPResults *results)
Sort each hit list in the BLAST results by best e-value.
BlastHitList * Blast_HitListNew(Int4 hitlist_size)
Allocate memory for a hit list of a given size.
Int2 Blast_HitListSortByEvalue(BlastHitList *hit_list)
Sort BlastHitList based on evalue.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
Int4 GetPrelimHitlistSize(Int4 hitlist_size, Int4 compositionBasedStats, Boolean gapped_calculation)
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Int2 Blast_HitListUpdate(BlastHitList *hit_list, BlastHSPList *hsp_list)
Insert a new HSP list into the hit list.
Utilities for dealing with BLAST HSPs in the core of BLAST.
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST (i.e.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Int4 Blast_GetQueryIndexFromContext(Int4 context, EBlastProgramType program)
Given a context from BLAST engine core, return the query index.
Int4 BlastQueryInfoGetQueryLength(const BlastQueryInfo *qinfo, EBlastProgramType program, Int4 query_index)
Obtains the sequence length for a given query in the query, without taking into consideration any app...
Various auxiliary BLAST utility functions.
int32_t Int4
4-byte (32-bit) signed integer
static int s_BlastHSPBestHitFinal(void *data, void *hsp_results)
Perform post-run clean-ups Blast_HSPListFree(hsp_list);.
static BlastHSPWriter * s_BlastHSPBestHitFree(BlastHSPWriter *writer)
Free the writer.
static BlastHSPWriter * s_BlastHSPBestHitNew(void *params, BlastQueryInfo *query_info, BLAST_SequenceBlk *sequence)
create the writer
static int s_BlastHSPBestHitRun(void *data, BlastHSPList *hsp_list)
Perform writing task, will save best hits to best_list.
BlastHSPPipeInfo * BlastHSPBestHitPipeInfoNew(BlastHSPBestHitParams *params)
static int s_ExportToHitlist(int qid, BlastHSPBestHitData *bh_data, BlastHitList *hit_list)
Export best_list to hitlist.
static BlastHSPPipe * s_BlastHSPBestHitPipeNew(void *params, BlastQueryInfo *query_info)
create the pipe
static int s_BlastHSPBestHitPipeRun(void *data, BlastHSPResults *results)
The pipe version of best-hit writer.
BlastHSPWriterInfo * BlastHSPBestHitInfoNew(BlastHSPBestHitParams *params)
WriterInfo and PipeInfo to create a best hit writer/pipe.
struct BlastHSPBestHitData BlastHSPBestHitData
static int s_BlastHSPBestHitInit(void *data, void *hsp_results)
The following are implementations for BlastHSPWriter ADT.
static BlastHSPPipe * s_BlastHSPBestHitPipeFree(BlastHSPPipe *pipe)
Free the pipe.
struct LinkedHSP_BH LinkedHSP_BH
linked list of HSPs used to keep best hits for each query.
static int s_BlastHSPBestHitRun_RPS(void *data, BlastHSPList *hsp_list)
Perform writing task for RPS blast, will save best hits to best_list.
BlastHSPBestHitParams * BlastHSPBestHitParamsFree(BlastHSPBestHitParams *opts)
Deallocates the BlastHSPBestHitParams structure passed in.
BlastHSPBestHitParams * BlastHSPBestHitParamsNew(const BlastHitSavingOptions *hit_options, const BlastHSPBestHitOptions *best_hit_opts, Int4 compositionBasedStats, Boolean gapped_calculation)
The following are exported functions to be used by APP.
static int s_ImportFromHitlist(int qid, BlastHSPBestHitData *bh_data, BlastHitList *hit_list)
Import hitlist to best_list (assuming all hsps are besthits)
Implementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
for(len=0;yy_str[len];++len)
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define ASSERT
macro for assert.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
Structure to hold a sequence.
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
Int4 * max_hsps
max number of hsps to hold before pruning
BlastQueryInfo * query_info
query info
Int4 * num_hsps
field to record the number of hsps in each list
BlastHSPBestHitParams * params
parameters to control overhang
LinkedHSP_BH ** best_list
buffer to store best hits
Options for the Best Hit HSP collection algorithm.
Keeps parameters used in best hit algorithm.
double score_edge
fraction of score margin in condition 4
double overhang
overhang used in condition 1.
Int4 hsp_num_max
number of HSPs to save per db sequence.
Int4 prelim_hitlist_size
number of hits saved during preliminary part of search.
EBlastProgramType program
program type.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Int4 query_index
Index of the query which this HSPList corresponds to.
A wrap of data structure used to create a pipe.
BlastHSPPipeNewFn NewFnPtr
struct BlastHSPPipeInfo * next
the next pipe info in chain
ADT definition of BlastHSPPipe.
void * data
data structure
BlastHSPPipe * next
the next pipe in chain
BlastHSPPipeRunFn RunFnPtr
BlastHSPPipeFreeFn FreeFnPtr
The structure to contain all BLAST results, for multiple queries.
A wrap of data structure used to create a writer.
BlastHSPWriterNewFn NewFnPtr
ADT definition of BlastHSPWriter.
void * data
data structure
BlastHSPWriterFinalFn FinalFnPtr
BlastHSPWriterFreeFn FreeFnPtr
BlastHSPWriterRunFn RunFnPtr
BlastHSPWriterInitFn InitFnPtr
Structure holding all information about an HSP.
double evalue
This HSP's e-value.
BlastSeg query
Query sequence info.
Int4 context
Context number of query.
Int4 score
This HSP's raw score.
The structure to contain all BLAST results for one query sequence.
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Int4 hsplist_count
Filled size of the HSP lists array.
Int4 hsplist_current
Number of allocated HSP list arrays.
Options used when evaluating and saving hits These include: a.
EBlastProgramType program_number
indicates blastn, blastp, etc.
Int4 hitlist_size
Maximal number of database sequences to return results for.
The query related information.
BlastContextInfo * contexts
Information per context.
linked list of HSPs used to keep best hits for each query.
struct LinkedHSP_BH * next
voidp calloc(uInt items, uInt size)