102 m_LookupSegments=
NULL;
104 m_ScoreParams =
NULL;
129 BOOST_REQUIRE(writer_info ==
NULL);
140 query_blk, query_info, 1.0, &m_LookupSegments,
NULL,
152 query_info, m_ScoreBlk, &m_ScoreParams,
153 &m_ExtParams, &m_HitParams, &m_EffLenParams,
164 hsp_stream, query_blk, query_info, seq_src,
165 m_GapAlign, m_ScoreParams, m_ExtParams, m_HitParams, m_EffLenParams,
177 const int k_num_hsps_start = 7;
178 const int k_num_hsps_end = 6;
184 unique_ptr<SSeqLoc> qsl(
187 unique_ptr<SSeqLoc> ssl(
191 opts->SetTraditionalMegablastDefaults();
192 opts->SetGappedMode(
true);
193 opts->SetMatchReward(0);
194 opts->SetMismatchPenalty(0);
200 opts->SetGapOpeningCost(12);
201 opts->SetGapExtensionCost(2);
202 opts->SetWordSize(11);
203 opts->SetCutoffScore(500);
205 opts->SetMatrixName(
"20p43g.matrix");
206 opts->SetComplexityAdjMode(
true );
208 CBl2Seq blaster(*qsl, *ssl, *opts);
219 prog, strand_opt, &query_info);
221 query_info, &query_blk,
prog, strand_opt, blast_msg);
223 BOOST_REQUIRE(m->empty());
236 hsp_list->
hspcnt = k_num_hsps_start;
238 hsp_list->
hsp_max = k_num_hsps_start;
249 const int query_offset[k_num_hsps_start] = { 6378, 5950, 5295, 7191, 7351, 5199, 3818 };
250 const int query_end[k_num_hsps_start] = { 6792, 6161, 5400, 7239, 7431, 5276, 3830 };
251 const int subject_offset[k_num_hsps_start] = { 4, 39, 16, 378, 1, 0, 71};
252 const int subject_end[k_num_hsps_start] = { 419, 241, 123, 428, 86, 66, 83};
253 const int score[k_num_hsps_start] = { 1582, 1391, 902, 341, 332, 286, 118 };
254 const int context[k_num_hsps_start] = { 0, 0, 0, 0, 1, 0, 1 };
255 const int subject_frame[k_num_hsps_start] = { 1, 1, 1, 1, 1, 1, 1 };
256 const int query_gapped_start[k_num_hsps_start] = { 6625, 6035, 5298, 7194, 7411, 5202, 3821 };
257 const int subject_gapped_start[k_num_hsps_start] = { 244, 116, 19, 381, 66, 3, 74 };
260 for (
int index=0; index<k_num_hsps_start; index++)
292 hsp_stream, query_blk, query_info, seq_src, &
results);
296 const int query_offset_final[k_num_hsps_end] = { 6374, 5916, 5263, 6778, 5199, 3740 };
297 const int query_end_final[k_num_hsps_end] = { 6785, 6345, 5756, 7241, 5522, 4113 };
298 const int subject_offset_final[k_num_hsps_end] = { 1, 0, 1, 0, 0, 0 };
299 const int subject_end_final[k_num_hsps_end] = { 426, 419, 426, 430, 426, 425 };
300 const int score_final[k_num_hsps_end] = { 1560, 1532, 1183, 815, 637, 590 };
301 const int context_final[k_num_hsps_end] = { 0, 0, 0, 0, 0, 1 };
302 const int subject_frame_final[k_num_hsps_end] = { 1, 1, 1, 1, 1, 1};
303 const int query_gapped_start_final[k_num_hsps_end] = { 6625, 6035, 5298, 7194, 5202, 3821 };
304 const int subject_gapped_start_final[k_num_hsps_end] = { 244, 116, 19, 381, 3, 74 };
305 const int num_ident_final[k_num_hsps_end] = { 314, 304, 300, 273, 238, 247 };
311 BOOST_REQUIRE(hsp_list !=
NULL);
312 BOOST_REQUIRE_EQUAL(k_num_hsps_end, hsp_list->
hspcnt);
313 for (
int index=0; index<k_num_hsps_end; index++)
316 BOOST_REQUIRE_EQUAL(query_offset_final[index], tmp_hsp->
query.
offset);
317 BOOST_REQUIRE_EQUAL(query_end_final[index], tmp_hsp->
query.
end);
318 BOOST_REQUIRE_EQUAL(subject_offset_final[index], tmp_hsp->
subject.
offset);
319 BOOST_REQUIRE_EQUAL(subject_end_final[index], tmp_hsp->
subject.
end);
320 BOOST_REQUIRE_EQUAL(score_final[index], tmp_hsp->
score);
321 BOOST_REQUIRE_EQUAL(context_final[index], (
int) tmp_hsp->
context);
322 BOOST_REQUIRE_EQUAL(subject_frame_final[index], (
int) tmp_hsp->
subject.
frame);
323 BOOST_REQUIRE_EQUAL(query_gapped_start_final[index], tmp_hsp->
query.
gapped_start);
325 BOOST_REQUIRE_EQUAL(num_ident_final[index], tmp_hsp->
num_ident);
331 BOOST_REQUIRE(seq_src ==
NULL);
Declares the CBl2Seq (BLAST 2 Sequences) class.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Structures and functions prototypes used for BLAST gapped extension.
BlastGapAlignStruct * BLAST_GapAlignStructFree(BlastGapAlignStruct *gap_align)
Deallocates memory in the BlastGapAlignStruct structure.
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
BlastHSPWriter * BlastHSPWriterNew(BlastHSPWriterInfo **writer_info, BlastQueryInfo *query_info, BLAST_SequenceBlk *query)
A generic function to create writer.
Declaration of ADT to save and retrieve lists of HSPs in the BLAST engine.
int BlastHSPStreamWrite(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list)
Invokes the user-specified write function for this BlastHSPStream implementation.
BlastHSPStream * BlastHSPStreamFree(BlastHSPStream *hsp_stream)
Frees the BlastHSPStream structure by invoking the destructor function set by the user-defined constr...
BlastHSPStream * BlastHSPStreamNew(EBlastProgramType program, const BlastExtensionOptions *extn_opts, Boolean sort_on_read, Int4 num_queries, BlastHSPWriter *writer)
Initialize the HSP stream.
Common definitions for protein and nucleotide lookup tables.
Blast_Message * Blast_MessageFree(Blast_Message *blast_msg)
Deallocates message memory.
Declares the CBlastNucleotideOptionsHandle class.
Definitions which are dependent on the NCBI C++ Object Manager.
@ eDynProgTbck
standard affine gapping
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
BlastHitSavingParameters * BlastHitSavingParametersFree(BlastHitSavingParameters *parameters)
Deallocate memory for BlastHitSavingOptions*.
BlastEffectiveLengthsParameters * BlastEffectiveLengthsParametersFree(BlastEffectiveLengthsParameters *parameters)
Deallocate memory for BlastEffectiveLengthsParameters*.
BlastExtensionParameters * BlastExtensionParametersFree(BlastExtensionParameters *parameters)
Deallocate memory for BlastExtensionParameters.
BlastScoringParameters * BlastScoringParametersFree(BlastScoringParameters *parameters)
Deallocate memory for BlastScoringParameters.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Declares the CBlastProteinOptionsHandle class.
Defines interface for retrieving sequence identifiers.
Int4 BlastSeqSrcGetSeqLen(const BlastSeqSrc *seq_src, void *oid)
Retrieve sequence length (number of residues/bases)
BlastSeqSrc * BlastSeqSrcFree(BlastSeqSrc *seq_src)
Frees the BlastSeqSrc structure by invoking the destructor function set by the user-defined construct...
Utilities initialize/setup BLAST.
Int2 BLAST_MainSetUp(EBlastProgramType program_number, const QuerySetUpOptions *qsup_options, const BlastScoringOptions *scoring_options, BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, double scale_factor, BlastSeqLoc **lookup_segments, BlastMaskLoc **mask, BlastScoreBlk **sbpp, Blast_Message **blast_message, GET_MATRIX_PATH get_path)
"Main" setup routine for BLAST.
Int2 BLAST_GapAlignSetUp(EBlastProgramType program_number, const BlastSeqSrc *seq_src, const BlastScoringOptions *scoring_options, const BlastEffectiveLengthsOptions *eff_len_options, const BlastExtensionOptions *ext_options, const BlastHitSavingOptions *hit_options, BlastQueryInfo *query_info, BlastScoreBlk *sbp, BlastScoringParameters **score_params, BlastExtensionParameters **ext_params, BlastHitSavingParameters **hit_params, BlastEffectiveLengthsParameters **eff_len_params, BlastGapAlignStruct **gap_align)
Set up the auxiliary structures for gapped alignment / traceback only.
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
Functions to do gapped alignment with traceback.
Int2 BLAST_ComputeTraceback(EBlastProgramType program_number, BlastHSPStream *hsp_stream, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, const BlastSeqSrc *seq_src, BlastGapAlignStruct *gap_align, BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, BlastHitSavingParameters *hit_params, BlastEffectiveLengthsParameters *eff_len_params, const BlastDatabaseOptions *db_options, const PSIBlastOptions *psi_options, const BlastRPSInfo *rps_info, SPHIPatternSearchBlk *pattern_blk, BlastHSPResults **results, TInterruptFnPtr interrupt_search, SBlastProgress *progress_info)
Given the preliminary alignment results from a database search, redo the gapped alignment with traceb...
Definitions of special type used in BLAST.
Declares the CBlastxOptionsHandle class.
CAutoEnvironmentVariable –.
Wrapper class for BLAST_SequenceBlk .
Runs the BLAST algorithm between 2 sequences.
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
Encapsulates ALL the BLAST algorithm's options.
Wrapper class for BlastQueryInfo .
static CTestObjMgr & Instance()
BlastHitSavingParameters * m_HitParams
static BlastHSPStream * x_MakeStream(const CBlastOptions &opt)
Blast_Message * m_BlastMessage
BlastExtensionParameters * m_ExtParams
BlastSeqLoc * m_LookupSegments
BlastScoringParameters * m_ScoreParams
BlastGapAlignStruct * m_GapAlign
void x_SetupMain(const CBlastOptions &opt, const CBLAST_SequenceBlk &query_blk, const CBlastQueryInfo &query_info)
void x_SetupGapAlign(const CBlastOptions &opt, const BlastSeqSrc *seq_src, const CBlastQueryInfo &query_info)
BlastScoreBlk * m_ScoreBlk
void x_ComputeTracebak(const CBlastOptions &opt, BlastHSPStream *hsp_stream, const CBLAST_SequenceBlk &query_blk, const CBlastQueryInfo &query_info, const BlastSeqSrc *seq_src, BlastHSPResults **results)
BlastEffectiveLengthsParameters * m_EffLenParams
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
Declares the CDiscNucleotideOptionsHandle class.
BlastHitSavingOptions * GetHitSaveOpts() const
Returns BlastHitSavingOptions for eLocal objects, NULL for eRemote.
const CBlastOptionsHandle & GetOptionsHandle() const
Retrieve the options handle.
void SetDbLength(Int8 len)
Sets DbLength.
BlastExtensionOptions * GetExtnOpts() const
Returns BlastExtensionOptions for eLocal objects, NULL for eRemote.
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
objects::ENa_strand GetStrandOption() const
BlastSeqSrc * MultiSeqBlastSeqSrcInit(TSeqLocVector &seq_vector, EBlastProgramType program, bool dbscan_mode=false)
Initialize the sequence source structure.
BlastEffectiveLengthsOptions * GetEffLenOpts() const
Returns BlastEffectiveLengthsOptions for eLocal objects, NULL for eRemote.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
BlastScoringOptions * GetScoringOpts() const
Returns BlastScoringOptions for eLocal objects, NULL for eRemote.
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
QuerySetUpOptions * GetQueryOpts() const
Returns QuerySetUpOptions for eLocal objects, NULL for eRemote.
const TSeqLocVector & GetQueries() const
Retrieve a vector of query sequences.
CBlastOptionsHandle & SetOptionsHandle()
Set the options handle.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
BlastDatabaseOptions * GetDbOpts() const
Returns BlastDatabaseOptions for eLocal objects, NULL for eRemote.
const TSeqLocVector & GetSubjects() const
Retrieve a vector of subject sequences.
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
ENa_strand
strand of nucleic acid
@ eNa_strand_both
in forward orientation
Implementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
BlastHSPCollectorParams * BlastHSPCollectorParamsNew(const BlastHitSavingOptions *hit_options, Int4 compositionBasedStats, Boolean gapped_calculation)
Sets up parameter set for use by collector.
BlastHSPWriterInfo * BlastHSPCollectorInfoNew(BlastHSPCollectorParams *params)
WriterInfo to create a default writer: the collector.
Utility functions for lookup table generation.
void CheckForBlastSeqSrcErrors(const BlastSeqSrc *seqsrc)
Magic spell ;-) needed for some weird compilers... very empiric.
#define FALSE
bool replacement for C indicating false.
Defines: CTimeFormat - storage class for time format.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
BOOST_AUTO_TEST_CASE(testRMBlastTraceBack)
Defines a concrete strategy for the IBlastSeqInfoSrc interface for sequence identifiers retrieval fro...
Implementation of the BlastSeqSrc interface for a vector of sequence locations.
Implementation of the BlastSeqSrc interface using the C++ BLAST databases API.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Parameters for setting up effective lengths and search spaces.
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used
Computed values used as parameters for gapped alignments.
Structure supporting the gapped alignment.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Boolean do_not_reallocate
Is reallocation of the hsp_array allowed?
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Int4 hsp_max
The maximal number of HSPs allowed to be saved.
Int4 allocated
The allocated size of the hsp_array.
The structure to contain all BLAST results, for multiple queries.
Default implementation of BlastHSPStream.
A wrap of data structure used to create a writer.
ADT definition of BlastHSPWriter.
Structure holding all information about an HSP.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
Int4 context
Context number of query.
BlastSeg subject
Subject sequence info.
Int4 score
This HSP's raw score.
The structure to contain all BLAST results for one query sequence.
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Structure used for scoring calculations.
Boolean gapped_calculation
gap-free search if FALSE
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
Int4 gapped_start
Where the gapped extension started.
Int2 frame
Translation frame.
Used to hold a set of positions, mostly used for filtering.
Complete type definition of Blast Sequence Source ADT.
Structure to hold the a message from the core of the BLAST engine.
Declares the CTBlastnOptionsHandle class.
Utility stuff for more convenient using of Boost.Test library.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
static CS_CONTEXT * context
voidp calloc(uInt items, uInt size)