93 m_HitSavingOpts =
NULL;
95 m_ipInitHitlist =
NULL;
97 m_ipScoreParams =
NULL;
108 sfree(m_ipScoreParams);
109 sfree(m_ipHitParams);
110 sfree(m_ipExtParams);
119 const int num_hsps = 8;
120 const int q_offsets[num_hsps] =
121 {8799, 1358, 14042, 27664, 5143, 27737, 5231, 3212 };
122 const int s_offsets[num_hsps] =
123 { 2728, 2736, 2784, 2784, 2792, 2856, 2888, 3640 };
124 const int q_starts[num_hsps] =
125 { 8794, 1355, 14015, 27637, 5131, 27732, 5226, 3201 };
126 const int s_starts[num_hsps] =
127 { 2723, 2733, 2757, 2757, 2780, 2851, 2883, 3629 };
128 const int lengths[num_hsps] = { 174, 18, 141, 92, 38, 37, 28, 20 };
129 const int scores[num_hsps] = { 146, 18, 93, 40, 34, 21, 24, 16 };
135 for (index = 0; index < num_hsps; ++index) {
138 ungapped_data->
q_start = q_starts[index];
139 ungapped_data->
s_start = s_starts[index];
140 ungapped_data->
length = lengths[index];
141 ungapped_data->
score = scores[index];
143 s_offsets[index], ungapped_data);
149 const int num_hsps = 14;
150 const int q_offsets[num_hsps] =
151 { 8799, 1358, 8831, 14042, 27664, 5143, 8863, 8903, 8927, 14114,
152 27737, 8943, 5231, 3212 };
153 const int s_offsets[num_hsps] =
154 { 2728, 2736, 2760, 2784, 2784, 2792, 2792, 2832, 2856, 2856,
155 2856, 2872, 2888, 3640 };
160 for (index = 0; index < num_hsps; ++index) {
162 s_offsets[index],
NULL);
174 db_num_seq, &eff_len_params);
176 m_ipScoreBlk, m_iclsQueryInfo,
NULL);
188 BOOST_REQUIRE(status == 0);
191 if (m_ipScoreBlk->
gbp) {
198 BOOST_REQUIRE(status == 0);
201 kCoreProgramType, m_ipScoreBlk,
202 m_iclsQueryBlk->
sequence, m_iclsQueryInfo,
205 BOOST_REQUIRE(message ==
NULL);
207 BOOST_REQUIRE(status == 0);
209 kCoreProgramType, m_iclsQueryInfo,
NULL);
211 BOOST_REQUIRE(status == 0);
216 fillEffectiveLengths(kCoreProgramType, m_ScoringOpts,
220 m_ipScoreBlk, &m_ipScoreParams);
226 BOOST_REQUIRE(status == 0);
229 m_ExtnOpts, m_ipScoreBlk,
230 m_iclsQueryInfo, &m_ipExtParams);
234 BOOST_REQUIRE(status == 0);
237 m_ipScoreBlk, m_iclsQueryInfo, 0, 0, &m_ipHitParams);
240 subject_length, m_ipScoreBlk, &m_ipGapAlign);
241 BOOST_REQUIRE(status == 0);
248 const int num_hsps = 7;
249 const int query_starts[num_hsps] =
250 { 8794, 13982, 12612, 5131, 5226, 1355, 3201 };
251 const int subject_starts[num_hsps] =
252 { 2723, 2723, 2733, 2780, 2883, 2733, 3629 };
253 const int query_lengths[num_hsps] = { 174, 174, 182, 38, 28, 18, 20 };
254 const int subject_lengths[num_hsps] = { 174, 175, 183, 38, 28, 18, 20 };
258 pair<TSeqPos, TSeqPos>
range(20000, 35000);
259 unique_ptr<SSeqLoc> qsl(
262 unique_ptr<SSeqLoc> ssl(
268 queries.push_back(*qsl);
269 subjects.push_back(*ssl);
278 prog, strand_opt, blast_msg);
280 BOOST_REQUIRE(m->empty());
283 Uint4 subject_length;
284 vector<BLAST_SequenceBlk*> subject_blk_v;
286 &subject_blk_v, &subject_length);
288 setupStructures(subject_length,
false);
299 m_iclsQueryBlk, m_iclsQueryInfo, subject_blk_v[0],
300 m_ipGapAlign, m_ipScoreParams, m_ipExtParams,
301 m_ipHitParams, m_ipInitHitlist, &hsp_list,
305 BOOST_REQUIRE_EQUAL(num_hsps, hsp_list->
hspcnt);
307 BOOST_REQUIRE_EQUAL(num_hsps, gapped_stats->
extensions);
314 for (index = 0; index < num_hsps; ++index) {
316 query_starts[index]);
318 subject_starts[index]);
321 query_lengths[index]);
324 subject_lengths[index]);
331 const int num_hsps = 7;
332 const int query_starts[num_hsps] =
333 { 8794, 13982, 12612, 5131, 5226, 1355, 3201 };
334 const int subject_starts[num_hsps] =
335 { 2723, 2723, 2733, 2780, 2883, 2733, 3629 };
336 const int query_lengths[num_hsps] =
337 { 174, 174, 182, 38, 28, 18, 20 };
338 const int subject_lengths[num_hsps] =
339 { 174, 175, 183, 38, 28, 18, 20 };
344 pair<TSeqPos, TSeqPos>
range(20000, 35000);
345 unique_ptr<SSeqLoc> qsl(
348 unique_ptr<SSeqLoc> ssl(
355 queries.push_back(*qsl);
356 subjects.push_back(*ssl);
365 prog, strand_opt, blast_msg);
367 BOOST_REQUIRE(m->empty());
370 Uint4 subject_length;
371 vector<BLAST_SequenceBlk*> subject_blk_v;
373 &subject_blk_v, &subject_length);
375 setupStructures(subject_length,
true);
377 setupGreedyHitList();
384 m_iclsQueryBlk, m_iclsQueryInfo, subject_blk_v[0],
385 m_ipGapAlign, m_ipScoreParams, m_ipExtParams,
386 m_ipHitParams, m_ipInitHitlist, &hsp_list,
389 BOOST_REQUIRE_EQUAL(num_hsps, hsp_list->
hspcnt);
395 BOOST_REQUIRE(hsp_list ==
NULL);
399 setupGreedyHitList();
402 m_ipHitParams->options->min_hit_length = 100;
403 m_ipHitParams->options->percent_identity = 99;
406 m_iclsQueryBlk, m_iclsQueryInfo, subject_blk_v[0],
407 m_ipGapAlign, m_ipScoreParams, m_ipExtParams,
408 m_ipHitParams, m_ipInitHitlist, &hsp_list,
411 BOOST_REQUIRE_EQUAL(num_hsps, hsp_list->
hspcnt);
417 BOOST_REQUIRE_EQUAL(2*num_hsps, gapped_stats->
extensions);
424 for (index = 0; index < num_hsps; ++index) {
426 query_starts[index]);
428 subject_starts[index]);
431 query_lengths[index]);
434 subject_lengths[index]);
442 const int query_start = 2612;
443 const int query_end = 2754;
444 const int subject_start = 291;
445 const int subject_end = 438;
446 const int q_offset = 2754;
447 const int s_offset = 438;
457 unique_ptr<SSeqLoc> qsl(
460 pair<TSeqPos, TSeqPos>
range(1896999, 1897550);
461 unique_ptr<SSeqLoc> ssl(
468 queries.push_back(*qsl);
469 subjects.push_back(*ssl);
478 prog, strand_opt, blast_msg);
480 BOOST_REQUIRE(m->empty());
483 Uint4 subject_length;
484 vector<BLAST_SequenceBlk*> subject_blk_v;
486 &subject_blk_v, &subject_length);
488 setupStructures(subject_length,
true);
491 m_ipScoreParams->reward = 1;
492 m_ipScoreParams->penalty = -2;
493 m_ipScoreParams->gap_open = 0;
494 m_ipScoreParams->gap_extend = 0;
496 m_ipExtParams->gap_x_dropoff = 16;
497 m_ipExtParams->gap_x_dropoff_final = 54;
502 subject_length, m_ipScoreBlk, &m_ipGapAlign);
513 m_iclsQueryBlk, m_iclsQueryInfo, subject_blk_v[0],
514 m_ipGapAlign, m_ipScoreParams, m_ipExtParams,
515 m_ipHitParams, m_ipInitHitlist, &hsp_list,
518 m_ipInitHitlist->init_hsp_array[0].ungapped_data =
NULL;
520 BOOST_REQUIRE_EQUAL(1, hsp_list->
hspcnt);
532 BOOST_REQUIRE(m_ipGapAlign->greedy_query_seed_start >= query_start);
533 BOOST_REQUIRE(m_ipGapAlign->greedy_query_seed_start <= query_end);
534 BOOST_REQUIRE(m_ipGapAlign->greedy_subject_seed_start >= subject_start);
535 BOOST_REQUIRE(m_ipGapAlign->greedy_subject_seed_start <= subject_end);
541 const int kSize = 100;
542 const int kDefaultSize = 1000000;
544 BOOST_REQUIRE(retval);
551 const int kDefaultSize = 1000000;
553 BOOST_REQUIRE(retval);
559 const int kSize = 5000000;
561 BOOST_REQUIRE(retval);
569 bool null_output =
false;
573 BOOST_REQUIRE_EQUAL(
true, null_output);
579 bool null_output =
false;
583 BOOST_REQUIRE_EQUAL(
true, null_output);
Declares the CBl2Seq (BLAST 2 Sequences) class.
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
BlastInitHitList * BLAST_InitHitListNew(void)
Allocate memory for the BlastInitHitList structure.
Boolean BLAST_SaveInitialHit(BlastInitHitList *init_hitlist, Int4 q_off, Int4 s_off, BlastUngappedData *ungapped_data)
Save the initial hit data into the initial hit list structure.
void Blast_InitHitListSortByScore(BlastInitHitList *init_hitlist)
Sort array of initial HSPs by score.
Blast_ExtendWord * BlastExtendWordFree(Blast_ExtendWord *ewp)
Deallocate memory for the word extension structure.
BlastInitHitList * BLAST_InitHitListFree(BlastInitHitList *init_hitlist)
Free memory for the BlastInitList structure.
Structures and functions prototypes used for BLAST gapped extension.
Int2 BLAST_GetGappedScore(EBlastProgramType program_number, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, BLAST_SequenceBlk *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, const BlastHitSavingParameters *hit_params, BlastInitHitList *init_hitlist, BlastHSPList **hsp_list_ptr, BlastGappedStats *gapped_stats, Boolean *fence_hit)
Performs gapped extension for all non-Mega BLAST programs, given that ungapped extension has been don...
Int2 BLAST_GapAlignStructNew(const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, Uint4 max_subject_length, BlastScoreBlk *sbp, BlastGapAlignStruct **gap_align_ptr)
Initializes the BlastGapAlignStruct structure.
BlastGapAlignStruct * BLAST_GapAlignStructFree(BlastGapAlignStruct *gap_align)
Deallocates memory in the BlastGapAlignStruct structure.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
Blast_Message * Blast_MessageFree(Blast_Message *blast_msg)
Deallocates message memory.
Declares the CBlastNucleotideOptionsHandle class.
Definitions which are dependent on the NCBI C++ Object Manager.
BlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options)
Deallocate memory for BlastHitSavingOptions.
@ eGreedyScoreOnly
Greedy extension (megaBlast)
Int2 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions **options)
Allocate memory for BlastEffectiveLengthsOptions* and fill with default values.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
BlastEffectiveLengthsOptions * BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions *options)
Deallocate memory for BlastEffectiveLengthsOptions*.
BlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)
Deallocate memory for BlastExtensionOptions.
Int2 BlastHitSavingOptionsNew(EBlastProgramType program, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
Int2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)
Allocate memory for BlastExtensionOptions and fill with default values.
BlastHitSavingParameters * BlastHitSavingParametersFree(BlastHitSavingParameters *parameters)
Deallocate memory for BlastHitSavingParameters*.
BlastEffectiveLengthsParameters * BlastEffectiveLengthsParametersFree(BlastEffectiveLengthsParameters *parameters)
Deallocate memory for BlastEffectiveLengthsParameters*.
Int2 BlastExtensionParametersNew(EBlastProgramType blast_program, const BlastExtensionOptions *options, BlastScoreBlk *sbp, BlastQueryInfo *query_info, BlastExtensionParameters **parameters)
Calculate the raw values for the X-dropoff parameters.
Int2 BlastScoringParametersNew(const BlastScoringOptions *options, BlastScoreBlk *sbp, BlastScoringParameters **parameters)
Calculate scaled cutoff scores and gap penalties.
Int2 BlastHitSavingParametersNew(EBlastProgramType program_number, const BlastHitSavingOptions *options, const BlastScoreBlk *sbp, const BlastQueryInfo *query_info, Int4 avg_subject_length, Int4 compositionBasedStats, BlastHitSavingParameters **parameters)
Allocate memory and initialize the BlastHitSavingParameters structure.
Int2 BlastEffectiveLengthsParametersNew(const BlastEffectiveLengthsOptions *options, Int8 db_length, Int4 num_seqs, BlastEffectiveLengthsParameters **parameters)
Allocate memory for BlastEffectiveLengthsParameters.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Utilities initialize/setup BLAST.
Int2 Blast_ScoreBlkKbpGappedCalc(BlastScoreBlk *sbp, const BlastScoringOptions *scoring_options, EBlastProgramType program, const BlastQueryInfo *query_info, Blast_Message **error_return)
Blast_ScoreBlkKbpGappedCalc, fills the ScoreBlkPtr for a gapped search.
Int2 Blast_ScoreBlkMatrixInit(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)
Initializes the substitution matrix in the BlastScoreBlk according to the scoring options specified.
Int2 BLAST_CalcEffLengths(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, const BlastEffectiveLengthsParameters *eff_len_params, const BlastScoreBlk *sbp, BlastQueryInfo *query_info, Blast_Message **blast_message)
Function to calculate effective query length and db length as well as effective search space.
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
Int2 Blast_ScoreBlkKbpUngappedCalc(EBlastProgramType program, BlastScoreBlk *sbp, Uint1 *query, const BlastQueryInfo *query_info, Blast_Message **blast_message)
Calculate and fill the ungapped Karlin-Altschul parameters in the BlastScoreBlk structure (fields kbp...
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
BOOST_AUTO_TEST_CASE(testGapAlignment)
int x_score_compare_hsps(const void *v1, const void *v2)
Wrapper class for BLAST_SequenceBlk .
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
Encapsulates ALL the BLAST algorithm's options.
Wrapper class for BlastQueryInfo .
static CTestObjMgr & Instance()
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
void MBSpaceFree(SMBSpace *sp)
Free the space structure.
SMBSpace * MBSpaceNew(int num_space_arrays)
Allocate a space structure for greedy alignment. At least num_space_arrays will be allocated,...
void SetupSubjects(TSeqLocVector &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Sets up internal subject data structure for the BLAST search.
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
objects::ENa_strand GetStrandOption() const
#define BLASTNA_SEQ_CODE
Identifies the blastna alphabet, for use in blast only.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
ENa_strand
strand of nucleic acid
@ eNa_strand_both
in forward orientation
range(_Ty, _Ty) -> range< _Ty >
Magic spell ;-) needed for some weird compilers... very empiric.
Defines: CTimeFormat - storage class for time format.
static SQLCHAR output[256]
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Uint1 * sequence
Sequence used for search (could be translation).
Options for setting up effective lengths and search spaces.
Parameters for setting up effective lengths and search spaces.
Options used for gapped extension These include: a.
EBlastPrelimGapExt ePrelimGapExt
type of preliminary gapped extension (normally) for calculating score.
Computed values used as parameters for gapped alignments.
Structure supporting the gapped alignment.
Structure containing hit counts from the gapped stage of a BLAST search.
Int4 extensions
Total number of gapped extensions performed.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Structure holding all information about an HSP.
BlastSeg query
Query sequence info.
BlastSeg subject
Subject sequence info.
Int4 score
This HSP's raw score.
Options used when evaluating and saving hits These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Structure to hold all initial HSPs for a given subject sequence.
Structure used for scoring calculations.
Blast_KarlinBlk ** kbp
Karlin-Altschul parameters.
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
Blast_KarlinBlk ** kbp_gap_std
K-A parameters for std (not position-based) alignments.
Blast_KarlinBlk ** kbp_std
K-A parameters for ungapped alignments.
Blast_GumbelBlk * gbp
Gumbel parameters for FSC.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Boolean gapped_calculation
gap-free search if FALSE
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
Structure to hold ungapped alignment information.
Int4 score
Score of the ungapped alignment.
Int4 length
Length of the ungapped alignment.
Int4 q_start
Start of the ungapped alignment in query.
Int4 s_start
Start of the ungapped alignment in subject.
Structure for keeping initial word extension information.
Structure to hold a message from the core of the BLAST engine.
BlastHitSavingParameters * m_ipHitParams
BlastHitSavingOptions * m_HitSavingOpts
CBLAST_SequenceBlk m_iclsQueryBlk
~CBlastExtendTestFixture()
CBlastExtendTestFixture()
BlastGapAlignStruct * m_ipGapAlign
void setupStructures(Uint4 subject_length, bool greedy)
BlastScoringOptions * m_ScoringOpts
BlastScoreBlk * m_ipScoreBlk
BlastExtensionParameters * m_ipExtParams
CBlastQueryInfo m_iclsQueryInfo
BlastInitHitList * m_ipInitHitlist
BlastScoringParameters * m_ipScoreParams
void fillEffectiveLengths(EBlastProgramType program_type, const BlastScoringOptions *score_options, Int8 db_length, Int4 db_num_seq)
BlastExtensionOptions * m_ExtnOpts
void setupGreedyHitList()
Space structure for greedy alignment algorithm.
Int4 space_allocated
number of structures allocated
Utility stuff for more convenient using of Boost.Test library.
voidp calloc(uInt items, uInt size)