95 query_loc->
SetWhole().SetGi(query_gi);
98 m_vQuery.push_back(
SSeqLoc(query_loc, query_scope));
101 subject_loc->
SetWhole().SetGi(subject_gi);
104 m_vSubject.push_back(
SSeqLoc(subject_loc, subject_scope));
113 diagnostics->ungapped_stat;
115 diagnostics->gapped_stat;
117 BOOST_REQUIRE_EQUAL(22670293, (
int)ungapped_stats->
lookup_hits);
118 BOOST_REQUIRE_EQUAL(296326, ungapped_stats->
init_extends);
120 BOOST_REQUIRE_EQUAL(1254, gapped_stats->
extensions);
131 BOOST_REQUIRE_EQUAL(1152, (
int)ungapped_stats->
lookup_hits);
134 BOOST_REQUIRE_EQUAL(8, gapped_stats->
extensions);
149 const Int4 kNumHspsEnd=23;
159 setupQueryAndSubject(kQueryGi, kSubjectGi);
172 const int kQueryOffsetFinal[kNumHspsEnd] =
173 { 407, 486, 421, 569, 265, 320, 266, 321, 727, 659,
174 92, 1, 1, 727, 422, 216, 167, 825, 167, 831, 216, 369, 49 };
175 const int kQueryLengthFinal[kNumHspsEnd] =
176 { 164, 85, 62, 67, 58, 74, 56, 69, 56, 66, 147, 69,
177 73, 61, 40, 26, 35, 54, 35, 48, 21, 69, 22 };
178 const int kScoreFinal[kNumHspsEnd] =
179 { 368, 199, 160, 104, 99, 95, 94, 92, 94, 89, 108,
180 101, 97, 95, 89, 86, 84, 84, 83, 79, 75, 74, 74};
181 const double kEvalueFinal[kNumHspsEnd] =
182 {1.84467e-35, 4.47098e-34, 4.47098e-34, 4.47098e-34,
183 4.23245e-08, 4.23245e-08, 3.29958e-07, 3.29958e-07,
184 7.11395e-07, 7.11395e-07, 8.64076e-05, 0.000570668,
185 0.001678, 0.00287725, 0.0145032, 0.0325588,
186 0.0558201, 0.0558201, 0.0730883, 0.214807, 0.631249,
187 0.826482, 0.826482 };
193 BOOST_REQUIRE_EQUAL(1,
results->num_queries);
194 BOOST_REQUIRE_EQUAL(1,
results->hitlist_array[0]->hsplist_count);
197 BOOST_REQUIRE_EQUAL(kNumHspsEnd, hsplist->
hspcnt);
204 for (
int index=0; index<kNumHspsEnd; index++) {
206 BOOST_REQUIRE_EQUAL(kQueryOffsetFinal[index], tmp_hsp->
query.
offset);
207 BOOST_REQUIRE_EQUAL(kQueryLengthFinal[index],
209 BOOST_REQUIRE_EQUAL(kScoreFinal[index], tmp_hsp->
score);
210 BOOST_REQUIRE(
fabs((kEvalueFinal[index]-tmp_hsp->
evalue) /
211 kEvalueFinal[index]) < 0.001);
221 const Int4 kNumHspsEnd=8;
228 setupQueryAndSubject(kQueryGi, kSubjectGi);
245 const int kQueryOffsetFinal[kNumHspsEnd] = { 98, 425, 320, 340, 823, 675, 247, 103};
246 const int kQueryLengthFinal[kNumHspsEnd] = { 223,211, 35, 13, 46, 19, 25, 24};
247 const int kScoreFinal[kNumHspsEnd] = {1138, 173, 72, 46, 40, 36, 32, 30};
248 const double kEvalueFinal[kNumHspsEnd] =
249 {2.52769e-153, 4.98722e-18, 2.4525e-05, 0.0352845, 0.187202, 0.568557, 1.72476, 3.0028};
255 BOOST_REQUIRE_EQUAL(1,
results->num_queries);
256 BOOST_REQUIRE_EQUAL(1,
results->hitlist_array[0]->hsplist_count);
259 BOOST_REQUIRE_EQUAL(kNumHspsEnd, hsplist->
hspcnt);
266 for (
int index=0; index<kNumHspsEnd; index++)
269 BOOST_REQUIRE_EQUAL(kQueryOffsetFinal[index],
271 BOOST_REQUIRE_EQUAL(kQueryLengthFinal[index],
273 BOOST_REQUIRE_EQUAL(kScoreFinal[index], tmp_hsp->
score);
274 BOOST_REQUIRE(
fabs(kEvalueFinal[index]-tmp_hsp->
evalue) < 1.0e-10 ||
276 kEvalueFinal[index]) < 0.01);
287 const int kNumHsps = 330;
290 setupQueryAndSubject(kQueryGi, kSubjectGi);
307 BOOST_REQUIRE_EQUAL(1,
results->num_queries);
308 BOOST_REQUIRE_EQUAL(1,
results->hitlist_array[0]->hsplist_count);
311 BOOST_REQUIRE_EQUAL(kNumHsps, hsplist->
hspcnt);
321 const int kNumHsps = 3;
322 const int kMaskedLength = 389;
325 setupQueryAndSubject(kQueryGi, kSubjectGi);
335 Uint4 masked_length = m_vQuery[0].mask->GetPacked_int().GetLength();
336 BOOST_REQUIRE_EQUAL(kMaskedLength, (
int) masked_length);
348 BOOST_REQUIRE_EQUAL(1,
results->num_queries);
349 BOOST_REQUIRE_EQUAL(1,
results->hitlist_array[0]->hsplist_count);
352 BOOST_REQUIRE_EQUAL(kNumHsps, hsplist->
hspcnt);
359 const string kDbName(
"data/seqn");
360 const size_t kNumHits = 2;
362 const int kScores[kNumHits] = { 1024, 944 };
363 const int kNumIdent[kNumHits] = { 458, 423 };
366 query_loc->
SetWhole().SetGi(kQueryGi);
369 m_vQuery.push_back(
SSeqLoc(query_loc, query_scope));
384 CLocalBlast blaster(query_factory, options, dbinfo);
388 BOOST_REQUIRE_EQUAL((
int)1, (
int)
results.GetNumResults());
390 BOOST_REQUIRE_EQUAL(kNumHits, alignment->
Get().size());
397 BOOST_REQUIRE_EQUAL(kScores[0], score);
399 BOOST_REQUIRE_EQUAL(kScores[1], score);
402 BOOST_REQUIRE(first_hit->
GetNamedScore(
"num_ident", num_ident));
403 BOOST_REQUIRE_EQUAL(kNumIdent[0], num_ident);
404 BOOST_REQUIRE(second_hit->
GetNamedScore(
"num_ident", num_ident));
405 BOOST_REQUIRE_EQUAL(kNumIdent[1], num_ident);
425 const string kDbName(
"data/seqp");
428 const int kNumHits = 31;
429 const int kNumHitsToCheck = 3;
430 const int kIndices[kNumHitsToCheck] = { 1, 4, 8 };
431 const int kScores[kNumHitsToCheck] = { 519, 56, 54 };
432 const int kOids[kNumHitsToCheck] = { 74, 971, 45 };
433 const int kQueryLengths[kNumHitsToCheck] = { 297, 46, 63 };
434 const int kSubjectLengths[kNumHitsToCheck] = { 298, 48, 55 };
438 query_loc1->
SetWhole().SetGi(kQueryGi1);
441 m_vQuery.push_back(
SSeqLoc(query_loc1, query_scope1));
443 query_loc2->
SetWhole().SetGi(kQueryGi2);
446 m_vQuery.push_back(
SSeqLoc(query_loc2, query_scope2));
463 BOOST_REQUIRE_EQUAL(2,
results->num_queries);
464 BOOST_REQUIRE_EQUAL(kNumHits,
results->hitlist_array[0]->hsplist_count);
466 BOOST_REQUIRE_CLOSE(
results->hitlist_array[0]->hsplist_array[0]->best_evalue,
467 results->hitlist_array[0]->hsplist_array[0]->hsp_array[0]->evalue,
468 results->hitlist_array[0]->hsplist_array[0]->hsp_array[0]->evalue/2);
470 for (
int index = 0; index < kNumHitsToCheck;
472 const int kHitIndex = kIndices[index];
474 results->hitlist_array[0]->hsplist_array[kHitIndex];
475 BOOST_REQUIRE_EQUAL(kOids[index], hsp_list->
oid);
478 BOOST_REQUIRE_EQUAL(kScores[index], hsp->
score);
480 BOOST_REQUIRE_EQUAL(kQueryLengths[index],
482 BOOST_REQUIRE_EQUAL(kSubjectLengths[index],
489 const unsigned char query[] = {
'\016',
'\007',
'\014',
'\024',
'\004',
'\015',
'\011',
490 '\022',
'\012',
'\016',
'\001',
'\010',
'\007',
'\005',
'\014',
'\023',
491 '\021',
'\003',
'\016',
'\005',
'\013',
'\006',
'\020',
'\011',
'\006',
492 '\015',
'\016',
'\004',
'\017'};
494 const unsigned char subject[] = {
'\000',
'\000',
'\000',
495 '\004',
'\015',
'\011',
'\022',
'\012',
'\016',
'\001',
'\010',
'\007',
496 '\005',
'\014',
'\023',
'\021',
'\003',
'\016',
'\005',
'\013',
'\006',
497 '\020',
'\011',
'\006',
'\015',
'\016',
'\004',
'\017'};
515 Int4 q_offset, s_offset;
527 BOOST_REQUIRE_EQUAL(q_offset, 22);
528 BOOST_REQUIRE_EQUAL(s_offset, 21);
529 BOOST_REQUIRE_EQUAL(retval,
true);
540 BOOST_REQUIRE_EQUAL(q_offset, 18);
541 BOOST_REQUIRE_EQUAL(s_offset, 17);
542 BOOST_REQUIRE_EQUAL(retval,
true);
552 BOOST_REQUIRE_EQUAL(retval,
false);
User-defined methods of the data storage class.
static const Int8 kEffectiveSearchSpace
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
Function calls to actually perform a BLAST search (high level).
Structures and functions prototypes used for BLAST gapped extension.
Boolean BlastGetOffsetsForGappedAlignment(const Uint1 *query, const Uint1 *subject, const BlastScoreBlk *sbp, BlastHSP *hsp, Int4 *q_retval, Int4 *s_retval)
Function to look for the highest scoring window (of size HSP_MAX_WINDOW) in an HSP and return the mid...
void Blast_HSPListSortByEvalue(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by e-value, with scores and other criteria used to resolve ties.
BlastHSP * Blast_HSPNew(void)
Allocates and zeroes out memory for an HSP structure.
Boolean Blast_HSPListIsSortedByScore(const BlastHSPList *hsp_list)
Check if HSP list is sorted by score.
Declaration of ADT to save and retrieve lists of HSPs in the BLAST engine.
Declares the CBlastNucleotideOptionsHandle class.
Definitions which are dependent on the NCBI C++ Object Manager.
The structures and functions in blast_options.
#define BLAST_GAP_OPEN_PROT
Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
Int2 BLAST_FillScoringOptions(BlastScoringOptions *options, EBlastProgramType program, Boolean greedy_extension, Int4 penalty, Int4 reward, const char *matrix, Int4 gap_open, Int4 gap_extend)
Fill non-default values in the BlastScoringOptions structure.
#define BLAST_GAP_EXTN_PROT
cost to extend a gap.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
BlastSeqSrc * BlastSeqSrcFree(BlastSeqSrc *seq_src)
Frees the BlastSeqSrc structure by invoking the destructor function set by the user-defined construct...
Utilities initialize/setup BLAST.
Int2 Blast_ScoreBlkMatrixInit(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)
Initializes the substitution matrix in the BlastScoreBlk according to the scoring options specified.
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
Functions to do gapped alignment with traceback.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ eBlastp
Protein-Protein.
@ eTblastn
Protein-Translated nucl.
void testLongMatchDiagnostics(BlastDiagnostics *diagnostics)
void testShortMatchDiagnostics(BlastDiagnostics *diagnostics)
BOOST_AUTO_TEST_CASE(testTBLASTNLongMatchBlastEngine)
Wrapper class for BlastHSPResults .
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
Search class to perform the preliminary stage of the BLAST search.
Handle to the nucleotide-nucleotide options to the discontiguous BLAST algorithm.
Class to perform a BLAST search on local BLAST databases. Note that PHI-BLAST can be run using this cl...
NCBI C++ Object Manager-dependent implementation of IQueryFactory.
Search Results for All Queries.
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
bool GetNamedScore(const string &id, int &score) const
Get score.
static CTestObjMgr & Instance()
Declares the CDiscNucleotideOptionsHandle class.
void SetEvalueThreshold(double eval)
Sets EvalueThreshold.
void SetMatchReward(int r)
Sets MatchReward.
CRef< SInternalData > Run()
Borrow the internal data and results.
BlastSeqSrc * SeqDbBlastSeqSrcInit(const string &dbname, bool is_prot, Uint4 first_seq=0, Uint4 last_seq=0, Int4 mask_algo_id=-1, ESubjectMaskingType mask_type=eNoSubjMasking)
Initialize the sequence source structure.
void SetEffectiveSearchSpace(Int8 eff)
Sets EffectiveSearchSpace.
CRef< CSearchResultSet > Run()
Executes the search.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
void SetTraditionalBlastnDefaults()
Sets TraditionalBlastnDefaults.
void SetMismatchPenalty(int p)
Sets MismatchPenalty.
BlastSeqSrc * MultiSeqBlastSeqSrcInit(TSeqLocVector &seq_vector, EBlastProgramType program, bool dbscan_mode=false)
Initialize the sequence source structure.
#define BLASTAA_SEQ_CODE
== Seq_code_ncbistdaa
void SetWindowSize(int ws)
Sets WindowSize.
void SetRepeatFiltering(bool val)
Enable repeat filtering.
void SetFilterString(const char *f, bool clear=true)
void Blast_FindRepeatFilterLoc(TSeqLocVector &query_loc, const CBlastOptionsHandle *opts_handle)
Finds repeats locations for a given set of sequences.
void SetGapExtensionCost(int e)
Sets GapExtensionCost.
void SetMaskAtHash(bool m=true)
Sets MaskAtHash.
void SetFilterString(const char *f, bool clear=true)
Sets FilterString.
void SetWordSize(int ws)
Sets WordSize.
CRef< TBlastDiagnostics > m_Diagnostics
Diagnostic output from preliminary and traceback stages.
void SetPercentIdentity(double p)
Sets PercentIdentity.
void SetGapOpeningCost(int g)
Sets GapOpeningCost.
CRef< TBlastHSPStream > m_HspStream
HSP output of the preliminary stage goes here.
BlastHSPResults * ComputeBlastHSPResults(BlastHSPStream *stream, Uint4 max_num_hsps=0, bool *rm_hsps=NULL, vector< bool > *rm_hsps_info=NULL) const
Return HSPs in a structure other than the HSPStream? Provide conversion? How to combine this with CBl...
@ eBlastDbIsNucleotide
nucleotide
static bool PreferAccessionOverGi(void)
Check if the option to prefer accession.version over GI is enabled (SeqId/PreferAccessionOverGi or SE...
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
const Tdata & Get(void) const
Get the member data.
E_Choice Which(void) const
Which variant is currently selected.
TGi GetGi(void) const
Get the variant data.
TVersion GetVersion(void) const
Get the Version member data.
const TDdbj & GetDdbj(void) const
Get the variant data.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_Gi
GenInfo Integrated Database.
Main class to perform a BLAST search on the local machine.
Magic spell ;-) needed for some weird compilers... very empiric.
Uint1 Boolean
bool replacement for C
#define FALSE
bool replacement for C indicating false.
Defines: CTimeFormat - storage class for time format.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
C++ implementation of repeats filtering for C++ BLAST.
Implementation of the BlastSeqSrc interface for a vector of sequence locations.
Implementation of the BlastSeqSrc interface using the C++ BLAST databases API.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Return statistics from the BLAST search.
BlastUngappedStats * ungapped_stat
Ungapped extension counts.
BlastGappedStats * gapped_stat
Gapped extension counts.
void setupQueryAndSubject(TGi query_gi, TGi subject_gi)
~BlastEngineTestFixture()
Structure containing hit counts from the gapped stage of a BLAST search.
Int4 good_extensions
Number of HSPs below the e-value threshold after gapped extension.
Int4 extensions
Total number of gapped extensions performed.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Structure holding all information about an HSP.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
BlastSeg subject
Subject sequence info.
Int4 score
This HSP's raw score.
Structure used for scoring calculations.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Complete type definition of Blast Sequence Source ADT.
Structure containing hit counts from the ungapped stage of a BLAST search.
Int8 lookup_hits
Number of successful lookup table hits.
Int4 init_extends
Number of initial words found and extended.
Int4 good_init_extends
Number of successful initial extensions, i.e.
Structure to represent a single sequence to be fed to BLAST.
Utility stuff for more convenient using of Boost.Test library.