62 memcpy(
buffer+1, seq.c_str(), seq.size());
64 for (
unsigned int index = 1; index <= seq.size(); ++index)
73 const string kQuerySeq(
"GPLRQIFVEFLERSCTAEFSGFLLYKELGRRLKKTNPVVAEIFSLMSR"
74 "DEARHAGFLNKGLSDFNLALDLGFLTKARKYTFFKPKFIFYATYLSEK"
75 "IGYWRYITIFRHLKANPEYQVYPIFKYFENWCQDENRHGDFFSALL");
78 x_SetupSequenceBlk(kQuerySeq, &query_blk);
89 BOOST_REQUIRE_EQUAL(8, gap_align->
score);
91 BOOST_REQUIRE_EQUAL(142, gap_align->
query_stop);
98 BOOST_REQUIRE_EQUAL(3, esp->
size);
99 BOOST_REQUIRE_EQUAL(45, esp->
num[2]);
135 const int kNumPatterns = 4;
137 { {100,20}, {200,18}, {300,22}, {400, 21} };
149 const int kHspMax = 10;
150 const int kNumRepetitions = 4;
152 hsp_list->
oid = index;
153 for (
int hsp_index = 0; hsp_index < kHspMax; ++hsp_index) {
155 hsp->
score = 200 - 2*index - 5*hsp_index;
166 for (
int index = 0; index < kHitlistSize; ++index) {
176 for (index = 0; index < hitlist->
hsplist_count - 1; ++index) {
187 const int kNumHspLists = 20;
188 for (
int hitlist_index = 0; hitlist_index < num_results;
190 BOOST_REQUIRE(results_array[hitlist_index] !=
NULL);
194 BOOST_REQUIRE(x_CheckIncreasingBestEvalues(hitlist));
195 const int kHspCnt = (13-hitlist_index)/num_results;
196 for (
int hsplist_index = 0; hsplist_index < kNumHspLists;
199 BOOST_REQUIRE_EQUAL(kHspCnt, hsplist->
hspcnt);
200 BOOST_REQUIRE_EQUAL(hsplist_index, hsplist->
oid);
202 for (
int hsp_index = 0; hsp_index < kHspCnt; ++hsp_index) {
204 BOOST_REQUIRE_EQUAL(hitlist_index,
208 results_array[hitlist_index] =
211 sfree(results_array);
220 setUpLookupTable(
"[ED]-x(32,40)-E-x(2)-H");
222 BOOST_REQUIRE(m_ScoreBlk->kbp_gap == m_ScoreBlk->kbp_gap_std);
223 BOOST_REQUIRE(m_ScoreBlk->kbp == m_ScoreBlk->kbp_std);
224 BOOST_REQUIRE_EQUAL(0.5, m_ScoreBlk->kbp_gap[0]->paramC);
225 BOOST_REQUIRE(m_ScoreBlk->kbp_gap[0]->H != 0);
226 BOOST_REQUIRE_EQUAL(m_ScoreBlk->kbp[0]->Lambda,
227 m_ScoreBlk->kbp_gap[0]->Lambda);
228 BOOST_REQUIRE_EQUAL(m_ScoreBlk->kbp[0]->K, m_ScoreBlk->kbp_gap[0]->K);
242 setUpLookupTable(
"LLY");
244 BOOST_REQUIRE(m_ScoreBlk->kbp_gap == m_ScoreBlk->kbp_gap_std);
245 BOOST_REQUIRE(m_ScoreBlk->kbp == m_ScoreBlk->kbp_std);
246 BOOST_REQUIRE_EQUAL(0.5, m_ScoreBlk->kbp_gap[0]->paramC);
247 BOOST_REQUIRE(m_ScoreBlk->kbp_gap[0]->H != 0);
248 BOOST_REQUIRE_EQUAL(m_ScoreBlk->kbp[0]->Lambda,
249 m_ScoreBlk->kbp_gap[0]->Lambda);
250 BOOST_REQUIRE_EQUAL(m_ScoreBlk->kbp[0]->K, m_ScoreBlk->kbp_gap[0]->K);
266 setUpLookupTable(
"[ED]-x(32,40)-E-x(2)-H");
267 x_FindQueryOccurrences();
271 BOOST_REQUIRE_CLOSE(0.0013,
pattern_info->probability, 1);
274 BOOST_REQUIRE_EQUAL(37, m_QueryInfo->contexts[0].length_adjustment);
279 setUpLookupTable(
"LLY");
280 x_FindQueryOccurrences();
284 BOOST_REQUIRE_CLOSE(0.000262,
pattern_info->probability, 1);
287 BOOST_REQUIRE_EQUAL(3, m_QueryInfo->contexts[0].length_adjustment);
292 const int kNumDbHits = 33;
293 setUpLookupTable(
"[ED]-x(32,40)-E-x(2)-H");
294 x_FindQueryOccurrences();
307 BOOST_REQUIRE_CLOSE(7.568e-59, hsp->
evalue, 1);
310 BOOST_REQUIRE(hsp_list ==
NULL);
315 setUpLookupTable(
"[ED]-x(32,40)-E-x(2)-H");
317 kSubjectSeq(
"GETRKLFVEFLERSCTAEFSGFLLYKELGRRLKGKSPVLAECFNLMSRDEARHAG"
318 "FLNKALSDFNLSLDLGFLTKSRNYTFFKPKFIFYATYLSEKIGYWRYITIYRHLE"
319 "AHPEDRVYPIFRFFENWCQDENRHGDFFDAIMKSQPQILNDWKARLWSRF");
320 const int kNumHits = 3;
321 const int kStarts[kNumHits] = { 8, 11, 94 };
322 const int kEnds[kNumHits] = { 52, 52, 133 };
324 Int4 start_offset = 0;
326 x_SetupSequenceBlk(kSubjectSeq, &subject_blk);
334 BOOST_REQUIRE_EQUAL(kNumHits, hit_count);
335 for (
int index = 0; index < kNumHits; ++index) {
336 BOOST_REQUIRE_EQUAL(kStarts[index],
337 (
int) offset_pairs[index].phi_offsets.
s_start);
338 BOOST_REQUIRE_EQUAL(kEnds[index],
339 (
int) offset_pairs[index].phi_offsets.
s_end);
345 setUpLookupTable(
"[ED]-x(32,40)-E-x(2)-H");
347 kQuerySeq(
"GPLRQIFVEFLERSCTAEFSGFLLYKELGRRLKKTNPVVAEIFSLMSRDEARHAGFL"
348 "NKGLSDFNLALDLGFLTKARKYTFFKPKFIFYATYLSEKIGYWRYITIFRHLKANPE"
349 "YQVYPIFKYFENWCQDENRHGDFFSALL");
351 kSubjectSeq(
"GETRKLFVEFLERSCTAEFSGFLLYKELGRRLKGKSPVLAECFNLMSRDEARHAG"
352 "FLNKALSDFNLSLDLGFLTKSRNYTFFKPKFIFYATYLSEKIGYWRYITIYRHLE"
353 "AHPEDRVYPIFRFFENWCQDENRHGDFFDAIMKSQPQILNDWKARLWSRF");
354 const int kQueryPatLength = 40;
355 const int kQueryStart = 94;
357 x_SetupSequenceBlk(kQuerySeq, &query_blk);
359 x_SetupSequenceBlk(kSubjectSeq, &subject_blk);
360 const int kSubjectPatLength = 45;
361 const int kSubjectStart = 8;
371 m_QueryInfo, &ext_params);
375 subject_blk->
length, m_ScoreBlk,
382 score_params, kQueryStart, kSubjectStart,
384 kQueryPatLength, kSubjectPatLength,
387 x_CheckGappedAlignmentResults(gap_align);
391 setUpLookupTable(
"[ED]-x(32,40)-E-x(2)-H");
398 x_CheckSplitResults(results_array,
pattern_info->num_patterns);
406 setUpLookupTable(
"[ED]-x(32,40)-E-x(2)-H");
413 BOOST_REQUIRE(results_array !=
NULL);
414 BOOST_REQUIRE(results_array[0] ==
NULL);
416 sfree(results_array);
427 const bool kIsGapped =
true;
428 setUpLookupTable(
"[ED]-x(32,40)-E-x(2)-H");
429 x_FindQueryOccurrences();
437 m_QueryInfo->contexts[0].eff_searchsp = 10000000;
438 const int k_avg_subject_length=343;
446 BOOST_REQUIRE(ext_options ==
NULL);
448 BOOST_REQUIRE(hit_params ==
NULL);
450 BOOST_REQUIRE(hit_options ==
NULL);
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
union BlastOffsetPair BlastOffsetPair
This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Int2 BLAST_GapAlignStructNew(const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, Uint4 max_subject_length, BlastScoreBlk *sbp, BlastGapAlignStruct **gap_align_ptr)
Initializes the BlastGapAlignStruct structure.
BlastHSP * Blast_HSPNew(void)
Allocates and zeros out memory for an HSP structure.
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
void Blast_HSPListPHIGetEvalues(BlastHSPList *hsp_list, BlastScoreBlk *sbp, const BlastQueryInfo *query_info, const SPHIPatternSearchBlk *pattern_blk)
Calculate e-values for a PHI BLAST HSP list.
Int2 Blast_HSPResultsInsertHSPList(BlastHSPResults *results, BlastHSPList *hsp_list, Int4 hitlist_size)
Inserts an HSP list into the appropriate place in the results structure.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
Boolean Blast_HSPListIsSortedByScore(const BlastHSPList *hsp_list)
Check if HSP list is sorted by score.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
BlastHSPResults ** PHIBlast_HSPResultsSplit(const BlastHSPResults *results, const SPHIQueryInfo *pattern_info)
Splits the BlastHSPResults structure for a PHI BLAST search into an array of BlastHSPResults structur...
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
BlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options)
Deallocate memory for BlastHitSavingOptions.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
Int2 LookupTableOptionsNew(EBlastProgramType program, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
BlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)
Deallocate memory for BlastExtensionOptions.
Int2 BlastHitSavingOptionsNew(EBlastProgramType program, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
Int2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)
Allocate memory for BlastExtensionOptions and fill with default values.
BlastHitSavingParameters * BlastHitSavingParametersFree(BlastHitSavingParameters *parameters)
Deallocate memory for BlastHitSavingParameters.
Int2 BlastExtensionParametersNew(EBlastProgramType blast_program, const BlastExtensionOptions *options, BlastScoreBlk *sbp, BlastQueryInfo *query_info, BlastExtensionParameters **parameters)
Calculate the raw values for the X-dropoff parameters.
Int2 BlastScoringParametersNew(const BlastScoringOptions *options, BlastScoreBlk *sbp, BlastScoringParameters **parameters)
Calculate scaled cutoff scores and gap penalties.
Int2 BlastHitSavingParametersNew(EBlastProgramType program_number, const BlastHitSavingOptions *options, const BlastScoreBlk *sbp, const BlastQueryInfo *query_info, Int4 avg_subject_length, Int4 compositionBasedStats, BlastHitSavingParameters **parameters)
Allocate memory and initialize the BlastHitSavingParameters structure.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
BlastQueryInfo * BlastQueryInfoNew(EBlastProgramType program, int num_queries)
Allocate memory for query information structure.
Utilities initialize/setup BLAST.
Int2 BlastSetup_ScoreBlkInit(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, const BlastScoringOptions *scoring_options, EBlastProgramType program_number, BlastScoreBlk **sbpp, double scale_factor, Blast_Message **blast_message, GET_MATRIX_PATH get_path)
Initializes the score block structure.
Int2 Blast_SetPHIPatternInfo(EBlastProgramType program, const SPHIPatternSearchBlk *pattern_blk, const BLAST_SequenceBlk *query, const BlastSeqLoc *lookup_segments, BlastQueryInfo *query_info, Blast_Message **blast_message)
In a PHI BLAST search, adds pattern information to the BlastQueryInfo structure.
Internal auxiliary setup classes/functions for C++ BLAST APIs.
Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen)
Stores the sequence in the sequence block structure.
Int2 BlastSeqBlkNew(BLAST_SequenceBlk **retval)
Allocates a new sequence block structure.
Wrapper class for BLAST_SequenceBlk .
Wrapper class for BlastExtensionOptions .
Wrapper class for BlastExtensionParameters .
Wrapper class for BlastGapAlignStruct .
Wrapper class for BlastQueryInfo .
Wrapper class for BlastScoreBlk .
Wrapper class for BlastScoringOptions .
Wrapper class for BlastScoringParameters .
Wrapper class for Blast_Message .
Wrapper class for LookupTableOptions .
Wrapper class for LookupTableWrap .
static bool x_CheckIncreasingBestEvalues(BlastHitList *hitlist)
void x_SetupSequenceBlk(const string &seq, BLAST_SequenceBlk **seq_blk)
EBlastProgramType m_Program
CBlastQueryInfo m_QueryInfo
CBlastScoreBlk m_ScoreBlk
static BlastHSPList * x_SetupHSPList(int index)
CPhiblastTestFixture()
Set up: initializes the PHI "lookup table", aka the SPHIPatternSearchBlk structure,...
void setUpLookupTable(string pattern)
void x_FindQueryOccurrences(void)
After the initial set-up is done, finds pattern occurrences in query and fills the pattern informatio...
static SPHIQueryInfo * x_SetupPatternInfo(void)
static void x_CheckSplitResults(BlastHSPResults **results_array, int num_results)
void x_CheckGappedAlignmentResults(BlastGapAlignStruct *gap_align)
static BlastHSPResults * x_SetupResults(const int kHitlistSize)
CLookupTableWrap m_Lookup
Ensure direct dependencies on enough of the core xncbi library to satisfy shared libraries that depen...
static const char location[]
void Reset(LookupTableWrap *p=NULL)
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
void Reset(BlastScoreBlk *p=NULL)
void Reset(BlastQueryInfo *p=NULL)
uint8_t Uint1
1-byte (8-bit) unsigned integer
int32_t Int4
4-byte (32-bit) signed integer
Wrapper for all lookup tables used in BLAST.
Int4 GetOffsetArraySize(LookupTableWrap *lookup)
Determine the size of the offsets arrays to be filled by the ScanSubject function.
Int2 LookupTableWrapInit(BLAST_SequenceBlk *query, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, BlastSeqLoc *lookup_segments, BlastScoreBlk *sbp, LookupTableWrap **lookup_wrap_ptr, const BlastRPSInfo *rps_info, Blast_Message **error_msg, BlastSeqSrc *seqsrc)
Create the lookup table for all query words.
#define TRUE
bool replacement for C indicating true.
@ eVeryLong
Is pattern too long for a simple multi-word processing?
@ eOneWord
Does pattern consist of a single word?
SPHIQueryInfo * SPHIQueryInfoFree(SPHIQueryInfo *pat_info)
Frees the pattern information structure.
SPHIQueryInfo * SPHIQueryInfoNew(void)
Allocates the pattern occurrences structure.
static int pattern_info(int what, void *where, BOOL unsetok)
Function prototypes used for PHI BLAST gapped extension and gapped extension with traceback.
Int2 PHIGappedAlignmentWithTraceback(Uint1 *query, Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Int4 q_pat_length, Int4 s_pat_length, SPHIPatternSearchBlk *pattern_blk)
Perform a gapped alignment with traceback for PHI BLAST.
Pseudo lookup table structure and database scanning functions used in PHI-BLAST.
Int4 PHIBlastScanSubject(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *query_blk, const BLAST_SequenceBlk *subject, Int4 *offset, BlastOffsetPair *offset_pairs, Int4 array_size)
Scans the subject sequence from "offset" to the end of the sequence.
BOOST_AUTO_TEST_CASE(testPHILookupTableLong)
Tests the values in the PHI BLAST lookup table.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
Structure to hold a sequence.
Int4 length
Length of sequence.
Uint1 * sequence
Sequence used for search (could be translation).
Options used for gapped extension These include: a.
Structure supporting the gapped alignment.
Int4 query_stop
query end offset of current alignment
Int4 subject_start
subject start offset of current alignment
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
Int4 cutoff_score
Raw cutoff score corresponding to the e-value provided by the user if no sum stats,...
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
double best_evalue
Smallest e-value for HSPs in this list.
The structure to contain all BLAST results, for multiple queries.
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Structure holding all information about an HSP.
SPHIHspInfo * pat_info
In PHI BLAST, information about this pattern match.
double evalue
This HSP's e-value.
Int4 score
This HSP's raw score.
The structure to contain all BLAST results for one query sequence.
BlastHSPList ** hsplist_array
Array of HSP lists for individual database hits.
Int4 hsplist_count
Filled size of the HSP lists array.
Options used when evaluating and saving hits These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
BlastGappedCutoffs * cutoffs
per-context gapped cutoff information
Int4 cutoff_score_min
smallest cutoff score across all contexts
Boolean gapped_calculation
gap-free search if FALSE
Used to hold a set of positions, mostly used for filtering.
Edit script: linked list of correspondences between two sequences.
Int4 * num
Array of number of operations.
Int4 size
Size of above arrays.
char * phi_pattern
PHI-BLAST pattern.
void * lut
Pointer to the actual lookup table structure.
SExtraLongPatternItems * extra_long_items
Additional items necessary if pattern contains pieces longer than a word.
Int4 numWords
Number of words needed to hold bit representation of pattern.
In PHI BLAST: information about pattern match in a given HSP.
Int4 index
Index of query pattern occurrence for this HSP.
Information about a single pattern occurrence in the query.
Structure containing all auxiliary information needed in a pattern search.
SShortPatternItems * one_word_items
Items necessary when pattern fits in one word.
EPatternType flagPatternLength
Indicates if the whole pattern fits in 1 word, each of several parts of the pattern fit in a word,...
double patternProbability
Probability of this letter combination.
Int4 minPatternMatchLength
Minimum length of string to match this pattern.
Int4 num_patterns_db
Number of patterns actually found during the database search.
SLongPatternItems * multi_word_items
Additional items, when pattern requires multiple words.
In PHI BLAST, structure containing information about all pattern occurrences in query.
Int4 allocated_size
Allocated size of the occurrences array.
Int4 num_patterns
Number of pattern occurrences in query.
SPHIPatternInfo * occurrences
Array of pattern occurrence information structures.
Int4 * whichPositionPtr
Array of positions where pattern letters should match, for a single word of the pattern.
Int4 match_mask
Bit mask representation of input pattern for patterns that fit in a word.
Utility stuff for more convenient using of Boost.Test library.
This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...
Uint4 s_start
Start offset of pattern in subject.
Uint4 s_end
End offset of pattern in subject.
voidp calloc(uInt items, uInt size)