72 #define NULL_NUCL_SENTINEL 0xf
73 #define SMALL_QUERY_GI 1945386
74 #define LARGE_QUERY_GI 19572546
83 lookup_segments =
NULL;
96 sprintf(
buf,
"gi|%d", query_gi);
115 BOOST_REQUIRE_EQUAL(0, status);
118 BOOST_REQUIRE_EQUAL(0, status);
120 BOOST_REQUIRE(query_blk !=
NULL);
122 BOOST_REQUIRE(query_blk->
length > 0);
124 BOOST_REQUIRE(query_blk !=
NULL);
151 int len =
iexp(alphabet_size,word_size) + (word_size-1);
158 debruijn(word_size,alphabet_size,sequence+1,0);
160 for(
int i=1;
i<word_size;
i++)
161 sequence[
len-word_size+1+
i] = sequence[
i];
190 lookup_options, query_options, lookup_segments,
193 BOOST_REQUIRE(query_options ==
NULL);
199 BOOST_REQUIRE_EQUAL(65536,
lookup->backbone_size);
200 BOOST_REQUIRE_EQUAL(4,
lookup->longest_chain);
201 BOOST_REQUIRE_EQUAL(1444,
lookup->overflow_size);
202 BOOST_REQUIRE_EQUAL((
Int2)2819,
lookup->final_backbone[48]);
203 BOOST_REQUIRE_EQUAL((
Int2)754,
lookup->final_backbone[42889]);
204 BOOST_REQUIRE_EQUAL((
Int2)(-345),
lookup->final_backbone[21076]);
207 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
209 BOOST_REQUIRE(lookup_options ==
NULL);
225 lookup_options, query_options, lookup_segments,
228 BOOST_REQUIRE(query_options ==
NULL);
233 BOOST_REQUIRE_EQUAL(4194304,
lookup->hashsize);
234 BOOST_REQUIRE_EQUAL(28, (
int)
lookup->word_length);
235 BOOST_REQUIRE_EQUAL(18,
lookup->scan_step);
236 BOOST_REQUIRE_EQUAL(37,
lookup->longest_chain);
237 BOOST_REQUIRE_EQUAL(7,
lookup->pv_array_bts);
238 BOOST_REQUIRE_EQUAL(5868,
lookup->hashtable[36604]);
239 BOOST_REQUIRE_EQUAL(14646,
lookup->hashtable[1426260]);
240 BOOST_REQUIRE_EQUAL(290,
lookup->hashtable[4007075]);
242 int pv_array_size = (
lookup->hashsize >>
lookup->pv_array_bts);
247 BOOST_REQUIRE_EQUAL(-729205454, pv_array_hash);
250 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
252 BOOST_REQUIRE(lookup_options ==
NULL);
269 lookup_options, query_options, lookup_segments,
272 BOOST_REQUIRE(query_options ==
NULL);
276 BOOST_REQUIRE_EQUAL(4194304,
lookup->hashsize);
277 BOOST_REQUIRE_EQUAL(11, (
int)
lookup->word_length);
278 BOOST_REQUIRE_EQUAL(
true, (
bool)
lookup->discontiguous);
279 BOOST_REQUIRE_EQUAL(16, (
int)
lookup->template_length);
280 BOOST_REQUIRE_EQUAL(1, (
int)
lookup->template_type);
281 BOOST_REQUIRE_EQUAL(1,
lookup->scan_step);
282 BOOST_REQUIRE_EQUAL(2,
lookup->longest_chain);
283 BOOST_REQUIRE_EQUAL(49,
lookup->hashtable[2463300]);
284 BOOST_REQUIRE_EQUAL(392,
lookup->hashtable[1663305]);
285 BOOST_REQUIRE_EQUAL(1049,
lookup->hashtable[3586129]);
286 BOOST_REQUIRE_EQUAL(8,
lookup->pv_array_bts);
288 int pv_array_size = (
lookup->hashsize >>
lookup->pv_array_bts);
293 BOOST_REQUIRE_EQUAL(-160576483, pv_array_hash);
296 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
298 BOOST_REQUIRE(lookup_options ==
NULL);
315 lookup_options, query_options, lookup_segments,
318 BOOST_REQUIRE(query_options ==
NULL);
322 BOOST_REQUIRE_EQUAL(16777216,
lookup->hashsize);
323 BOOST_REQUIRE_EQUAL(12, (
int)
lookup->word_length);
324 BOOST_REQUIRE_EQUAL(
true, (
bool)
lookup->discontiguous);
325 BOOST_REQUIRE_EQUAL(16, (
int)
lookup->template_length);
326 BOOST_REQUIRE_EQUAL(3, (
int)
lookup->template_type);
327 BOOST_REQUIRE_EQUAL(1,
lookup->scan_step);
328 BOOST_REQUIRE_EQUAL(2,
lookup->longest_chain);
329 BOOST_REQUIRE_EQUAL(3631,
lookup->hashtable[133875]);
330 BOOST_REQUIRE_EQUAL(2092,
lookup->hashtable[351221]);
331 BOOST_REQUIRE_EQUAL(4951,
lookup->hashtable[1336356]);
332 BOOST_REQUIRE_EQUAL(10,
lookup->pv_array_bts);
334 int pv_array_size = (
lookup->hashsize >>
lookup->pv_array_bts);
339 BOOST_REQUIRE_EQUAL(-630452942, pv_array_hash);
342 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
344 BOOST_REQUIRE(lookup_options ==
NULL);
361 lookup_options, query_options, lookup_segments,
364 BOOST_REQUIRE(query_options ==
NULL);
368 BOOST_REQUIRE_EQUAL(4194304,
lookup->hashsize);
369 BOOST_REQUIRE_EQUAL(11, (
int)
lookup->word_length);
370 BOOST_REQUIRE_EQUAL(
true, (
bool)
lookup->discontiguous);
371 BOOST_REQUIRE_EQUAL(16, (
int)
lookup->template_length);
372 BOOST_REQUIRE_EQUAL(2, (
int)
lookup->template_type);
373 BOOST_REQUIRE_EQUAL(1,
lookup->scan_step);
374 BOOST_REQUIRE_EQUAL(2,
lookup->longest_chain);
375 BOOST_REQUIRE_EQUAL(36,
lookup->hashtable[1353317]);
376 BOOST_REQUIRE_EQUAL(375,
lookup->hashtable[1955444]);
377 BOOST_REQUIRE_EQUAL(5455,
lookup->hashtable[1735012]);
378 BOOST_REQUIRE_EQUAL(8,
lookup->pv_array_bts);
380 int pv_array_size = (
lookup->hashsize >>
lookup->pv_array_bts);
385 BOOST_REQUIRE_EQUAL(932347030, pv_array_hash);
388 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
390 BOOST_REQUIRE(lookup_options ==
NULL);
407 lookup_options, query_options, lookup_segments,
410 BOOST_REQUIRE(query_options ==
NULL);
414 BOOST_REQUIRE_EQUAL(16777216,
lookup->hashsize);
415 BOOST_REQUIRE_EQUAL(12, (
int)
lookup->word_length);
416 BOOST_REQUIRE_EQUAL(
true, (
bool)
lookup->discontiguous);
417 BOOST_REQUIRE_EQUAL(16, (
int)
lookup->template_length);
418 BOOST_REQUIRE_EQUAL(4, (
int)
lookup->template_type);
419 BOOST_REQUIRE_EQUAL(1,
lookup->scan_step);
420 BOOST_REQUIRE_EQUAL(2,
lookup->longest_chain);
421 BOOST_REQUIRE_EQUAL(82,
lookup->hashtable[9606485]);
422 BOOST_REQUIRE_EQUAL(752,
lookup->hashtable[15622537]);
423 BOOST_REQUIRE_EQUAL(5408,
lookup->hashtable[10084009]);
424 BOOST_REQUIRE_EQUAL(10,
lookup->pv_array_bts);
426 int pv_array_size = (
lookup->hashsize >>
lookup->pv_array_bts);
431 BOOST_REQUIRE_EQUAL(558099690, pv_array_hash);
434 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
436 BOOST_REQUIRE(lookup_options ==
NULL);
453 lookup_options, query_options, lookup_segments,
456 BOOST_REQUIRE(query_options ==
NULL);
460 BOOST_REQUIRE_EQUAL(4194304,
lookup->hashsize);
461 BOOST_REQUIRE_EQUAL(11, (
int)
lookup->word_length);
462 BOOST_REQUIRE_EQUAL(
true,
static_cast<bool>(
lookup->discontiguous));
463 BOOST_REQUIRE_EQUAL(16, (
int)
lookup->template_length);
464 BOOST_REQUIRE_EQUAL(1, (
int)
lookup->template_type);
465 BOOST_REQUIRE_EQUAL(1, (
int)
lookup->two_templates);
466 BOOST_REQUIRE_EQUAL(2, (
int)
lookup->second_template_type);
467 BOOST_REQUIRE_EQUAL(1,
lookup->scan_step);
468 BOOST_REQUIRE_EQUAL(4,
lookup->longest_chain);
469 BOOST_REQUIRE_EQUAL(128,
lookup->hashtable[1450605]);
470 BOOST_REQUIRE_EQUAL(342,
lookup->hashtable[4025953]);
471 BOOST_REQUIRE_EQUAL(663,
lookup->hashtable[3139906]);
472 BOOST_REQUIRE_EQUAL(72,
lookup->hashtable2[2599530]);
473 BOOST_REQUIRE_EQUAL(225,
lookup->hashtable2[4110966]);
474 BOOST_REQUIRE_EQUAL(8,
lookup->pv_array_bts);
476 int pv_array_size = (
lookup->hashsize >>
lookup->pv_array_bts);
481 BOOST_REQUIRE_EQUAL(-36132604, pv_array_hash);
484 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
486 BOOST_REQUIRE(lookup_options ==
NULL);
504 lookup_segments, &
lookup, lookup_options,
505 query_options,
NULL, 1), 0);
508 BOOST_REQUIRE(query_options ==
NULL);
510 BOOST_REQUIRE_EQUAL(16, (
int)
lookup->lut_word_length);
511 BOOST_REQUIRE_EQUAL(1,
lookup->scan_step);
512 BOOST_REQUIRE_EQUAL(11,
lookup->longest_chain);
513 BOOST_REQUIRE_EQUAL(32768,
lookup->backbone_size);
514 BOOST_REQUIRE_EQUAL(1494,
lookup->offsets_size);
515 BOOST_REQUIRE_EQUAL(5,
lookup->pv_array_bts);
516 BOOST_REQUIRE(
lookup->hash_callback);
518 Uint4 pv_array_size = 1u << (32 - 10);
523 BOOST_REQUIRE_EQUAL(1515308782, pv_array_hash);
531 for (
int i=0;
i < 16;
i++) {
532 BOOST_REQUIRE((query_blk->sequence[
i] & 0xfc) == 0);
533 word = (word << 2) | query_blk->sequence[
i];
538 BOOST_REQUIRE(
lookup->thick_backbone[hashed_word].num_words > 0);
539 BOOST_REQUIRE_EQUAL(word,
lookup->thick_backbone[hashed_word].words[0]);
540 BOOST_REQUIRE(
lookup->thick_backbone[hashed_word].num_offsets[0] <
543 BOOST_REQUIRE_EQUAL(0,
lookup->thick_backbone[hashed_word].offsets[0]);
548 BOOST_REQUIRE(lookup_options ==
NULL);
564 BOOST_REQUIRE(seqsrc);
569 BOOST_REQUIRE(lookup_options->
db_filter);
571 BOOST_REQUIRE(lookup_options->
db_filter);
573 lookup_options, query_options, lookup_segments,
574 0, &lookup_wrap_ptr,
NULL,
NULL, seqsrc), 0);
576 BOOST_REQUIRE(query_options ==
NULL);
582 BOOST_REQUIRE_EQUAL(16, (
int)
lookup->lut_word_length);
583 BOOST_REQUIRE_EQUAL(1,
lookup->scan_step);
584 BOOST_REQUIRE_EQUAL(10,
lookup->longest_chain);
585 BOOST_REQUIRE_EQUAL(256,
lookup->backbone_size);
586 BOOST_REQUIRE_EQUAL(38,
lookup->offsets_size);
587 BOOST_REQUIRE_EQUAL(5,
lookup->pv_array_bts);
588 BOOST_REQUIRE(
lookup->hash_callback);
590 Uint4 pv_array_size = 1u << (32 - 10);
595 BOOST_REQUIRE_EQUAL(130150681, pv_array_hash);
603 for (
int i=0;
i < 16;
i++) {
604 BOOST_REQUIRE((query_blk->sequence[
i] & 0xfc) == 0);
605 word = (word << 2) | query_blk->sequence[
i];
609 BOOST_REQUIRE(
lookup->thick_backbone[hashed_word].num_words == 0);
612 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
614 BOOST_REQUIRE(lookup_options ==
NULL);
630 BOOST_REQUIRE(lookup_options->
db_filter);
632 BOOST_REQUIRE(lookup_options->
db_filter);
634 lookup_options, query_options, lookup_segments,
638 BOOST_REQUIRE(lookup_options ==
NULL);
643 const int alphabet_size=4;
644 const int word_size=8;
646 debruijnInit(word_size, alphabet_size);
651 FALSE, 0, word_size);
657 lookup_options, query_options, lookup_segments,
660 BOOST_REQUIRE(query_options ==
NULL);
664 BOOST_REQUIRE_EQUAL(65536,
lookup->backbone_size);
665 BOOST_REQUIRE_EQUAL(1,
lookup->longest_chain);
666 BOOST_REQUIRE_EQUAL(0,
lookup->overflow_size);
669 for(index=0;index<
lookup->backbone_size;index++)
671 BOOST_REQUIRE_EQUAL(1,
lookup->thick_backbone[index].num_used);
676 for (index=0; index<pv_size; index++)
678 BOOST_REQUIRE_EQUAL((
Uint4) 0xFFFFFFFF, (
Uint4) pv_array[index]);
682 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
684 BOOST_REQUIRE(lookup_options ==
NULL);
689 const int alphabet_size=4;
690 const int word_size=12;
692 debruijnInit(word_size, alphabet_size);
703 lookup_options, query_options, lookup_segments,
706 BOOST_REQUIRE(query_options ==
NULL);
710 BOOST_REQUIRE_EQUAL(16777216,
lookup->hashsize);
711 BOOST_REQUIRE_EQUAL(28, (
int)
lookup->word_length);
712 BOOST_REQUIRE_EQUAL(2,
lookup->longest_chain);
713 BOOST_REQUIRE_EQUAL(10,
lookup->pv_array_bts);
717 for (index=0; index<query_blk->length+1; index++)
719 BOOST_REQUIRE_EQUAL(0,
lookup->next_pos[index]);
723 int pv_array_size = (
lookup->hashsize >>
lookup->pv_array_bts);
724 for (index=0; index<pv_array_size; index++)
726 BOOST_REQUIRE_EQUAL((
Uint4) 0xFFFFFFFF, (
Uint4) pv_array[index]);
730 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
732 BOOST_REQUIRE(lookup_options ==
NULL);
753 lookup_options, query_options, segments,
756 BOOST_REQUIRE(query_options ==
NULL);
762 BOOST_REQUIRE_EQUAL(65536,
lookup->backbone_size);
763 BOOST_REQUIRE_EQUAL(0,
lookup->longest_chain);
764 BOOST_REQUIRE_EQUAL(28, (
int)
lookup->word_length);
765 BOOST_REQUIRE_EQUAL(2,
lookup->overflow_size);
767 for (
int index=0; index<
lookup->backbone_size; index++)
771 BOOST_REQUIRE_EQUAL((
Int2)(-1),
lookup->final_backbone[index]);
775 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
777 BOOST_REQUIRE(lookup_options ==
NULL);
779 BOOST_REQUIRE(segments ==
NULL);
785 const Int4 word_size = 28;
798 while (offset < query_blk->length) {
807 lookup_options, query_options, segments, 0,
810 BOOST_REQUIRE(query_options ==
NULL);
814 BOOST_REQUIRE_EQUAL(4194304,
lookup->hashsize);
815 BOOST_REQUIRE_EQUAL(28, (
int)
lookup->word_length);
816 BOOST_REQUIRE_EQUAL(18,
lookup->scan_step);
817 BOOST_REQUIRE_EQUAL(2,
lookup->longest_chain);
818 BOOST_REQUIRE_EQUAL(7,
lookup->pv_array_bts);
821 int pv_array_size = (
lookup->hashsize >>
lookup->pv_array_bts);
823 for (index=0; index<pv_array_size; index++)
830 BOOST_REQUIRE(lookup_wrap_ptr ==
NULL);
832 BOOST_REQUIRE(lookup_options ==
NULL);
834 BOOST_REQUIRE(segments ==
NULL);
Declares the CBl2Seq (BLAST 2 Sequences) class.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
#define PV_ARRAY_BTS
bits-to-shift from lookup_index to pv_array index.
Uint4(* TNaLookupHashFunction)(Uint1 *, Uint4)
Hash function type for the lookup table.
#define PV_ARRAY_TYPE
The pv_array 'native' type.
Routines for creating nucleotide BLAST lookup tables.
#define NA_OFFSETS_PER_HASH
BlastNaHashLookupTable * BlastNaHashLookupTableDestruct(BlastNaHashLookupTable *lookup)
Free a nucleotide lookup table.
Int4 BlastNaHashLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastNaHashLookupTable **lut, const LookupTableOptions *opt, const QuerySetUpOptions *query_options, BlastSeqSrc *seqsrc, Uint4 num_threads)
Declares the CBlastNucleotideOptionsHandle class.
Definitions which are dependent on the NCBI C++ Object Manager.
Int2 BlastQuerySetUpOptionsNew(QuerySetUpOptions **options)
Allocate memory for QuerySetUpOptions and fill with default values.
Int2 BLAST_FillLookupTableOptions(LookupTableOptions *options, EBlastProgramType program, Boolean is_megablast, double threshold, Int4 word_size)
Allocate memory for lookup table options and fill with default values.
Int2 LookupTableOptionsNew(EBlastProgramType program, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
ELookupTableType
Types of the lookup table.
@ eSmallNaLookupTable
lookup table for blastn with small query
@ eNaLookupTable
blastn lookup table
@ eMBLookupTable
megablast lookup table (includes both contiguous and discontiguous megablast)
@ eNaHashLookupTable
used for 16-base words
LookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)
Deallocates memory for LookupTableOptions*.
QuerySetUpOptions * BlastQuerySetUpOptionsFree(QuerySetUpOptions *options)
Deallocate memory for QuerySetUpOptions.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
Declares the CBlastProteinOptionsHandle class.
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen)
Stores the sequence in the sequence block structure.
Int2 BlastSetUp_SeqBlkNew(const Uint1 *buffer, Int4 length, BLAST_SequenceBlk **seq_blk, Boolean buffer_allocated)
Allocates memory for *sequence_blk and then populates it.
Int2 BlastSeqBlkNew(BLAST_SequenceBlk **retval)
Allocates a new sequence block structure.
Declares the CBlastxOptionsHandle class.
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
static CTestObjMgr & Instance()
Declares the CDiscNucleotideOptionsHandle class.
static int lookup(const char *name, const struct lookup_int *table)
TSeqPos length
Length of the buffer above (not necessarily sequence length!)
BlastSeqSrc * MakeSeqSrc()
Retrieves or constructs the BlastSeqSrc.
TAutoUint1Ptr data
Sequence data.
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
@ eSentinels
Use sentinel bytes.
@ eBlastDbIsNucleotide
nucleotide
element_type * release(void)
Release will release ownership of pointer to caller.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
@ eNa_strand_both
in forward orientation
for(len=0;yy_str[len];++len)
Utility functions for lookup table generation.
Int4 iexp(Int4 x, Int4 n)
Integer exponentiation using right to left binary algorithm.
void debruijn(Int4 n, Int4 k, Uint1 *output, Uint1 *alphabet)
generates a de Bruijn sequence containing all substrings of length n over an alphabet of size k.
LookupTableWrap * LookupTableWrapFree(LookupTableWrap *lookup)
Deallocate memory for the lookup table.
Int2 LookupTableWrapInit(BLAST_SequenceBlk *query, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, BlastSeqLoc *lookup_segments, BlastScoreBlk *sbp, LookupTableWrap **lookup_wrap_ptr, const BlastRPSInfo *rps_info, Blast_Message **error_msg, BlastSeqSrc *seqsrc)
Create the lookup table for all query words.
Uint4 EndianIndependentBufferHash(const char *buffer, Uint4 byte_length, Uint4 swap_size, Uint4 hash_seed)
Endianness independent hash function.
Magic spell ;-) needed for some weird compilers... very empiric.
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
Defines: CTimeFormat - storage class for time format.
#define NULL_NUCL_SENTINEL
BOOST_AUTO_TEST_CASE(testStdLookupTable)
Structure to hold a sequence.
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Int4 length
Length of sequence.
Uint1 * sequence
Sequence used for search (could be translation).
The lookup table structure used for Mega BLAST.
The basic lookup table structure for blastn searches.
Used to hold a set of positions, mostly used for filtering.
Complete type definition of Blast Sequence Source ADT.
Lookup table structure for blastn searches with small queries.
Options needed to construct a lookup table. Also needed: query sequence and query length.
Int4 word_size
Determines the size of the lookup table.
Boolean db_filter
scan the database and include only words that appear in the database between 1 and 9 times (currently...
Int4 mb_template_type
Type of a discontiguous word template.
Int4 mb_template_length
Length of the discontiguous words.
Wrapper structure for different types of BLAST lookup tables.
void * lut
Pointer to the actual lookup table structure.
ELookupTableType lut_type
What kind of a lookup table it is?
void SetUpQuery(Uint4 query_gi)
BlastSeqLoc * lookup_segments
BLAST_SequenceBlk * query_blk
void debruijnInit(int word_size, int alphabet_size)
Options required for setting up the query sequence.
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Declares the CTBlastnOptionsHandle class.
Utility stuff for more convenient using of Boost.Test library.