114 BOOST_REQUIRE(query_blk !=
NULL);
115 BOOST_REQUIRE(query_blk->
sequence[0] != kNullByte);
116 BOOST_REQUIRE(query_blk->
sequence[query_blk->
length - 1] != kNullByte);
139 for(
i=0;
i<
n-1;
i++) sequence[
len-
n+2+
i] = sequence[
i];
143 lookup_segments=
NULL;
156 lookup_segments=
NULL;
168 (hasNeighbor)? 11: -1,
206 GetSeqBlk(
"gi|129295");
209 BOOST_REQUIRE_EQUAL(0,
lookup->threshold);
210 BOOST_REQUIRE_EQUAL(0,
lookup->neighbor_matches);
214 Int4 num_hits_found = 0,
i;
218 BOOST_REQUIRE_EQUAL(230, num_hits_found);
238 BOOST_REQUIRE_EQUAL( num_used, 0 );
241 BOOST_REQUIRE_EQUAL( num_used, 1 );
250 FillLookupTable(
true);
270 if ( (score >= 11) || ( (u==x) && (v==y) && (w==z) ) )
274 index = (u << 10) | (v << 5) | (w);
278 BOOST_REQUIRE_EQUAL(
count, num_used);
289 BOOST_REQUIRE_EQUAL(num_used,
len-2);
301 BOOST_REQUIRE_EQUAL(num_used,
len-2);
327 sequence[
len-
n+2+
i] = sequence[
i];
362 pssm_sbp->posMatrix[
i][j] = sbp->
matrix[j][sequence[
i+1]];
380 lookup_segments_debruijn,
416 index = (
i << 10) | (j << 5) | (k);
458 lookup_segments=
NULL;
461 lookup_wrap_ptr=
NULL;
492 BOOST_REQUIRE(query_blk !=
NULL);
493 BOOST_REQUIRE(query_blk->
sequence[0] != kNullByte);
494 BOOST_REQUIRE(query_blk->
sequence[query_blk->
length - 1] != kNullByte);
549 GetSeqBlk(
"WP_130744894.1");
551 BOOST_REQUIRE_EQUAL(
lookup->threshold, 2100);
552 BOOST_REQUIRE_EQUAL(
lookup->word_length, 6);
553 BOOST_REQUIRE_EQUAL(
lookup->compressed_alphabet_size, 15);
556 backbone_cell =
lookup->backbone + 6189626;
557 BOOST_REQUIRE_EQUAL(backbone_cell->
num_used, 2);
558 Int4 index[8] ={6189626,3308318, 6298163, 9877654, 4975326, 3450036, 6447263, 500762};
559 Int4 num_used[8] ={2, 1, 1, 0, 1, 2, 2, 0};
560 for(
int i=0;
i < 8;
i++) {
561 backbone_cell =
lookup->backbone + index[
i];
562 BOOST_REQUIRE_EQUAL(backbone_cell->
num_used, num_used[
i]);
568 GetSeqBlk(
"CAA50632.1");
570 BOOST_REQUIRE_EQUAL(
lookup->threshold, 2100);
571 BOOST_REQUIRE_EQUAL(
lookup->word_length, 6);
572 BOOST_REQUIRE_EQUAL(
lookup->compressed_alphabet_size, 15);
575 Int4 index[8] ={9958159, 8870278 , 44768, 3870741, 1960395 , 6981008 , 5035027, 166507 };
576 Int4 num_used[8] ={9, 8, 7, 5, 3, 2, 1 , 0};
577 for(
int i=0;
i < 8;
i++) {
578 backbone_cell =
lookup->backbone + index[
i];
579 BOOST_REQUIRE_EQUAL(backbone_cell->
num_used, num_used[
i]);
BOOST_AUTO_TEST_CASE(BackboneIntegrityTest)
Declares the CBl2Seq (BLAST 2 Sequences) class.
Routines for creating protein BLAST lookup tables.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Definitions which are dependent on the NCBI C++ Object Manager.
#define BLAST_GAP_OPEN_PROT
Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
Int2 BLAST_FillScoringOptions(BlastScoringOptions *options, EBlastProgramType program, Boolean greedy_extension, Int4 penalty, Int4 reward, const char *matrix, Int4 gap_open, Int4 gap_extend)
Fill non-default values in the BlastScoringOptions structure.
#define BLAST_GAP_EXTN_PROT
cost to extend a gap.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
Int2 BLAST_FillLookupTableOptions(LookupTableOptions *options, EBlastProgramType program, Boolean is_megablast, double threshold, Int4 word_size)
Allocate memory for lookup table options and fill with default values.
Int2 LookupTableOptionsNew(EBlastProgramType program, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
LookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)
Deallocates memory for LookupTableOptions*.
Utilities initialize/setup BLAST.
Int2 Blast_ScoreBlkMatrixInit(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)
Initializes the substitution matrix in the BlastScoreBlk according to the scoring options specified.
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen)
Stores the sequence in the sequence block structure.
Int2 BlastSetUp_SeqBlkNew(const Uint1 *buffer, Int4 length, BLAST_SequenceBlk **seq_blk, Boolean buffer_allocated)
Allocates memory for *sequence_blk and then populates it.
Int2 BlastSeqBlkNew(BLAST_SequenceBlk **retval)
Allocates a new sequence block structure.
static CTestObjMgr & Instance()
static int lookup(const char *name, const struct lookup_int *table)
TSeqPos length
Length of the buffer above (not necessarily sequence length!)
#define BLASTAA_SIZE
Size of aminoacid alphabet.
#define BLASTAA_SEQ_CODE
== Seq_code_ncbistdaa
TAutoUint1Ptr data
Sequence data.
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
Uint1 GetSentinelByte(EBlastEncoding encoding) THROWS((CBlastException))
Convenience function to centralize the knowledge of which sentinel bytes we use for supported encodin...
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
@ eBlastEncodingProtein
NCBIstdaa.
@ eNoSentinels
Do not use sentinel bytes.
element_type * release(void)
Release will release ownership of pointer to caller.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int32_t Int4
4-byte (32-bit) signed integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
Utility functions for lookup table generation.
Int4 iexp(Int4 x, Int4 n)
Integer exponentiation using right to left binary algorithm.
void debruijn(Int4 n, Int4 k, Uint1 *output, Uint1 *alphabet)
generates a de Bruijn sequence containing all substrings of length n over an alphabet of size k.
LookupTableWrap * LookupTableWrapFree(LookupTableWrap *lookup)
Deallocate memory for the lookup table.
Int2 LookupTableWrapInit(BLAST_SequenceBlk *query, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, BlastSeqLoc *lookup_segments, BlastScoreBlk *sbp, LookupTableWrap **lookup_wrap_ptr, const BlastRPSInfo *rps_info, Blast_Message **error_msg, BlastSeqSrc *seqsrc)
Create the lookup table for all query words.
Magic spell ;-) needed for some weird compilers... very empirical.
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
Defines: CTimeFormat - storage class for time format.
structure defining one cell of the compacted lookup table
structure defining one cell of the small (i.e., use short) lookup table
A general lookup table for the test.
BlastSeqLoc * lookup_segments
LookupTableWrap * lookup_wrap_ptr
void GetSeqBlk(string gid)
LookupTableOptions * lookup_options
BLAST_SequenceBlk * query_blk
void FillLookupTable(bool hasNeighbor=false)
BlastAaLookupTable * lookup
Structure to hold a sequence.
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Int4 length
Length of sequence.
Uint1 * sequence
Sequence used for search (could be translation).
The basic lookup table structure for blastp searches.
void * thick_backbone
may point to BackboneCell, SmallboneCell, or TinyboneCell.
The lookup table structure for protein searches using a compressed alphabet.
Structure used for scoring calculations.
SBlastScoreMatrix * matrix
scoring matrix data
Scoring options block. Used to produce the BlastScoreBlk structure. This structure may be needed for lo...
Used to hold a set of positions, mostly used for filtering.
BlastSeqLoc * lookup_segments
CompressedAalookupTestFixture()
BlastCompressedAaLookupTable * lookup
BLAST_SequenceBlk * query_blk
LookupTableOptions * lookup_options
LookupTableWrap * lookup_wrap_ptr
~CompressedAalookupTestFixture()
void GetSeqBlk(const string &id)
structure for hashtable of indexed query offsets
Options needed to construct a lookup table. Also needed: query sequence and query length.
Wrapper structure for different types of BLAST lookup tables.
void * lut
Pointer to the actual lookup table structure.
ELookupTableType lut_type
The kind of lookup table this is.
int ** data
actual scoring matrix data, stored in row-major form
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Utilities for more convenient use of the Boost.Test library.
voidp calloc(uInt items, uInt size)