NCBI C++ ToolKit
aalookup_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: aalookup_unit_test.cpp 98590 2022-12-08 15:54:19Z fongah2 $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: George Coulouris
27 *
28 * File Description:
29 * Protein lookup table unit tests
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 
36 #include <corelib/ncbitime.hpp>
38 #include <objmgr/scope.hpp>
39 #include <objmgr/util/sequence.hpp>
40 
45 #include <serial/serial.hpp>
46 #include <serial/iterator.hpp>
47 #include <serial/objostr.hpp>
48 
50 #include <blast_objmgr_priv.hpp>
51 
56 
57 #include "test_objmgr.hpp"
58 
59 using namespace std;
60 using namespace ncbi;
61 using namespace ncbi::objects;
62 using namespace ncbi::blast;
63 
64 /// A general lookup table for the test
66 
73 
74  // constructor
75  // this constructor actually does nothing.
76  // use GetSeqBlk() and FillLookupTable() to instantiate a
77  // testing lookuptable instead.
79  query_blk=NULL;
80  lookup_segments=NULL;
81  lookup_options=NULL;
82  sbp=NULL;
83  lookup_wrap_ptr=NULL;
84  lookup=NULL;
85  }
86 
87  // destructor
89  LookupTableWrapFree(lookup_wrap_ptr);
90  LookupTableOptionsFree(lookup_options);
91  BlastSequenceBlkFree(query_blk);
92  BlastSeqLocFree(lookup_segments);
93  BlastScoreBlkFree(sbp);
94  }
95 
96  // to create a sequence with given gid
97  void GetSeqBlk(string gid){
98  CSeq_id id(gid);
99  unique_ptr<SSeqLoc> ssl(CTestObjMgr::Instance().CreateSSeqLoc(id, eNa_strand_unknown));
100  SBlastSequence sequence =
101  GetSequence(*ssl->seqloc,
103  ssl->scope,
104  eNa_strand_unknown, // strand not applicable
105  eNoSentinels); // nucl sentinel not applicable
106  // Create the sequence block.
107  // Note that GetSequence always includes sentinel bytes for
108  // protein sequences.
109  BlastSeqBlkNew(&query_blk);
110  BlastSeqBlkSetSequence(query_blk, sequence.data.release(),
111  sequence.length - 2);
112 
113  const Uint1 kNullByte = GetSentinelByte(eBlastEncodingProtein);
114  BOOST_REQUIRE(query_blk != NULL);
115  BOOST_REQUIRE(query_blk->sequence[0] != kNullByte);
116  BOOST_REQUIRE(query_blk->sequence[query_blk->length - 1] != kNullByte);
117  BOOST_REQUIRE(query_blk->sequence_start[0] == kNullByte);
118  BOOST_REQUIRE(query_blk->sequence_start[query_blk->length + 1] ==
119  kNullByte);
120  // Indicate which regions of the query to index
121  // The interval is [0...length-1] but must ignore the two
122  // sentinel bytes. This makes the interval [0...length-3]
123  BlastSeqLocNew(&lookup_segments, 0, sequence.length - 3);
124  }
125 
126  // to create debruijn sequence
127  void GetSeqBlk(){
128  // generate sequence
129  Int4 k=0,n=0,i=0, len=0;
130  Uint1 *sequence=NULL;
131  k=BLASTAA_SIZE; //alphabet size
132  n=3; //word size
133  len = iexp(k,n) + (n-1);
134  // leave room for and pad with sentinels
135  sequence = (Uint1*) malloc(len + 2);
136  sequence[0] = 0;
137  sequence[len+1] = 0;
138  debruijn(n,k,sequence+1,NULL);
139  for(i=0;i<n-1;i++) sequence[len-n+2+i] = sequence[i];
140  // create sequence block
141  BlastSetUp_SeqBlkNew(sequence, len, &query_blk, TRUE);
142  // indicate region of query to index
143  lookup_segments=NULL;
144  BlastSeqLocNew(&lookup_segments, 0, len-1);
145  }
146 
147  // to create a trivial sequence (all '0') of length len
149  Uint1 *sequence=NULL;
150  // leave room for and pad with sentinels
151  sequence = (Uint1*) malloc(len + 2);
152  for(Int4 i=0; i<len+2; i++) sequence[i] = 0;
153  // create sequence block
154  BlastSetUp_SeqBlkNew(sequence, len, &query_blk, TRUE);
155  // indicate region of query to index
156  lookup_segments=NULL;
157  BlastSeqLocNew(&lookup_segments, 0, len-1);
158  }
159 
160  // to fill up a lookup table
161  // hasNeighbor specfies if neighboring words will be considered.
162  void FillLookupTable(bool hasNeighbor=false){
163  // create lookup options
164  LookupTableOptionsNew(eBlastTypeBlastp, &lookup_options);
165  BLAST_FillLookupTableOptions(lookup_options,
167  FALSE, // megablast
168  (hasNeighbor)? 11: -1, // threshold
169  3); // word size
170  // create score blocks
172  if(hasNeighbor){
173  // generate score options
174  BlastScoringOptions *score_options;
175  BlastScoringOptionsNew(eBlastTypeBlastp, &score_options);
176  BLAST_FillScoringOptions(score_options,
178  FALSE,
179  0,
180  0,
181  NULL,
184  Blast_ScoreBlkMatrixInit(eBlastTypeBlastp, score_options, sbp,
186  BlastScoringOptionsFree(score_options);
187  }
188  // create lookup table
189  LookupTableWrapInit(query_blk,
190  lookup_options,
191  NULL,
192  lookup_segments,
193  sbp,
194  &lookup_wrap_ptr,
195  NULL /* RPS info */,
196  NULL,
197  NULL);
198  lookup = (BlastAaLookupTable*) lookup_wrap_ptr->lut;
199  }
200 };
201 
202 BOOST_FIXTURE_TEST_SUITE(aalookup, AalookupTestFixture)
203 
204 BOOST_AUTO_TEST_CASE(BackboneIntegrityTest) {
205  // get a sequence block of gi|129295
206  GetSeqBlk("gi|129295");
207  FillLookupTable();
208  // In this case, we asked for no neighboring, so there should be no neighboring words
209  BOOST_REQUIRE_EQUAL(0, lookup->threshold);
210  BOOST_REQUIRE_EQUAL(0, lookup->neighbor_matches);
211  // The selected sequence should use smallbone
212  BOOST_REQUIRE_EQUAL( lookup->bone_type, eSmallbone );
213  // count the total number of words found
214  Int4 num_hits_found = 0, i;
215  for(i=0;i<lookup->backbone_size;i++) {
216  num_hits_found += ((AaLookupSmallboneCell *)(lookup->thick_backbone))[i].num_used;
217  }
218  BOOST_REQUIRE_EQUAL(230, num_hits_found);
219 }
220 
221 
222 BOOST_AUTO_TEST_CASE(DebruijnSequenceTest) {
223  // create a debruijn sequence
224  GetSeqBlk();
225  FillLookupTable();
226  // The constructed sequence should use smallbone
227  BOOST_REQUIRE_EQUAL( lookup->bone_type, eSmallbone );
228  Int4 i;
229  // by definition, a de Bruijn sequence contains one occurrence of each word.
230  for(i=0;i<lookup->backbone_size;i++) {
231  Int4 num_used = ((AaLookupSmallboneCell *)(lookup->thick_backbone))[i].num_used;
232  // some cells should be vacant
233  if(
234  ( ( i & 0x1F ) >= BLASTAA_SIZE )
235  || ( ( (i & 0x3E0) >> 5 ) >= BLASTAA_SIZE )
236  || ( ( ( i & 0x7C00 ) >> 10 ) >= BLASTAA_SIZE )
237  ){
238  BOOST_REQUIRE_EQUAL( num_used, 0 );
239  }else{
240  // otherwise, the cell should contain exactly one hit
241  BOOST_REQUIRE_EQUAL( num_used, 1 );
242  }
243  }
244 }
245 
246 
247 BOOST_AUTO_TEST_CASE(NeighboringWordsTest) {
248  // create a deruijn sequence for neibhoring words test
249  GetSeqBlk();
250  FillLookupTable(true);
251  // The constructed sequence should use smallbone
252  BOOST_REQUIRE_EQUAL( lookup->bone_type, eSmallbone );
253  // Now we have neighboring words, for each possible 3-mer,
254  Int4 index;
255  for(Int4 u=0;u<BLASTAA_SIZE;u++)
256  for(Int4 v=0;v<BLASTAA_SIZE;v++)
257  for(Int4 w=0;w<BLASTAA_SIZE;w++)
258  {
259  Int4 score;
260  Int4 count = 0;
261  // find its neighbors by brute force
262  for(Int4 x=0;x<BLASTAA_SIZE;x++)
263  for(Int4 y=0;y<BLASTAA_SIZE;y++)
264  for(Int4 z=0;z<BLASTAA_SIZE;z++)
265  {
266  // compute the score of these two words
267  score = sbp->matrix->data[u][x] + sbp->matrix->data[v][y] +
268  sbp->matrix->data[w][z];
269  // if the score is above the threshold or the words match, record it
270  if ( (score >= 11) || ( (u==x) && (v==y) && (w==z) ) )
271  count++;
272  }
273  // compute the index of the word
274  index = (u << 10) | (v << 5) | (w);
275  // ensure that the number of neighbors matches the lookup table
276  //printf("count=%d, lut=%d\n",count, lookup->thick_backbone[index].num_used);
277  Int4 num_used = ((AaLookupSmallboneCell *)(lookup->thick_backbone))[index].num_used;
278  BOOST_REQUIRE_EQUAL(count, num_used);
279  }
280 }
281 
282 BOOST_AUTO_TEST_CASE(BackboneSequenceTest) {
283  // create a trivial sequence
284  Int4 len = 65534; // 65535 is the maximum possible unsigned short
285  GetSeqBlk(len);
286  FillLookupTable();
287  BOOST_REQUIRE_EQUAL( lookup->bone_type, eBackbone );
288  Int4 num_used = ((AaLookupBackboneCell *)(lookup->thick_backbone))[0].num_used;
289  BOOST_REQUIRE_EQUAL(num_used, len-2);
290  Int4 offset = ((Int4 *)(lookup->overflow))[num_used-1];
291  BOOST_REQUIRE_EQUAL(offset, len-3);
292 }
293 
294 BOOST_AUTO_TEST_CASE(SmallboneSequenceTest) {
295  // create a trivial sequence
296  Int4 len = 65533;
297  GetSeqBlk(len);
298  FillLookupTable();
299  BOOST_REQUIRE_EQUAL( lookup->bone_type, eSmallbone );
300  Int4 num_used = ((AaLookupSmallboneCell *)(lookup->thick_backbone))[0].num_used;
301  BOOST_REQUIRE_EQUAL(num_used, len-2);
302  Int4 offset = ((Uint2 *)(lookup->overflow))[num_used-1];
303  BOOST_REQUIRE_EQUAL(offset, len-3);
304 }
305 
306 
307 #if 0
308 
309 // Needs to be fixed to actually use a PSSM
310 BOOST_AUTO_TEST_CASE(testDebruijnPSSM)
311 {
312  Int4 i=0,j=0; // loop indices
313 
314  Int4 k=BLASTAA_SIZE; // alphabet size
315  Int4 n=3; // word size
316 
317  Int4 len = iexp(k,n) + (n-1);
318 
319  // leave room for and pad with sentinels
320  Uint1 *sequence = (Uint1*) malloc(len + 2);
321  sequence[0] = 0;
322  sequence[len+1] = 0;
323  // generate debruijn sequence
324  debruijn(n,k,sequence+1,NULL);
325  // unwrap it
326  for(i=0;i<n-1;i++)
327  sequence[len-n+2+i] = sequence[i];
328 
329  // create sequence block
330  BLAST_SequenceBlk *query_blk_debruijn=NULL;
331  BlastSetUp_SeqBlkNew(sequence, len, &query_blk_debruijn, TRUE);
332 
333  // indicate region of query to index
334  BlastSeqLoc *lookup_segments_debruijn=NULL;
335  BlastSeqLocNew(&lookup_segments_debruijn, 0, len-1);
336 
337  // create the score block
338  BlastScoringOptions *score_options = NULL;
339  BlastScoringOptionsNew(eBlastTypeBlastp, &score_options);
340  BLAST_FillScoringOptions(score_options,
342  FALSE,
343  0,
344  0,
345  NULL,
348 
351 
352  // create the PSSM
354 
355  pssm_sbp->posMatrix = (Int4 **) calloc(len, sizeof(Int4 *));
356 
357  for(i=0;i<len;i++)
358  {
359  pssm_sbp->posMatrix[i] = (Int4 *) calloc(BLASTAA_SIZE, sizeof(Int4));
360 
361  for(j=0;j<BLASTAA_SIZE;j++)
362  pssm_sbp->posMatrix[i][j] = sbp->matrix[j][sequence[i+1]];
363  }
364 
365  // set PSSM lookup table options
366  LookupTableOptions* pssm_lut_options=NULL;
367  LookupTableOptionsNew(eBlastTypeBlastp, &pssm_lut_options);
368  BLAST_FillLookupTableOptions(pssm_lut_options,
370  FALSE, // megablast
371  11, // threshold
372  3); // word size
373 
374  // create the PSSM lookup table
375  LookupTableWrap* pssm_lut_wrap_ptr=NULL;
376 
377  LookupTableWrapInit(NULL, //query_blk_debruijn,
378  pssm_lut_options,
379  NULL,
380  lookup_segments_debruijn,
381  pssm_sbp,
382  &pssm_lut_wrap_ptr,
383  NULL /* RPS Info */,
384  NULL);
385 
386  BlastAaLookupTable* pssm_lut = (BlastAaLookupTable*) pssm_lut_wrap_ptr->lut;
387 
388 {
389  /* for each possible 3-mer, */
390 
391  Int4 index;
392 
393  for(i=0;i<BLASTAA_SIZE;i++)
394  for(j=0;j<BLASTAA_SIZE;j++)
395  for(k=0;k<BLASTAA_SIZE;k++)
396  {
397  Int4 score;
398  Int4 count = 0;
399 
400  /* find its neighbors by brute force */
401 
402  for(Int4 x=0;x<BLASTAA_SIZE;x++)
403  for(Int4 y=0;y<BLASTAA_SIZE;y++)
404  for(Int4 z=0;z<BLASTAA_SIZE;z++)
405  {
406  /* compute the score of these two words */
407  score = sbp->matrix[i][x] + sbp->matrix[j][y] + sbp->matrix[k][z];
408 
409  /* if the score is above the threshold, record it */
410  if (score >= 11)
411  count++;
412  }
413 
414  /* compute the index of the word */
415 
416  index = (i << 10) | (j << 5) | (k);
417 
418  /* ensure that the number of neighbors matches the lookup table */
419  //printf("count=%d, lut=%d\n",count, pssm_lut->thick_backbone[index].num_used);
420  BOOST_REQUIRE_EQUAL(count, pssm_lut->thick_backbone[index].num_used);
421  }
422 }
423 
424  LookupTableWrapFree(pssm_lut_wrap_ptr);
425  LookupTableOptionsFree(pssm_lut_options);
426  BlastScoreBlkFree(pssm_sbp);
427 
428  BlastSequenceBlkFree(query_blk_debruijn);
429  BlastSeqLocFree(lookup_segments_debruijn);
430 
431  BlastScoreBlkFree(sbp);
432  BlastScoringOptionsFree(score_options);
433 }
434 // testDebruijnPSSM
435 #endif
436 
437 // TestBlastAaLookupTable
438 
439 // Register this test suite with the default test factory registry
440 
442 
444 
451 
452  // constructor
453  // this constructor actually does nothing.
454  // use GetSeqBlk() and FillLookupTable() to instantiate a
455  // testing lookuptable instead.
457  query_blk=NULL;
458  lookup_segments=NULL;
459  lookup_options=NULL;
460  sbp=NULL;
461  lookup_wrap_ptr=NULL;
462  lookup=NULL;
463  }
464 
465  // destructor
467  LookupTableWrapFree(lookup_wrap_ptr);
468  LookupTableOptionsFree(lookup_options);
469  BlastSequenceBlkFree(query_blk);
470  BlastSeqLocFree(lookup_segments);
471  BlastScoreBlkFree(sbp);
472  }
473 
474  // to create a sequence with given gid
475  void GetSeqBlk(const string & id){
476  CSeq_id seq_id(id);
477  unique_ptr<SSeqLoc> ssl(CTestObjMgr::Instance().CreateSSeqLoc(seq_id, eNa_strand_unknown));
478  SBlastSequence sequence =
479  GetSequence(*ssl->seqloc,
481  ssl->scope,
482  eNa_strand_unknown, // strand not applicable
483  eNoSentinels); // nucl sentinel not applicable
484  // Create the sequence block.
485  // Note that GetSequence always includes sentinel bytes for
486  // protein sequences.
487  BlastSeqBlkNew(&query_blk);
488  BlastSeqBlkSetSequence(query_blk, sequence.data.release(),
489  sequence.length - 2);
490 
491  const Uint1 kNullByte = GetSentinelByte(eBlastEncodingProtein);
492  BOOST_REQUIRE(query_blk != NULL);
493  BOOST_REQUIRE(query_blk->sequence[0] != kNullByte);
494  BOOST_REQUIRE(query_blk->sequence[query_blk->length - 1] != kNullByte);
495  BOOST_REQUIRE(query_blk->sequence_start[0] == kNullByte);
496  BOOST_REQUIRE(query_blk->sequence_start[query_blk->length + 1] ==
497  kNullByte);
498  // Indicate which regions of the query to index
499  // The interval is [0...length-1] but must ignore the two
500  // sentinel bytes. This makes the interval [0...length-3]
501  BlastSeqLocNew(&lookup_segments, 0, sequence.length - 3);
502  }
503 
504  // to fill up a lookup table
505  // hasNeighbor specfies if neighboring words will be considered.
507  // create lookup options
508  LookupTableOptionsNew(eBlastTypeBlastp, &lookup_options);
509  BLAST_FillLookupTableOptions(lookup_options,
511  FALSE, // megablast
512  21, // threshold
513  6); // word size
514  // create score blocks
516  // generate score options
517  BlastScoringOptions *score_options;
518  BlastScoringOptionsNew(eBlastTypeBlastp, &score_options);
519  BLAST_FillScoringOptions(score_options,
521  FALSE,
522  0,
523  0,
524  NULL,
527  Blast_ScoreBlkMatrixInit(eBlastTypeBlastp, score_options, sbp,
529  BlastScoringOptionsFree(score_options);
530  // create lookup table
531  LookupTableWrapInit(query_blk,
532  lookup_options,
533  NULL,
534  lookup_segments,
535  sbp,
536  &lookup_wrap_ptr,
537  NULL /* RPS info */,
538  NULL,
539  NULL);
540  BOOST_REQUIRE_EQUAL(lookup_wrap_ptr->lut_type, eCompressedAaLookupTable);
541  lookup = (BlastCompressedAaLookupTable*) lookup_wrap_ptr->lut;
542  }
543 };
544 
545 BOOST_FIXTURE_TEST_SUITE(CompressedAalookup, CompressedAalookupTestFixture)
546 
547 BOOST_AUTO_TEST_CASE(CompressedLookupNeighboringWordsTest) {
548  // create a deruijn sequence for neibhoring words test
549  GetSeqBlk("WP_130744894.1");
550  FillLookupTable();
551  BOOST_REQUIRE_EQUAL(lookup->threshold, 2100);
552  BOOST_REQUIRE_EQUAL(lookup->word_length, 6);
553  BOOST_REQUIRE_EQUAL(lookup->compressed_alphabet_size, 15);
554 
555  CompressedLookupBackboneCell *backbone_cell;
556  backbone_cell = lookup->backbone + 6189626;
557  BOOST_REQUIRE_EQUAL(backbone_cell->num_used, 2);
558  Int4 index[8] ={6189626,3308318, 6298163, 9877654, 4975326, 3450036, 6447263, 500762};
559  Int4 num_used[8] ={2, 1, 1, 0, 1, 2, 2, 0};
560  for(int i=0; i < 8; i++) {
561  backbone_cell = lookup->backbone + index[i];
562  BOOST_REQUIRE_EQUAL(backbone_cell->num_used, num_used[i]);
563  }
564 }
565 
566 BOOST_AUTO_TEST_CASE(CompressedLookupNeighboringWordsTest2) {
567  // create a deruijn sequence for neibhoring words test
568  GetSeqBlk("CAA50632.1");
569  FillLookupTable();
570  BOOST_REQUIRE_EQUAL(lookup->threshold, 2100);
571  BOOST_REQUIRE_EQUAL(lookup->word_length, 6);
572  BOOST_REQUIRE_EQUAL(lookup->compressed_alphabet_size, 15);
573 
574  CompressedLookupBackboneCell *backbone_cell;
575  Int4 index[8] ={9958159, 8870278 , 44768, 3870741, 1960395 , 6981008 , 5035027, 166507 };
576  Int4 num_used[8] ={9, 8, 7, 5, 3, 2, 1 , 0};
577  for(int i=0; i < 8; i++) {
578  backbone_cell = lookup->backbone + index[i];
579  BOOST_REQUIRE_EQUAL(backbone_cell->num_used, num_used[i]);
580  }
581 }
582 
583 
BOOST_AUTO_TEST_CASE(BackboneIntegrityTest)
Declares the CBl2Seq (BLAST 2 Sequences) class.
Routines for creating protein BLAST lookup tables.
@ eBackbone
@ eSmallbone
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Definition: blast_filter.c:737
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Definition: blast_filter.c:608
Definitions which are dependant on the NCBI C++ Object Manager.
#define BLAST_GAP_OPEN_PROT
Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
Definition: blast_options.h:84
Int2 BLAST_FillScoringOptions(BlastScoringOptions *options, EBlastProgramType program, Boolean greedy_extension, Int4 penalty, Int4 reward, const char *matrix, Int4 gap_open, Int4 gap_extend)
Fill non-default values in the BlastScoringOptions structure.
#define BLAST_GAP_EXTN_PROT
cost to extend a gap.
Definition: blast_options.h:92
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
Int2 BLAST_FillLookupTableOptions(LookupTableOptions *options, EBlastProgramType program, Boolean is_megablast, double threshold, Int4 word_size)
Allocate memory for lookup table options and fill with default values.
Int2 LookupTableOptionsNew(EBlastProgramType program, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
LookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)
Deallocates memory for LookupTableOptions*.
@ eBlastTypeBlastp
Definition: blast_program.h:73
Utilities initialize/setup BLAST.
Int2 Blast_ScoreBlkMatrixInit(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)
Initializes the substitution matrix in the BlastScoreBlk according to the scoring options specified.
Definition: blast_setup.c:330
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
Definition: blast_stat.c:965
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
Definition: blast_stat.c:884
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Definition: blast_util.c:245
Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen)
Stores the sequence in the sequence block structure.
Definition: blast_util.c:147
Int2 BlastSetUp_SeqBlkNew(const Uint1 *buffer, Int4 length, BLAST_SequenceBlk **seq_blk, Boolean buffer_allocated)
Allocates memory for *sequence_blk and then populates it.
Definition: blast_util.c:101
Int2 BlastSeqBlkNew(BLAST_SequenceBlk **retval)
Allocates a new sequence block structure.
Definition: blast_util.c:133
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
static CTestObjMgr & Instance()
Definition: test_objmgr.cpp:69
static int lookup(const char *name, const struct lookup_int *table)
Definition: attributes.c:50
int offset
Definition: replacements.h:160
TSeqPos length
Length of the buffer above (not necessarily sequence length!)
Definition: blast_setup.hpp:65
#define BLASTAA_SIZE
Size of aminoacid alphabet.
#define BLASTAA_SEQ_CODE
== Seq_code_ncbistdaa
TAutoUint1Ptr data
Sequence data.
Definition: blast_setup.hpp:64
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
Uint1 GetSentinelByte(EBlastEncoding encoding) THROWS((CBlastException))
Convenience function to centralize the knowledge of which sentinel bytes we use for supported encodin...
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
@ eBlastEncodingProtein
NCBIstdaa.
@ eNoSentinels
Do not use sentinel bytes.
Definition: blast_setup.hpp:95
element_type * release(void)
Release will release ownership of pointer to caller.
Definition: ncbimisc.hpp:472
#define NULL
Definition: ncbistd.hpp:225
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint16_t Uint2
2-byte (16-bit) unsigned integer
Definition: ncbitype.h:101
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
int i
yy_size_t n
int len
Utility functions for lookup table generation.
Int4 iexp(Int4 x, Int4 n)
Integer exponentiation using right to left binary algorithm.
Definition: lookup_util.c:47
void debruijn(Int4 n, Int4 k, Uint1 *output, Uint1 *alphabet)
generates a de Bruijn sequence containing all substrings of length n over an alphabet of size k.
Definition: lookup_util.c:170
LookupTableWrap * LookupTableWrapFree(LookupTableWrap *lookup)
Deallocate memory for the lookup table.
Definition: lookup_wrap.c:197
Int2 LookupTableWrapInit(BLAST_SequenceBlk *query, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, BlastSeqLoc *lookup_segments, BlastScoreBlk *sbp, LookupTableWrap **lookup_wrap_ptr, const BlastRPSInfo *rps_info, Blast_Message **error_msg, BlastSeqSrc *seqsrc)
Create the lookup table for all query words.
Definition: lookup_wrap.c:47
Magic spell ;-) needed for some weird compilers... very empiric.
#define TRUE
bool replacment for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacment for C indicating false.
Definition: ncbi_std.h:101
Defines: CTimeFormat - storage class for time format.
The Object manager core.
#define count
structure defining one cell of the compacted lookup table
structure defining one cell of the small (i.e., use short) lookup table
A general lookup table for the test.
BlastSeqLoc * lookup_segments
LookupTableWrap * lookup_wrap_ptr
void GetSeqBlk(string gid)
LookupTableOptions * lookup_options
BLAST_SequenceBlk * query_blk
void FillLookupTable(bool hasNeighbor=false)
BlastAaLookupTable * lookup
Structure to hold a sequence.
Definition: blast_def.h:242
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Definition: blast_def.h:244
Int4 length
Length of sequence.
Definition: blast_def.h:246
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
The basic lookup table structure for blastp searches.
void * thick_backbone
may point to BackboneCell, SmallboneCell, or TinyboneCell.
The lookup table structure for protein searches using a compressed alphabet.
Structure used for scoring calculations.
Definition: blast_stat.h:177
SBlastScoreMatrix * matrix
scoring matrix data
Definition: blast_stat.h:185
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
BlastCompressedAaLookupTable * lookup
void GetSeqBlk(const string &id)
structure for hashtable of indexed query offsets
Options needed to construct a lookup table Also needed: query sequence and query length.
Wrapper structure for different types of BLAST lookup tables.
Definition: lookup_wrap.h:50
void * lut
Pointer to the actual lookup table structure.
Definition: lookup_wrap.h:52
ELookupTableType lut_type
What kind of a lookup table it is?
Definition: lookup_wrap.h:51
int ** data
actual scoring matrix data, stored in row-major form
Definition: blast_stat.h:140
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Definition: blast_setup.hpp:62
Utility stuff for more convenient using of Boost.Test library.
voidp malloc(uInt size)
voidp calloc(uInt items, uInt size)
Modified on Wed Sep 04 14:58:48 2024 by modify_doxy.py rev. 669887