NCBI C++ ToolKit
ntlookup_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: ntlookup_unit_test.cpp 92007 2020-12-17 15:27:31Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Tom Madden
27 *
28 * File Description:
29 * Unit test module for the nucleotide lookup tables.
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 
36 #include <corelib/ncbitime.hpp>
38 #include <objmgr/scope.hpp>
40 #include <objmgr/util/sequence.hpp>
41 
46 #include <serial/serial.hpp>
47 #include <serial/iterator.hpp>
48 #include <serial/objostr.hpp>
49 
51 #include <blast_objmgr_priv.hpp>
52 
62 
63 #include "test_objmgr.hpp"
64 #include "blast_test_util.hpp"
65 
66 using namespace std;
67 using namespace ncbi;
68 using namespace ncbi::objects;
69 using namespace ncbi::blast;
70 using namespace TestUtil;
71 
72 #define NULL_NUCL_SENTINEL 0xf
73 #define SMALL_QUERY_GI 1945386
74 #define LARGE_QUERY_GI 19572546
75 
77 
80 
82  query_blk = NULL;
83  lookup_segments = NULL;
84  }
85 
87  query_blk = BlastSequenceBlkFree(query_blk);
88  lookup_segments = BlastSeqLocFree(lookup_segments);
89  }
90 
// Loads the query identified by query_gi into the fixture members.
//
// The sequence is fetched with sentinels (eSentinels) and handed to
// query_blk; lookup_segments is then filled with the query regions to index:
//   - SMALL_QUERY_GI: four fixed ranges;
//   - any other GI:   two ranges on either side of position
//                     (query_blk->length - 1) / 2, which is itself left out.
//
// @param query_gi  GI of the query; formatted as "gi|<number>" to build the
//                  CSeq_id.
91  void SetUpQuery(Uint4 query_gi)
92  {
93  char buf[64];
94  Int4 status;
95  // load the query
// NOTE(review): query_gi is a Uint4 but is printed with "%d"; "%u" would
// match the unsigned type -- confirm before changing.
96  sprintf(buf, "gi|%d", query_gi);
97  CSeq_id id(buf);
98 
99  unique_ptr<SSeqLoc> ssl(CTestObjMgr::Instance().CreateSSeqLoc(
100  id, eNa_strand_both));
101 
102  SBlastSequence sequence(
103  GetSequence(*ssl->seqloc,
105  ssl->scope,
107  eSentinels));
108 
109  // create the sequence block. The size to pass in
110  // must not include the sentinel bytes on either
111  // end of the sequence
112 
113  query_blk = NULL;
114  status = BlastSeqBlkNew(&query_blk);
115  BOOST_REQUIRE_EQUAL(0, status);
// release() hands the raw buffer to the sequence block; "- 2" drops the two
// sentinel bytes from the reported length.
116  status = BlastSeqBlkSetSequence(query_blk, sequence.data.release(),
117  sequence.length - 2);
118  BOOST_REQUIRE_EQUAL(0, status);
119 
120  BOOST_REQUIRE(query_blk != NULL);
121  BOOST_REQUIRE(query_blk->sequence != NULL);
122  BOOST_REQUIRE(query_blk->length > 0);
123 
// The checks below compare the first/last bytes of sequence and
// sequence_start against NULL_NUCL_SENTINEL.
124  BOOST_REQUIRE(query_blk != NULL);
125  BOOST_REQUIRE(query_blk->sequence[0] != NULL_NUCL_SENTINEL);
126  BOOST_REQUIRE(query_blk->sequence[query_blk->length - 1] !=
128  BOOST_REQUIRE(query_blk->sequence_start[0] == NULL_NUCL_SENTINEL);
129  BOOST_REQUIRE(query_blk->sequence_start[query_blk->length + 1] ==
131 
// Regions of the query to be indexed by the lookup table.
132  lookup_segments = 0;
133  if (query_gi == SMALL_QUERY_GI) {
134  BlastSeqLocNew(&lookup_segments, 0, 1649);
135  BlastSeqLocNew(&lookup_segments, 1656, 2756);
136  BlastSeqLocNew(&lookup_segments, 2789, 3889);
137  BlastSeqLocNew(&lookup_segments, 3896, 5544);
138  }
139  else {
140  BlastSeqLocNew(&lookup_segments, 0, (query_blk->length - 1)/2-1);
141  BlastSeqLocNew(&lookup_segments, (query_blk->length - 1) / 2 + 1,
142  query_blk->length - 1);
143  }
144  }
145 
146  // word_size is word-size
147  // alphabet_size is alphabet size (typically 4 for nucleotides).
148  void debruijnInit(int word_size, int alphabet_size) {
149 
150  // get length of sequence.
151  int len = iexp(alphabet_size,word_size) + (word_size-1);
152 
153  /* leave room for and pad with sentinels */
154  Uint1* sequence = (Uint1*) malloc(len + 2);
155  sequence[0] = NULL_NUCL_SENTINEL;
156  sequence[len+1] = NULL_NUCL_SENTINEL;
157 
158  debruijn(word_size,alphabet_size,sequence+1,0); // generate sequence
159 
160  for(int i=1;i<word_size;i++)
161  sequence[len-word_size+1+i] = sequence[i];
162 
163  /* create sequence block */
164  query_blk = 0;
165  BlastSetUp_SeqBlkNew(sequence, len, &query_blk, TRUE);
166 
167  /* indicate region of query to index */
168  lookup_segments = 0;
169  BlastSeqLocNew(&lookup_segments, 0, len-1);
170 
171  }
172 };
173 
174 BOOST_FIXTURE_TEST_SUITE(ntlookup, NtlookupTestFixture)
175 
// Builds a lookup table for SMALL_QUERY_GI with blastn options and requires
// that LookupTableWrapInit succeeds, that the resulting table is of type
// eSmallNaLookupTable, and that its backbone size, longest chain, overflow
// size and three spot-checked final_backbone cells have the expected values.
176 BOOST_AUTO_TEST_CASE(testStdLookupTable) {
177  SetUpQuery(SMALL_QUERY_GI);
178 
179  LookupTableOptions* lookup_options;
180  LookupTableOptionsNew(eBlastTypeBlastn, &lookup_options);
181  BLAST_FillLookupTableOptions(lookup_options,
183  FALSE, 0, 0);
184 
185 
186  QuerySetUpOptions* query_options;
187  BlastQuerySetUpOptionsNew(&query_options);
188  LookupTableWrap* lookup_wrap_ptr;
189  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
190  lookup_options, query_options, lookup_segments,
191  0, &lookup_wrap_ptr, NULL, NULL, NULL), 0);
// The query options are released right after table construction.
192  query_options = BlastQuerySetUpOptionsFree(query_options);
193  BOOST_REQUIRE(query_options == NULL);
194  BOOST_REQUIRE_EQUAL(eSmallNaLookupTable,
195  (ELookupTableType)lookup_wrap_ptr->lut_type);
196 
198  (BlastSmallNaLookupTable*) lookup_wrap_ptr->lut;
199  BOOST_REQUIRE_EQUAL(65536, lookup->backbone_size);
200  BOOST_REQUIRE_EQUAL(4, lookup->longest_chain);
201  BOOST_REQUIRE_EQUAL(1444, lookup->overflow_size);
202  BOOST_REQUIRE_EQUAL((Int2)2819, lookup->final_backbone[48]);
203  BOOST_REQUIRE_EQUAL((Int2)754, lookup->final_backbone[42889]);
204  BOOST_REQUIRE_EQUAL((Int2)(-345), lookup->final_backbone[21076]);
205 
// Both free functions are expected to return NULL.
206  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
207  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
208  lookup_options = LookupTableOptionsFree(lookup_options);
209  BOOST_REQUIRE(lookup_options == NULL);
210 }
211 
// Builds a lookup table for LARGE_QUERY_GI and checks the resulting
// BlastMBLookupTable: hash size, word length, scan step, longest chain,
// pv_array_bts, three spot-checked hashtable cells and a hash of the whole
// pv_array.
212 BOOST_AUTO_TEST_CASE(testMegablastLookupTable)
213 {
214  SetUpQuery(LARGE_QUERY_GI);
215 
216  LookupTableOptions* lookup_options;
217  LookupTableOptionsNew(eBlastTypeBlastn, &lookup_options);
219  TRUE, 0, 0);
220 
221  QuerySetUpOptions* query_options;
222  BlastQuerySetUpOptionsNew(&query_options);
223  LookupTableWrap* lookup_wrap_ptr;
224  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
225  lookup_options, query_options, lookup_segments,
226  0, &lookup_wrap_ptr, NULL, NULL, NULL), 0);
227  query_options = BlastQuerySetUpOptionsFree(query_options);
228  BOOST_REQUIRE(query_options == NULL);
229  BOOST_REQUIRE_EQUAL((ELookupTableType)lookup_wrap_ptr->lut_type,
231 
232  BlastMBLookupTable* lookup = (BlastMBLookupTable*) lookup_wrap_ptr->lut;
233  BOOST_REQUIRE_EQUAL(4194304, lookup->hashsize);
234  BOOST_REQUIRE_EQUAL(28, (int)lookup->word_length);
235  BOOST_REQUIRE_EQUAL(18, lookup->scan_step);
236  BOOST_REQUIRE_EQUAL(37, lookup->longest_chain);
237  BOOST_REQUIRE_EQUAL(7, lookup->pv_array_bts);
238  BOOST_REQUIRE_EQUAL(5868, lookup->hashtable[36604]);
239  BOOST_REQUIRE_EQUAL(14646, lookup->hashtable[1426260]);
240  BOOST_REQUIRE_EQUAL(290, lookup->hashtable[4007075]);
241 
// pv_array_size counts PV_ARRAY_TYPE elements; it is converted to a byte
// count for the hash, which is then compared with a fixed expected value.
242  int pv_array_size = (lookup->hashsize >> lookup->pv_array_bts);
243  int pv_array_hash =
244  EndianIndependentBufferHash((char*) lookup->pv_array,
245  pv_array_size * sizeof(PV_ARRAY_TYPE),
246  sizeof(PV_ARRAY_TYPE));
247  BOOST_REQUIRE_EQUAL(-729205454, pv_array_hash);
248 
249  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
250  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
251  lookup_options = LookupTableOptionsFree(lookup_options);
252  BOOST_REQUIRE(lookup_options == NULL);
253 }
254 
// Discontiguous megablast table for SMALL_QUERY_GI with template length 16
// and template type eMBWordCoding; the table is expected to report word
// length 11. Checks the table type (eMBLookupTable), its scalar fields,
// three spot-checked hashtable cells and a hash of the pv_array.
255 BOOST_AUTO_TEST_CASE(testDiscontiguousMBLookupTableCodingWordSize11) {
256 
257  SetUpQuery(SMALL_QUERY_GI);
258  LookupTableOptions* lookup_options;
259  LookupTableOptionsNew(eBlastTypeBlastn, &lookup_options);
261  TRUE, 0, 11);
// Template settings are written directly into the options structure.
262  lookup_options->mb_template_length = 16;
263  lookup_options->mb_template_type = eMBWordCoding;
264 
265  QuerySetUpOptions* query_options;
266  BlastQuerySetUpOptionsNew(&query_options);
267  LookupTableWrap* lookup_wrap_ptr;
268  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
269  lookup_options, query_options, lookup_segments,
270  0, &lookup_wrap_ptr, NULL, NULL, NULL), 0);
271  query_options = BlastQuerySetUpOptionsFree(query_options);
272  BOOST_REQUIRE(query_options == NULL);
273  BOOST_REQUIRE_EQUAL(eMBLookupTable, (ELookupTableType)lookup_wrap_ptr->lut_type);
274 
275  BlastMBLookupTable* lookup = (BlastMBLookupTable*) lookup_wrap_ptr->lut;
276  BOOST_REQUIRE_EQUAL(4194304, lookup->hashsize); // 4**11
277  BOOST_REQUIRE_EQUAL(11, (int)lookup->word_length);
278  BOOST_REQUIRE_EQUAL(true, (bool)lookup->discontiguous);
279  BOOST_REQUIRE_EQUAL(16, (int)lookup->template_length);
280  BOOST_REQUIRE_EQUAL(1, (int)lookup->template_type);
281  BOOST_REQUIRE_EQUAL(1, lookup->scan_step);
282  BOOST_REQUIRE_EQUAL(2, lookup->longest_chain);
283  BOOST_REQUIRE_EQUAL(49, lookup->hashtable[2463300]);
284  BOOST_REQUIRE_EQUAL(392, lookup->hashtable[1663305]);
285  BOOST_REQUIRE_EQUAL(1049, lookup->hashtable[3586129]);
286  BOOST_REQUIRE_EQUAL(8, lookup->pv_array_bts);
287 
// Hash the whole pv_array (element count converted to bytes) and compare
// against a fixed expected value.
288  int pv_array_size = (lookup->hashsize >> lookup->pv_array_bts);
289  int pv_array_hash =
290  EndianIndependentBufferHash((char*) lookup->pv_array,
291  pv_array_size * sizeof(PV_ARRAY_TYPE),
292  sizeof(PV_ARRAY_TYPE));
293  BOOST_REQUIRE_EQUAL(-160576483, pv_array_hash);
294 
295  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
296  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
297  lookup_options = LookupTableOptionsFree(lookup_options);
298  BOOST_REQUIRE(lookup_options == NULL);
299 }
300 
// Discontiguous megablast table for SMALL_QUERY_GI with template length 16
// and template type eMBWordCoding; the table is expected to report word
// length 12. Checks the table type (eMBLookupTable), its scalar fields,
// three spot-checked hashtable cells and a hash of the pv_array.
301 BOOST_AUTO_TEST_CASE(testDiscontiguousMBLookupTableCodingWordSize12) {
302 
303  SetUpQuery(SMALL_QUERY_GI);
304  LookupTableOptions* lookup_options;
305  LookupTableOptionsNew(eBlastTypeBlastn, &lookup_options);
307  TRUE, 0, 12);
// Template settings are written directly into the options structure.
308  lookup_options->mb_template_length = 16;
309  lookup_options->mb_template_type = eMBWordCoding;
310 
311  QuerySetUpOptions* query_options;
312  BlastQuerySetUpOptionsNew(&query_options);
313  LookupTableWrap* lookup_wrap_ptr;
314  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
315  lookup_options, query_options, lookup_segments,
316  0, &lookup_wrap_ptr, NULL, NULL, NULL), 0);
317  query_options = BlastQuerySetUpOptionsFree(query_options);
318  BOOST_REQUIRE(query_options == NULL);
319  BOOST_REQUIRE_EQUAL(eMBLookupTable, (ELookupTableType)lookup_wrap_ptr->lut_type);
320 
321  BlastMBLookupTable* lookup = (BlastMBLookupTable*) lookup_wrap_ptr->lut;
322  BOOST_REQUIRE_EQUAL(16777216, lookup->hashsize); // 4**12
323  BOOST_REQUIRE_EQUAL(12, (int)lookup->word_length);
324  BOOST_REQUIRE_EQUAL(true, (bool)lookup->discontiguous);
325  BOOST_REQUIRE_EQUAL(16, (int)lookup->template_length);
326  BOOST_REQUIRE_EQUAL(3, (int)lookup->template_type);
327  BOOST_REQUIRE_EQUAL(1, lookup->scan_step);
328  BOOST_REQUIRE_EQUAL(2, lookup->longest_chain);
329  BOOST_REQUIRE_EQUAL(3631, lookup->hashtable[133875]);
330  BOOST_REQUIRE_EQUAL(2092, lookup->hashtable[351221]);
331  BOOST_REQUIRE_EQUAL(4951, lookup->hashtable[1336356]);
332  BOOST_REQUIRE_EQUAL(10, lookup->pv_array_bts);
333 
// Hash the whole pv_array (element count converted to bytes) and compare
// against a fixed expected value.
334  int pv_array_size = (lookup->hashsize >> lookup->pv_array_bts);
335  int pv_array_hash =
336  EndianIndependentBufferHash((char*) lookup->pv_array,
337  pv_array_size * sizeof(PV_ARRAY_TYPE),
338  sizeof(PV_ARRAY_TYPE));
339  BOOST_REQUIRE_EQUAL(-630452942, pv_array_hash);
340 
341  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
342  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
343  lookup_options = LookupTableOptionsFree(lookup_options);
344  BOOST_REQUIRE(lookup_options == NULL);
345 }
346 
// Discontiguous megablast table for SMALL_QUERY_GI with template length 16
// and template type eMBWordOptimal; the table is expected to report word
// length 11. Checks the table type (eMBLookupTable), its scalar fields,
// three spot-checked hashtable cells and a hash of the pv_array.
347 BOOST_AUTO_TEST_CASE(testDiscontiguousMBLookupTableOptimalWordSize11) {
348 
349  SetUpQuery(SMALL_QUERY_GI);
350  LookupTableOptions* lookup_options;
351  LookupTableOptionsNew(eBlastTypeBlastn, &lookup_options);
353  TRUE, 0, 11);
// Template settings are written directly into the options structure.
354  lookup_options->mb_template_length = 16;
355  lookup_options->mb_template_type = eMBWordOptimal;
356 
357  QuerySetUpOptions* query_options;
358  BlastQuerySetUpOptionsNew(&query_options);
359  LookupTableWrap* lookup_wrap_ptr;
360  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
361  lookup_options, query_options, lookup_segments,
362  0, &lookup_wrap_ptr, NULL, NULL, NULL), 0);
363  query_options = BlastQuerySetUpOptionsFree(query_options);
364  BOOST_REQUIRE(query_options == NULL);
365  BOOST_REQUIRE_EQUAL(eMBLookupTable, (ELookupTableType)lookup_wrap_ptr->lut_type);
366 
367  BlastMBLookupTable* lookup = (BlastMBLookupTable*) lookup_wrap_ptr->lut;
368  BOOST_REQUIRE_EQUAL(4194304, lookup->hashsize); // 4**11
369  BOOST_REQUIRE_EQUAL(11, (int)lookup->word_length);
370  BOOST_REQUIRE_EQUAL(true, (bool)lookup->discontiguous);
371  BOOST_REQUIRE_EQUAL(16, (int)lookup->template_length);
372  BOOST_REQUIRE_EQUAL(2, (int)lookup->template_type);
373  BOOST_REQUIRE_EQUAL(1, lookup->scan_step);
374  BOOST_REQUIRE_EQUAL(2, lookup->longest_chain);
375  BOOST_REQUIRE_EQUAL(36, lookup->hashtable[1353317]);
376  BOOST_REQUIRE_EQUAL(375, lookup->hashtable[1955444]);
377  BOOST_REQUIRE_EQUAL(5455, lookup->hashtable[1735012]);
378  BOOST_REQUIRE_EQUAL(8, lookup->pv_array_bts);
379 
// Hash the whole pv_array (element count converted to bytes) and compare
// against a fixed expected value.
380  int pv_array_size = (lookup->hashsize >> lookup->pv_array_bts);
381  int pv_array_hash =
382  EndianIndependentBufferHash((char*) lookup->pv_array,
383  pv_array_size * sizeof(PV_ARRAY_TYPE),
384  sizeof(PV_ARRAY_TYPE));
385  BOOST_REQUIRE_EQUAL(932347030, pv_array_hash);
386 
387  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
388  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
389  lookup_options = LookupTableOptionsFree(lookup_options);
390  BOOST_REQUIRE(lookup_options == NULL);
391 }
392 
// Discontiguous megablast table for SMALL_QUERY_GI with template length 16
// and template type eMBWordOptimal; the table is expected to report word
// length 12. Checks the table type (eMBLookupTable), its scalar fields,
// three spot-checked hashtable cells and a hash of the pv_array.
393 BOOST_AUTO_TEST_CASE(testDiscontiguousMBLookupTableOptimalWordSize12) {
394 
395  SetUpQuery(SMALL_QUERY_GI);
396  LookupTableOptions* lookup_options;
397  LookupTableOptionsNew(eBlastTypeBlastn, &lookup_options);
399  TRUE, 0, 12);
// Template settings are written directly into the options structure.
400  lookup_options->mb_template_length = 16;
401  lookup_options->mb_template_type = eMBWordOptimal;
402 
403  QuerySetUpOptions* query_options;
404  BlastQuerySetUpOptionsNew(&query_options);
405  LookupTableWrap* lookup_wrap_ptr;
406  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
407  lookup_options, query_options, lookup_segments,
408  0, &lookup_wrap_ptr, NULL, NULL, NULL), 0);
409  query_options = BlastQuerySetUpOptionsFree(query_options);
410  BOOST_REQUIRE(query_options == NULL);
411  BOOST_REQUIRE_EQUAL(eMBLookupTable, (ELookupTableType)lookup_wrap_ptr->lut_type);
412 
413  BlastMBLookupTable* lookup = (BlastMBLookupTable*) lookup_wrap_ptr->lut;
414  BOOST_REQUIRE_EQUAL(16777216, lookup->hashsize); // 4**12
415  BOOST_REQUIRE_EQUAL(12, (int)lookup->word_length);
416  BOOST_REQUIRE_EQUAL(true, (bool)lookup->discontiguous);
417  BOOST_REQUIRE_EQUAL(16, (int)lookup->template_length);
418  BOOST_REQUIRE_EQUAL(4, (int)lookup->template_type);
419  BOOST_REQUIRE_EQUAL(1, lookup->scan_step);
420  BOOST_REQUIRE_EQUAL(2, lookup->longest_chain);
421  BOOST_REQUIRE_EQUAL(82, lookup->hashtable[9606485]);
422  BOOST_REQUIRE_EQUAL(752, lookup->hashtable[15622537]);
423  BOOST_REQUIRE_EQUAL(5408, lookup->hashtable[10084009]);
424  BOOST_REQUIRE_EQUAL(10, lookup->pv_array_bts);
425 
// Hash the whole pv_array (element count converted to bytes) and compare
// against a fixed expected value.
426  int pv_array_size = (lookup->hashsize >> lookup->pv_array_bts);
427  int pv_array_hash =
428  EndianIndependentBufferHash((char*) lookup->pv_array,
429  pv_array_size * sizeof(PV_ARRAY_TYPE),
430  sizeof(PV_ARRAY_TYPE));
431  BOOST_REQUIRE_EQUAL(558099690, pv_array_hash);
432 
433  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
434  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
435  lookup_options = LookupTableOptionsFree(lookup_options);
436  BOOST_REQUIRE(lookup_options == NULL);
437 }
438 
// Discontiguous megablast table for SMALL_QUERY_GI with template length 16
// and template type eMBWordTwoTemplates. In addition to the checks made by
// the single-template tests, this one requires two_templates to be set,
// checks second_template_type and spot-checks cells of the second hash
// table (hashtable2).
439 BOOST_AUTO_TEST_CASE(testDiscontiguousMBLookupTableTwoTemplatesWordSize11) {
440 
441  SetUpQuery(SMALL_QUERY_GI);
442  LookupTableOptions* lookup_options;
443  LookupTableOptionsNew(eBlastTypeBlastn, &lookup_options);
445  TRUE, 0, 11);
// Template settings are written directly into the options structure.
446  lookup_options->mb_template_length = 16;
447  lookup_options->mb_template_type = eMBWordTwoTemplates;
448 
449  QuerySetUpOptions* query_options = NULL;
450  BlastQuerySetUpOptionsNew(&query_options);
451  LookupTableWrap* lookup_wrap_ptr;
452  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
453  lookup_options, query_options, lookup_segments,
454  0, &lookup_wrap_ptr, NULL, NULL, NULL), 0);
455  query_options = BlastQuerySetUpOptionsFree(query_options);
456  BOOST_REQUIRE(query_options == NULL);
457  BOOST_REQUIRE_EQUAL(eMBLookupTable, (ELookupTableType)lookup_wrap_ptr->lut_type);
458 
459  BlastMBLookupTable* lookup = (BlastMBLookupTable*) lookup_wrap_ptr->lut;
460  BOOST_REQUIRE_EQUAL(4194304, lookup->hashsize); // 4**11
461  BOOST_REQUIRE_EQUAL(11, (int)lookup->word_length);
462  BOOST_REQUIRE_EQUAL(true, static_cast<bool>(lookup->discontiguous));
463  BOOST_REQUIRE_EQUAL(16, (int)lookup->template_length);
464  BOOST_REQUIRE_EQUAL(1, (int)lookup->template_type);
465  BOOST_REQUIRE_EQUAL(1, (int)lookup->two_templates);
466  BOOST_REQUIRE_EQUAL(2, (int)lookup->second_template_type);
467  BOOST_REQUIRE_EQUAL(1, lookup->scan_step);
468  BOOST_REQUIRE_EQUAL(4, lookup->longest_chain);
469  BOOST_REQUIRE_EQUAL(128, lookup->hashtable[1450605]);
470  BOOST_REQUIRE_EQUAL(342, lookup->hashtable[4025953]);
471  BOOST_REQUIRE_EQUAL(663, lookup->hashtable[3139906]);
472  BOOST_REQUIRE_EQUAL(72, lookup->hashtable2[2599530]);
473  BOOST_REQUIRE_EQUAL(225, lookup->hashtable2[4110966]);
474  BOOST_REQUIRE_EQUAL(8, lookup->pv_array_bts);
475 
// Hash the whole pv_array (element count converted to bytes) and compare
// against a fixed expected value.
476  int pv_array_size = (lookup->hashsize >> lookup->pv_array_bts);
477  int pv_array_hash =
478  EndianIndependentBufferHash((char*) lookup->pv_array,
479  pv_array_size * sizeof(PV_ARRAY_TYPE),
480  sizeof(PV_ARRAY_TYPE));
481  BOOST_REQUIRE_EQUAL(-36132604, pv_array_hash);
482 
483  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
484  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
485  lookup_options = LookupTableOptionsFree(lookup_options);
486  BOOST_REQUIRE(lookup_options == NULL);
487 }
488 
489 
// Builds a hashed nucleotide lookup table for LARGE_QUERY_GI by calling
// BlastNaHashLookupTableNew directly, with eBlastTypeMapping options and
// word_size forced to 16. Checks the table's scalar fields and a hash of
// its pv array, then verifies that the first 16-base word of the query can
// be found through the table's hash callback and is recorded at query
// offset zero.
490 BOOST_AUTO_TEST_CASE(testHashLookupTableWordSize16) {
491 
492  SetUpQuery(LARGE_QUERY_GI);
493  LookupTableOptions* lookup_options;
494  LookupTableOptionsNew(eBlastTypeMapping, &lookup_options);
496  FALSE, 0, 0);
497  lookup_options->word_size = 16;
498 
499  QuerySetUpOptions* query_options = NULL;
500  BlastQuerySetUpOptionsNew(&query_options);
501 
503  BOOST_REQUIRE_EQUAL((int)BlastNaHashLookupTableNew(query_blk,
504  lookup_segments, &lookup, lookup_options,
505  query_options, NULL, 1), 0);
506 
507  query_options = BlastQuerySetUpOptionsFree(query_options);
508  BOOST_REQUIRE(query_options == NULL);
509 
510  BOOST_REQUIRE_EQUAL(16, (int)lookup->lut_word_length);
511  BOOST_REQUIRE_EQUAL(1, lookup->scan_step);
512  BOOST_REQUIRE_EQUAL(11, lookup->longest_chain);
513  BOOST_REQUIRE_EQUAL(32768, lookup->backbone_size);
514  BOOST_REQUIRE_EQUAL(1494, lookup->offsets_size);
515  BOOST_REQUIRE_EQUAL(5, lookup->pv_array_bts);
// The hash callback is used further down, so it must be set.
516  BOOST_REQUIRE(lookup->hash_callback);
517 
518  Uint4 pv_array_size = 1u << (32 - 10);
519  int pv_array_hash =
521  pv_array_size * sizeof(PV_ARRAY_TYPE),
522  sizeof(PV_ARRAY_TYPE));
523  BOOST_REQUIRE_EQUAL(1515308782, pv_array_hash);
524 
526  (TNaLookupHashFunction)lookup->hash_callback;
527 
528  // locate the first sequence word in the lookup table
529  // get the word in BLASTNA
// Each base must fit in two bits (nothing set under mask 0xfc); the 16
// bases are packed two bits apiece, first base in the highest position.
530  Uint4 word = 0;
531  for (int i=0;i < 16;i++) {
532  BOOST_REQUIRE((query_blk->sequence[i] & 0xfc) == 0);
533  word = (word << 2) | query_blk->sequence[i];
534  }
535  // hash the word
536  Uint4 hashed_word = hash_func((Uint1*)&word, lookup->mask);
537  // the word must be present in the lookup table ...
538  BOOST_REQUIRE(lookup->thick_backbone[hashed_word].num_words > 0);
539  BOOST_REQUIRE_EQUAL(word, lookup->thick_backbone[hashed_word].words[0]);
540  BOOST_REQUIRE(lookup->thick_backbone[hashed_word].num_offsets[0] <
542  // ... at position zero
543  BOOST_REQUIRE_EQUAL(0, lookup->thick_backbone[hashed_word].offsets[0]);
544 
546  BOOST_REQUIRE(lookup == NULL);
547  lookup_options = LookupTableOptionsFree(lookup_options);
548  BOOST_REQUIRE(lookup_options == NULL);
549 }
550 
551 
// Same query as testHashLookupTableWordSize16, but with db_filter enabled
// and a BlastSeqSrc (obtained from a CLocalDbAdapter) passed to
// LookupTableWrapInit. Requires the resulting table to be of type
// eNaHashLookupTable, checks its scalar fields and a hash of its pv array,
// and finally requires that the first 16-base word of the query is absent
// from the table.
552 BOOST_AUTO_TEST_CASE(testHashLookupTableWordSize16WithDbFilter) {
553 
554  SetUpQuery(LARGE_QUERY_GI);
555  LookupTableOptions* lookup_options;
556  LookupTableOptionsNew(eBlastTypeMapping, &lookup_options);
558  FALSE, 0, 0);
559  lookup_options->db_filter = TRUE;
560 
562  CLocalDbAdapter db_adapter(db);
563  BlastSeqSrc* seqsrc = db_adapter.MakeSeqSrc();
564  BOOST_REQUIRE(seqsrc);
565 
566  QuerySetUpOptions* query_options = NULL;
567  BlastQuerySetUpOptionsNew(&query_options);
568 
569  BOOST_REQUIRE(lookup_options->db_filter);
570  LookupTableWrap* lookup_wrap_ptr;
571  BOOST_REQUIRE(lookup_options->db_filter);
// The sequence source is the last argument here; the other tests pass NULL.
572  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
573  lookup_options, query_options, lookup_segments,
574  0, &lookup_wrap_ptr, NULL, NULL, seqsrc), 0);
575  query_options = BlastQuerySetUpOptionsFree(query_options);
576  BOOST_REQUIRE(query_options == NULL);
577  BOOST_REQUIRE_EQUAL(eNaHashLookupTable,
578  (ELookupTableType)lookup_wrap_ptr->lut_type);
579 
581  (BlastNaHashLookupTable*)lookup_wrap_ptr->lut;
582  BOOST_REQUIRE_EQUAL(16, (int)lookup->lut_word_length);
583  BOOST_REQUIRE_EQUAL(1, lookup->scan_step);
584  BOOST_REQUIRE_EQUAL(10, lookup->longest_chain);
585  BOOST_REQUIRE_EQUAL(256, lookup->backbone_size);
586  BOOST_REQUIRE_EQUAL(38, lookup->offsets_size);
587  BOOST_REQUIRE_EQUAL(5, lookup->pv_array_bts);
588  BOOST_REQUIRE(lookup->hash_callback);
589 
590  Uint4 pv_array_size = 1u << (32 - 10);
591  int pv_array_hash =
593  pv_array_size * sizeof(PV_ARRAY_TYPE),
594  sizeof(PV_ARRAY_TYPE));
595  BOOST_REQUIRE_EQUAL(130150681, pv_array_hash);
596 
597 
599  (TNaLookupHashFunction)lookup->hash_callback;
600 
601  // locate the first sequence word in the lookup table
// Each base must fit in two bits (nothing set under mask 0xfc); the 16
// bases are packed two bits apiece, first base in the highest position.
602  Uint4 word = 0;
603  for (int i=0;i < 16;i++) {
604  BOOST_REQUIRE((query_blk->sequence[i] & 0xfc) == 0);
605  word = (word << 2) | query_blk->sequence[i];
606  }
607  Uint4 hashed_word = hash_func((Uint1*)&word, lookup->mask);
608  // the word was filtered out and is not in the lookup table
609  BOOST_REQUIRE(lookup->thick_backbone[hashed_word].num_words == 0);
610 
611  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
612  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
613  lookup_options = LookupTableOptionsFree(lookup_options);
614  BOOST_REQUIRE(lookup_options == NULL);
615 }
616 
617 
// Negative test: with db_filter enabled but NULL passed as the sequence
// source, LookupTableWrapInit is required to return a non-zero status.
618 BOOST_AUTO_TEST_CASE(testHashLookupTableMissingSeqSrc) {
619 
620  SetUpQuery(LARGE_QUERY_GI);
621  LookupTableOptions* lookup_options;
622  LookupTableOptionsNew(eBlastTypeMapping, &lookup_options);
624  FALSE, 0, 0);
625  lookup_options->db_filter = TRUE;
626 
// NOTE(review): query_options is allocated here but never passed to
// BlastQuerySetUpOptionsFree in this test, unlike the other tests in this
// file -- looks like a leak; confirm.
627  QuerySetUpOptions* query_options = NULL;
628  BlastQuerySetUpOptionsNew(&query_options);
629 
630  BOOST_REQUIRE(lookup_options->db_filter);
631  LookupTableWrap* lookup_wrap_ptr;
632  BOOST_REQUIRE(lookup_options->db_filter);
633  BOOST_REQUIRE((int)LookupTableWrapInit(query_blk,
634  lookup_options, query_options, lookup_segments,
635  0, &lookup_wrap_ptr, NULL, NULL, NULL) != 0);
636 
637  lookup_options = LookupTableOptionsFree(lookup_options);
638  BOOST_REQUIRE(lookup_options == NULL);
639 }
640 
// Builds a lookup table over the query produced by debruijnInit(8, 4) and
// requires a table of type eNaLookupTable with 65536 backbone cells, a
// longest chain of 1 and no overflow, in which every thick_backbone cell
// has num_used == 1 and every word of the pv array has all 32 bits set.
641 BOOST_AUTO_TEST_CASE(testStdLookupTableDebruijn) {
642 
643  const int alphabet_size=4; // in alphabet there are A,C,G,T
644  const int word_size=8; // 8 letters for every hash value.
645 
646  debruijnInit(word_size, alphabet_size);
647 
648  LookupTableOptions* lookup_options;
649  LookupTableOptionsNew(eBlastTypeBlastn, &lookup_options);
651  FALSE, 0, word_size);
652 
653  QuerySetUpOptions* query_options = NULL;
654  BlastQuerySetUpOptionsNew(&query_options);
655  LookupTableWrap* lookup_wrap_ptr;
656  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
657  lookup_options, query_options, lookup_segments,
658  0, &lookup_wrap_ptr, NULL, NULL, NULL), 0);
659  query_options = BlastQuerySetUpOptionsFree(query_options);
660  BOOST_REQUIRE(query_options == NULL);
661  BOOST_REQUIRE_EQUAL(eNaLookupTable, (ELookupTableType)lookup_wrap_ptr->lut_type);
662 
663  BlastNaLookupTable* lookup = (BlastNaLookupTable*) lookup_wrap_ptr->lut;
664  BOOST_REQUIRE_EQUAL(65536, lookup->backbone_size); // 4^8
665  BOOST_REQUIRE_EQUAL(1, lookup->longest_chain);
666  BOOST_REQUIRE_EQUAL(0, lookup->overflow_size);
667 
// Every backbone cell is expected to hold exactly one entry.
668  int index;
669  for(index=0;index<lookup->backbone_size;index++)
670  {
671  BOOST_REQUIRE_EQUAL(1, lookup->thick_backbone[index].num_used);
672  }
673 
// Every word of the pv array is expected to have all bits set.
674  PV_ARRAY_TYPE *pv_array = lookup->pv;
675  int pv_size = lookup->backbone_size >> PV_ARRAY_BTS;
676  for (index=0; index<pv_size; index++)
677  {
678  BOOST_REQUIRE_EQUAL((Uint4) 0xFFFFFFFF, (Uint4) pv_array[index]);
679  }
680 
681  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
682  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
683  lookup_options = LookupTableOptionsFree(lookup_options);
684  BOOST_REQUIRE(lookup_options == NULL);
685 }
686 
// Builds a lookup table over the query produced by debruijnInit(12, 4) and
// requires a table of type eMBLookupTable with hash size 16777216, word
// length 28 and pv_array_bts 10, in which next_pos is zero for every index
// from 0 through query_blk->length and every word of the pv array has all
// 32 bits set.
687 BOOST_AUTO_TEST_CASE(testMegablastLookupTableDebruijn) {
688 
689  const int alphabet_size=4; // in alphabet there are A,C,G,T
690  const int word_size=12; // 12 letters for every hash value.
691 
692  debruijnInit(word_size, alphabet_size);
693 
694  LookupTableOptions* lookup_options;
695  LookupTableOptionsNew(eBlastTypeBlastn, &lookup_options);
697  TRUE, 0, 0);
698 
699  QuerySetUpOptions* query_options = NULL;
700  BlastQuerySetUpOptionsNew(&query_options);
701  LookupTableWrap* lookup_wrap_ptr;
702  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
703  lookup_options, query_options, lookup_segments,
704  0, &lookup_wrap_ptr, NULL, NULL, NULL), 0);
705  query_options = BlastQuerySetUpOptionsFree(query_options);
706  BOOST_REQUIRE(query_options == NULL);
707  BOOST_REQUIRE_EQUAL(eMBLookupTable, (ELookupTableType)lookup_wrap_ptr->lut_type);
708 
709  BlastMBLookupTable* lookup = (BlastMBLookupTable*) lookup_wrap_ptr->lut;
710  BOOST_REQUIRE_EQUAL(16777216, lookup->hashsize); // 4**12
711  BOOST_REQUIRE_EQUAL(28, (int)lookup->word_length);
712  BOOST_REQUIRE_EQUAL(2, lookup->longest_chain); // An overestimate, should be 1.
713  BOOST_REQUIRE_EQUAL(10, lookup->pv_array_bts);
714 
715  int index;
716 
// next_pos is expected to be zero throughout (length + 1 entries checked).
717  for (index=0; index<query_blk->length+1; index++)
718  {
719  BOOST_REQUIRE_EQUAL(0, lookup->next_pos[index]);
720  }
721 
// Every word of the pv array is expected to have all bits set.
722  PV_ARRAY_TYPE *pv_array = lookup->pv_array;
723  int pv_array_size = (lookup->hashsize >> lookup->pv_array_bts);
724  for (index=0; index<pv_array_size; index++)
725  {
726  BOOST_REQUIRE_EQUAL((Uint4) 0xFFFFFFFF, (Uint4) pv_array[index]);
727  }
728 
729  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
730  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
731  lookup_options = LookupTableOptionsFree(lookup_options);
732  BOOST_REQUIRE(lookup_options == NULL);
733 }
734 
735 // Test that nothing is put into the lookup table if contiguous unmasked
736 // regions are smaller than user specified word size.
// The test supplies its own segment list (two 21-base ranges) instead of the
// fixture's lookup_segments, and expects a table reporting word length 28
// whose final_backbone cells are all -1.
737 BOOST_AUTO_TEST_CASE(testStdTableSmallUnmaskedRegion) {
738 
739  SetUpQuery(SMALL_QUERY_GI);
740  LookupTableOptions* lookup_options;
741  LookupTableOptionsNew(eBlastTypeBlastn, &lookup_options);
743  FALSE, 0, 28);
744 
// Two ranges of 21 bases each (0..20 and 3869..3889).
745  BlastSeqLoc* segments = NULL;
746  BlastSeqLocNew(&segments, 0, 20);
747  BlastSeqLocNew(&segments, 3869, 3889);
748 
749  QuerySetUpOptions* query_options = NULL;
750  BlastQuerySetUpOptionsNew(&query_options);
751  LookupTableWrap* lookup_wrap_ptr;
752  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
753  lookup_options, query_options, segments,
754  0, &lookup_wrap_ptr, NULL, NULL, NULL), 0);
755  query_options = BlastQuerySetUpOptionsFree(query_options);
756  BOOST_REQUIRE(query_options == NULL);
757  BOOST_REQUIRE_EQUAL(eSmallNaLookupTable,
758  (ELookupTableType)lookup_wrap_ptr->lut_type);
759 
761  (BlastSmallNaLookupTable*) lookup_wrap_ptr->lut;
762  BOOST_REQUIRE_EQUAL(65536, lookup->backbone_size); // 4**8
763  BOOST_REQUIRE_EQUAL(0, lookup->longest_chain);
764  BOOST_REQUIRE_EQUAL(28, (int)lookup->word_length);
765  BOOST_REQUIRE_EQUAL(2, lookup->overflow_size);
766 
767  for (int index=0; index<lookup->backbone_size; index++)
768  {
769  // We expect all backbone cells to be empty
770  // since there are no words.
771  BOOST_REQUIRE_EQUAL((Int2)(-1), lookup->final_backbone[index]);
772  }
773 
774  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
775  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
776  lookup_options = LookupTableOptionsFree(lookup_options);
777  BOOST_REQUIRE(lookup_options == NULL);
// The locally built segment list is owned by this test and freed here.
778  segments = BlastSeqLocFree(segments);
779  BOOST_REQUIRE(segments == NULL);
780 }
781 
782 // Test that nothing is put into the lookup table if contiguous unmasked regions are smaller than user specified word size.
// The test tiles the query with its own segments of (word_size - 1) bases,
// one starting every word_size positions, and expects an eMBLookupTable
// whose pv_array is entirely zero.
783 BOOST_AUTO_TEST_CASE(testMegablastTableSmallUnmaskedRegion) {
784 
785  const Int4 word_size = 28;
786  SetUpQuery(LARGE_QUERY_GI);
787  LookupTableOptions* lookup_options;
788  LookupTableOptionsNew(eBlastTypeBlastn, &lookup_options);
790  TRUE, 0, word_size);
791 
792  // make a large number of segments, all smaller than
793  // the word size. We cannot have just a few segments because
794  // then a large lookup table will not be created
795 
// NOTE(review): the end of the last segment (offset + word_size - 2) is not
// clamped to query_blk->length - 1 -- confirm that is tolerated downstream.
796  BlastSeqLoc* segments = NULL;
797  Int4 offset = 0;
798  while (offset < query_blk->length) {
799  BlastSeqLocNew(&segments, offset, offset + word_size - 2);
800  offset += word_size;
801  }
802 
803  QuerySetUpOptions* query_options = NULL;
804  BlastQuerySetUpOptionsNew(&query_options);
805  LookupTableWrap* lookup_wrap_ptr;
806  BOOST_REQUIRE_EQUAL((int)LookupTableWrapInit(query_blk,
807  lookup_options, query_options, segments, 0,
808  &lookup_wrap_ptr, NULL, NULL, NULL), 0);
809  query_options = BlastQuerySetUpOptionsFree(query_options);
810  BOOST_REQUIRE(query_options == NULL);
811  BOOST_REQUIRE_EQUAL((ELookupTableType)lookup_wrap_ptr->lut_type, eMBLookupTable);
812 
813  BlastMBLookupTable* lookup = (BlastMBLookupTable*) lookup_wrap_ptr->lut;
814  BOOST_REQUIRE_EQUAL(4194304, lookup->hashsize); // 4**11
815  BOOST_REQUIRE_EQUAL(28, (int)lookup->word_length);
816  BOOST_REQUIRE_EQUAL(18, lookup->scan_step);
817  BOOST_REQUIRE_EQUAL(2, lookup->longest_chain);
818  BOOST_REQUIRE_EQUAL(7, lookup->pv_array_bts);
819 
820  int index;
821  int pv_array_size = (lookup->hashsize >> lookup->pv_array_bts);
822  PV_ARRAY_TYPE *pv_array = lookup->pv_array;
823  for (index=0; index<pv_array_size; index++)
824  {
825  // We expect pv_array to be all zeros as there are no words.
826  BOOST_REQUIRE_EQUAL((PV_ARRAY_TYPE) 0, pv_array[index]);
827  }
828 
829  lookup_wrap_ptr = LookupTableWrapFree(lookup_wrap_ptr);
830  BOOST_REQUIRE(lookup_wrap_ptr == NULL);
831  lookup_options = LookupTableOptionsFree(lookup_options);
832  BOOST_REQUIRE(lookup_options == NULL);
// The locally built segment list is owned by this test and freed here.
833  segments = BlastSeqLocFree(segments);
834  BOOST_REQUIRE(segments == NULL);
835 }
836 
837 
839 
840 /*
841 * ===========================================================================
842 *
843 * $Log: ntlookup-cppunit.cpp,v $
844 * Revision 1.47 2008/10/27 17:00:12 camacho
845 * Fix include paths to deprecated headers
846 *
847 * Revision 1.46 2008/01/31 22:07:00 madden
848 * Change call to LookupTableWrapInit as part of fix for SB-44
849 *
850 * Revision 1.45 2007/02/14 20:18:01 papadopo
851 * remove SetFullByteScan and discontig. megablast with stride 4
852 *
853 * Revision 1.44 2006/12/13 19:19:35 papadopo
854 * full_byte_scan -> scan_step
855 *
856 * Revision 1.43 2006/12/01 16:56:40 papadopo
857 * modify expectations now that there is an extra blastn lookup table type
858 *
859 * Revision 1.42 2006/11/21 17:46:27 papadopo
860 * rearrange headers, change lookup table type, use enums for lookup table constants
861 *
862 * Revision 1.41 2006/09/15 13:12:05 madden
863 * Change to LookupTableWrapInit prototype
864 *
865 * Revision 1.40 2006/05/04 15:53:22 camacho
866 * Removed unused BLAST_SequenceBlk::context field
867 *
868 * Revision 1.39 2005/12/22 14:18:11 papadopo
869 * change signature of BlastFillLookupTableOptions
870 *
871 * Revision 1.38 2005/12/19 16:44:15 papadopo
872 * 1. Do not assume that lookup table types are those specified
873 * when lookup table is constructed
874 * 2. Add use of small query and small wordsize to force use of standard
875 * lookup table instead of megablast lookup table
876 * 3. Do not assume a single width for standard/megablast lookup tables
877 *
878 * Revision 1.37 2005/06/09 20:37:06 camacho
879 * Use new private header blast_objmgr_priv.hpp
880 *
881 * Revision 1.36 2005/05/20 18:30:52 camacho
882 * Update to use new signature to BLAST_FillLookupTableOptions
883 *
884 * Revision 1.35 2005/05/10 16:09:04 camacho
885 * Changed *_ENCODING #defines to EBlastEncoding enumeration
886 *
887 * Revision 1.34 2005/03/16 18:37:18 papadopo
888 * change expected values to account for modifications to megablast lookup table construction
889 *
890 * Revision 1.33 2005/03/04 17:20:44 bealer
891 * - Command line option support.
892 *
893 * Revision 1.32 2005/02/10 21:26:30 bealer
893 * - Use endianness independent techniques for unit test hashing.
895 *
896 * Revision 1.31 2005/01/28 18:30:48 camacho
897 * Fix memory leak
898 *
899 * Revision 1.30 2005/01/13 13:06:51 madden
900 * New tests for fix to exclude regions of query that are not as long as user specified word size from lookup table
901 *
902 * Revision 1.29 2005/01/10 14:01:47 madden
903 * Prototype change for BLAST_FillLookupTableOptions
904 *
905 * Revision 1.28 2004/12/28 16:48:26 camacho
906 * 1. Use typedefs to AutoPtr consistently
907 * 2. Use SBlastSequence structure instead of std::pair as return value to
908 * blast::GetSequence
909 *
910 * Revision 1.27 2004/09/13 12:54:14 madden
911 * BlastSeqLoc changes
912 *
913 * Revision 1.26 2004/08/03 16:13:43 madden
914 * Correction for use of helper_array
915 *
916 * Revision 1.25 2004/07/22 14:29:32 madden
917 * Add two template discontig mb test
918 *
919 * Revision 1.24 2004/07/20 15:50:57 madden
920 * Added Discontig test cases for optimal pattern, removed dead code
921 *
922 * Revision 1.23 2004/07/12 16:28:38 papadopo
923 * Prepend 'Blast' to {MB|PHI|RPS}LookupTable
924 *
925 * Revision 1.22 2004/06/22 16:46:19 camacho
926 * Changed the blast_type_* definitions for the EBlastProgramType enumeration.
927 *
928 * Revision 1.21 2004/04/16 14:35:06 papadopo
929 * remove unneeded RPS argument in FillLookupTableOptions
930 *
931 * Revision 1.20 2004/04/05 16:10:26 camacho
932 * Rename DoubleInt -> SSeqRange
933 *
934 * Revision 1.19 2004/03/23 16:10:34 camacho
935 * Minor changes to CTestObjMgr
936 *
937 * Revision 1.18 2004/03/10 17:39:40 papadopo
938 * add (unused) RPS blast parameters to LookupTableWrapInit and FillLookupTableOptions
939 *
940 * Revision 1.17 2004/03/06 00:40:27 camacho
941 * Use correct enum argument to ncbi::blast::GetSequence
942 *
943 * Revision 1.16 2004/03/05 15:12:07 papadopo
944 * add (unused) RPS blast parameter to FillLookupTable calls
945 *
946 * Revision 1.15 2004/02/24 15:19:39 madden
947 * Check pv_array_bts for megablast tables, use calculated size of pv_array rather than hard-coded numbers, append WordSize11 to testDiscontiguousMegablastLookupTable
948 *
949 * Revision 1.14 2004/02/23 19:52:52 madden
950 * Add testDiscontiguousMegablastLookupTableWordSize12 test
951 *
952 * Revision 1.13 2004/02/20 23:20:37 camacho
953 * Remove undefs.h
954 *
955 * Revision 1.12 2004/02/17 20:33:12 dondosha
956 * Use BOOST_REQUIRE_EQUAL; const int array sizes
957 *
958 * Revision 1.11 2004/02/09 22:37:15 dondosha
959 * Sentinel values are 15, not 0 for nucleotide sequence; corrected one location endpoint
960 *
961 * Revision 1.10 2004/01/26 20:25:20 coulouri
962 * Use offset rather than pointer for LookupBackboneCell
963 *
964 * Revision 1.9 2004/01/06 21:32:06 dondosha
965 * Corrected values in assertions for megablast lookup table
966 *
967 * Revision 1.8 2004/01/02 16:12:34 madden
968 * Add Debruijn sequences for standard and (contiguous) megablast lookup tables
969 *
970 * Revision 1.7 2004/01/02 14:38:22 madden
971 * Changes for new offset conventions both for lookup table creation and recording of hits
972 *
973 * Revision 1.6 2003/12/09 21:38:21 madden
974 * Compensate for recent discontig. mb changes
975 *
976 * Revision 1.5 2003/12/09 18:02:26 camacho
977 * Use BOOST_REQUIRE_EQUALS to see expected/actual values in error report
978 *
979 * Revision 1.4 2003/12/08 21:56:58 madden
980 * Use setUp and tearDown methods
981 *
982 * Revision 1.3 2003/12/08 20:39:00 madden
983 * Discontiguous megablast test, some cleanup
984 *
985 * Revision 1.2 2003/12/08 14:20:28 madden
986 * Add megablast test
987 *
988 * Revision 1.1 2003/12/04 22:03:09 madden
989 * Nucleotide lookup table tests, first cut
990 *
991 *
992 * ===========================================================================
993 */
Declares the CBl2Seq (BLAST 2 Sequences) class.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Definition: blast_filter.c:737
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Definition: blast_filter.c:608
#define PV_ARRAY_BTS
bits-to-shift from lookup_index to pv_array index.
Definition: blast_lookup.h:43
Uint4(* TNaLookupHashFunction)(Uint1 *, Uint4)
Hash function type for the lookup table.
Definition: blast_lookup.h:142
#define PV_ARRAY_TYPE
The pv_array 'native' type.
Definition: blast_lookup.h:41
Routines for creating nucleotide BLAST lookup tables.
#define NA_OFFSETS_PER_HASH
@ eMBWordOptimal
@ eMBWordCoding
@ eMBWordTwoTemplates
BlastNaHashLookupTable * BlastNaHashLookupTableDestruct(BlastNaHashLookupTable *lookup)
Free a nucleotide lookup table.
Int4 BlastNaHashLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastNaHashLookupTable **lut, const LookupTableOptions *opt, const QuerySetUpOptions *query_options, BlastSeqSrc *seqsrc, Uint4 num_threads)
Declares the CBlastNucleotideOptionsHandle class.
Definitions which are dependent on the NCBI C++ Object Manager.
Int2 BlastQuerySetUpOptionsNew(QuerySetUpOptions **options)
Allocate memory for QuerySetUpOptions and fill with default values.
Int2 BLAST_FillLookupTableOptions(LookupTableOptions *options, EBlastProgramType program, Boolean is_megablast, double threshold, Int4 word_size)
Allocate memory for lookup table options and fill with default values.
Int2 LookupTableOptionsNew(EBlastProgramType program, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
ELookupTableType
Types of the lookup table.
@ eSmallNaLookupTable
lookup table for blastn with small query
@ eNaLookupTable
blastn lookup table
@ eMBLookupTable
megablast lookup table (includes both contiguous and discontiguous megablast)
@ eNaHashLookupTable
used for 16-base words
LookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)
Deallocates memory for LookupTableOptions*.
QuerySetUpOptions * BlastQuerySetUpOptionsFree(QuerySetUpOptions *options)
Deallocate memory for QuerySetUpOptions.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
@ eBlastTypeBlastn
Definition: blast_program.h:74
@ eBlastTypeMapping
Definition: blast_program.h:88
Declares the CBlastProteinOptionsHandle class.
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Definition: blast_util.c:245
Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen)
Stores the sequence in the sequence block structure.
Definition: blast_util.c:147
Int2 BlastSetUp_SeqBlkNew(const Uint1 *buffer, Int4 length, BLAST_SequenceBlk **seq_blk, Boolean buffer_allocated)
Allocates memory for *sequence_blk and then populates it.
Definition: blast_util.c:101
Int2 BlastSeqBlkNew(BLAST_SequenceBlk **retval)
Allocates a new sequence block structure.
Definition: blast_util.c:133
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
Declares the CBlastxOptionsHandle class.
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
Blast Search Subject.
static CTestObjMgr & Instance()
Definition: test_objmgr.cpp:69
Declares the CDiscNucleotideOptionsHandle class.
static int lookup(const char *name, const struct lookup_int *table)
Definition: attributes.c:50
int offset
Definition: replacements.h:160
TSeqPos length
Length of the buffer above (not necessarily sequence length!)
Definition: blast_setup.hpp:65
BlastSeqSrc * MakeSeqSrc()
Retrieves or constructs the BlastSeqSrc.
TAutoUint1Ptr data
Sequence data.
Definition: blast_setup.hpp:64
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
@ eSentinels
Use sentinel bytes.
Definition: blast_setup.hpp:94
@ eBlastDbIsNucleotide
nucleotide
element_type * release(void)
Release will release ownership of pointer to caller.
Definition: ncbimisc.hpp:472
#define NULL
Definition: ncbistd.hpp:225
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define hash_func
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
for(len=0;yy_str[len];++len)
char * buf
int i
int len
Utility functions for lookup table generation.
Int4 iexp(Int4 x, Int4 n)
Integer exponentiation using right to left binary algorithm.
Definition: lookup_util.c:47
void debruijn(Int4 n, Int4 k, Uint1 *output, Uint1 *alphabet)
generates a de Bruijn sequence containing all substrings of length n over an alphabet of size k.
Definition: lookup_util.c:170
LookupTableWrap * LookupTableWrapFree(LookupTableWrap *lookup)
Deallocate memory for the lookup table.
Definition: lookup_wrap.c:197
Int2 LookupTableWrapInit(BLAST_SequenceBlk *query, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, BlastSeqLoc *lookup_segments, BlastScoreBlk *sbp, LookupTableWrap **lookup_wrap_ptr, const BlastRPSInfo *rps_info, Blast_Message **error_msg, BlastSeqSrc *seqsrc)
Create the lookup table for all query words.
Definition: lookup_wrap.c:47
Uint4 EndianIndependentBufferHash(const char *buffer, Uint4 byte_length, Uint4 swap_size, Uint4 hash_seed)
Endianness independent hash function.
Magic spell ;-) needed for some weird compilers... very empiric.
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
Defines: CTimeFormat - storage class for time format.
#define NULL_NUCL_SENTINEL
BOOST_AUTO_TEST_CASE(testStdLookupTable)
#define LARGE_QUERY_GI
#define SMALL_QUERY_GI
The Object manager core.
Utilities to develop and debug unit tests for BLAST.
Structure to hold a sequence.
Definition: blast_def.h:242
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Definition: blast_def.h:244
Int4 length
Length of sequence.
Definition: blast_def.h:246
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
The lookup table structure used for Mega BLAST.
The basic lookup table structure for blastn searches.
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Lookup table structure for blastn searches with small queries.
Options needed to construct a lookup table. Also needed: query sequence and query length.
Int4 word_size
Determines the size of the lookup table.
Boolean db_filter
scan the database and include only words that appear in the database between 1 and 9 times (currently...
Int4 mb_template_type
Type of a discontiguous word template.
Int4 mb_template_length
Length of the discontiguous words.
Wrapper structure for different types of BLAST lookup tables.
Definition: lookup_wrap.h:50
void * lut
Pointer to the actual lookup table structure.
Definition: lookup_wrap.h:52
ELookupTableType lut_type
What kind of a lookup table it is?
Definition: lookup_wrap.h:51
void SetUpQuery(Uint4 query_gi)
BlastSeqLoc * lookup_segments
BLAST_SequenceBlk * query_blk
void debruijnInit(int word_size, int alphabet_size)
Options required for setting up the query sequence.
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Definition: blast_setup.hpp:62
Declares the CTBlastnOptionsHandle class.
Utility stuff for more convenient using of Boost.Test library.
Uniform BLAST Search Interface.
voidp malloc(uInt size)
Modified on Thu May 02 14:28:01 2024 by modify_doxy.py rev. 669887