NCBI C++ ToolKit
linkhsp_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: linkhsp_unit_test.cpp 92002 2020-12-17 15:27:21Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Ilya Dondoshansky
27 *
28 * File Description:
29 * Unit test module to test the algorithms for linking HSPs
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 
36 #include <corelib/ncbitime.hpp>
38 #include <objmgr/scope.hpp>
39 
41 #include <objmgr/util/sequence.hpp>
42 
43 #include "test_objmgr.hpp"
44 
51 #include <blast_objmgr_priv.hpp>
53 
54 using namespace std;
55 using namespace ncbi;
56 using namespace ncbi::objects;
57 using namespace ncbi::blast;
58 
69 };
70 
71 /// Sets up the query information structure without a real sequence. Used
72 /// only for blastn test below, where query sequence is not available.
73 static void
74 s_SetupNuclQueryInfo(Uint4 query_length, BlastQueryInfo* *query_info)
75 {
76  (*query_info) = BlastQueryInfoNew(eBlastTypeBlastn, 1);
77  (*query_info)->contexts[0].query_offset = 0;
78  (*query_info)->contexts[0].query_length = query_length;
79  (*query_info)->contexts[1].query_offset = query_length + 1;
80  (*query_info)->contexts[1].query_length = query_length;
81  (*query_info)->max_length = query_length;
82 }
83 
85 
93 
95  freeStructures();
96  }
97 
98  /// Sets up the input list of HSPs. These must be sorted by score.
/// Allocates m_HspList and fills it with ten hard-coded HSPs whose scores,
/// offsets, lengths and frames come from the tables below.
/// NOTE(review): the declaration line of this method is absent from this
/// listing; presumably this is the body of setupHSPListTransl(), which the
/// tests call right after setupLinkHspInputTblastn() - confirm against the
/// original file.
100  {
101  const int kNumHsps = 10;
102  const int kScores[kNumHsps] =
103  { 1023, 282, 246, 202, 142, 117, 98, 92, 63, 53 };
104  const int kQueryOffsets[kNumHsps] =
105  { 11, 346, 399, 244, 287, 224, 311, 218, 0, 404};
106  const int kQueryLengths[kNumHsps] =
107  { 244, 56, 49, 49, 104, 29, 36, 37, 12, 25 };
108  const int kSubjectFrames[kNumHsps] =
109  { 2, 2, 3, 2, 1, 1, 2, 3, 3, 2 };
110  const int kSubjectOffsets[kNumHsps] =
111  { 1372, 2677, 2756, 2062, 2209, 1832, 2351, 1732, 1140, 2683 };
112  const int kSubjectLengths[kNumHsps] =
113  {300, 56, 49, 50, 75, 29, 32, 36, 12, 26 };
114 
115  m_HspList = Blast_HSPListNew(0);
116  Int4 index;
117  BlastHSP* hsp;
118 
119  for (index = 0; index < kNumHsps; ++index) {
// Each HSP is zero-initialized by calloc; only the fields set below are
// given non-zero values.
120  m_HspList->hsp_array[index] = hsp =
121  (BlastHSP*) calloc(1, sizeof(BlastHSP));
122  hsp->score = kScores[index];
123  if (m_ProgramType == eBlastTypeTblastn) {
// tblastn: the tables are applied as named, and the frame is stored on the
// subject side.
124  hsp->query.offset = kQueryOffsets[index];
125  hsp->query.end = kQueryOffsets[index] + kQueryLengths[index];
126  hsp->subject.offset = kSubjectOffsets[index];
127  hsp->subject.end =
128  kSubjectOffsets[index] + kSubjectLengths[index];
129  hsp->subject.frame = kSubjectFrames[index];
130  } else {
// Any other program type: the same coordinates are used with the query and
// subject roles swapped, and the frame is stored on the query side.
131  hsp->query.offset = kSubjectOffsets[index];
132  hsp->query.end =
133  kSubjectOffsets[index] + kSubjectLengths[index];
134  hsp->subject.offset = kQueryOffsets[index];
135  hsp->subject.end = kQueryOffsets[index] + kQueryLengths[index];
136  hsp->query.frame = kSubjectFrames[index];
137  }
138  }
139 
140  m_HspList->hspcnt = kNumHsps;
141  }
142 
143  /// Sets up the scoring block with the Karlin-Altschul parameters
/// Allocates m_ScoreBlk, initializes its matrix, and computes the ungapped
/// (and, when requested, gapped) Karlin-Altschul parameters for m_QueryInfo.
/// @param seqbuf Sequence buffer handed to Blast_ScoreBlkKbpUngappedCalc [in]
/// @param gapped If true, gapped parameters are calculated as well [in]
/// @param score_options_ptr If non-NULL, receives the scoring options created
///        here (callers in this file release them with
///        BlastScoringOptionsFree); if NULL, the options are freed before
///        returning [out]
/// NOTE(review): the listing skips from line 150 to 152, so one argument line
/// of the BlastScoreBlkNew call is not visible here.
144  void setupScoreBlk(Uint1* seqbuf, bool gapped,
145  BlastScoringOptions** score_options_ptr)
146  {
147  Int2 status;
148  BlastScoringOptions* score_options = NULL;
149  m_ScoreBlk =
150  BlastScoreBlkNew((m_ProgramType==eBlastTypeBlastn ?
152  m_QueryInfo->last_context+1);
153 
154  BlastScoringOptionsNew(m_ProgramType, &score_options);
155  score_options->gapped_calculation = (gapped ? TRUE : FALSE);
156 
// For every program other than blastn the default options are required to
// name the BLOSUM62 matrix.
157  if (m_ProgramType != eBlastTypeBlastn) {
158  BOOST_REQUIRE(!strcmp("BLOSUM62", score_options->matrix));
159  }
160  status = Blast_ScoreBlkMatrixInit(m_ProgramType, score_options,
161  m_ScoreBlk, &BlastFindMatrixPath);
162 
163  BOOST_REQUIRE(status == 0);
164 
165  Blast_Message* message = NULL;
166  status = Blast_ScoreBlkKbpUngappedCalc(m_ProgramType, m_ScoreBlk,
167  seqbuf, m_QueryInfo, &message);
// Any message produced is released before the status is checked, so it is
// not leaked when the status requirement below fails.
168  message = Blast_MessageFree(message);
169  BOOST_REQUIRE(message == NULL);
170 
171  BOOST_REQUIRE(status == 0);
172 
173  if (gapped) {
174  status = Blast_ScoreBlkKbpGappedCalc(m_ScoreBlk, score_options,
175  m_ProgramType, m_QueryInfo, NULL);
176  BOOST_REQUIRE(status == 0);
// Point the active gapped parameters at the standard ones just computed.
177  m_ScoreBlk->kbp_gap = m_ScoreBlk->kbp_gap_std;
178  }
179 
// Point the active ungapped parameters at the standard ones.
180  m_ScoreBlk->kbp = m_ScoreBlk->kbp_std;
181 
// Hand the options to the caller when asked for, otherwise release them.
182  if (score_options_ptr)
183  *score_options_ptr = score_options;
184  else
185  BlastScoringOptionsFree(score_options);
186  }
187 
188  /// Sets up the hit saving parameters structures. Only the fields relevant
189  /// to linking HSPs are filled.
/// @param longest_intron Value stored in link_hsp_params->longest_intron [in]
/// @param evalue Value stored in options->expect_value [in]
/// NOTE(review): the listing skips from line 193 to 195, so the right-hand
/// side of the m_HitParams assignment is not visible here.
190  void setupHitParams(int longest_intron, double evalue)
191  {
// Small-gap cutoff is 16 for blastn and 42 for every other program type.
192  int cutoff_small_gap = (m_ProgramType == eBlastTypeBlastn ? 16 : 42);
193  m_HitParams =
195  m_HitParams->options = (BlastHitSavingOptions *)
196  calloc(1, sizeof(BlastHitSavingOptions));
197  m_HitParams->options->expect_value = evalue;
198  BlastLinkHSPParametersNew(m_ProgramType, TRUE,
199  &m_HitParams->link_hsp_params);
// Override the link parameters with the fixed values used by these tests.
200  m_HitParams->link_hsp_params->cutoff_big_gap = 0;
201  m_HitParams->link_hsp_params->cutoff_small_gap = cutoff_small_gap;
202  m_HitParams->link_hsp_params->longest_intron = longest_intron;
203  }
204 
205  /// Fills the effective lengths data into the query information structure
/// Creates default effective-length options and parameters for the given
/// database size, runs BLAST_CalcEffLengths on m_ScoreBlk and m_QueryInfo, and
/// releases the temporary structures again.
/// @param db_length Database length passed to the parameters constructor [in]
/// @param db_num_seq Number of database sequences passed likewise [in]
/// NOTE(review): the listing skips line 207, which holds the function name
/// and the first parameter; the body uses a parameter named score_options.
206  void
208  Int8 db_length, Int4 db_num_seq)
209  {
210  BlastEffectiveLengthsOptions* eff_len_options = NULL;
211  BlastEffectiveLengthsOptionsNew(&eff_len_options);
212  BlastEffectiveLengthsParameters* eff_len_params = NULL;
213  BlastEffectiveLengthsParametersNew(eff_len_options, db_length,
214  db_num_seq, &eff_len_params);
215  BLAST_CalcEffLengths(m_ProgramType, score_options, eff_len_params,
216  m_ScoreBlk, m_QueryInfo, NULL);
// Parameters are released before the options they were created from.
217  BlastEffectiveLengthsParametersFree(eff_len_params);
218  BlastEffectiveLengthsOptionsFree(eff_len_options);
219  }
220 
221  /// Complete set-up before calling the HSP linking algorithm
/// Loads the query through the test object manager, fills m_QueryInfo, builds
/// the gapped scoring block, sets m_SubjectLength and computes the effective
/// lengths. Relies on m_ProgramType and m_Program having been set by the test.
/// NOTE(review): the declaration line (222) and line 246 are absent from this
/// listing; presumably this is setupLinkHspInputTblastn(), which the
/// translated-search tests call first - confirm against the original file.
223  {
224  const string kProtGi = "9930103";
225  const string kNuclGi = "9930102";
226  const Uint4 kProtLength = 448;
227  const Uint4 kNuclLength = 8872;
228 
// The query is the protein for tblastn and the nucleotide sequence for any
// other program type.
229  string qid_str = "gi|" + ((m_ProgramType == eBlastTypeTblastn) ?
230  kProtGi : kNuclGi);
231  CSeq_id query_id(qid_str);
232  TSeqLocVector query_v;
233 
// blastx requests both strands explicitly; otherwise CreateSSeqLoc is called
// without a strand argument.
234  if (m_ProgramType == eBlastTypeBlastx) {
235  unique_ptr<SSeqLoc> qsl(
236  CTestObjMgr::Instance().CreateSSeqLoc(query_id,
237  eNa_strand_both));
238  query_v.push_back(*qsl);
239  } else {
240  unique_ptr<SSeqLoc> qsl(
241  CTestObjMgr::Instance().CreateSSeqLoc(query_id));
242  query_v.push_back(*qsl);
243  }
244 
245  CBlastOptions options;
247  if (m_ProgramType == eBlastTypeBlastx)
248  options.SetQueryGeneticCode(1);
249 
250  options.SetProgram(m_Program);
251  CBLAST_SequenceBlk query_blk;
252  TSearchMessages blast_msg;
253 
254  ENa_strand strand_opt = options.GetStrandOption();
255 
256  SetupQueryInfo(query_v, m_ProgramType, strand_opt, &m_QueryInfo);
257  SetupQueries(query_v, m_QueryInfo, &query_blk,
258  m_ProgramType, strand_opt, blast_msg);
// Query set-up must not have produced any messages.
259  ITERATE(TSearchMessages, m, blast_msg) {
260  BOOST_REQUIRE(m->empty());
261  }
262 
263  BlastScoringOptions* score_options = NULL;
264  setupScoreBlk(query_blk->sequence, true, &score_options);
265 
// Subject length: a third of the nucleotide length for tblastn, the protein
// length otherwise.
266  m_SubjectLength = (m_ProgramType == eBlastTypeTblastn ?
267  kNuclLength / 3 : kProtLength);
268 
// The subject is treated as a one-sequence database of that length.
269  fillEffectiveLengths(score_options, (Int8)m_SubjectLength, 1);
270  BlastScoringOptionsFree(score_options);
271 
272  }
273 
274  /// Frees all the C structures used in the test
/// Releases m_HspList, m_HitParams (together with its options) and m_ScoreBlk,
/// storing the value returned by each free function back into the member.
/// m_QueryInfo is not touched here.
/// NOTE(review): the declaration line (275) is absent from this listing;
/// presumably this is freeStructures(), which the tests call - confirm.
276  {
277  m_HspList = Blast_HSPListFree(m_HspList);
278 
279  if (m_HitParams) {
// The options are released separately before the parameters structure that
// points to them.
280  BlastHitSavingOptionsFree(m_HitParams->options);
281  m_HitParams = BlastHitSavingParametersFree(m_HitParams);
282  }
283  m_ScoreBlk = BlastScoreBlkFree(m_ScoreBlk);
284  }
285 
286  /// Test linking with uneven gap sum statistics
/// Shared body of the uneven-gap tests: sets up the query and HSP list, links
/// the HSPs, discards those above the e-value threshold, and checks the link
/// counts and scores of the eight HSPs that remain.
/// NOTE(review): the declaration line (287) is absent from this listing;
/// presumably this is testUnevenGapLinkHsps(), invoked by the tblastn and
/// blastx test cases after they set m_ProgramType and m_Program - confirm.
288  const int kNumHsps = 8;
289  const int kLongestIntron = 4000;
290  const double kEvalue = 1e-10;
291  const int kNumsLinked[kNumHsps] = { 1, 5, 5, 5, 2, 5, 5, 2 };
292  const int kScores[kNumHsps] = { 1023, 282, 246, 202, 142, 117, 98, 92 };
293 
294  setupLinkHspInputTblastn();
295  setupHSPListTransl();
296  setupHitParams(kLongestIntron, kEvalue);
297 
298  BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength,
299  m_ScoreBlk, m_HitParams->link_hsp_params, TRUE);
300 
301  Blast_HSPListReapByEvalue(m_HspList, m_HitParams->options);
302 
// The count is checked first so the indexed checks below stay within both
// expectation arrays.
303  BOOST_REQUIRE_EQUAL(kNumHsps, m_HspList->hspcnt);
304 
305  for (int index = 0; index < kNumHsps; ++index) {
306  BOOST_REQUIRE_EQUAL(kNumsLinked[index], m_HspList->hsp_array[index]->num);
307  BOOST_REQUIRE_EQUAL(kScores[index], m_HspList->hsp_array[index]->score);
308  }
309  }
310 
/// Allocates m_HspList and fills it with five hard-coded HSPs. Each HSP gets
/// the same length on query and subject and a subject frame of 1.
/// NOTE(review): the declaration line (311) is absent from this listing;
/// presumably this is setupHSPListForMiddleInsertTest(), called by the
/// middle-insertion test - confirm against the original file.
312  {
313  const int kNumHsps = 5;
314  const int kScores[kNumHsps] =
315  { 80, 60, 55, 54, 52 };
316  const int kQueryOffsets[kNumHsps] =
317  { 100, 130, 239, 239, 191 };
318  const int kLengths[kNumHsps] =
319  { 100, 50, 100, 9, 57 };
320  const int kSubjectOffsets[kNumHsps] =
321  { 1100, 1130, 3240, 3240, 2195 };
322 
323  m_HspList = Blast_HSPListNew(0);
324  Int4 index;
325  BlastHSP* hsp;
326 
327  for (index = 0; index < kNumHsps; ++index) {
// Each HSP is zero-initialized by calloc; only the fields set below are
// given non-zero values.
328  m_HspList->hsp_array[index] = hsp =
329  (BlastHSP*) calloc(1, sizeof(BlastHSP));
330  hsp->score = kScores[index];
331  hsp->query.offset = kQueryOffsets[index];
332  hsp->subject.offset = kSubjectOffsets[index];
333  hsp->subject.frame = 1;
// The same length is applied to both the query and the subject range.
334  hsp->query.end = hsp->query.offset + kLengths[index];
335  hsp->subject.end = hsp->subject.offset + kLengths[index];
336  }
337 
338  m_HspList->hspcnt = kNumHsps;
339  }
340 
341  /// HSP list setup for blastn
/// Allocates m_HspList and fills it with eight hard-coded HSPs. The context of
/// each HSP is derived from the sign of its query frame; the subject frame is
/// always 1.
/// NOTE(review): the declaration line (342) is absent from this listing;
/// presumably this is setupHSPListNucl(), called at the end of the blastn
/// input set-up - confirm against the original file.
343  {
344  const int kNumHsps = 8;
345  const int kScores[kNumHsps] = { 35, 31, 22, 21, 20, 20, 20, 20 };
346  const int kQueryFrames[kNumHsps] = { 1, 1, 1, -1, 1, -1, -1, -1 };
347  const int kQueryStarts[kNumHsps] =
348  { 790, 790, 791, 4606, 870, 4572, 4526, 4589 };
349  const int kQueryEnds[kNumHsps] =
350  { 865, 865, 833, 4635, 894, 4604, 4550, 4629 };
351  const int kSubjectStarts[kNumHsps] =
352  { 453, 3469, 5837, 12508, 5951, 11005, 9899, 7397 };
353  const int kSubjectEnds[kNumHsps] =
354  { 528, 3544, 5879, 12537, 5975, 11037, 9923, 7437 };
355  Int4 index;
356  BlastHSP* hsp;
357 
358  m_HspList = Blast_HSPListNew(0);
359 
360  for (index = 0; index < kNumHsps; ++index) {
361  hsp = m_HspList->hsp_array[index] =
362  (BlastHSP*) calloc(1, sizeof(BlastHSP));
363  hsp->score = kScores[index];
364  hsp->query.offset = kQueryStarts[index];
365  hsp->query.end = kQueryEnds[index];
366  hsp->query.frame = kQueryFrames[index];
// A positive query frame maps to context 0, a negative one to context 1.
367  hsp->context = (kQueryFrames[index] > 0 ? 0 : 1);
368  hsp->subject.offset = kSubjectStarts[index];
369  hsp->subject.end = kSubjectEnds[index];
370  hsp->subject.frame = 1;
371  }
372  m_HspList->hspcnt = kNumHsps;
373  }
374 
375  /// Complete set-up before calling the HSP linking algorithm
/// blastn variant: sets m_ProgramType and m_Program itself, builds m_QueryInfo
/// from the query length alone, computes the ungapped scoring block and the
/// effective lengths, and finally creates the nucleotide HSP list.
/// NOTE(review): the declaration line (376) and line 394 (the start of the
/// SBlastSequence initializer) are absent from this listing; presumably this
/// is setupLinkHspInputBlastn(), called by the blastn linking test - confirm.
377  {
378  const Uint4 kQueryLength = 5419;
379  const Int8 kEffDbLength = 122632232;
380 
381  m_ProgramType = eBlastTypeBlastn;
382  m_Program = eBlastn;
383 
384  // In subject sequence block, we only need to fill sequence length.
385  s_SetupNuclQueryInfo(kQueryLength, &m_QueryInfo);
386  m_SubjectLength = 12991;
387 
388  CSeq_id seqid("gi|24638835");
// The inclusive range 26993..32411 spans 5419 positions, matching
// kQueryLength above.
389  pair<TSeqPos, TSeqPos> range(26993,32411);
390 
391  unique_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(seqid, range));
392 
393  SBlastSequence sequence(
395  sl->scope, eNa_strand_both, eSentinels));
396  BlastScoringOptions* score_options = NULL;
// Ungapped calculation only; the options are returned so they can be passed
// to fillEffectiveLengths and are released right after.
397  setupScoreBlk(sequence.data.get(), false, &score_options);
398 
399  fillEffectiveLengths(score_options, kEffDbLength, 1);
400  BlastScoringOptionsFree(score_options);
401 
402  setupHSPListNucl();
403  }
404 
/// Builds the option and parameter structures needed to derive cutoff scores
/// and X-dropoff values for the current m_ProgramType / m_Program, and returns
/// them in a calloc'ed AllCutoffScores that the caller must release (the tests
/// use sfree). As a side effect m_QueryInfo, m_ScoreBlk and m_HitParams are
/// (re)created and m_HspList is set to NULL.
/// @param gapped Passed to the scoring block, hit saving options and link
///        parameter update [in]
/// @param db_length Database length used for the effective lengths, the
///        average subject length and the link cutoffs [in]
/// @param db_num_seq Number of database sequences [in]
/// @param subj_length Subject length passed to the hit saving parameters and
///        the link cutoff calculation [in]
/// @param longest_intron Copied into the hit saving options only when
///        greater than 0 [in]
/// NOTE(review): this listing lacks line 405 (presumably the return type,
/// given the AllCutoffScores* that is returned), lines 416-418 (the body of
/// the blastn-only branch) and line 443.
406  setupCutoffScores(bool gapped, Int8 db_length, Uint4 db_num_seq,
407  Uint4 subj_length, int longest_intron=0)
408  {
409  BlastInitialWordOptions* word_options = NULL;
410  BlastExtensionOptions* ext_options = NULL;
411  BlastHitSavingOptions* hit_options = NULL;
412 
413  BlastInitialWordOptionsNew(m_ProgramType, &word_options);
414  BlastExtensionOptionsNew(m_ProgramType, &ext_options, true);
415  if (m_ProgramType == eBlastTypeBlastn) {
419  }
420  BlastHitSavingOptionsNew(m_ProgramType, &hit_options, gapped);
421  if (longest_intron > 0)
422  hit_options->longest_intron = longest_intron;
423 
424  BlastInitialWordParameters* word_params = NULL;
425  BlastExtensionParameters* ext_params = NULL;
426 
427  CRef<CSeq_id> qid;
428  TSeqLocVector qv;
429 
// blastn, blastx and tblastx use "gi|555" with both strands; every other
// program type uses "gi|129295" without a strand argument.
430  if (m_ProgramType == eBlastTypeBlastn || m_ProgramType == eBlastTypeBlastx ||
431  m_ProgramType == eBlastTypeTblastx) {
432  qid.Reset(new CSeq_id("gi|555"));
433  unique_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(*qid,
434  eNa_strand_both));
435  qv.push_back(*qsl);
436  } else {
437  qid.Reset(new CSeq_id("gi|129295"));
438  unique_ptr<SSeqLoc> qsl(CTestObjMgr::Instance().CreateSSeqLoc(*qid));
439  qv.push_back(*qsl);
440  }
441 
442  CBlastOptions options;
444  if (m_ProgramType == eBlastTypeBlastx ||
445  m_ProgramType == eBlastTypeTblastx)
446  options.SetQueryGeneticCode(1);
447 
448  options.SetProgram(m_Program);
449  CBLAST_SequenceBlk query_blk;
450  TSearchMessages blast_msg;
451 
452  ENa_strand strand_opt = options.GetStrandOption();
453 
// m_QueryInfo is overwritten here; whatever it pointed to before is not
// released by this function.
454  SetupQueryInfo(qv, m_ProgramType, strand_opt, &m_QueryInfo);
455  SetupQueries(qv, m_QueryInfo, &query_blk,
456  m_ProgramType, strand_opt, blast_msg);
// Query set-up must not have produced any messages.
457  ITERATE(TSearchMessages, m, blast_msg) {
458  BOOST_REQUIRE(m->empty());
459  }
460 
461  BlastScoringOptions* score_options = NULL;
462  setupScoreBlk(query_blk->sequence, gapped, &score_options);
463 
464  BlastExtensionParametersNew(m_ProgramType, ext_options, m_ScoreBlk,
465  m_QueryInfo, &ext_params);
466  fillEffectiveLengths(score_options, (Int8)db_length, db_num_seq);
467  score_options = BlastScoringOptionsFree(score_options);
468  BOOST_REQUIRE(score_options == NULL);
469 
// hit_options is handed to m_HitParams here and is not freed in this
// function; freeStructures() releases m_HitParams->options.
470  BlastHitSavingParametersNew(m_ProgramType, hit_options,
471  m_ScoreBlk, m_QueryInfo, subj_length, 0, &m_HitParams);
472 
473 
// A temporary lookup table covering the first query context is built because
// the initial word parameters constructor takes it as an argument.
474  QuerySetUpOptions* query_options = NULL;
475  BlastQuerySetUpOptionsNew(&query_options);
476  LookupTableWrap* lookup_wrap = NULL;
477  LookupTableOptions* lookup_options = NULL;
478  BlastSeqLoc* blast_seq_loc = BlastSeqLocNew(NULL, 0, m_QueryInfo->contexts[0].query_length-1);
479  LookupTableOptionsNew(m_ProgramType, &lookup_options);
480  LookupTableWrapInit(query_blk, lookup_options, query_options, blast_seq_loc, m_ScoreBlk, &lookup_wrap, NULL, NULL, NULL);
481  query_options = BlastQuerySetUpOptionsFree(query_options);
482  BOOST_REQUIRE(query_options == NULL);
483 
// NOTE(review): integer division; db_num_seq == 0 would divide by zero. All
// callers in this file pass non-zero counts.
484  Uint4 avg_subj_length = (Uint4)(db_length/db_num_seq);
485  BlastInitialWordParametersNew(m_ProgramType, word_options, m_HitParams, lookup_wrap,
486  m_ScoreBlk, m_QueryInfo, avg_subj_length, &word_params);
487 
// The lookup-table helpers are no longer needed once word_params exists.
488  blast_seq_loc = BlastSeqLocFree(blast_seq_loc);
489  BOOST_REQUIRE(blast_seq_loc == NULL);
490  lookup_wrap = LookupTableWrapFree(lookup_wrap);
491  BOOST_REQUIRE(lookup_wrap == NULL);
492  lookup_options = LookupTableOptionsFree(lookup_options);
493  BOOST_REQUIRE(lookup_options == NULL);
494 
495  BlastLinkHSPParametersUpdate(word_params, m_HitParams, (gapped ? TRUE : FALSE));
496 
497 
// Link cutoffs are only calculated for ungapped, non-blastn searches that
// have link parameters.
498  if (m_HitParams->link_hsp_params &&
499  m_ProgramType != eBlastTypeBlastn && !gapped) {
500  CalculateLinkHSPCutoffs(m_ProgramType, m_QueryInfo, m_ScoreBlk,
501  m_HitParams->link_hsp_params, word_params, db_length,
502  subj_length);
503  }
504 
// Collect the values under test; fields not assigned below stay zero from
// calloc.
505  AllCutoffScores* retval =
506  (AllCutoffScores*) calloc(1, sizeof(AllCutoffScores));
507  retval->x_drop_ungapped = word_params->x_dropoff_max;
508  retval->x_drop_gapped = ext_params->gap_x_dropoff;
509  retval->x_drop_final = ext_params->gap_x_dropoff_final;
510  retval->cutoff_score_ungapped = word_params->cutoff_score_min;
511  retval->cutoff_score_final = m_HitParams->cutoff_score_min;
512  retval->do_sum_stats = m_HitParams->do_sum_stats;
// The gap cutoffs are only copied when sum statistics are enabled.
513  if (retval->do_sum_stats) {
514  retval->cutoff_small_gap =
515  m_HitParams->link_hsp_params->cutoff_small_gap;
516  retval->cutoff_big_gap =
517  m_HitParams->link_hsp_params->cutoff_big_gap;
518  }
519 
520  BlastInitialWordParametersFree(word_params);
521  BlastInitialWordOptionsFree(word_options);
522  BlastExtensionParametersFree(ext_params);
523  BlastExtensionOptionsFree(ext_options);
524  // Set to NULL those member fields that are not used in these tests.
525  m_HspList = NULL;
526 
527  return retval;
528  }
529 
530 };
531 
532 BOOST_FIXTURE_TEST_SUITE(linkhsp, LinkHspTestFixture)
533 
534 /// Test linking with uneven gap sum statistics
535 BOOST_AUTO_TEST_CASE(testUnevenGapLinkHspsTblastn) {
536  m_ProgramType = eBlastTypeTblastn;
537  m_Program = eTblastn;
538  testUnevenGapLinkHsps();
539 }
540 
541 /// Test linking with uneven gap sum statistics
542 BOOST_AUTO_TEST_CASE(testUnevenGapLinkHspsBlastx) {
543  m_ProgramType = eBlastTypeBlastx;
544  m_Program = eBlastx;
545  testUnevenGapLinkHsps();
546 }
547 
548 /// Tests the uneven gap linking where an HSP has to be inserted in the
549 /// middle between two higher scoring HSPs that can be linked by themselves.
550 BOOST_AUTO_TEST_CASE(testUnevenGapLinkHspsMiddleInsertion) {
551  const int kNumHsps = 5;
552  const int kLongestIntron = 3000;
553  const double kEvalue = 10;
554  const int kLinkNums[kNumHsps] = { 3, 1, 3, 1, 3 };
555  m_ProgramType = eBlastTypeTblastn;
556  m_Program = eTblastn;
557 
558  setupLinkHspInputTblastn();
559  setupHSPListForMiddleInsertTest();
560  setupHitParams(kLongestIntron, kEvalue);
561 
562  BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength,
563  m_ScoreBlk, m_HitParams->link_hsp_params, TRUE);
564  for (int index = 0; index < m_HspList->hspcnt; ++index) {
565  BOOST_REQUIRE_EQUAL(kLinkNums[index],
566  m_HspList->hsp_array[index]->num);
567  }
568 }
569 
570 /// Test linking with small/large gap sum statistics for tblastn
571 BOOST_AUTO_TEST_CASE(testEvenGapLinkHspsTblastn) {
572  const int kNumHsps = 5;
573  const double kEvalue = 1e-10;
574  const int kNumsLinked[kNumHsps] = { 1, 2, 2, 1, 1 };
575  const int kScores[kNumHsps] = { 1023, 282, 246, 202, 142 };
576 
577  m_ProgramType = eBlastTypeTblastn;
578  m_Program = eTblastn;
579  setupLinkHspInputTblastn();
580  setupHSPListTransl();
581 
582  setupHitParams(0, kEvalue);
583 
584  BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength,
585  m_ScoreBlk, m_HitParams->link_hsp_params, TRUE);
586 
587  Blast_HSPListReapByEvalue(m_HspList, m_HitParams->options);
588 
589  BOOST_REQUIRE_EQUAL(kNumHsps, m_HspList->hspcnt);
590 
591  Int4 index;
592  for (index = 0; index < kNumHsps; ++index) {
593  BOOST_REQUIRE_EQUAL(kNumsLinked[index], m_HspList->hsp_array[index]->num);
594  BOOST_REQUIRE_EQUAL(kScores[index],
595  m_HspList->hsp_array[index]->score);
596  }
597 }
598 
599 /// Test linking with small/large gap sum statistics for blastn
600 BOOST_AUTO_TEST_CASE(testEvenGapLinkHspsBlastn) {
601  const int kNumHsps = 8;
602  const double kEvalue = 10;
603  const int kNumsLinked[kNumHsps] =
604  { 2, 1, 1, 3, 2, 1, 3, 3 };
605  const double kEvalues[kNumHsps] =
606  { 3e-12, 3e-7, 0.07, 1e-7, 3e-12, 1.1, 1e-7, 1e-7 };
607 
608  setupLinkHspInputBlastn();
609  setupHitParams(0, kEvalue);
610 
611  BLAST_LinkHsps(m_ProgramType, m_HspList, m_QueryInfo, m_SubjectLength,
612  m_ScoreBlk, m_HitParams->link_hsp_params, FALSE);
613 
614  Blast_HSPListReapByEvalue(m_HspList, m_HitParams->options);
615  BOOST_REQUIRE_EQUAL(kNumHsps, m_HspList->hspcnt);
616 
617  for (Int4 index = 0; index < kNumHsps; ++index) {
618  BOOST_REQUIRE_EQUAL(kNumsLinked[index],
619  m_HspList->hsp_array[index]->num);
620  BOOST_REQUIRE(fabs(kEvalues[index] - m_HspList->hsp_array[index]->evalue)/kEvalues[index] < 0.5);
621  }
622 }
623 
624 static void
625 testAllCutoffs(const AllCutoffScores& good_cutoffs,
626  AllCutoffScores& cutoffs)
627 {
628  BOOST_REQUIRE_EQUAL(good_cutoffs.x_drop_ungapped,
629  cutoffs.x_drop_ungapped);
630  BOOST_REQUIRE_EQUAL(good_cutoffs.x_drop_gapped,
631  cutoffs.x_drop_gapped);
632  BOOST_REQUIRE_EQUAL(good_cutoffs.x_drop_final,
633  cutoffs.x_drop_final);
634  BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_score_ungapped,
635  cutoffs.cutoff_score_ungapped);
636  BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_score_final,
637  cutoffs.cutoff_score_final);
638  BOOST_REQUIRE_EQUAL(good_cutoffs.do_sum_stats,
639  cutoffs.do_sum_stats);
640  BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_small_gap,
641  cutoffs.cutoff_small_gap);
642  BOOST_REQUIRE_EQUAL(good_cutoffs.cutoff_big_gap,
643  cutoffs.cutoff_big_gap);
644 }
645 
646 BOOST_AUTO_TEST_CASE(UngappedBlastnCutoffs)
647 {
648  const int kNumDbs = 4;
649  const Int8 kDbLengths[kNumDbs] =
650  { 10000000000LL, 10000000000LL, 3000000000LL, 10000LL };
651  const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500, 100 };
652  const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000, 100 };
653  const AllCutoffScores kGoodCutoffs[kNumDbs] = {
654  { 11, 0, 0, 0, 14, 20, true, 14, 0 },
655  { 11, 0, 0, 0, 12, 20, true, 12, 0 },
656  { 11, 0, 0, 0, 19, 19, true, 19, 0 },
657  { 11, 0, 0, 0, 10, 10, true, 10, 0 } };
658 
659  AllCutoffScores* cutoffs = NULL;
660  int index;
661  m_ProgramType = eBlastTypeBlastn;
662  m_Program = eBlastn;
663  for (index = 0; index < kNumDbs; ++index) {
664  cutoffs = setupCutoffScores(false, kDbLengths[index],
665  kDbNumSeqs[index], kSubjectLengths[index]);
666  testAllCutoffs(kGoodCutoffs[index], *cutoffs);
667  sfree(cutoffs);
668  freeStructures();
669  if (index < kNumDbs-1)
670  BlastQueryInfoFree(m_QueryInfo);
671  }
672 }
673 
674 BOOST_AUTO_TEST_CASE(UngappedBlastpCutoffs)
675 {
676  const Int8 kDbLength = 500000000;
677  const Uint4 kDbNumSeqs = 1000000;
678  const int kNumSubjects = 3;
679  const Uint4 kSubjectLengths[kNumSubjects] = {400, 60, 3000 };
680  const AllCutoffScores kGoodCutoffs[kNumSubjects] = {
681  { 16, 0, 0, 0, 41, 66, true, 41, 38 },
682  { 16, 0, 0, 0, 41, 66, true, 0, 29 },
683  { 16, 0, 0, 0, 41, 66, true, 41, 44 } };
684  AllCutoffScores* cutoffs = NULL;
685  int index;
686  m_ProgramType = eBlastTypeBlastp;
687  m_Program = eBlastp;
688  for (index = 0; index < kNumSubjects; ++index) {
689  cutoffs = setupCutoffScores(false, kDbLength,
690  kDbNumSeqs, kSubjectLengths[index]);
691  testAllCutoffs(kGoodCutoffs[index], *cutoffs);
692  sfree(cutoffs);
693  freeStructures();
694  if (index < kNumSubjects-1)
695  BlastQueryInfoFree(m_QueryInfo);
696  }
697 }
698 
699 BOOST_AUTO_TEST_CASE(UngappedBlastxCutoffs)
700 {
701  const Int8 kDbLength = /*500000000*/227102922;
702  const Uint4 kDbNumSeqs = /*1000000*/761886;
703  const int kNumSubjects = 3;
704  const Uint4 kSubjectLengths[kNumSubjects] = { 400, 100, 3000 };
705  const AllCutoffScores kGoodCutoffs[kNumSubjects] = {
706  { 16, 0, 0, 0, 31, 63, true, 31, 37 },
707  { 16, 0, 0, 0, 31, 63, true, 0, 31 },
708  { 16, 0, 0, 0, 31, 63, true, 31, 43 } };
709  AllCutoffScores* cutoffs = NULL;
710  int index;
711  m_ProgramType = eBlastTypeBlastx;
712  m_Program = eBlastx;
713  for (index = 0; index < kNumSubjects; ++index) {
714  cutoffs = setupCutoffScores(false, kDbLength, kDbNumSeqs,
715  kSubjectLengths[index]);
716  testAllCutoffs(kGoodCutoffs[index], *cutoffs);
717  sfree(cutoffs);
718  freeStructures();
719  if (index < kNumSubjects-1)
720  BlastQueryInfoFree(m_QueryInfo);
721  }
722 }
723 
724 BOOST_AUTO_TEST_CASE(UngappedTblastnCutoffs)
725 {
726  const int kNumDbs = 3;
727  const Int8 kDbLengths[kNumDbs] =
728  { 10000000000LL, 10000000000LL, 3000000000LL };
729  const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500 };
730  const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000 };
731  const AllCutoffScores kGoodCutoffs[kNumDbs] = {
732  { 16, 0, 0, 0, 40, 72, true, 40, 40 },
733  { 16, 0, 0, 0, 33, 71, true, 33, 35 },
734  { 16, 0, 0, 0, 41, 69, true, 41, 60 } };
735 
736  AllCutoffScores* cutoffs = NULL;
737  int index;
738  m_ProgramType = eBlastTypeTblastn;
739  m_Program = eTblastn;
740  for (index = 0; index < kNumDbs; ++index) {
741  cutoffs = setupCutoffScores(false, kDbLengths[index],
742  kDbNumSeqs[index], kSubjectLengths[index]);
743  testAllCutoffs(kGoodCutoffs[index], *cutoffs);
744  sfree(cutoffs);
745  freeStructures();
746  if (index < kNumDbs-1)
747  BlastQueryInfoFree(m_QueryInfo);
748  }
749 }
750 
751 BOOST_AUTO_TEST_CASE(UngappedTblastxCutoffs)
752 {
753  const int kNumDbs = 4;
754  const Int8 kDbLengths[kNumDbs] =
755  { 10000000000LL, 10000000000LL, 10000000000LL, 3000000000LL };
756  const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 2000000, 20000000, 500 };
757  const Uint4 kSubjectLengths[kNumDbs] = { 2000, 100, 400, 3000000 };
758  const AllCutoffScores kGoodCutoffs[kNumDbs] = {
759  { 16, 0, 0, 0, 41, 72, true, 41, 40 },
760  { 16, 0, 0, 0, 41, 72, true, 0, 27 },
761  { 16, 0, 0, 0, 41, 70, true, 41, 34 },
762  { 16, 0, 0, 0, 41, 68, true, 41, 60 } };
763 
764  AllCutoffScores* cutoffs = NULL;
765  int index;
766  m_ProgramType = eBlastTypeTblastx;
767  m_Program = eTblastx;
768  for (index = 0; index < kNumDbs; ++index) {
769  cutoffs = setupCutoffScores(false, kDbLengths[index],
770  kDbNumSeqs[index], kSubjectLengths[index]);
771  testAllCutoffs(kGoodCutoffs[index], *cutoffs);
772  sfree(cutoffs);
773  freeStructures();
774  if (index < kNumDbs-1)
775  BlastQueryInfoFree(m_QueryInfo);
776  }
777 }
778 
779 BOOST_AUTO_TEST_CASE(GappedBlastnCutoffs)
780 {
781  const int kNumDbs = 4;
782  const Int8 kDbLengths[kNumDbs] =
783  { 10000000000LL, 10000000000LL, 3000000000LL, 10000LL };
784  const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500, 200 };
785  const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000, 60 };
786  const AllCutoffScores kGoodCutoffs[kNumDbs] = {
787  { 11, 15, 50, 0, 13, 20, false, 0, 0 },
788  { 11, 15, 50, 0, 13, 20, false, 0, 0 },
789  { 11, 15, 50, 0, 13, 19, false, 0, 0 },
790  { 11, 15, 50, 0, 10, 10, false, 0, 0 } };
791 
792  AllCutoffScores* cutoffs = NULL;
793  int index;
794  m_ProgramType = eBlastTypeBlastn;
795  m_Program = eBlastn;
796  for (index = 0; index < kNumDbs; ++index) {
797  cutoffs = setupCutoffScores(true, kDbLengths[index],
798  kDbNumSeqs[index], kSubjectLengths[index]);
799  testAllCutoffs(kGoodCutoffs[index], *cutoffs);
800  sfree(cutoffs);
801  freeStructures();
802  if (index < kNumDbs-1)
803  BlastQueryInfoFree(m_QueryInfo);
804  }
805 }
806 
807 BOOST_AUTO_TEST_CASE(GappedBlastpCutoffs)
808 {
809  const Int8 kDbLength = 600000000;
810  const Uint4 kDbNumSeqs = 1800000;
811  const Uint4 kSubjectLength = 200;
812  m_ProgramType = eBlastTypeBlastp;
813  m_Program = eBlastp;
814  const AllCutoffScores kGoodCutoffs =
815  { 16, 38, 64, 41, 19, 19, false, 0, 0 };
816  AllCutoffScores* cutoffs =
817  setupCutoffScores(true, kDbLength, kDbNumSeqs, kSubjectLength);
818  testAllCutoffs(kGoodCutoffs, *cutoffs);
819  sfree(cutoffs);
820  freeStructures();
821 }
822 
823 BOOST_AUTO_TEST_CASE(GappedBlastxCutoffs)
824 {
825  const int kNumDbs = 2;
826  const Int8 kDbLengths[kNumDbs] =
827  {600000000, 6000000000LL};
828  const Uint4 kDbNumSeqs = 1800000;
829  const Uint4 kSubjectLength[kNumDbs] = {500, 2000};
830  const AllCutoffScores kGoodCutoffs[kNumDbs] = {
831  { 16, 38, 64, 0, 22, 22, true, 22, 0 },
832  { 16, 38, 64, 0, 27, 27, true, 27, 0 } };
833  m_ProgramType = eBlastTypeBlastx;
834  m_Program = eBlastx;
835  for (int index = 0; index < kNumDbs; ++index) {
836  AllCutoffScores* cutoffs = setupCutoffScores(true,
837  kDbLengths[index], kDbNumSeqs, kSubjectLength[index]);
838  testAllCutoffs(kGoodCutoffs[index], *cutoffs);
839  sfree(cutoffs);
840  freeStructures();
841  if (index < kNumDbs-1)
842  BlastQueryInfoFree(m_QueryInfo);
843  }
844 }
845 
846 BOOST_AUTO_TEST_CASE(GappedTblastnCutoffs)
847 {
848  const int kNumDbs = 3;
849  const Int8 kDbLengths[kNumDbs] =
850  { 10000000000LL, 10000000000LL, 3000000000LL };
851  const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500 };
852  const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000 };
853  const AllCutoffScores kGoodCutoffs[kNumDbs] = {
854  { 16, 38, 64, 41, 27, 27, true, 27, 0 },
855  { 16, 38, 64, 41, 21, 21, true, 21, 0 },
856  { 16, 38, 64, 41, 41, 54, true, 41, 0 } };
857 
858  AllCutoffScores* cutoffs = NULL;
859  int index;
860  m_ProgramType = eBlastTypeTblastn;
861  m_Program = eTblastn;
862  for (index = 0; index < kNumDbs; ++index) {
863  cutoffs = setupCutoffScores(true, kDbLengths[index],
864  kDbNumSeqs[index], kSubjectLengths[index]);
865  testAllCutoffs(kGoodCutoffs[index], *cutoffs);
866  sfree(cutoffs);
867  freeStructures();
868  if (index < kNumDbs-1)
869  BlastQueryInfoFree(m_QueryInfo);
870  }
871 }
872 
873 BOOST_AUTO_TEST_CASE(GappedTblastnVeryShortIntron)
874 {
875  const int kNumDbs = 3;
876  const Int8 kDbLengths[kNumDbs] =
877  { 10000000000LL, 10000000000LL, 3000000000LL };
878  const Uint4 kDbNumSeqs[kNumDbs] = { 2000000, 20000000, 500 };
879  const Uint4 kSubjectLengths[kNumDbs] = { 2000, 400, 3000000 };
880 
881  AllCutoffScores* cutoffs = NULL;
882  int index;
883  m_ProgramType = eBlastTypeTblastn;
884  m_Program = eTblastn;
885  for (index = 0; index < kNumDbs; ++index) {
886  cutoffs = setupCutoffScores(true, kDbLengths[index],
887  kDbNumSeqs[index], kSubjectLengths[index], 1);
888 
889  BOOST_REQUIRE_EQUAL((int) false, (int) cutoffs->do_sum_stats);
890  sfree(cutoffs);
891  freeStructures();
892  if (index < kNumDbs-1)
893  BlastQueryInfoFree(m_QueryInfo);
894  }
895 }
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Definition: blast_filter.c:737
BlastSeqLoc * BlastSeqLocNew(BlastSeqLoc **head, Int4 from, Int4 to)
Create and initialize a new sequence interval.
Definition: blast_filter.c:608
Structures and API used for saving BLAST hits.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Definition: blast_hits.c:1558
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
Definition: blast_hits.c:1542
Int2 Blast_HSPListReapByEvalue(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Discard the HSPs above the e-value threshold from the HSP list.
Definition: blast_hits.c:1976
Blast_Message * Blast_MessageFree(Blast_Message *blast_msg)
Deallocates message memory.
Definition: blast_message.c:80
Definitions which are dependent on the NCBI C++ Object Manager.
The structures and functions in blast_options.
#define BLAST_GAP_X_DROPOFF_NUCL
default dropoff for non-greedy nucleotide gapped extensions
BlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options)
Deallocate memory for BlastHitSavingOptions.
#define BLAST_UNGAPPED_X_DROPOFF_NUCL
ungapped dropoff score for blastn (and megablast)
Int2 BlastQuerySetUpOptionsNew(QuerySetUpOptions **options)
Allocate memory for QuerySetUpOptions and fill with default values.
Int2 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions **options)
Allocate memory for BlastEffectiveLengthsOptions* and fill with default values.
BlastInitialWordOptions * BlastInitialWordOptionsFree(BlastInitialWordOptions *options)
Deallocate memory for BlastInitialWordOptions.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
BlastEffectiveLengthsOptions * BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions *options)
Deallocate memory for BlastEffectiveLengthsOptions*.
Int2 LookupTableOptionsNew(EBlastProgramType program, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
#define BLAST_GAP_X_DROPOFF_FINAL_NUCL
default dropoff for the final nucleotide gapped extensions
BlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)
Deallocate memory for BlastExtensionOptions.
Int2 BlastHitSavingOptionsNew(EBlastProgramType program, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
Int2 BlastInitialWordOptionsNew(EBlastProgramType program, BlastInitialWordOptions **options)
Allocate memory for BlastInitialWordOptions and fill with default values.
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
LookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)
Deallocates memory for LookupTableOptions*.
QuerySetUpOptions * BlastQuerySetUpOptionsFree(QuerySetUpOptions *options)
Deallocate memory for QuerySetUpOptions.
Int2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)
Allocate memory for BlastExtensionOptions and fill with default values.
Declares class to encapsulate all BLAST options.
BlastHitSavingParameters * BlastHitSavingParametersFree(BlastHitSavingParameters *parameters)
Deallocate memory for BlastHitSavingParameters*.
Int2 BlastLinkHSPParametersNew(EBlastProgramType program_number, Boolean gapped_calculation, BlastLinkHSPParameters **link_hsp_params)
Initialize the linking HSPs parameters with default values.
BlastEffectiveLengthsParameters * BlastEffectiveLengthsParametersFree(BlastEffectiveLengthsParameters *parameters)
Deallocate memory for BlastEffectiveLengthsParameters*.
Int2 BlastExtensionParametersNew(EBlastProgramType blast_program, const BlastExtensionOptions *options, BlastScoreBlk *sbp, BlastQueryInfo *query_info, BlastExtensionParameters **parameters)
Calculate the raw values for the X-dropoff parameters.
BlastInitialWordParameters * BlastInitialWordParametersFree(BlastInitialWordParameters *parameters)
Deallocate memory for BlastInitialWordParameters.
BlastExtensionParameters * BlastExtensionParametersFree(BlastExtensionParameters *parameters)
Deallocate memory for BlastExtensionParameters.
Int2 BlastInitialWordParametersNew(EBlastProgramType program_number, const BlastInitialWordOptions *word_options, const BlastHitSavingParameters *hit_params, const LookupTableWrap *lookup_wrap, const BlastScoreBlk *sbp, BlastQueryInfo *query_info, Uint4 subject_length, BlastInitialWordParameters **parameters)
Allocate memory for BlastInitialWordParameters and set x_dropoff.
Int2 BlastHitSavingParametersNew(EBlastProgramType program_number, const BlastHitSavingOptions *options, const BlastScoreBlk *sbp, const BlastQueryInfo *query_info, Int4 avg_subject_length, Int4 compositionBasedStats, BlastHitSavingParameters **parameters)
Allocate memory and initialize the BlastHitSavingParameters structure.
Int2 BlastLinkHSPParametersUpdate(const BlastInitialWordParameters *word_params, const BlastHitSavingParameters *hit_params, Boolean gapped_calculation)
Update BlastLinkHSPParameters, using calculated values of other parameters.
void CalculateLinkHSPCutoffs(EBlastProgramType program, BlastQueryInfo *query_info, const BlastScoreBlk *sbp, BlastLinkHSPParameters *link_hsp_params, const BlastInitialWordParameters *word_params, Int8 db_length, Int4 subject_length)
Calculates cutoff scores and returns them.
Int2 BlastEffectiveLengthsParametersNew(const BlastEffectiveLengthsOptions *options, Int8 db_length, Int4 num_seqs, BlastEffectiveLengthsParameters **parameters)
Allocate memory for BlastEffectiveLengthsParameters.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastn
Definition: blast_program.h:74
@ eBlastTypeBlastx
Definition: blast_program.h:75
@ eBlastTypeTblastx
Definition: blast_program.h:79
@ eBlastTypeTblastn
Definition: blast_program.h:77
@ eBlastTypeBlastp
Definition: blast_program.h:73
BlastQueryInfo * BlastQueryInfoFree(BlastQueryInfo *query_info)
Deallocate memory for query information structure.
BlastQueryInfo * BlastQueryInfoNew(EBlastProgramType program, int num_queries)
Allocate memory for query information structure.
Utilities initialize/setup BLAST.
Int2 Blast_ScoreBlkKbpGappedCalc(BlastScoreBlk *sbp, const BlastScoringOptions *scoring_options, EBlastProgramType program, const BlastQueryInfo *query_info, Blast_Message **error_return)
Blast_ScoreBlkKbpGappedCalc, fills the ScoreBlkPtr for a gapped search.
Definition: blast_setup.c:41
Int2 Blast_ScoreBlkMatrixInit(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)
Initializes the substitution matrix in the BlastScoreBlk according to the scoring options specified.
Definition: blast_setup.c:330
Int2 BLAST_CalcEffLengths(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, const BlastEffectiveLengthsParameters *eff_len_params, const BlastScoreBlk *sbp, BlastQueryInfo *query_info, Blast_Message **blast_message)
Function to calculate effective query length and db length as well as effective search space.
Definition: blast_setup.c:699
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
Definition: blast_stat.c:965
Int2 Blast_ScoreBlkKbpUngappedCalc(EBlastProgramType program, BlastScoreBlk *sbp, Uint1 *query, const BlastQueryInfo *query_info, Blast_Message **blast_message)
Calculate and fill the ungapped Karlin-Altschul parameters in the BlastScoreBlk structure (fields kbp...
Definition: blast_stat.c:2737
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
Definition: blast_stat.c:884
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
@ eTblastx
Translated nucl-Translated nucl.
Definition: blast_types.hpp:62
@ eBlastn
Nucl-Nucl (traditional blastn)
Definition: blast_types.hpp:58
@ eBlastp
Protein-Protein.
Definition: blast_types.hpp:59
@ eTblastn
Protein-Translated nucl.
Definition: blast_types.hpp:61
@ eBlastx
Translated nucl-Protein.
Definition: blast_types.hpp:60
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
Wrapper class for BLAST_SequenceBlk .
Definition: blast_aux.hpp:309
Encapsulates ALL the BLAST algorithm's options.
Wrapper class for BlastQueryInfo .
Definition: blast_aux.hpp:311
static CTestObjMgr & Instance()
Definition: test_objmgr.cpp:71
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
void SetStrandOption(objects::ENa_strand s)
void SetQueryGeneticCode(int gc)
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
objects::ENa_strand GetStrandOption() const
#define BLASTNA_SEQ_CODE
Identifies the blastna alphabet, for use in blast only.
void SetProgram(EProgram p)
Sets the task this object is best suited for.
#define BLASTAA_SEQ_CODE
== Seq_code_ncbistdaa
TAutoUint1Ptr data
Sequence data.
Definition: blast_setup.hpp:64
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
@ eSentinels
Use sentinel bytes.
Definition: blast_setup.hpp:94
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
element_type * get(void) const
Get pointer.
Definition: ncbimisc.hpp:469
#define NULL
Definition: ncbistd.hpp:225
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
BOOST_AUTO_TEST_CASE(testUnevenGapLinkHspsTblastn)
Test linking with uneven gap sum statistics.
static void s_SetupNuclQueryInfo(Uint4 query_length, BlastQueryInfo **query_info)
Sets up the query information structure without a real sequence.
static void testAllCutoffs(const AllCutoffScores &good_cutoffs, AllCutoffScores &cutoffs)
LookupTableWrap * LookupTableWrapFree(LookupTableWrap *lookup)
Deallocate memory for the lookup table.
Definition: lookup_wrap.c:197
Int2 LookupTableWrapInit(BLAST_SequenceBlk *query, const LookupTableOptions *lookup_options, const QuerySetUpOptions *query_options, BlastSeqLoc *lookup_segments, BlastScoreBlk *sbp, LookupTableWrap **lookup_wrap_ptr, const BlastRPSInfo *rps_info, Blast_Message **error_msg, BlastSeqSrc *seqsrc)
Create the lookup table for all query words.
Definition: lookup_wrap.c:47
range(_Ty, _Ty) -> range< _Ty >
Magic spell ;-) needed for some weird compilers... very empiric.
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
#define fabs(v)
Definition: ncbi_dispd.c:46
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
Defines: CTimeFormat - storage class for time format.
The Object manager core.
Implementation of the BlastSeqSrc interface using the C++ BLAST databases API.
static const string kEvalue
Definition: showdefline.cpp:79
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
Int4 query_length
Length of this query, strand or frame.
Options for setting up effective lengths and search spaces.
Parameters for setting up effective lengths and search spaces.
Options used for gapped extension These include: a.
double gap_x_dropoff_final
X-dropoff value for the final gapped extension (in bits)
double gap_x_dropoff
X-dropoff value for gapped extension (in bits)
Computed values used as parameters for gapped alignments.
Int4 gap_x_dropoff_final
X-dropoff value for the final gapped extension (raw)
Int4 gap_x_dropoff
X-dropoff value for gapped extension (raw)
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Int4 hspcnt
Number of HSPs saved.
Definition: blast_hits.h:158
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Definition: blast_hits.h:157
Structure holding all information about an HSP.
Definition: blast_hits.h:126
BlastSeg query
Query sequence info.
Definition: blast_hits.h:131
Int4 context
Context number of query.
Definition: blast_hits.h:133
Int4 num
How many HSPs are linked together for sum statistics evaluation? If unset (0), this HSP is not part ...
Definition: blast_hits.h:135
BlastSeg subject
Subject sequence info.
Definition: blast_hits.h:132
Int4 score
This HSP's raw score.
Definition: blast_hits.h:127
Options used when evaluating and saving hits These include: a.
Int4 longest_intron
The longest distance between HSPs allowed for combining via sum statistics with uneven gaps.
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Int4 cutoff_score_min
smallest cutoff score across all contexts
Boolean do_sum_stats
TRUE if sum stats will be used.
BlastLinkHSPParameters * link_hsp_params
Parameters for linking HSPs with sum statistics; linking is not done if NULL.
BlastHitSavingOptions * options
The original (unparsed) options.
Options needed for initial word finding and processing.
double x_dropoff
X-dropoff value (in bits) for the ungapped extension.
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it.
Int4 cutoff_score_min
smallest cutoff score across all contexts
Int4 x_dropoff_max
largest X-drop cutoff across all contexts
Int4 cutoff_big_gap
Cutoff sum score for linked HSPs with big gaps.
Int4 cutoff_small_gap
Cutoff sum score for linked HSPs with small gaps.
Int4 longest_intron
Length of a longest intron for uneven gap linking of HSPs.
The query related information.
BlastContextInfo * contexts
Information per context.
Int4 last_context
Index of the last element of the context array.
Structure used for scoring calculations.
Definition: blast_stat.h:177
Blast_KarlinBlk ** kbp
Karlin-Altschul parameters.
Definition: blast_stat.h:207
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
Definition: blast_stat.h:208
Blast_KarlinBlk ** kbp_gap_std
K-A parameters for std (not position-based) alignments.
Definition: blast_stat.h:214
Blast_KarlinBlk ** kbp_std
K-A parameters for ungapped alignments.
Definition: blast_stat.h:212
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Boolean gapped_calculation
gap-free search if FALSE
char * matrix
Name of the matrix containing all scores: needed for finding neighboring words.
Int4 end
End of hsp.
Definition: blast_hits.h:99
Int2 frame
Translation frame.
Definition: blast_hits.h:97
Int4 offset
Start of hsp.
Definition: blast_hits.h:98
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
Structure to hold a message from the core of the BLAST engine.
Definition: blast_message.h:70
void freeStructures()
Frees all the C structures used in the test.
BlastScoreBlk * m_ScoreBlk
void fillEffectiveLengths(const BlastScoringOptions *score_options, Int8 db_length, Int4 db_num_seq)
Fills the effective lengths data into the query information structure.
void setupLinkHspInputBlastn()
Complete set-up before calling the HSP linking algorithm.
BlastHSPList * m_HspList
void setupScoreBlk(Uint1 *seqbuf, bool gapped, BlastScoringOptions **score_options_ptr)
Sets up the scoring block with the Karlin-Altschul parameters.
void setupLinkHspInputTblastn()
Complete set-up before calling the HSP linking algorithm.
BlastHitSavingParameters * m_HitParams
void setupHitParams(int longest_intron, double evalue)
Sets up the hit saving parameters structures.
void setupHSPListNucl()
HSP list setup for blastn.
void setupHSPListTransl()
Sets up the input list of HSPs. These must be sorted by score.
AllCutoffScores * setupCutoffScores(bool gapped, Int8 db_length, Uint4 db_num_seq, Uint4 subj_length, int longest_intron=0)
CBlastQueryInfo m_QueryInfo
void testUnevenGapLinkHsps()
Test linking with uneven gap sum statistics.
EBlastProgramType m_ProgramType
Options needed to construct a lookup table Also needed: query sequence and query length.
Wrapper structure for different types of BLAST lookup tables.
Definition: lookup_wrap.h:50
Options required for setting up the query sequence.
Structure to store sequence data and its length for use in the CORE of BLAST (it's a malloc'ed array ...
Definition: blast_setup.hpp:62
Utility stuff for more convenient using of Boost.Test library.
@ FALSE
Definition: testodbc.c:27
@ TRUE
Definition: testodbc.c:27
voidp calloc(uInt items, uInt size)
Modified on Fri Mar 01 10:04:51 2024 by modify_doxy.py rev. 669887