NCBI C++ ToolKit
blastextend_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blastextend_unit_test.cpp 92017 2020-12-17 15:27:36Z grichenk $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Ilya Dondoshansky
27 *
28 * File Description:
29 * Unit test module to test the nucleotide gapped alignment part of BLAST
30 *
31 * ===========================================================================
32 */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 
36 #include <corelib/ncbitime.hpp>
38 #include <objmgr/scope.hpp>
39 
46 #include <blast_objmgr_priv.hpp>
47 #ifdef NCBI_OS_IRIX
48 #include <stdlib.h>
49 #else
50 #include <cstdlib>
51 #endif
52 
53 #include "test_objmgr.hpp"
54 
55 using namespace std;
56 using namespace ncbi;
57 using namespace ncbi::objects;
58 using namespace ncbi::blast;
59 
60 extern "C" int x_score_compare_hsps(const void* v1, const void* v2)
61 {
62  BlastHSP* h1,* h2;
63 
64  h1 = *((BlastHSP**) v1);
65  h2 = *((BlastHSP**) v2);
66 
67  if (h1->score > h2->score)
68  return -1;
69  else if (h1->score < h2->score)
70  return 1;
71  return 0;
72 }
73 
// NOTE(review): the line(s) declaring this body are absent from this extract
// (the embedded listing numbers jump from 72 to 75), and so are several lines
// inside it. The statements that remain set the option, score-block,
// hit-list, gap-alignment and parameter pointer members to NULL.
// Presumably this is the fixture constructor - confirm against the full file.
75 {
78 
85 
89 
91  m_ScoringOpts = NULL;
92  m_ExtnOpts = NULL;
93  m_HitSavingOpts = NULL;
94  m_ipScoreBlk = NULL;
95  m_ipInitHitlist = NULL;
96  m_ipGapAlign = NULL;
97  m_ipScoreParams = NULL;
98  m_ipHitParams = NULL;
99  m_ipExtParams = NULL;
100  }
101 
// Releases the structures referenced by the m_ip* and m_*Opts members.
// NOTE(review): the declaration line for this body (listing line 102) is
// absent from this extract; presumably the fixture destructor - confirm.
103  {
// These four members are overwritten with whatever their Free function
// returns.
104  m_ipScoreBlk = BlastScoreBlkFree(m_ipScoreBlk);
105  m_ipInitHitlist = BLAST_InitHitListFree(m_ipInitHitlist);
106  m_ipGapAlign = BLAST_GapAlignStructFree(m_ipGapAlign);
107  m_ipHitParams = BlastHitSavingParametersFree(m_ipHitParams);
// The scoring and extension parameter blocks are released with plain sfree.
108  sfree(m_ipScoreParams);
// NOTE(review): m_ipHitParams was already handed to
// BlastHitSavingParametersFree above; releasing it again here is only
// harmless if that call returned NULL - confirm, and consider dropping it.
109  sfree(m_ipHitParams);
110  sfree(m_ipExtParams);
111 
// The options structures are freed last; the returned pointers are discarded.
112  BlastScoringOptionsFree(m_ScoringOpts);
113  BlastExtensionOptionsFree(m_ExtnOpts);
114  BlastHitSavingOptionsFree(m_HitSavingOpts);
115  }
116 
// Populates m_ipInitHitlist with eight hard-coded initial hits, each saved
// together with a BlastUngappedData record (start offsets, length, score).
// NOTE(review): the declaration line for this body (listing line 117) is
// absent from this extract; testGapAlignment calls setupHitList(), which is
// presumably this function - confirm.
118  {
119  const int num_hsps = 8;
// Query/subject offsets handed to BLAST_SaveInitialHit for each hit.
120  const int q_offsets[num_hsps] =
121  {8799, 1358, 14042, 27664, 5143, 27737, 5231, 3212 };
122  const int s_offsets[num_hsps] =
123  { 2728, 2736, 2784, 2784, 2792, 2856, 2888, 3640 };
// Per-hit values copied into the ungapped data record.
124  const int q_starts[num_hsps] =
125  { 8794, 1355, 14015, 27637, 5131, 27732, 5226, 3201 };
126  const int s_starts[num_hsps] =
127  { 2723, 2733, 2757, 2757, 2780, 2851, 2883, 3629 };
128  const int lengths[num_hsps] = { 174, 18, 141, 92, 38, 37, 28, 20 };
129  const int scores[num_hsps] = { 146, 18, 93, 40, 34, 21, 24, 16 };
130 
131  m_ipInitHitlist = BLAST_InitHitListNew();
132  BlastUngappedData* ungapped_data;
133  Int4 index;
134 
135  for (index = 0; index < num_hsps; ++index) {
// NOTE(review): the right-hand side of this assignment (listing line 137)
// is absent from this extract, so how ungapped_data is allocated cannot be
// seen here.
136  ungapped_data =
138  ungapped_data->q_start = q_starts[index];
139  ungapped_data->s_start = s_starts[index];
140  ungapped_data->length = lengths[index];
141  ungapped_data->score = scores[index];
// The record pointer is passed to the hit list along with the offsets.
142  BLAST_SaveInitialHit(m_ipInitHitlist, q_offsets[index],
143  s_offsets[index], ungapped_data);
144  }
145  }
146 
// Populates m_ipInitHitlist with fourteen hard-coded query/subject offset
// pairs. No ungapped data is attached: NULL is passed as the last argument
// of BLAST_SaveInitialHit.
// NOTE(review): the declaration line for this body (listing line 147) is
// absent from this extract; the greedy test calls setupGreedyHitList(),
// which is presumably this function - confirm.
148  {
149  const int num_hsps = 14;
150  const int q_offsets[num_hsps] =
151  { 8799, 1358, 8831, 14042, 27664, 5143, 8863, 8903, 8927, 14114,
152  27737, 8943, 5231, 3212 };
153  const int s_offsets[num_hsps] =
154  { 2728, 2736, 2760, 2784, 2784, 2792, 2792, 2832, 2856, 2856,
155  2856, 2872, 2888, 3640 };
156 
// A fresh list is created on every call; any previous list must already
// have been freed by the caller (testGreedyAlignment does so before its
// second call).
157  m_ipInitHitlist = BLAST_InitHitListNew();
158 
159  Int4 index;
160  for (index = 0; index < num_hsps; ++index) {
161  BLAST_SaveInitialHit(m_ipInitHitlist, q_offsets[index],
162  s_offsets[index], NULL);
163  }
164  }
165 
// Creates temporary effective-length options and parameters for the given
// database length and sequence count, runs BLAST_CalcEffLengths with
// m_ipScoreBlk and m_iclsQueryInfo, then frees both temporaries.
// NOTE(review): the line carrying the function name and first parameter
// (listing line 167) is absent from this extract. setupStructures calls
// fillEffectiveLengths(kCoreProgramType, m_ScoringOpts, subject_length, 1),
// which presumably resolves to this function, with program_type as the
// missing first parameter - confirm.
166  void
168  const BlastScoringOptions* score_options,
169  Int8 db_length, Int4 db_num_seq) {
170  BlastEffectiveLengthsOptions* eff_len_options = NULL;
171  BlastEffectiveLengthsOptionsNew(&eff_len_options);
172  BlastEffectiveLengthsParameters* eff_len_params = NULL;
173  BlastEffectiveLengthsParametersNew(eff_len_options, db_length,
174  db_num_seq, &eff_len_params);
// The final NULL means no Blast_Message is requested; the return status of
// this and the preceding calls is not checked.
175  BLAST_CalcEffLengths(program_type, score_options, eff_len_params,
176  m_ipScoreBlk, m_iclsQueryInfo, NULL);
177  BlastEffectiveLengthsParametersFree(eff_len_params);
178  BlastEffectiveLengthsOptionsFree(eff_len_options);
179  }
180 
// Allocates and initialises the scoring options, score block, scoring /
// extension / hit-saving parameters and the gap alignment structure used by
// the tests, always for program type eBlastTypeBlastn.
// Reads m_iclsQueryBlk and m_iclsQueryInfo, so those must be set up first.
//   subject_length - passed to fillEffectiveLengths as the database length
//                    and to BLAST_GapAlignStructNew
//   greedy         - when true, the preliminary gapped extension type is
//                    switched to eGreedyScoreOnly
181  void setupStructures(Uint4 subject_length, bool greedy)
182  {
183  Int2 status;
184 
185  const EBlastProgramType kCoreProgramType = eBlastTypeBlastn;
186 
187  status = BlastScoringOptionsNew(kCoreProgramType, &m_ScoringOpts);
188  BOOST_REQUIRE(status == 0);
189 
190  m_ipScoreBlk = BlastScoreBlkNew(BLASTNA_SEQ_CODE, 2);
// Drop the gbp member if the new score block came with one.
191  if (m_ipScoreBlk->gbp) {
192  sfree(m_ipScoreBlk->gbp);
193  m_ipScoreBlk->gbp = NULL;
194  }
195  status = Blast_ScoreBlkMatrixInit(kCoreProgramType, m_ScoringOpts,
196  m_ipScoreBlk, &BlastFindMatrixPath);
197 
198  BOOST_REQUIRE(status == 0);
199  Blast_Message* message = NULL;
// NOTE(review): the start of the call whose arguments follow (listing line
// 200) is absent from this extract; the status it presumably assigns is
// checked a few lines below.
201  kCoreProgramType, m_ipScoreBlk,
202  m_iclsQueryBlk->sequence, m_iclsQueryInfo,
203  &message);
// Any returned message is discarded; the REQUIRE checks what
// Blast_MessageFree returned.
204  message = Blast_MessageFree(message);
205  BOOST_REQUIRE(message == NULL);
206 
207  BOOST_REQUIRE(status == 0);
208  status = Blast_ScoreBlkKbpGappedCalc(m_ipScoreBlk, m_ScoringOpts,
209  kCoreProgramType, m_iclsQueryInfo, NULL);
210 
211  BOOST_REQUIRE(status == 0);
212 
// Point the working kbp / kbp_gap fields at the *_std arrays.
213  m_ipScoreBlk->kbp = m_ipScoreBlk->kbp_std;
214  m_ipScoreBlk->kbp_gap = m_ipScoreBlk->kbp_gap_std;
215 
// Effective lengths are computed for a "database" of one sequence whose
// length is subject_length.
216  fillEffectiveLengths(kCoreProgramType, m_ScoringOpts,
217  subject_length, 1);
218 
219  BlastScoringParametersNew(m_ScoringOpts,
220  m_ipScoreBlk, &m_ipScoreParams);
221 
222  status = BlastExtensionOptionsNew(kCoreProgramType, &m_ExtnOpts, true);
// NOTE(review): m_ExtnOpts is dereferenced here before status is checked
// by the BOOST_REQUIRE below.
223  if (greedy)
224  m_ExtnOpts->ePrelimGapExt = eGreedyScoreOnly;
225 
226  BOOST_REQUIRE(status == 0);
227 
228  BlastExtensionParametersNew(kCoreProgramType,
229  m_ExtnOpts, m_ipScoreBlk,
230  m_iclsQueryInfo, &m_ipExtParams);
231 
232  status = BlastHitSavingOptionsNew(kCoreProgramType, &m_HitSavingOpts,
233  m_ScoringOpts->gapped_calculation);
234  BOOST_REQUIRE(status == 0);
235 
236  BlastHitSavingParametersNew(kCoreProgramType, m_HitSavingOpts,
237  m_ipScoreBlk, m_iclsQueryInfo, 0, 0, &m_ipHitParams);
238 
239  status = BLAST_GapAlignStructNew(m_ipScoreParams, m_ipExtParams,
240  subject_length, m_ipScoreBlk, &m_ipGapAlign);
241  BOOST_REQUIRE(status == 0);
242  }
243 };
244 
245 BOOST_FIXTURE_TEST_SUITE(BlastExtend, CBlastExtendTestFixture)
246 
// Runs a gapped extension over the hit list built by setupHitList() (with
// greedy extension disabled in setupStructures) and checks that seven HSPs
// come back with the expected offsets and lengths once sorted by score.
// NOTE(review): several lines of this test are absent from this extract
// (listing lines 272, 296, 298 and 312); see the notes at each gap.
247 BOOST_AUTO_TEST_CASE(testGapAlignment) {
248  const int num_hsps = 7;
// Expected values per HSP, in the order produced by x_score_compare_hsps
// (descending score).
249  const int query_starts[num_hsps] =
250  { 8794, 13982, 12612, 5131, 5226, 1355, 3201 };
251  const int subject_starts[num_hsps] =
252  { 2723, 2723, 2733, 2780, 2883, 2733, 3629 };
253  const int query_lengths[num_hsps] = { 174, 174, 182, 38, 28, 18, 20 };
254  const int subject_lengths[num_hsps] = { 174, 175, 183, 38, 28, 18, 20 };
255  BlastGappedStats* gapped_stats = NULL;
256 
// Query is restricted to the range 20000-35000; the subject is used whole.
// Both are requested on both strands.
257  CSeq_id qid("gi|2655203");
258  pair<TSeqPos, TSeqPos> range(20000, 35000);
259  unique_ptr<SSeqLoc> qsl(
260  CTestObjMgr::Instance().CreateSSeqLoc(qid, range, eNa_strand_both));
261  CSeq_id sid("gi|2516238");
262  unique_ptr<SSeqLoc> ssl(
263  CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
264 
265  CBlastNucleotideOptionsHandle opts_handle;
266  TSeqLocVector queries;
267  TSeqLocVector subjects;
268  queries.push_back(*qsl);
269  subjects.push_back(*ssl);
270 
271  const CBlastOptions& kOpts = opts_handle.GetOptions();
// NOTE(review): the declaration of `prog` (listing line 272) is absent from
// this extract.
273  ENa_strand strand_opt = kOpts.GetStrandOption();
274  TSearchMessages blast_msg;
275 
276  SetupQueryInfo(queries, prog, strand_opt, &m_iclsQueryInfo);
277  SetupQueries(queries, m_iclsQueryInfo, &m_iclsQueryBlk,
278  prog, strand_opt, blast_msg);
// Query setup must not have produced any messages.
279  ITERATE(TSearchMessages, m, blast_msg) {
280  BOOST_REQUIRE(m->empty());
281  }
282 
283  Uint4 subject_length;
284  vector<BLAST_SequenceBlk*> subject_blk_v;
285  SetupSubjects(subjects, opts_handle.GetOptions().GetProgramType(),
286  &subject_blk_v, &subject_length);
287 
288  setupStructures(subject_length, false);
289 
290  setupHitList();
291 
292  Blast_InitHitListSortByScore(m_ipInitHitlist);
293 
294  BlastHSPList* hsp_list = Blast_HSPListNew(0);
// NOTE(review): the right-hand side of this assignment (listing line 296)
// and the start of the call whose arguments follow (listing line 298) are
// absent from this extract.
295  gapped_stats =
297 
299  m_iclsQueryBlk, m_iclsQueryInfo, subject_blk_v[0],
300  m_ipGapAlign, m_ipScoreParams, m_ipExtParams,
301  m_ipHitParams, m_ipInitHitlist, &hsp_list,
302  gapped_stats, NULL);
303 
304  BlastSequenceBlkFree(subject_blk_v[0]);
305  BOOST_REQUIRE_EQUAL(num_hsps, hsp_list->hspcnt);
306 
// The extension counter in the stats must equal the number of HSPs.
307  BOOST_REQUIRE_EQUAL(num_hsps, gapped_stats->extensions);
308 
309  sfree(gapped_stats);
310 
// NOTE(review): the final qsort argument (listing line 312) is absent from
// this extract; presumably x_score_compare_hsps - confirm.
311  qsort(hsp_list->hsp_array, hsp_list->hspcnt, sizeof(BlastHSP*),
313  Int4 index;
// Compare each HSP's start offsets and its (end - offset) extents on
// query and subject against the expected tables.
314  for (index = 0; index < num_hsps; ++index) {
315  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[index]->query.offset,
316  query_starts[index]);
317  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[index]->subject.offset,
318  subject_starts[index]);
319  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[index]->query.end -
320  hsp_list->hsp_array[index]->query.offset,
321  query_lengths[index]);
322  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[index]->subject.end -
323  hsp_list->hsp_array[index]->subject.offset,
324  subject_lengths[index]);
325  }
326 
327  Blast_HSPListFree(hsp_list);
328 }
329 
// Same scenario as testGapAlignment, but with greedy extension enabled in
// setupStructures and the hit list built by setupGreedyHitList(). The gapped
// step is run twice, the second time with percent-identity and minimum-length
// cutoffs set, and both runs must yield seven HSPs.
// NOTE(review): several lines of this test are absent from this extract
// (listing lines 359, 381, 383, 405 and 422); see the notes at each gap.
330 BOOST_AUTO_TEST_CASE(testGreedyAlignment) {
331  const int num_hsps = 7;
// Expected values per HSP after sorting by descending score.
332  const int query_starts[num_hsps] =
333  { 8794, 13982, 12612, 5131, 5226, 1355, 3201 };
334  const int subject_starts[num_hsps] =
335  { 2723, 2723, 2733, 2780, 2883, 2733, 3629 };
336  const int query_lengths[num_hsps] =
337  { 174, 174, 182, 38, 28, 18, 20 };
338  const int subject_lengths[num_hsps] =
339  { 174, 175, 183, 38, 28, 18, 20 };
340  BlastGappedStats* gapped_stats = NULL;
341 
342 
// Query is restricted to the range 20000-35000; the subject is used whole.
343  CSeq_id qid("gi|2655203");
344  pair<TSeqPos, TSeqPos> range(20000, 35000);
345  unique_ptr<SSeqLoc> qsl(
346  CTestObjMgr::Instance().CreateSSeqLoc(qid, range, eNa_strand_both));
347  CSeq_id sid("gi|2516238");
348  unique_ptr<SSeqLoc> ssl(
349  CTestObjMgr::Instance().CreateSSeqLoc(sid, eNa_strand_both));
350 
351  CBlastNucleotideOptionsHandle opts_handle;
352 
353  TSeqLocVector queries;
354  TSeqLocVector subjects;
355  queries.push_back(*qsl);
356  subjects.push_back(*ssl);
357 
358  const CBlastOptions& kOpts = opts_handle.GetOptions();
// NOTE(review): the declaration of `prog` (listing line 359) is absent from
// this extract.
360  ENa_strand strand_opt = kOpts.GetStrandOption();
361  TSearchMessages blast_msg;
362 
363  SetupQueryInfo(queries, prog, strand_opt, &m_iclsQueryInfo);
364  SetupQueries(queries, m_iclsQueryInfo, &m_iclsQueryBlk,
365  prog, strand_opt, blast_msg);
// Query setup must not have produced any messages.
366  ITERATE(TSearchMessages, m, blast_msg) {
367  BOOST_REQUIRE(m->empty());
368  }
369 
370  Uint4 subject_length;
371  vector<BLAST_SequenceBlk*> subject_blk_v;
372  SetupSubjects(subjects, opts_handle.GetOptions().GetProgramType(),
373  &subject_blk_v, &subject_length);
374 
375  setupStructures(subject_length, true);
376 
377  setupGreedyHitList();
378 
379  BlastHSPList* hsp_list = Blast_HSPListNew(0);
// NOTE(review): the right-hand side of this assignment (listing line 381)
// and the start of the call whose arguments follow (listing line 383) are
// absent from this extract.
380  gapped_stats =
382 
384  m_iclsQueryBlk, m_iclsQueryInfo, subject_blk_v[0],
385  m_ipGapAlign, m_ipScoreParams, m_ipExtParams,
386  m_ipHitParams, m_ipInitHitlist, &hsp_list,
387  gapped_stats, NULL);
388 
389  BOOST_REQUIRE_EQUAL(num_hsps, hsp_list->hspcnt);
390 
391  // Now test that introduction of a percent identity and length cutoffs
392  // does not influence the BLAST_MbGetGappedScore behavior.
393  // Free the HSPList
394  hsp_list = Blast_HSPListFree(hsp_list);
395  BOOST_REQUIRE(hsp_list == NULL);
396  // The initial seeds have been modified if they were on reverse strand,
397  // so setup the initial hit list again.
398  m_ipInitHitlist = BLAST_InitHitListFree(m_ipInitHitlist);
399  setupGreedyHitList();
400 
401  // Set the percent identity and minimal length cutoffs
402  m_ipHitParams->options->min_hit_length = 100;
403  m_ipHitParams->options->percent_identity = 99;
404 
// NOTE(review): the start of the second call (listing line 405) is absent
// from this extract; the same gapped_stats object is passed again.
406  m_iclsQueryBlk, m_iclsQueryInfo, subject_blk_v[0],
407  m_ipGapAlign, m_ipScoreParams, m_ipExtParams,
408  m_ipHitParams, m_ipInitHitlist, &hsp_list,
409  gapped_stats, NULL);
410 
411  BOOST_REQUIRE_EQUAL(num_hsps, hsp_list->hspcnt);
412 
413  BlastSequenceBlkFree(subject_blk_v[0]);
414 
415  // Since gapped alignment function was called twice, the number of
416  // extensions is double the real one.
417  BOOST_REQUIRE_EQUAL(2*num_hsps, gapped_stats->extensions);
418 
419  sfree(gapped_stats);
420 
// NOTE(review): the final qsort argument (listing line 422) is absent from
// this extract; presumably x_score_compare_hsps - confirm.
421  qsort(hsp_list->hsp_array, hsp_list->hspcnt, sizeof(BlastHSP*),
423  Int4 index;
// Compare each HSP's start offsets and its (end - offset) extents on
// query and subject against the expected tables.
424  for (index = 0; index < num_hsps; ++index) {
425  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[index]->query.offset,
426  query_starts[index]);
427  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[index]->subject.offset,
428  subject_starts[index]);
429  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[index]->query.end -
430  hsp_list->hsp_array[index]->query.offset,
431  query_lengths[index]);
432  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[index]->subject.end -
433  hsp_list->hsp_array[index]->subject.offset,
434  subject_lengths[index]);
435  }
436 
437  Blast_HSPListFree(hsp_list);
438 }
439 
440 // Test for SB-666 fix
// Regression test for SB-666: runs a greedy gapped extension from a single
// hand-built initial hit and checks both the resulting HSP boundaries and
// that the greedy seed start recorded in m_ipGapAlign lies inside them.
// NOTE(review): several lines of this test are absent from this extract
// (listing lines 472, 510 and 512); see the notes at each gap.
441 BOOST_AUTO_TEST_CASE(testGreedyAlignmentWithBadStart) {
// Expected HSP boundaries.
442  const int query_start = 2612;
443  const int query_end = 2754;
444  const int subject_start = 291;
445  const int subject_end = 438;
// Offsets of the single initial hit.
446  const int q_offset = 2754;
447  const int s_offset = 438;
448  BlastGappedStats* gapped_stats = NULL;
// Ungapped data for the initial hit lives on the stack of this test.
449  BlastUngappedData ungapped;
450 
451  ungapped.q_start = 2671;
452  ungapped.s_start = 355;
453  ungapped.length = 167;
454  ungapped.score = 42;
455 
// Here the query is used whole and the subject is restricted to a range.
456  CSeq_id qid("gi|156523973");
457  unique_ptr<SSeqLoc> qsl(
458  CTestObjMgr::Instance().CreateSSeqLoc(qid, eNa_strand_both));
459  CSeq_id sid("gi|224514626");
460  pair<TSeqPos, TSeqPos> range(1896999, 1897550);
461  unique_ptr<SSeqLoc> ssl(
462  CTestObjMgr::Instance().CreateSSeqLoc(sid, range, eNa_strand_both));
463 
464  CBlastNucleotideOptionsHandle opts_handle;
465 
466  TSeqLocVector queries;
467  TSeqLocVector subjects;
468  queries.push_back(*qsl);
469  subjects.push_back(*ssl);
470 
471  const CBlastOptions& kOpts = opts_handle.GetOptions();
// NOTE(review): the declaration of `prog` (listing line 472) is absent from
// this extract.
473  ENa_strand strand_opt = kOpts.GetStrandOption();
474  TSearchMessages blast_msg;
475 
476  SetupQueryInfo(queries, prog, strand_opt, &m_iclsQueryInfo);
477  SetupQueries(queries, m_iclsQueryInfo, &m_iclsQueryBlk,
478  prog, strand_opt, blast_msg);
// Query setup must not have produced any messages.
479  ITERATE(TSearchMessages, m, blast_msg) {
480  BOOST_REQUIRE(m->empty());
481  }
482 
483  Uint4 subject_length;
484  vector<BLAST_SequenceBlk*> subject_blk_v;
485  SetupSubjects(subjects, opts_handle.GetOptions().GetProgramType(),
486  &subject_blk_v, &subject_length);
487 
488  setupStructures(subject_length, true);
489 
490  // The following options must be patched to reproduce SB-666
491  m_ipScoreParams->reward = 1;
492  m_ipScoreParams->penalty = -2;
493  m_ipScoreParams->gap_open = 0;
494  m_ipScoreParams->gap_extend = 0;
495 
496  m_ipExtParams->gap_x_dropoff = 16;
497  m_ipExtParams->gap_x_dropoff_final = 54;
498 
// Rebuild the gap alignment structure from the patched parameters; the
// return status of the New call is not checked here.
499  m_ipGapAlign = BLAST_GapAlignStructFree(m_ipGapAlign);
500 
501  BLAST_GapAlignStructNew(m_ipScoreParams, m_ipExtParams,
502  subject_length, m_ipScoreBlk, &m_ipGapAlign);
503 
504  m_ipInitHitlist = BLAST_InitHitListNew();
505 
506  BLAST_SaveInitialHit(m_ipInitHitlist, q_offset, s_offset, &ungapped);
507 
508  BlastHSPList* hsp_list = Blast_HSPListNew(0);
// NOTE(review): the right-hand side of this assignment (listing line 510)
// and the start of the call whose arguments follow (listing line 512) are
// absent from this extract.
509  gapped_stats =
511 
513  m_iclsQueryBlk, m_iclsQueryInfo, subject_blk_v[0],
514  m_ipGapAlign, m_ipScoreParams, m_ipExtParams,
515  m_ipHitParams, m_ipInitHitlist, &hsp_list,
516  gapped_stats, NULL);
517 
// Detach the stack-allocated `ungapped` from the hit list - presumably so
// that freeing the list later does not try to release it; confirm against
// BLAST_InitHitListFree.
518  m_ipInitHitlist->init_hsp_array[0].ungapped_data = NULL;
519 
520  BOOST_REQUIRE_EQUAL(1, hsp_list->hspcnt);
521 
522  BlastSequenceBlkFree(subject_blk_v[0]);
523 
524  sfree(gapped_stats);
525 
526  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[0]->query.offset, query_start);
527  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[0]->subject.offset, subject_start);
528  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[0]->query.end, query_end);
529  BOOST_REQUIRE_EQUAL(hsp_list->hsp_array[0]->subject.end, subject_end);
530 
531  // The following are required to fix SB-666
532  BOOST_REQUIRE(m_ipGapAlign->greedy_query_seed_start >= query_start);
533  BOOST_REQUIRE(m_ipGapAlign->greedy_query_seed_start <= query_end);
534  BOOST_REQUIRE(m_ipGapAlign->greedy_subject_seed_start >= subject_start);
535  BOOST_REQUIRE(m_ipGapAlign->greedy_subject_seed_start <= subject_end);
536 
537  Blast_HSPListFree(hsp_list);
538 }
539 
540 BOOST_AUTO_TEST_CASE(testSmallMBSpaceValue) {
541  const int kSize = 100;
542  const int kDefaultSize = 1000000;
543  SMBSpace* retval = MBSpaceNew(kSize);
544  BOOST_REQUIRE(retval);
545  BOOST_REQUIRE_EQUAL(kDefaultSize, retval->space_allocated);
546  MBSpaceFree(retval);
547 }
548 
549 BOOST_AUTO_TEST_CASE(testZeroMBSpaceValue) {
550  const int kSize = 0;
551  const int kDefaultSize = 1000000;
552  SMBSpace* retval = MBSpaceNew(kSize);
553  BOOST_REQUIRE(retval);
554  BOOST_REQUIRE_EQUAL(kDefaultSize, retval->space_allocated);
555  MBSpaceFree(retval);
556 }
557 
558 BOOST_AUTO_TEST_CASE(testLargeMBSpaceValue) {
559  const int kSize = 5000000;
560  SMBSpace* retval = MBSpaceNew(kSize);
561  BOOST_REQUIRE(retval);
562  BOOST_REQUIRE_EQUAL(kSize, retval->space_allocated);
563  MBSpaceFree(retval);
564 }
565 
// Asserts that `output` is NULL.
// NOTE(review): the lines that declare the input and compute `output`
// (listing lines 567-568 and 570) are absent from this extract; the test
// name suggests `output` is the result of BLAST_InitHitListFree called with
// a NULL input - confirm against the full file.
566 BOOST_AUTO_TEST_CASE(testInitHitListFreeWithNULLInput) {
569  bool null_output = false;
571  if (output == NULL)
572  null_output = true;
573  BOOST_REQUIRE_EQUAL(true, null_output);
574 }
575 
// Asserts that `output` is NULL.
// NOTE(review): the lines that declare the input and compute `output`
// (listing lines 577-578 and 580) are absent from this extract; the test
// name suggests `output` is the result of BlastExtendWordFree called with a
// NULL input - confirm against the full file.
576 BOOST_AUTO_TEST_CASE(testBlastExtendWordFreeWithNULLInput) {
579  bool null_output = false;
581  if (output == NULL)
582  null_output = true;
583  BOOST_REQUIRE_EQUAL(true, null_output);
584 }
585 
587 
588 /*
589 * ===========================================================================
590 *
591 * $Log: blastextend-cppunit.cpp,v $
592 * Revision 1.55 2008/07/18 14:05:21 camacho
593 * Irix fixes
594 *
595 * Revision 1.54 2007/10/22 19:16:09 madden
596 * BlastExtensionOptionsNew has Boolean gapped arg
597 *
598 * Revision 1.53 2007/03/20 14:54:02 camacho
599 * changes related to addition of multiple genetic code specification
600 *
601 * Revision 1.52 2007/02/08 17:13:29 papadopo
602 * change enum value
603 *
604 * Revision 1.51 2006/11/29 17:26:16 bealer
605 * - HSP range support.
606 *
607 * Revision 1.50 2006/09/08 17:17:09 camacho
608 * Fix memory leaks
609 *
610 * Revision 1.49 2006/06/29 16:25:24 camacho
611 * Changed BlastHitSavingOptionsNew signature
612 *
613 * Revision 1.48 2006/06/05 13:34:05 madden
614 * Changes to remove [GS]etMatrixPath and use callback instead
615 *
616 * Revision 1.47 2006/05/18 16:32:03 papadopo
617 * change signature of BLAST_CalcEffLengths
618 *
619 * Revision 1.46 2006/04/20 19:35:05 madden
620 * Blast_ScoreBlkKbpUngappedCalc prototype change
621 *
622 * Revision 1.45 2006/01/23 16:53:44 papadopo
623 * replace BLAST_MbGetGappedScore
624 *
625 * Revision 1.44 2005/12/16 20:51:50 camacho
626 * Diffuse the use of CSearchMessage, TQueryMessages, and TSearchMessages
627 *
628 * Revision 1.43 2005/10/14 13:47:32 camacho
629 * Fixes to pacify icc compiler
630 *
631 * Revision 1.42 2005/08/15 16:13:08 dondosha
632 * Added new argument in call to Blast_ScoreBlkKbpGappedCalc
633 *
634 * Revision 1.41 2005/06/09 20:37:05 camacho
635 * Use new private header blast_objmgr_priv.hpp
636 *
637 * Revision 1.40 2005/05/24 20:05:17 camacho
638 * Changed signature of SetupQueries and SetupQueryInfo
639 *
640 * Revision 1.39 2005/04/11 14:04:46 dondosha
641 * Really do greedy alignment in testGreedyAlignment test, and check that it works with affine gap penalties - it failed because of premature perc. identity check
642 *
643 * Revision 1.38 2005/04/07 19:38:09 madden
644 * Add MBSpaceNew checks as well as NULL input checks on BLAST_InitHitListFree and BlastExtendWordFree
645 *
646 * Revision 1.37 2005/04/06 21:26:37 dondosha
647 * GapEditBlock structure and redundant fields in BlastHSP have been removed
648 *
649 * Revision 1.36 2005/03/31 13:45:58 camacho
650 * BLAST options API clean-up
651 *
652 * Revision 1.35 2005/03/29 15:03:30 papadopo
653 * fill in all search spaces for valid contexts (engine requires this now)
654 *
655 * Revision 1.34 2005/03/29 14:20:45 camacho
656 * Refactorings
657 *
658 * Revision 1.33 2005/03/04 17:20:44 bealer
659 * - Command line option support.
660 *
661 * Revision 1.32 2005/01/10 14:02:49 madden
662 * Removed calls to SetScanStep
663 *
664 * Revision 1.31 2005/01/06 15:43:25 camacho
665 * Make use of modified signature to blast::SetupQueries
666 *
667 * Revision 1.30 2004/12/09 15:25:11 dondosha
668 * BLAST_ScoreBlkFill changed to Blast_ScoreBlkUngappedCalc
669 *
670 * Revision 1.29 2004/12/02 16:50:13 bealer
671 * - Change multiple-arrays to array-of-struct in BlastQueryInfo
672 *
673 * Revision 1.28 2004/11/02 18:30:17 madden
674 * BlastHitSavingParametersNew no longer requires BlastExtensionParameters
675 *
676 * Revision 1.27 2004/10/19 16:39:46 dondosha
677 * Sort input initial hit list by score, as this order is expected in gapped alignment routines
678 *
679 * Revision 1.26 2004/10/14 17:13:56 madden
680 * New parameter in BlastHitSavingParametersNew
681 *
682 * Revision 1.25 2004/07/06 15:58:45 dondosha
683 * Use EBlastProgramType enumeration type for program when calling C functions
684 *
685 * Revision 1.24 2004/06/08 19:28:01 dondosha
686 * Removed unused argument in call to BLAST_GapAlignStructNew
687 *
688 * Revision 1.23 2004/05/17 15:44:02 dondosha
689 * Memory leak fixes
690 *
691 * Revision 1.22 2004/05/14 17:17:39 dondosha
692 * Check diagnostics information returned from BLAST engine
693 *
694 * Revision 1.21 2004/05/07 15:42:06 papadopo
695 * fill in and use BlastScoringParameters instead of BlastScoringOptions
696 *
697 * Revision 1.20 2004/04/21 17:34:14 madden
698 * Use cleaned up API for saving HSPs, HSPLists, HitLists
699 *
700 * Revision 1.19 2004/04/07 03:06:21 camacho
701 * Added blast_encoding.[hc], refactoring blast_stat.[hc]
702 *
703 * Revision 1.18 2004/03/26 21:41:48 dondosha
704 * Use const int instead of hard coded constants in array sizes
705 *
706 * Revision 1.17 2004/03/24 22:14:22 dondosha
707 * Fixed memory leaks
708 *
709 * Revision 1.16 2004/03/24 19:21:40 dondosha
710 * BLAST_InitHitListDestruct name changed to BLAST_InitHitListFree
711 *
712 * Revision 1.15 2004/03/23 16:10:34 camacho
713 * Minor changes to CTestObjMgr
714 *
715 * Revision 1.14 2004/03/15 20:00:56 dondosha
716 * SetupSubjects prototype changed to take just program instead of CBlastOptions*
717 *
718 * Revision 1.13 2004/03/11 21:17:16 camacho
719 * Fix calls to BlastHitSavingParametersNew
720 *
721 * Revision 1.12 2004/03/09 18:58:44 dondosha
722 * Added extension parameters argument to BlastHitSavingParametersNew calls
723 *
724 * Revision 1.11 2004/02/27 15:57:20 papadopo
725 * change initialization of ScoreBlk
726 *
727 * Revision 1.10 2004/02/20 23:20:36 camacho
728 * Remove undefs.h
729 *
730 * Revision 1.9 2004/02/20 21:47:18 camacho
731 * Rename score_compare_hsps as it collides with function in libncbitool
732 *
733 * Revision 1.8 2004/02/20 19:55:00 camacho
734 * Fix compare function for usage with qsort
735 *
736 * Revision 1.7 2004/02/18 00:35:50 dondosha
737 * Reinstated changes from revision 1.4 - they are valid now
738 *
739 * Revision 1.6 2004/02/17 21:52:17 dondosha
740 * Query info argument to calls to gapped alignment will have to be added just a little bit later
741 *
742 * Revision 1.5 2004/02/17 20:42:47 dondosha
743 * One data change in previous commit needs to wait a couple of hours longer for other relevant files
744 *
745 * Revision 1.4 2004/02/17 20:33:12 dondosha
746 * Use BOOST_REQUIRE_EQUAL; const int array sizes
747 *
748 * Revision 1.3 2004/01/30 23:22:21 dondosha
749 * Use getters for options structures because of the API change
750 *
751 * Revision 1.2 2004/01/09 21:58:28 dondosha
752 * Added a test for greedy alignment
753 *
754 * Revision 1.1 2004/01/08 23:20:54 dondosha
755 * Test for gapped extensions
756 *
757 *
758 * ===========================================================================
759 */
Declares the CBl2Seq (BLAST 2 Sequences) class.
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
BlastInitHitList * BLAST_InitHitListNew(void)
Allocate memory for the BlastInitHitList structure.
Definition: blast_extend.c:216
Boolean BLAST_SaveInitialHit(BlastInitHitList *init_hitlist, Int4 q_off, Int4 s_off, BlastUngappedData *ungapped_data)
Save the initial hit data into the initial hit list structure.
Definition: blast_extend.c:325
void Blast_InitHitListSortByScore(BlastInitHitList *init_hitlist)
Sort array of initial HSPs by score.
Definition: blast_extend.c:306
Blast_ExtendWord * BlastExtendWordFree(Blast_ExtendWord *ewp)
Deallocate memory for the word extension structure.
Definition: blast_extend.c:203
BlastInitHitList * BLAST_InitHitListFree(BlastInitHitList *init_hitlist)
Free memory for the BlastInitList structure.
Definition: blast_extend.c:261
Structures and functions prototypes used for BLAST gapped extension.
Int2 BLAST_GetGappedScore(EBlastProgramType program_number, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, BLAST_SequenceBlk *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, const BlastHitSavingParameters *hit_params, BlastInitHitList *init_hitlist, BlastHSPList **hsp_list_ptr, BlastGappedStats *gapped_stats, Boolean *fence_hit)
Performs gapped extension for all non-Mega BLAST programs, given that ungapped extension has been don...
Int2 BLAST_GapAlignStructNew(const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, Uint4 max_subject_length, BlastScoreBlk *sbp, BlastGapAlignStruct **gap_align_ptr)
Initializes the BlastGapAlignStruct structure.
BlastGapAlignStruct * BLAST_GapAlignStructFree(BlastGapAlignStruct *gap_align)
Deallocates memory in the BlastGapAlignStruct structure.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Definition: blast_hits.c:1558
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
Definition: blast_hits.c:1542
Blast_Message * Blast_MessageFree(Blast_Message *blast_msg)
Deallocates message memory.
Definition: blast_message.c:80
Declares the CBlastNucleotideOptionsHandle class.
Definitions which are dependent on the NCBI C++ Object Manager.
BlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options)
Deallocate memory for BlastHitSavingOptions.
@ eGreedyScoreOnly
Greedy extension (megaBlast)
Int2 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions **options)
Allocate memory for BlastEffectiveLengthsOptions* and fill with default values.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
BlastEffectiveLengthsOptions * BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions *options)
Deallocate memory for BlastEffectiveLengthsOptions*.
BlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)
Deallocate memory for BlastExtensionOptions.
Int2 BlastHitSavingOptionsNew(EBlastProgramType program, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
Int2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)
Allocate memory for BlastExtensionOptions and fill with default values.
BlastHitSavingParameters * BlastHitSavingParametersFree(BlastHitSavingParameters *parameters)
Deallocate memory for BlastHitSavingOptions*.
BlastEffectiveLengthsParameters * BlastEffectiveLengthsParametersFree(BlastEffectiveLengthsParameters *parameters)
Deallocate memory for BlastEffectiveLengthsParameters*.
Int2 BlastExtensionParametersNew(EBlastProgramType blast_program, const BlastExtensionOptions *options, BlastScoreBlk *sbp, BlastQueryInfo *query_info, BlastExtensionParameters **parameters)
Calculate the raw values for the X-dropoff parameters.
Int2 BlastScoringParametersNew(const BlastScoringOptions *options, BlastScoreBlk *sbp, BlastScoringParameters **parameters)
Calculate scaled cutoff scores and gap penalties.
Int2 BlastHitSavingParametersNew(EBlastProgramType program_number, const BlastHitSavingOptions *options, const BlastScoreBlk *sbp, const BlastQueryInfo *query_info, Int4 avg_subject_length, Int4 compositionBasedStats, BlastHitSavingParameters **parameters)
Allocate memory and initialize the BlastHitSavingParameters structure.
Int2 BlastEffectiveLengthsParametersNew(const BlastEffectiveLengthsOptions *options, Int8 db_length, Int4 num_seqs, BlastEffectiveLengthsParameters **parameters)
Allocate memory for BlastEffectiveLengthsParameters.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastn
Definition: blast_program.h:74
Utilities initialize/setup BLAST.
Int2 Blast_ScoreBlkKbpGappedCalc(BlastScoreBlk *sbp, const BlastScoringOptions *scoring_options, EBlastProgramType program, const BlastQueryInfo *query_info, Blast_Message **error_return)
Blast_ScoreBlkKbpGappedCalc, fills the ScoreBlkPtr for a gapped search.
Definition: blast_setup.c:41
Int2 Blast_ScoreBlkMatrixInit(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, BlastScoreBlk *sbp, GET_MATRIX_PATH get_path)
Initializes the substitution matrix in the BlastScoreBlk according to the scoring options specified.
Definition: blast_setup.c:330
Int2 BLAST_CalcEffLengths(EBlastProgramType program_number, const BlastScoringOptions *scoring_options, const BlastEffectiveLengthsParameters *eff_len_params, const BlastScoreBlk *sbp, BlastQueryInfo *query_info, Blast_Message **blast_message)
Function to calculate effective query length and db length as well as effective search space.
Definition: blast_setup.c:699
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
Definition: blast_stat.c:965
Int2 Blast_ScoreBlkKbpUngappedCalc(EBlastProgramType program, BlastScoreBlk *sbp, Uint1 *query, const BlastQueryInfo *query_info, Blast_Message **blast_message)
Calculate and fill the ungapped Karlin-Altschul parameters in the BlastScoreBlk structure (fields kbp...
Definition: blast_stat.c:2737
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
Definition: blast_stat.c:884
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Definition: blast_util.c:245
BOOST_AUTO_TEST_CASE(testGapAlignment)
int x_score_compare_hsps(const void *v1, const void *v2)
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
Wrapper class for BLAST_SequenceBlk.
Definition: blast_aux.hpp:309
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
Encapsulates ALL the BLAST algorithm's options.
Wrapper class for BlastQueryInfo.
Definition: blast_aux.hpp:311
static CTestObjMgr & Instance()
Definition: test_objmgr.cpp:69
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
static SQLCHAR output[256]
Definition: print.c:5
void MBSpaceFree(SMBSpace *sp)
Free the space structure.
Definition: greedy_align.c:80
SMBSpace * MBSpaceNew(int num_space_arrays)
Allocate a space structure for greedy alignment. At least num_space_arrays will be allocated,...
Definition: greedy_align.c:43
void SetupSubjects(TSeqLocVector &subjects, EBlastProgramType program, vector< BLAST_SequenceBlk * > *seqblk_vec, unsigned int *max_subjlen)
Sets up internal subject data structure for the BLAST search.
void SetupQueries(TSeqLocVector &queries, BlastQueryInfo *qinfo, BLAST_SequenceBlk **seqblk, EBlastProgramType prog, objects::ENa_strand strand_opt, TSearchMessages &messages)
Populates BLAST_SequenceBlk with sequence data for use in CORE BLAST.
objects::ENa_strand GetStrandOption() const
#define BLASTNA_SEQ_CODE
Identifies the blastna alphabet, for use in blast only.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
char * BlastFindMatrixPath(const char *matrix_name, Boolean is_prot)
Returns the path to a specified matrix.
void SetupQueryInfo(TSeqLocVector &queries, EBlastProgramType prog, objects::ENa_strand strand_opt, BlastQueryInfo **qinfo)
Allocates the query information structure and fills the context offsets, in case of multiple queries,...
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
const CVect2< U > & v2
Definition: globals.hpp:440
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
static int input()
static char * prog
Definition: mdb_load.c:33
range(_Ty, _Ty) -> range< _Ty >
Magic spell ;-) needed for some weird compilers... very empiric.
Defines: CTimeFormat - storage class for time format.
The Object manager core.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
Options for setting up effective lengths and search spaces.
Parameters for setting up effective lengths and search spaces.
Options used for gapped extension. These include: a.
EBlastPrelimGapExt ePrelimGapExt
type of preliminary gapped extension (normally) for calculating score.
Computed values used as parameters for gapped alignments.
Structure supporting the gapped alignment.
Structure containing hit counts from the gapped stage of a BLAST search.
Int4 extensions
Total number of gapped extensions performed.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Int4 hspcnt
Number of HSPs saved.
Definition: blast_hits.h:158
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Definition: blast_hits.h:157
Structure holding all information about an HSP.
Definition: blast_hits.h:126
BlastSeg query
Query sequence info.
Definition: blast_hits.h:131
BlastSeg subject
Subject sequence info.
Definition: blast_hits.h:132
Int4 score
This HSP's raw score.
Definition: blast_hits.h:127
Options used when evaluating and saving hits. These include: a.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Structure to hold all initial HSPs for a given subject sequence.
Definition: blast_extend.h:158
Structure used for scoring calculations.
Definition: blast_stat.h:177
Blast_KarlinBlk ** kbp
Karlin-Altschul parameters.
Definition: blast_stat.h:207
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
Definition: blast_stat.h:208
Blast_KarlinBlk ** kbp_gap_std
K-A parameters for std (not position-based) alignments.
Definition: blast_stat.h:214
Blast_KarlinBlk ** kbp_std
K-A parameters for ungapped alignments.
Definition: blast_stat.h:212
Blast_GumbelBlk * gbp
Gumbel parameters for FSC.
Definition: blast_stat.h:209
Scoring options block. Used to produce the BlastScoreBlk structure. This structure may be needed for lo...
Boolean gapped_calculation
gap-free search if FALSE
Scoring parameters block. Contains scoring-related information that is actually used for the blast sea...
Int4 end
End of hsp.
Definition: blast_hits.h:99
Int4 offset
Start of hsp.
Definition: blast_hits.h:98
Structure to hold ungapped alignment information.
Definition: blast_extend.h:142
Int4 score
Score of the ungapped alignment.
Definition: blast_extend.h:146
Int4 length
Length of the ungapped alignment.
Definition: blast_extend.h:145
Int4 q_start
Start of the ungapped alignment in query.
Definition: blast_extend.h:143
Int4 s_start
Start of the ungapped alignment in subject.
Definition: blast_extend.h:144
Structure for keeping initial word extension information.
Definition: blast_extend.h:109
Structure to hold a message from the core of the BLAST engine.
Definition: blast_message.h:70
BlastHitSavingParameters * m_ipHitParams
BlastHitSavingOptions * m_HitSavingOpts
CBLAST_SequenceBlk m_iclsQueryBlk
BlastGapAlignStruct * m_ipGapAlign
void setupStructures(Uint4 subject_length, bool greedy)
BlastScoringOptions * m_ScoringOpts
BlastExtensionParameters * m_ipExtParams
BlastInitHitList * m_ipInitHitlist
BlastScoringParameters * m_ipScoreParams
void fillEffectiveLengths(EBlastProgramType program_type, const BlastScoringOptions *score_options, Int8 db_length, Int4 db_num_seq)
BlastExtensionOptions * m_ExtnOpts
Space structure for greedy alignment algorithm.
Definition: greedy_align.h:65
Int4 space_allocated
number of structures allocated
Definition: greedy_align.h:67
Utilities for more convenient use of the Boost.Test library.
voidp calloc(uInt items, uInt size)
Modified on Wed May 29 18:39:43 2024 by modify_doxy.py rev. 669887