NCBI C++ ToolKit
delta_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: delta_unit_test.cpp 94717 2021-09-02 13:40:12Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho, Greg Boratyn
27  *
28  */
29 
30 /** @file delta_unit_test.cpp
31  * Unit test module for deltablast.
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 
36 // Serial library includes
37 #include <serial/serial.hpp>
38 #include <serial/objistr.hpp>
39 
40 // Object includes
46 
47 #include <objmgr/scope.hpp>
48 
49 // PSSM includes
52 
53 // BLAST includes
60 
61 // SeqAlign comparison includes
62 #include "seqalign_cmp.hpp"
63 #include "seqalign_set_convert.hpp"
64 
65 // Unit test auxiliary includes
66 #include "blast_test_util.hpp"
67 #include "test_objmgr.hpp"
68 
69 /// Calculate the size of a static array
70 #define STATIC_ARRAY_SIZE(array) (sizeof(array)/sizeof(*array))
71 
72 using namespace std;
73 using namespace ncbi;
74 using namespace ncbi::objects;
75 using namespace ncbi::blast;
76 
78 
82 
83  // This is needed only to get sequence for gi 129295 from asn1 file
85 
86  /// Contains a single Bioseq
88 
89  /// Contains a Bioseq-set with two Bioseqs, gi 7450545 and gi 129295
91 
92  /// Scope
94 
95  /// Seq-locs for creating instances of CObjMgr_QueryFactory
96  vector< CRef<CSeq_loc> > m_Seq_locs;
97 
99 
100  const string kPssmFile("data/pssm_freq_ratios.asn");
101  m_Pssm = TestUtil::ReadObject<CPssmWithParameters>(kPssmFile);
102  BOOST_REQUIRE(m_Pssm->GetPssm().CanGetQuery());
103  m_SeqEntry.Reset(&m_Pssm->SetPssm().SetQuery());
104  BOOST_REQUIRE(m_SeqEntry->IsSeq());
105 
106  CRef<CSeq_id> id(
107  const_cast<CSeq_id*>(m_SeqEntry->SetSeq().GetFirstId()));
108  CRef<CSeq_loc> seqloc(new CSeq_loc(CSeq_loc::e_Whole));
109  seqloc->SetId(*id);
110  m_Seq_locs.push_back(seqloc);
111 
112 
113  const string kSeqEntryFile("data/7450545.seqentry.asn");
114  CRef<CSeq_entry> seq_entry =
115  TestUtil::ReadObject<CSeq_entry>(kSeqEntryFile);
116 
117  m_SeqSet.Reset(new CSeq_entry);
118  m_SeqSet->SetSet().SetSeq_set().push_back(m_SeqEntry);
119  m_SeqSet->SetSet().SetSeq_set().push_back(seq_entry);
120  BOOST_REQUIRE(m_SeqEntry->IsSeq());
121 
122  id.Reset(const_cast<CSeq_id*>(seq_entry->SetSeq().GetFirstId()));
123  seqloc.Reset(new CSeq_loc(CSeq_loc::e_Whole));
124  seqloc->SetId(*id);
125  m_Seq_locs.push_back(seqloc);
126  }
127 
128  void x_InitScope() {
130  m_Scope->AddTopLevelSeqEntry(*m_SeqSet);
131  }
132 
134  m_OptHandle.Reset(new CDeltaBlastOptionsHandle);
135  BOOST_REQUIRE_EQUAL(eCompositionBasedStats,
136  m_OptHandle->GetCompositionBasedStats());
137  m_SearchDb.Reset(new CSearchDatabase("data/seqp",
139  m_DomainDb.Reset(new CSearchDatabase("data/deltatest",
141 
142  x_ReadSeqEntriesFromFile();
143  x_InitScope();
144  }
145 
147  m_OptHandle.Reset();
148  m_SearchDb.Reset();
149  m_DomainDb.Reset();
150  m_SeqEntry.Reset();
151  m_SeqSet.Reset();
152  m_Seq_locs.clear();
153  m_Scope.Reset();
154  }
155 
157  {
158  int num_ids = 0;
159  TGi last_id = INVALID_GI;
160  string last_acc;
161  CTextseq_id::TVersion last_ver = -1;
162  ITERATE(CSeq_align_set::Tdata, itr, sas->Get()) {
163  const CSeq_id& seqid = (*itr)->GetSeq_id(1);
165  BOOST_REQUIRE(seqid.IsGi() || seqid.IsGeneral());
166  }
167  else {
168  BOOST_REQUIRE(seqid.IsOther() ||
169  seqid.IsGeneral() ||
170  seqid.IsGenbank());
171  }
172 
173  const CTextseq_id* txt_id = seqid.GetTextseq_Id();
174  if (CSeq_id::PreferAccessionOverGi() && txt_id) {
175  string acc = txt_id->GetAccession();
176  CTextseq_id::TVersion ver = txt_id->IsSetVersion() ?
177  txt_id->GetVersion() : -1;
178  if (acc != last_acc || ver != last_ver) {
179  num_ids++;
180  last_acc = acc;
181  last_ver = ver;
182  }
183  last_id = INVALID_GI;
184  }
185  else if (!CSeq_id::PreferAccessionOverGi() && seqid.IsGi()) {
186  TGi new_gi = seqid.GetGi();
187 
188  if (new_gi != last_id) {
189  num_ids++;
190  last_id = new_gi;
191  }
192  last_acc.clear();
193  last_ver = -1;
194  }
195  else {
196  BOOST_REQUIRE(seqid.GetGeneral().IsSetTag());
197  Uint4 new_tag =(Uint4) (seqid.GetGeneral().GetTag().GetId());
198 
199  if (new_tag != GI_TO(Uint4, last_id)) {
200  num_ids++;
201  last_id = GI_FROM(Uint4, new_tag);
202  }
203  last_acc.clear();
204  last_ver = -1;
205  }
206 
207  }
208  return num_ids;
209  }
210 };
211 
212 
213 BOOST_FIXTURE_TEST_SUITE(deltablast, CDeltaBlastTestFixture)
214 
215 BOOST_AUTO_TEST_CASE(TestSingleQuery_CBS)
216 {
218  query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
219  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
220 
221 
222  CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
223  CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
224 
225  // create rpsblast options hanlde and set composition based statistics
227  rps_opts->SetCompositionBasedStats(true);
228  rps_opts->SetEvalueThreshold(m_OptHandle->GetDomainInclusionThreshold());
229  rps_opts->SetFilterString("F");
230 
231  CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
232  m_OptHandle, rps_opts);
233 
234  CSearchResultSet results(*deltablast.Run());
235 
236 
237  // check intermediate results: alignments with domains
238  CRef<CSearchResultSet> domain_results = deltablast.GetDomainResults();
239  BOOST_REQUIRE((*domain_results)[0].GetErrors().empty());
240 
241  const int kNumExpectedMatchingDomains = 3;
242  CConstRef<CSeq_align_set> domain_sas = (*domain_results)[0].GetSeqAlign();
243 
244  BOOST_REQUIRE_EQUAL(kNumExpectedMatchingDomains,
245  x_CountNumberUniqueIds(domain_sas));
246 
247  const size_t kNumExpectedDomainHSPs = 3;
248  qa::TSeqAlignSet expected_domain_results(kNumExpectedDomainHSPs);
249 
250  // HSP # 1
251  {
252  expected_domain_results[0].score = 742;
253  expected_domain_results[0].evalue = 5.21679e-102;
254  expected_domain_results[0].bit_score = 2.89507e+02;
255  expected_domain_results[0].num_ident = 111;
256  int starts[] = {1, 139, 80, -1, 81, 218};
257  int lengths[] = {79, 1, 151};
258  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
259  back_inserter(expected_domain_results[0].starts));
260  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
261  back_inserter(expected_domain_results[0].lengths));
262  }
263 
264  // HSP # 2
265  {
266  expected_domain_results[1].score = 713;
267  expected_domain_results[1].evalue = 1.33296e-97;
268  expected_domain_results[1].bit_score = 2.78295e+02;
269  expected_domain_results[1].num_ident = 107;
270  int starts[] = {1, 135, 6, -1, 8, 140, -1, 190, 58, 191, 80, -1, 81,
271  213, 94, -1, 95, 226, 114, -1, 116, 245, 200, -1,
272  202, 329};
273  int lengths[] = {5, 2, 50, 1, 22, 1, 13, 1, 19, 2, 84, 2, 30};
274  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
275  back_inserter(expected_domain_results[1].starts));
276  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
277  back_inserter(expected_domain_results[1].lengths));
278  }
279 
280  // HSP # 3
281  {
282  expected_domain_results[2].score = 673;
283  expected_domain_results[2].evalue = 1.63233e-91;
284  expected_domain_results[2].bit_score = 2.62959e+02;
285  expected_domain_results[2].num_ident = 106;
286  int starts[] = {0, 137, 115, -1, 117, 252 };
287  int lengths[] = {115, 2, 112};
288  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
289  back_inserter(expected_domain_results[2].starts));
290  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
291  back_inserter(expected_domain_results[2].lengths));
292  }
293 
294 
295  qa::TSeqAlignSet actual_domain_results;
296  qa::SeqAlignSetConvert(*domain_sas, actual_domain_results);
297 
298  qa::CSeqAlignCmpOpts opts;
299  qa::CSeqAlignCmp cmp_d(expected_domain_results, actual_domain_results, opts);
300  string errors;
301  bool identical_results = cmp_d.Run(&errors);
302 
303  BOOST_REQUIRE_MESSAGE(identical_results, errors);
304 
305  // check PSSM
306 
307  CRef<CPssmWithParameters> pssm = deltablast.GetPssm();
308  BOOST_REQUIRE(pssm->HasQuery());
309  BOOST_REQUIRE(pssm->GetQuery().GetSeq().IsSetInst());
310  BOOST_REQUIRE_EQUAL(pssm->GetQuery().GetSeq().GetFirstId()->GetGi(),
311  GI_CONST(129295));
312 
313  // check alignments from sequence search results
314 
315  BOOST_REQUIRE(results[0].GetErrors().empty());
316 
317  BOOST_REQUIRE_EQUAL(
318  results[0].GetSeqAlign()->Get().front()->GetSeq_id(0).GetGi(),
319  GI_CONST(129295));
320 
321  const int kNumExpectedMatchingSeqs = 8;
322  CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
323 
324  BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, x_CountNumberUniqueIds(sas));
325 
326  const size_t kNumExpectedHSPs = 9;
327  qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
328 
329  // HSP # 1
330  {
331  expected_results[0].score = 861;
332  expected_results[0].evalue = 2.80386e-109;
333  expected_results[0].bit_score = 3.35656e+02;
334  expected_results[0].num_ident = 101;
335  int starts[] = {0, 941, -1, 1094, 153, 1095};
336  int lengths[] = {153, 1, 79};
337  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
338  back_inserter(expected_results[0].starts));
339  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
340  back_inserter(expected_results[0].lengths));
341  }
342 
343  // HSP # 2
344  {
345  expected_results[1].score = 633;
346  expected_results[1].evalue = 3.86624e-77;
347  expected_results[1].bit_score = 2.47830e+02;
348  expected_results[1].num_ident = 73;
349  int starts[] = {0, 154, -1, 307, 153, 308};
350  int lengths[] = {153, 1, 25};
351  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
352  back_inserter(expected_results[1].starts));
353  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
354  back_inserter(expected_results[1].lengths));
355  }
356 
357  // HSP # 3
358  {
359  expected_results[2].score = 645;
360  expected_results[2].evalue = 6.62145e-84;
361  expected_results[2].bit_score = 2.52452e+02;
362  expected_results[2].num_ident = 80;
363  int starts[] = {0, 190, 68, -1, 70, 258, 92, -1, 93, 280, 118, -1,
364  119, 305, 151, -1, 152, 337, 161, -1, 162, 346, -1,
365  367, 183, 371};
366  int lengths[] = {68, 2, 22, 1, 25, 1, 32, 1, 9, 1, 21, 4, 49};
367  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
368  back_inserter(expected_results[2].starts));
369  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
370  back_inserter(expected_results[2].lengths));
371  }
372 
373  // HSP #4
374  {
375  expected_results[3].score = 53;
376  expected_results[3].evalue = 3.83197e+00;
377  expected_results[3].bit_score = 244103049e-7;
378  expected_results[3].num_ident = 7;
379  int starts[] = {127, 104, 132, -1, 134, 109};
380  int lengths[] = {5, 2, 15};
381  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
382  back_inserter(expected_results[3].starts));
383  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
384  back_inserter(expected_results[3].lengths));
385  }
386 
387 
388  // HSP # 5
389  {
390  expected_results[4].score = 51;
391  expected_results[4].evalue = 5.55808;
392  expected_results[4].bit_score = 23.6440;
393  expected_results[4].num_ident = 5;
394  int starts[] = {137, 20, 151, -1, 156, 34};
395  int lengths[] = {14, 5, 17};
396  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
397  back_inserter(expected_results[4].starts));
398  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
399  back_inserter(expected_results[4].lengths));
400  }
401 
402  // HSP # 6
403  {
404  expected_results[5].score = 51;
405  expected_results[5].evalue = 6.14178;
406  expected_results[5].bit_score = 23.6440;
407  expected_results[5].num_ident = 8;
408  int starts[] = {153, 102, -1, 122, 173, 127};
409  int lengths[] = {20, 5, 12};
410  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
411  back_inserter(expected_results[5].starts));
412  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
413  back_inserter(expected_results[5].lengths));
414  }
415 
416  // HSP # 7
417  {
418  expected_results[6].score = 51;
419  expected_results[6].evalue = 6.33109;
420  expected_results[6].bit_score = 23.6440;
421  expected_results[6].num_ident = 7;
422  int starts[] = {172, 305, 179, -1, 182, 312};
423  int lengths[] = {7, 3, 33};
424  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
425  back_inserter(expected_results[6].starts));
426  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
427  back_inserter(expected_results[6].lengths));
428  }
429 
430 
431  // HSP # 8
432  {
433  expected_results[7].score = 48;
434  expected_results[7].evalue = 7.91913;
435  expected_results[7].bit_score = 22.4884;
436  expected_results[7].num_ident = 5;
437  int starts[] = {155, 78};
438  int lengths[] = {18};
439  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
440  back_inserter(expected_results[7].starts));
441  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
442  back_inserter(expected_results[7].lengths));
443  }
444 
445 
446  // HSP # 9
447  {
448  expected_results[8].score = 49;
449  expected_results[8].evalue = 8.65824;
450  expected_results[8].bit_score = 22.8736;
451  expected_results[8].num_ident = 4;
452  int starts[] = {175, 11};
453  int lengths[] = {14};
454  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
455  back_inserter(expected_results[8].starts));
456  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
457  back_inserter(expected_results[8].lengths));
458  }
459 
460 
461  qa::TSeqAlignSet actual_results;
462  qa::SeqAlignSetConvert(*sas, actual_results);
463 
464  qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
465  identical_results = cmp.Run(&errors);
466 
467  BOOST_REQUIRE_MESSAGE(identical_results, errors);
468 }
469 
470 
471 BOOST_AUTO_TEST_CASE(TestSingleQuery_NoCBS)
472 {
473  m_OptHandle->SetCompositionBasedStats(eNoCompositionBasedStats);
474  // no cbs for rpsblast
476  rps_opts->SetEvalueThreshold(m_OptHandle->GetDomainInclusionThreshold());
477  rps_opts->SetCompositionBasedStats(false);
478 
479  m_OptHandle->SetEvalueThreshold(5);
480 
482  query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
483  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
484 
485  CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
486 
487  // use CDD database that does not have freq ratios file,
488  // for no CBS option rpsblast does not need the '.freq' file
489  m_DomainDb.Reset(new CSearchDatabase("data/deltatest_nocbs",
491  CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
492  CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
493  m_OptHandle, rps_opts);
494 
495  CSearchResultSet results(*deltablast.Run());
496 
497 
498  // check intermediate results: alignments with domains
499  CRef<CSearchResultSet> domain_results = deltablast.GetDomainResults();
500  BOOST_REQUIRE((*domain_results)[0].GetErrors().empty());
501 
502  const int kNumExpectedMatchingDomains = 3;
503  CConstRef<CSeq_align_set> domain_sas = (*domain_results)[0].GetSeqAlign();
504 
505  BOOST_REQUIRE_EQUAL(kNumExpectedMatchingDomains,
506  x_CountNumberUniqueIds(domain_sas));
507 
508  const size_t kNumExpectedDomainHSPs = 3;
509  qa::TSeqAlignSet expected_domain_results(kNumExpectedDomainHSPs);
510 
511  // HSP # 1
512  {
513  expected_domain_results[0].score = 728;
514  expected_domain_results[0].evalue = 6.61511e-100;
515  expected_domain_results[0].bit_score = 2841082571e-7;
516  expected_domain_results[0].num_ident = 111;
517  int starts[] = {1, 139, 80, -1, 81, 218, 162, -1, 163, 299};
518  int lengths[] = {79, 1, 81, 1, 69};
519  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
520  back_inserter(expected_domain_results[0].starts));
521  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
522  back_inserter(expected_domain_results[0].lengths));
523  }
524 
525  // HSP # 2
526  {
527  expected_domain_results[1].score = 698;
528  expected_domain_results[1].evalue = 2.17510e-95;
529  expected_domain_results[1].bit_score = 2725169055e-7;
530  expected_domain_results[1].num_ident = 107;
531  int starts[] = {1, 135, 6, -1, 8, 140, -1, 190, 58, 191, 80, -1, 81,
532  213, 94, -1, 95, 226, 114, -1, 116, 245, 200, -1,
533  202, 329};
534  int lengths[] = {5, 2, 50, 1, 22, 1, 13, 1, 19, 2, 84, 2, 30};
535  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
536  back_inserter(expected_domain_results[1].starts));
537  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
538  back_inserter(expected_domain_results[1].lengths));
539  }
540 
541  // HSP # 3
542  {
543  expected_domain_results[2].score = 661;
544  expected_domain_results[2].evalue = 9.15785e-90;
545  expected_domain_results[2].bit_score = 2583366987e-7;
546  expected_domain_results[2].num_ident = 106;
547  int starts[] = {0, 137, 115, -1, 117, 252 };
548  int lengths[] = {115, 2, 112};
549  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
550  back_inserter(expected_domain_results[2].starts));
551  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
552  back_inserter(expected_domain_results[2].lengths));
553  }
554 
555 
556  qa::TSeqAlignSet actual_domain_results;
557  qa::SeqAlignSetConvert(*domain_sas, actual_domain_results);
558 
559  qa::CSeqAlignCmpOpts opts;
560  qa::CSeqAlignCmp cmp_d(expected_domain_results, actual_domain_results, opts);
561  string errors;
562  bool identical_results = cmp_d.Run(&errors);
563 
564  BOOST_REQUIRE_MESSAGE(identical_results, errors);
565 
566  // check PSSM
567 
568  CRef<CPssmWithParameters> pssm = deltablast.GetPssm();
569  BOOST_REQUIRE(pssm->HasQuery());
570  BOOST_REQUIRE(pssm->GetQuery().GetSeq().IsSetInst());
571  BOOST_REQUIRE_EQUAL(pssm->GetQuery().GetSeq().GetFirstId()->GetGi(),
572  GI_CONST(129295));
573 
574  // check alignments from sequence search results
575 
576  BOOST_REQUIRE(results[0].GetErrors().empty());
577 
578  BOOST_REQUIRE_EQUAL(
579  results[0].GetSeqAlign()->Get().front()->GetSeq_id(0).GetGi(),
580  GI_CONST(129295));
581 
582  const int kNumExpectedMatchingSeqs = 5;
583  CConstRef<CSeq_align_set> sas = results[0].GetSeqAlign();
584 
585  BOOST_REQUIRE_EQUAL(kNumExpectedMatchingSeqs, x_CountNumberUniqueIds(sas));
586 
587  const size_t kNumExpectedHSPs = 6;
588  qa::TSeqAlignSet expected_results(kNumExpectedHSPs);
589 
590  // HSP # 1
591  {
592  expected_results[0].score = 876;
593  expected_results[0].evalue = 2.04885e-111;
594  expected_results[0].bit_score = 3414303038e-7;
595  expected_results[0].num_ident = 101;
596  int starts[] = {0, 941, -1, 1094, 153, 1095};
597  int lengths[] = {153, 1, 79};
598  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
599  back_inserter(expected_results[0].starts));
600  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
601  back_inserter(expected_results[0].lengths));
602  }
603 
604 
605  // HSP # 2
606  {
607  expected_results[1].score = 642;
608  expected_results[1].evalue = 2.54740e-78;
609  expected_results[1].bit_score = 2512936031e-7;
610  expected_results[1].num_ident = 73;
611  int starts[] = {0, 154, -1, 307, 153, 308};
612  int lengths[] = {153, 1, 25};
613  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
614  back_inserter(expected_results[1].starts));
615  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
616  back_inserter(expected_results[1].lengths));
617  }
618 
619  // HSP # 3
620  {
621  expected_results[2].score = 736;
622  expected_results[2].evalue = 1.10324e-97;
623  expected_results[2].bit_score = 2875023632e-7;
624  expected_results[2].num_ident = 83;
625  int starts[] = {0, 190, 68, -1, 70, 258, 92, -1, 93, 280, 118, -1,
626  119, 305, 151, -1, 152, 337, 161, -1, 162, 346, -1,
627  374, 190, 378};
628  int lengths[] = {68, 2, 22, 1, 25, 1, 32, 1, 9, 1, 28, 4, 42};
629  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
630  back_inserter(expected_results[2].starts));
631  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
632  back_inserter(expected_results[2].lengths));
633  }
634 
635  // HSP # 4
636  {
637  expected_results[3].score = 53;
638  expected_results[3].evalue = 3.45771;
639  expected_results[3].bit_score = 2441105291e-8;
640  expected_results[3].num_ident = 4;
641  int starts[] = {139, 22, 151, -1, 156, 34};
642  int lengths[] = {12, 5, 17};
643  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
644  back_inserter(expected_results[3].starts));
645  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
646  back_inserter(expected_results[3].lengths));
647  }
648 
649 
650  // HSP # 5
651  {
652  expected_results[4].score = 52;
653  expected_results[4].evalue = 4.61874;
654  expected_results[4].bit_score = 2402585333e-8;
655  expected_results[4].num_ident = 7;
656  int starts[] = {172, 305, 179, -1, 182, 312};
657  int lengths[] = {7, 3, 33};
658  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
659  back_inserter(expected_results[4].starts));
660  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
661  back_inserter(expected_results[4].lengths));
662  }
663 
664  // HSP # 6
665  {
666  expected_results[5].score = 52;
667  expected_results[5].evalue = 4.62283;
668  expected_results[5].bit_score = 2402585333e-8;
669  expected_results[5].num_ident = 7;
670  int starts[] = {127, 104, 132, -1, 134, 109};
671  int lengths[] = {5, 2, 15};
672  copy(&starts[0], &starts[STATIC_ARRAY_SIZE(starts)],
673  back_inserter(expected_results[5].starts));
674  copy(&lengths[0], &lengths[STATIC_ARRAY_SIZE(lengths)],
675  back_inserter(expected_results[5].lengths));
676  }
677 
678  qa::TSeqAlignSet actual_results;
679  qa::SeqAlignSetConvert(*sas, actual_results);
680 
681  qa::CSeqAlignCmp cmp(expected_results, actual_results, opts);
682  identical_results = cmp.Run(&errors);
683 
684  BOOST_REQUIRE_MESSAGE(identical_results, errors);
685 }
686 
687 
688 BOOST_AUTO_TEST_CASE(TestMultipleQueries)
689 {
690  TSeqLocVector queries;
691  ITERATE (vector< CRef<CSeq_loc> >, it, m_Seq_locs) {
692  queries.push_back(SSeqLoc(**it, *m_Scope));
693  }
694  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(queries));
695 
696  CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
697  CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
698  CDeltaBlast deltablast(query_factory, dbadapter, domain_dbadapter,
699  m_OptHandle);
700 
701  CSearchResultSet results(*deltablast.Run());
702 
703  // check results
704  BOOST_REQUIRE(results[0].GetErrors().empty());
705  BOOST_REQUIRE(results[1].GetErrors().empty());
706 
707  // verify query id in Seq_aligns
708  BOOST_REQUIRE_EQUAL(
709  results[0].GetSeqAlign()->Get().front()->GetSeq_id(0).GetGi(),
710  GI_CONST(129295));
711 
712  BOOST_REQUIRE_EQUAL(
713  results[1].GetSeqAlign()->Get().front()->GetSeq_id(0).GetPir().GetName(),
714  "H70430");
715 
716 
717  // verify query id in PSSMs
718  BOOST_REQUIRE_EQUAL(
719  deltablast.GetPssm(0)->GetQuery().GetSeq().GetFirstId()->GetGi(),
720  GI_CONST(129295));
721 
722  BOOST_REQUIRE_EQUAL(
723  deltablast.GetPssm(1)->GetQuery().GetSeq().GetFirstId()->GetPir().GetName(),
724  "H70430");
725 }
726 
727 // Verify that null inputs result in exceptions
728 BOOST_AUTO_TEST_CASE(TestNullQuery)
729 {
730  CRef<IQueryFactory> query_factory;
731  CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
732  CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
733  BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
734  domain_dbadapter, m_OptHandle),
735  CBlastException);
736 }
737 
738 
739 BOOST_AUTO_TEST_CASE(TestNullOptions)
740 {
741  m_OptHandle.Reset();
743  query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
744  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
745  CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
746  CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
747  BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
748  domain_dbadapter, m_OptHandle),
749  CBlastException);
750 }
751 
752 
753 BOOST_AUTO_TEST_CASE(TestNullDatabase)
754 {
756  query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
757  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
758  CRef<CLocalDbAdapter> dbadapter;
759  CRef<CLocalDbAdapter> domain_dbadapter(new CLocalDbAdapter(*m_DomainDb));
760  BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
761  domain_dbadapter, m_OptHandle),
762  CBlastException);
763 }
764 
765 
766 BOOST_AUTO_TEST_CASE(TestNullDomainDatabase)
767 {
769  query.push_back(SSeqLoc(*m_Seq_locs.front(), *m_Scope));
770  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query));
771  CRef<CLocalDbAdapter> dbadapter(new CLocalDbAdapter(*m_SearchDb));
772  CRef<CLocalDbAdapter> domain_dbadapter;
773  BOOST_REQUIRE_THROW(CDeltaBlast deltablast(query_factory, dbadapter,
774  domain_dbadapter, m_OptHandle),
775  CBlastException);
776 }
777 
778 
780 
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Declares the CBlastRPSOptionsHandle class.
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
const CSeq_id * GetFirstId() const
Definition: Bioseq.cpp:271
Defines BLAST error codes (user errors included)
Handle to the rpsblast options to the BLAST algorithm.
Handle to the protein-protein options to the BLAST algorithm.
A simple realization of the DELTA-BLAST algorithm: seacrch domain database, compute PSSM,...
Definition: deltablast.hpp:60
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a a variety of BLAST database/s...
NCBI C++ Object Manager dependant implementation of IQueryFactory.
const CSeq_entry & GetQuery() const
Retrieve the query sequence.
bool HasQuery() const
Has this PSSM a query in it?
CScope –.
Definition: scope.hpp:92
Blast Search Subject.
Search Results for All Queries.
Definition: Seq_entry.hpp:56
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eNoCompositionBasedStats
Don't use composition based statistics.
#define STATIC_ARRAY_SIZE(array)
Calculate the size of a static array.
BOOST_AUTO_TEST_CASE(TestSingleQuery_CBS)
Declares CPsiBlast, the C++ API for the PSI-BLAST engine.
Declares the CDeltaBlastOptionsHandle class.
void SetEvalueThreshold(double eval)
Sets EvalueThreshold.
void SetCompositionBasedStats(bool mode)
Set composition based statistics mode.
CConstRef< objects::CPssmWithParameters > GetPssm(int index=0) const
Accessor for PSSM computd from CDD hits and used in protein search.
Definition: deltablast.cpp:177
CRef< CSearchResultSet > Run()
Run the DELTA-BLAST engine with one iteration.
Definition: deltablast.cpp:83
ECompoAdjustModes GetCompositionBasedStats() const
Returns this mode, which mostly specifies whether composition of db sequence is taken into account wh...
CRef< CSearchResultSet > GetDomainResults()
Get results of conserved domain search (intermediate results)
Definition: deltablast.hpp:112
void SetFilterString(const char *f, bool clear=true)
Sets FilterString.
@ eBlastDbIsProtein
protein
#define INVALID_GI
Definition: ncbimisc.hpp:1089
#define GI_FROM(T, value)
Definition: ncbimisc.hpp:1086
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define GI_CONST(gi)
Definition: ncbimisc.hpp:1087
#define GI_TO(T, gi)
Definition: ncbimisc.hpp:1085
const CTextseq_id * GetTextseq_Id(void) const
Return embedded CTextseq_id, if any.
Definition: Seq_id.cpp:169
static bool PreferAccessionOverGi(void)
Check if the option to prefer accession.version over GI is enabled (SeqId/PreferAccessionOverGi or SE...
Definition: Seq_id.cpp:3404
void SetId(CSeq_id &id)
set the 'id' field in all parts of this location
Definition: Seq_loc.cpp:3474
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry, default priority is higher than for defaults or loaders Add object to the score with p...
Definition: scope.cpp:522
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
const TTag & GetTag(void) const
Get the Tag member data.
Definition: Dbtag_.hpp:267
bool IsSetTag(void) const
appropriate tag Check if a value has been assigned to Tag data member.
Definition: Dbtag_.hpp:255
TId GetId(void) const
Get the variant data.
Definition: Object_id_.hpp:270
void SetPssm(TPssm &value)
Assign a value to Pssm data member.
bool CanGetQuery(void) const
Check if it is safe to call GetQuery method.
Definition: Pssm_.hpp:766
const TPssm & GetPssm(void) const
Get the Pssm member data.
list< CRef< CSeq_align > > Tdata
const Tdata & Get(void) const
Get the member data.
bool IsGenbank(void) const
Check if variant Genbank is selected.
Definition: Seq_id_.hpp:841
bool IsOther(void) const
Check if variant Other is selected.
Definition: Seq_id_.hpp:871
bool IsGeneral(void) const
Check if variant General is selected.
Definition: Seq_id_.hpp:877
TGi GetGi(void) const
Get the variant data.
Definition: Seq_id_.hpp:889
TVersion GetVersion(void) const
Get the Version member data.
const TGeneral & GetGeneral(void) const
Get the variant data.
Definition: Seq_id_.cpp:369
bool IsGi(void) const
Check if variant Gi is selected.
Definition: Seq_id_.hpp:883
bool IsSetVersion(void) const
Check if a value has been assigned to Version data member.
const TAccession & GetAccession(void) const
Get the Accession member data.
@ e_Whole
whole sequence
Definition: Seq_loc_.hpp:100
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
TSet & SetSet(void)
Select the variant.
Definition: Seq_entry_.cpp:130
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
bool IsSetInst(void) const
the sequence data Check if a value has been assigned to Inst data member.
Definition: Bioseq_.hpp:324
Declares class which provides internal BLAST database representations to the internal BLAST APIs.
const TYPE & Get(const CNamedParameterList *param)
constexpr auto front(list< Head, As... >, T=T()) noexcept -> Head
constexpr bool empty(list< Ts... >) noexcept
Magic spell ;-) needed for some weird compilers... very empiric.
std::vector< SeqAlign > TSeqAlignSet
Vector of neutral sequence alignments.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Utilities to develop and debug unit tests for BLAST.
API to compare CSeq-aligns produced by BLAST.
void SeqAlignSetConvert(const objects::CSeq_align_set &ss, std::vector< SeqAlign > &retval)
Converts a Seq-align-set into a neutral seqalign for use with the CSeqAlignCmp class.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
vector< CRef< CSeq_loc > > m_Seq_locs
Seq-locs for creating instances of CObjMgr_QueryFactory.
int x_CountNumberUniqueIds(CConstRef< CSeq_align_set > sas)
CRef< CPssmWithParameters > m_Pssm
CRef< CSeq_entry > m_SeqEntry
Contains a single Bioseq.
CRef< CSeq_entry > m_SeqSet
Contains a Bioseq-set with two Bioseqs, gi 7450545 and gi 129295.
CRef< CScope > m_Scope
Scope.
CRef< CSearchDatabase > m_DomainDb
CRef< CDeltaBlastOptionsHandle > m_OptHandle
CRef< CSearchDatabase > m_SearchDb
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
static string query
Utility stuff for more convenient using of Boost.Test library.
Uniform BLAST Search Interface.
Modified on Tue Jul 23 17:52:04 2024 by modify_doxy.py rev. 669887