NCBI C++ ToolKit
querydata_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: querydata_unit_test.cpp 95565 2021-11-26 14:52:37Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  * File Description:
29  * Unit test module for the query data extraction interface
30  *
31  * ===========================================================================
32  */
33 #include <ncbi_pch.hpp>
34 #include <corelib/test_boost.hpp>
35 
39 
41 
42 // needed to obtain the blast options handle from the search factory
45 
46 // needed for objmgr dependent tests of query data interface
47 #include "test_objmgr.hpp"
48 
49 #include <objects/seqloc/Seq_interval.hpp> // for CSeq_int
50 
51 #include <objtools/simple/simple_om.hpp> // Simple Object manager interface
52 #include <objmgr/util/seq_loc_util.hpp> // CSeq_loc utilities
53 #include <objmgr/util/sequence.hpp> // for GetGiForAccession
54 #include <corelib/ncbidiag.hpp> // for NCBI_CURRENT_FUNCTION
55 #include <serial/iterator.hpp> // for CTypeConstIterator
56 // needed to perform translations between encodings
58 
59 using namespace std;
60 using namespace ncbi;
61 using namespace ncbi::objects;
62 using namespace ncbi::blast;
63 
64 // ========================================================================= //
65 // Helper Classes
66 
68 {
69 public:
71  TGi gi);
72 
74  const vector<TGi>& gis);
75 
77  const IRemoteQueryData::TSeqLocs& seqlocs);
78 
79  void operator()(void);
80 
81 private:
83  vector<TGi> m_Gis;
84  vector<ENa_strand> m_Strands;
87 
88  void x_Init(const IRemoteQueryData::TSeqLocs* seqlocs = 0);
89  bool x_IsProtein();
90  ENa_strand x_GetStrand(int index);
91  void x_TestSingleSequence_Local(int index);
92  void x_TestSingleSequence_Remote(int index);
93 
94  void x_TestSingleProtein_Local(int index,
95  const BlastQueryInfo* qinfo,
96  const BLAST_SequenceBlk* seqblk);
97  void x_TestSingleNucleotide_Local(int index,
98  const BlastQueryInfo* qinfo,
99  const BLAST_SequenceBlk* seqblk);
100  void x_TestSingleTranslatedNucl_Local(int index,
101  const BlastQueryInfo* qinfo,
102  const BLAST_SequenceBlk* seqblk);
103  void x_TestSingleProtein_Remote(const CSeq_id& id,
104  const CSeq_inst& seq_inst);
105  void x_TestSingleNucleotide_Remote(const CSeq_id& id,
106  const CSeq_inst& seq_inst);
107 
108  void x_TestLocalStrand(const CSeq_id& id,
109  int ctx_index,
110  ENa_strand strand,
111  const BlastQueryInfo* qinfo,
112  const BLAST_SequenceBlk* seqblk);
113  void x_CompareSequenceData(CSeqVector& sv, const Uint1* sequence,
114  const string& strand = "");
115 };
116 
118  TGi gi)
119  : m_QueryFactory(query_factory), m_Gis(1, gi)
120 {
121  x_Init();
122 }
123 
125  const vector<TGi>& gis)
126  : m_QueryFactory(query_factory), m_Gis(gis)
127 {
128  BOOST_REQUIRE(!gis.empty());
129  x_Init();
130 }
131 
133  const IRemoteQueryData::TSeqLocs& seqlocs)
134  : m_QueryFactory(query_factory)
135 {
136  BOOST_REQUIRE(!seqlocs.empty());
137  x_Init(&seqlocs);
138 }
139 
140 void
142 {
143  // based on the first sequence, determine if all are nucleotide or protein
145  bool is_protein_sequence = false;
147  try {
148  bioseq = bh.GetBioseqCore();
149  is_protein_sequence = bioseq->GetInst().IsAa();
150  } catch (...) {
151  // deliberately ignore exception, as this would occur in the case of an
152  // invalid gi
153  }
154 
155  blast::EProgram prog = is_protein_sequence
157  : blast::eBlastn;
158  // FIXME: this should allow for translated query searches also!
160  m_Options.Reset(&oh->GetOptions());
161 
163 
164  if (seqlocs) {
165  // No gis were provided, build gi list
166  m_Gis.reserve(seqlocs->size());
167  m_Strands.reserve(seqlocs->size());
168 
169  size_t index = 0;
170  ITERATE(IRemoteQueryData::TSeqLocs, itr, *seqlocs) {
171  // Get the gi
172  const CSeq_id& id = sequence::GetId(**itr, &*m_Scope);
173  m_Gis[index] = sequence::GetGiForAccession(id.AsFastaString(),
174  *m_Scope);
175  BOOST_REQUIRE(m_Gis[index] != ZERO_GI);
176 
177  // Get the strand
178  m_Strands[index] = (*itr)->GetStrand();
179  index++;
180  }
181  BOOST_REQUIRE(index == seqlocs->size());
182 
183  } else {
184  // No IRemoteQueryData::TSeqLocs were provided, build strand list
185 
187  m_Strands.resize(m_Gis.size(), s);
188 
189  if (x_IsProtein()) {
190  s = eNa_strand_unknown;
191  } else if ( (s = m_Options->GetStrandOption()) == eNa_strand_unknown) {
192  s = eNa_strand_both;
193  }
194  fill(m_Strands.begin(), m_Strands.end(), s);
195  }
196 }
197 
198 bool
200 {
201  if ( !m_Options ) {
202  x_Init();
203  }
205 }
206 
207 inline ENa_strand
209 {
210  return m_Strands[index];
211 }
212 
213 void
215 {
216  for (size_t i = 0; i < m_Gis.size(); i++) {
219  }
220 }
221 
222 void
224  const BlastQueryInfo* qinfo,
225  const BLAST_SequenceBlk* seqblk)
226 {
227  const CSeq_id id(CSeq_id::e_Gi, m_Gis[index]);
228 
229  int ctx_index = index * 2; // index into BlastQueryInfo::contexts
230  switch (x_GetStrand(index)) {
231  case eNa_strand_plus:
232  x_TestLocalStrand(id, ctx_index, x_GetStrand(index), qinfo, seqblk);
233  break;
234  case eNa_strand_minus:
235  x_TestLocalStrand(id, ctx_index + 1, x_GetStrand(index), qinfo, seqblk);
236  break;
237  case eNa_strand_both:
238  x_TestLocalStrand(id, ctx_index, eNa_strand_plus, qinfo, seqblk);
239  x_TestLocalStrand(id, ctx_index + 1, eNa_strand_minus, qinfo, seqblk);
240  break;
241  default:
242  throw runtime_error("Internal error in " +
243  string(NCBI_CURRENT_FUNCTION));
244  }
245 }
246 
247 void
249  int ctx_index,
250  ENa_strand strand,
251  const BlastQueryInfo* qinfo,
252  const BLAST_SequenceBlk* seqblk)
253 {
254  BOOST_REQUIRE(strand == eNa_strand_plus || strand == eNa_strand_minus);
255  BOOST_REQUIRE(qinfo->contexts[ctx_index].query_length != 0);
256 
257  // Test the sequence length
258  const Int4 kLength = sequence::GetLength(id, &*m_Scope);
259  BOOST_REQUIRE_EQUAL(kLength, qinfo->contexts[ctx_index].query_length);
260 
261  // Test the actual sequence data
262  CSeqVector sv = CSimpleOM::GetSeqVector(id, strand);
263  Uint1* sequence = seqblk->sequence +
264  qinfo->contexts[ctx_index].query_offset;
265  x_CompareSequenceData(sv, sequence,
266  strand == eNa_strand_plus ? "plus" : "minus");
267 }
268 
269 void
271  const BlastQueryInfo* qinfo,
272  const BLAST_SequenceBlk* seqblk)
273 {
274  string msg("CSequenceDataTester::x_TestSingleTranslatedNucl_Local ");
275  msg += "not implemented";
276  throw runtime_error(msg);
277 }
278 
279 void
281  const Uint1* sequence,
282  const string& strand)
283 {
284  if (x_IsProtein()) {
285  BOOST_REQUIRE(sv.IsProtein());
287  } else {
288  BOOST_REQUIRE(sv.IsNucleotide());
290  }
291 
292  string msg("Different ");
293  msg += x_IsProtein() ? "residues" : "bases";
294  msg += " at position ";
295 
296  for (TSeqPos i = 0; i < sv.size(); i++) {
297 
298  const Uint1 kBase = x_IsProtein() ? sv[i] : NCBI4NA_TO_BLASTNA[sv[i]];
299  msg += NStr::IntToString(i);
300  if ( !x_IsProtein() && !strand.empty() ) {
301  msg += " (" + strand + " strand)";
302  }
303 
304  BOOST_REQUIRE_MESSAGE(static_cast<int>(sequence[i]) == static_cast<int>(kBase),
305  msg);
306  }
307 }
308 
309 void
311  const BlastQueryInfo* qinfo,
312  const BLAST_SequenceBlk* seqblk)
313 {
314  BOOST_REQUIRE(index >= qinfo->first_context);
315  BOOST_REQUIRE(index <= qinfo->last_context);
316 
317  const CSeq_id id(CSeq_id::e_Gi, m_Gis[index]);
318 
319  // Test the sequence length
320  const Int4 kLength = sequence::GetLength(id, &*m_Scope);
321  BOOST_REQUIRE_EQUAL(kLength, qinfo->contexts[index].query_length);
322 
323  // Test the actual sequence data
324  Uint1* sequence = seqblk->sequence + qinfo->contexts[index].query_offset;
326  x_CompareSequenceData(sv, sequence);
327 }
328 
329 void
331 {
334  BOOST_REQUIRE(queries.NotEmpty());
335 
336  const BlastQueryInfo* qinfo = queries->GetQueryInfo();
337  const BLAST_SequenceBlk* seq_blk = queries->GetSequenceBlk();
338  BOOST_REQUIRE(qinfo != NULL);
339  BOOST_REQUIRE(seq_blk != NULL);
340 
341  TQueryMessages msgs;
342  queries->GetQueryMessages(index, msgs);
343  if ( !msgs.empty() ) {
344  string message;
345  ITERATE(TQueryMessages, m, msgs) {
346  message += (*m)->GetMessage();
347  }
348  NCBI_THROW(CBlastException, eCoreBlastError, message);
349  }
350 
351  BOOST_REQUIRE_EQUAL(m_Gis.size(),
352  static_cast<size_t>(qinfo->num_queries));
353 
354  if (x_IsProtein()) {
355  x_TestSingleProtein_Local(index, qinfo, seq_blk);
356  } else {
357  x_TestSingleNucleotide_Local(index, qinfo, seq_blk);
358  }
359 
360  // Make sure we get the same pointer to local queries from the factory
361  BOOST_REQUIRE_EQUAL(queries.GetNonNullPointer(),
363 }
364 
365 void
367 {
369  BOOST_REQUIRE(queries.NotEmpty());
370 
371  // Test the seqlocs
372  IRemoteQueryData::TSeqLocs seqlocs = queries->GetSeqLocs();
373  IRemoteQueryData::TSeqLocs::const_iterator itr = seqlocs.begin();
374  BOOST_REQUIRE_EQUAL(m_Gis.size(), seqlocs.size());
375  BOOST_REQUIRE(index >= 0);
376  BOOST_REQUIRE(index < static_cast<int>(seqlocs.size()));
377  for (int i = 0; itr != seqlocs.end(); ++itr, ++i) {
378  if (i == index) break;
379  }
380  BOOST_REQUIRE(itr != seqlocs.end());
381 
382 #if 0
383  // Currently disabled because the seqid string doesn't necessarily contain
384  // the gi...
385  // Test the gi being present in the Seq-id string
386  const CSeq_id* seqid = (*itr)->GetId();
387  BOOST_REQUIRE(seqid != NULL);
388  string gi_string(NStr::IntToString(m_Gis[index]));
389  string seqid_string(seqid->AsFastaString());
390  cout << seqid_string << endl;
391  BOOST_REQUIRE_MESSAGE(seqid_string.find(gi_string) != ncbi::NPOS,
392  "Cannot find gi in Seq-id string for remote query "
393  "data");
394 #endif
395 
396  // Test the Bioseq_set
397  CRef<CBioseq_set> bioseq_set = queries->GetBioseqSet();
398  CTypeConstIterator<CBioseq> bioseq(ConstBegin(*bioseq_set));
399  TSeqPos seq_index = 0;
400  for (; bioseq; ++bioseq) {
401  if (seq_index != static_cast<TSeqPos>(index)) {
402  seq_index++;
403  } else {
404  break;
405  }
406  }
407  BOOST_REQUIRE(seq_index < m_Gis.size());
408 
409  const CSeq_id id(CSeq_id::e_Gi, m_Gis[index]);
410  const CBioseq::TInst& seq_inst = bioseq->GetInst();
411  BOOST_REQUIRE_EQUAL(CSeq_inst::eRepr_raw, seq_inst.GetRepr());
412  BOOST_REQUIRE_EQUAL(x_IsProtein(), seq_inst.IsAa());
413  BOOST_REQUIRE_EQUAL(sequence::GetLength(id, &*m_Scope),
414  seq_inst.GetLength());
415 
416  if (x_IsProtein()) {
417  x_TestSingleProtein_Remote(id, seq_inst);
418  } else {
419  x_TestSingleNucleotide_Remote(id, seq_inst);
420  }
421 
422  // Make sure we get the same pointer to local queries from the factory
423  BOOST_REQUIRE_EQUAL(queries.GetNonNullPointer(),
425 }
426 
427 void
429  const CSeq_inst& seq_inst)
430 {
431  CSeq_inst::TSeq_data seq_data;
432  TSeqPos nconv = CSeqportUtil::Convert(seq_inst.GetSeq_data(),
433  &seq_data, CSeq_data::e_Ncbistdaa);
434  BOOST_REQUIRE(seq_data.IsNcbistdaa());
435 
437  BOOST_REQUIRE_EQUAL(sv.size(), nconv);
438  BOOST_REQUIRE(sv.IsProtein() == seq_inst.IsAa());
440 
441  for (TSeqPos i = 0; i < sv.size(); i++) {
442  const char kResidue = sv[i];
443  BOOST_REQUIRE_MESSAGE(kResidue == seq_data.GetNcbistdaa().Get()[i],
444  "Different residues at position " + NStr::IntToString(i));
445  }
446 }
447 
448 void
450  const CSeq_inst& seq_inst)
451 {
452  CSeq_inst::TSeq_data seq_data;
453 
454  // N.B.: data returned in seq_data is compressed 2 bases per byte
455  TSeqPos nconv = CSeqportUtil::Convert(seq_inst.GetSeq_data(),
456  &seq_data, CSeq_data::e_Ncbi4na);
457  BOOST_REQUIRE(seq_data.IsNcbi4na());
458 
460  BOOST_REQUIRE_EQUAL(sv.size(), nconv);
461  BOOST_REQUIRE(sv.IsProtein() == seq_inst.IsAa());
463 
464  for (TSeqPos i = 0; i < sv.size(); i++) {
465  const char kBase = sv[i];
466  const char kCompressedBase = seq_data.GetNcbi4na().Get()[(int)i/2];
467  char BaseTest;
468 
469  if ((i%2) == 0) {
470  // get the high 4 bits
471  BaseTest = (kCompressedBase & 0xF0) >> 4;
472  } else {
473  // get the low 4 bits
474  BaseTest = kCompressedBase & 0x0F;
475  }
476 
477  BOOST_REQUIRE_MESSAGE(static_cast<int>(kBase) == static_cast<int>(BaseTest),
478  "Different bases at position " + NStr::IntToString(i));
479  }
480 }
481 
482 // ========================================================================= //
483 // Unit Tests
484 
486 {
489 
490  static void
492  {
495  CRef<IQueryFactory> query_factory(new CObjMgrFree_QueryFactory(bs));/* NCBI_FAKE_WARNING */
496  CSequenceDataTester(query_factory, kGi)();
497  }
498 
499  static void
501  {
503  ITERATE(vector<TGi>, itr, gis) {
505  CRef<CSeq_entry> seq_entry(new CSeq_entry);
506  seq_entry->SetSeq(const_cast<CBioseq&>(*bh.GetBioseqCore()));
507  bs->SetSeq_set().push_back(seq_entry);
508  }
509  CRef<IQueryFactory> query_factory(new CObjMgrFree_QueryFactory(bs));/* NCBI_FAKE_WARNING */
510 
511  CSequenceDataTester(query_factory, gis)();
512  }
513 
514  static void
516  {
517  TSeqLocVector queries;
518  ITERATE(vector<TGi>, itr, gis) {
519  CSeq_id qid(CSeq_id::e_Gi, *itr);
520  unique_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(qid));
521  queries.push_back(*sl);
522  }
523 
524  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(queries));
525 
526  CSequenceDataTester(query_factory, gis)();
527  }
528 
529  static void
531  {
533 
534  ITERATE(vector<TGi>, itr, gis) {
535  CSeq_id qid(CSeq_id::e_Gi, *itr);
536 
538  sq(CTestObjMgr::Instance().CreateBlastSearchQuery(qid));
539 
540  queries->AddQuery(sq);
541  }
542 
543  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(*queries));
544 
545  CSequenceDataTester(query_factory, gis)();
546  }
547 
549  {
550  TSeqLocVector queries;
551  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(queries));
552  }
553 
555  {
557  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(*queries));
558  }
559 
560 };
561 
562 BOOST_FIXTURE_TEST_SUITE(QueryData, CQueryDataTestFixture)
563 
564 //
565 // Object-manager dependent test cases
566 //
567 
568 BOOST_AUTO_TEST_CASE(ObjMgr_QueryFactory_LocalData_GetSumOfSequenceLengths) {
569  vector<TGi> gis;
570  gis.push_back(GI_CONST(26));
571  gis.push_back(GI_CONST(555));
572  gis.push_back(GI_CONST(556));
574  ITERATE(vector<TGi>, itr, gis) {
576  CRef<CSeq_entry> seq_entry(new CSeq_entry);
577  seq_entry->SetSeq(const_cast<CBioseq&>(*bh.GetBioseqCore()));
578  bs->SetSeq_set().push_back(seq_entry);
579  }
580  CRef<IQueryFactory> query_factory(new CObjMgrFree_QueryFactory(bs)); /* NCBI_FAKE_WARNING */
582 
583  size_t kExpectedSize = 416+624+310;
584  CRef<ILocalQueryData> query_data =
585  query_factory->MakeLocalQueryData(&oh->GetOptions());
586  BOOST_REQUIRE_EQUAL(kExpectedSize,
587  query_data->GetSumOfSequenceLengths());
588 
589  // try a chromosome :)
590  CRef<CSeq_id> id(new CSeq_id(CSeq_id::e_Gi, 89161185));
592  q->AddQuery(CTestObjMgr::Instance().CreateBlastSearchQuery(*id));
593  query_factory.Reset(new CObjMgr_QueryFactory(*q));
594  kExpectedSize = 247249719;
595  query_data.Reset(query_factory->MakeLocalQueryData(&oh->GetOptions()));
596  BOOST_REQUIRE_EQUAL(kExpectedSize,
597  query_data->GetSumOfSequenceLengths());
598 }
599 
600 BOOST_AUTO_TEST_CASE(ObjMgr_QueryFactory_LocalDataFromTSeqLocVector_Protein) {
601  vector<TGi> gis;
602  gis.push_back(GI_CONST(38092615));
603  gis.push_back(GI_CONST(4506509));
604  s_ObjMgr_QueryFactory_LocalDataFromTSeqLocVector(gis);
605 }
606 
607 BOOST_AUTO_TEST_CASE(ObjMgr_QueryFactory_LocalDataFromTSeqLocVector_Nucleotide) {
608  vector<TGi> gis;
609  gis.push_back(GI_CONST(555));
610  gis.push_back(GI_CONST(556));
611  gis.push_back(GI_CONST(26));
612  s_ObjMgr_QueryFactory_LocalDataFromTSeqLocVector(gis);
613 }
614 
615 BOOST_AUTO_TEST_CASE(ObjMgr_QueryFactory_LocalDataFromBlastQueryVector_Protein) {
616  vector<TGi> gis;
617  gis.push_back(GI_CONST(38092615));
618  gis.push_back(GI_CONST(4506509));
619  s_ObjMgr_QueryFactory_LocalDataFromBlastQueryVector(gis);
620 }
621 
622 BOOST_AUTO_TEST_CASE(ObjMgr_QueryFactory_LocalDataFromBlastQueryVector_Nucleotide) {
623  vector<TGi> gis;
624  gis.push_back(GI_CONST(555));
625  gis.push_back(GI_CONST(556));
626  gis.push_back(GI_CONST(26));
627  s_ObjMgr_QueryFactory_LocalDataFromBlastQueryVector(gis);
628 }
629 
630 BOOST_AUTO_TEST_CASE(ObjMgr_QueryFactory_RemoteData_SingleBioseqFromTSeqLocVector) {
631  TGi kGi = GI_CONST(129295);
632  TSeqLocVector queries;
633  CSeq_id qid(CSeq_id::e_Gi, kGi);
634  unique_ptr<SSeqLoc> sl(CTestObjMgr::Instance().CreateSSeqLoc(qid));
635  queries.push_back(*sl);
636  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(queries));
637  CSequenceDataTester(query_factory, kGi)();
638 }
639 
640 BOOST_AUTO_TEST_CASE(ObjMgr_QueryFactory_RemoteData_SingleBioseqFromBlastQueryVector) {
641  TGi kGi = GI_CONST(129295);
642 
644  CSeq_id qid(CSeq_id::e_Gi, kGi);
645 
647  sq(CTestObjMgr::Instance().CreateBlastSearchQuery(qid));
648 
649  queries->AddQuery(sq);
650 
651  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(*queries));
652  CSequenceDataTester(query_factory, kGi)();
653 }
654 
655 BOOST_AUTO_TEST_CASE(ObjMgr_QueryFactory_EmptyTSeqLocVector) {
656  BOOST_REQUIRE_THROW(create_EmptyTSeqLocVector(), CBlastException);
657 }
658 
659 BOOST_AUTO_TEST_CASE(ObjMgr_QueryFactory_EmptyBlastQueryVector) {
660  BOOST_REQUIRE_THROW(create_EmptyBlastQueryVector(), CBlastException);
661 }
662 
663 //
664 // Object-manager independent test cases
665 //
666 
667 BOOST_AUTO_TEST_CASE(ObjMgrFree_QueryFactory_LocalDataFromBioseq_Protein) {
668  const TGi kGi = GI_CONST(129295);
669  s_ObjMgrFree_QueryFactory_LocalDataFromBioseq(kGi);
670 }
671 
672 BOOST_AUTO_TEST_CASE(ObjMgrFree_QueryFactory_LocalDataFromBioseq_Nucleotide) {
673  const TGi kGi = GI_CONST(555);
674  s_ObjMgrFree_QueryFactory_LocalDataFromBioseq(kGi);
675 }
676 
677 BOOST_AUTO_TEST_CASE(ObjMgrFree_QueryFactory_LocalDataFromBioseq_set_Protein) {
678  vector<TGi> gis;
679  gis.push_back(GI_CONST(129295));
680  gis.push_back(GI_CONST(87));
681  gis.push_back(GI_CONST(1900));
682  s_ObjMgrFree_QueryFactory_LocalDataFromBioseq_set(gis);
683 }
684 
685 BOOST_AUTO_TEST_CASE(ObjMgrFree_QueryFactory_LocalDataFromBioseq_set_Nucleotide) {
686  vector<TGi> gis;
687  gis.push_back(GI_CONST(26));
688  gis.push_back(GI_CONST(555));
689  gis.push_back(GI_CONST(556));
690  s_ObjMgrFree_QueryFactory_LocalDataFromBioseq_set(gis);
691 }
692 
Declares the BLAST exception class.
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
Definition: blast_program.c:40
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
@ eBlastn
Nucl-Nucl (traditional blastn)
Definition: blast_types.hpp:58
@ eBlastp
Protein-Protein.
Definition: blast_types.hpp:59
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
CBioseq_Handle –.
Defines BLAST error codes (user errors included)
Query Vector.
Definition: sseqloc.hpp:276
void AddQuery(CRef< CBlastSearchQuery > q)
Add a query to the set.
Definition: sseqloc.hpp:293
NCBI C++ Object Manager free implementation of IQueryFactory.
NCBI C++ Object Manager dependant implementation of IQueryFactory.
CSeqVector –.
Definition: seq_vector.hpp:65
Definition: Seq_entry.hpp:56
static bool IsAa(EMol mol)
Definition: Seq_inst.hpp:99
static TSeqPos Convert(const CSeq_data &in_seq, CSeq_data *out_seq, CSeq_data::E_Choice to_code, TSeqPos uBeginIdx=0, TSeqPos uLength=0, bool bAmbig=false, Uint4 seed=17734276)
void x_TestSingleNucleotide_Remote(const CSeq_id &id, const CSeq_inst &seq_inst)
void x_CompareSequenceData(CSeqVector &sv, const Uint1 *sequence, const string &strand="")
void x_TestSingleTranslatedNucl_Local(int index, const BlastQueryInfo *qinfo, const BLAST_SequenceBlk *seqblk)
void x_TestSingleSequence_Local(int index)
void x_TestLocalStrand(const CSeq_id &id, int ctx_index, ENa_strand strand, const BlastQueryInfo *qinfo, const BLAST_SequenceBlk *seqblk)
void x_Init(const IRemoteQueryData::TSeqLocs *seqlocs=0)
vector< ENa_strand > m_Strands
void x_TestSingleProtein_Local(int index, const BlastQueryInfo *qinfo, const BLAST_SequenceBlk *seqblk)
CConstRef< CBlastOptions > m_Options
CRef< IQueryFactory > m_QueryFactory
void x_TestSingleSequence_Remote(int index)
CSequenceDataTester(CRef< IQueryFactory > query_factory, TGi gi)
void x_TestSingleProtein_Remote(const CSeq_id &id, const CSeq_inst &seq_inst)
void x_TestSingleNucleotide_Local(int index, const BlastQueryInfo *qinfo, const BLAST_SequenceBlk *seqblk)
ENa_strand x_GetStrand(int index)
static CRef< CScope > NewScope(bool with_defaults=true)
Return a new scope, possibly (by default) with default loaders, which will include the Genbank loader...
Definition: simple_om.cpp:202
static CSeqVector GetSeqVector(const CSeq_id &id, ENa_strand strand=eNa_strand_plus)
Return a sequence vector for some kind of id or location.
Definition: simple_om.cpp:141
static CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Return a biosequence handle for some kind of id.
Definition: simple_om.cpp:173
static CTestObjMgr & Instance()
Definition: test_objmgr.cpp:69
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
Class for the messages for an individual query sequence.
objects::ENa_strand GetStrandOption() const
virtual BLAST_SequenceBlk * GetSequenceBlk()=0
Accessor for the BLAST_SequenceBlk structure.
CRef< ILocalQueryData > MakeLocalQueryData(const CBlastOptions *opts)
Creates and caches an ILocalQueryData.
Definition: query_data.cpp:52
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
list< CRef< objects::CSeq_loc > > TSeqLocs
Type definition for CSeq_loc set used as queries in the BLAST remote search class.
Definition: query_data.hpp:123
const Uint1 NCBI4NA_TO_BLASTNA[]
Translates between ncbi4na and blastna.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
virtual BlastQueryInfo * GetQueryInfo()=0
Accessor for the BlastQueryInfo structure.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
virtual CRef< objects::CBioseq_set > GetBioseqSet()=0
Accessor for the CBioseq_set.
CRef< IRemoteQueryData > MakeRemoteQueryData()
Creates and caches an IRemoteQueryData.
Definition: query_data.cpp:61
void GetQueryMessages(size_t index, TQueryMessages &qmsgs)
Retrieve error/warning messages for a specific query.
Definition: query_data.cpp:135
virtual TSeqLocs GetSeqLocs()=0
Accessor for the TSeqLocs.
size_t GetSumOfSequenceLengths()
Compute the sum of all the sequence's lengths.
Definition: query_data.cpp:107
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define GI_CONST(gi)
Definition: ncbimisc.hpp:1087
#define ZERO_GI
Definition: ncbimisc.hpp:1088
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_CURRENT_FUNCTION
Get current function name.
Definition: ncbidiag.hpp:142
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
const TPrim & Get(void) const
Definition: serialbase.hpp:347
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:1012
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
TGi GetGiForAccession(const string &acc, CScope &scope, EGetIdType flags=0)
Given an accession string retrieve the GI id.
Definition: sequence.cpp:638
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
TSeqPos size(void) const
Definition: seq_vector.hpp:291
bool IsProtein(void) const
Definition: seq_vector.hpp:350
void SetCoding(TCoding coding)
bool IsNucleotide(void) const
Definition: seq_vector.hpp:357
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
Definition: ncbiobj.hpp:968
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
#define NPOS
Definition: ncbistr.hpp:133
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_plus
Definition: Na_strand_.hpp:66
@ eNa_strand_minus
Definition: Na_strand_.hpp:67
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
@ eNa_strand_both
in forward orientation
Definition: Na_strand_.hpp:68
@ e_Gi
GenInfo Integrated Database.
Definition: Seq_id_.hpp:106
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
TSeq_set & SetSeq_set(void)
Assign a value to Seq_set data member.
TRepr GetRepr(void) const
Get the Repr member data.
Definition: Seq_inst_.hpp:565
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
bool IsNcbistdaa(void) const
Check if variant Ncbistdaa is selected.
Definition: Seq_data_.hpp:684
bool IsNcbi4na(void) const
Check if variant Ncbi4na is selected.
Definition: Seq_data_.hpp:564
TLength GetLength(void) const
Get the Length member data.
Definition: Seq_inst_.hpp:659
const TNcbistdaa & GetNcbistdaa(void) const
Get the variant data.
Definition: Seq_data_.hpp:690
const TNcbi4na & GetNcbi4na(void) const
Get the variant data.
Definition: Seq_data_.hpp:570
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
Definition: Seq_inst_.hpp:817
@ eRepr_raw
continuous sequence
Definition: Seq_inst_.hpp:94
@ e_Ncbistdaa
consecutive codes for std aas
Definition: Seq_data_.hpp:113
@ e_Ncbi4na
4 bit nucleic acid code
Definition: Seq_data_.hpp:107
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
Implementation of the uniform BLAST search interface for searching locally installed BLAST databases.
static char * prog
Definition: mdb_load.c:33
Magic spell ;-) needed for some weird compilers... very empiric.
Defines NCBI C++ diagnostic APIs, classes, and macros.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
BOOST_AUTO_TEST_CASE(ObjMgr_QueryFactory_LocalData_GetSumOfSequenceLengths)
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Structure to hold a sequence.
Definition: blast_def.h:242
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
Int4 query_length
Length of this query, strand or frame.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
The query related information.
Int4 first_context
Index of the first element of the context array.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
static void create_EmptyBlastQueryVector()
static void s_ObjMgrFree_QueryFactory_LocalDataFromBioseq_set(const vector< TGi > &gis)
static void create_EmptyTSeqLocVector()
static void s_ObjMgr_QueryFactory_LocalDataFromTSeqLocVector(const vector< TGi > &gis)
static void s_ObjMgr_QueryFactory_LocalDataFromBlastQueryVector(const vector< TGi > &gis)
static void s_ObjMgrFree_QueryFactory_LocalDataFromBioseq(TGi kGi)
Utility stuff for more convenient using of Boost.Test library.
Uniform BLAST Search Interface.
Modified on Tue May 14 16:20:07 2024 by modify_doxy.py rev. 669887