NCBI C++ ToolKit
cobalt_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cobalt_unit_test.cpp 100474 2023-08-04 15:13:04Z boratyng $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Greg Boratyn
27 *
28 * File Description:
29 * Unit tests for CMultiAligner
30 *
31 *
32 * ===========================================================================
33 */
34 
35 #include <ncbi_pch.hpp>
36 
37 #include <corelib/ncbi_system.hpp>
39 #include <objmgr/util/sequence.hpp>
41 #include <serial/iterator.hpp>
42 
49 
52 
53 #include <algo/cobalt/cobalt.hpp>
54 #include <algo/cobalt/options.hpp>
55 #include "cobalt_test_util.hpp"
56 
57 //#include <corelib/hash_set.hpp>
58 #include <unordered_set>
59 
60 
61 // This macro should be defined before inclusion of test_boost.hpp in all
62 // "*.cpp" files inside executable except one. It is like function main() for
63 // non-Boost.Test executables is defined only in one *.cpp file - other files
64 // should not include it. If NCBI_BOOST_NO_AUTO_TEST_MAIN will not be defined
65 // then test_boost.hpp will define such "main()" function for tests.
66 //
67 // Usually if your unit tests contain only one *.cpp file you should not
68 // care about this macro at all.
69 //
70 //#undef NCBI_BOOST_NO_AUTO_TEST_MAIN
71 #define NCBI_BOOST_NO_AUTO_TEST_MAIN
72 
73 // This header must be included before all Boost.Test headers if there are any
74 #include <corelib/test_boost.hpp>
75 
76 #include <common/test_assert.h> /* This header must go last */
77 
78 #ifndef SKIP_DOXYGEN_PROCESSING
79 
80 /// Calculate the size of a static array
81 #define STATIC_ARRAY_SIZE(array) (sizeof(array)/sizeof(*array))
82 
84 USING_SCOPE(cobalt);
86 
87 
88 /// Representation of a hit for computing constraints
89 struct SHit {
90  /// query id
91  string query;
92 
93  /// subject ordinal id in the database
94  int subject;
95 
96  /// alignment score
97  int score;
98 
99  /// alignment extents
102 
103  SHit(void) : query("lcl|null"), subject(-1), score(-1),
104  query_range(TRange(-1, -1)), subject_range(TRange(-1, -1))
105  {}
106 };
107 
108 
109 /// Test class for accessing CMultiAligner private attributes and methods
111 {
112 public:
113 
114  /// Set queries in the aligner only as Seq-locs do not retrieve sequences.
115  /// Useful for testing functions that only match sequence ids and do not
116  /// align sequences
117  static void SetQuerySeqlocs(CMultiAligner& aligner,
118  const vector< CRef<CSeq_loc> >& queries)
119  {
120  aligner.m_tQueries.clear();
121  ITERATE (vector< CRef<CSeq_loc> >, it, queries) {
122  BOOST_REQUIRE(it->NotEmpty());
123  aligner.m_tQueries.push_back(*it);
124  }
125  }
126 
127  /// Set pre-computed domain hits without invoking CMultiAligner::Run()
128  static void SetDomainHits(CMultiAligner& aligner,
130  {
131  BOOST_REQUIRE(!aligner.m_tQueries.empty());
132  BOOST_REQUIRE(!archive.Empty());
133 
134  blast::TSeqLocVector queries;
135  vector<int> indices;
136  aligner.x_CreateBlastQueries(queries, indices);
137  aligner.x_SetDomainHits(queries, indices, *archive);
138  }
139 
140  static const vector<bool>& GetIsDomainSearched(
141  const CMultiAligner& aligner)
142  {
143  return aligner.m_IsDomainSearched;
144  }
145 
146  static const CHitList& GetDomainHits(const CMultiAligner& aligner)
147  {
148  return aligner.m_DomainHits;
149  }
150 
151  /// Quit after doing RPS-BLAST search
153  {
154  return progress->stage == CMultiAligner::eDomainHitsSearch;
155  }
156 
157  /// Compare domain hits in CMultiAligner with reference alignments
158  /// @param expected_hits Reference alignments [in]
159  /// @param aligner CMultiAligner object with domain hits to compare [in]
160  /// @param err Error messages [out]
161  /// @return True if all hits are the same as reference, false otherwise
162  static bool CompareDomainHits(const vector<SHit>& expected_hits,
163  const CMultiAligner& aligner,
164  string& err)
165  {
166  bool retval = true;
167  const CHitList& hitlist = aligner.m_DomainHits;
168 
169  // compare numbers of hits
170  if ((int)expected_hits.size() != hitlist.Size()) {
171  err += "Hitlist sizes "
172  + NStr::UIntToString((unsigned)expected_hits.size())
173  + " and "
174  + NStr::IntToString(hitlist.Size())
175  + " do not match\n";
176 
177  retval = false;
178  }
179 
180  for (size_t i=0;i < min(expected_hits.size(), (size_t)hitlist.Size());
181  i++) {
182 
183  string header = "Hit " + NStr::UIntToString((unsigned)i) + ": ";
184  const CHit* hit = hitlist.GetHit((int)i);
185 
186  // compare query ids
187  CSeq_id expected_query_id(expected_hits[i].query);
188  if (aligner.GetQueries()[hit->m_SeqIndex1]->GetId()
189  ->CompareOrdered(expected_query_id) != 0) {
190 
191  err += header + "Query ids " + expected_hits[i].query
192  + " and " + aligner.GetQueries()[hit->m_SeqIndex1]
193  ->GetId()->AsFastaString() + " do not match\n";
194 
195  retval = false;
196  }
197 
198  // compare subject ordinal ids
199  if (expected_hits[i].subject != hit->m_SeqIndex2) {
200  err += header + "Subject ids "
201  + NStr::IntToString(expected_hits[i].subject)
202  + " and " + NStr::IntToString(hit->m_SeqIndex2)
203  + " do not match\n";
204 
205  retval = false;
206  }
207 
208  // compare query ranges
209  if (expected_hits[i].query_range.GetFrom()
210  != hit->m_SeqRange1.GetFrom()
211  || expected_hits[i].query_range.GetTo()
212  != hit->m_SeqRange1.GetTo()) {
213 
214  err += header + "Query ranges "
215  + NStr::IntToString(expected_hits[i].query_range.GetFrom())
216  + "-"
217  + NStr::IntToString(expected_hits[i].query_range.GetTo())
218  + " and "
219  + NStr::IntToString(hit->m_SeqRange1.GetFrom()) + "-"
221  + " do not match\n";
222 
223  retval = false;
224  }
225 
226  // compare subject ranges
227  if (expected_hits[i].subject_range.GetFrom()
228  != hit->m_SeqRange2.GetFrom()
229  || expected_hits[i].subject_range.GetTo()
230  != hit->m_SeqRange2.GetTo()) {
231 
232  err += header + "Subject ranges "
233  + NStr::IntToString(expected_hits[i].subject_range.GetFrom())
234  + "-"
235  + NStr::IntToString(expected_hits[i].subject_range.GetTo())
236  + " and "
237  + NStr::IntToString(hit->m_SeqRange2.GetFrom()) + "-"
239  + " do not match\n";
240 
241  retval = false;
242  }
243 
244  // compare alignment scores
245  if (expected_hits[i].score != hit->m_Score) {
246  err += header + "Scores "
247  + NStr::IntToString(expected_hits[i].score)
248  + " and "
249  + NStr::IntToString(hit->m_Score)
250  + " do not match\n";
251 
252  retval = false;
253  }
254  }
255 
256  return retval;
257  }
258 };
259 
260 
261 /// Fixture class initialized for each multialigner test
263 {
264 public:
267  static vector< CRef<CSeq_loc> > m_Sequences;
271 
274 
276  {
278  m_Options->SetRpsDb("data/cddtest");
279 
281  }
282 
284  {
285  m_Options.Reset();
286  m_Aligner.Reset();
287  }
288 
289  /// Initialize scope
290  static void x_InitScope(void)
291  {
293  m_Scope.Reset(new CScope(*m_Objmgr));
294  m_Scope->AddDefaults();
295  }
296 
297  /// Read test sequences in FASTA format from file
298  static void x_ReadSequences(void)
299  {
300  bool kParseDeflines = true;
301  int status = ReadFastaQueries("data/small.fa", m_Sequences, m_Scope,
302  kParseDeflines);
303 
304  if (status) {
305  NCBI_THROW(CException, eInvalid,
306  "Reading FASTA sequences has failed");
307  }
308  }
309 
310  /// Read test MSAs from files
311  static void x_ReadAlignments(void)
312  {
313  bool kParseDeflines = true;
314  int status = ReadMsa("data/msa1.fa", m_Align1, m_Scope,
315  kParseDeflines);
316 
317  if (status) {
318  NCBI_THROW(CException, eInvalid, "Reading alignments failed");
319  }
320 
321  status = ReadMsa("data/msa2.fa", m_Align2, m_Scope,
322  kParseDeflines);
323 
324  if (status) {
325  NCBI_THROW(CException, eInvalid, "Reading alignments failed");
326  }
327  }
328 
329  /// Read test RPS-BLAST output in the archive format from file
330  static void x_ReadRpsArchive(void)
331  {
333  CNcbiIfstream istr("data/rps_archive_seqloclist.asn");
334  if (!istr) {
335  NCBI_THROW(CException, eInvalid, "RPS-BLAST archive not found");
336  }
337  istr >> MSerial_AsnText >> *m_RpsArchive;
338  }
339 
340  /// Initialize static attributes
341  static void Initialize(void)
342  {
343  x_InitScope();
344  x_ReadSequences();
347  }
348 
349  /// Release static attributes
350  static void Finalize(void)
351  {
352  m_Sequences.clear();
353  m_Scope.Reset();
354  m_Objmgr.Reset();
355  m_Align1.Reset();
356  m_Align2.Reset();
358  }
359 };
360 
363 vector< CRef<CSeq_loc> > CMultiAlignerFixture::m_Sequences;
367 
368 
369 // Queries returned by aligner must be in the same order as
370 // in which they were assigned
371 static void s_TestQueriesAsSeq_locs(const vector< CRef<CSeq_loc> >& seqlocs,
372  CRef<CScope> scope)
373 {
374  CMultiAligner aligner;
375  aligner.SetQueries(seqlocs, scope);
376 
377  const vector< CRef<CSeq_loc> >& q = aligner.GetQueries();
378  BOOST_REQUIRE_EQUAL(seqlocs.size(), q.size());
379  for (size_t i=0;i < q.size();i++) {
380  BOOST_CHECK(seqlocs[i]->GetId()->Match(*q[i]->GetId()));
381  }
382 }
383 
384 // Queries returned by aligner must be in the same order as
385 // in which they were assigned
386 static void s_TestQueriesAsBioseqs(const vector< CRef<CBioseq> >& bioseqs)
387 {
388  CMultiAligner aligner;
389  aligner.SetQueries(bioseqs);
390 
391  const vector< CRef<CSeq_loc> >& q = aligner.GetQueries();
392  BOOST_REQUIRE_EQUAL(bioseqs.size(), q.size());
393  for (size_t i=0;i < q.size();i++) {
394  BOOST_CHECK(bioseqs[i]->GetFirstId()->Match(*q[i]->GetId()));
395  }
396 }
397 
398 
399 static void s_MakeBioseqs(const vector< CRef<CSeq_loc> >& seqlocs,
400  CRef<CScope> scope,
401  vector< CRef<CBioseq> >& bioseqs)
402 {
403  bioseqs.clear();
404  ITERATE(vector< CRef<CSeq_loc> >, it, seqlocs) {
405 
406  BOOST_REQUIRE((*it)->GetId());
407  CBioseq_Handle handle = scope->GetBioseqHandle(*(*it)->GetId());
408  CBioseq* bseq = (CBioseq*)handle.GetCompleteBioseq().GetNonNullPointer();
409  bioseqs.push_back(CRef<CBioseq>(bseq));
410  }
411 
412 }
413 
415 {
417 }
418 
420 {
422 }
423 
424 BOOST_FIXTURE_TEST_SUITE(multialigner, CMultiAlignerFixture)
425 
426 
427 // Make sure query sequences are assigned properly
428 BOOST_AUTO_TEST_CASE(TestSetQueries)
429 {
430  vector< CRef<CBioseq> > bioseqs;
431 
432  // Test for fasta input
433  s_MakeBioseqs(m_Sequences, m_Scope, bioseqs);
434  s_TestQueriesAsSeq_locs(m_Sequences, m_Scope);
435  s_TestQueriesAsBioseqs(bioseqs);
436 
437  // Test for sequences as gis
438  // There is a problem with CSeqVector
439 
440  // Using gis causes problems with CSeqVector
441  CSeq_id id("gi|129295");
443  BOOST_CHECK(h.GetSeqId()->Match(id));
444 
445  CSeq_loc s;
446  s.SetInt().SetFrom(0);
447  s.SetInt().SetTo(sequence::GetLength(id, m_Scope)-1);
448  s.SetInt().SetId().Assign(id);
449  BOOST_REQUIRE(s.GetId());
450  h = m_Scope->GetBioseqHandle(*s.GetId());
451 
452  //TODO: test for sequences as gis and mix.
453  // Currently there is a problem in CSeqVector used in CSequence
454  // CSeqMap::GetLength(CScope*) throws exception for some reason
455 }
456 
457 
458 BOOST_AUTO_TEST_CASE(TestBadQueries)
459 {
460  CMultiAligner aligner;
461 
462  vector< CRef<CSeq_loc> > seqlocs;
463  vector< CRef<CBioseq> > bioseqs;
464 
465  // Empty list of query sequences causes exception
466  BOOST_CHECK_THROW(aligner.SetQueries(seqlocs, m_Scope),
468 
469  BOOST_CHECK_THROW(aligner.SetQueries(bioseqs), CMultiAlignerException);
470 
471  // Empty seqloc or bioseq causes exception
472  seqlocs.push_back(CRef<CSeq_loc>(new CSeq_loc()));
473  seqlocs.push_back(CRef<CSeq_loc>(new CSeq_loc()));
474  BOOST_CHECK_THROW(aligner.SetQueries(seqlocs, m_Scope),
476 
477  bioseqs.push_back(CRef<CBioseq>(new CBioseq()));
478  bioseqs.push_back(CRef<CBioseq>(new CBioseq()));
479  BOOST_CHECK_THROW(aligner.SetQueries(bioseqs), CMultiAlignerException);
480 
481  seqlocs.clear();
482  seqlocs.push_back(m_Sequences.front());
483 
484  // Single input sequence causes exception
485  BOOST_REQUIRE_EQUAL((int)seqlocs.size(), 1);
486  BOOST_CHECK_THROW(aligner.SetQueries(seqlocs, m_Scope),
488 
489  s_MakeBioseqs(seqlocs, m_Scope, bioseqs);
490  BOOST_CHECK_THROW(aligner.SetQueries(bioseqs), CMultiAlignerException);
491 
492  // A gap in input sequence causes exception
494  CRef<CScope> scope(new CScope(*objmgr));
495  seqlocs.clear();
496  int status = ReadFastaQueries("data/queries_with_gaps.fa", seqlocs, scope);
497  BOOST_REQUIRE_EQUAL(status, 0);
498  BOOST_CHECK_THROW(aligner.SetQueries(seqlocs, scope),
500 
501  s_MakeBioseqs(seqlocs, scope, bioseqs);
502  BOOST_CHECK_THROW(aligner.SetQueries(bioseqs), CMultiAlignerException);
503 }
504 
505 
506 // Bad user constraints must be reported
507 BOOST_AUTO_TEST_CASE(TestBadUserConstraints)
508 {
509  vector< CRef<CBioseq> > bioseqs;
510  s_MakeBioseqs(m_Sequences, m_Scope, bioseqs);
511 
512  m_Options->SetRpsDb("");
513  CMultiAlignerOptions::TConstraints& constr = m_Options->SetUserConstraints();
514  constr.resize(1);
515 
516  int query_index = (int)m_Sequences.size();
517  constr[0].seq1_index = query_index;
518  constr[0].seq1_start = 0;
519  constr[0].seq1_stop = 50;
520 
521  constr[0].seq2_index = 0;
522  constr[0].seq2_start = 0;
523  constr[0].seq2_stop = 50;
524 
525  // this problem cannot be detected in options validation before queries
526  // are set
527  BOOST_REQUIRE(m_Options->Validate());
528 
529  m_Aligner.Reset(new CMultiAligner(m_Options));
530  BOOST_CHECK_THROW(m_Aligner->SetQueries(m_Sequences, m_Scope),
532 
533  m_Aligner.Reset(new CMultiAligner(m_Options));
534  BOOST_CHECK_THROW(m_Aligner->SetQueries(bioseqs),
536 
537  // user constraint with a position out of sequence range must be reported
538  constr.clear();
539  constr.resize(1);
540  constr[0].seq1_index = 0;
541  constr[0].seq1_start = 0;
542  constr[0].seq1_stop = 50;
543 
544  int length = (int)sequence::GetLength(*m_Sequences[1],
546 
547  constr[0].seq2_index = 1;
548  constr[0].seq2_start = 0;
549  constr[0].seq2_stop = length + 1;
550 
551  BOOST_REQUIRE(m_Options->Validate());
552 
553  m_Aligner.Reset(new CMultiAligner(m_Options));
554  BOOST_CHECK_THROW(m_Aligner->SetQueries(m_Sequences, m_Scope),
556 
557  m_Aligner.Reset(new CMultiAligner(m_Options));
558  BOOST_CHECK_THROW(m_Aligner->SetQueries(bioseqs),
560 }
561 
562 BOOST_AUTO_TEST_CASE(TestNoResults)
563 {
564  // Getting results without computing them causes exception
565  BOOST_CHECK_THROW(m_Aligner->GetResults(), CMultiAlignerException);
566  BOOST_CHECK_THROW(m_Aligner->GetTreeContainer(), CMultiAlignerException);
567 }
568 
569 
570 static bool s_Interrupt(CMultiAligner::SProgress* progress)
571 {
572  return true;
573 }
574 
575 BOOST_AUTO_TEST_CASE(TestInterrupt)
576 {
577  m_Aligner->SetQueries(m_Sequences, m_Scope);
578  m_Aligner->SetInterruptCallback(s_Interrupt);
579  CMultiAligner::TStatus status = m_Aligner->Run();
580  BOOST_CHECK_EQUAL((CMultiAligner::EStatus)status, CMultiAligner::eInterrupt);
581 }
582 
583 
584 // Check if all queries appear in the tree and all distances and labels are set
585 static void s_TestTree(vector<bool>& queries, const TPhyTreeNode* node)
586 {
587  // each node except for root must have set distance
588  if (node->GetParent()) {
589  BOOST_CHECK(node->GetValue().IsSetDist());
590  }
591 
592  if (node->IsLeaf()) {
593 
594  // each leaf node must have set label ...
595  BOOST_CHECK(!node->GetValue().GetLabel().empty());
596  const char* label = node->GetValue().GetLabel().c_str();
597  // some labels are of the form N<number>
598  if (!isdigit((unsigned char)label[0])) {
599  label++;
600  }
601  // ... which is the same as node id
602  BOOST_CHECK_EQUAL(node->GetValue().GetId(),
603  NStr::StringToInt((string)label));
604 
605  // each query id appears in the tree exactly once
606  int id = node->GetValue().GetId();
607  BOOST_REQUIRE(!queries[id]);
608  queries[id] = true;
609  }
610  else {
611 
612  // non-leaf nodes must have empty labels
613  BOOST_CHECK(node->GetValue().GetLabel().empty());
614 
616  for (; it != node->SubNodeEnd();it++) {
617  s_TestTree(queries, *it);
618  }
619  }
620 }
621 
622 // Check if all queries appear in the tree and all distances and labels are set
623 static void s_TestResultTree(int num_queries, const TPhyTreeNode* tree)
624 {
625  // make sure that all query sequences are represented by id and label
626  // check both GetTree() and GetTreeContainer()
627 
628  vector<bool> used_queries(num_queries, false);
629  s_TestTree(used_queries, tree);
630 
631  // make sure all queries are in the tree
632  ITERATE(vector<bool>, it, used_queries) {
633  BOOST_REQUIRE(*it);
634  }
635 }
636 
637 // Make sure that all queries appear in the tree
638 static void s_TestResultTreeContainer(int num_queries,
639  const CBioTreeContainer& btc)
640 {
641  vector<bool> used_queries(num_queries, false);
642 
643  // Find feature id for node label
644  int label_fid = -1;
645  int dist_fid = -1;
647  BOOST_CHECK((*it)->CanGetName());
648  if ((*it)->GetName() == "label") {
649  label_fid = (*it)->GetId();
650  }
651  if ((*it)->GetName() == "dist") {
652  dist_fid = (*it)->GetId();
653  }
654  }
655  // node label feature must be present
656  BOOST_REQUIRE(label_fid >= 0);
657  // dist feature must be present
658  BOOST_REQUIRE(dist_fid >= 0);
659 
660  ITERATE(CNodeSet::Tdata, node, btc.GetNodes().Get()) {
661  if ((*node)->GetId() == 0) {
662  continue;
663  }
664 
665  // each node except for root must have features
666  BOOST_REQUIRE((*node)->CanGetFeatures());
667 
668  bool is_dist = false;
669  ITERATE(CNodeFeatureSet::Tdata, feat, (*node)->GetFeatures().Get()) {
670  if ((*feat)->GetFeatureid() == label_fid) {
671  string label = (*feat)->GetValue();
672  const char* ptr = label.c_str();
673  if (!isdigit(ptr[0])) {
674  ptr++;
675  }
676 
677  // each query id must appear exactly once in the tree
678  int id = NStr::StringToInt((string)ptr);
679  BOOST_REQUIRE(!used_queries[id]);
680  used_queries[id] = true;
681  }
682 
683  if ((*feat)->GetFeatureid() == dist_fid) {
684  is_dist = true;
685  }
686  }
687  // each node except for root must have dist feature
688  BOOST_CHECK(is_dist);
689  }
690 
691  // make sure all query ids were found
692  ITERATE(vector<bool>, it, used_queries) {
693  BOOST_REQUIRE(*it);
694  }
695 }
696 
697 
698 // Check whether all queries appear in clusters and each prototype belongs
699 // to the cluster
700 static void s_TestResultClusters(int num_queries,
701  const CClusterer::TClusters& clusters,
702  const CMultiAlignerOptions::TConstraints& constraints)
703 {
704  // Exit if there are no clusters
705  if (clusters.empty()) {
706  return;
707  }
708 
709  // Check if all queries appear in clusters
710  vector<bool> used_queries(num_queries, false);
711  int num_elems = 0;
712  ITERATE(CClusterer::TClusters, cluster, clusters) {
713 
714  // cluster must not be empty
715  BOOST_REQUIRE(cluster->size() > 0);
716  num_elems += (int)cluster->size();
717 
718  int prototype = cluster->GetPrototype();
719  bool is_prototype = false;
720  ITERATE(CClusterer::TSingleCluster, elem, *cluster) {
721 
722  // each query appears exactly onces in all clusters
723  BOOST_REQUIRE(!used_queries[*elem]);
724  used_queries[*elem] = true;
725 
726  if (prototype == *elem) {
727  is_prototype = true;
728  }
729  }
730 
731  // prototype must belong to the cluster
732  BOOST_CHECK(is_prototype);
733  }
734 
735  // make sure all queries appear once in all clusters
736  BOOST_REQUIRE_EQUAL(num_elems, num_queries);
737 
738  // make sure all queries were found
739  ITERATE(vector<bool>, it, used_queries) {
740  BOOST_CHECK(*it);
741  }
742 
743 
744  // Check if queries involved in user constraints for one-element clusters
745  if (constraints.empty()) {
746  return;
747  }
748 
749  unordered_set<int> constr_queries;
750  ITERATE(CMultiAlignerOptions::TConstraints, it, constraints) {
751  constr_queries.insert(it->seq1_index);
752  constr_queries.insert(it->seq2_index);
753  }
754  size_t remain = constr_queries.size();
755 
756  ITERATE(CClusterer::TClusters, cluster, clusters) {
757  ITERATE(CClusterer::TSingleCluster, elem, *cluster) {
758  unordered_set<int>::const_iterator it = constr_queries.find(*elem);
759  if (it != constr_queries.end()) {
760 
761  // query that appears in a user constraint must be in
762  // a one-element cluster
763  BOOST_CHECK_EQUAL(cluster->size(), 1u);
764  remain--;
765  }
766  }
767 
768  // exit loop if all constraint queries are checked
769  if (remain == 0) {
770  break;
771  }
772  }
773 }
774 
775 
776 static void s_TestResultAlignment(const vector< CRef<CSeq_loc> >& queries,
777  const CRef<CSeq_align>& seqalign,
778  const vector<CSequence>& seqs,
779  CRef<CScope> scope,
780  const string& aln_ref = "")
781 {
782  // alignment must be of type global
783  BOOST_CHECK_EQUAL(seqalign->GetType(), CSeq_align::eType_global);
784 
785  int num_queries = (int)queries.size();
786  // dim must be equal to number of queries
787  BOOST_REQUIRE_EQUAL(seqalign->GetDim(), num_queries);
788 
789  // sequences in the seqalign must be in the same order as in queries
790  for (int i=0;i < num_queries;i++) {
791  BOOST_REQUIRE(queries[i]->GetId()->Match(seqalign->GetSeq_id(i)));
792 
793  //seqalign->GetSeq_id(i).Match(ref_align.GetSeq_id(i)));
794  }
795 
796  // all sequneces in CSequence format must have equal length
797  BOOST_REQUIRE_EQUAL(seqs.size(), queries.size());
798  int len = seqs[0].GetLength();
799  ITERATE(vector<CSequence>, it, seqs) {
800  BOOST_CHECK_EQUAL(it->GetLength(), len);
801  }
802 
803  // make sure that all residues appear in the resulting alignment
804  for (size_t i=0;i < queries.size();i++) {
805  int query_len = (int)sequence::GetLength(*queries[i],
806  scope.GetPointer());
807 
808  int num_residues = 0;
809  const unsigned char* sequence = seqs[i].GetSequence();
810  for (int k=0;k < seqs[i].GetLength();k++) {
811  if (sequence[k] != CSequence::kGapChar) {
812  num_residues++;
813  }
814  }
815  BOOST_CHECK_EQUAL(query_len, num_residues);
816  }
817 
818 
819  // if reference file name provided, compare seq-align with the reference
820  if (!aln_ref.empty()) {
821  CSeq_align ref_align;
822  CNcbiIfstream istr(aln_ref.c_str());
823  istr >> MSerial_AsnText >> ref_align;
824 
825  // check order of seq-ids
826  BOOST_REQUIRE_EQUAL(seqalign->GetDim(), ref_align.GetDim());
827 
828  // compare resulting alignment to the reference
829  const CDense_seg& denseg = seqalign->GetSegs().GetDenseg();
830  const CDense_seg::TStarts& starts = denseg.GetStarts();
831  const CDense_seg::TLens& lens = denseg.GetLens();
832 
833  const CDense_seg& ref_denseg = ref_align.GetSegs().GetDenseg();
834  const CDense_seg::TStarts& ref_starts = ref_denseg.GetStarts();
835  const CDense_seg::TLens& ref_lens = ref_denseg.GetLens();
836 
837 
838  BOOST_REQUIRE_EQUAL(starts.size(), ref_starts.size());
839  BOOST_REQUIRE_EQUAL(lens.size(), ref_lens.size());
840  for (size_t i=0;i < starts.size();i++) {
841  BOOST_CHECK_EQUAL(starts[i], ref_starts[i]);
842  }
843  for (size_t i=0;i < lens.size();i++) {
844  BOOST_CHECK_EQUAL(lens[i], ref_lens[i]);
845  }
846  }
847 }
848 
849 
850 static void s_TestResults(CMultiAligner& aligner,
851  const string& ref_aln = "")
852 {
853  const int kNumQueries = (int)aligner.GetQueries().size();
854 
855  s_TestResultClusters(kNumQueries, aligner.GetQueryClusters(),
856  aligner.GetOptions()->GetUserConstraints());
857  s_TestResultTree(kNumQueries, aligner.GetTree());
858  s_TestResultTreeContainer(kNumQueries, *aligner.GetTreeContainer());
859  s_TestResultAlignment(aligner.GetQueries(), aligner.GetResults(),
860  aligner.GetSeqResults(), aligner.GetScope(),
861  ref_aln);
862 }
863 
864 // The tests below check for proper results for different options
865 
866 // The default options should result in success status
867 BOOST_AUTO_TEST_CASE(TestResultsForDefaultOpts)
868 {
869  BOOST_REQUIRE(m_Options->Validate());
870 
871  m_Aligner->SetQueries(m_Sequences, m_Scope);
872 
873  CMultiAligner::TStatus status = m_Aligner->Run();
874  BOOST_REQUIRE_EQUAL(status, (CMultiAligner::TStatus)CMultiAligner::eSuccess);
875  BOOST_CHECK_EQUAL((int)m_Aligner->GetMessages().size(), 0);
876 
877  s_TestResults(*m_Aligner);
878 // s_TestResults(aligner, "data/ref_seqalign.asn");
879 }
880 
881 
882 // Max cluster diameter set to zero must cause a warning
883 BOOST_AUTO_TEST_CASE(TestResultsForZeroClusterDiam)
884 {
885  m_Options->SetMaxInClusterDist(0.0);
886  BOOST_REQUIRE(m_Options->Validate());
887 
888  m_Aligner.Reset(new CMultiAligner(m_Options));
889  m_Aligner->SetQueries(m_Sequences, m_Scope);
890 
891  CMultiAligner::TStatus status = m_Aligner->Run();
892 
893  //Bad input file - there are two exactly same sequences
894 
895  BOOST_REQUIRE_EQUAL(status, (CMultiAligner::TStatus)CMultiAligner::eSuccess);
896  BOOST_CHECK((int)m_Aligner->GetMessages().size() == 0);
897 
898  s_TestResults(*m_Aligner);
899 }
900 
901 
902 // Max cluster diameter set to 1 (maxiumu value) must cause a warning
903 BOOST_AUTO_TEST_CASE(TestResultsForMaxClusterDiam)
904 {
905  m_Options->SetMaxInClusterDist(1.0);
906  BOOST_REQUIRE(m_Options->Validate());
907 
908  m_Aligner.Reset(new CMultiAligner(m_Options));
909  m_Aligner->SetQueries(m_Sequences, m_Scope);
910 
911  CMultiAligner::TStatus status = m_Aligner->Run();
912 
913  //Bad input file - there are two exactly same sequences
914 
915  BOOST_REQUIRE_EQUAL(status, (CMultiAligner::TStatus)CMultiAligner::eSuccess);
916 
917  s_TestResults(*m_Aligner);
918 }
919 
920 
921 BOOST_AUTO_TEST_CASE(TestResultsForNoClusters)
922 {
923  m_Options.Reset(new CMultiAlignerOptions(
926 
927  BOOST_REQUIRE(m_Options->Validate());
928 
929  m_Aligner.Reset(new CMultiAligner(m_Options));
930  m_Aligner->SetQueries(m_Sequences, m_Scope);
931 
932  CMultiAligner::TStatus status = m_Aligner->Run();
933  BOOST_REQUIRE_EQUAL(status, (CMultiAligner::TStatus)CMultiAligner::eSuccess);
934  BOOST_CHECK_EQUAL((int)m_Aligner->GetMessages().size(), 0);
935 
936  s_TestResults(*m_Aligner);
937 }
938 
939 
940 // Make sure that queries that appear in user constraints form one-element
941 // clusters
942 BOOST_AUTO_TEST_CASE(TestResultsForClustersAndUserConstraints)
943 {
944  vector<CMultiAlignerOptions::SConstraint>& constr
945  = m_Options->SetUserConstraints();
946 
947  constr.resize(2);
948  constr[0].seq1_index = 0;
949  constr[0].seq1_start = 0;
950  constr[0].seq1_stop = 50;
951 
952  constr[0].seq2_index = 1;
953  constr[0].seq2_start = 0;
954  constr[0].seq2_stop = 50;
955 
956  constr[1].seq1_index = 1;
957  constr[1].seq1_start = 0;
958  constr[1].seq1_stop = 50;
959 
960  constr[1].seq2_index = 5;
961  constr[1].seq2_start = 0;
962  constr[1].seq2_stop = 50;
963 
964  BOOST_REQUIRE(m_Options->Validate());
965 
966  m_Aligner.Reset(new CMultiAligner(m_Options));
967  m_Aligner->SetQueries(m_Sequences, m_Scope);
968 
969  CMultiAligner::TStatus status = m_Aligner->Run();
970  BOOST_REQUIRE_EQUAL(status, (CMultiAligner::TStatus)CMultiAligner::eSuccess);
971  BOOST_CHECK_EQUAL((int)m_Aligner->GetMessages().size(), 0);
972 
973  s_TestResults(*m_Aligner);
974 }
975 
976 
977 // Two sequences is a special case for query clustering and computing
978 // guide tree as cluster dendrogram
979 BOOST_AUTO_TEST_CASE(TestTwoSequences)
980 {
981  // case with one cluster
982  m_Options->SetMaxInClusterDist(1.0);
983  m_Aligner.Reset(new CMultiAligner(m_Options));
984 
985  vector< CRef<CSeq_loc> > queries;
986  queries.push_back(m_Sequences[0]);
987  queries.push_back(m_Sequences[1]);
988  m_Aligner->SetQueries(queries, m_Scope);
989 
990  CMultiAligner::TStatus status = m_Aligner->Run();
991  BOOST_CHECK(status == CMultiAligner::eSuccess);
992  s_TestResults(*m_Aligner);
993 
994  // case with two clusters
995  m_Options->SetMaxInClusterDist(0.01);
996  m_Aligner.Reset(new CMultiAligner(m_Options));
997  m_Aligner->SetQueries(queries, m_Scope);
998  status = m_Aligner->Run();
999  BOOST_CHECK(status == CMultiAligner::eSuccess);
1000  s_TestResults(*m_Aligner);
1001 }
1002 
1003 // Test for computing a large alignment for results of BLAST search, good
1004 // for testing alignments with large query clusters
1005 BOOST_AUTO_TEST_CASE(TestLargeAlignment)
1006 {
1008  CRef<CScope> scope(new CScope(*objmgr));
1009 
1010  vector< CRef<CSeq_loc> > sequences;
1011  BOOST_REQUIRE_EQUAL(ReadFastaQueries("data/large.fa", sequences, scope,
1012  false), 0);
1013 
1014  m_Options->SetUseQueryClusters(true);
1015  CMultiAligner aligner(m_Options);
1016  aligner.SetQueries(sequences, scope);
1017 
1018  CMultiAligner::TStatus status = aligner.Run();
1019  // this task runs out of memory on some machines
1020  BOOST_CHECK(status == CMultiAligner::eSuccess
1021  || status == CMultiAligner::eOutOfMemory);
1022 
1023  if (status == CMultiAligner::eSuccess) {
1024  s_TestResults(aligner);
1025  }
1026 }
1027 
1029  CRef<CSeq_align> in_second)
1030 {
1031  // alignment must be of type global
1032  BOOST_REQUIRE_EQUAL(result->GetType(), CSeq_align::eType_global);
1033 
1034  BOOST_REQUIRE(result->GetSegs().IsDenseg());
1035  BOOST_REQUIRE(in_first->GetSegs().IsDenseg());
1036  BOOST_REQUIRE(in_second->GetSegs().IsDenseg());
1037 
1038  const CDense_seg& first_denseg = in_first->GetSegs().GetDenseg();
1039  const CDense_seg& second_denseg = in_second->GetSegs().GetDenseg();
1040 
1041  int num_input_sequences = (int)first_denseg.GetDim()
1042  + second_denseg.GetDim();
1043 
1044  // dim must be equal to number of input sequences
1045  BOOST_REQUIRE_EQUAL(result->GetDim(), num_input_sequences);
1046 
1047  vector<int> first_rows, second_rows;
1048 
1049  // all sequence ids in input alignments must be present in the result
1050  for (size_t i=0;i < first_denseg.GetIds().size();i++) {
1051  const CSeq_id& id = *first_denseg.GetIds()[i];
1052  int j = 0;
1053  for (;j < result->GetDim();j++) {
1054  if (id.Match(*result->GetSegs().GetDenseg().GetIds()[j])) {
1055  first_rows.push_back(j);
1056  break;
1057  }
1058  }
1059  BOOST_REQUIRE(j < result->GetDim());
1060  }
1061 
1062  for (size_t i=0;i < second_denseg.GetIds().size();i++) {
1063  const CSeq_id& id = *second_denseg.GetIds()[i];
1064  int j = 0;
1065  for (;j < result->GetDim();j++) {
1066  if (id.Match(*result->GetSegs().GetDenseg().GetIds()[j])) {
1067  second_rows.push_back(j);
1068  break;
1069  }
1070  }
1071  BOOST_REQUIRE(j < result->GetDim());
1072  }
1073 
1074  // if one extracts input alignments from the result, they should be the
1075  // same as the input alignments
1076 
1077  // compare the first input alignment
1078  CRef<CDense_seg> f = result->GetSegs().GetDenseg().ExtractRows(first_rows);
1079  f->RemovePureGapSegs();
1080  f->Compact();
1081  BOOST_REQUIRE_EQUAL(first_denseg.GetStarts().size(), f->GetStarts().size());
1082  BOOST_REQUIRE_EQUAL(first_denseg.GetLens().size(), f->GetLens().size());
1083  for (size_t i=0;i < first_denseg.GetStarts().size();i++) {
1084  BOOST_REQUIRE_EQUAL(first_denseg.GetStarts()[i], f->GetStarts()[i]);
1085  }
1086  for (size_t i=0;i < first_denseg.GetLens().size();i++) {
1087  BOOST_REQUIRE_EQUAL(first_denseg.GetLens()[i], f->GetLens()[i]);
1088  }
1089 
1090  CRef<CDense_seg> s = result->GetSegs().GetDenseg().ExtractRows(second_rows);
1091 
1092  // compare the second input alignment
1093  s->RemovePureGapSegs();
1094  s->Compact();
1095  BOOST_REQUIRE_EQUAL(second_denseg.GetStarts().size(), s->GetStarts().size());
1096  BOOST_REQUIRE_EQUAL(second_denseg.GetLens().size(), s->GetLens().size());
1097  for (size_t i=0;i < second_denseg.GetStarts().size();i++) {
1098  BOOST_REQUIRE_EQUAL(second_denseg.GetStarts()[i], s->GetStarts()[i]);
1099  }
1100  for (size_t i=0;i < second_denseg.GetLens().size();i++) {
1101  BOOST_REQUIRE_EQUAL(second_denseg.GetLens()[i], s->GetLens()[i]);
1102  }
1103 }
1104 
1105 
1106 BOOST_AUTO_TEST_CASE(TestAlignMSAs)
1107 {
1108  set<int> repr;
1109 
1110  // align input MSAs
1111  m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr, m_Scope);
1112  m_Aligner->Run();
1113 
1114  // test result
1115  s_TestAlignmentFromMSAs(m_Aligner->GetResults(), m_Align1, m_Align2);
1116 }
1117 
1118 
1119 BOOST_AUTO_TEST_CASE(TestAlignMSAWithSequence)
1120 {
1121  set<int> repr;
1122 
1123  // create a one-sequence Seq_align from the input sequence
1124  CRef<CSeq_align> align(new CSeq_align());
1126  align->SetDim(1);
1127  CDense_seg& denseg = align->SetSegs().SetDenseg();
1128  denseg.SetDim(1);
1129  denseg.SetNumseg(1);
1130  denseg.SetIds().push_back(CRef<CSeq_id>(
1131  const_cast<CSeq_id*>(m_Sequences.front()->GetId())));
1132  denseg.SetStarts().push_back(0);
1133  CSeqVector v(*m_Sequences.front(), *m_Scope);
1134  denseg.SetLens().push_back(v.size());
1135 
1136  // align MSA and sequence
1137  m_Aligner->SetInputMSAs(*m_Align1, *align, repr, repr, m_Scope);
1138  m_Aligner->Run();
1139 
1140  // test result
1141  s_TestAlignmentFromMSAs(m_Aligner->GetResults(), m_Align1, align);
1142 }
1143 
1144 BOOST_AUTO_TEST_CASE(TestAlignMSAsWithRepresentatives)
1145 {
1146  // set representatives
1147  BOOST_REQUIRE(m_Align1->CheckNumRows() > 3);
1148  BOOST_REQUIRE(m_Align2->CheckNumRows() > 3);
1149  set<int> repr;
1150  repr.insert(2);
1151  repr.insert(3);
1152 
1153  // align input MSAs
1154  m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr, m_Scope);
1155  m_Aligner->Run();
1156 
1157  // test result
1158  s_TestAlignmentFromMSAs(m_Aligner->GetResults(), m_Align1, m_Align2);
1159 }
1160 
1161 BOOST_AUTO_TEST_CASE(TestAlignMSAsWithWrongRepresentatives)
1162 {
1163  // set representatives
1164  set<int> repr;
1165 
1166  // indeces of representatives must be non-negative
1167  repr.insert(-1);
1168  BOOST_CHECK_THROW(m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr,
1169  m_Scope),
1171 
1172 
1173  // indeces of representatives must be smaller than number of sequences
1174  // in MSA
1175  repr.clear();
1176  repr.insert(m_Align1->CheckNumRows());
1177  BOOST_CHECK_THROW(m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr,
1178  m_Scope),
1180 }
1181 
1182 BOOST_AUTO_TEST_CASE(TestSetPrecomputedDomainHitsWithQueriesAsBioseqset)
1183 {
1184  // make a copy of sequence ids
1185  vector< CRef<CSeq_id> > expected_queries;
1186  ITERATE(vector< CRef<CSeq_loc> >, it, m_Sequences) {
1187  string id = (*it)->GetId()->AsFastaString();
1188  expected_queries.push_back(CRef<CSeq_id>(new CSeq_id(id)));
1189  }
1190 
1191  // set options
1192  m_Options->SetUseQueryClusters(false);
1193  m_Options->SetRpsEvalue(0.1);
1194 
1195  m_Aligner.Reset(new CMultiAligner(m_Options));
1196  m_Aligner->SetQueries(m_Sequences, m_Scope);
1197 
1198  // read RPS-BLAST archive
1200  CNcbiIfstream istr("data/rps_archive_bioseqset.asn");
1201  istr >> MSerial_AsnText >> *archive;
1202 
1203  // check pre conditions
1204  BOOST_REQUIRE_EQUAL(m_Options->GetUseQueryClusters(), false);
1205  BOOST_REQUIRE(fabs(m_Options->GetRpsEvalue() - 0.1) < 0.01);
1206 
1207  // set pre-computed domain hits
1208  CMultiAlignerTest::SetDomainHits(*m_Aligner, archive);
1209 
1210  // Tests
1211 
1212  // verify sure that queries did not change
1213  BOOST_REQUIRE_EQUAL(expected_queries.size(),
1214  m_Aligner->GetQueries().size());
1215 
1216  for (size_t i=0;i < expected_queries.size();i++) {
1217  BOOST_REQUIRE(expected_queries[i]->CompareOrdered(
1218  *m_Aligner->GetQueries()[i]->GetId()) == 0);
1219  }
1220 
1221  // expected values of CMultiAligner::m_IsSearchedDomain
1222  vector<bool> expected_is_domain_searched(m_Sequences.size(), false);
1223  expected_is_domain_searched[0] = true;
1224  expected_is_domain_searched[2] = true;
1225 
1226  BOOST_REQUIRE_EQUAL(expected_is_domain_searched.size(),
1227  CMultiAlignerTest::GetIsDomainSearched(*m_Aligner).size());
1228 
1229  for (size_t i=0;i < expected_is_domain_searched.size();i++) {
1230  BOOST_REQUIRE_EQUAL(expected_is_domain_searched[i],
1232  }
1233 
1234 
1235  // Compare domain hits
1236 
1237  const size_t kNumExpectedPreHits = 7;
1238  vector<SHit> expected_hits(kNumExpectedPreHits);
1239 
1240  // Domain Hit #0
1241  expected_hits[0].query = "lcl|1buc_A";
1242  expected_hits[0].subject = 1;
1243  expected_hits[0].query_range = TRange(6, 382);
1244  expected_hits[0].subject_range = TRange(0, 372);
1245  expected_hits[0].score = 1414;
1246 
1247  // Domain Hit #1
1248  expected_hits[1].query = "lcl|1buc_A";
1249  expected_hits[1].subject = 0;
1250  expected_hits[1].query_range = TRange(95, 377);
1251  expected_hits[1].subject_range = TRange(42, 325);
1252  expected_hits[1].score = 885;
1253 
1254  // Domain Hit #2
1255  expected_hits[2].query = "lcl|1buc_A";
1256  expected_hits[2].subject = 2;
1257  expected_hits[2].query_range = TRange(1, 382);
1258  expected_hits[2].subject_range = TRange(19, 405);
1259  expected_hits[2].score = 718;
1260 
1261  // Domain Hit #3
1262  expected_hits[3].query = "lcl|Q8jzn5";
1263  expected_hits[3].subject = 2;
1264  expected_hits[3].query_range = TRange(41, 448);
1265  expected_hits[3].subject_range = TRange(0, 408);
1266  expected_hits[3].score = 1779;
1267 
1268  // Domain Hit #4
1269  expected_hits[4].query = "lcl|Q8jzn5";
1270  expected_hits[4].subject = 1;
1271  expected_hits[4].query_range = TRange(88, 440);
1272  expected_hits[4].subject_range = TRange(22, 367);
1273  expected_hits[4].score = 981;
1274 
1275  // Domain Hit #5
1276  expected_hits[5].query = "lcl|Q8jzn5";
1277  expected_hits[5].subject = 0;
1278  expected_hits[5].query_range = TRange(151, 440);
1279  expected_hits[5].subject_range = TRange(42, 325);
1280  expected_hits[5].score = 872;
1281 
1282  // Domain Hit #6
1283  expected_hits[6].query = "lcl|Q8jzn5";
1284  expected_hits[6].subject = 0;
1285  expected_hits[6].query_range = TRange(511, 581);
1286  expected_hits[6].subject_range = TRange(208, 280);
1287  expected_hits[6].score = 75;
1288 
1289  string errors;
1290  bool hits_match = CMultiAlignerTest::CompareDomainHits(expected_hits,
1291  *m_Aligner,
1292  errors);
1293 
1294  BOOST_REQUIRE_MESSAGE(hits_match, errors);
1295 
1296 
1297  // Do RPS-BLAST search inside aligner and verify that the search was done
1298  // only for queries without pre-computed domain hits
1299 
1300  // set interrupt so that COBALT quits right after RPS-BLAST search
1301  m_Aligner->SetInterruptCallback(
1303  m_Aligner->Run();
1304 
1305  const size_t kNumExpectedHits = 10;
1306  BOOST_REQUIRE(kNumExpectedHits > kNumExpectedPreHits);
1307  expected_hits.resize(kNumExpectedHits);
1308 
1309 
1310  // Domain Hit #7
1311  expected_hits[7].query = "lcl|Q10535";
1312  expected_hits[7].subject = 2;
1313  expected_hits[7].query_range = TRange(27, 432);
1314  expected_hits[7].subject_range = TRange(0, 400);
1315  expected_hits[7].score = 768;
1316 
1317  // Domain Hit #8
1318  expected_hits[8].query = "lcl|Q10535";
1319  expected_hits[8].subject = 0;
1320  expected_hits[8].query_range = TRange(138, 433);
1321  expected_hits[8].subject_range = TRange(42, 326);
1322  expected_hits[8].score = 738;
1323 
1324  // Domain Hit #9
1325  expected_hits[9].query = "lcl|Q10535";
1326  expected_hits[9].subject = 1;
1327  expected_hits[9].query_range = TRange(75, 434);
1328  expected_hits[9].subject_range = TRange(24, 369);
1329  expected_hits[9].score = 704;
1330 
1331 
1332  hits_match = CMultiAlignerTest::CompareDomainHits(expected_hits, *m_Aligner,
1333  errors);
1334 
1335  BOOST_REQUIRE_MESSAGE(hits_match, errors);
1336 }
1337 
1338 BOOST_AUTO_TEST_CASE(TestSetPrecomputedDomainHitsWithQueriesAsSeqLocs)
1339 {
1340  // make a copy of sequence ids
1341  vector< CRef<CSeq_id> > expected_queries;
1342  ITERATE(vector< CRef<CSeq_loc> >, it, m_Sequences) {
1343  string id = (*it)->GetId()->AsFastaString();
1344  expected_queries.push_back(CRef<CSeq_id>(new CSeq_id(id)));
1345  }
1346 
1347  // set options
1348  m_Options->SetUseQueryClusters(false);
1349  m_Options->SetRpsEvalue(0.1);
1350  m_Aligner.Reset(new CMultiAligner(m_Options));
1351 
1352  m_Aligner->SetQueries(m_Sequences, m_Scope);
1353 
1354  // check pre conditions
1355  BOOST_REQUIRE_EQUAL(m_Options->GetUseQueryClusters(), false);
1356  BOOST_REQUIRE(fabs(m_Options->GetRpsEvalue() - 0.1) < 0.01);
1357 
1358  // set pre-computed domain hits
1359  CMultiAlignerTest::SetDomainHits(*m_Aligner, m_RpsArchive);
1360 
1361  // Tests
1362 
1363  // verify sure that queries did not change
1364  BOOST_REQUIRE_EQUAL(expected_queries.size(),
1365  m_Aligner->GetQueries().size());
1366 
1367  for (size_t i=0;i < expected_queries.size();i++) {
1368  BOOST_REQUIRE(expected_queries[i]->CompareOrdered(
1369  *m_Aligner->GetQueries()[i]->GetId()) == 0);
1370  }
1371 
1372  // expected values of CMultiAligner::m_IsSearchedDomain
1373  vector<bool> expected_is_domain_searched(m_Sequences.size(), false);
1374  expected_is_domain_searched[0] = true;
1375  expected_is_domain_searched[2] = true;
1376 
1377  BOOST_REQUIRE_EQUAL(expected_is_domain_searched.size(),
1378  CMultiAlignerTest::GetIsDomainSearched(*m_Aligner).size());
1379 
1380  for (size_t i=0;i < expected_is_domain_searched.size();i++) {
1381  BOOST_REQUIRE_EQUAL(expected_is_domain_searched[i],
1383  }
1384 
1385 
1386  // Compare domain hits
1387 
1388  const size_t kNumExpectedPreHits = 7;
1389  vector<SHit> expected_hits(kNumExpectedPreHits);
1390 
1391  // Domain Hit #0
1392  expected_hits[0].query = "lcl|1buc_A";
1393  expected_hits[0].subject = 1;
1394  expected_hits[0].query_range = TRange(6, 382);
1395  expected_hits[0].subject_range = TRange(0, 372);
1396  expected_hits[0].score = 1414;
1397 
1398  // Domain Hit #1
1399  expected_hits[1].query = "lcl|1buc_A";
1400  expected_hits[1].subject = 0;
1401  expected_hits[1].query_range = TRange(95, 377);
1402  expected_hits[1].subject_range = TRange(42, 325);
1403  expected_hits[1].score = 885;
1404 
1405  // Domain Hit #2
1406  expected_hits[2].query = "lcl|1buc_A";
1407  expected_hits[2].subject = 2;
1408  expected_hits[2].query_range = TRange(1, 382);
1409  expected_hits[2].subject_range = TRange(19, 405);
1410  expected_hits[2].score = 718;
1411 
1412  // Domain Hit #3
1413  expected_hits[3].query = "lcl|Q8jzn5";
1414  expected_hits[3].subject = 2;
1415  expected_hits[3].query_range = TRange(41, 448);
1416  expected_hits[3].subject_range = TRange(0, 408);
1417  expected_hits[3].score = 1779;
1418 
1419  // Domain Hit #4
1420  expected_hits[4].query = "lcl|Q8jzn5";
1421  expected_hits[4].subject = 1;
1422  expected_hits[4].query_range = TRange(88, 440);
1423  expected_hits[4].subject_range = TRange(22, 367);
1424  expected_hits[4].score = 981;
1425 
1426  // Domain Hit #5
1427  expected_hits[5].query = "lcl|Q8jzn5";
1428  expected_hits[5].subject = 0;
1429  expected_hits[5].query_range = TRange(151, 440);
1430  expected_hits[5].subject_range = TRange(42, 325);
1431  expected_hits[5].score = 872;
1432 
1433  // Domain Hit #6
1434  expected_hits[6].query = "lcl|Q8jzn5";
1435  expected_hits[6].subject = 0;
1436  expected_hits[6].query_range = TRange(511, 581);
1437  expected_hits[6].subject_range = TRange(208, 280);
1438  expected_hits[6].score = 75;
1439 
1440  string errors;
1441  bool hits_match = CMultiAlignerTest::CompareDomainHits(expected_hits,
1442  *m_Aligner,
1443  errors);
1444 
1445  BOOST_REQUIRE_MESSAGE(hits_match, errors);
1446 
1447 
1448  // Do RPS-BLAST search inside aligner and verify that the search was done
1449  // only for queries without pre-computed domain hits
1450 
1451  // set interrupt so that COBALT quits right after RPS-BLAST search
1452  m_Aligner->SetInterruptCallback(
1454  m_Aligner->Run();
1455 
1456  const size_t kNumExpectedHits = 10;
1457  BOOST_REQUIRE(kNumExpectedHits > kNumExpectedPreHits);
1458  expected_hits.resize(kNumExpectedHits);
1459 
1460 
1461  // Domain Hit #7
1462  expected_hits[7].query = "lcl|Q10535";
1463  expected_hits[7].subject = 2;
1464  expected_hits[7].query_range = TRange(27, 432);
1465  expected_hits[7].subject_range = TRange(0, 400);
1466  expected_hits[7].score = 768;
1467 
1468  // Domain Hit #8
1469  expected_hits[8].query = "lcl|Q10535";
1470  expected_hits[8].subject = 0;
1471  expected_hits[8].query_range = TRange(138, 433);
1472  expected_hits[8].subject_range = TRange(42, 326);
1473  expected_hits[8].score = 738;
1474 
1475  // Domain Hit #9
1476  expected_hits[9].query = "lcl|Q10535";
1477  expected_hits[9].subject = 1;
1478  expected_hits[9].query_range = TRange(75, 434);
1479  expected_hits[9].subject_range = TRange(24, 369);
1480  expected_hits[9].score = 704;
1481 
1482 
1483  hits_match = CMultiAlignerTest::CompareDomainHits(expected_hits, *m_Aligner,
1484  errors);
1485 
1486  BOOST_REQUIRE_MESSAGE(hits_match, errors);
1487 }
1488 
1489 
1490 BOOST_AUTO_TEST_CASE(TestSetPrecomputedDomainHitsWithNoMatchingQueries)
1491 {
1492  m_Options->SetUseQueryClusters(false);
1493  m_Aligner.Reset(new CMultiAligner(m_Options));
1494 
1495  // create cobalt queries with fake Seq-ids
1496  vector< CRef<CSeq_loc> > queries;
1497  queries.push_back(m_Sequences.back());
1498  queries.push_back(m_Sequences.back());
1499 
1500  // set cobalt queries without retrieving sequences
1501  m_Aligner->SetQueries(queries, m_Scope);
1502 
1503  // set pre-computed domain hits
1504  CMultiAlignerTest::SetDomainHits(*m_Aligner, m_RpsArchive);
1505 
1506  // verify that none of the pre-computed hits made it to the domain hit list
1507  BOOST_REQUIRE_EQUAL(CMultiAlignerTest::GetDomainHits(*m_Aligner).Size(), 0);
1508 
1509  BOOST_REQUIRE(CMultiAlignerTest::GetIsDomainSearched(*m_Aligner).empty());
1510 }
1511 
1512 
1513 BOOST_AUTO_TEST_CASE(TestSetPrecomputedDomainHitsAboveEThresh)
1514 {
1515  // create cobalt queries that match at least one query in the RPS-BLAST
1516  // archive, and are added to m_Scope
1517  vector< CRef<CSeq_loc> > queries;
1518  CRef<CSeq_loc> seq(new CSeq_loc);
1519  seq->SetWhole().Set("gi|129295");
1520  queries.push_back(seq);
1521  queries.push_back(seq);
1522 
1523  // set options
1524  m_Options->SetRpsEvalue(10);
1525  m_Options->SetUseQueryClusters(false);
1526 
1527  // First make sure that there is a hit
1528 
1529  m_Aligner.Reset(new CMultiAligner(m_Options));
1530 
1531  // set cobalt queries
1532  m_Aligner->SetQueries(queries, m_Scope);
1533 
1534  // set pre-computed domain hits
1535  CMultiAlignerTest::SetDomainHits(*m_Aligner, m_RpsArchive);
1536 
1537  // verify that there is at least one matching RPS-BLAST query with results
1538  BOOST_REQUIRE(CMultiAlignerTest::GetDomainHits(*m_Aligner).Size() > 0);
1539 
1540 
1541  // Test for low RPS-BLAST E-value
1542 
1543  m_Options->SetRpsEvalue(0.00001);
1544  m_Aligner.Reset(new CMultiAligner(m_Options));
1545 
1546  // set cobalt queries
1547  m_Aligner->SetQueries(queries, m_Scope);
1548 
1549  // set pre-computed domain hits
1550  CMultiAlignerTest::SetDomainHits(*m_Aligner, m_RpsArchive);
1551 
1552  // verify that none of the pre-computed hits made it to the hit list
1553  BOOST_REQUIRE_EQUAL(CMultiAlignerTest::GetDomainHits(*m_Aligner).Size(), 0);
1554  BOOST_REQUIRE_EQUAL(
1555  CMultiAlignerTest::GetIsDomainSearched(*m_Aligner).size(),
1556  0u);
1557 }
1558 
1559 
1560 // verify that alignment runs without errors
1561 BOOST_AUTO_TEST_CASE(TestAlignSequencesWithPrecomputedDomainHits)
1562 {
1563  // test no query clusters
1564  m_Options->SetUseQueryClusters(false);
1565  m_Options->SetDomainHits(m_RpsArchive);
1566  BOOST_REQUIRE(m_Options->Validate());
1567  m_Aligner.Reset(new CMultiAligner(m_Options));
1568  m_Aligner->SetQueries(m_Sequences, m_Scope);
1569  m_Aligner->Run();
1570 
1571  s_TestResults(*m_Aligner);
1572 
1573 
1574  // test with query clusters
1575  m_Options->SetUseQueryClusters(true);
1576  BOOST_REQUIRE(m_Options->CanGetDomainHits());
1577  m_Aligner.Reset(new CMultiAligner(m_Options));
1578  m_Aligner->SetQueries(m_Sequences, m_Scope);
1579  m_Aligner->Run();
1580 
1581  s_TestResults(*m_Aligner);
1582 }
1583 
1584 BOOST_AUTO_TEST_CASE(TestAlignMSAsWithPrecomputedDomainHits)
1585 {
1586  // set a larger RPS-BLAST e-value threshold, due to hits in m_RpsArchive
1587  m_Options->SetRpsEvalue(10);
1588  m_Options->SetDomainHits(m_RpsArchive);
1589  BOOST_REQUIRE(m_Options->Validate());
1590  m_Aligner.Reset(new CMultiAligner(m_Options));
1591  set<int> repr;
1592 
1593  // Test without representative sequences
1594 
1595  m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr, m_Scope);
1596  m_Aligner->Run();
1597 
1598  // test result
1599  s_TestAlignmentFromMSAs(m_Aligner->GetResults(), m_Align1, m_Align2);
1600 
1601 
1602  // Test with representative sequences
1603  repr.insert(0);
1604 
1605  BOOST_REQUIRE(m_Options->CanGetDomainHits());
1606  m_Aligner.Reset(new CMultiAligner(m_Options));
1607 
1608  m_Aligner->SetInputMSAs(*m_Align1, *m_Align2, repr, repr, m_Scope);
1609  m_Aligner->Run();
1610 
1611  // test result
1612  s_TestAlignmentFromMSAs(m_Aligner->GetResults(), m_Align1, m_Align2);
1613 }
1614 
1615 
1616 BOOST_AUTO_TEST_CASE(TestPrecomputedDomainSubjectNotInDatabase)
1617 {
1618  m_Options->SetUseQueryClusters(false);
1619  m_Aligner.Reset(new CMultiAligner(m_Options));
1620  m_Aligner->SetQueries(m_Sequences, m_Scope);
1621 
1622  // read RPS-BLAST archive
1624  CNcbiIfstream istr("data/rps_archive_subjectnotindb.asn");
1625  BOOST_REQUIRE(istr);
1626  istr >> MSerial_AsnText >> *archive;
1627 
1628  // get domain hits
1629  const CSeq_align& align =
1630  *archive->GetResults().GetAlignments().Get().front();
1631 
1632  // veryfy that expected query and subject pair is in the domain hits
1633  BOOST_REQUIRE_EQUAL(align.GetSeq_id(0).AsFastaString(),
1634  (string)"lcl|1buc_A");
1635  BOOST_REQUIRE_EQUAL(align.GetSeq_id(1).AsFastaString(),
1636  (string)"gnl|CDD|273847");
1637 
1638  // verify that all subjects in the domain hits must exist in the domain
1639  // database used by cobalt
1640  BOOST_REQUIRE_THROW(CMultiAlignerTest::SetDomainHits(*m_Aligner, archive),
1642 }
1643 
1644 BOOST_AUTO_TEST_CASE(TestSetDomainHitsWithUnsupportedQueries)
1645 {
1646  m_Options->SetUseQueryClusters(false);
1647  m_Aligner.Reset(new CMultiAligner(m_Options));
1648  m_Aligner->SetQueries(m_Sequences, m_Scope);
1649 
1650  // change queries to PSSM
1651  CPssm& pssm = m_RpsArchive->SetRequest().SetBody().SetQueue_search()
1652  .SetQueries().SetPssm().SetPssm();
1653 
1654  pssm.SetNumRows(28);
1655  pssm.SetNumColumns(200);
1656 
1657  // PSSM as query is not supported
1658  BOOST_REQUIRE_THROW(CMultiAlignerTest::SetDomainHits(*m_Aligner,
1659  m_RpsArchive),
1661 
1662  // re-read RPS-BLAST archive so that it is not changed for future tests
1664 }
1665 
1666 
1668 
1669 #endif /* SKIP_DOXYGEN_PROCESSING */
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
CLocalRange< TOffset > TRange
define for the fundamental building block of sequence ranges
Definition: base.hpp:115
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
CBioseq_Handle –.
CBlast4_archive –.
vector< TSingleCluster > TClusters
Definition: clusterer.hpp:160
CConstRef –.
Definition: ncbiobj.hpp:1266
void RemovePureGapSegs()
Remove any segments in which every row has a gap (these can arise when ExtractRows is used)
Definition: Dense_seg.cpp:574
void Compact()
Join adjacent mergeable segments to create a more compact alignment.
Definition: Dense_seg.cpp:432
An ordered collection of CHit objects.
Definition: hitlist.hpp:50
int Size() const
Retrieve number of hits in list.
Definition: hitlist.hpp:75
CHit * GetHit(int index)
Retrieve a hit from the hitlist.
Definition: hitlist.hpp:93
A generalized representation of a pairwise alignment.
Definition: hit.hpp:86
int m_Score
Score of alignment.
Definition: hit.hpp:104
int m_SeqIndex1
Numerical identifier for first sequence in alignment.
Definition: hit.hpp:97
int m_SeqIndex2
Numerical identifier for second sequence in alignment.
Definition: hit.hpp:101
TRange m_SeqRange1
The range of offsets on the first sequence.
Definition: hit.hpp:107
TRange m_SeqRange2
The range of offsets on the second sequence.
Definition: hit.hpp:110
Fixture class initialized for each multialigner test.
static CRef< CBlast4_archive > m_RpsArchive
static CRef< CObjectManager > m_Objmgr
static void Initialize(void)
Initialize static attributes.
static CRef< CScope > m_Scope
CRef< CMultiAlignerOptions > m_Options
static vector< CRef< CSeq_loc > > m_Sequences
static void x_ReadSequences(void)
Read test sequences in FASTA format from file.
static void x_InitScope(void)
Initialize scope.
static void x_ReadAlignments(void)
Read test MSAs from files.
static void x_ReadRpsArchive(void)
Read test RPS-BLAST output in the archive format from file.
static CRef< CSeq_align > m_Align2
CRef< CMultiAligner > m_Aligner
static void Finalize(void)
Release static attributes.
static CRef< CSeq_align > m_Align1
Options and parameters for multiple alignment.
Definition: options.hpp:95
void SetRpsDb(const string &dbname)
Use RPS Blast with given database.
Definition: options.hpp:355
const TConstraints & GetUserConstraints(void) const
Get user constraints.
Definition: options.hpp:410
@ fNoQueryClusters
No query clustering.
Definition: options.hpp:228
@ fNoRpsBlast
Do not use RPS Blast.
Definition: options.hpp:231
vector< SConstraint > TConstraints
Definition: options.hpp:222
Test class for accessing CMultiAligner private attributes and methods.
static void SetDomainHits(CMultiAligner &aligner, CConstRef< CBlast4_archive > archive)
Set pre-computed domain hits without invoking CMultiAligner::Run()
static bool InterruptAfterRpsBlastSearch(CMultiAligner::SProgress *progress)
Quit after doing RPS-BLAST search.
static void SetQuerySeqlocs(CMultiAligner &aligner, const vector< CRef< CSeq_loc > > &queries)
Set queries in the aligner only as Seq-locs do not retrieve sequences.
static const vector< bool > & GetIsDomainSearched(const CMultiAligner &aligner)
static const CHitList & GetDomainHits(const CMultiAligner &aligner)
static bool CompareDomainHits(const vector< SHit > &expected_hits, const CMultiAligner &aligner, string &err)
Compare domain hits in CMultiAligner with reference alignments.
Simultaneously align multiple protein sequences.
Definition: cobalt.hpp:69
CConstRef< CMultiAlignerOptions > GetOptions(void) const
Get multi aligner parameters.
Definition: cobalt.hpp:196
const vector< CSequence > & GetSeqResults(void) const
Retrieve the current aligned results in CSequence format.
Definition: cobalt.hpp:240
const vector< CRef< objects::CSeq_loc > > & GetQueries(void) const
Get query sequences.
Definition: cobalt.hpp:182
CRef< objects::CSeq_align > GetResults(void) const
Retrieve the current aligned results in Seq-align format.
Definition: seqalign.cpp:157
vector< CRef< objects::CSeq_loc > > m_tQueries
Definition: cobalt.hpp:688
TStatus Run(void)
Align the current set of input sequences (reset any existing alignment information).
Definition: cobalt.cpp:683
const TPhyTreeNode * GetTree(void) const
Get tree used as guide in progressive alignment.
Definition: cobalt.hpp:245
EStatus
Return status.
Definition: cobalt.hpp:77
@ eOutOfMemory
Out of memory error.
Definition: cobalt.hpp:84
@ eSuccess
Alignment successfully completed.
Definition: cobalt.hpp:78
@ eInterrupt
Alignment interrupted through callback function.
Definition: cobalt.hpp:83
const CClusterer::TClusters & GetQueryClusters(void) const
Get clusters of query sequences.
Definition: cobalt.hpp:255
vector< bool > m_IsDomainSearched
Marks sequences with pre-computed domain hits.
Definition: cobalt.hpp:724
CHitList m_DomainHits
Definition: cobalt.hpp:716
void SetQueries(const vector< CRef< objects::CSeq_loc > > &queries, CRef< objects::CScope > scope)
Set query sequences.
Definition: cobalt.cpp:194
CRef< objects::CScope > GetScope(void)
Get scope.
Definition: cobalt.hpp:188
void x_SetDomainHits(const blast::TSeqLocVector &queruies, const vector< int > &indices, const objects::CBlast4_archive &archive)
Set pre-computed domain hits using BLAST archive format.
Definition: rps.cpp:664
CRef< objects::CBioTreeContainer > GetTreeContainer(void) const
Get serializable tree used as guide in progressive alignment.
Definition: cobalt.cpp:357
@ eDomainHitsSearch
Definition: cobalt.hpp:93
void x_CreateBlastQueries(blast::TSeqLocVector &queries, vector< int > &indices)
Create query set for RPS Blast and Blastp searches along with indices in multiple alignment queries a...
Definition: cobalt.cpp:1433
Definition: Pssm.hpp:55
CScope –.
Definition: scope.hpp:92
CSeqVector –.
Definition: seq_vector.hpp:65
const CSeq_id & GetSeq_id(TDim row) const
Get seq-id (the first one if segments have different ids).
Definition: Seq_align.cpp:317
static const unsigned char kGapChar
The ncbistdaa code for a gap.
Definition: seq.hpp:58
definition of a Culling tree
Definition: ncbi_tree.hpp:100
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
void clear()
Definition: set.hpp:153
Interface for CMultiAligner.
int ReadFastaQueries(const string &filename, vector< CRef< objects::CSeq_loc > > &seqs, CRef< objects::CScope > &scope, bool parse_deflines, objects::CSeqIdGenerator *id_generator)
int ReadMsa(const string &filename, CRef< CSeq_align > &align, CRef< CScope > scope, bool parse_deflines, objects::CSeqIdGenerator *id_generator)
static void s_TestResultAlignment(const vector< CRef< CSeq_loc > > &queries, const CRef< CSeq_align > &seqalign, const vector< CSequence > &seqs, CRef< CScope > scope, const string &aln_ref="")
USING_SCOPE(cobalt)
static void s_TestResultTreeContainer(int num_queries, const CBioTreeContainer &btc)
static void s_TestResults(CMultiAligner &aligner, const string &ref_aln="")
BOOST_AUTO_TEST_CASE(TestSetQueries)
static bool s_Interrupt(CMultiAligner::SProgress *progress)
static void s_MakeBioseqs(const vector< CRef< CSeq_loc > > &seqlocs, CRef< CScope > scope, vector< CRef< CBioseq > > &bioseqs)
void s_TestAlignmentFromMSAs(CRef< CSeq_align > result, CRef< CSeq_align > in_first, CRef< CSeq_align > in_second)
static void s_TestResultTree(int num_queries, const TPhyTreeNode *tree)
static void s_TestQueriesAsBioseqs(const vector< CRef< CBioseq > > &bioseqs)
static void s_TestQueriesAsSeq_locs(const vector< CRef< CSeq_loc > > &seqlocs, CRef< CScope > scope)
static void s_TestResultClusters(int num_queries, const CClusterer::TClusters &clusters, const CMultiAlignerOptions::TConstraints &constraints)
NCBITEST_AUTO_INIT()
USING_NCBI_SCOPE
static void s_TestTree(vector< bool > &queries, const TPhyTreeNode *node)
NCBITEST_AUTO_FINI()
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
const string AsFastaString(void) const
Definition: Seq_id.cpp:2266
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1065
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
void SetInt(TInt &v)
Definition: Seq_loc.hpp:983
const CSeq_id * GetId(void) const
Get the id of the location return NULL if has multiple ids or no id at all.
Definition: Seq_loc.hpp:941
const CSeq_id & GetId(const CSeq_loc &loc, CScope *scope)
If all CSeq_ids embedded in CSeq_loc refer to the same CBioseq, returns the first CSeq_id found,...
TSeqPos GetLength(const CSeq_id &id, CScope *scope)
Get sequence length if scope not null, else return max possible TSeqPos.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
CConstRef< CSeq_id > GetSeqId(void) const
Get id which can be used to access this bioseq handle Throws an exception if none is available.
TSeqPos size(void) const
Definition: seq_vector.hpp:291
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
Definition: ncbiobj.hpp:968
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
TObjectType * GetPointer(void) THROWS_NONE
Get pointer,.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
TObjectType * GetNonNullPointer(void) const
Get pointer value and throw a null pointer exception if pointer is null.
Definition: ncbiobj.hpp:1654
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static string UIntToString(unsigned int value, TNumToStringFlags flags=0, int base=10)
Convert UInt to string.
Definition: ncbistr.hpp:5109
TNodeList_CI SubNodeBegin(void) const
Return first const iterator on subnode list.
Definition: ncbi_tree.hpp:160
TNodeList::const_iterator TNodeList_CI
Definition: ncbi_tree.hpp:110
bool IsLeaf() const
Report whether this is a leaf node.
Definition: ncbi_tree.hpp:296
TNodeList_CI SubNodeEnd(void) const
Return last const iterator on subnode list.
Definition: ncbi_tree.hpp:166
const TValue & GetValue(void) const
Return node's value.
Definition: ncbi_tree.hpp:184
const TTreeType * GetParent(void) const
Get node's parent.
Definition: ncbi_tree.hpp:139
static const char label[]
list< CRef< CFeatureDescr > > Tdata
list< CRef< CNodeFeature > > Tdata
list< CRef< CNode > > Tdata
Definition: NodeSet_.hpp:89
const Tdata & Get(void) const
Get the member data.
Definition: NodeSet_.hpp:164
const Tdata & Get(void) const
Get the member data.
const TFdict & GetFdict(void) const
Get the Fdict member data.
const TNodes & GetNodes(void) const
Get the Nodes member data.
const TAlignments & GetAlignments(void) const
Get the Alignments member data.
const TResults & GetResults(void) const
Get the Results member data.
TTo GetTo(void) const
Get the To member data.
Definition: Range_.hpp:269
TFrom GetFrom(void) const
Get the From member data.
Definition: Range_.hpp:222
void SetNumColumns(TNumColumns value)
Assign a value to NumColumns data member.
Definition: Pssm_.hpp:666
void SetNumRows(TNumRows value)
Assign a value to NumRows data member.
Definition: Pssm_.hpp:619
const TDenseg & GetDenseg(void) const
Get the variant data.
Definition: Seq_align_.cpp:153
TLens & SetLens(void)
Assign a value to Lens data member.
Definition: Dense_seg_.hpp:561
vector< TSeqPos > TLens
Definition: Dense_seg_.hpp:108
const TStarts & GetStarts(void) const
Get the Starts member data.
Definition: Dense_seg_.hpp:530
TDim GetDim(void) const
Get the Dim member data.
Definition: Seq_align_.hpp:856
void SetSegs(TSegs &value)
Assign a value to Segs data member.
Definition: Seq_align_.cpp:310
const TLens & GetLens(void) const
Get the Lens member data.
Definition: Dense_seg_.hpp:555
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Seq_align_.hpp:865
vector< TSignedSeqPos > TStarts
Definition: Dense_seg_.hpp:107
void SetDim(TDim value)
Assign a value to Dim data member.
Definition: Dense_seg_.hpp:427
void SetType(TType value)
Assign a value to Type data member.
Definition: Seq_align_.hpp:818
TDim GetDim(void) const
Get the Dim member data.
Definition: Dense_seg_.hpp:421
TStarts & SetStarts(void)
Assign a value to Starts data member.
Definition: Dense_seg_.hpp:536
TType GetType(void) const
Get the Type member data.
Definition: Seq_align_.hpp:809
void SetNumseg(TNumseg value)
Assign a value to Numseg data member.
Definition: Dense_seg_.hpp:474
const TIds & GetIds(void) const
Get the Ids member data.
Definition: Dense_seg_.hpp:505
TIds & SetIds(void)
Assign a value to Ids data member.
Definition: Dense_seg_.hpp:511
const Tdata & Get(void) const
Get the member data.
const TSegs & GetSegs(void) const
Get the Segs member data.
Definition: Seq_align_.hpp:921
bool IsDenseg(void) const
Check if variant Denseg is selected.
Definition: Seq_align_.hpp:740
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
int i
int len
const struct ncbi::grid::netcache::search::fields::SIZE size
#define fabs(v)
Definition: ncbi_dispd.c:46
int isdigit(Uchar c)
Definition: ncbictype.hpp:64
T min(T x_, T y_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
The Object manager core.
Options for CMultiAligner.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Structure for reporting alignment progress.
Definition: cobalt.hpp:102
EAlignmentStage stage
Definition: cobalt.hpp:103
Representation of a hit for computing constraints.
int subject
subject ordinal id in the database
TRange subject_range
TRange query_range
alignment extents
int score
alignment score
string query
query id
static string subject
static string query
Utilities for more convenient use of the Boost.Test library.
else result
Definition: token2.c:20
Modified on Wed Apr 17 13:10:41 2024 by modify_doxy.py rev. 669887