NCBI C++ ToolKit
phytree_format_unit_test.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: phytree_format_unit_test.cpp 92084 2020-12-21 15:19:58Z grichenk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Greg Boratyn
27  *
28  * File Description:
29  * Unit tests for the Phylogenetic tree computation API
30  *
31  * ===========================================================================
32  */
33 
34 #include <ncbi_pch.hpp>
35 
37 #include <objmgr/scope.hpp>
40 
41 #include <serial/serial.hpp>
42 #include <serial/objistr.hpp>
43 #include <serial/objostr.hpp>
44 
47 
48 #include <corelib/test_boost.hpp>
49 
50 #ifndef SKIP_DOXYGEN_PROCESSING
51 
54 
55 
56 // Create a new scope: registers CGBDataLoader with "ID2" in the object manager and adds the default loaders
57 static CRef<CScope> s_CreateScope(void);
58 
59 // Check whether a tree container node is a leaf (reports true for a node carrying a label feature)
60 static bool s_IsLeaf(const CNode& node);
61 
62 // Check a serialized tree: validates the feature dictionary and every non-root node, and requires exactly num_leaves leaves
63 static bool s_TestTreeContainer(const CBioTreeContainer& tree, int num_leaves);
64 
65 // Id of a non-leaf node used by the expand/collapse, reroot and show-subtree tests
66 static const int kNodeId = 2;
67 
68 
69 BOOST_AUTO_TEST_SUITE(guide_tree)
70 
71 BOOST_AUTO_TEST_CASE(TestCreateTreeFromCalcProtein)
72 {
73  CRef<CScope> scope = s_CreateScope();
74  CNcbiIfstream istr("data/seqalign_protein.asn");
75  BOOST_REQUIRE(istr);
76  CSeq_align seq_align;
77  istr >> MSerial_AsnText >> seq_align;
78 
79  CPhyTreeCalc calc(seq_align, scope);
80  calc.SetMaxDivergence(0.92);
81  BOOST_REQUIRE(calc.CalcBioTree());
82 
83  CPhyTreeFormatter tree(calc);
84  s_TestTreeContainer(*tree.GetSerialTree(), calc.GetSeqAlign()->GetDim());
85 }
86 
87 BOOST_AUTO_TEST_CASE(TestCreateTreeFromCalcNucleotide)
88 {
89  CRef<CScope> scope = s_CreateScope();
90  CNcbiIfstream istr("data/seqalign_nucleotide.asn");
91  BOOST_REQUIRE(istr);
92  CSeq_align seq_align;
93  istr >> MSerial_AsnText >> seq_align;
94 
95  CPhyTreeCalc calc(seq_align, scope);
96  calc.SetMaxDivergence(0.92);
97  BOOST_REQUIRE(calc.CalcBioTree());
98 
99  CPhyTreeFormatter tree(calc);
100  s_TestTreeContainer(*tree.GetSerialTree(), calc.GetSeqAlign()->GetDim());
101 }
102 
103 BOOST_AUTO_TEST_CASE(TestInitTreeFeatures)
104 {
105  CRef<CScope> scope = s_CreateScope();
106 
107  CNcbiIfstream istr("data/bare_tree.asn");
108  BOOST_REQUIRE(istr);
109  CBioTreeContainer btc;
110  istr >> MSerial_AsnText >> btc;
111 
112  // find number of leaves
113  int num = 0;
114  ITERATE (CNodeSet::Tdata, it, btc.GetNodes().Get()) {
115  // if root node
116  if (!(*it)->IsSetParent()) {
117  continue;
118  }
119 
120  if (s_IsLeaf(**it)) {
121  num++;
122  }
123  }
124 
125  // create fake seq-ids
126  CRef<CSeq_id> seq_id(new CSeq_id("gi|129295"));
127  vector< CRef<CSeq_id> > ids(num, seq_id);
128 
129  CPhyTreeFormatter tree(btc, ids, *scope);
130 
131  s_TestTreeContainer(*tree.GetSerialTree(), num);
132 }
133 
134 BOOST_AUTO_TEST_CASE(TestMultipleQueries)
135 {
136  CRef<CScope> scope = s_CreateScope();
137  CNcbiIfstream istr("data/mole_seqalign.asn");
138  BOOST_REQUIRE(istr);
139  CSeq_align seq_align;
140  istr >> MSerial_AsnText >> seq_align;
141 
142  CPhyTreeCalc calc(seq_align, scope);
143  BOOST_REQUIRE(calc.CalcBioTree());
144 
145  vector<string> queries;
146  queries.push_back("KC128904.1");
147  queries.push_back("AB682225.1");
148  queries.push_back("HM748810.1");
149 
150  CPhyTreeFormatter tree(calc, queries);
151  CRef<CBioTreeContainer> btc = tree.GetSerialTree();
152  s_TestTreeContainer(*btc, calc.GetSeqAlign()->GetDim());
153 
154  // find query nodes and check that node info is set properly
155  size_t num_queries_found = 0;
156  ITERATE (CNodeSet::Tdata, node, btc->GetNodes().Get()) {
157 
158  // if root node
159  if (!(*node)->IsSetParent()) {
160  continue;
161  }
162 
163  // skip non-leaf nodes
164  if (!s_IsLeaf(**node)) {
165  continue;
166  }
167 
168  string accession;
169  string node_info;
170 
171  // iterate over node features
172  ITERATE (CNodeFeatureSet::Tdata, it, (*node)->GetFeatures().Get()) {
173 
174  // collect sequence accession
175  if ((*it)->GetFeatureid() == CPhyTreeFormatter::eAccessionNbrId) {
176  accession = (*it)->GetValue();
177  }
178 
179  // collect node info
180  if ((*it)->GetFeatureid() == CPhyTreeFormatter::eNodeInfoId) {
181  node_info = (*it)->GetValue();
182  }
183  }
184 
185  // accession must always be set
186  BOOST_REQUIRE(!accession.empty());
187  bool found_query = false;
188 
189  // check if the accession is for a query
190  ITERATE (vector<string>, q, queries) {
191  // if this is a query
192  if (accession == *q) {
193  // node info feature must be set to kNodeInfoQuery
194  BOOST_REQUIRE_EQUAL(node_info,
196  num_queries_found++;
197  found_query = true;
198  }
199  }
200  // node info must be different from kNodeInfoQuery for other nodes
201  BOOST_REQUIRE(found_query ||
202  node_info != CPhyTreeFormatter::kNodeInfoQuery);
203  }
204 
205  // check that all query nodes were found
206  BOOST_REQUIRE_EQUAL(num_queries_found, queries.size());
207 }
208 
209 
210 // Check that exceptions are thrown for incorrect constructor arguments
211 BOOST_AUTO_TEST_CASE(TestInitTreeFeaturesWithBadInput)
212 {
213  CRef<CScope> scope = s_CreateScope();
214 
215  CNcbiIfstream istr("data/bare_tree.asn");
216  BOOST_REQUIRE(istr);
217  CBioTreeContainer btc;
218  istr >> MSerial_AsnText >> btc;
219  istr.close();
220 
221  // find number of leaves
222  int num = 0;
223  ITERATE (CNodeSet::Tdata, it, btc.GetNodes().Get()) {
224  // if root node
225  if (!(*it)->IsSetParent()) {
226  continue;
227  }
228 
229  if (s_IsLeaf(**it)) {
230  num++;
231  }
232  }
233 
234  // create fake seq-ids
235  CRef<CSeq_id> seq_id(new CSeq_id("gi|129295"));
236  vector< CRef<CSeq_id> > ids(num - 1, seq_id);
237 
239 
240  // number of seq-ids must be the same as number of leaves in btc
241  BOOST_REQUIRE_THROW(tree.Reset(new CPhyTreeFormatter(btc, ids, *scope)),
243 
244  istr.open("data/bare_tree.asn");
245  BOOST_REQUIRE(istr);
246  istr >> MSerial_AsnText >> btc;
247 
248  // number of seq-ids must be the same as number of leaves in btc
249  ids.resize(num + 1, seq_id);
250  BOOST_REQUIRE_THROW(tree.Reset(new CPhyTreeFormatter(btc, ids, *scope)),
252 }
253 
254 
255 BOOST_AUTO_TEST_CASE(TestInitLabels)
256 {
257  CRef<CScope> scope = s_CreateScope();
258 
259  CNcbiIfstream istr("data/tree.asn");
260  BOOST_REQUIRE(istr);
261  CBioTreeContainer btc;
262  istr >> MSerial_AsnText >> btc;
263 
265 
266  // find number of leaves
267  int num = 0;
268  ITERATE (CNodeSet::Tdata, it, btc.GetNodes().Get()) {
269  // if root node
270  if (!(*it)->IsSetParent()) {
271  continue;
272  }
273 
274  if (s_IsLeaf(**it)) {
275  num++;
276  }
277  }
278 
279  s_TestTreeContainer(*tree.GetSerialTree(), num);
280 }
281 
282 
283 BOOST_AUTO_TEST_CASE(TestIsSingleBlastName)
284 {
285  CRef<CScope> scope = s_CreateScope();
286 
287  // read tree with single blast name
288  CNcbiIfstream istr("data/tree_single_bn.asn");
289  BOOST_REQUIRE(istr);
290  CBioTreeContainer btc;
291  istr >> MSerial_AsnText >> btc;
292  istr.close();
293 
294  // feature id for blast name
295  const int kBlastNameFeatureId = 6;
296  string blast_name;
297 
298  // pointer to a blast name node feature
299  CNodeFeature* bn_node_feature = NULL;
300 
301  // check pre condition -- the tree has single blast name
302  bool is_single_blast_name = true;
303  // for each node
304  ITERATE(CNodeSet::Tdata, node, btc.GetNodes().Get()) {
305  // if root node
306  if (!(*node)->IsSetParent()) {
307  continue;
308  }
309 
310  // for each node feature
311  ITERATE (CNodeFeatureSet::Tdata, it, (*node)->GetFeatures().Get()) {
312  if ((*it)->CanGetFeatureid()
313  && (*it)->GetFeatureid() == kBlastNameFeatureId) {
314 
315  if ((*it)->CanGetValue()) {
316  if (blast_name.empty()) {
317  blast_name = (*it)->GetValue();
318  }
319  else if (blast_name != (*it)->GetValue()) {
320  is_single_blast_name = false;
321  break;
322  }
323 
324  // set the pointer to a node feature for later use
325  if (!bn_node_feature) {
326  bn_node_feature = const_cast<CNodeFeature*>(&**it);
327  }
328  }
329  }
330  }
331  if (!is_single_blast_name) {
332  break;
333  }
334  }
335  // check pre condition
336  BOOST_REQUIRE(is_single_blast_name);
337 
338  // check post condition
340  BOOST_REQUIRE(tree->IsSingleBlastName());
341 
342  // change blast name for a single node
343  bn_node_feature->SetValue(blast_name + "fake_name");
344  tree.Reset(new CPhyTreeFormatter(btc));
345  BOOST_REQUIRE(!tree->IsSingleBlastName());
346 }
347 
348 
349 BOOST_AUTO_TEST_CASE(TestSimplifyByBlastName)
350 {
351  CRef<CScope> scope = s_CreateScope();
352 
353  CNcbiIfstream istr("data/tree_single_bn.asn");
354  BOOST_REQUIRE(istr);
355  CBioTreeContainer btc;
356  istr >> MSerial_AsnText >> btc;
357 
358  // feature id for blast name
359  const int kBlastNameFeatureId = 6;
360  string blast_name;
361 
362  // check pre condition -- the tree has single blast name
363  bool is_single_blast_name = true;
364  // for each node
365  ITERATE(CNodeSet::Tdata, node, btc.GetNodes().Get()) {
366  // if root node
367  if (!(*node)->IsSetParent()) {
368  continue;
369  }
370 
371  // for each node feature
372  ITERATE (CNodeFeatureSet::Tdata, it, (*node)->GetFeatures().Get()) {
373  if ((*it)->CanGetFeatureid()
374  && (*it)->GetFeatureid() == kBlastNameFeatureId) {
375 
376  if ((*it)->CanGetValue()) {
377  if (blast_name.empty()) {
378  blast_name = (*it)->GetValue();
379  }
380  else if (blast_name != (*it)->GetValue()) {
381  is_single_blast_name = false;
382  break;
383  }
384  }
385  }
386  }
387  if (!is_single_blast_name) {
388  break;
389  }
390  }
391  // check pre condition
392  BOOST_REQUIRE(is_single_blast_name);
393 
394  CPhyTreeFormatter tree(btc);
396 
397  const CBioTreeDynamic::CBioNode* node
398  = tree.GetNonNullNode(tree.GetRootNodeID());
399 
400  // this tree must be collapsed at root
401  BOOST_REQUIRE_EQUAL(node->GetFeature(
404 
405  BOOST_REQUIRE_EQUAL(tree.GetSimplifyMode(),
407 }
408 
409 
410 BOOST_AUTO_TEST_CASE(TestFullyExpand)
411 {
412  CRef<CScope> scope = s_CreateScope();
413 
414  CNcbiIfstream istr("data/bare_tree.asn");
415  BOOST_REQUIRE(istr);
416  CBioTreeContainer btc;
417  istr >> MSerial_AsnText >> btc;
418 
419  // find number of leaves
420  int num = 0;
421  ITERATE (CNodeSet::Tdata, it, btc.GetNodes().Get()) {
422  // if root node
423  if (!(*it)->IsSetParent()) {
424  continue;
425  }
426 
427  if (s_IsLeaf(**it)) {
428  num++;
429  }
430  }
431 
432  // create fake seq-ids with the same blast name
433  CRef<CSeq_id> seq_id(new CSeq_id("gi|129295"));
434  vector< CRef<CSeq_id> > ids(num, seq_id);
435 
436  CPhyTreeFormatter tree(btc, ids, *scope);
437 
438  // collapse node so that action fully expand will make a change
439  tree.ExpandCollapseSubtree(2);
440 
442 
443  CRef<CBioTreeContainer> tree_cont = tree.GetSerialTree();
444 
445  // by checking BioTreeContainer we do not have to do recursion
446  ITERATE (CNodeSet::Tdata, node, tree_cont->GetNodes().Get()) {
447 
448  if (!(*node)->IsSetParent()) {
449  continue;
450  }
451 
452  ITERATE (CNodeFeatureSet::Tdata, it, (*node)->GetFeatures().Get()) {
453 
454  // for expanded nodes the feature does not need to be present
455  if ((*it)->GetFeatureid()
457 
458  // but if it is, then its value must be "0"
459  BOOST_REQUIRE_EQUAL((*it)->GetValue(), "0");
460  }
461  }
462  }
463 
464  BOOST_REQUIRE_EQUAL(tree.GetSimplifyMode(),
466 }
467 
468 
469 BOOST_AUTO_TEST_CASE(TestExpandCollapse)
470 {
471  CRef<CScope> scope = s_CreateScope();
472 
473  CNcbiIfstream istr("data/bare_tree.asn");
474  BOOST_REQUIRE(istr);
475  CBioTreeContainer btc;
476  istr >> MSerial_AsnText >> btc;
477 
478  // find number of leaves
479  int num = 0;
480  ITERATE (CNodeSet::Tdata, it, btc.GetNodes().Get()) {
481  // if root node
482  if (!(*it)->IsSetParent()) {
483  continue;
484  }
485 
486  if (s_IsLeaf(**it)) {
487  num++;
488  }
489  }
490 
491  // create fake seq-ids with the same blast name
492  CRef<CSeq_id> seq_id(new CSeq_id("gi|129295"));
493  vector< CRef<CSeq_id> > ids(num, seq_id);
494 
495  CPhyTreeFormatter tree(btc, ids, *scope);
496 
497  // collapse node (assuming it is expanded)
498  tree.ExpandCollapseSubtree(kNodeId);
499 
500  // subtree must be collapsed
501  BOOST_REQUIRE_EQUAL(tree.GetNonNullNode(kNodeId)->GetFeature(
504  "1");
505 
506  // expand node (assuming it is collapsed)
507  tree.ExpandCollapseSubtree(kNodeId);
508 
509  // subtree must be expanded
510  BOOST_REQUIRE_EQUAL(tree.GetNonNullNode(kNodeId)->GetFeature(
513  "0");
514 
515  BOOST_REQUIRE_EQUAL(tree.GetSimplifyMode(), CPhyTreeFormatter::eNone);
516 }
517 
518 
519 BOOST_AUTO_TEST_CASE(TestRerootTree)
520 {
521  CRef<CScope> scope = s_CreateScope();
522 
523  CNcbiIfstream istr("data/tree.asn");
524  BOOST_REQUIRE(istr);
525  CBioTreeContainer btc;
526  istr >> MSerial_AsnText >> btc;
527 
529 
530  int old_root_id = tree.GetRootNodeID();
531  BOOST_REQUIRE(old_root_id != kNodeId);
532 
533  tree.RerootTree(kNodeId);
534 
535  // new root's id must be kNodeId
536  BOOST_REQUIRE_EQUAL(tree.GetRootNodeID(), kNodeId);
537  BOOST_REQUIRE_EQUAL((int)tree.GetTree().GetTreeNode()->GetValue().GetId(),
538  kNodeId);
539 
540  // node above kNodeId must be present in the tree
541  BOOST_REQUIRE(tree.GetNode(old_root_id));
542 }
543 
544 
545 BOOST_AUTO_TEST_CASE(TestShowSubtree)
546 {
547  CRef<CScope> scope = s_CreateScope();
548 
549  CNcbiIfstream istr("data/tree.asn");
550  BOOST_REQUIRE(istr);
551  CBioTreeContainer btc;
552  istr >> MSerial_AsnText >> btc;
553 
555 
556  int old_root_id = tree.GetRootNodeID();
557  BOOST_REQUIRE(old_root_id != kNodeId);
558 
559  tree.ShowSubtree(kNodeId);
560 
561  // new root node must have the new id
562  BOOST_REQUIRE_EQUAL(tree.GetRootNodeID(), kNodeId);
563  BOOST_REQUIRE_EQUAL((int)tree.GetTree().GetTreeNode()->GetValue().GetId(),
564  kNodeId);
565 
566  // node above kNodeId must not be present in the tree
567  BOOST_REQUIRE(!tree.GetNode(old_root_id));
568 }
569 
570 
571 BOOST_AUTO_TEST_CASE(TestPrintTreeNewick)
572 {
573  CRef<CScope> scope = s_CreateScope();
574 
575  CNcbiIfstream istr("data/tree.asn");
576  BOOST_REQUIRE(istr);
577  CBioTreeContainer btc;
578  istr >> MSerial_AsnText >> btc;
579 
582 
583  unique_ptr<CNcbiOstrstream> ostr(new CNcbiOstrstream);
584 
585  tree->PrintNewickTree(*ostr);
586  string output = CNcbiOstrstreamToString(*ostr);
587 
588  BOOST_CHECK_EQUAL(output, "(serpin_B9__Homo_sapiens_:4.73586, (hypothetical_protein__Homo_sapiens_:0, ((serpin_peptidase_inhibitor__clade_B__ovalbumin___member_11__Homo_sapiens_:0.0646582, unnamed_protein_product__Homo_sapiens_:0):0.368709, (((antithrombin_III_precursor__Homo_sapiens_:0, antithrombin_III__Homo_sapiens_:0.00969429):0.00807382, antithrombin_III_variant__Homo_sapiens_:0):0.0311961, unnamed_protein_product__Homo_sapiens_:0):0.560057):0.656712):0);\n");
589 
590 
591  ostr.reset(new CNcbiOstrstream);
593  tree->PrintNewickTree(*ostr);
595 
596  BOOST_CHECK_EQUAL(output, "(Homo_sapiens:4.73586, (Homo_sapiens:0, ((Homo_sapiens:0.0646582, Homo_sapiens:0):0.368709, (((Homo_sapiens:0, Homo_sapiens:0.00969429):0.00807382, Homo_sapiens:0):0.0311961, Homo_sapiens:0):0.560057):0.656712):0);\n");
597 
598 
599  ostr.reset(new CNcbiOstrstream);
601  tree->PrintNewickTree(*ostr);
603  BOOST_CHECK_EQUAL(output, "(primates:4.73586, (primates:0, ((primates:0.0646582, primates:0):0.368709, (((primates:0, primates:0.00969429):0.00807382, primates:0):0.0311961, primates:0):0.560057):0.656712):0);\n");
604 
605 
606  ostr.reset(new CNcbiOstrstream);
608  tree->PrintNewickTree(*ostr);
610  BOOST_CHECK_EQUAL(output, "(ref_NP_004146_1:4.73586, (emb_CAE45712_1:0, ((gb_EAW63158_1:0.0646582, dbj_BAG59299_1:0):0.368709, (((ref_NP_000479_1:0, gb_AAA51796_1:0.00969429):0.00807382, dbj_BAA06212_1:0):0.0311961, dbj_BAG35537_1:0):0.560057):0.656712):0);\n");
611 
612 
613  ostr.reset(new CNcbiOstrstream);
614  tree.Reset(new CPhyTreeFormatter(btc,
616 
617  tree->PrintNewickTree(*ostr);
619  BOOST_CHECK_EQUAL(output, "(ref_NP_004146_1_primates_:4.73586, (emb_CAE45712_1_primates_:0, ((gb_EAW63158_1_primates_:0.0646582, dbj_BAG59299_1_primates_:0):0.368709, (((ref_NP_000479_1_primates_:0, gb_AAA51796_1_primates_:0.00969429):0.00807382, dbj_BAA06212_1_primates_:0):0.0311961, dbj_BAG35537_1_primates_:0):0.560057):0.656712):0);\n");
620 }
621 
622 
623 BOOST_AUTO_TEST_CASE(TestPrintTreeNexus)
624 {
625  CRef<CScope> scope = s_CreateScope();
626 
627  CNcbiIfstream istr("data/tree.asn");
628  BOOST_REQUIRE(istr);
629  CBioTreeContainer btc;
630  istr >> MSerial_AsnText >> btc;
631 
634 
635  unique_ptr<CNcbiOstrstream> ostr(new CNcbiOstrstream);
636 
637  tree->PrintNexusTree(*ostr);
638  string output = CNcbiOstrstreamToString(*ostr);
639 
640  BOOST_REQUIRE(output.find("#NEXUS") != NPOS);
641  BOOST_REQUIRE(output.find("BEGIN TAXA;") != NPOS);
642  BOOST_REQUIRE(output.find("DIMENSIONS ntax=8;") != NPOS);
643  BOOST_REQUIRE(output.find("TAXLABELS serpin_B9__Homo_sapiens_ hypothetical_protein__Homo_sapiens_ serpin_peptidase_inhibitor__clade_B__ovalbumin___member_11__Homo_sapiens_ unnamed_protein_product__Homo_sapiens_ antithrombin_III_precursor__Homo_sapiens_ antithrombin_III__Homo_sapiens_ antithrombin_III_variant__Homo_sapiens_ unnamed_protein_product__Homo_sapiens_;")
644  != NPOS);
645  BOOST_REQUIRE(output.find("BEGIN TREES;") != NPOS);
646  BOOST_REQUIRE(output.find("TREE Blast_guide_tree = (serpin_B9__Homo_sapiens_:4.73586, (hypothetical_protein__Homo_sapiens_:0, ((serpin_peptidase_inhibitor__clade_B__ovalbumin___member_11__Homo_sapiens_:0.0646582, unnamed_protein_product__Homo_sapiens_:0):0.368709, (((antithrombin_III_precursor__Homo_sapiens_:0, antithrombin_III__Homo_sapiens_:0.00969429):0.00807382, antithrombin_III_variant__Homo_sapiens_:0):0.0311961, unnamed_protein_product__Homo_sapiens_:0):0.560057):0.656712):0);")
647  != NPOS);
648  BOOST_REQUIRE(output.find("ENDBLOCK;") != NPOS);
649 
650 
651  ostr.reset(new CNcbiOstrstream);
653  tree->PrintNexusTree(*ostr);
655 
656  BOOST_REQUIRE(output.find("#NEXUS") != NPOS);
657  BOOST_REQUIRE(output.find("BEGIN TAXA;") != NPOS);
658  BOOST_REQUIRE(output.find("DIMENSIONS ntax=8;") != NPOS);
659  BOOST_REQUIRE(output.find("TAXLABELS Homo_sapiens Homo_sapiens Homo_sapiens Homo_sapiens Homo_sapiens Homo_sapiens Homo_sapiens Homo_sapiens;")
660  != NPOS);
661  BOOST_REQUIRE(output.find("BEGIN TREES;") != NPOS);
662  BOOST_REQUIRE(output.find("TREE Blast_guide_tree = (Homo_sapiens:4.73586, (Homo_sapiens:0, ((Homo_sapiens:0.0646582, Homo_sapiens:0):0.368709, (((Homo_sapiens:0, Homo_sapiens:0.00969429):0.00807382, Homo_sapiens:0):0.0311961, Homo_sapiens:0):0.560057):0.656712):0);")
663  != NPOS);
664  BOOST_REQUIRE(output.find("ENDBLOCK;") != NPOS);
665 
666 
667  ostr.reset(new CNcbiOstrstream);
669  tree->PrintNexusTree(*ostr);
671 
672  BOOST_REQUIRE(output.find("#NEXUS") != NPOS);
673  BOOST_REQUIRE(output.find("BEGIN TAXA;") != NPOS);
674  BOOST_REQUIRE(output.find("DIMENSIONS ntax=8;") != NPOS);
675  BOOST_REQUIRE(output.find("TAXLABELS primates primates primates primates primates primates primates primates;")
676  != NPOS);
677  BOOST_REQUIRE(output.find("BEGIN TREES;") != NPOS);
678  BOOST_REQUIRE(output.find("TREE Blast_guide_tree = (primates:4.73586, (primates:0, ((primates:0.0646582, primates:0):0.368709, (((primates:0, primates:0.00969429):0.00807382, primates:0):0.0311961, primates:0):0.560057):0.656712):0);\n")
679  != NPOS);
680  BOOST_REQUIRE(output.find("ENDBLOCK;") != NPOS);
681 
682 
683  ostr.reset(new CNcbiOstrstream);
685  tree->PrintNexusTree(*ostr);
687 
688  BOOST_REQUIRE(output.find("#NEXUS") != NPOS);
689  BOOST_REQUIRE(output.find("BEGIN TAXA;") != NPOS);
690  BOOST_REQUIRE(output.find("DIMENSIONS ntax=8;") != NPOS);
691  BOOST_REQUIRE(output.find("TAXLABELS ref_NP_004146_1 emb_CAE45712_1 gb_EAW63158_1 dbj_BAG59299_1 ref_NP_000479_1 gb_AAA51796_1 dbj_BAA06212_1 dbj_BAG35537_1;")
692  != NPOS);
693  BOOST_REQUIRE(output.find("BEGIN TREES;") != NPOS);
694  BOOST_REQUIRE(output.find("TREE Blast_guide_tree = (ref_NP_004146_1:4.73586, (emb_CAE45712_1:0, ((gb_EAW63158_1:0.0646582, dbj_BAG59299_1:0):0.368709, (((ref_NP_000479_1:0, gb_AAA51796_1:0.00969429):0.00807382, dbj_BAA06212_1:0):0.0311961, dbj_BAG35537_1:0):0.560057):0.656712):0);")
695  != NPOS);
696  BOOST_REQUIRE(output.find("ENDBLOCK;") != NPOS);
697 
698 
699  ostr.reset(new CNcbiOstrstream);
700  tree.Reset(new CPhyTreeFormatter(btc,
702 
703  tree->PrintNexusTree(*ostr);
705 
706  BOOST_REQUIRE(output.find("#NEXUS") != NPOS);
707  BOOST_REQUIRE(output.find("BEGIN TAXA;") != NPOS);
708  BOOST_REQUIRE(output.find("DIMENSIONS ntax=8;") != NPOS);
709  BOOST_REQUIRE(output.find("TAXLABELS ref_NP_004146_1_primates_ emb_CAE45712_1_primates_ gb_EAW63158_1_primates_ dbj_BAG59299_1_primates_ ref_NP_000479_1_primates_ gb_AAA51796_1_primates_ dbj_BAA06212_1_primates_ dbj_BAG35537_1_primates_;")
710  != NPOS);
711  BOOST_REQUIRE(output.find("BEGIN TREES;") != NPOS);
712  BOOST_REQUIRE(output.find("TREE Blast_guide_tree = (ref_NP_004146_1_primates_:4.73586, (emb_CAE45712_1_primates_:0, ((gb_EAW63158_1_primates_:0.0646582, dbj_BAG59299_1_primates_:0):0.368709, (((ref_NP_000479_1_primates_:0, gb_AAA51796_1_primates_:0.00969429):0.00807382, dbj_BAA06212_1_primates_:0):0.0311961, dbj_BAG35537_1_primates_:0):0.560057):0.656712):0);")
713  != NPOS);
714  BOOST_REQUIRE(output.find("ENDBLOCK") != NPOS);
715 }
716 
717 
719 
720 
721 // Create scope
723 {
725  CGBDataLoader::RegisterInObjectManager(*object_manager, "ID2");
726  CRef<CScope> scope(new CScope(*object_manager));
727  scope->AddDefaults();
728  return scope;
729 }
730 
731 
732 static bool s_TestFeatureDict(const CFeatureDictSet& dict)
733 {
734  vector<CPhyTreeFormatter::EFeatureID>
735  feature_ids(CPhyTreeFormatter::eLastId + 1);
736 
740  feature_ids[CPhyTreeFormatter::eOrganismId]
742 
746 
749 
752 
755 
758 
761 
764 
767 
768  feature_ids[CPhyTreeFormatter::eNodeInfoId]
770 
773 
776 
777  feature_ids[CPhyTreeFormatter::eNodeSizeID]
779 
782 
783  vector<bool> found(CPhyTreeFormatter::eLastId + 1, false);
784 
785  // for each feature in the dictionary
786  ITERATE (CFeatureDictSet::Tdata, it, dict.Get()) {
787 
788  int id = (*it)->GetId();
789 
790  // make sure that ids and descriptors match
791  BOOST_REQUIRE(id >= 0 && id <= CPhyTreeFormatter::eLastId);
792  BOOST_REQUIRE_EQUAL((*it)->GetName(),
793  CPhyTreeFormatter::GetFeatureTag(feature_ids[id]));
794 
795  found[id] = true;
796  }
797 
798  // all features must be present in the dictionary
799  ITERATE(vector<bool>, it, found) {
800  BOOST_REQUIRE(*it);
801  }
802 
803  return true;
804 }
805 
806 static bool s_IsLeaf(const CNode& node)
807 {
809  if ((*it)->GetFeatureid() == CPhyTreeFormatter::eLabelId) {
810  return true;
811  }
812  }
813  return false;
814 }
815 
816 static bool s_TestNode(const CNode& node, bool is_leaf)
817 {
818  vector<bool> features(CPhyTreeFormatter::eLastId + 1, false);
819  string node_info;
820  string color;
822  int id = (*it)->GetFeatureid();
823  BOOST_REQUIRE(id >= 0 && id <= CPhyTreeFormatter::eLastId);
824 
825  if (!is_leaf) {
826  BOOST_REQUIRE(id == CPhyTreeFormatter::eDistId
828  }
829 
830  if (is_leaf && id == CPhyTreeFormatter::eNodeInfoId) {
831  node_info = (*it)->GetValue();
832  }
833 
834  if (is_leaf && id == CPhyTreeFormatter::eLabelBgColorId) {
835  color = (*it)->GetValue();
836  }
837 
838  BOOST_REQUIRE((*it)->GetValue() != "");
839  BOOST_REQUIRE(!features[id]);
840  features[id] = true;
841  }
842  // only nodes with node info feature set may have colored labels
843  BOOST_REQUIRE_EQUAL(node_info.empty(), color.empty());
844  // query nodes must be marked with label background color
845  if (node_info == CPhyTreeFormatter::kNodeInfoQuery) {
846  BOOST_REQUIRE_EQUAL(color, "255 255 0");
847  }
848 
849  if (is_leaf) {
850  // the last few features are not always set
851  for (int i=0;i < CPhyTreeFormatter::eLabelColorId;i++) {
852  BOOST_REQUIRE(features[i]);
853  }
854  }
855  else {
856  BOOST_REQUIRE(features[CPhyTreeFormatter::eDistId]
858  }
859 
860  return true;
861 }
862 
863 static bool s_TestTreeContainer(const CBioTreeContainer& tree, int num_leaves)
864 {
865  BOOST_REQUIRE(tree.CanGetFdict());
866  s_TestFeatureDict(tree.GetFdict());
867 
868  int num = 0;
869  ITERATE (CNodeSet::Tdata, it, tree.GetNodes().Get()) {
870 
871  // if root node
872  if (!(*it)->IsSetParent()) {
873  continue;
874  }
875 
876  bool is_leaf = s_IsLeaf(**it);
877  s_TestNode(**it, is_leaf);
878  if (is_leaf) {
879  num++;
880  }
881  }
882 
883  BOOST_REQUIRE_EQUAL(num, num_leaves);
884 
885  return true;
886 }
887 
888 
889 
890 #endif /* SKIP_DOXYGEN_PROCESSING */
891 
#define static
BOOST_AUTO_TEST_SUITE_END() static int s_GetSegmentFlags(const CBioseq &bioseq)
CFeatureDictSet –.
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CNodeFeature –.
Definition: NodeFeature.hpp:66
CNode –.
Definition: Node.hpp:66
Computation of distance-based phylogenetic tree.
CRef< CSeq_align > GetSeqAlign(void) const
Get seq_align that corresponds to current tree.
void SetMaxDivergence(double div)
Set maximum allowed divergence between sequences included in tree.
bool CalcBioTree(void)
Compute bio tree for the current alignment in a black box manner.
Guide tree exceptions.
Class for adding tree features, manipulating and printing tree in standard text formats.
static string GetFeatureTag(EFeatureID feat)
Get tree feature tag.
@ eLabelId
Node label.
@ eLabelBgColorId
Color for background of node label.
@ eNodeInfoId
Used for denoting query nodes.
@ eTitleId
Sequence title.
@ eTreeSimplificationTagId
Is subtree collapsed.
@ eLabelColorId
Node label color.
@ eLastId
Last Id (with largest index)
@ eNodeColorId
Node color.
@ eOrganismId
Taxonomic organism id (for sequence)
@ eBlastNameId
Sequence Blast Name.
@ eAccessionNbrId
Sequence accession.
@ eDistId
Edge length from parent to this node.
@ eAlignIndexId
Index of sequence in Seq_align.
@ eSeqIdId
Sequence id.
@ eFullyExpanded
Tree fully expanded.
@ eNone
No simplification mode.
@ eByBlastName
Subtrees that contain sequences with the same Blast Name are collapsed.
static const string kNodeInfoQuery
Node feature "node-info" value for query nodes.
CScope –.
Definition: scope.hpp:92
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddDefaults(TPriority pri=kPriority_Default)
Add default data loaders from object manager.
Definition: scope.cpp:504
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
Definition: ncbistre.hpp:439
#define NPOS
Definition: ncbistr.hpp:133
list< CRef< CFeatureDescr > > Tdata
void SetValue(const TValue &value)
Assign a value to Value data member.
list< CRef< CNodeFeature > > Tdata
list< CRef< CNode > > Tdata
Definition: NodeSet_.hpp:89
const Tdata & Get(void) const
Get the member data.
Definition: NodeSet_.hpp:164
const Tdata & Get(void) const
Get the member data.
const TFeatures & GetFeatures(void) const
Get the Features member data.
Definition: Node_.hpp:346
const Tdata & Get(void) const
Get the member data.
const TNodes & GetNodes(void) const
Get the Nodes member data.
TDim GetDim(void) const
Get the Dim member data.
Definition: Seq_align_.hpp:856
n background color
int i
The Object manager core.
static const int kNodeId
USING_SCOPE(objects)
static CRef< CScope > s_CreateScope(void)
static bool s_TestTreeContainer(const CBioTreeContainer &tree, int num_leaves)
static bool s_IsLeaf(const CNode &node)
static bool s_TestFeatureDict(const CFeatureDictSet &dict)
static bool s_TestNode(const CNode &node, bool is_leaf)
BOOST_AUTO_TEST_CASE(TestCreateTreeFromCalcProtein)
static SQLCHAR output[256]
Definition: print.c:5
BOOST_AUTO_TEST_SUITE(psiblast_iteration)
Utility stuff for more convenient using of Boost.Test library.
static const char *const features[]
Modified on Wed Nov 29 02:17:56 2023 by modify_doxy.py rev. 669887