NCBI C++ ToolKit
phylo_tree_algorithm.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: phylo_tree_algorithm.cpp 47479 2023-05-02 13:24:02Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Vladimir Tereshkov
27  *
28  * File Description:
29  *
30  */
31 
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiobj.hpp>
35 #include <corelib/ncbistd.hpp>
36 
37 #include <algorithm>
38 #include <functional>
39 #include <cmath>
40 #include <gui/objutils/utils.hpp>
42 
43 
48 
55 #include <objects/biotree/Node.hpp>
56 
58 
59 
60 // IPhyloTreeVisitor
62 {
63  return x_OnStep(tree_node, delta);
64 }
65 
67 {
68  switch (delta) {
69  case 0: return x_OnStepDown(x_node);
70  case 1: return x_OnStepRight(x_node);
71  case -1: return x_OnStepLeft(x_node);
72  default: break;
73  }
74  return eTreeTraverse;
75 }
76 
78 {
79  return eTreeTraverse;
80 }
81 
83 {
84  return eTreeTraverse;
85 }
86 
88 {
89  return eTreeTraverse;
90 }
91 
92 
93 
94 // CPhyloTreeCalculator
98 {
99  Init(ct);
100 }
101 
103 {
104  m_PamlID = 0;
105  m_ID = 0;
106  m_NodeCount = 0;
107  m_VisibleNodes = 0;
108  m_PosX = 0;
109  m_PosY = 0;
110  m_Leaves = 0;
111  m_Childs = 0;
112  m_Width = 0;
113  m_Height = 0;
114  m_MaxDistance = 0;
115  m_MinDistance = 0;
116  m_MaxLabelLen = 0;
117  m_Clusters.clear();
119  m_DistFromRoot = 0;
120  m_LastDist = 0;
122  m_AttrTable = NULL;
123  m_AttrKeyId = -1;
124  m_LabelFormat = "";
125  m_ColorTable = ct;
126 }
127 
129 {
130  _TRACE("Calc Results:");
131  _TRACE("GetNumNodes(): " << m_NodeCount );
132  _TRACE("GetVisibleNodes(): " << m_VisibleNodes );
133  _TRACE("GetNumEdges(): " << m_NodeCount-1 );
134  _TRACE("GetWidth(): " << m_Width );
135  _TRACE("GetHeight(): " << m_Height );
136  _TRACE("GetMaxDistance(): " << m_MaxDistance );
137  _TRACE("GetMinDistance(): " << m_MinDistance );
138  _TRACE("GetMaxLabelLen(): " << m_MaxLabelLen );
139 }
140 
142 {
143  // Just use first column as key (we used to look for a seq-id, but
144  // users wanted more flexibility in key selection)
145  unsigned int key_x = 0;
146  m_AttrKeyName = attr.Column(0);
147 
148  // Tree has to have attribute as feature..
150  LOG_POST(Error << "Error - attribute file key: '" + m_AttrKeyName + "' not found in tree features.");
151  return;
152  }
153 
154  m_AttrTable = &attr;
155 
156  m_AttrKeys.clear();
157  m_AttrKeys.reserve(attr.Rows());
158 
159  // Initialize a separate vector of sorted (usually seq-id) strings
160  // from the table. This can be used to look up table entries
161  // much faster when node ids and table ids match via
162  // string compare, or when the table entry is a shortened
163  // version of the node entry, e.g. AAT66223 vs AAT66223.1
164  for (unsigned int i=0; i<m_AttrTable->Rows(); i++) {
165  const string& id_str = m_AttrTable->GetCell(i, key_x);
166 
167  // Do not create an index for the entry if there is no seq-id
168  if (id_str.empty()) continue;
169 
170  AttrKey akey(id_str, i);
171  m_AttrKeys.push_back(akey);
172  }
173 
174  std::sort(m_AttrKeys.begin(), m_AttrKeys.end());
176 }
177 
179 {
180  m_AttrTable = NULL;
181  m_AttrKeys.clear();
182  m_AttrKeyId = -1;
183 }
184 
186 {
187  /// Go through all the properties in 'row_idx' of the attribute table
188  /// and either update the corresponding value in the node, or add
189  /// a new value if one doesn't already exist. Skip first col since it is the id.
190  for (unsigned int j=1; j<m_AttrTable->Cols(); ++j) {
191  string attr_name = m_AttrTable->Column(j);
192  string attr_value = m_AttrTable->GetCell(row_idx, j);
193 
194  (*node).SetFeature(m_Tree->GetFeatureDict(), attr_name, attr_value);
195 
196  if (attr_name == "label") {
197  (*node).SetLabel(attr_value);
198  }
199  }
200 
201  (*node).InitFeatures(m_Tree->GetFeatureDict(), m_ColorTable);
202 }
203 
205 {
206  ETreeTraverseCode traverse_code =
208 
209  TNodeType& node = (*m_Tree)[x_node];
210 
211  // Account for trees without distance parameter as well
212  if (delta == -1) {
214 
215  float max_branch_dist = 0.0f;
216  for (auto iter = node.GetChildren().begin(); iter != node.GetChildren().end(); ++iter) {
217  max_branch_dist = std::max(max_branch_dist, (*m_Tree)[*iter]->GetDistance() + (*m_Tree)[*iter]->GetMaxChildDistance());
218  }
219 
220  node->SetDistanceFromRoot(m_DistFromRoot);
221  node->SetMaxChildDistance(max_branch_dist);
222  }
223  else if (delta==1 || delta==0) {
224  if (delta==1)
225  m_DistFromRoot += node.GetValue().GetDistance();
226  else if (delta==0 && node.HasParent()) {
227  // If the root has a distance value, ignore it
229  node.GetValue().GetDistance();
230  }
231 
232  // If the optional attribute table has been provided, see if it has
233  // an entry with a seq-id matching the current node and, if so,
234  // update/add current node properties with properties from the table.
235  if (m_AttrTable &&
236  m_AttrTable->Rows()>0) {
237 
238  //string key_id = "seq-id";
239  //unsigned key_x = m_AttrTable->ColumnIdx(key_id);
240  unsigned int key_x = -1;
241  AttrKey comp_key;
242  bool node_has_key = true;
243 
244  if (m_AttrKeyName == "seq-id") {
245  if ((*node).GetSeqID().IsNull()) {
246  node_has_key = false;
247  }
248  else {
249  // We now use first column always so seq-ids in columns other than first
250  // will not be used.
252  comp_key.m_IDStr = (*node).GetSeqID()->GetSeqIdString();
253  }
254  }
255  else {
256  // No seq-id - just try to use first column as the key
257  key_x = 0;
258  comp_key.m_IDStr = (*node).GetBioTreeFeatureList()[m_AttrKeyId];
259 
260  if (comp_key.m_IDStr == "")
261  node_has_key = false;
262  }
263 
264  if (node_has_key) {
265  bool found = false;
266 
267  // First try to look up the matching entry based on simple string
268  // comparison using the sorted array of table seq-ids placed in
269  // m_AttrKeys. If that fails, we do a full (inefficient) scan
270  // of all table entries to try to find a match.
271  std::vector<AttrKey>::iterator iter;
272 
273  iter = std::lower_bound(m_AttrKeys.begin(), m_AttrKeys.end(),
274  comp_key);
275 
276  // iter is closet string match, not necssarily exact (but
277  // we check it either way).
278  if (iter != m_AttrKeys.end()) {
279  try {
280  // put more efficient check first...
281  if (comp_key.m_IDStr == (*iter).m_IDStr) {
282  found = true;
283  }
284  else if (m_AttrKeyName == "seq-id") {
285  objects::CSeq_id sid((*iter).m_IDStr);
286  if ((*node).GetSeqID()->Match(sid))
287  found = true;
288  }
289 
290  if (found)
291  x_UpdateProperties(node, (*iter).m_AttrTableIdx);
292  }
293  // May have errors from mal-formed seq-ids in file - ignore those.
294  catch(...)
295  {
296  found = false;
297  }
298  }
299 
300  // Not found. Check all table entries one at a time if key is a seq-id
301  if (!found && m_AttrKeyName == "seq-id") {
302  for (unsigned i=0; i<m_AttrTable->Rows(); i++) {
303  try {
304  string seqid_str = m_AttrTable->GetCell(i, key_x);
305 
306  // Ignore attribute rows without a key (seq-id).
307  // (could also use node-id potentially as a lookup - but
308  // it would have to be exported from the tree first)
309  if (seqid_str.empty()) continue;
310 
311  objects::CSeq_id sid(seqid_str);
312 
313  if ((*node).GetSeqID()->Match(sid)) {
314  x_UpdateProperties(node, i);
315  break;
316  }
317  }
318  // May have errors from mal-formed seq-ids in file - ignore those.
319  catch (...){break;}
320  }
321  }
322  }
323  }
324 
325  /// Perform all other node calculations
326  if (node.HasParent()) {
329  }
332  }
333  }
334 
335  (*node).SetNumLeaves(0);
336  (*node).SetNumLeavesEx(0);
337 
338  // Under current algorithm, this will be same as ID
339  ++m_NodeCount;
340 
342  ++m_VisibleNodes;
343 
344  if (!(node).IsLeaf()) {
345  (*node).SetPamlCounter(m_PamlID++);
346  }
347 
348  // update label corresponding to format
349  if (!m_LabelFormat.empty()) {
350  (*node).SetLabel(m_PTL.GetLabelForNode(*m_Tree, node, m_LabelFormat));
351  (*node).SetDisplayLabel("");
352  }
353 
354 
355  if (node.IsLeaf() && ((*node).GetLabel().length() > m_MaxLabelLen)) {
356  m_MaxLabelLen = (*node).GetLabel().length();
357  }
358 
359  TClusterID clusterID = (*node).GetClusterID();
360 
361  // cluster ID
362  if (clusterID != -1) {
363  m_Clusters[clusterID].push_back(x_node);
364  m_MaxClusterID = std::max(m_MaxClusterID, clusterID);
365  }
366 
367  if ((*node).HasSelClusters()) {
368  string sel_ids;
369  vector<int> selection_clusters = (*node).GetSelClusters();
370 
371  for (size_t i=0; i<selection_clusters.size(); ++i) {
372  string sel_id;
373  //NStr::IntToString(sel_id, selection_clusters[i]);
374  m_Clusters[selection_clusters[i]].push_back(x_node);
375  // selected nodes only show selection markers. Note that any
376  // marker colors saved in node properties are still there and
377  // will be displayed if node is not part of a selection set.
378  (*node).GetMarkerColors().clear();
379  }
380  }
381 
382  // Disable all cluster colors for now. they will be reset
383  // as needed in the CPhyloTreeDS::Clusterize() function
384  (*node).SetClusterColorIdx(-1);
385  }
386 
387  if (node.IsLeaf()) {
388  node->SetDistanceFromRoot(m_DistFromRoot);
389  node->SetMaxChildDistance(0.0f);
390  }
391 
392  m_LastDist = node.GetValue().GetDistance();
393  return traverse_code;
394 }
395 
396 
398 {
399  TNodeType& node = (*m_Tree)[x_node];
400 
401  // Determine visibility by keeping track of whether any of the (recursive) parents
402  // is collapsed.
403  CPhyloTreeNode::TTreeIdx parent_idx = node.GetParent();
405  if (parent_idx != CPhyloTreeNode::Null() && !(*m_Tree)[parent_idx].Expanded()) {
406  m_CollapsedParentIdx = parent_idx;
407  }
408  }
409 
410  // add 1 if node is visible (does not have a collapsed parent)
412 
413  (*node).IDX().first = m_PosX;
414  (*node).IDX().second = m_PosY;
415 
416  // adjusting height
417  if (m_Height < m_PosY) { m_Height = m_PosY; }
418 
419  return eTreeTraverse;
420 }
421 
423 {
424  TNodeType& node = (*m_Tree)[x_node];
425 
426  // Determine visibility by keeping track of whether any of the (recursive) parents
427  // is collapsed.
428  CPhyloTreeNode::TTreeIdx parent_idx = node.GetParent();
430  if (parent_idx != CPhyloTreeNode::Null() && !(*m_Tree)[parent_idx].Expanded()) {
431  m_CollapsedParentIdx = parent_idx;
432  }
433  }
434 
435  // add 1 if node is visible (does not have a collapsed parent)
437 
438  // position
439  (*node).IDX().first = m_PosX;
440  (*node).IDX().second = m_PosY;
441 
442  // adjusting width
443  if (m_Width < m_PosX) { m_Width = m_PosX; }
444 
445  return eTreeTraverse;
446 }
447 
449 {
450  TNodeType& node = (*m_Tree)[x_node];
451 
452  // Track visibility
453  if (!node.Expanded() && m_CollapsedParentIdx == x_node) {
455  }
456 
457  // Add 1 if node is visible (does not have a collapsed parent)
458  m_PosX -= (( (m_CollapsedParentIdx == CPhyloTreeNode::Null()) && node.Expanded()) ? 1 : 0);
459 
460  // number of leaves/childs calculation
461  for (TTreeType::TNodeList_I it = node.SubNodeBegin();
462  it != node.SubNodeEnd();
463  it++){
464  TNodeType& nn = (*m_Tree)[*it];
465 
466  // our leaves
467  if (node.Expanded()){
468  if (nn.IsLeafEx()){
469  (*node).SetNumLeavesEx((*node).GetNumLeavesEx() + 1);
470  }
471  else {
472  (*node).SetNumLeavesEx((*node).GetNumLeavesEx() + (*nn).GetNumLeavesEx());
473  }
474  }
475 
476  // 'real leaves'
477  if (nn.IsLeaf()){
478  (*node).SetNumLeaves((*node).GetNumLeaves() + 1);
479  }
480  else {
481  (*node).SetNumLeaves((*node).GetNumLeaves() + (*nn).GetNumLeaves());
482  }
483  }
484 
485  return eTreeTraverse;
486 }
487 
488 
490 {
491  TNodeType& node = (*m_Tree)[node_idx];
492 
493  if (delta == 0 || delta == 1) {
494  const CPhyloNodeData::TPoint & xy = (*node).XY();
495  /// m_Rect.IsEmpty() returns true even when rect is line
496  if ((m_Rect.Left()==m_Rect.Right()) && (m_Rect.Top()==m_Rect.Bottom())){
497  m_Rect.Init(xy.X()-1e-04, xy.Y()-1e-04, xy.X()+1e-04, xy.Y()+1e-04);
498  }
499  else {
500  if (m_Rect.Left() > xy.X()) m_Rect.SetLeft(xy.X());
501  if (m_Rect.Right() < xy.X()) m_Rect.SetRight(xy.X());
502  if (m_Rect.Bottom() > xy.Y()) m_Rect.SetBottom(xy.Y());
503  if (m_Rect.Top() < xy.Y()) m_Rect.SetTop(xy.Y());
504  }
505  }
506 
507  if (!node.Expanded())
508  return eTreeTraverseStepOver;
509 
510  return eTreeTraverse;
511 }
512 
514 {
515  m_Rect.Init();
516 }
517 
519 {
520  TNodeType& node = (*m_Tree)[node_idx];
521 
522  if (delta == 0 || delta == 1) {
523  if (node.IsLeaf()) {
524  // Get priority feature for current node, convert it to int, and then
525  // compare it to m_MaxPriority
526  string priority_str = (*node).GetBioTreeFeatureList().GetFeatureValue(m_PriorityId);
527  int priority = -1;
528 
529  if (priority_str != "") {
530  try {
531  priority = NStr::StringToInt(priority_str);
532  }
533  catch (CStringException&) {
534  priority = -1;
535  }
536  }
537  if (priority > m_MaxPriority) {
538  m_MaxPriority = priority;
540  m_PriorityLeafIdx = node_idx;
541  }
542  // Choose the (leaf) node that is closer to the center leaf node
543  // of the subtree as the maximum priority node
544  else if (priority == m_MaxPriority) {
545  if (std::abs(float(m_LeafCount) - float(m_LeafMidpoint)) <
546  std::abs(float(m_MaxPriorityLeafNum) - float(m_LeafMidpoint))) {
548  m_PriorityLeafIdx = node_idx;
549  }
550  }
551 
552  ++m_LeafCount;
553  }
554  }
555 
556  if (!node.Expanded())
557  return eTreeTraverseStepOver;
558 
559  return eTreeTraverse;
560 }
561 
563 {
564  size_t num_leaves = (*m_Tree)[node_idx].GetValue().GetNumLeaves();
565  m_LeafMidpoint = (((float)num_leaves) / 2.0f) - 0.5f;
566  m_PriorityId = (*m_Tree).GetFeatureDict().GetId("$PRIORITY");
567 }
568 
570 {
571  TNodeType& node = (*m_Tree)[node_idx];
572 
573  if (delta == 0 || delta == 1) {
574  m_Id = std::max(m_Id, (*node).GetId());
575  }
576  return eTreeTraverse;
577 }
578 
580 {
581  if (delta == 0 || delta == 1) {
582  m_Tree->Sort(node_idx, m_Order);
583  }
584 
585  return eTreeTraverse;
586 }
587 
589 {
590  ETreeTraverseCode traverse_code =
592 
593  TNodeType& node = (*m_Tree)[node_idx];
594  m_Distances[node_idx].m_NodeIdx = node_idx;
595  float node_dist = std::max(0.0f, node.GetValue().GetDistance());
596 
597  if (delta == -1) {
599  float max_child_dist = 0.0f;
600  bool do_not_collapse = false;
601 
602  for (size_t i = 0; i < node.GetChildren().size(); ++i) {
603  TNodeType& child_node = (*m_Tree)[node.GetChildren()[i]];
604  float child_dist = std::max(0.0f, child_node->GetDistance());
605  max_child_dist = std::max(max_child_dist, m_DistFromRoot + child_dist);
606 
607  if (!(*m_CheckCollapseFunc)(child_node))
608  do_not_collapse = true;
609  }
610 
611  if (do_not_collapse)
612  max_child_dist = 0.0f;
613 
614  m_Distances[node_idx].m_MaxChildDist = max_child_dist;
615  char buf[128];
616  sprintf(buf, "%f", max_child_dist);
617  (*node).SetFeature(m_Tree->GetFeatureDict(), "mcd", buf);
618  }
619  else if (delta == 1 || delta == 0) {
620  if (delta == 1) {
621  m_DistFromRoot += node_dist;
622  }
623  else if (delta == 0 && node.HasParent()) {
624  // If the root has a distance value, ignore it
625  m_DistFromRoot = m_DistFromRoot - m_LastDist + node_dist;
626  }
627  }
628  if (node.IsLeaf()) {
629  m_Distances[node_idx].m_MaxChildDist = -1.0f;
630  char buf[128];
631  sprintf(buf, "%f", m_DistFromRoot);
632  (*node).SetFeature(m_Tree->GetFeatureDict(), "mcd", buf);
633  }
634 
635  m_LastDist = node_dist;
636  return traverse_code;
637 }
638 
640 {
641  ETreeTraverseCode traverse_code =
643 
644  TNodeType& node = (*m_Tree)[node_idx];
645 
646  if (delta==-1) {
648  float max_branch_dist = 0.0f;
649  for (size_t i = 0; i < node.GetChildren().size(); ++i) {
650  max_branch_dist = std::max(max_branch_dist, m_Distances[node.GetChildren()[i]]);
651  }
652 
653  m_Distances[node_idx] = max_branch_dist;
654  }
655  else if (delta==1 || delta==0) {
656  if (delta==1) {
657  m_DistFromRoot += node.GetValue().GetDistance();
658  }
659  else if (delta==0 && node.HasParent()) {
660  // If the root has a distance value, ignore it
662  node.GetValue().GetDistance();
663  }
664  }
665  if (node.IsLeaf()) {
666  m_Distances[node_idx] = m_DistFromRoot;
669  }
670 
671  m_LastDist = node.GetValue().GetDistance();
672  return traverse_code;
673 }
674 
676 {
677  ETreeTraverseCode traverse_code =
679 
680  TNodeType& node = (*m_Tree)[node_idx];
681 
682  // Just for debugging (remove any highlighted path previously found)
683  /*
684  node.GetValue().RemoveFeature(
685  m_Tree->GetFeatureDict(), "$NODE_COLOR");
686  node.GetValue().InitFeatures(m_Tree->GetFeatureDict(), m_ColorTable);
687  */
688 
689  if (delta == -1) {
691  m_Distances[node_idx] = m_DistFromRoot;
692  }
693  else if (delta == 1 || delta == 0) {
694  if (delta == 1) {
695  m_DistFromRoot += node.GetValue().GetDistance();
697  }
698  else if (delta == 0 && node.HasParent()) {
699  // If the root has a distance value, ignore it
701  node.GetValue().GetDistance();
703  }
704  }
705  if (node.IsLeaf()) {
706  m_Distances[node_idx] = m_MaxBranchDist;
707 
708  /// Save idx of node with furthest distance from root
709  if (m_MaxDistNode == CPhyloTree::Null() ||
711  m_MaxDistNode = node_idx;
712  }
713  }
714 
715  m_LastDist = node.GetValue().GetDistance();
716  return traverse_code;
717 }
718 
720 {
721  ETreeTraverseCode traverse_code =
723 
724  TNodeType& node = (*m_Tree)[node_idx];
725 
726  if (node.IsLeaf() && node_idx != m_MaxDistNode) {
727 
728  /// Compare distance from this leaf to m_MaxDistNode. For each leaf,
729  /// find first common node with m_MaxDistPathToRoot. The distance
730  /// between the nodes is then the distance back to the common node.
731  vector<TTreeIdx> path;
732  TTreeIdx idx = node_idx;
733 
734  float leaf_dist = (*m_Tree)[idx].GetValue().GetDistance();
735  path.push_back(idx);
736 
737  bool found = false;
738  while (!found) {
739  TTreeIdx parent_idx = (*m_Tree)[idx].GetParent();
740 
741  // Check if current node is shared (if it is in the current node's
742  // path-to-root).
743  vector<TTreeIdx>::iterator iter = std::lower_bound(m_SortedMaxDistPathToRoot.begin(),
744  m_SortedMaxDistPathToRoot.end(), parent_idx);
745 
746  idx = parent_idx;
747  path.push_back(idx);
748 
749  // We don't add distance stored in lowest common ancestor since that
750  // distance is from the lca to the lca's parent, and that's not part
751  // of the path between the two leaf nodes.
752  if (iter != m_SortedMaxDistPathToRoot.end() && *iter == idx)
753  found = true;
754  else
755  leaf_dist += (*m_Tree)[idx].GetValue().GetDistance();
756  }
757 
758  // idx should now be equal to the lowest common ancestor between
759  // m_MaxDistNode and 'node'. Total distance is then the distance
760  // from 'idx' to 'node' minus the distance to 'idx' from the root
761  // plus leaf_dist (distance from m_MaxDistNode to 'idx')
762  float total_dist = (m_MaxDistFromRoot - m_Distances[idx]) + leaf_dist;
763  if (total_dist > m_MaxDist) {
764  m_MaxDist = total_dist;
765  vector<TTreeIdx>::iterator iter2 = std::find(m_MaxDistPathToRoot.begin(),
766  m_MaxDistPathToRoot.end(), idx);
767  m_MaxPath.clear();
768 
769  // The last node in path2 and the first node in m_PathToRoot is
770  // lowest common ancestor node. (pointed to by iter2). Do not add
771  // this to m_MaxPath twice.
772  m_MaxPath.insert(m_MaxPath.end(), path.begin(), path.end());
773  m_MaxPath.insert(m_MaxPath.end(), iter2 + 1, m_MaxDistPathToRoot.end());
774  }
775  }
776 
777  return traverse_code;
778 }
779 
780 void CPhyloTreeMidpointDist::GetLongest(vector<TTreeIdx>& path, float& length)
781 {
782  path = m_MaxPath;
783  length = m_MaxDist;
784 }
785 
787 {
788  ETreeTraverseCode traverse_code =
790 
791  TNodeType& node = (*m_Tree)[node_idx];
792 
793  if (delta==-1) {
794  // get min/max. special case for blank since otherwise it would always be low value
796  for (; iter!=node.SubNodeEndEx(); ++iter) {
797  TTreeIdx sub_node_idx = *iter;
798 
799  if ( m_LabelRanges[node_idx].first == "" )
800  m_LabelRanges[node_idx].first = m_LabelRanges[sub_node_idx].first;
801  else
802  m_LabelRanges[node_idx].first = std::min(m_LabelRanges[node_idx].first,
803  m_LabelRanges[sub_node_idx].first);
804  m_LabelRanges[node_idx].second = std::max(m_LabelRanges[node_idx].second,
805  m_LabelRanges[sub_node_idx].second);
806  }
807  }
808  else if (delta==1 || delta==0) {
809  if (node->GetLabel() != "") {
810  m_LabelRanges[node_idx].first = node->GetLabel();
811  m_LabelRanges[node_idx].second = node->GetLabel();
812  }
813  }
814 
815  return traverse_code;
816 }
817 
819 {
820  if (delta == 0 || delta == 1) {
822  }
823 
824  return eTreeTraverse;
825 }
826 
828 {
829  if (delta == 0 || delta == 1) {
830  m_Tree->SortLabel(node_idx, m_Order);
831  }
832 
833  return eTreeTraverse;
834 }
835 
837 {
838  if (delta == 0 || delta == 1) {
840  }
841 
842  return eTreeTraverse;
843 }
844 
845 
846 
848 {
849  TNodeType& node = (*m_Tree)[x_node];
850 
851  bool allowed = x_Allowed(node);
852 
853  if (m_TreeStack.size() == 0) {
854  // first node to begin conversion with
855  if (allowed) {
856  //unique_ptr<TTreeType> pnode(MakeNewTreeNode(x_node));
857  m_TreeStack.push_back(x_node);
858  m_Tree->SetRootIdx(x_node);
859  }
860  return eTreeTraverse;
861  }
862 
863  // Mark disallowed nodes - use an invalid ID (-1)
864  if (!allowed)
865  node.GetValue().SetId(CPhyloNodeData::TID(-1));
866 
867  if (delta == 0) {
868  TTreeIdx back_node = CPhyloTree::Null();
869 
870  if (m_TreeStack.size() > 0) {
871  back_node = m_TreeStack.back();
872  m_TreeStack.pop_back();
873  }
874 
875  if (!allowed) {
876  if (m_TreeStack.size() == 0)
877  m_TreeStack.push_back(back_node);
878  else
879  m_TreeStack.push_back(m_TreeStack.back());
880  return eTreeTraverse;
881  }
882 
883 
884  TTreeIdx parent_idx = m_TreeStack.back();
885 
886  // Only set parents when we visit leafs or we are going
887  // back up the tree (delta == -1)
888  if (node.IsLeaf()) {
889  node.SetParent(parent_idx);
890 
891  // maintain order?
892  TNodeType& parent = (*m_Tree)[parent_idx];
893  if (!parent.HasChild(x_node))
894  parent.AddChild(x_node);
895  }
896 
897 
898  m_TreeStack.push_back(x_node);
899  return eTreeTraverse;
900  }
901 
902  if (delta == 1) {
903  // adjusting stack
904  if (!allowed) {
905  m_TreeStack.push_back(m_TreeStack.back());
906  return eTreeTraverse;
907  }
908 
909  TTreeIdx parent_idx = m_TreeStack.back();
910 
911  // Only set parents when we visit leafs or we are going
912  // back up the tree (delta == -1)
913  if (node.IsLeaf()) {
914  node.SetParent(parent_idx);
915 
916  // maintain order?
917  TNodeType& parent = (*m_Tree)[parent_idx];
918  if (!parent.HasChild(x_node))
919  parent.AddChild(x_node);
920  }
921 
922  m_TreeStack.push_back(x_node);
923  return eTreeTraverse;
924  }
925  if (delta == -1) {
926  // returned to first node = stop traversal
927  if (m_TreeStack.empty()) {
928  return eTreeTraverseStop;
929  }
930  m_TreeStack.pop_back();
931 
932  if (allowed) {
933  if (m_TreeStack.size() > 1) {
934  TTreeIdx parent_idx = m_TreeStack[m_TreeStack.size()-2];
935 
936  node.SetParent(parent_idx);
937 
938  // Does this maintain order?
939  TNodeType& parent = (*m_Tree)[parent_idx];
940  if (!parent.HasChild(x_node))
941  parent.AddChild(x_node);
942  }
943  }
944  else {
945  m_Tree->GetParent(node).RemoveChild(x_node);
946  node.SetParent(CPhyloTree::Null());
947  }
948  }
949 
950  return eTreeTraverse;
951 }
952 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Things for representing and manipulating bio trees.
Set debugging utilities.
Template class to create a table with custom row-column access.
Definition: ncbi_table.hpp:66
const TColumn & Column(unsigned int idx) const
Get column name.
Definition: ncbi_table.hpp:441
unsigned int ColumnIdx(const TColumn &col) const
Get column index.
Definition: ncbi_table.hpp:415
const TValueType & GetCell(unsigned int row_idx, unsigned int col_idx) const
Access table element by index.
Definition: ncbi_table.hpp:467
unsigned int Rows() const
Number of rows.
Definition: ncbi_table.hpp:240
unsigned int Cols() const
Number of column.
Definition: ncbi_table.hpp:246
void SetId(TID x_id)
objects::CNode::TId TID
TDistance GetDistance() const
CRgbaGradColorTable * m_ColorTable
void x_UpdateProperties(TNodeType &node, int row_idx)
Updates node properties using values from m_AttrTable.
virtual ETreeTraverseCode x_OnStepDown(TTreeIdx x_node)
CPhyloTreeNode::TTreeIdx m_CollapsedParentIdx
CPhyloTreeCalculator(TTreeType *tree, CRgbaGradColorTable *color_table)
virtual ETreeTraverseCode x_OnStep(TTreeIdx x_node, int delta)
void SetAttrTable(const TAttrTable &attr)
virtual ETreeTraverseCode x_OnStepRight(TTreeIdx x_node)
TBioTreeFeatureId m_AttrKeyId
string m_LabelFormat
label calculation
const TAttrTable * m_AttrTable
Attributes with seq-ids optionally provided to update tree properties.
CPhyloTree::TClusterID TClusterID
void Init(CRgbaGradColorTable *ct)
vector< AttrKey > m_AttrKeys
Mapping for efficient lookup of seq-ids in m_AttrTable.
virtual ETreeTraverseCode x_OnStepLeft(TTreeIdx x_node)
TTreeIdx m_MaxDistNode
Index of node that is furthest from the root.
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
vector< float > m_Distances
Distance of each node from root.
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
vector< pair< string, string > > m_LabelRanges
Min and max label values (according to lexicographic.
string GetLabelForNode(const CPhyloTree &tree, const CPhyloTreeNode &node, const string &format)
vector< float > m_Distances
Max distace of any child of a node from the root, saved in same order as node array in tree.
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
float m_MaxDist
Keep track of overall max and min distances.
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
CRef< SCollapsable > m_CheckCollapseFunc
vector< SChildMaxDist > m_Distances
Max distace of any direct child of a node from the root, saved in same order as node array in tree.
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
float m_MaxDist
Total distance along m_MaxPath.
vector< TTreeIdx > m_MaxDistPathToRoot
vector< TTreeIdx > m_SortedMaxDistPathToRoot
vector< float > m_Distances
Distance of each node from root node.
void GetLongest(vector< TTreeIdx > &path, float &length)
TTreeIdx m_MaxDistNode
Node at greatest distance from root, its distance and vector of nodes from root to m_MaxDistNode (sor...
vector< TTreeIdx > m_MaxPath
Path (set of nodes) that is the longest path in the tree.
bool Expanded() const
Return true if node is currently not collapsed.
TNodeList_I SubNodeEndEx()
TNodeList_I SubNodeBeginEx()
Return the child nodes only if visible.
float m_LeafMidpoint
Index of leaf at center of subtree (leaves in subtree/2)
TTreeIdx m_PriorityLeafIdx
Leaf with highest priority number.
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
size_t m_MaxPriorityLeafNum
Leaf index of node with m_MaxPriority.
void Init(TTreeIdx node_idx)
int m_MaxPriority
Max priority value found in priority field in subtree.
TBioTreeFeatureId m_PriorityId
Id of priority feature in tree.
size_t m_LeafCount
Current leaf count (as we iterate over the subtree)
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
const vector< pair< string, string > > & m_LabelRanges
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
const vector< float > & m_Distances
virtual ETreeTraverseCode x_OnStep(TTreeIdx node_idx, int delta)
Tree subclass also has functions and data needed for rendering and selection.
Definition: phylo_tree.hpp:52
void SortSubtreeDist(TTreeIdx idx, const vector< float > &distances, bool order)
Sort the children of a node based on length of longest subtree.
Definition: phylo_tree.cpp:216
CBioTreeFeatureDictionary & GetFeatureDict()
Return feature dictionary.
Definition: phylo_tree.hpp:323
void SortLabel(TTreeIdx idx, bool order)
Sort the children of a node based on label comparison (alphabetical order)
Definition: phylo_tree.cpp:224
void SortLabelRange(TTreeIdx idx, const vector< pair< string, string > > &subtree_labels, bool order)
Sort the children of a node based on the alphanumeric range of their child nodes.
Definition: phylo_tree.cpp:232
void Sort(TTreeIdx idx, bool order)
Sort the children of a node based on the number of children they have.
Definition: phylo_tree.cpp:208
CRgbaGradColorTable Provides a storage for colors (to eliminate color creation overhead) and Function...
CStringException –.
Definition: ncbistr.hpp:4508
void RemoveChild(TTreeIdx child_idx)
Remove child node if it is a child of this node.
Definition: tree_model.hpp:653
TNodeList & GetChildren()
Return the indices of this node's child nodes.
Definition: tree_model.hpp:126
void SetParent(TTreeIdx parent_idx)
Set index of nodes parent.
Definition: tree_model.hpp:77
bool HasChild(TTreeIdx child_idx) const
Check if another node is a child of this node.
Definition: tree_model.hpp:665
TData & GetValue()
Return the value object for the node.
Definition: tree_model.hpp:159
bool HasParent() const
Check if the node has a parent.
Definition: tree_model.hpp:90
TNodeList::const_iterator TNodeList_CI
Definition: tree_model.hpp:64
bool IsLeaf() const
Report whether this is a leaf node.
Definition: tree_model.hpp:121
static TTreeIdx Null()
Static function that returns the null value.
Definition: tree_model.hpp:636
void AddChild(TTreeIdx child_idx)
Add a child node.
Definition: tree_model.hpp:640
TNodeType & GetParent(TNodeType &node)
Return a reference to the parent node of the given node.
Definition: tree_model.hpp:690
TNodeType::TNodeList_I TNodeList_I
Definition: tree_model.hpp:189
static TTreeIdx Null()
Return the index value that represents a NULL node.
Definition: tree_model.hpp:678
void SetRootIdx(TTreeIdx idx)
Set the index of the root node of the tree.
Definition: tree_model.hpp:262
virtual bool x_Allowed(TNodeType &)
virtual ETreeTraverseCode x_OnStep(TTreeIdx x_node, int delta)
vector< TTreeIdx > m_TreeStack
virtual ETreeTraverseCode x_OnStepDown(TTreeIdx x_node)
ETreeTraverseCode operator()(TTreeType &tree, TTreeIdx tree_node, int delta)
virtual ETreeTraverseCode x_OnStep(TTreeIdx x_node, int delta)
virtual ETreeTraverseCode x_OnStepRight(TTreeIdx x_node)
CPhyloTree::TTreeIdx TTreeIdx
CPhyloTree::TNodeType TNodeType
virtual ETreeTraverseCode x_OnStepLeft(TTreeIdx x_node)
void clear()
Definition: map.hpp:169
Include a standard set of the NCBI C++ Toolkit most basic headers.
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
Definition: dlist.tmpl.h:46
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
#define LOG_POST(message)
This macro is deprecated and it's strongly recomended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
void Error(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1197
T & X()
Definition: vect2.hpp:107
T & Y()
Definition: vect2.hpp:109
void SetRight(T right)
Definition: glrect.hpp:114
void Init()
Definition: glrect.hpp:62
void SetBottom(T bottom)
Definition: glrect.hpp:113
T Top() const
Definition: glrect.hpp:84
T Bottom() const
Definition: glrect.hpp:82
T Right() const
Definition: glrect.hpp:83
T Left() const
Definition: glrect.hpp:81
void SetLeft(T left)
Definition: glrect.hpp:112
void SetTop(T top)
Definition: glrect.hpp:115
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
ETreeTraverseCode
Tree traverse code returned by the traverse predicate function.
Definition: ncbi_tree.hpp:51
TBioTreeFeatureId GetId(const string &feature_name) const
If feature is already registered returns its id by name.
Definition: bio_tree.cpp:209
bool HasFeature(const string &feature_name) const
Check if feature is listed in the dictionary.
Definition: bio_tree.cpp:146
@ eTreeTraverseStop
Stop traversal (return form algorithm)
Definition: ncbi_tree.hpp:53
@ eTreeTraverse
Keep traversal.
Definition: ncbi_tree.hpp:52
@ eTreeTraverseStepOver
Do not traverse current node (pick the next one)
Definition: ncbi_tree.hpp:54
char * buf
int i
constexpr auto sort(_Init &&init)
const struct ncbi::grid::netcache::search::fields::SIZE size
Compressed bitset (entry point to bm.h)
#define abs(a)
Definition: ncbi_heapmgr.c:130
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
T max(T x_, T y_)
T min(T x_, T y_)
Int4 delta(size_t dimension_, const Int4 *score_)
Structure allows us to more efficiently store and look up keys (usually seq-ids) in m_AttrTable.
size_t TTreeIdx
Bi-directionaly linked N way tree allocated in a contiguous memory block.
Definition: tree_model.hpp:48
Modified on Sun Jul 14 04:59:45 2024 by modify_doxy.py rev. 669887