NCBI C++ ToolKit
cuSeqTreeAlg.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: cuSeqTreeAlg.cpp 86179 2019-04-15 15:50:08Z lanczyck $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Chris Lanczycki
27 *
28 * File Description: treealg.cpp
29 *
30 * Virtual base class for representing sequence-tree calculation algorithms.
31 * Implementations of specific algorithms should be subclasses that define,
32 * minimally, the ComputeTree method.
33 *
34 */
35 
36 #include <ncbi_pch.hpp>
38 
40 BEGIN_SCOPE(cd_utils)
41 
42 const string TreeAlgorithm::NO_NAME = "<unnamed algorithm>";
44 
45 /*=======================================*/
46 /* Midpoint-root related functions */
47 /*=======================================*/
48 
49 void MidpointRootSeqTree(const SeqTree& oldTree, SeqTree& newTree) {
50 
51  SeqTree tmpTree;
52 
53  bool result = true;
54  bool found = false;
55  double midpointDist = 0.0;
56  double dToEnd1 = 0.0;
57  double dmax = 0.0;
58  double dToNewParent = 0, dToNewChild = 0;
59  int nTopLevelNodes;
60 
61  SeqItem item;
62  SeqTree::iterator cursor1, cursor2, newTreeRoot, newTreeCursor;
63  SeqTree::sibling_iterator tmpSibling, tmpTreeCursor;
64  SeqTree::iterator end1, end2;
65 
66  newTree.clear();
67  if (oldTree.size() == 0) {
68  return;
69  }
70 
71  tmpTree = oldTree;
72 
73  if (GetMaxPath(tmpTree, dmax, end1, end2)) {
74 
75  // Find where midpoint is in the tree; insert a new node there which will
76  // be the new root node. By definition, the longer path is to 'end1'.
77 
78  midpointDist = dmax/2.0;
79  cursor1 = end1;
80  while (cursor1.is_valid() && dToEnd1 < midpointDist) {// != tmpTree.begin()) {
81  dToEnd1 += cursor1->distance;
82  cursor2 = cursor1;
83  cursor1 = tmpTree.parent(cursor2);
84  }
85 
86  // Don't bother if the new root would be either between old root and
87  // the head of the tree or between some node and the old root.
88  if (!cursor1.is_valid() || !tmpTree.parent(cursor1).is_valid()) {
89  newTree = oldTree;
90  return;
91  }
92 
93  item = SeqItem("New Root", -1, dToEnd1-midpointDist);
94  if (cursor1.is_valid()) { // midpoint between two nodes
95  cursor2->distance -= item.distance;
96  if (dToEnd1 == midpointDist) { //midpoint lands on existing (non-head) node
97  newTreeRoot = cursor1;
98  cursor1 = tmpTree.parent(newTreeRoot);
99  } else {
100  newTreeRoot = tmpTree.append_child(cursor1, item);
101  tmpTree.reparent(newTreeRoot, cursor2, tmpTree.next_sibling(cursor2));
102  }
103 
104  // Starting at newTreeRoot, adjust distances in SeqItems on path
105  // to the head of tmpTree. *** How deal w/ head potentially becoming
106  // a new node?? ***
107 
108  cursor2 = newTreeRoot;
109  dToNewParent = cursor2->distance;
110  cursor2->distance = 0.0;
111  while (cursor1.is_valid()) {
112  tmpSibling = tmpTree.previous_sibling(cursor2);
113  if (!tmpSibling.is_valid()) {
114  tmpSibling = tmpTree.next_sibling(cursor2);
115  }
116  dToNewChild = cursor1->distance; // dist to current parent (which will become the new child...)
117  cursor1->distance = dToNewParent;
118  dToNewParent = dToNewChild;
119  cursor2 = cursor1;
120  cursor1 = tmpTree.parent(cursor2);
121  }
122 
123  // If old root will not be in new tree, adjust distance on its children
124  if (tmpTree.number_of_children(cursor2) == 2) {
125  tmpTreeCursor = cursor2.begin();
126  while (tmpTreeCursor != tmpSibling) {
127  ++tmpTreeCursor;
128  }
129  tmpTreeCursor->distance += cursor2->distance;
130  }
131 
132  }
133  /* should be no else
134  } else { // midpoint lies between a node and the head
135  newTreeRoot = tmpTree.insert(tmpTree.begin(), item);
136  if (dToEnd1 == midpointDist) { // head is already the root; do nothing
137  newTree = oldTree;
138  } else {
139  cursor2->distance -= item.distance;
140  tmpTree.append_child(newTreeRoot, cursor2);
141  tmpTree.erase(cursor2);
142  }
143  cursor2 = newTreeRoot;
144  dToNewParent = cursor2->distance;
145  cursor2->distance = 0.0;
146  }
147  */
148 
149  // Now deal with any other top-level nodes (cursor2 is attached to 'head')
150  nTopLevelNodes = tmpTree.number_of_siblings(tmpTree.begin());
151  if (nTopLevelNodes == 2) { // sibling will attach directly to cursor2
152  if (tmpTree.next_sibling(cursor2) != tmpTree.end()) {
153  cursor1 = tmpTree.next_sibling(cursor2);
154  } else if (tmpTree.previous_sibling(cursor2) != tmpTree.end()) {
155  cursor1 = tmpTree.previous_sibling(cursor2);
156  } else {
157  cursor1 = tmpTree.end();
158  }
159  dToNewChild = 0.0;
160  cursor1->distance += dToNewParent;
161  } else if (nTopLevelNodes > 2) { // an intermediate node will be inserted as a child of cursor2 below
162  item = *cursor2;
163  dToNewChild = cursor2->distance;
164  }
165 
166  newTree.SeqTreeBase::operator=(tmpTree.reroot(newTreeRoot));
167 
168  // Fix up the distance of internal node corresp. to head of old tree if needed.
169  if (nTopLevelNodes > 2) {
170 
171  SeqItem nullItem = SeqItem();
173  int count = 0;
174  newTreeCursor = newTree.begin();
175  while (!found && newTreeCursor != newTree.end()) {
176  if (*newTreeCursor == item) {
177  sit = newTreeCursor.begin();
178  while (sit != newTreeCursor.end()) {
179  if (*sit == nullItem) {
180  sit->distance = dToNewParent;
181  ++count;
182  }
183  ++sit;
184  }
185  found = (count == 1) ? true : false;
186  }
187  ++newTreeCursor;
188  }
189  if (!found) { // the empty former-head node was not found.
190  result = false;
191  }
192  }
193  } else {
194  result = false;
195  }
196 
197  if (!result) {
198  newTree.clear();
199  }
200 }
201 
202 
203 bool GetMaxPath(const SeqTree& atree, double& dMax, SeqTree::iterator& end1, SeqTree::iterator& end2) {
204 
205  bool result = true;
206  int foundCount = 0;
207  double rootDMax = 0.0, d1 = 0.0, d2 = 0.0;
208 
209  if (atree.size() < 2) {
210  dMax = 0.0;
211  end1 = atree.end();
212  end2 = atree.end();
213  return false;
214  }
215 
216  SeqTree tmpTree;
217  SeqTree::iterator tmpRoot = tmpTree.insert(tmpTree.begin(), SeqItem());
218  SeqTree::iterator rootEnd1 = tmpTree.begin(), rootEnd2 = tmpTree.begin();
219  SeqTree::iterator it = atree.begin();
220 
221 
222  // Since we cannot pass in the 'head' of the tree, make an dummy tree w/ a root node
223  // to which attach copy of the real tree.
224  while (it.is_valid() && it != atree.end()) {
225  tmpTree.append_child(tmpRoot, it);
226  it = atree.next_sibling(it);
227  }
228 
229  result = GetMaxPath(tmpRoot, rootDMax, d1, rootEnd1, d2, rootEnd2);
230 
231 // std::cout << "longest two branches:\n" << *rootEnd1 << " d1= " << d1 << std::endl;
232 // std::cout << *rootEnd2 << " d2= " << d2 << std::endl;
233 // std::cout << "in GetMaxPath: max distance = " << rootDMax << std::endl;
234 
235 // find nodes in the original tree...
236  it = atree.begin();
237  while (it != atree.end() && foundCount < 2) {
238  if (rootEnd1.is_valid() && *it == *rootEnd1) {
239  rootEnd1 = it;
240  ++foundCount;
241  } else if (rootEnd2.is_valid() && *it == *rootEnd2) {
242  rootEnd2 = it;
243  ++foundCount;
244  }
245  ++it;
246  }
247 
248  // Didn't find the ends!
249  if (foundCount < 2) {
250 // dMax = 0.0;
251 // end1 = atree.end();
252 // end2 = atree.end();
253  result = false;
254  }
255 
256  if (result && rootEnd1 != rootEnd2 && rootDMax > 0) {
257  dMax = rootDMax;
258  end1 = rootEnd1;
259  end2 = rootEnd2;
260  } else {
261  dMax = 0.0;
262  end1 = atree.end();
263  end2 = atree.end();
264  result = false;
265  }
266  tmpTree.clear();
267  return result;
268 }
269 
270 // dBranch1 == longest branch to a child leaf
271 // dBranch2 == 2nd longest branch to a child leaf
272 // Join the two branches to form the longest leaf-to-leaf path.
273 bool GetMaxPath(const SeqTree::iterator& cursor, double& dMax, double& dBranch1, SeqTree::iterator& end1, double& dBranch2, SeqTree::iterator& end2) {
274 
275  bool result = true;
276 // bool updatedPath = false;
277  double localMax, localBranch1, localBranch2;
278  SeqTree::iterator localEnd1;
279  SeqTree::iterator localEnd2;
280 
281  assert(dBranch1 >= dBranch2);
282 
283  if (cursor.number_of_children() == 0) { // a leaf node
284  dBranch1 = 0;
285  dBranch2 = 0;
286  dMax = 0;
287  end1 = cursor;
288  end2 = SeqTree::iterator();
289 // updatedPath = true;
290  } else {
291 
292  double sibDist, testDist;
293  SeqTree::sibling_iterator sibCursor = cursor.begin();
294 
295  // Recusive call to GetMaxPath in loop
296  while (result && sibCursor != cursor.end()) {
297  sibDist = sibCursor->distance;
298  localEnd1 = SeqTree::iterator();
299  localEnd2 = SeqTree::iterator();
300  localBranch1 = 0;
301  localBranch2 = 0;
302  localMax = 0;
303  result = GetMaxPath(sibCursor, localMax, localBranch1, localEnd1, localBranch2, localEnd2);
304  if (result) {
305  testDist = localBranch1 + sibDist + dBranch1;
306  // sibCursor contains both ends of the longest path yet seen
307  if (localMax > dMax && localMax > testDist) {
308 // updatedPath = true;
309  dMax = localMax;
310  end1 = localEnd1;
311  end2 = localEnd2;
312  dBranch1 = localBranch1 + sibDist;
313  dBranch2 = localBranch2 + sibDist;
314  // sibCursor contains one end of the longest path yet seen
315  } else if (testDist >= localMax && testDist > dMax) {
316 // updatedPath = true;
317  dMax = testDist;
318  if (localBranch1 + sibDist > dBranch1) {
319  end2 = end1;
320  end1 = localEnd1;
321  dBranch2 = dBranch1;
322  dBranch1 = localBranch1 + sibDist;
323  } else {
324  end2 = localEnd1;
325  dBranch2 = localBranch1 + sibDist;
326  }
327  // keep the current longest path (sibCursor not on the longest path yet seen)
328  } else if (dMax >= localMax && dMax >= testDist) {
329  ;
330  // should never see this condition
331  } else {
332  result = false;
333  assert(result);
334  }
335 
336  }
337  ++sibCursor;
338  }
339  }
340  return result;
341 }
342 /*=======================================*/
343 /* End midpoint-root related functions */
344 /*=======================================*/
345 
347  if (m_tree && useMidpointRooting()) {
348 
349  // In case any nodes have zero distance (e.g., if there are identical sequences)
350  // reset distance to some small value.
351  // (midpoint root can become confused in such cases...)
352  for (SeqTree::iterator stit = m_tree->begin(); stit != m_tree->end(); ++stit) {
353  if (stit->distance < RESET_WITH_TINY_DISTANCE && m_tree->parent(stit).is_valid()) {
354  stit->distance = RESET_WITH_TINY_DISTANCE;
355  }
356  }
357 
358 // if (isMidpointRooted()) {
359  SeqTree tmpTree = *m_tree;
360 // *m_tree = MidpointRootSeqTree(tmpTree, *m_tree);
361  MidpointRootSeqTree(tmpTree, *m_tree);
362  if (m_tree->size() == 0) {
363  *m_tree = tmpTree;
364 // if (m_tree) {
365 // delete m_tree;
366 // m_tree = 0;
367 // }
368  }
369 // }
370  }
371 }
372 
373 
374 END_SCOPE(cd_utils)
double distance
Definition: cuSeqtree.hpp:70
static const Rootedness DEF_ROOTED
SeqTree * m_tree
bool useMidpointRooting()
void midpointRootIfNeeded()
static const string NO_NAME
bool is_valid() const
Definition: tree_msvc7.hpp:134
sibling_iterator end() const
unsigned int number_of_children() const
sibling_iterator begin() const
pre_order_iterator iterator
Definition: tree_msvc7.hpp:173
sibling_iterator previous_sibling(const iterator_base &) const
Definition: tree_msvc7.hpp:662
sibling_iterator next_sibling(const iterator_base &) const
Definition: tree_msvc7.hpp:669
iter parent(iter) const
Definition: tree_msvc7.hpp:655
iter append_child(iter position)
Definition: tree_msvc7.hpp:680
tree reroot(iterator newRoot)
Definition: tree_msvc7.hpp:341
void clear()
Definition: tree_msvc7.hpp:520
pre_order_iterator begin() const
Definition: tree_msvc7.hpp:573
iter insert(iter position, const T &x)
Definition: tree_msvc7.hpp:762
unsigned int number_of_children(const iterator_base &) const
iter reparent(iter position, sibling_iterator begin, sibling_iterator end)
int size() const
unsigned int number_of_siblings(const iterator_base &) const
pre_order_iterator end() const
Definition: tree_msvc7.hpp:579
bool GetMaxPath(const SeqTree &atree, double &dMax, SeqTree::iterator &end1, SeqTree::iterator &end2)
void MidpointRootSeqTree(const SeqTree &oldTree, SeqTree &newTree)
const double RESET_WITH_TINY_DISTANCE
meTree * newTree()
Definition: graph.cpp:102
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define count
#define assert(x)
Definition: srv_diag.hpp:58
else result
Definition: token2.c:20
Modified on Fri Sep 20 14:58:13 2024 by modify_doxy.py rev. 669887