NCBI C++ ToolKit
dist_methods.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: dist_methods.cpp 102748 2024-07-05 13:55:52Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Josh Cherry
27  *
28  * File Description: Distance methods for phylogenic tree reconstruction
29  *
30  */
31 
32 
33 #include <ncbi_pch.hpp>
34 #include <math.h>
35 #include <corelib/ncbifloat.h>
36 #include <corelib/ncbi_limits.hpp>
38 
39 #include "fastme/graph.h"
40 
42 
43 #ifdef NCBI_COMPILER_MSVC
44 # define isfinite _finite
45 #elif defined(NCBI_OS_SOLARIS) && !defined(__builtin_isfinite) \
46  && (!defined(__GNUC__) || (__GNUC__ == 4 && __GNUC_MINOR < 5))
47 # undef isfinite
48 # define isfinite finite
49 #endif
50 
53 
55 {
56  if (!CDistMethods::AllFinite(mat)) {
57  throw invalid_argument("Matrix contained NaN or Inf");
58  }
59 }
60 
61 
62 /// d = -3/4 ln(1 - (4/3)p).
63 void CDistMethods::JukesCantorDist(const TMatrix& frac_diff,
64  TMatrix& result)
65 {
66  result.Resize(frac_diff.GetRows(), frac_diff.GetCols());
67  for (size_t i = 0; i < frac_diff.GetRows(); ++i) {
68  for (size_t j = 0; j < frac_diff.GetCols(); ++j) {
69  result(i, j) = -0.75 * log(1 - (4.0 / 3.0) * frac_diff(i, j));
70  }
71  }
72 }
73 
74 
75 /// d = -ln(1 - p)
76 void CDistMethods::PoissonDist(const TMatrix& frac_diff,
77  TMatrix& result)
78 {
79  result.Resize(frac_diff.GetRows(), frac_diff.GetCols());
80  for (size_t i = 0; i < frac_diff.GetRows(); ++i) {
81  for (size_t j = 0; j < frac_diff.GetCols(); ++j) {
82  result(i, j) = -log(1 - frac_diff(i, j));
83  }
84  }
85 }
86 
87 
88 /// d = -ln(1 - p - 0.2p^2)
89 void CDistMethods::KimuraDist(const TMatrix& frac_diff,
90  TMatrix& result)
91 {
92  result.Resize(frac_diff.GetRows(), frac_diff.GetCols());
93  for (size_t i = 0; i < frac_diff.GetRows(); ++i) {
94  for (size_t j = 0; j < frac_diff.GetCols(); ++j) {
95  result(i, j) = -log(1 - frac_diff(i, j)
96  - 0.2 * frac_diff(i, j) * frac_diff(i, j));
97  }
98  }
99 }
100 
101 ///1 - p = (1 - e^(2*d)) / (2 * d)
102 /// using approximation d = p(2 - p) / (2(1 - p))
103 void CDistMethods::GrishinDist(const TMatrix& frac_diff,
104  TMatrix& result)
105 {
106  result.Resize(frac_diff.GetRows(), frac_diff.GetCols());
107  for (size_t i = 0; i < frac_diff.GetRows(); ++i) {
108  for (size_t j = 0; j < frac_diff.GetCols(); ++j) {
109  if (frac_diff(i, j) >= 1.0) {
110  throw invalid_argument("Grishin distance can not be computed \
111  for sequences that are 100% different");
112  }
113  result(i, j) = frac_diff(i, j) * (2.0 - frac_diff(i, j))
114  / (2 * (1.0 - frac_diff(i, j)));
115  }
116  }
117 }
118 
119 /// d = 0.65((1 - p)^(-1/0.65) - 1)
121  TMatrix& result)
122 {
123  result.Resize(frac_diff.GetRows(), frac_diff.GetCols());
124  for (size_t i = 0; i < frac_diff.GetRows(); ++i) {
125  for (size_t j = 0; j < frac_diff.GetCols(); ++j) {
126  if (frac_diff(i, j) >= 1.0) {
127  throw invalid_argument("Grishin distance can not be computed \
128  for sequences that are 100% different");
129  }
130  result(i, j) =
131  0.65 * (pow(1 - frac_diff(i, j), -1.0 / 0.65) - 1.0);
132  }
133  }
134 }
135 
136 /// As per Hillis et al. (Ed.), Molecular Systematics, pg. 488-489
138  const vector<string>& labels)
139 {
140  s_ThrowIfNotAllFinite(dist_mat);
141 
142  // prepare initial tree (a star phylogeny)
143  TTree *tree = new TTree;
144  for (unsigned int i = 0; i < dist_mat.GetRows(); ++i) {
145  TTree *new_node = tree->AddNode();
146  new_node->GetValue().SetId(i);
147  if (labels.empty()) {
148  new_node->GetValue().SetLabel() = 'N' + NStr::IntToString(i);
149  } else {
150  new_node->GetValue().SetLabel() = labels[i];
151  }
152  }
153 
154  // now the real work; do N - 2 neighbor joinings
155  int next_id = dist_mat.GetRows();
156  TMatrix dmat = dist_mat;
157  dmat.Resize(2 * dist_mat.GetRows() - 2, 2 * dist_mat.GetRows() - 2);
158  vector<double> r(dmat.GetRows());
159  for (unsigned int n = dist_mat.GetRows(); n > 2; --n) {
160  int i, j;
161  double m;
162  TTree::TNodeList_I neighbor1, neighbor2;
163  // first compute r_i
164  for (TTree::TNodeList_I it1 = tree->SubNodeBegin();
165  it1 != tree->SubNodeEnd(); ++it1) {
166  i = (*it1)->GetValue().GetId();
167  double sum = 0;
168  for (TTree::TNodeList_I it2 = tree->SubNodeBegin();
169  it2 != tree->SubNodeEnd(); ++it2) {
170  if (it1 == it2) {
171  continue;
172  }
173  j = (*it2)->GetValue().GetId();
174  sum += dmat(i, j);
175  }
176  r[i] = sum;
177  }
178 
179  // find where M_{i, j} is minimal
180  double min_m = numeric_limits<double>::max();
181  for (TTree::TNodeList_I it1 = tree->SubNodeBegin();
182  it1 != tree->SubNodeEnd(); ++it1) {
183  TTree::TNodeList_I it2 = it1;
184  ++it2;
185  for (; it2 != tree->SubNodeEnd(); ++it2) {
186  i = (*it1)->GetValue().GetId();
187  j = (*it2)->GetValue().GetId();
188  m = dmat(i, j) - (r[i] + r[j]) / (n - 2);
189  if (m <= min_m) {
190  neighbor1 = it1;
191  neighbor2 = it2;
192  min_m = m;
193  }
194  }
195  }
196  // join the neighbors
197  TTree *new_node = new TTree;
198  i = (*neighbor1)->GetValue().GetId();
199  j = (*neighbor2)->GetValue().GetId();
200  int u = next_id++;
201  new_node->GetValue().SetId(u);
202  double viu = dmat(i, j) / 2 + (r[i] - r[j]) / (2 * (n - 2));
203  double vju = dmat(i, j) - viu;
204  (*neighbor1)->GetValue().SetDist(viu);
205  (*neighbor2)->GetValue().SetDist(vju);
206  new_node->AddNode(tree->DetachNode(neighbor1));
207  new_node->AddNode(tree->DetachNode(neighbor2));
208  tree->AddNode(new_node);
209  // add new distances to dmat
210  for (TTree::TNodeList_CI it1 = tree->SubNodeBegin();
211  it1 != tree->SubNodeEnd(); ++it1) {
212  int k = (*it1)->GetValue().GetId();
213  if (k == u) {
214  continue;
215  }
216  dmat(k, u) = dmat(u, k)
217  = (dmat(i, k) + dmat(j, k) - dmat(i, j)) / 2;
218  }
219  }
220 
221  // Now the root has just two children, whose distances
222  // have not been set. Could do different things here.
223  // Let's make a trifurcation.
224  TTree::TNodeList_I it1 = tree->SubNodeBegin();
225  TTree::TNodeList_I it2 = it1;
226  ++it2;
227  if ((*it1)->IsLeaf()) {
228  swap(*it1, *it2);
229  }
230  double d = dmat((*it1)->GetValue().GetId(), (*it2)->GetValue().GetId());
231  (*it2)->GetValue().SetDist(d);
232  (*it1)->AddNode(tree->DetachNode(it2));
233  TTree *rv = tree->DetachNode(it1);
234  delete tree; // just the original root node
235  return rv;
236 }
237 
238 // implemented by Jason Papadopoulos
240  CDistMethods::TTree* node)
241 {
242  _ASSERT(tree);
243 
244  double distance = 0.0;
245  TTree *new_root = new TTree();
246 
247  if (!node) {
248 
249  //This function assumes that root node has distance zero
250  _ASSERT(tree->GetValue().GetDist() == 0.0);
251 
252  node = x_FindLargestEdge(tree, tree);
253 
254  // For trees generated by FASTme this will automatically make
255  // the tree strictly binary; by default FASTme produces a
256  // tree whose root has three subtrees. In this case all of
257  // the original tree nodes are reused, so nothing in the
258  // original tree needs to be freed.
259 
260  if (node == tree) {
261 
262  // the root has distance zero, and if no other
263  // tree edge has distance larger than this then
264  // the tree is degenerate. Just make it strictly
265  // binary
266 
267  TTree *leaf = 0;
268  TTree::TNodeList_I child(tree->SubNodeBegin());
269  while (child != tree->SubNodeEnd()) {
270  if ((*child)->IsLeaf()) {
271  leaf = tree->DetachNode(*child);
272  break;
273  }
274  child++;
275  }
276 
277  _ASSERT(leaf);
278  new_root->GetValue().SetDist(0.0);
279  new_root->AddNode(tree);
280  new_root->AddNode(leaf);
281  return new_root;
282  }
283  }
284 
285  TTree *parent = node->GetParent();
286 
287  // The node (and the entire subtree underneath it) now
288  // are detached from the input tree and then attached to
289  // the new root
290 
291  node = parent->DetachNode(node);
292  new_root->AddNode(node);
293  node = new_root;
294 
295  // Now proceed up the original tree until the root is
296  // reached. Every child node met along the way becomes
297  // a child node in the new tree (i.e. is unchanged).
298  // Every parent node in the original tree also becomes
299  // a child node in the new tree. This requires detaching
300  // the node, and attaching it to the new tree
301  //
302  // Because the original tree root can never be selected
303  // as the new tree root, at least one parent node must
304  // be attached to the new tree
305 
306  do {
307  // Turning the parent node into a child node is
308  // equivalent to reversing the direction of the
309  // tree for that node. That means the distance that
310  // appears in the parent node must be replaced with
311  // the distance of its child (previously determined)
312 
313  double new_distance = parent->GetValue().GetDist();
314  parent->GetValue().SetDist(distance);
315  distance = new_distance;
316 
317  // Detach the parent from the original tree (if
318  // necessary; the tree root doesn't need to be
319  // detached from anything)
320 
321  TTree *next_parent = parent->GetParent();
322  if (next_parent)
323  parent = next_parent->DetachNode(parent);
324 
325  // Attach the parent to the new tree, and point to it
326  // so that future nodes get attached as children of
327  // this node
328 
329  node->AddNode(parent);
330  node = parent;
331  parent = next_parent;
332  } while (parent);
333 
334  return new_root;
335 }
336 
338  CDistMethods::TTree* best_node)
339 {
340  if (node->GetValue().GetDist() >
341  best_node->GetValue().GetDist()) {
342  best_node = node;
343  }
344  if (node->IsLeaf())
345  return best_node;
346 
347  TTree::TNodeList_I child(node->SubNodeBegin());
348  while (child != node->SubNodeEnd()) {
349  best_node = x_FindLargestEdge(*child, best_node);
350  child++;
351  }
352 
353  return best_node;
354 }
355 
356 
357 
358 // The following is a wrapper for Richard Desper's fast minimume evolution
359 // code. The user passes in a CDistMethods::TMatrix, and gets back
360 // a CDistMethods::TTree. Behind the scenes, the code converts the
361 // TTMatrix to the representation used by fastme, runs the fastme
362 // algorithm using fastme_run, and converts the resulting tree
363 // to the CDistMethods::TTree representation.
364 
365 BEGIN_SCOPE(fastme)
366 
367 typedef char boolean;
368 boolean leaf(meNode *v);
369 meTree* fastme_run(double** D_in, int numSpecies_in, char **labels,
370  int btype_in, int wtype_in, int ntype_in);
371 double **initDoubleMatrix(int d);
372 void freeMatrix(double **D, int size);
373 void freeTree(meTree *T);
374 
375 END_SCOPE(fastme)
376 
377 // Functions to convert from tree representation used by fastme code
378 
379 static void s_AddFastMeSubtree(fastme::meNode *me_node,
380  CDistMethods::TTree* node,
381  const vector<string>& labels)
382 {
383  if (fastme::leaf(me_node)) {
384  int id = NStr::StringToInt(me_node->label);
385  node->GetValue().SetId(id);
386  if (!labels.empty()) {
387  node->GetValue().SetLabel(labels[id]);
388  } else {
389  node->GetValue().SetLabel(me_node->label);
390  }
391  return;
392  }
393 
394  // not a leaf; add two subtrees
395  // first left
396  CDistMethods::TTree* child_node = node->AddNode();
397  child_node->GetValue().SetDist(me_node->leftEdge->distance);
398  s_AddFastMeSubtree(me_node->leftEdge->head, child_node, labels);
399  // then right
400  child_node = node->AddNode();
401  child_node->GetValue().SetDist(me_node->rightEdge->distance);
402  s_AddFastMeSubtree(me_node->rightEdge->head, child_node, labels);
403 }
404 
405 
407  const vector<string>& labels) {
408 
410  fastme::meEdge *edge;
411  edge = me_tree->root->leftEdge;
412 
413  CDistMethods::TTree *node = tree->AddNode();
414  node->GetValue().SetDist(edge->distance);
415 
416  int id = NStr::StringToInt(me_tree->root->label);
417  node->GetValue().SetId(id);
418  if (!labels.empty()) {
419  node->GetValue().SetLabel(labels[id]);
420  } else {
421  node->GetValue().SetLabel(me_tree->root->label);
422  }
423 
424  s_AddFastMeSubtree(edge->head, tree, labels);
425 
426  return tree;
427 }
428 
429 
430 // The publically visible interface
432  const vector<string>& labels,
433  EFastMePar btype,
434  EFastMePar wtype,
435  EFastMePar ntype)
436 {
437 
438  s_ThrowIfNotAllFinite(dist_mat);
439 
440  double **dfme = fastme::initDoubleMatrix(dist_mat.GetRows());
441  for (unsigned int i = 0; i < dist_mat.GetRows(); ++i) {
442  for (unsigned int j = 0; j < dist_mat.GetRows(); ++j) {
443  dfme[i][j] = dist_mat(i, j);
444  }
445  }
446 
447  // leaf labels for fastme code; just pass in
448  // "0", "1", etc., and fill in real labels later
449  char **clabels = new char*[dist_mat.GetRows()];
450  vector<string> dummy_labels;
451  dummy_labels.resize(dist_mat.GetRows());
452  for (unsigned int i = 0; i < dist_mat.GetRows(); ++i) {
453  dummy_labels[i] = NStr::IntToString(i);
454  clabels[i] = const_cast<char *>(dummy_labels[i].c_str());
455  }
456 
457  fastme::meTree *me_out =
458  fastme::fastme_run(dfme, dist_mat.GetRows(), clabels,
459  btype, wtype, ntype);
460  fastme::freeMatrix(dfme, dist_mat.GetRows());
461  delete [] clabels;
462  TTree *me_tree = s_ConvertFastMeTree(me_out, labels);
463  fastme::freeTree(me_out);
464  return me_tree;
465 }
466 
468 {
469  if (!node->IsLeaf()) {
470  for (TPhyTreeNode::TNodeList_CI it = node->SubNodeBegin();
471  it != node->SubNodeEnd(); ++it) {
473  }
474  }
475 
476  if (node->GetValue().IsSetDist() && node->GetValue().GetDist() < 0.0) {
477  node->GetValue().SetDist(0.0);
478  }
479 }
480 
481 
482 // Calculate pairwise fractional differences
483 
484 double CDistMethods::Divergence(const string& seq1, const string& seq2)
485 {
486  int diff_count = 0;
487  int pos_count = 0;
488 
489  for (unsigned int pos = 0; pos < seq1.size(); ++pos) {
490  if (seq1[pos] == '-' || seq2[pos] == '-') {
491  continue; // ignore this position if a seq has a gap
492  }
493  pos_count++;
494  if (seq1[pos] != seq2[pos]) {
495  diff_count++;
496  }
497  }
498 
499  return diff_count / (double) pos_count;
500 }
501 
502 
503 double CDistMethods::FractionIdentity(const string& seq1, const string& seq2)
504 {
505  int same_count = 0;
506  int pos_count = 0;
507 
508  for (unsigned int pos = 0; pos < seq1.size(); ++pos) {
509  if (seq1[pos] == '-' || seq2[pos] == '-') {
510  continue; // ignore this position if a seq has a gap
511  }
512  pos_count++;
513  if (seq1[pos] == seq2[pos]) {
514  same_count++;
515  }
516  }
517  _ASSERT(pos_count >= 0);
518  _ASSERT(same_count >= 0);
519 
520  if (pos_count == 0) {
521  return 0.0;
522  }
523 
524  _ASSERT(same_count <= pos_count);
525  return same_count / (double) pos_count;
526 }
527 
528 
529 void CDistMethods::Divergence(const CAlnVec& avec_in, TMatrix& result)
530 {
531  // want to change gap char of CAlnVec, but no copy constructor,
532  // so effectively copy in a round-about way
533  CAlnVec avec(avec_in.GetDenseg(), avec_in.GetScope());
534  avec.SetGapChar('-');
535  avec.SetEndChar('-');
536 
537  int nseqs = avec.GetNumRows();
538  result.Resize(nseqs, nseqs);
539  vector<string> seq(nseqs);
540 
541  for (int i = 0; i < nseqs; ++i) {
542  avec.GetWholeAlnSeqString(i, seq[i]);
543  }
544 
545  for (int i = 0; i < nseqs; ++i) {
546  result(i, i) = 0; // 0 difference from itself
547  for (int j = i + 1; j < nseqs; ++j) {
548  result(i, j) = result(j, i) = CDistMethods::Divergence(seq[i], seq[j]);
549  }
550  }
551 }
552 
553 
554 /// Recursive function for adding TPhyTreeNodes to BioTreeContainer
556  const TPhyTreeNode* ptn,
557  int parent_uid, int& next_uid)
558 {
559  const int label_fid = 0;
560  const int dist_fid = 1;
561 
562  CRef<CNode> node;
563  CRef<CNodeFeature> node_feature;
564  int my_uid = next_uid++;
565 
566  // first do this node
567  node = new CNode;
568  node->SetId(my_uid);
569  node->SetParent(parent_uid);
570  if (ptn->GetValue().GetLabel() != "") {
571  node_feature = new CNodeFeature;
572  node_feature->SetFeatureid(label_fid);
573  node_feature->SetValue(ptn->GetValue().GetLabel());
574  node->SetFeatures().Set().push_back(node_feature);
575  }
576  if (ptn->GetValue().IsSetDist()) {
577  node_feature = new CNodeFeature;
578  node_feature->SetFeatureid(dist_fid);
579  node_feature->SetValue
580  (NStr::DoubleToString(ptn->GetValue().GetDist()));
581  node->SetFeatures().Set().push_back(node_feature);
582  }
583 
584  btc->SetNodes().Set().push_back(node);
585 
586  // now do its children
587  for (TPhyTreeNode::TNodeList_CI it = ptn->SubNodeBegin();
588  it != ptn->SubNodeEnd(); ++it) {
589  s_AddNodeToBtc(btc, *it, my_uid, next_uid);
590  }
591 }
592 
593 
594 /// Conversion from TPhyTreeNode to CBioTreeContainer
596 {
597  const int label_fid = 0;
598  const int dist_fid = 1;
599 
601  CRef<CFeatureDescr> fdescr;
602 
603  fdescr = new CFeatureDescr();
604  fdescr->SetId(label_fid);
605  fdescr->SetName("label");
606  btc->SetFdict().Set().push_back(fdescr);
607 
608  fdescr = new CFeatureDescr();
609  fdescr->SetId(dist_fid);
610  fdescr->SetName("dist");
611  btc->SetFdict().Set().push_back(fdescr);
612 
613  int next_uid = 0;
614  s_AddNodeToBtc(btc, tree, -1, next_uid);
615  // unset parent id of root node
616  btc->SetNodes().Set().front()->ResetParent();
617  return btc;
618 }
619 
621 {
622  const int label_fid = 0;
623  const int dist_fid = 1;
624 
626  CRef<CFeatureDescr> fdescr;
627 
628  fdescr = new CFeatureDescr();
629  fdescr->SetId(label_fid);
630  fdescr->SetName("label");
631  btc->SetFdict().Set().push_back(fdescr);
632 
633  int next_uid = 0;
634  s_AddNodeToBtc(btc, tree, -1, next_uid);
635  // unset parent id of root node
636  btc->SetNodes().Set().front()->ResetParent();
637 
638  bool at_least_one_node_has_distance = false;
639  const auto& nodes = btc->GetNodes().Get();
640  for ( auto it = nodes.begin(); it != nodes.end(); ++it) {
641  const auto& node = **it;
642  if (!node.IsSetFeatures() || !node.GetFeatures().CanGet()) {
643  continue;
644  }
645  const auto& features = node.GetFeatures().Get();
646  for (auto it1 = features.begin(); it1 != features.end(); ++it1) {
647  const auto& feature = **it1;
648  if (feature.IsSetFeatureid() && (dist_fid == feature.GetFeatureid())) {
649  at_least_one_node_has_distance = true;
650  break;
651  }
652  }
653  if (at_least_one_node_has_distance) {
654  break;
655  }
656  }
657  if (at_least_one_node_has_distance) {
658  fdescr = new CFeatureDescr();
659  fdescr->SetId(dist_fid);
660  fdescr->SetName("dist");
661  btc->SetFdict().Set().push_back(fdescr);
662  }
663  return btc;
664 }
665 
667 {
668  ITERATE (TMatrix::TData, it, mat.GetData()) {
669  if (!isfinite(*it) || *it < 0.0) {
670  return false;
671  }
672  }
673  return true;
674 }
675 
User-defined methods of the data storage class.
#define static
const CDense_seg & GetDenseg(void) const
Definition: alnmap.hpp:475
CScope & GetScope(void) const
Definition: alnvec.hpp:247
static void ZeroNegativeBranches(TTree *node)
Sets negative lengths of branches of a tree to zero.
static TTree * RerootTree(TTree *tree, TTree *node=NULL)
Reroot tree, new root is placed in the middle of the edge specified by node.
static bool AllFinite(const TMatrix &mat)
Check a matrix for NaNs and Infs.
static TTree * NjTree(const TMatrix &dist_mat, const vector< string > &labels=vector< string >())
Compute a tree by neighbor joining; as per Hillis et al.
static double FractionIdentity(const string &seq1, const string &seq2)
Calculate pairwise fraction identity based on positions where both sequences have a base/residue.
static TTree * x_FindLargestEdge(TTree *tree, TTree *best_node)
Find node with the longest edge in the tree.
TPhyTreeNode TTree
static void GrishinGeneralDist(const TMatrix &frac_diff, TMatrix &result)
Grishin's distance for protein sequences 1 - p = ln(1 + 2d) / 2d.
static void GrishinDist(const TMatrix &frac_diff, TMatrix &result)
Grishin's distance for protein sequences: 1 - p = (1 - e^(2*d)) / (2 * d) approximated with d = p(2 +...
static double Divergence(const string &seq1, const string &seq2)
Calculate pairwise fractions of non-identity.
static void PoissonDist(const TMatrix &frac_diff, TMatrix &result)
Simple distance calculation for protein sequences: d = -ln(1 - p).
static TTree * FastMeTree(const TMatrix &dist_mat, const vector< string > &labels=vector< string >(), EFastMePar btype=eOls, EFastMePar wtype=eOls, EFastMePar ntype=eBalanced)
Compute a tree using the fast minimum evolution algorithm.
static void KimuraDist(const TMatrix &frac_diff, TMatrix &result)
Kimura's distance for protein sequences: d = -ln(1 - p - 0.2p^2).
static void JukesCantorDist(const TMatrix &frac_diff, TMatrix &result)
Jukes-Cantor distance calculation for DNA sequences: d = -3/4 ln(1 - (4/3)p).
CFeatureDescr –.
void Resize(size_t i, size_t j, T val=T())
resize this matrix, filling the empty cells with a known value
Definition: matrix.hpp:390
TData & GetData()
retrieve the data associated with this matrix
Definition: matrix.hpp:312
size_t GetRows() const
get the number of rows in this matrix
Definition: matrix.hpp:298
vector< double > TData
Definition: matrix.hpp:49
size_t GetCols() const
get the number of columns in this matrix
Definition: matrix.hpp:305
CNodeFeature –.
Definition: NodeFeature.hpp:66
CNode –.
Definition: Node.hpp:66
definition of a Culling tree
Definition: ncbi_tree.hpp:100
#define T(s)
Definition: common.h:230
void freeMatrix(double **D, int size)
Definition: inputs.cpp:106
USING_SCOPE(objects)
static void s_AddNodeToBtc(CRef< CBioTreeContainer > btc, const TPhyTreeNode *ptn, int parent_uid, int &next_uid)
Recursive function for adding TPhyTreeNodes to BioTreeContainer.
static void s_AddFastMeSubtree(fastme::meNode *me_node, CDistMethods::TTree *node, const vector< string > &labels)
double ** initDoubleMatrix(int d)
Definition: fastme.cpp:224
boolean leaf(meNode *v)
Definition: graph.cpp:44
CRef< CBioTreeContainer > MakeDistanceSensitiveBioTreeContainer(const TPhyTreeNode *tree)
Conversion from TPhyTreeNode to CBioTreeContainer, potentially without dist feature key.
void s_ThrowIfNotAllFinite(const CDistMethods::TMatrix &mat)
void freeTree(meTree *T)
Definition: graph.cpp:136
static CDistMethods::TTree * s_ConvertFastMeTree(fastme::meTree *me_tree, const vector< string > &labels)
CRef< CBioTreeContainer > MakeBioTreeContainer(const TPhyTreeNode *tree)
Conversion from TPhyTreeNode to CBioTreeContainer.
meTree * fastme_run(double **D_in, int numSpecies_in, char **labels, int btype_in, int wtype_in, int ntype_in)
Definition: fastme.cpp:240
struct meTree meTree
struct meEdge meEdge
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
Definition: ncbistr.hpp:5181
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5078
TNodeList::iterator TNodeList_I
Definition: ncbi_tree.hpp:109
TTreeType * DetachNode(TTreeType *subnode)
Remove the subtree from the tree without destroying it.
Definition: ncbi_tree.hpp:720
TNodeList_CI SubNodeBegin(void) const
Return first const iterator on subnode list.
Definition: ncbi_tree.hpp:160
TNodeList::const_iterator TNodeList_CI
Definition: ncbi_tree.hpp:110
void AddNode(TTreeType *subnode)
Add new subnode.
Definition: ncbi_tree.hpp:743
bool IsLeaf() const
Report whether this is a leaf node.
Definition: ncbi_tree.hpp:296
TNodeList_CI SubNodeEnd(void) const
Return last const iterator on subnode list.
Definition: ncbi_tree.hpp:166
const TValue & GetValue(void) const
Return node's value.
Definition: ncbi_tree.hpp:184
const TTreeType * GetParent(void) const
Get node's parent.
Definition: ncbi_tree.hpp:139
void SetNodes(TNodes &value)
Assign a value to Nodes data member.
void SetFdict(TFdict &value)
Assign a value to Fdict data member.
const Tdata & Get(void) const
Get the member data.
Definition: NodeSet_.hpp:164
const TNodes & GetNodes(void) const
Get the Nodes member data.
int i
yy_size_t n
const struct ncbi::grid::netcache::search::fields::SIZE size
Floating-point support routines.
T max(T x_, T y_)
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
#define D(d)
Definition: graph.h:74
Definition: graph.h:94
#define _ASSERT
else result
Definition: token2.c:20
#define const
Definition: zconf.h:232
Modified on Fri Sep 20 14:57:02 2024 by modify_doxy.py rev. 669887