NCBI C++ ToolKit
NNI.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: NNI.cpp 66617 2015-03-13 16:41:08Z vasilche $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Richard Desper
27 *
28 * File Description: NNI.cpp
29 *
30 * A part of the Miminum Evolution algorithm
31 *
32 */
33 
34 #include <ncbi_pch.hpp>
35 #include <stdio.h>
36 #include <stdlib.h>
37 #include <math.h>
38 #include "graph.h"
39 #include "fastme.h"
40 
42 BEGIN_SCOPE(fastme)
43 
44 boolean leaf(meNode *v);
50 double wf(double lambda, double D_LR, double D_LU, double D_LD,
51  double D_RU, double D_RD, double D_DU);
52 /*NNI functions for unweighted OLS topological switches*/
53 
54 /*fillTableUp fills all the entries in D associated with
55  e->head,f->head and those edges g->head above e->head*/
56 void fillTableUp(meEdge *e, meEdge *f, double **A, double **D, meTree *T)
57 {
58  meEdge *g,*h;
59  if (T->root == f->tail)
60  {
61  if (leaf(e->head))
62  A[e->head->index][f->head->index] =
63  A[f->head->index][e->head->index] =
64  D[e->head->index2][f->tail->index2];
65  else
66  {
67  g = e->head->leftEdge;
68  h = e->head->rightEdge;
69  A[e->head->index][f->head->index] =
70  A[f->head->index][e->head->index] =
71  (g->bottomsize*A[f->head->index][g->head->index]
72  + h->bottomsize*A[f->head->index][h->head->index])
73  /e->bottomsize;
74  }
75  }
76  else
77  {
78  g = f->tail->parentEdge;
79  fillTableUp(e,g,A,D,T); /*recursive call*/
80  h = siblingEdge(f);
81  A[e->head->index][f->head->index] =
82  A[f->head->index][e->head->index] =
83  (g->topsize*A[e->head->index][g->head->index]
84  + h->bottomsize*A[e->head->index][h->head->index])/f->topsize;
85  }
86 }
87 
88 
89 void makeOLSAveragesTable(meTree *T, double **D, double **A);
90 
91 double **buildAveragesTable(meTree *T, double **D)
92 {
93  int i,j;
94  double **A;
95  A = (double **) malloc(T->size*sizeof(double *));
96  for(i = 0; i < T->size;i++)
97  {
98  A[i] = (double *) malloc(T->size*sizeof(double));
99  for(j=0;j<T->size;j++)
100  A[i][j] = 0.0;
101  }
102  makeOLSAveragesTable(T,D,A);
103  return(A);
104 }
105 
106 double wf2(double lambda, double D_AD, double D_BC, double D_AC, double D_BD,
107  double D_AB, double D_CD)
108 {
109  double weight;
110  weight = 0.5*(lambda*(D_AC + D_BD) + (1 - lambda)*(D_AD + D_BC)
111  + (D_AB + D_CD));
112  return(weight);
113 }
114 
115 int NNIEdgeTest(meEdge *e, meTree *T, double **A, double *weight)
116 {
117  int a,b,c,d;
118  meEdge *f;
119  double *lambda;
120  double D_LR, D_LU, D_LD, D_RD, D_RU, D_DU;
121  double w1,w2,w0;
122 
123  if ((leaf(e->tail)) || (leaf(e->head)))
124  return(NONE);
125  lambda = (double *)malloc(3*sizeof(double));
126  a = e->tail->parentEdge->topsize;
127  f = siblingEdge(e);
128  b = f->bottomsize;
129  c = e->head->leftEdge->bottomsize;
130  d = e->head->rightEdge->bottomsize;
131 
132  lambda[0] = ((double) b*c + a*d)/((a + b)*(c+d));
133  lambda[1] = ((double) b*c + a*d)/((a + c)*(b+d));
134  lambda[2] = ((double) c*d + a*b)/((a + d)*(b+c));
135 
136  D_LR = A[e->head->leftEdge->head->index][e->head->rightEdge->head->index];
137  D_LU = A[e->head->leftEdge->head->index][e->tail->index];
138  D_LD = A[e->head->leftEdge->head->index][f->head->index];
139  D_RU = A[e->head->rightEdge->head->index][e->tail->index];
140  D_RD = A[e->head->rightEdge->head->index][f->head->index];
141  D_DU = A[e->tail->index][f->head->index];
142 
143  w0 = wf2(lambda[0],D_RU,D_LD,D_LU,D_RD,D_DU,D_LR);
144  w1 = wf2(lambda[1],D_RU,D_LD,D_DU,D_LR,D_LU,D_RD);
145  w2 = wf2(lambda[2],D_DU,D_LR,D_LU,D_RD,D_RU,D_LD);
146  free(lambda);
147  if (w0 <= w1)
148  {
149  if (w0 <= w2) /*w0 <= w1,w2*/
150  {
151  *weight = 0.0;
152  return(NONE);
153  }
154  else /*w2 < w0 <= w1 */
155  {
156  *weight = w2 - w0;
157  if (verbose)
158  {
159  printf("Swap across %s. ",e->label);
160  printf("Weight dropping by %lf.\n",w0 - w2);
161  printf("New weight should be %lf.\n",T->weight + w2 - w0);
162  }
163  return(RIGHT);
164  }
165  }
166  else if (w2 <= w1) /*w2 <= w1 < w0*/
167  {
168  *weight = w2 - w0;
169  if (verbose)
170  {
171  printf("Swap across %s. ",e->label);
172  printf("Weight dropping by %lf.\n",w0 - w2);
173  printf("New weight should be %lf.\n",T->weight + w2 - w0);
174  }
175  return(RIGHT);
176  }
177  else /*w1 < w2, w0*/
178  {
179  *weight = w1 - w0;
180  if (verbose)
181  {
182  printf("Swap across %s. ",e->label);
183  printf("Weight dropping by %lf.\n",w0 - w1);
184  printf("New weight should be %lf.\n",T->weight + w1 - w0);
185  }
186  return(LEFT);
187  }
188 }
189 
190 
191 int *initPerm(int size);
192 
193 void NNIupdateAverages(double **A, meEdge *e, meEdge *par, meEdge *skew,
194  meEdge *swap, meEdge *fixed, meTree *T)
195 {
196  meNode *v;
197  meEdge *elooper;
198  v = e->head;
199  /*first, v*/
200  A[e->head->index][e->head->index] =
201  (swap->bottomsize*
202  ((skew->bottomsize*A[skew->head->index][swap->head->index]
203  + fixed->bottomsize*A[fixed->head->index][swap->head->index])
204  / e->bottomsize) +
205  par->topsize*
206  ((skew->bottomsize*A[skew->head->index][par->head->index]
207  + fixed->bottomsize*A[fixed->head->index][par->head->index])
208  / e->bottomsize)
209  ) / e->topsize;
210 
211  elooper = findBottomLeft(e); /*next, we loop over all the edges
212  which are below e*/
213  while (e != elooper)
214  {
215  A[e->head->index][elooper->head->index] =
216  A[elooper->head->index][v->index]
217  = (swap->bottomsize*A[elooper->head->index][swap->head->index] +
218  par->topsize*A[elooper->head->index][par->head->index])
219  / e->topsize;
220  elooper = depthFirstTraverse(T,elooper);
221  }
222  elooper = findBottomLeft(swap); /*next we loop over all the edges below and
223  including swap*/
224  while (swap != elooper)
225  {
226  A[e->head->index][elooper->head->index] =
227  A[elooper->head->index][e->head->index]
228  = (skew->bottomsize * A[elooper->head->index][skew->head->index] +
229  fixed->bottomsize*A[elooper->head->index][fixed->head->index])
230  / e->bottomsize;
231  elooper = depthFirstTraverse(T,elooper);
232  }
233  /*now elooper = skew */
234  A[e->head->index][elooper->head->index] =
235  A[elooper->head->index][e->head->index]
236  = (skew->bottomsize * A[elooper->head->index][skew->head->index] +
237  fixed->bottomsize* A[elooper->head->index][fixed->head->index])
238  / e->bottomsize;
239 
240  /*finally, we loop over all the edges in the meTree
241  on the far side of parEdge*/
242  elooper = T->root->leftEdge;
243  while ((elooper != swap) && (elooper != e)) /*start a top-first traversal*/
244  {
245  A[e->head->index][elooper->head->index] =
246  A[elooper->head->index][e->head->index]
247  = (skew->bottomsize * A[elooper->head->index][skew->head->index]
248  + fixed->bottomsize* A[elooper->head->index][fixed->head->index])
249  / e->bottomsize;
250  elooper = topFirstTraverse(T,elooper);
251  }
252 
253  /*At this point, elooper = par.
254  We finish the top-first traversal, excluding the submeTree below par*/
255  elooper = moveUpRight(par);
256  while (NULL != elooper)
257  {
258  A[e->head->index][elooper->head->index]
259  = A[elooper->head->index][e->head->index]
260  = (skew->bottomsize * A[elooper->head->index][skew->head->index] +
261  fixed->bottomsize* A[elooper->head->index][fixed->head->index])
262  / e->bottomsize;
263  elooper = topFirstTraverse(T,elooper);
264  }
265 
266 }
267 
268 
269 void NNItopSwitch(meTree *T, meEdge *e, int direction, double **A)
270 {
271  meEdge *par, *fixed;
272  meEdge *skew, *swap;
273 
274  if (verbose)
275  printf("Branch swap across meEdge %s.\n",e->label);
276 
277  if (LEFT == direction)
278  swap = e->head->leftEdge;
279  else
280  swap = e->head->rightEdge;
281  skew = siblingEdge(e);
282  fixed = siblingEdge(swap);
283  par = e->tail->parentEdge;
284 
285  if (verbose)
286  {
287  printf("Branch swap: switching edges %s and %s.\n",skew->label,swap->label);
288  }
289  /*perform topological switch by changing f from (u,b) to (v,b)
290  and g from (v,c) to (u,c), necessitatates also changing parent fields*/
291 
292  swap->tail = e->tail;
293  skew->tail = e->head;
294 
295  if (LEFT == direction)
296  e->head->leftEdge = skew;
297  else
298  e->head->rightEdge = skew;
299  if (skew == e->tail->rightEdge)
300  e->tail->rightEdge = swap;
301  else
302  e->tail->leftEdge = swap;
303 
304  /*both topsize and bottomsize change for e, but nowhere else*/
305 
306  e->topsize = par->topsize + swap->bottomsize;
307  e->bottomsize = fixed->bottomsize + skew->bottomsize;
308  NNIupdateAverages(A, e, par, skew, swap, fixed,T);
309 
310 } /*end NNItopSwitch*/
311 
312 void reHeapElement(int *p, int *q, double *v, int length, int i);
313 void pushHeap(int *p, int *q, double *v, int length, int i);
314 void popHeap(int *p, int *q, double *v, int length, int i);
315 
316 
317 void NNIRetestEdge(int *p, int *q, meEdge *e,meTree *T, double **avgDistArray,
318  double *weights, int *location, int *possibleSwaps)
319 {
320  int tloc;
321  tloc = location[e->head->index+1];
322  location[e->head->index+1] =
323  NNIEdgeTest(e,T,avgDistArray,weights + e->head->index+1);
324  if (NONE == location[e->head->index+1])
325  {
326  if (NONE != tloc)
327  popHeap(p,q,weights,(*possibleSwaps)--,q[e->head->index+1]);
328  }
329  else
330  {
331  if (NONE == tloc)
332  pushHeap(p,q,weights,(*possibleSwaps)++,q[e->head->index+1]);
333  else
334  reHeapElement(p,q,weights,*possibleSwaps,q[e->head->index+1]);
335  }
336 }
337 
338 void permInverse(int *p, int *q, int length);
339 
340 int makeThreshHeap(int *p, int *q, double *v, int arraySize, double thresh);
341 
342 
343 void NNI(meTree *T, double **avgDistArray, int *count)
344 {
345  meEdge *e, *centerEdge;
346  meEdge **edgeArray;
347  int *location;
348  int *p,*q;
349  int i;
350  int possibleSwaps;
351  double *weights;
352  p = initPerm(T->size+1);
353  q = initPerm(T->size+1);
354  edgeArray = (meEdge **) malloc((T->size+1)*sizeof(meEdge*));
355  weights = (double *) malloc((T->size+1)*sizeof(double));
356  location = (int *) malloc((T->size+1)*sizeof(int));
357  for (i=0;i<T->size+1;i++)
358  {
359  weights[i] = 0.0;
360  location[i] = NONE;
361  }
362  e = findBottomLeft(T->root->leftEdge);
363  /* *count = 0; */
364  while (NULL != e)
365  {
366  edgeArray[e->head->index+1] = e;
367  location[e->head->index+1] =
368  NNIEdgeTest(e,T,avgDistArray,weights + e->head->index + 1);
369  e = depthFirstTraverse(T,e);
370  }
371  possibleSwaps = makeThreshHeap(p,q,weights,T->size+1,0.0);
372  permInverse(p,q,T->size+1);
373  /*we put the negative values of weights into a heap, indexed by p
374  with the minimum value pointed to by p[1]*/
375  /*p[i] is index (in edgeArray) of meEdge with i-th position
376  in the heap, q[j] is the position of meEdge j in the heap */
377  /*NOTE: the loop below should test that weights[p[1]] < 0, but
378  if compiled with optimization it is possible that weights[p[1]]
379  ends up negative and very small, so that changes to the heap
380  become cyclic and the loop becomes infinite. To avoid this
381  behavior, stop the loop short of 0.0. This is a workaround
382  until the roundoff sensitivity is removed algorithmically */
383  while (weights[p[1]] < -1e-8)
384  {
385  centerEdge = edgeArray[p[1]];
386  (*count)++;
387  T->weight = T->weight + weights[p[1]];
388  NNItopSwitch(T,edgeArray[p[1]],location[p[1]],avgDistArray);
389  location[p[1]] = NONE;
390  weights[p[1]] = 0.0; /*after the NNI, this meEdge is in optimal
391  configuration*/
392  popHeap(p,q,weights,possibleSwaps--,1);
393  /*but we must retest the other four edges*/
394  e = centerEdge->head->leftEdge;
395  NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps);
396  e = centerEdge->head->rightEdge;
397  NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps);
398  e = siblingEdge(centerEdge);
399  NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps);
400  e = centerEdge->tail->parentEdge;
401  NNIRetestEdge(p,q,e,T,avgDistArray,weights,location,&possibleSwaps);
402  }
403  free(p);
404  free(q);
405  free(location);
406  free(edgeArray);
407 }
408 
409 void NNIwithoutMatrix(meTree *T, double **D, int *count)
410 {
411  double **avgDistArray;
412  avgDistArray = buildAveragesTable(T,D);
413  NNI(T,avgDistArray,count);
414 }
415 
416 void NNIWithPartialMatrix(meTree *T,double **D,double **A,int *count)
417 {
418  makeOLSAveragesTable(T,D,A);
419  NNI(T,A,count);
420 }
421 
422 
423 END_SCOPE(fastme)
meEdge * findBottomLeft(meEdge *e)
Definition: traverse.cpp:44
boolean leaf(meNode *v)
Definition: graph.cpp:44
meEdge * siblingEdge(meEdge *e)
Definition: graph.cpp:210
void NNItopSwitch(meTree *T, meEdge *e, int direction, double **A)
Definition: NNI.cpp:269
double ** buildAveragesTable(meTree *T, double **D)
Definition: NNI.cpp:91
void NNI(meTree *T, double **avgDistArray, int *count)
Definition: NNI.cpp:343
void pushHeap(int *p, int *q, double *v, int length, int i)
Definition: heap.cpp:131
void NNIwithoutMatrix(meTree *T, double **D, int *count)
Definition: NNI.cpp:409
void reHeapElement(int *p, int *q, double *v, int length, int i)
Definition: heap.cpp:108
int NNIEdgeTest(meEdge *e, meTree *T, double **A, double *weight)
Definition: NNI.cpp:115
int makeThreshHeap(int *p, int *q, double *v, int arraySize, double thresh)
Definition: heap.cpp:139
meEdge * moveUpRight(meEdge *e)
Definition: traverse.cpp:91
double wf2(double lambda, double D_AD, double D_BC, double D_AC, double D_BD, double D_AB, double D_CD)
Definition: NNI.cpp:106
void NNIRetestEdge(int *p, int *q, meEdge *e, meTree *T, double **avgDistArray, double *weights, int *location, int *possibleSwaps)
Definition: NNI.cpp:317
double wf(double lambda, double D_LR, double D_LU, double D_LD, double D_RU, double D_RD, double D_DU)
Definition: gme.cpp:78
void popHeap(int *p, int *q, double *v, int length, int i)
Definition: heap.cpp:125
meEdge * depthFirstTraverse(meTree *T, meEdge *e)
Definition: traverse.cpp:65
meEdge * topFirstTraverse(meTree *T, meEdge *e)
Definition: traverse.cpp:107
int * initPerm(int size)
Definition: heap.cpp:45
void makeOLSAveragesTable(meTree *T, double **D, double **A)
Definition: gme.cpp:120
void permInverse(int *p, int *q, int length)
Definition: heap.cpp:55
void NNIWithPartialMatrix(meTree *T, double **D, double **A, int *count)
Definition: NNI.cpp:416
void fillTableUp(meEdge *e, meEdge *f, double **A, double **D, meTree *T)
Definition: NNI.cpp:56
void NNIupdateAverages(double **A, meEdge *e, meEdge *par, meEdge *skew, meEdge *swap, meEdge *fixed, meTree *T)
Definition: NNI.cpp:193
#define T(s)
Definition: common.h:230
#define LEFT
Definition: fastme.h:52
#define NONE
Definition: fastme.h:49
#define RIGHT
Definition: fastme.h:53
static const char location[]
Definition: config.c:97
void swap(NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair1, NCBI_NS_NCBI::pair_base_member< T1, T2 > &pair2)
Definition: ncbimisc.hpp:1508
#define NULL
Definition: ncbistd.hpp:225
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
n font weight
while(yy_chk[yy_base[yy_current_state]+yy_c] !=yy_current_state)
int i
const struct ncbi::grid::netcache::search::fields::SIZE size
unsigned int a
Definition: ncbi_localip.c:102
double lambda(size_t dimMatrix_, const Int4 *const *scoreMatrix_, const double *q_)
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
true_type verbose
Definition: processing.cpp:890
Definition: graph.h:84
struct meNode * tail
Definition: graph.h:86
int topsize
Definition: graph.h:89
char label[50]
Definition: graph.h:85
struct meNode * head
Definition: graph.h:87
int bottomsize
Definition: graph.h:88
Definition: graph.h:74
struct meEdge * rightEdge
Definition: graph.h:79
struct meEdge * parentEdge
Definition: graph.h:76
int index2
Definition: graph.h:81
struct meEdge * leftEdge
Definition: graph.h:77
int index
Definition: graph.h:80
Definition: graph.h:94
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
void free(voidpf ptr)
voidp malloc(uInt size)
Modified on Wed Jun 12 11:20:22 2024 by modify_doxy.py rev. 669887