NCBI C++ ToolKit
unified_pvalues.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* ==========================================================================
2  *
3  * PUBLIC DOMAIN NOTICE
4  * National Center for Biotechnology Information
5  *
6  * This software/database is a "United States Government Work" under the
7  * terms of the United States Copyright Act. It was written as part of
8  * the author's official duties as a United States Government employee and
9  * thus cannot be copyrighted. This software/database is freely available
10  * to the public for use. The National Library of Medicine and the U.S.
11  * Government have not placed any restriction on its use or reproduction.
12  *
13  * Although all reasonable efforts have been taken to ensure the accuracy
14  * and reliability of the software and data, the NLM and the U.S.
15  * Government do not and cannot warrant the performance or results that
16  * may be obtained by using this software or data. The NLM and the U.S.
17  * Government disclaim all warranties, express or implied, including
18  * warranties of performance, merchantability or fitness for any particular
19  * purpose.
20  *
21  * Please cite the author in any work or product based on this material.
22  *
23  * ==========================================================================*/
24 
25 /** @file unified_pvalues.c
26  * Procedures for computing a "composition" p-value of a match, and
27  * for computing a unified p-value combining the customary alignment
28  * p-value and the new composition p-value.
29  *
30  * @author Yi-Kuo Yu, Alejandro Schaffer, Mike Gertz
31  */
32 
33 #include <stdio.h>
34 #include <stdlib.h>
35 #include <string.h>
36 #include <math.h>
37 
40 
41 /** @{ The next two constants are used as thresholds in one test of whether
42  * to use compositional p-values */
43 #define LENGTH_MIN 40
44 #define LENGTH_MAX 4000
45 /** @} */
46 
47 #define LAMBDA_BIN_SIZE 0.001 /**< bin size when discretizing lambda
48  distribution function */
49 #define LAMBDA_BIN_TOTAL 565 /**< total number of bins for the
50  lambda distribution function */
51 /** Marks the boundary of the left tail of the lambda distribution;
52  values of lambda that lie in the left tail are treated all same */
53 #define LOW_LAMBDA_BIN_CUT 35
54 
55 
56 /**
57  * P_lambda_table is a discretized version of the empirical cumulative
58  * distribution function for the statistical parameter lambda for
59  * pairs of unrelated sequences in the Astral data set*/
60 static double
62  { 1.247750e-07, 1.247750e-07, 1.247750e-07, 1.247750e-07, 1.247750e-07,
63  1.247750e-07, 1.247750e-07, 1.247750e-07, 1.247750e-07, 1.247750e-07,
64  1.247750e-07, 1.247750e-07, 2.495500e-07,
65  2.495500e-07, 2.495500e-07, 2.495500e-07, 2.495500e-07, 2.495500e-07,
66  2.495500e-07, 2.495500e-07, 2.495500e-07,
67  2.495500e-07, 3.743250e-07, 4.991000e-07, 4.991000e-07, 4.991000e-07,
68  4.991000e-07, 7.486490e-07, 7.486490e-07,
69  7.486490e-07, 7.486490e-07, 8.734240e-07, 9.981990e-07, 9.981990e-07,
70  1.122974e-06, 1.122974e-06, 1.372523e-06,
71  1.497298e-06, 1.622073e-06, 1.871622e-06, 1.871622e-06, 1.996397e-06,
72  2.370721e-06, 2.620270e-06, 2.620270e-06,
73  2.620270e-06, 2.745045e-06, 2.745045e-06, 2.745045e-06, 2.745045e-06,
74  2.869820e-06, 3.493695e-06, 3.493695e-06,
75  3.493695e-06, 3.868019e-06, 4.117568e-06, 4.367117e-06, 4.866216e-06,
76  4.866216e-06, 5.240540e-06, 5.240540e-06,
77  5.864415e-06, 6.363514e-06, 6.737838e-06, 7.236937e-06, 7.860812e-06,
78  8.110361e-06, 8.484685e-06, 9.233334e-06,
79  9.857209e-06, 1.035631e-05, 1.060586e-05, 1.085541e-05, 1.210316e-05,
80  1.272703e-05, 1.322613e-05, 1.409955e-05,
81  1.509775e-05, 1.647027e-05, 1.784279e-05, 1.859144e-05, 1.921532e-05,
82  2.008874e-05, 2.133649e-05, 2.245946e-05,
83  2.333288e-05, 2.445585e-05, 2.557882e-05, 2.732567e-05, 2.832387e-05,
84  3.019549e-05, 3.231666e-05, 3.468738e-05,
85  3.568558e-05, 3.718288e-05, 3.942882e-05, 4.292251e-05, 4.479413e-05,
86  4.666575e-05, 4.903647e-05, 5.140719e-05,
87  5.352836e-05, 5.539998e-05, 5.714683e-05, 5.964232e-05, 6.263691e-05,
88  6.550673e-05, 6.825177e-05, 7.124636e-05,
89  7.411618e-05, 7.611258e-05, 7.910717e-05, 8.235131e-05, 8.534590e-05,
90  8.971302e-05, 9.457923e-05, 9.907112e-05,
91  1.028144e-04, 1.068072e-04, 1.124220e-04, 1.150423e-04, 1.202828e-04,
92  1.241509e-04, 1.297657e-04, 1.352558e-04,
93  1.408707e-04, 1.474838e-04, 1.535977e-04, 1.605851e-04, 1.679469e-04,
94  1.766811e-04, 1.849162e-04, 1.943991e-04,
95  2.023847e-04, 2.111190e-04, 2.208514e-04, 2.298352e-04, 2.390685e-04,
96  2.500487e-04, 2.599059e-04, 2.705118e-04,
97  2.826149e-04, 2.953419e-04, 3.058230e-04, 3.209207e-04, 3.336477e-04,
98  3.469986e-04, 3.607238e-04, 3.758215e-04,
99  3.915431e-04, 4.100098e-04, 4.283517e-04, 4.454458e-04, 4.661584e-04,
100  4.845003e-04, 5.032165e-04, 5.194372e-04,
101  5.399003e-04, 5.642314e-04, 5.905589e-04, 6.173855e-04, 6.407184e-04,
102  6.646751e-04, 6.896300e-04, 7.199503e-04,
103  7.487733e-04, 7.780954e-04, 8.119093e-04, 8.442260e-04, 8.782895e-04,
104  9.142246e-04, 9.507836e-04, 9.890894e-04,
105  1.025898e-03, 1.060710e-03, 1.102010e-03, 1.141065e-03, 1.179870e-03,
106  1.225413e-03, 1.268086e-03, 1.313254e-03,
107  1.362665e-03, 1.413199e-03, 1.465479e-03, 1.514516e-03, 1.568668e-03,
108  1.620325e-03, 1.678595e-03, 1.738861e-03,
109  1.800375e-03, 1.863511e-03, 1.933760e-03, 1.999641e-03, 2.066271e-03,
110  2.136269e-03, 2.208639e-03, 2.282256e-03,
111  2.363235e-03, 2.451451e-03, 2.535050e-03, 2.626385e-03, 2.715973e-03,
112  2.811676e-03, 2.906879e-03, 3.008695e-03,
113  3.108390e-03, 3.220687e-03, 3.337351e-03, 3.458507e-03, 3.587275e-03,
114  3.717165e-03, 3.853419e-03, 3.998407e-03,
115  4.150882e-03, 4.307349e-03, 4.470305e-03, 4.650105e-03, 4.835520e-03,
116  5.028047e-03, 5.239291e-03, 5.459394e-03,
117  5.699835e-03, 5.944019e-03, 6.201679e-03, 6.472939e-03, 6.767532e-03,
118  7.067490e-03, 7.402760e-03, 7.779704e-03,
119  8.163261e-03, 8.581007e-03, 9.043672e-03, 9.538778e-03, 1.006046e-02,
120  1.061571e-02, 1.121351e-02, 1.187918e-02,
121  1.259427e-02, 1.336213e-02, 1.420611e-02, 1.510486e-02, 1.609220e-02,
122  1.716439e-02, 1.831930e-02, 1.961309e-02,
123  2.100770e-02, 2.254767e-02, 2.423026e-02, 2.604985e-02, 2.803414e-02,
124  3.023304e-02, 3.261836e-02, 3.527531e-02,
125  3.818206e-02, 4.134298e-02, 4.483305e-02, 4.870456e-02, 5.296474e-02,
126  5.765964e-02, 6.281384e-02, 6.850944e-02,
127  7.481781e-02, 8.173895e-02, 8.937667e-02, 9.774769e-02, 1.069522e-01,
128  1.170782e-01, 1.282548e-01, 1.405027e-01,
129  1.539673e-01, 1.686191e-01, 1.846574e-01, 2.020771e-01, 2.210459e-01,
130  2.415374e-01, 2.637095e-01, 2.872893e-01,
131  3.124426e-01, 3.391945e-01, 3.673232e-01, 3.966229e-01, 4.269345e-01,
132  4.580085e-01, 4.895437e-01, 5.211873e-01,
133  5.525534e-01, 5.832051e-01, 6.129409e-01, 6.414552e-01, 6.685366e-01,
134  6.941538e-01, 7.180993e-01, 7.403272e-01,
135  7.608830e-01, 7.798100e-01, 7.972520e-01, 8.132446e-01, 8.277589e-01,
136  8.410031e-01, 8.531792e-01, 8.642644e-01,
137  8.743145e-01, 8.835475e-01, 8.919562e-01, 8.996102e-01, 9.066574e-01,
138  9.130520e-01, 9.189736e-01, 9.244128e-01,
139  9.293928e-01, 9.339722e-01, 9.381913e-01, 9.421451e-01, 9.457657e-01,
140  9.491230e-01, 9.522394e-01, 9.551589e-01,
141  9.578458e-01, 9.603525e-01, 9.626876e-01, 9.648771e-01, 9.669361e-01,
142  9.688645e-01, 9.706878e-01, 9.723798e-01,
143  9.739466e-01, 9.754310e-01, 9.768135e-01, 9.781311e-01, 9.793539e-01,
144  9.805091e-01, 9.816204e-01, 9.826387e-01,
145  9.835868e-01, 9.844858e-01, 9.853414e-01, 9.861397e-01, 9.869005e-01,
146  9.876234e-01, 9.882862e-01, 9.889344e-01,
147  9.895357e-01, 9.901063e-01, 9.906435e-01, 9.911373e-01, 9.916119e-01,
148  9.920576e-01, 9.924838e-01, 9.928730e-01,
149  9.932470e-01, 9.935931e-01, 9.939352e-01, 9.942430e-01, 9.945419e-01,
150  9.948185e-01, 9.950818e-01, 9.953442e-01,
151  9.955881e-01, 9.958246e-01, 9.960354e-01, 9.962523e-01, 9.964441e-01,
152  9.966252e-01, 9.967933e-01, 9.969595e-01,
153  9.971159e-01, 9.972685e-01, 9.974102e-01, 9.975482e-01, 9.976761e-01,
154  9.977899e-01, 9.978979e-01, 9.979996e-01,
155  9.980933e-01, 9.981938e-01, 9.982869e-01, 9.983711e-01, 9.984526e-01,
156  9.985349e-01, 9.986089e-01, 9.986793e-01,
157  9.987437e-01, 9.988027e-01, 9.988587e-01, 9.989131e-01, 9.989656e-01,
158  9.990185e-01, 9.990667e-01, 9.991120e-01,
159  9.991549e-01, 9.991993e-01, 9.992385e-01, 9.992718e-01, 9.993088e-01,
160  9.993393e-01, 9.993723e-01, 9.993982e-01,
161  9.994278e-01, 9.994549e-01, 9.994825e-01, 9.995078e-01, 9.995336e-01,
162  9.995536e-01, 9.995732e-01, 9.995946e-01,
163  9.996155e-01, 9.996358e-01, 9.996535e-01, 9.996697e-01, 9.996842e-01,
164  9.996983e-01, 9.997115e-01, 9.997263e-01,
165  9.997404e-01, 9.997520e-01, 9.997612e-01, 9.997721e-01, 9.997834e-01,
166  9.997917e-01, 9.998010e-01, 9.998097e-01,
167  9.998197e-01, 9.998272e-01, 9.998347e-01, 9.998427e-01, 9.998504e-01,
168  9.998564e-01, 9.998629e-01, 9.998688e-01,
169  9.998745e-01, 9.998795e-01, 9.998834e-01, 9.998882e-01, 9.998937e-01,
170  9.998978e-01, 9.999021e-01, 9.999067e-01,
171  9.999094e-01, 9.999134e-01, 9.999174e-01, 9.999209e-01, 9.999240e-01,
172  9.999275e-01, 9.999312e-01, 9.999336e-01,
173  9.999374e-01, 9.999390e-01, 9.999415e-01, 9.999435e-01, 9.999452e-01,
174  9.999480e-01, 9.999506e-01, 9.999517e-01,
175  9.999541e-01, 9.999555e-01, 9.999570e-01, 9.999581e-01, 9.999595e-01,
176  9.999608e-01, 9.999623e-01, 9.999636e-01,
177  9.999655e-01, 9.999665e-01, 9.999680e-01, 9.999691e-01, 9.999702e-01,
178  9.999712e-01, 9.999718e-01, 9.999733e-01,
179  9.999739e-01, 9.999751e-01, 9.999761e-01, 9.999771e-01, 9.999782e-01,
180  9.999792e-01, 9.999801e-01, 9.999807e-01,
181  9.999813e-01, 9.999816e-01, 9.999826e-01, 9.999831e-01, 9.999834e-01,
182  9.999836e-01, 9.999838e-01, 9.999841e-01,
183  9.999846e-01, 9.999854e-01, 9.999856e-01, 9.999859e-01, 9.999870e-01,
184  9.999874e-01, 9.999882e-01, 9.999885e-01,
185  9.999890e-01, 9.999892e-01, 9.999892e-01, 9.999893e-01, 9.999895e-01,
186  9.999900e-01, 9.999904e-01, 9.999907e-01,
187  9.999909e-01, 9.999914e-01, 9.999917e-01, 9.999923e-01, 9.999923e-01,
188  9.999925e-01, 9.999928e-01, 9.999932e-01,
189  9.999934e-01, 9.999937e-01, 9.999939e-01, 9.999942e-01, 9.999948e-01,
190  9.999950e-01, 9.999950e-01, 9.999952e-01,
191  9.999954e-01, 9.999955e-01, 9.999957e-01, 9.999957e-01, 9.999959e-01,
192  9.999959e-01, 9.999960e-01, 9.999960e-01,
193  9.999962e-01, 9.999963e-01, 9.999965e-01, 9.999965e-01, 9.999967e-01,
194  9.999967e-01, 9.999968e-01, 9.999973e-01,
195  9.999973e-01, 9.999974e-01, 9.999975e-01, 9.999977e-01, 9.999977e-01,
196  9.999977e-01, 9.999977e-01, 9.999979e-01,
197  9.999979e-01, 9.999979e-01, 9.999982e-01, 9.999983e-01, 9.999984e-01,
198  9.999985e-01, 9.999985e-01, 9.999985e-01,
199  9.999985e-01, 9.999985e-01, 9.999985e-01, 9.999985e-01, 9.999985e-01,
200  9.999987e-01, 9.999987e-01, 9.999988e-01,
201  9.999988e-01, 9.999988e-01, 9.999988e-01, 9.999989e-01, 9.999989e-01,
202  9.999989e-01, 9.999989e-01, 9.999992e-01
203  };
204 
205 
206 /* Documented in unified_pvalues.h */
207 double
209 {
210  /* Let initialUScore be lambda, scaled and shifted to fit the
211  * bins of P_lambda_table. */
212  double initialUScore = (lambda - COMPO_MIN_LAMBDA) / LAMBDA_BIN_SIZE;
213 
214  if (initialUScore < LOW_LAMBDA_BIN_CUT) {
215  /* Values of lambda less than
216  * LOW_LAMBDA_BIN_CUT * LAMBDA_BIN_SIZE + COMPO_MIN_LAMBDA
217  * all receive the same p-value.
218  */
220  } else if (initialUScore > LAMBDA_BIN_TOTAL - 1) {
221  /* Values of initialUScore strictly larger than the largest
222  * value in the table all receive p-value 1.0 */
223  return 1.0;
224  } else {
225  /* floor of initialUScore is the bin in the table */
226  int bin = ((int) initialUScore);
227  if (bin == LAMBDA_BIN_TOTAL - 1) {
228  /* In the unlikely event that bin is exactly the
229  * largest item in the table, just return that value. */
230  return P_lambda_table[LAMBDA_BIN_TOTAL - 1];
231  } else {
232  /* Return a value using linear interpolation; delta is the
233  * difference between the discrete table bin and the
234  * actual desired value */
235  double delta = initialUScore - bin;
236  return (1.0 - delta) * P_lambda_table[bin] +
237  delta * P_lambda_table[bin + 1];
238  }
239  }
240 }
241 
242 
243 /* Documented in unified_pvalues.h */
244 double
245 Blast_Overall_P_Value(double p_comp,
246  double p_alignment)
247 {
248  double product; /* the product of the two input p-values */
249 
250  product = p_comp * p_alignment;
251  if (product > 0) {
252  return product * (1.0 - log(product));
253  } else {
254  /* product is either correctly 0, in which case the overall P
255  * is also corectly zero; or incorrectly nonpositive (possibly
256  * NaN), in which case we return the nonsense value so as not
257  * to hide the error */
258  return product;
259  }
260 }
Constants used in compositional score matrix adjustment.
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
Int4 delta(size_t dimension_, const Int4 *score_)
double lambda(size_t dimMatrix_, const Int4 *const *scoreMatrix_, const double *q_)
double Blast_Overall_P_Value(double p_comp, double p_alignment)
This function implements the method of Fisher, R.
#define LAMBDA_BIN_TOTAL
total number of bins for the lambda distribution function
double Blast_CompositionPvalue(double lambda)
Conditionally compute a compositional p-value.
#define LOW_LAMBDA_BIN_CUT
Marks the boundary of the left tail of the lambda distribution; values of lambda that lie in the left...
static double P_lambda_table[565]
P_lambda_table is a discretized version of the empirical cumulative distribution function for the sta...
#define LAMBDA_BIN_SIZE
bin size when discretizing lambda distribution function
Headers for computing a "composition" p-value of a match, and for computing a unified p-value combini...
#define COMPO_MIN_LAMBDA
the smallest value of lambda in the table of lambda's empirical distribution function.
Modified on Fri Sep 20 14:57:15 2024 by modify_doxy.py rev. 669887