/** Width of the permitted score interval: BLAST_SCORE_MAX minus
 *  BLAST_SCORE_MIN (both are defined outside this view). */
56 #define BLAST_SCORE_RANGE_MAX (BLAST_SCORE_MAX - BLAST_SCORE_MIN)
/** Default sum limit (1e-4) for the Karlin K computation.
 *  NOTE(review): presumably supplied as the `sumlimit` cutoff that the K
 *  iteration later in this file compares its running term against --
 *  confirm at the call site. */
64 #define BLAST_KARLIN_K_SUMLIMIT_DEFAULT 0.0001
/** Default accuracy (1e-5) for the Lambda computation.
 *  NOTE(review): the consumer of this value is not in view -- confirm. */
66 #define BLAST_KARLIN_LAMBDA_ACCURACY_DEFAULT (1.e-5)
/** Default iteration count (17) for the Lambda computation.
 *  NOTE(review): the consumer of this value is not in view -- confirm. */
68 #define BLAST_KARLIN_LAMBDA_ITER_DEFAULT 17
/** Default starting value (0.5) for Lambda.
 *  NOTE(review): presumably the `lambda0` initial guess fed to the root
 *  finder that evaluates exp(-lambda0) -- confirm at the call site. */
70 #define BLAST_KARLIN_LAMBDA0_DEFAULT 0.5
/** Upper bound (100) on iterations for the K computation.
 *  NOTE(review): presumably supplied as `iterlimit` -- confirm. */
72 #define BLAST_KARLIN_K_ITER_MAX 100
/** Number of statistical values stored per parameter row (11).
 *  Each protein table row visible below lists 11 numbers. Code later in
 *  this file reads a row as: [0] gap open, [1] gap extension, [3] Lambda,
 *  [4] K, [5] H, [6] alpha, [7] beta, [8] C, [9] Alpha, [10] Sigma; entry
 *  [2] is printed as the "decline-to-align" value in a message string.
 *  NOTE(review): the typedef that uses this macro is not in view. */
75 #define BLAST_NUM_STAT_VALUES 11
/** Number of gap-cost parameter rows supplied for BLOSUM45; the 14 rows
 *  that follow match this count. The first row carries INT2_MAX in its
 *  three leading cost columns (presumably the ungapped entry -- confirm). */
182 #define BLOSUM45_VALUES_MAX 14
184 {(double)
INT2_MAX, (
double)
INT2_MAX, (double)
INT2_MAX, 0.2291, 0.0924, 0.2514, 0.9113, -5.7, 0.641318, 9.611060, 9.611060},
185 {13, 3, (double)
INT2_MAX, 0.207, 0.049, 0.14, 1.5, -22, 0.671128, 35.855900, 35.963900},
186 {12, 3, (double)
INT2_MAX, 0.199, 0.039, 0.11, 1.8, -34, 0.691530, 45.693600, 45.851700},
187 {11, 3, (double)
INT2_MAX, 0.190, 0.031, 0.095, 2.0, -38, 0.691181, 62.874100, 63.103700},
188 {10, 3, (double)
INT2_MAX, 0.179, 0.023, 0.075, 2.4, -51, 0.710529, 88.286800, 88.639100},
189 {16, 2, (double)
INT2_MAX, 0.210, 0.051, 0.14, 1.5, -24, 0.666680, 36.279800, 36.452400},
190 {15, 2, (double)
INT2_MAX, 0.203, 0.041, 0.12, 1.7, -31, 0.673871, 44.825700, 45.060400},
191 {14, 2, (double)
INT2_MAX, 0.195, 0.032, 0.10, 1.9, -36, 0.685753, 60.736200, 61.102300},
192 {13, 2, (double)
INT2_MAX, 0.185, 0.024, 0.084, 2.2, -45, 0.698480, 85.148100, 85.689400},
193 {12, 2, (double)
INT2_MAX, 0.171, 0.016, 0.061, 2.8, -65, 0.713429, 127.758000, 128.582000},
194 {19, 1, (double)
INT2_MAX, 0.205, 0.040, 0.11, 1.9, -43, 0.672302, 53.071400, 53.828200},
195 {18, 1, (double)
INT2_MAX, 0.198, 0.032, 0.10, 2.0, -43, 0.682580, 72.342400, 73.403900},
196 {17, 1, (double)
INT2_MAX, 0.189, 0.024, 0.079, 2.4, -57, 0.695035, 103.055000, 104.721000},
197 {16, 1, (double)
INT2_MAX, 0.176, 0.016, 0.063, 2.8, -67, 0.712966, 170.100000, 173.003000},
/** Number of gap-cost parameter rows supplied for BLOSUM50; the 16 rows
 *  that follow match this count. The first row carries INT2_MAX in its
 *  three leading cost columns (presumably the ungapped entry -- confirm). */
218 #define BLOSUM50_VALUES_MAX 16
220 {(double)
INT2_MAX, (
double)
INT2_MAX, (double)
INT2_MAX, 0.2318, 0.112, 0.3362, 0.6895, -4.0, 0.609639, 5.388310, 5.388310},
221 {13, 3, (double)
INT2_MAX, 0.212, 0.063, 0.19, 1.1, -16, 0.639287, 18.113800, 18.202800},
222 {12, 3, (double)
INT2_MAX, 0.206, 0.055, 0.17, 1.2, -18, 0.644715, 22.654600, 22.777700},
223 {11, 3, (double)
INT2_MAX, 0.197, 0.042, 0.14, 1.4, -25, 0.656327, 29.861100, 30.045700},
224 {10, 3, (double)
INT2_MAX, 0.186, 0.031, 0.11, 1.7, -34, 0.671150, 42.393800, 42.674000},
225 {9, 3, (double)
INT2_MAX, 0.172, 0.022, 0.082, 2.1, -48, 0.694326, 66.069600, 66.516400},
226 {16, 2, (double)
INT2_MAX, 0.215, 0.066, 0.20, 1.05, -15, 0.633899, 17.951800, 18.092100},
227 {15, 2, (double)
INT2_MAX, 0.210, 0.058, 0.17, 1.2, -20, 0.641985, 21.940100, 22.141800},
228 {14, 2, (double)
INT2_MAX, 0.202, 0.045, 0.14, 1.4, -27, 0.650682, 28.681200, 28.961900},
229 {13, 2, (double)
INT2_MAX, 0.193, 0.035, 0.12, 1.6, -32, 0.660984, 42.059500, 42.471600},
230 {12, 2, (double)
INT2_MAX, 0.181, 0.025, 0.095, 1.9, -41, 0.678090, 63.747600, 64.397300},
231 {19, 1, (double)
INT2_MAX, 0.212, 0.057, 0.18, 1.2, -21, 0.635714, 26.311200, 26.923300},
232 {18, 1, (double)
INT2_MAX, 0.207, 0.050, 0.15, 1.4, -28, 0.643523, 34.903700, 35.734800},
233 {17, 1, (double)
INT2_MAX, 0.198, 0.037, 0.12, 1.6, -33, 0.654504, 48.895800, 50.148600},
234 {16, 1, (double)
INT2_MAX, 0.186, 0.025, 0.10, 1.9, -42, 0.667750, 76.469100, 78.443000},
235 {15, 1, (double)
INT2_MAX, 0.171, 0.015, 0.063, 2.7, -76, 0.694575, 140.053000, 144.160000},
/** Number of gap-cost parameter rows supplied for BLOSUM62; the 12 rows
 *  that follow match this count. The first row carries INT2_MAX in its
 *  three leading cost columns (presumably the ungapped entry -- confirm). */
257 #define BLOSUM62_VALUES_MAX 12
259 {(double)
INT2_MAX, (
double)
INT2_MAX, (double)
INT2_MAX, 0.3176, 0.134, 0.4012, 0.7916, -3.2, 0.623757, 4.964660, 4.964660},
260 {11, 2, (double)
INT2_MAX, 0.297, 0.082, 0.27, 1.1, -10, 0.641766, 12.673800, 12.757600},
261 {10, 2, (double)
INT2_MAX, 0.291, 0.075, 0.23, 1.3, -15, 0.649362, 16.474000, 16.602600},
262 {9, 2, (double)
INT2_MAX, 0.279, 0.058, 0.19, 1.5, -19, 0.659245, 22.751900, 22.950000},
263 {8, 2, (double)
INT2_MAX, 0.264, 0.045, 0.15, 1.8, -26, 0.672692, 35.483800, 35.821300},
264 {7, 2, (double)
INT2_MAX, 0.239, 0.027, 0.10, 2.5, -46, 0.702056, 61.238300, 61.886000},
265 {6, 2, (double)
INT2_MAX, 0.201, 0.012, 0.061, 3.3, -58, 0.740802, 140.417000, 141.882000},
266 {13, 1, (double)
INT2_MAX, 0.292, 0.071, 0.23, 1.2, -11, 0.647715, 19.506300, 19.893100},
267 {12, 1, (double)
INT2_MAX, 0.283, 0.059, 0.19, 1.5, -19, 0.656391, 27.856200, 28.469900},
268 {11, 1, (double)
INT2_MAX, 0.267, 0.041, 0.14, 1.9, -30, 0.669720, 42.602800, 43.636200},
269 {10, 1, (double)
INT2_MAX, 0.243, 0.024, 0.10, 2.5, -44, 0.693267, 83.178700, 85.065600},
270 {9, 1, (double)
INT2_MAX, 0.206, 0.010, 0.052, 4.0, -87, 0.731887, 210.333000, 214.842000},
/** Number of gap-cost parameter rows supplied for BLOSUM80; the 10 rows
 *  that follow match this count. The first row carries INT2_MAX in its
 *  three leading cost columns (presumably the ungapped entry -- confirm). */
289 #define BLOSUM80_VALUES_MAX 10
291 {(double)
INT2_MAX, (
double)
INT2_MAX, (double)
INT2_MAX, 0.3430, 0.177, 0.6568, 0.5222, -1.6, 0.564057, 1.918130, 1.918130},
292 {25, 2, (double)
INT2_MAX, 0.342, 0.17, 0.66, 0.52, -1.6, 0.563956, 1.731000, 1.731300},
293 {13, 2, (double)
INT2_MAX, 0.336, 0.15, 0.57, 0.59, -3, 0.570979, 2.673470, 2.692300},
294 {9, 2, (double)
INT2_MAX, 0.319, 0.11, 0.42, 0.76, -6, 0.587837, 5.576090, 5.667860},
295 {8, 2, (double)
INT2_MAX, 0.308, 0.090, 0.35, 0.89, -9, 0.597556, 7.536950, 7.686230},
296 {7, 2, (double)
INT2_MAX, 0.293, 0.070, 0.27, 1.1, -14, 0.615254, 11.586600, 11.840400},
297 {6, 2, (double)
INT2_MAX, 0.268, 0.045, 0.19, 1.4, -19, 0.644054, 19.958100, 20.441200},
298 {11, 1, (double)
INT2_MAX, 0.314, 0.095, 0.35, 0.90, -9, 0.590702, 8.808610, 9.223320},
299 {10, 1, (double)
INT2_MAX, 0.299, 0.071, 0.27, 1.1, -14, 0.609620, 13.833800, 14.533400},
300 {9, 1, (double)
INT2_MAX, 0.279, 0.048, 0.20, 1.4, -19, 0.623800, 24.252000, 25.490400},
/** Number of gap-cost parameter rows supplied for BLOSUM90; the 8 rows
 *  that follow match this count. The first row carries INT2_MAX in its
 *  three leading cost columns (presumably the ungapped entry -- confirm). */
316 #define BLOSUM90_VALUES_MAX 8
318 {(double)
INT2_MAX, (
double)
INT2_MAX, (double)
INT2_MAX, 0.3346, 0.190, 0.7547, 0.4434, -1.4 , 0.544178, 1.377760, 1.377760},
319 {9, 2, (double)
INT2_MAX, 0.310, 0.12, 0.46, 0.67, -6 , 0.570267, 4.232290, 4.334170},
320 {8, 2, (double)
INT2_MAX, 0.300, 0.099, 0.39, 0.76, -7, 0.581580, 5.797020, 5.961420},
321 {7, 2, (double)
INT2_MAX, 0.283, 0.072, 0.30, 0.93, -11, 0.600024, 9.040880, 9.321600},
322 {6, 2, (double)
INT2_MAX, 0.259, 0.048, 0.22, 1.2, -16, 0.629344, 16.024400, 16.531600},
323 {11, 1, (double)
INT2_MAX, 0.302, 0.093, 0.39, 0.78, -8, 0.576919, 7.143250, 7.619190},
324 {10, 1, (double)
INT2_MAX, 0.290, 0.075, 0.28, 1.04, -15, 0.591366, 11.483900, 12.269800},
325 {9, 1, (double)
INT2_MAX, 0.265, 0.044, 0.20, 1.3, -19, 0.613013, 21.408300, 22.840900},
/** Number of gap-cost parameter rows supplied for PAM250; the 16 rows
 *  that follow match this count. The first row carries INT2_MAX in its
 *  three leading cost columns (presumably the ungapped entry -- confirm). */
339 #define PAM250_VALUES_MAX 16
341 {(double)
INT2_MAX, (
double)
INT2_MAX, (double)
INT2_MAX, 0.2252, 0.0868, 0.2223, 0.98, -5.0, 0.660059, 11.754300, 11.754300},
342 {15, 3, (double)
INT2_MAX, 0.205, 0.049, 0.13, 1.6, -23, 0.687656, 34.578400, 34.928000},
343 {14, 3, (double)
INT2_MAX, 0.200, 0.043, 0.12, 1.7, -26, 0.689768, 43.353000, 43.443800},
344 {13, 3, (double)
INT2_MAX, 0.194, 0.036, 0.10, 1.9, -31, 0.697431, 50.948500, 51.081700},
345 {12, 3, (double)
INT2_MAX, 0.186, 0.029, 0.085, 2.2, -41, 0.704565, 69.606500, 69.793600},
346 {11, 3, (double)
INT2_MAX, 0.174, 0.020, 0.070, 2.5, -48, 0.722438, 98.653500, 98.927100},
347 {17, 2, (double)
INT2_MAX, 0.204, 0.047, 0.12, 1.7, -28, 0.684799, 41.583800, 41.735800},
348 {16, 2, (double)
INT2_MAX, 0.198, 0.038, 0.11, 1.8, -29, 0.691098, 51.635200, 51.843900},
349 {15, 2, (double)
INT2_MAX, 0.191, 0.031, 0.087, 2.2, -44, 0.699051, 67.256700, 67.558500},
350 {14, 2, (double)
INT2_MAX, 0.182, 0.024, 0.073, 2.5, -53, 0.714103, 96.315100, 96.756800},
351 {13, 2, (double)
INT2_MAX, 0.171, 0.017, 0.059, 2.9, -64, 0.728738, 135.653000, 136.339000},
352 {21, 1, (double)
INT2_MAX, 0.205, 0.045, 0.11, 1.8, -34, 0.683265, 48.728200, 49.218800},
353 {20, 1, (double)
INT2_MAX, 0.199, 0.037, 0.10, 1.9, -35, 0.689380, 60.832000, 61.514100},
354 {19, 1, (double)
INT2_MAX, 0.192, 0.029, 0.083, 2.3, -52, 0.696344, 84.019700, 84.985600},
355 {18, 1, (double)
INT2_MAX, 0.183, 0.021, 0.070, 2.6, -60, 0.710525, 113.829000, 115.184000},
356 {17, 1, (double)
INT2_MAX, 0.171, 0.014, 0.052, 3.3, -86, 0.727000, 175.071000, 177.196000},
/** Number of gap-cost parameter rows supplied for PAM30; the 11 rows
 *  that follow match this count. The first row carries INT2_MAX in its
 *  three leading cost columns (presumably the ungapped entry -- confirm). */
378 #define PAM30_VALUES_MAX 11
380 {(double)
INT2_MAX, (
double)
INT2_MAX, (double)
INT2_MAX, 0.3400, 0.283, 1.754, 0.1938, -0.3, 0.436164, 0.161818, 0.161818},
381 {7, 2, (double)
INT2_MAX, 0.305, 0.15, 0.87, 0.35, -3, 0.479087, 1.014010, 1.162730},
382 {6, 2, (double)
INT2_MAX, 0.287, 0.11, 0.68, 0.42, -4, 0.499980, 1.688060, 1.951430},
383 {5, 2, (double)
INT2_MAX, 0.264, 0.079, 0.45, 0.59, -7, 0.533009, 3.377010, 3.871950},
384 {10, 1, (double)
INT2_MAX, 0.309, 0.15, 0.88, 0.35, -3, 0.474741, 1.372050, 1.788770},
385 {9, 1, (double)
INT2_MAX, 0.294, 0.11, 0.61, 0.48, -6, 0.492716, 2.463920, 3.186150},
386 {8, 1, (double)
INT2_MAX, 0.270, 0.072, 0.40, 0.68, -10, 0.521286, 5.368130, 6.763480},
387 {15, 3, (double)
INT2_MAX, 0.339, 0.28, 1.70, 0.20, -0.5, 0.437688, 0.157089, 0.155299},
388 {14, 2, (double)
INT2_MAX, 0.337, 0.27, 1.62, 0.21, -0.8, 0.440010, 0.206970, 0.198524},
389 {14, 1, (double)
INT2_MAX, 0.333, 0.27, 1.43, 0.23, -1.4, 0.444817, 0.436301, 0.361947},
390 {13, 3, (double)
INT2_MAX, 0.338, 0.27, 1.69, 0.20, -0.5, 0.439086, 0.178973, 0.175436},
/** Number of gap-cost parameter rows supplied for PAM70; the 9 rows
 *  that follow match this count. The first row carries INT2_MAX in its
 *  three leading cost columns (presumably the ungapped entry -- confirm). */
408 #define PAM70_VALUES_MAX 9
410 {(double)
INT2_MAX, (
double)
INT2_MAX, (double)
INT2_MAX, 0.3345, 0.229, 1.029, 0.3250, -0.7, 0.511296, 0.633439, 0.633439},
411 {8, 2, (double)
INT2_MAX, 0.301, 0.12, 0.54, 0.56, -5, 0.549019, 2.881650, 3.025710},
412 {7, 2, (double)
INT2_MAX, 0.286, 0.093, 0.43, 0.67, -7, 0.565659, 4.534540, 4.785780},
413 {6, 2, (double)
INT2_MAX, 0.264, 0.064, 0.29, 0.90, -12, 0.596330, 7.942630, 8.402720},
414 {11, 1, (double)
INT2_MAX, 0.305, 0.12, 0.52, 0.59, -6, 0.543514, 3.681400, 4.108020},
415 {10, 1, (double)
INT2_MAX, 0.291, 0.091, 0.41, 0.71, -9, 0.560723, 6.002970, 6.716570},
416 {9, 1, (double)
INT2_MAX, 0.270, 0.060, 0.28, 0.97, -14, 0.585186, 11.360800, 12.636700},
417 {11, 2, (double)
INT2_MAX, 0.323, 0.186, 0.80, 1.32, -27, 0.524062, 1.321301, 1.281671},
418 {12, 3, (double)
INT2_MAX, 0.330, 0.219, 0.93, 0.82, -16, 0.516845, 0.818768, 0.811240},
435 #ifdef BLOSUM62_20_ENABLE
/** Number of parameter rows for BLOSUM62_20 (65). Used as the dimension of
 *  both blosum62_20_values and blosum62_20_prefs, and passed with them to
 *  MatrixInfoNew; only compiled when BLOSUM62_20_ENABLE is defined. */
437 #define BLOSUM62_20_VALUES_MAX 65
438 static array_of_8 blosum62_20_values[BLOSUM62_20_VALUES_MAX] = {
439 {(double)
INT2_MAX, (
double)
INT2_MAX, (double)
INT2_MAX, 0.03391, 0.125, 0.4544, 0.07462, -3.2,0.0,0.0,0.0},
440 {100, 12, (double)
INT2_MAX, 0.0300, 0.056, 0.21, 0.14, -15,0.0,0.0,0.0},
441 {95, 12, (double)
INT2_MAX, 0.0291, 0.047, 0.18, 0.16, -20,0.0,0.0,0.0},
442 {90, 12, (double)
INT2_MAX, 0.0280, 0.038, 0.15, 0.19, -28,0.0,0.0,0.0},
443 {85, 12, (double)
INT2_MAX, 0.0267, 0.030, 0.13, 0.21, -31,0.0,0.0,0.0},
444 {80, 12, (double)
INT2_MAX, 0.0250, 0.021, 0.10, 0.25, -39,0.0,0.0,0.0},
445 {105, 11, (double)
INT2_MAX, 0.0301, 0.056, 0.22, 0.14, -16,0.0,0.0,0.0},
446 {100, 11, (double)
INT2_MAX, 0.0294, 0.049, 0.20, 0.15, -17,0.0,0.0,0.0},
447 {95, 11, (double)
INT2_MAX, 0.0285, 0.042, 0.16, 0.18, -25,0.0,0.0,0.0},
448 {90, 11, (double)
INT2_MAX, 0.0271, 0.031, 0.14, 0.20, -28,0.0,0.0,0.0},
449 {85, 11, (double)
INT2_MAX, 0.0256, 0.023, 0.10, 0.26, -46,0.0,0.0,0.0},
450 {115, 10, (double)
INT2_MAX, 0.0308, 0.062, 0.22, 0.14, -20,0.0,0.0,0.0},
451 {110, 10, (double)
INT2_MAX, 0.0302, 0.056, 0.19, 0.16, -26,0.0,0.0,0.0},
452 {105, 10, (double)
INT2_MAX, 0.0296, 0.050, 0.17, 0.17, -27,0.0,0.0,0.0},
453 {100, 10, (double)
INT2_MAX, 0.0286, 0.041, 0.15, 0.19, -32,0.0,0.0,0.0},
454 {95, 10, (double)
INT2_MAX, 0.0272, 0.030, 0.13, 0.21, -35,0.0,0.0,0.0},
455 {90, 10, (double)
INT2_MAX, 0.0257, 0.022, 0.11, 0.24, -40,0.0,0.0,0.0},
456 {85, 10, (double)
INT2_MAX, 0.0242, 0.017, 0.083, 0.29, -51,0.0,0.0,0.0},
457 {115, 9, (double)
INT2_MAX, 0.0306, 0.061, 0.24, 0.13, -14,0.0,0.0,0.0},
458 {110, 9, (double)
INT2_MAX, 0.0299, 0.053, 0.19, 0.16, -23,0.0,0.0,0.0},
459 {105, 9, (double)
INT2_MAX, 0.0289, 0.043, 0.17, 0.17, -23,0.0,0.0,0.0},
460 {100, 9, (double)
INT2_MAX, 0.0279, 0.036, 0.14, 0.20, -31,0.0,0.0,0.0},
461 {95, 9, (double)
INT2_MAX, 0.0266, 0.028, 0.12, 0.23, -37,0.0,0.0,0.0},
462 {120, 8, (double)
INT2_MAX, 0.0307, 0.062, 0.22, 0.14, -18,0.0,0.0,0.0},
463 {115, 8, (double)
INT2_MAX, 0.0300, 0.053, 0.20, 0.15, -19,0.0,0.0,0.0},
464 {110, 8, (double)
INT2_MAX, 0.0292, 0.046, 0.17, 0.17, -23,0.0,0.0,0.0},
465 {105, 8, (double)
INT2_MAX, 0.0280, 0.035, 0.14, 0.20, -31,0.0,0.0,0.0},
466 {100, 8, (double)
INT2_MAX, 0.0266, 0.026, 0.12, 0.23, -37,0.0,0.0,0.0},
467 {125, 7, (double)
INT2_MAX, 0.0306, 0.058, 0.22, 0.14, -18,0.0,0.0,0.0},
468 {120, 7, (double)
INT2_MAX, 0.0300, 0.052, 0.19, 0.16, -23,0.0,0.0,0.0},
469 {115, 7, (double)
INT2_MAX, 0.0292, 0.044, 0.17, 0.17, -24,0.0,0.0,0.0},
470 {110, 7, (double)
INT2_MAX, 0.0279, 0.032, 0.14, 0.20, -31,0.0,0.0,0.0},
471 {105, 7, (double)
INT2_MAX, 0.0267, 0.026, 0.11, 0.24, -41,0.0,0.0,0.0},
472 {120,10,5, 0.0298, 0.049, 0.19, 0.16, -21,0.0,0.0,0.0},
473 {115,10,5, 0.0290, 0.042, 0.16, 0.18, -25,0.0,0.0,0.0},
474 {110,10,5, 0.0279, 0.033, 0.13, 0.21, -32,0.0,0.0,0.0},
475 {105,10,5, 0.0264, 0.024, 0.10, 0.26, -46,0.0,0.0,0.0},
476 {100,10,5, 0.0250, 0.018, 0.081, 0.31, -56,0.0,0.0,0.0},
477 {125,10,4, 0.0301, 0.053, 0.18, 0.17, -25,0.0,0.0,0.0},
478 {120,10,4, 0.0292, 0.043, 0.15, 0.20, -33,0.0,0.0,0.0},
479 {115,10,4, 0.0282, 0.035, 0.13, 0.22, -36,0.0,0.0,0.0},
480 {110,10,4, 0.0270, 0.027, 0.11, 0.25, -41,0.0,0.0,0.0},
481 {105,10,4, 0.0254, 0.020, 0.079, 0.32, -60,0.0,0.0,0.0},
482 {130,10,3, 0.0300, 0.051, 0.17, 0.18, -27,0.0,0.0,0.0},
483 {125,10,3, 0.0290, 0.040, 0.13, 0.22, -38,0.0,0.0,0.0},
484 {120,10,3, 0.0278, 0.030, 0.11, 0.25, -44,0.0,0.0,0.0},
485 {115,10,3, 0.0267, 0.025, 0.092, 0.29, -52,0.0,0.0,0.0},
486 {110,10,3, 0.0252, 0.018, 0.070, 0.36, -70,0.0,0.0,0.0},
487 {135,10,2, 0.0292, 0.040, 0.13, 0.22, -35,0.0,0.0,0.0},
488 {130,10,2, 0.0283, 0.034, 0.10, 0.28, -51,0.0,0.0,0.0},
489 {125,10,2, 0.0269, 0.024, 0.077, 0.35, -71,0.0,0.0,0.0},
490 {120,10,2, 0.0253, 0.017, 0.059, 0.43, -90,0.0,0.0,0.0},
491 {115,10,2, 0.0234, 0.011, 0.043, 0.55, -121,0.0,0.0,0.0},
492 {100,14,3, 0.0258, 0.023, 0.087, 0.33, -59,0.0,0.0,0.0},
493 {105,13,3, 0.0263, 0.024, 0.085, 0.31, -57,0.0,0.0,0.0},
494 {110,12,3, 0.0271, 0.028, 0.093, 0.29, -54,0.0,0.0,0.0},
495 {115,11,3, 0.0275, 0.030, 0.10, 0.27, -49,0.0,0.0,0.0},
496 {125,9,3, 0.0283, 0.034, 0.12, 0.23, -38,0.0,0.0,0.0},
497 {130,8,3, 0.0287, 0.037, 0.12, 0.23, -40,0.0,0.0,0.0},
498 {125,7,3, 0.0287, 0.036, 0.12, 0.24, -44,0.0,0.0,0.0},
499 {140,6,3, 0.0285, 0.033, 0.12, 0.23, -40,0.0,0.0,0.0},
500 {105,14,3, 0.0270, 0.028, 0.10, 0.27, -46,0.0,0.0,0.0},
501 {110,13,3, 0.0279, 0.034, 0.10, 0.27, -50,0.0,0.0,0.0},
502 {115,12,3, 0.0282, 0.035, 0.12, 0.24, -42,0.0,0.0,0.0},
503 {120,11,3, 0.0286, 0.037, 0.12, 0.24, -44,0.0,0.0,0.0},
506 static Int4 blosum62_20_prefs[BLOSUM62_20_VALUES_MAX] = {
/** Number of gap-cost parameter rows supplied for the protein identity
 *  table; the 2 rows that follow match this count. The first row carries
 *  INT2_MAX in its three leading cost columns (presumably the ungapped
 *  entry -- confirm). */
577 #define PROT_IDENTITY_VALUES_MAX 2
579 {(double)
INT2_MAX, (
double)
INT2_MAX, (double)
INT2_MAX, 0.28768, 0.282, 1.69, 0.1703, -0.3, 0.43828, 0.16804, 0.16804},
580 {15, 2, (double)
INT2_MAX, 0.2835, 0.255, 1.49, 0.19, -1, 0.44502, 0.24613, 0.22743}
612 { 0, 0, 1.39, 0.747, 1.38, 1.00, 0, 100 },
613 { 3, 3, 1.39, 0.747, 1.38, 1.00, 0, 100 }
618 { 0, 0, 1.383, 0.738, 1.36, 1.02, 0, 100 },
619 { 1, 2, 1.36, 0.67, 1.2, 1.1, 0, 98 },
620 { 0, 2, 1.26, 0.43, 0.90, 1.4, -1, 91 },
621 { 2, 1, 1.35, 0.61, 1.1, 1.2, -1, 98 },
622 { 1, 1, 1.22, 0.35, 0.72, 1.7, -3, 88 }
630 { 0, 0, 0.69, 0.73, 1.34, 0.515, 0, 100 },
631 { 2, 4, 0.68, 0.67, 1.2, 0.55, 0, 99 },
632 { 0, 4, 0.63, 0.43, 0.90, 0.7, -1, 91 },
633 { 4, 2, 0.675, 0.62, 1.1, 0.6, -1, 98 },
634 { 2, 2, 0.61, 0.35, 0.72, 1.7, -3, 88 }
639 { 0, 0, 1.374, 0.711, 1.31, 1.05, 0, 100 },
640 { 2, 2, 1.37, 0.70, 1.2, 1.1, 0, 99 },
641 { 1, 2, 1.35, 0.64, 1.1, 1.2, -1, 98 },
642 { 0, 2, 1.25, 0.42, 0.83, 1.5, -2, 91 },
643 { 2, 1, 1.34, 0.60, 1.1, 1.2, -1, 97 },
644 { 1, 1, 1.21, 0.34, 0.71, 1.7, -2, 88 }
652 { 0, 0, 0.675, 0.65, 1.1, 0.6, -1, 99 },
653 { 2, 4, 0.67, 0.59, 1.1, 0.6, -1, 98 },
654 { 0, 4, 0.62, 0.39, 0.78, 0.8, -2, 91 },
655 { 4, 2, 0.67, 0.61, 1.0, 0.65, -2, 98 },
656 { 2, 2, 0.56, 0.32, 0.59, 0.95, -4, 82 }
661 { 0, 0, 1.28, 0.46, 0.85, 1.5, -2, 96 },
662 { 2, 2, 1.33, 0.62, 1.1, 1.2, 0, 99 },
663 { 1, 2, 1.30, 0.52, 0.93, 1.4, -2, 97 },
664 { 0, 2, 1.19, 0.34, 0.66, 1.8, -3, 89 },
665 { 3, 1, 1.32, 0.57, 1.0, 1.3, -1, 99 },
666 { 2, 1, 1.29, 0.49, 0.92, 1.4, -1, 96 },
667 { 1, 1, 1.14, 0.26, 0.52, 2.2, -5, 85 }
675 { 0, 0, 0.55, 0.21, 0.46, 1.2, -5, 87 },
676 { 4, 4, 0.63, 0.42, 0.84, 0.75, -2, 99 },
677 { 2, 4, 0.615, 0.37, 0.72, 0.85, -3, 97 },
678 { 0, 4, 0.55, 0.21, 0.46, 1.2, -5, 87 },
679 { 3, 3, 0.615, 0.37, 0.68, 0.9, -3, 97 },
680 { 6, 2, 0.63, 0.42, 0.84, 0.75, -2, 99 },
681 { 5, 2, 0.625, 0.41, 0.78, 0.8, -2, 99 },
682 { 4, 2, 0.61, 0.35, 0.68, 0.9, -3, 96 },
683 { 2, 2, 0.515, 0.14, 0.33, 1.55, -9, 81 }
688 { 6, 3, 0.389, 0.25, 0.56, 0.7, -5, 95},
689 { 5, 3, 0.375, 0.21, 0.47, 0.8, -6, 92},
690 { 4, 3, 0.351, 0.14, 0.35, 1.0, -9, 86},
691 { 6, 2, 0.362, 0.16, 0.45, 0.8, -4, 88},
692 { 5, 2, 0.330, 0.092, 0.28, 1.2, -13, 81},
693 { 4, 2, 0.281, 0.046, 0.16, 1.8, -23, 69}
698 { 0, 0, 0.22, 0.061, 0.22, 1.0, -15, 74 },
699 { 6, 5, 0.28, 0.21, 0.47, 0.6 , -7, 93 },
700 { 5, 5, 0.27, 0.17, 0.39, 0.7, -9, 90 },
701 { 4, 5, 0.25, 0.10, 0.31, 0.8, -10, 83 },
702 { 3, 5, 0.23, 0.065, 0.25, 0.9, -11, 76 }
707 { 3, 2, 1.09, 0.31, 0.55, 2.0, -2, 99 },
708 { 2, 2, 1.07, 0.27, 0.49, 2.2, -3, 97 },
709 { 1, 2, 1.02, 0.21, 0.36, 2.8, -6, 92 },
710 { 0, 2, 0.80, 0.064, 0.17, 4.8, -16, 72 },
711 { 4, 1, 1.08, 0.28, 0.54, 2.0, -2, 98 },
712 { 3, 1, 1.06, 0.25, 0.46, 2.3, -4, 96 },
713 { 2, 1, 0.99, 0.17, 0.30, 3.3, -10, 90 }
718 { 5, 5, 0.208, 0.030, 0.072, 2.9, -47, 77}
723 { 10, 6, 0.163, 0.068, 0.16, 1.0, -19, 85 },
724 { 8, 6, 0.146, 0.039, 0.11, 1.3, -29, 76 }
770 if ( !retval->
data ) {
777 retval->
freqs = (
double *)
calloc(ncols,
sizeof(
double));
780 retval->
ncols = ncols;
781 retval->
nrows = nrows;
816 if ( !retval->
pssm ) {
827 if ( !retval->
kbp ) {
847 if ( !gbp)
return NULL;
866 if (sbp->
kbp[index] || sbp->
sfp[index])
922 use_old_fsc = getenv(
"OLD_FSC");
1016 Uint1* ambig_buffer;
1062 Int2 index1, index2, degen;
1068 const int k_number_non_ambig_bp = 4;
1082 matrix[index1][index2] = 0;
1088 for (index1=0; index1<k_number_non_ambig_bp; index1++)
1089 degeneracy[index1] = 1;
1091 for (index1=k_number_non_ambig_bp; index1<
BLASTNA_SIZE; index1++) {
1093 for (index2=0; index2<k_number_non_ambig_bp; index2++)
1098 degeneracy[index1] = degen;
1106 matrix[index1][index2] =
1108 reward)/ (double) degeneracy[index2]);
1109 if (index1 != index2)
1111 matrix[index2][index1] = matrix[index1][index2];
1116 matrix[index1][index2] = penalty;
1117 matrix[index2][index1] = penalty;
1161 register int index1, index2;
1165 double lambda_upper = 0;
1166 double lambda_lower = 0;
1170 const char kCommentChar =
'#';
1171 const char* kTokenStr =
" \t\n\r";
1185 while ( fgets(fbuf,
sizeof(fbuf),
fp) ) {
1186 if (strchr(fbuf,
'\n') ==
NULL) {
1194 while( (*cp) &&
isspace(*cp) ) cp++;
1196 if (*cp == kCommentChar) {
1198 if ( (ncp = strstr( cp, (
const char *)
"FREQS" )) !=
NULL ) {
1201 while( (*cp) &&
isspace(*cp) ) cp++;
1203 lp = (
char*)strtok(cp, kTokenStr);
1209 while (lp !=
NULL) {
1213 lp = (
char*)strtok(
NULL, kTokenStr);
1219 if ( sscanf(lp,
"%lf", &fval ) != 1 )
1225 lp = (
char*)strtok(
NULL, kTokenStr);
1229 *strchr(cp,
'\n') =
NULLB;
1236 if (
isalpha(*cp) && !alphabet[0] ) {
1238 lp = (
char*)strtok(cp, kTokenStr);
1239 while (lp !=
NULL) {
1240 alphabet[j++] =
toupper((
unsigned char)(*lp));
1241 lp = (
char*)strtok(
NULL, kTokenStr);
1250 while( (*cp) &&
isspace(*cp) ) cp++;
1254 if (
isdigit(*cp) || *cp ==
'-' ) {
1256 lp = (
char*)strtok(cp, kTokenStr);
1258 while (lp !=
NULL) {
1259 if ( sscanf(lp,
"%d", &
val ) != 1 )
1262 matrix[rowIdx][colIdx] =
val;
1263 lp = (
char*)strtok(
NULL, kTokenStr);
1267 if ( j != alphaSize )
1275 if ( numFreqs != 4 ||
i != alphaSize )
1287 if ( freqs[
i] && freqs[j] )
1289 sum += freqs[
i] * freqs[j] *
1291 check += freqs[
i] * freqs[j];
1300 }
while ( sum < 1.0 );
1304 while ( lambda_upper - lambda_lower > (
double).00001 ) {
1305 lambda = ( lambda_lower + lambda_upper ) / 2.0;
1312 if ( freqs[
i] && freqs[j] )
1314 sum += freqs[
i] * freqs[j] *
1316 check += freqs[
i] * freqs[j];
1360 Uint4 a1cnt = 0, a2cnt = 0;
1364 register int index1, index2;
1365 int x_index, u_index, o_index, c_index;
1366 const char kCommentChar =
'#';
1367 const char* kTokenStr =
" \t\n\r";
1385 if (strchr(
buf,
'\n') ==
NULL) {
1389 if (
buf[0] == kCommentChar) {
1395 if ((cp = strchr(
buf, kCommentChar)) !=
NULL)
1397 lp = (
char*)strtok(
buf, kTokenStr);
1400 while (lp !=
NULL) {
1408 a2chars[a2cnt++] = ch;
1409 lp = (
char*)strtok(
NULL, kTokenStr);
1421 if ((cp = strchr(
buf,
'\n')) ==
NULL) {
1424 if ((cp = strchr(
buf, kCommentChar)) !=
NULL)
1426 if ((lp = (
char*)strtok(
buf, kTokenStr)) ==
NULL)
1429 if ((cp = strtok(
NULL, kTokenStr)) ==
NULL) {
1432 if (a1cnt >=
DIM(a1chars)) {
1443 a1chars[a1cnt++] = ch;
1444 m = &matrix[(
int)ch][0];
1446 while (cp !=
NULL) {
1447 if (index2 >= (
int) a2cnt) {
1455 if (sscanf(temp,
"%lg", &xscore) != 1) {
1462 xscore += (xscore >= 0. ? 0.5 : -0.5);
1463 score = (
Int4)xscore;
1466 m[(
int)a2chars[index2++]] = score;
1468 cp = strtok(
NULL, kTokenStr);
1483 matrix[u_index][index1] = matrix[c_index][index1];
1484 matrix[index1][u_index] = matrix[index1][c_index];
1485 matrix[o_index][index1] = matrix[x_index][index1];
1486 matrix[index1][o_index] = matrix[index1][x_index];
1504 Int2 index1, index2;
1513 score = matrix[index1][index2];
1544 int x_index, u_index, o_index, c_index;
1589 matrix[u_index][
i] = matrix[c_index][
i];
1590 matrix[
i][u_index] = matrix[
i][c_index];
1591 matrix[o_index][
i] = matrix[x_index][
i];
1592 matrix[
i][o_index] = matrix[
i][x_index];
1615 matrix_found =
FALSE;
1621 matrix_found =
TRUE;
1628 matrix_found =
TRUE;
1633 char* matrix_path = get_path(sbp->
name,
FALSE);
1637 char* full_matrix_path =
NULL;
1638 size_t path_len = strlen(matrix_path);
1639 size_t buflen = path_len + strlen(sbp->
name);
1641 full_matrix_path = (
char*)
malloc((buflen + 1) *
sizeof(char));
1642 if (!full_matrix_path) {
1645 memcpy(full_matrix_path, matrix_path, path_len);
1646 memcpy(full_matrix_path + path_len, sbp->
name, buflen - path_len);
1647 full_matrix_path[buflen] =
'\0';
1651 if ( (
fp=fopen(full_matrix_path,
"r")) ==
NULL) {
1654 sfree(full_matrix_path);
1675 matrix_found =
TRUE;
1679 if (matrix_found ==
FALSE)
/** Alias that expands to Robinson_prob, so code written against
 *  STD_AMINO_ACID_FREQS uses that table. Robinson_prob itself is defined
 *  outside this view.
 *  NOTE(review): presumably the standard amino-acid background
 *  frequencies -- confirm against the table definition. */
1818 #define STD_AMINO_ACID_FREQS Robinson_prob
1837 Int2 alphabet_stop, index;
1846 p = rfp->
prob[index];
1856 rfp->
prob[index] /= sum;
1857 rfp->
prob[index] *= norm;
2002 rcp->
comp0[index] = 0;
2004 for (lp =
str, lpmax = lp+length; lp < lpmax; lp++)
2026 Int2 alphabet_max, index;
2030 rfp->
prob[index] = 0.0;
2047 Int2 alphabet_max, index;
2058 sum += rcp->
comp[index];
2066 rfp->
prob[index] = rcp->
comp[index] / sum;
2102 if (lo >= 0 || hi <= 0 ||
2125 range = score_max - score_min + 1;
2134 sfp->
sprob -= score_min;
2154 Int4 score, obs_min, obs_max;
2155 double score_sum, score_avg;
2156 Int2 alphabet_start, alphabet_end, index1, index2;
2164 for (score = sfp->
score_min; score <= sfp->score_max; score++)
2165 sfp->
sprob[score] = 0.0;
2171 for (index1=alphabet_start; index1<alphabet_end; index1++)
2173 for (index2=alphabet_start; index2<alphabet_end; index2++)
2175 score = matrix[index1][index2];
2178 sfp->
sprob[score] += rfp1->
prob[index1] * rfp2->
prob[index2];
2185 for (score = sfp->
score_min; score <= sfp->score_max; score++)
2187 if (sfp->
sprob[score] > 0.)
2189 score_sum += sfp->
sprob[score];
2199 if (score_sum > 0.0001 || score_sum < -0.0001)
2201 for (score = obs_min; score <= obs_max; score++)
2203 sfp->
sprob[score] /= score_sum;
2204 score_avg += score * sfp->
sprob[score];
2253 double *alignmentScoreProbabilities =
NULL;
2263 Int4 lowAlignmentScore, highAlignmentScore;
2265 register double innerSum;
2266 double oldsum, oldsum2;
2274 double firstTermClosedForm;
2280 double *probArrayStartLow;
2283 double *ptrP, *ptr1, *ptr2, *ptr1e;
2284 double expMinusLambda;
2286 if (
lambda <= 0. ||
H <= 0.) {
2302 probArrayStartLow = &sfp->
sprob[low];
2305 for (
i = 1, divisor = -low; i <= range && divisor > 1; ++
i) {
2306 if (probArrayStartLow[
i] != 0.0)
2316 firstTermClosedForm =
H/
lambda;
2317 expMinusLambda = exp((
double) -
lambda);
2319 if (low == -1 && high == 1) {
2320 K = (sfp->
sprob[low*divisor] - sfp->
sprob[high*divisor]) *
2321 (sfp->
sprob[low*divisor] - sfp->
sprob[high*divisor]) / sfp->
sprob[low*divisor];
2325 if (low == -1 || high == 1) {
2329 = (score_avg * score_avg) / firstTermClosedForm;
2331 return firstTermClosedForm * (1.0 - expMinusLambda);
2337 alignmentScoreProbabilities =
2338 (
double *)
calloc((iterlimit*
range + 1),
sizeof(*alignmentScoreProbabilities));
2339 if (alignmentScoreProbabilities ==
NULL)
2343 lowAlignmentScore = highAlignmentScore = 0;
2344 alignmentScoreProbabilities[0] = innerSum = oldsum = oldsum2 = 1.;
2346 for (iterCounter = 0;
2347 ((iterCounter < iterlimit) && (innerSum > sumlimit));
2348 outerSum += innerSum /= ++iterCounter) {
2350 lowAlignmentScore += low;
2351 highAlignmentScore += high;
2353 for (ptrP = alignmentScoreProbabilities +
2354 (highAlignmentScore-lowAlignmentScore);
2355 ptrP >= alignmentScoreProbabilities;
2356 *ptrP-- =innerSum) {
2357 ptr1 = ptrP -
first;
2358 ptr1e = ptrP -
last;
2359 ptr2 = probArrayStartLow +
first;
2360 for (innerSum = 0.; ptr1 >= ptr1e; ) {
2361 innerSum += *ptr1 * *ptr2;
2367 if (ptrP - alignmentScoreProbabilities <=
range)
2372 for(
i = lowAlignmentScore + 1;
i < 0;
i++ ) {
2373 innerSum = *++ptrP + innerSum * expMinusLambda;
2375 innerSum *= expMinusLambda;
2377 for (;
i <= highAlignmentScore; ++
i)
2378 innerSum += *++ptrP;
2383 #ifdef ADD_GEOMETRIC_TERMS_TO_K
2397 ratio = oldsum / oldsum2;
2398 if (ratio >= (1.0 - sumlimit*0.001)) {
2400 if (alignmentScoreProbabilities !=
NULL)
2401 sfree(alignmentScoreProbabilities);
2405 while (innerSum > sumlimit) {
2407 outerSum += innerSum = oldsum / ++iterCounter;
2412 K = -exp((
double)-2.0*outerSum) /
2415 if (alignmentScoreProbabilities !=
NULL)
2416 sfree(alignmentScoreProbabilities);
2495 double x0, x,
a = 0,
b = 1;
2501 x0 = exp( -lambda0 );
2502 x = ( 0 < x0 && x0 < 1 ) ? x0 : .5;
2504 for( k = 0; k < itmax; k++ ) {
2507 Int4 wasNewton = isNewton;
2514 for(
i = low + d;
i < 0;
i += d ) {
2516 f =
f * x + probs[
i];
2519 f =
f * x + probs[0] - 1;
2520 for(
i = d;
i <= high;
i += d ) {
2522 f =
f * x + probs[
i];
2528 }
else if(
f < 0 ) {
2533 if(
b -
a < 2 *
a * ( 1 -
b ) * tolx ) {
2535 x = (
a +
b) / 2;
break;
2538 if( k >= maxNewton ||
2540 ( wasNewton &&
fabs(
f ) > .9 *
fabs(fold) ) ||
2552 if( y <= a || y >=
b ) {
2557 if(
fabs( p ) < tolx * x * (1-x) )
break;
2585 for (
i = 1, d = -low;
i <= high-low && d > 1; ++
i) {
2586 if (sprob[
i+low] != 0.0) {
2610 double H, etonlam, sum, scale;
2612 double *probs = sfp->
sprob;
2620 etonlam = exp( -
lambda );
2621 sum = low * probs[low];
2622 for( score = low + 1; score <= high; score++ ) {
2623 sum = score * probs[score] + etonlam * sum;
2731 kbp->
Lambda = kbp->
H = kbp->
K = -1.;
2732 kbp->
logK = HUGE_VAL;
2763 context <= query_info->last_context; ++
context) {
2765 Int4 context_offset;
2809 valid_context =
TRUE;
2815 if (valid_context ==
FALSE)
2840 return (status = 1);
2873 if (!kbp_to || !kbp_from)
2877 kbp_to->
K = kbp_from->
K;
2879 kbp_to->
H = kbp_from->
H;
2893 if (matrix_info ==
NULL)
2917 matrix_info->
values = values;
2918 matrix_info->
prefs = prefs;
2973 #ifdef BLOSUM62_20_ENABLE
2974 matrix_info =
MatrixInfoNew(
"BLOSUM62_20", blosum62_20_values, blosum62_20_prefs, BLOSUM62_20_VALUES_MAX);
2987 if (!standard_only) {
3018 Int4 index, max_number_values=0;
3031 matrix_info = vnp->
ptr;
3034 values = matrix_info->
values;
3036 prefs = matrix_info->
prefs;
3037 found_matrix =
TRUE;
3046 *open = open_array = (
Int4 *)
calloc(max_number_values,
sizeof(
Int4));
3048 *extension = extension_array =
3052 (
double*)
calloc(max_number_values,
sizeof(
double));
3054 *K = K_array = (
double*)
calloc(max_number_values,
sizeof(
double));
3056 *
H = H_array = (
double*)
calloc(max_number_values,
sizeof(
double));
3058 *alpha = alpha_array = (
double*)
calloc(max_number_values,
sizeof(
double));
3060 *beta = beta_array = (
double*)
calloc(max_number_values,
sizeof(
double));
3062 *pref_flags = pref_flags_array =
3065 for (index=0; index<max_number_values; index++)
3068 open_array[index] = (
Int4) values[index][0];
3070 extension_array[index] = (
Int4) values[index][1];
3073 lambda_array[index] = values[index][3];
3075 K_array[index] = values[index][4];
3077 H_array[index] = values[index][5];
3079 alpha_array[index] = values[index][6];
3081 beta_array[index] = values[index][7];
3083 pref_flags_array[index] = prefs[index];
3089 return max_number_values;
3098 Int4* gapOpen_arr,* gapExtend_arr,* pref_flags;
3099 double* alpha_arr,* beta_arr;
3104 &gapExtend_arr,
NULL,
NULL,
NULL, &alpha_arr, &beta_arr,
3108 if ((0 == gap_open) && (0 == gap_extend)) {
3109 for(
i = 1;
i < num_values;
i++) {
3111 (*alpha) = alpha_arr[
i];
3112 (*beta) = beta_arr[
i];
3118 for(
i = 1;
i < num_values;
i++) {
3119 if ((gapOpen_arr[
i] == gap_open) &&
3120 (gapExtend_arr[
i] == gap_extend)) {
3121 (*alpha) = alpha_arr[
i];
3122 (*beta) = beta_arr[
i];
3128 else if (num_values > 0) {
3129 (*alpha) = alpha_arr[0];
3130 (*beta) = beta_arr[0];
3132 *alpha = kbp_ungapped->
Lambda / kbp_ungapped->
H;
3137 sfree(gapExtend_arr);
3164 *non_affine =
input;
3194 (*gap_existence_max) *= divisor;
3195 (*gap_extend_max) *= divisor;
3204 normal[
i][0] *= divisor;
3205 normal[
i][1] *= divisor;
3206 normal[
i][2] /= divisor;
3207 normal[
i][5] /= divisor;
3212 linear[0][0] *= divisor;
3213 linear[0][1] *= divisor;
3214 linear[0][2] /= divisor;
3215 linear[0][5] /= divisor;
3247 int divisor =
BLAST_Gcd(reward, penalty);
3249 *round_down =
FALSE;
3261 if (reward == 1 && penalty == -5) {
3267 *gap_extend_max = 3;
3268 }
else if (reward == 1 && penalty == -4) {
3274 *gap_extend_max = 2;
3275 }
else if (reward == 2 && penalty == -7) {
3282 *gap_extend_max = 4;
3283 }
else if (reward == 1 && penalty == -3) {
3289 *gap_extend_max = 2;
3290 }
else if (reward == 2 && penalty == -5) {
3297 *gap_extend_max = 4;
3298 }
else if (reward == 1 && penalty == -2) {
3304 *gap_extend_max = 2;
3305 }
else if (reward == 2 && penalty == -3) {
3312 *gap_extend_max = 4;
3313 }
else if (reward == 3 && penalty == -4) {
3320 *gap_extend_max = 3;
3321 }
else if (reward == 1 && penalty == -1) {
3327 *gap_extend_max = 2;
3328 }
else if (reward == 3 && penalty == -2) {
3334 *gap_extend_max = 5;
3335 }
else if (reward == 4 && penalty == -5) {
3341 *gap_extend_max = 8;
3342 }
else if (reward == 5 && penalty == -4) {
3348 *gap_extend_max = 10;
3353 sprintf(
buffer,
"Substitution scores %d and %d are not supported",
3363 if (*array_size > 0)
3365 if (kValues_non_affine)
3375 Int4* gap_existence,
3376 Int4* gap_extension)
3378 Int4* gapOpen_arr,* gapExtend_arr,* pref_flags;
3383 if (num_values <= 0)
3386 for(
i = 1;
i < num_values;
i++) {
3388 (*gap_existence) = gapOpen_arr[
i];
3389 (*gap_extension) = gapExtend_arr[
i];
3395 sfree(gapExtend_arr);
3404 Int4* gap_existence,
3405 Int4* gap_extension)
3411 int gap_existence_max=0;
3412 int gap_extension_max=0;
3414 &gap_existence_max, &gap_extension_max, &round_down,
NULL);
3423 if (*gap_existence == 0 && *gap_extension == 0 && non_affine)
3429 while (index < array_size)
3431 if (*gap_existence == normal[index][0] && *gap_extension == normal[index][1])
3441 if (*gap_existence < gap_existence_max || *gap_extension < gap_extension_max)
3443 *gap_existence = gap_existence_max;
3444 *gap_extension = gap_extension_max;
3460 int gap_existence_max = 0;
3461 int gap_extension_max = 0;
3463 &non_affine, &gap_existence_max,
3464 &gap_extension_max, &round_down,
NULL);
3483 Int4 max_number_values=0;
3490 matrix_info = vnp->
ptr;
3493 values = matrix_info->
values;
3495 found_matrix =
TRUE;
3505 for (index=0; index<max_number_values; index++)
3508 sprintf(
buffer,
"Gap existence and extension values of %ld and %ld are supported", (
long)
BLAST_Nint(values[index][0]), (
long)
BLAST_Nint(values[index][1]));
3510 sprintf(
buffer,
"Gap existence, extension and decline-to-align values of %ld, %ld and %ld are supported", (
long)
BLAST_Nint(values[index][0]), (
long)
BLAST_Nint(values[index][1]), (
long)
BLAST_Nint(values[index][2]));
3533 gap_extend, matrix_name,
FALSE);
3535 if (status && error_return)
3544 sprintf(
buffer,
"%s is not a supported matrix", matrix_name);
3549 matrix_info = vnp->
ptr;
3550 sprintf(
buffer,
"%s is a supported matrix", matrix_info->
name);
3557 else if (status == 2)
3559 sprintf(
buffer,
"Gap existence and extension values of %ld and %ld not supported for %s", (
long) gap_open, (
long) gap_extend, matrix_name);
3578 Int4 gap_extend,
const char* matrix_name,
3584 Int4 max_number_values=0;
3588 if (matrix_name ==
NULL)
3596 matrix_info = vnp->
ptr;
3599 values = matrix_info->
values;
3601 found_matrix =
TRUE;
3612 for (index=0; index<max_number_values; index++)
3614 if (
BLAST_Nint(values[index][0]) == gap_open &&
3619 kbp->
Lambda = values[index][3];
3620 kbp->
K = values[index][4];
3622 kbp->
H = values[index][5];
3624 found_values =
TRUE;
3629 if (found_values ==
TRUE)
3659 if (status && error_return) {
3666 sprintf(
buffer,
"%s is not a supported matrix", matrix_name);
3670 matrix_info = vnp->
ptr;
3671 sprintf(
buffer,
"%s is a supported matrix", matrix_info->
name);
3677 }
else if (status == 2) {
3678 sprintf(
buffer,
"Gap existence and extension values of %ld and %ld not supported for %s", (
long) gap_open, (
long) gap_extend, matrix_name);
3697 Int4 gap_extend,
const char* matrix_name)
3702 Int4 max_number_values=0;
3706 if (matrix_name ==
NULL)
3713 matrix_info = vnp->
ptr;
3715 values = matrix_info->
values;
3717 found_matrix =
TRUE;
3727 for (index=0; index<max_number_values; index++) {
3728 if (
BLAST_Nint(values[index][0]) == gap_open &&
3729 BLAST_Nint(values[index][1]) == gap_extend) {
3731 gbp->
Lambda = values[index][3];
3732 gbp->
C = values[index][8];
3733 gbp->
G = gap_open + gap_extend;
3734 gbp->
a = values[index][6];
3735 gbp->
Alpha = values[index][9];
3736 gbp->
Sigma = values[index][10];
3737 gbp->
a_un = values[0][6];
3739 gbp->
b = 2.0 * gbp->
G * (gbp->
a_un - gbp->
a);
3744 found_values =
TRUE;
3749 status = found_values ? 0 : 2;
3768 sprintf(ptr,
"%s is not a supported matrix, supported matrices are:\n", matrix_name);
3776 matrix_info = vnp->
ptr;
3777 sprintf(ptr,
"%s \n", matrix_info->
name);
3793 Int4 index, max_number_values=0;
3799 sprintf(ptr,
"Gap existence and extension values of %ld and %ld not supported for %s\nsupported values are:\n",
3800 (
long) gap_open, (
long) gap_extend, matrix_name);
3807 matrix_info = vnp->
ptr;
3810 values = matrix_info->
values;
3812 found_matrix =
TRUE;
3820 for (index=0; index<max_number_values; index++)
3823 sprintf(ptr,
"%ld, %ld\n", (
long)
BLAST_Nint(values[index][0]), (
long)
BLAST_Nint(values[index][1]));
3842 const int kGapOpenIndex = 0;
3843 const int kGapExtIndex = 1;
3844 const int kLambdaIndex = 2;
3845 const int kKIndex = 3;
3846 const int kHIndex = 4;
3847 int num_combinations = 0;
3848 int gap_open_max, gap_extend_max;
3868 ASSERT(kbp && kbp_ungap);
3872 if (gap_open == 0 && gap_extend == 0 && linear)
3874 kbp->
Lambda = linear[0][kLambdaIndex];
3875 kbp->
K = linear[0][kKIndex];
3877 kbp->
H = linear[0][kHIndex];
3882 for (index = 0; index < num_combinations; ++index) {
3883 if (normal[index][kGapOpenIndex] == gap_open &&
3884 normal[index][kGapExtIndex] == gap_extend) {
3885 kbp->
Lambda = normal[index][kLambdaIndex];
3886 kbp->
K = normal[index][kKIndex];
3888 kbp->
H = normal[index][kHIndex];
3895 if (index == num_combinations) {
3898 if (gap_open >= gap_open_max && gap_extend >= gap_extend_max) {
3900 }
else if (error_return) {
3905 sprintf(
buffer,
"Gap existence and extension values %ld and %ld "
3906 "are not supported for substitution scores %ld and %ld\n",
3907 (
long) gap_open, (
long) gap_extend, (
long) reward, (
long) penalty);
3908 for (
i = 0;
i < num_combinations; ++
i)
3911 sprintf(
buffer+
len,
"%ld and %ld are supported existence and extension values\n",
3912 (
long) normal[
i][kGapOpenIndex], (
long) normal[
i][kGapExtIndex]);
3915 sprintf(
buffer+
len,
"%ld and %ld are supported existence and extension values\n",
3916 (
long) gap_open_max, (
long) gap_extend_max);
3918 sprintf(
buffer+
len,
"Any values more stringent than %ld and %ld are supported\n",
3919 (
long) gap_open_max, (
long) gap_extend_max);
3942 if ((reward == 1 && penalty == -1) ||
3943 (reward == 2 && penalty == -3))
3952 double *alpha,
double *beta)
3954 const int kGapOpenIndex = 0;
3955 const int kGapExtIndex = 1;
3956 const int kAlphaIndex = 5;
3957 const int kBetaIndex = 6;
3958 Int4 num_combinations = 0;
3959 Int4 gap_open_max = 0, gap_extend_max = 0;
3978 ASSERT(alpha && beta && kbp);
3981 if (gapped_calculation && normal) {
3982 if (gap_open == 0 && gap_extend == 0 && linear)
3984 *alpha = linear[0][kAlphaIndex];
3985 *beta = linear[0][kBetaIndex];
3991 for (index = 0; index < num_combinations; ++index) {
3992 if (normal[index][kGapOpenIndex] == gap_open &&
3993 normal[index][kGapExtIndex] == gap_extend) {
3994 *alpha = normal[index][kAlphaIndex];
3995 *beta = normal[index][kBetaIndex];
4029 double Lambda, K,
H;
4033 const double kSmallFloat = 1.0e-297;
4038 if (Lambda < 0. || K < 0. ||
H < 0.0)
4043 E =
MAX(
E, kSmallFloat);
4045 S = (
Int4) (ceil(
log((
double)(K * searchsp /
E)) / Lambda ));
4065 return (1. - decayrate) *
BLAST_Powi(decayrate, nsegs - 1);
4079 double gap_decay_rate)
4082 double e = *
E, esave;
4085 if (kbp->
Lambda == -1. || kbp->
K == -1. || kbp->
H == -1.)
4100 if( gap_decay_rate > 0 && gap_decay_rate < 1 ) {
4118 if (esave <= 0. || !s_changed)
4125 if( gap_decay_rate > 0 && gap_decay_rate < 1 ) {
4145 double Lambda, K,
H;
4150 if (Lambda < 0. || K < 0. ||
H < 0.) {
4154 return (
double) searchsp * exp((
double)(-Lambda *
S) + kbp->
logK);
4161 if (p < 0.0 || p > 1.0) {
4203 double y = exp(x - callback_args->
sdvir);
4209 return exp(callback_args->
adj2 - y);
4231 callback_args->
adj2 = callback_args->
adj1 - s;
4233 mx = (s > 0. ? callback_args->
sdvir + 3. : 3.);
4257 double mean, stddev, stddev4;
4260 const double kSumpEpsilon = 0.002;
4293 stddev4 = 4.*stddev;
4298 double est_mean = -
r *
r1;
4299 if (s <= est_mean - stddev4)
4306 mean =
r * (1. - logr) - 0.5;
4307 if (s <= mean - stddev4)
4315 t = mean + 6.*stddev;
4319 memset((
void *)&callback_args, 0,
sizeof(callback_args));
4323 callback_args.
epsilon = kSumpEpsilon;
4329 }
while (s < mean && d < 0.4 && itmin++ < 4);
4331 return (d < 1. ? d : 1.);
4343 static const double kTab2[] = {
4344 0.01669, 0.0249, 0.03683, 0.05390, 0.07794, 0.1111, 0.1559, 0.2146,
4345 0.2890, 0.3794, 0.4836, 0.5965, 0.7092, 0.8114, 0.8931, 0.9490,
4346 0.9806, 0.9944, 0.9989
4348 static const double kTab3[] = {
4349 0.9806, 0.9944, 0.9989, 0.0001682,0.0002542,0.0003829,0.0005745,0.0008587,
4350 0.001278, 0.001893, 0.002789, 0.004088, 0.005958, 0.008627, 0.01240, 0.01770,
4351 0.02505, 0.03514, 0.04880, 0.06704, 0.09103, 0.1220, 0.1612, 0.2097,
4352 0.2682, 0.3368, 0.4145, 0.4994, 0.5881, 0.6765, 0.7596, 0.8326,
4353 0.8922, 0.9367, 0.9667, 0.9846, 0.9939, 0.9980
4355 static const double kTab4[] = {
4356 2.658e-07,4.064e-07,6.203e-07,9.450e-07,1.437e-06,2.181e-06,3.302e-06,4.990e-06,
4357 7.524e-06,1.132e-05,1.698e-05,2.541e-05,3.791e-05,5.641e-05,8.368e-05,0.0001237,
4358 0.0001823,0.0002677,0.0003915,0.0005704,0.0008275,0.001195, 0.001718, 0.002457,
4359 0.003494, 0.004942, 0.006948, 0.009702, 0.01346, 0.01853, 0.02532, 0.03431,
4360 0.04607, 0.06128, 0.08068, 0.1051, 0.1352, 0.1719, 0.2157, 0.2669,
4361 0.3254, 0.3906, 0.4612, 0.5355, 0.6110, 0.6849, 0.7544, 0.8168,
4362 0.8699, 0.9127, 0.9451, 0.9679, 0.9827, 0.9915, 0.9963
4364 const double*
kTable[] = { kTab2, kTab3, kTab4 };
4365 const int kTabsize[] = {
DIM(kTab2)-1,
DIM(kTab3)-1,
DIM(kTab4)-1 };
4386 i = kTabsize[
r2 =
r - 2] -
i;
4403 Int4 starting_points,
4414 Int4 subject_length,
4416 double weight_divisor)
4425 sum_e = searchsp_eff * exp(-xsum);
4427 double pair_search_space;
4431 pair_search_space = (double)subject_length * (
double)query_length;
4434 log(pair_search_space) + 2 * (num-1)*
log((
double)starting_points);
4440 ((double) searchsp_eff / (
double) pair_search_space);
4442 if( weight_divisor == 0.0 || (sum_e /= weight_divisor) >
INT4_MAX ) {
4476 Int2 num,
double xsum,
4477 Int4 query_length,
Int4 subject_length,
4479 double weight_divisor)
4484 sum_e = searchsp_eff * exp(-xsum);
4488 double pair_search_space;
4490 pair_search_space = (double)subject_length*(
double)query_length;
4492 xsum -=
log(pair_search_space) +
4493 (num-1)*(
log((
double) query_start_points) +
4494 log((
double) subject_start_points));
4499 ((double) searchsp_eff / (
double) pair_search_space);
4501 if( weight_divisor == 0.0 || (sum_e /= weight_divisor) >
INT4_MAX ) {
4524 Int4 subject_length,
4526 double weight_divisor)
4535 double lcl_subject_length;
4536 double lcl_query_length;
4538 lcl_query_length = (double) query_length;
4539 lcl_subject_length = (double) subject_length;
4542 sum_e = searchsp_eff * exp(-xsum);
4544 xsum -= num*
log(lcl_subject_length*lcl_query_length)
4550 ((double) searchsp_eff / (lcl_query_length * lcl_subject_length));
4552 if( weight_divisor == 0.0 || (sum_e /= weight_divisor) >
INT4_MAX ) {
4567 denominator = length;
4571 for(
i = 0;
i < length;
i++) {
4573 frequency[sequence[
i]]++;
4579 if (frequency[
i] == 0)
4582 resProb[
i] = ((double) frequency[
i]) /((double) denominator);
4596 double* lambda_array =
NULL;
4600 if (num_lambdas > 0) {
4601 double retval = lambda_array[0];
4602 sfree(lambda_array);
4605 sfree(lambda_array);
4634 double *queryProbArray,
double *scoreArray,
4638 Int4 minScore, maxScore;
4642 minScore = maxScore = 0;
4643 for (
i = 0;
i < matrixLength;
i++) {
4644 for (j = 0 ; j < alphabet_size; j++) {
4648 (matrix[
i][j] < minScore))
4649 minScore = matrix[
i][j];
4650 if (matrix[
i][j] > maxScore)
4651 maxScore = matrix[
i][j];
4655 return_sfp->
obs_min = minScore;
4656 return_sfp->
obs_max = maxScore;
4657 memset(scoreArray, 0, (maxScore - minScore + 1) *
sizeof(
double));
4659 return_sfp->
sprob = &(scoreArray[-minScore]);
4660 recipLength = 1.0 / (double) matrixLength;
4661 for(
i = 0;
i < matrixLength;
i++) {
4662 for (j = 0; j < alphabet_size; j++) {
4665 if(matrix[
i][j] >= minScore)
4666 return_sfp->
sprob[matrix[
i][j]] += recipLength *
4672 for(
i = minScore;
i <= maxScore;
i++)
4678 const Uint1* rps_query_seq,
Int4 db_seq_length,
4684 Int4* * returnMatrix;
4685 double initialUngappedLambda;
4686 double scaledInitialUngappedLambda;
4687 double correctUngappedLambda;
4690 Int4 index, inner_index;
4700 RPSFillScores(posMatrix, db_seq_length, resProb, scoreArray,
4704 ASSERT(initialUngappedLambda > 0.0);
4705 scaledInitialUngappedLambda = initialUngappedLambda / scalingFactor;
4707 scaledInitialUngappedLambda);
4711 if(correctUngappedLambda == -1.0)
4714 finalLambda = correctUngappedLambda/scaledInitialUngappedLambda;
4723 for (index = 0; index < db_seq_length; index++) {
4724 for (inner_index = 0; inner_index < alphabet_size; inner_index++) {
4727 returnMatrix[index][inner_index] =
4728 posMatrix[index][inner_index];
4731 temp = ((double)(posMatrix[index][inner_index])) * finalLambda;
4740 return returnMatrix;
4765 Int4 compressed_alphabet_size,
4769 Int4 compressed_letter;
4772 table[
i] = compressed_alphabet_size;
4774 for (
i = j = compressed_letter = 0; trans_string[
i] != 0;
i++) {
4776 Int4 c = trans_string[
i];
4779 compressed_letter++;
4784 table[aa_letter] = compressed_letter;
4785 rev_table[compressed_letter][j++] = aa_letter;
4786 rev_table[compressed_letter][j] = -1;
4790 ASSERT(compressed_letter == compressed_alphabet_size - 1);
4802 double* compressed_prob,
4803 Int4 compressed_alphabet_size,
4819 compressed_prob[
i] = 0.0;
4822 double prob_sum = 0.;
4831 prob_sum += rfp->
prob[aa];
4841 compressed_prob[aa] = rfp->
prob[aa] / prob_sum;
4860 double matrix_scale_factor,
4867 Int4 compressed_alphabet_size =
4876 matrix_scale_factor /=
lambda;
4881 if (std_freqs ==
NULL)
4887 compressed_alphabet_size,
4901 for (s = 0; s < compressed_alphabet_size; s++) {
4907 Int4 aa = rev_table[s][
i];
4914 val += std_freqs->
data[q][aa] * compressed_prob[aa];
4940 Int4 compressed_alphabet_size,
4941 double matrix_scale_factor)
4945 const char* alphabet_string = compressed_alphabet_size == 10 ?
4948 ASSERT(compressed_alphabet_size == 10 ||
4949 compressed_alphabet_size == 15);
4961 compressed_alphabet_size,
4967 matrix_scale_factor, rev_table) < 0) {
4971 return new_alphabet;
5027 double alpha_d_lambda,
5032 Int4 * length_adjustment)
5035 const Int4 kMaxIterations = 20;
5036 double m = (double) query_length;
5037 double n = (double) db_length;
5038 double N = (double) db_num_seqs;
5042 double ell_min = 0, ell_max;
5045 double ell_next = 0;
5055 double mb = m *
N +
n;
5056 double c =
n * m -
MAX(m,
n) / K;
5059 *length_adjustment = 0;
5062 ell_max = 2 * c / (mb + sqrt(mb * mb - 4 *
a * c));
5066 for(
i = 1;
i <= kMaxIterations;
i++) {
5069 ss = (m - ell) * (
n -
N * ell);
5070 ell_bar = alpha_d_lambda * (logK +
log(ss)) + beta;
5071 if(ell_bar >= ell) {
5073 if(ell_bar - ell_min <= 1.0) {
5077 if(ell_min == ell_max) {
5083 if(ell_min <= ell_bar && ell_bar <= ell_max) {
5087 ell_next = (
i == 1) ? ell_max : (ell_min + ell_max) / 2;
5094 *length_adjustment = (
Int4) ell_min;
5096 ell = ceil(ell_min);
5097 if( ell <= ell_max ) {
5098 ss = (m - ell) * (
n -
N * ell);
5099 if(alpha_d_lambda * (logK +
log(ss)) + beta >= ell) {
5101 *length_adjustment = (
Int4) ell;
5106 *length_adjustment = (
Int4) ell_min;
5109 return converged ? 0 : 1;
5170 double db_scale_factor = (gbp->
db_length) ?
5171 (
double)gbp->
db_length/(double)n_ : 1.0;
5173 double lambda_ = kbp->
Lambda;
5175 double ai_hat_ = gbp->
a * scale_factor;
5176 double bi_hat_ = gbp->
b;
5177 double alphai_hat_= gbp->
Alpha * scale_factor;
5178 double betai_hat_ = gbp->
Beta;
5179 double sigma_hat_ = gbp->
Sigma * scale_factor;
5180 double tau_hat_ = gbp->
Tau;
5183 double aj_hat_ = ai_hat_;
5184 double bj_hat_ = bi_hat_;
5185 double alphaj_hat_= alphai_hat_;
5186 double betaj_hat_ = betai_hat_;
5189 static double const_val = 0.39894228040143267793994605993438;
5191 double m_li_y, vi_y, sqrt_vi_y, m_F, P_m_F;
5192 double n_lj_y, vj_y, sqrt_vj_y, n_F, P_n_F;
5193 double c_y, p1, p2, area;
5196 m_li_y = m_ - (ai_hat_*y_ + bi_hat_);
5197 vi_y =
MAX(2.0*alphai_hat_/lambda_, alphai_hat_*y_+betai_hat_);
5198 sqrt_vi_y = sqrt(vi_y);
5199 m_F = m_li_y/sqrt_vi_y;
5200 P_m_F =
ErfC(-m_F / sqrt(2.0)) / 2.0;
5201 p1 = m_li_y * P_m_F + sqrt_vi_y * const_val * exp(-0.5*m_F*m_F);
5203 n_lj_y = n_ - (aj_hat_*y_ + bj_hat_);
5204 vj_y =
MAX(2.0*alphaj_hat_/lambda_, alphaj_hat_*y_+betaj_hat_);
5205 sqrt_vj_y = sqrt(vj_y);
5206 n_F = n_lj_y/sqrt_vj_y;
5207 P_n_F =
ErfC(-n_F / sqrt(2.0)) / 2.0;
5208 p2 = n_lj_y * P_n_F + sqrt_vj_y * const_val * exp(-0.5*n_F*n_F);
5210 c_y =
MAX(2.0*sigma_hat_/lambda_, sigma_hat_*y_+tau_hat_);
5211 area = p1 * p2 + c_y * P_m_F * P_n_F;
5213 e_value = area * k_ * exp(-lambda_ * y_) * db_scale_factor;
5227 double db_scale_factor = (gbp->
db_length) ?
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
const char * kBlastErrMsg_CantCalculateUngappedKAParams
Int2 Blast_MessageWrite(Blast_Message **blast_msg, EBlastSeverity severity, int context, const char *message)
Writes a message to a structure.
const int kBlastMessageNoContext
Declared in blast_message.h as extern const.
Boolean Blast_QueryIsPssm(EBlastProgramType p)
Returns true if the query is PSSM.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
void ** _PSIAllocateMatrix(unsigned int ncols, unsigned int nrows, unsigned int data_type_sz)
Generic 2 dimensional matrix allocator.
void ** _PSIDeallocateMatrix(void **matrix, unsigned int ncols)
Generic 2 dimensional matrix deallocator.
Private interface for Position Iterated BLAST API, contains the PSSM generation engine.
static Int4 pam30_prefs[11]
Quality values for PAM30 matrix, each element corresponds to same element number in array pam30_value...
static Int4 prot_identity_prefs[2]
static Int2 BlastScoreBlkProteinMatrixRead(BlastScoreBlk *sbp, FILE *fp)
Read in the matrix from the FILE *fp.
static Blast_ResComp * BlastResCompDestruct(Blast_ResComp *rcp)
Deallocates Blast_ResComp structure and associated arrays.
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
static BLAST_LetterProb Robinson_prob[]
amino acid background frequencies from Robinson and Robinson
Int1 CompressedReverseLookup[BLASTAA_SIZE+1][BLASTAA_SIZE+1]
2-D array mapping compressed letters to sets of ordinary protein letters
#define BLOSUM45_VALUES_MAX
Number of different combinations supported for BLOSUM45.
static Blast_GumbelBlk * s_BlastGumbelBlkNew()
static Int4 blosum45_prefs[14]
Quality values for BLOSUM45 matrix, each element corresponds to same element number in array blosum45...
Int2 BLAST_GetProteinGapExistenceExtendParams(const char *matrixName, Int4 *gap_existence, Int4 *gap_extension)
Extract the recommended gap existence and extension values.
double BLAST_GapDecayDivisor(double decayrate, unsigned nsegs)
Compute a divisor used to weight the evalue of a collection of "nsegs" distinct alignments.
static double s_GetUngappedBeta(Int4 reward, Int4 penalty)
Returns the beta statistical parameter value, given the nucleotide substitution scores.
static const array_of_8 blastn_values_5_4[]
Karlin-Altschul parameter values for substitution scores 5 and -4.
static double s_BlastSumP(Int4 r, double s)
Estimate the Sum P-value by calculation or interpolation, as appropriate.
#define BLAST_SCORE_RANGE_MAX
maximum allowed range of BLAST scores.
double BLAST_LargeGapSumE(Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
Calculates the e-value if a collection of distinct alignments with arbitrarily large gaps between the...
static array_of_8 blosum80_values[10]
Supported values (gap-existence, extension, etc.) for BLOSUM80.
double array_of_8[11]
Holds values (gap-opening, extension, etc.) for a matrix.
static const array_of_8 blastn_values_3_4[]
Karlin-Altschul parameter values for substitution scores 3 and -4.
static Int4 blosum50_prefs[16]
Quality values for BLOSUM50 matrix, each element corresponds to same element number in array blosum50...
#define PAM70_VALUES_MAX
Number of different combinations supported for PAM70.
Int2 Blast_GetNuclAlphaBeta(Int4 reward, Int4 penalty, Int4 gap_open, Int4 gap_extend, Blast_KarlinBlk *kbp, Boolean gapped_calculation, double *alpha, double *beta)
Extract the alpha and beta settings for these substitution and gap scores.
void Blast_FillResidueProbability(const Uint1 *sequence, Int4 length, double *resProb)
Given a sequence of 'length' amino acid residues, compute the probability of each residue and put tha...
#define BLOSUM90_VALUES_MAX
Number of different combinations supported for BLOSUM90.
static Int4 blosum90_prefs[8]
Quality values for BLOSUM90 matrix, each element corresponds to same element number in array blosum90...
static ListNode * BlastMatrixValuesDestruct(ListNode *vnp)
Free linked list of MatrixValues and all associated data.
struct BLAST_LetterProb BLAST_LetterProb
Records probability of letter appearing in sequence.
#define BLAST_KARLIN_LAMBDA0_DEFAULT
Initial guess for the value of Lambda in BlastKarlinLambdaNR.
static Int2 s_SplitArrayOf8(const array_of_8 *input, const array_of_8 **normal, const array_of_8 **non_affine, Boolean *split)
Splits an ArrayOf8 into two arrays of supported gap costs.
double Blast_KarlinLambdaNR(Blast_ScoreFreq *sfp, double initialLambdaGuess)
Calculates the parameter Lambda given an initial guess for its value.
Int4 ** RPSRescalePssm(double scalingFactor, Int4 rps_query_length, const Uint1 *rps_query_seq, Int4 db_seq_length, Int4 **posMatrix, BlastScoreBlk *sbp)
Rescale the PSSM, using composition-based statistics, for use with RPS BLAST.
Blast_ResFreq * Blast_ResFreqFree(Blast_ResFreq *rfp)
Deallocates Blast_ResFreq and prob0 element.
SCompressedAlphabet * SCompressedAlphabetFree(SCompressedAlphabet *alphabet)
Free a compressed alphabet and score matrix.
static Int2 BlastScoreBlkProteinMatrixLoad(BlastScoreBlk *sbp)
Sets sbp->matrix->data field using sbp->name field using the matrices in the toolkit (util/tables/raw...
char * BLAST_PrintMatrixMessage(const char *matrix_name, Boolean standard_only)
Prints a message about the allowed matrices; BlastKarlinBlkGappedFill should return 1 before this is...
double BLAST_SmallGapSumE(Int4 starting_points, Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
Calculates the e-value for alignments with "small" gaps (typically under fifty residues/basepairs) fo...
static const array_of_8 blastn_values_1_2[]
Karlin-Altschul parameter values for substitution scores 1 and -2.
double BLAST_SpougeStoE(Int4 y_, Blast_KarlinBlk *kbp, Blast_GumbelBlk *gbp, Int4 m_, Int4 n_)
Calculates the Expect value based upon the Spouge's FSC method.
static MatrixInfo * MatrixInfoNew(const char *name, array_of_8 *values, Int4 *prefs, Int4 max_number)
Allocates New MatrixInfo*.
#define BLOSUM62_VALUES_MAX
Number of different combinations supported for BLOSUM62.
double BLAST_KarlinEtoP(double x)
Convert an E-value to a P-value.
Int2 BlastScoreBlkNuclMatrixCreate(BlastScoreBlk *sbp)
Fill in the matrix for blastn using the penalty and reward. The query sequence alphabet is blastna,...
Int2 Blast_KarlinBlkGappedCalc(Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, const char *matrix_name, Blast_Message **error_return)
Fills in lambda, H, and K values, as calculated by Stephen Altschul in Methods in Enzy.
static array_of_8 blosum62_values[12]
Supported values (gap-existence, extension, etc.) for BLOSUM62.
static void RPSFillScores(Int4 **matrix, Int4 matrixLength, double *queryProbArray, double *scoreArray, Blast_ScoreFreq *return_sfp, Int4 range, Int4 alphabet_size)
The routine RPSFillScores computes the probability of each score weighted by the probability of each ...
SBlastScoreMatrix * SBlastScoreMatrixFree(SBlastScoreMatrix *matrix)
Deallocates SBlastScoreMatrix structure.
static Int2 Blast_ResFreqResComp(const BlastScoreBlk *sbp, Blast_ResFreq *rfp, const Blast_ResComp *rcp)
Calculate the residue frequencies associated with the provided ResComp This function takes into accou...
#define PROT_IDENTITY_VALUES_MAX
Blast_KarlinBlk * Blast_KarlinBlkNew(void)
Callocs a Blast_KarlinBlk.
static MatrixInfo * MatrixInfoDestruct(MatrixInfo *matrix_info)
Deallocates MatrixInfo as well as name string.
Blast_KarlinBlk * Blast_KarlinBlkFree(Blast_KarlinBlk *kbp)
Deallocates the KarlinBlk.
static Int2 BlastScoreBlkMaxScoreSet(BlastScoreBlk *sbp)
Sets maximum and minimum scores on the BlastScoreBlk for a given matrix.
Int2 BLAST_ScoreSetAmbigRes(BlastScoreBlk *sbp, char ambiguous_res)
Set the ambiguous residue (e.g, 'N', 'X') in the BlastScoreBlk*.
static double BlastKarlinLtoH(Blast_ScoreFreq *sfp, double lambda)
Calculate H, the relative entropy of the p's and q's.
Int2 Blast_KarlinBlkUngappedCalc(Blast_KarlinBlk *kbp, Blast_ScoreFreq *sfp)
Computes the parameters lambda, H, and K for use in calculating the statistical significance of high-scori...
double BLAST_KarlinPtoE(double p)
Convert a P-value to an E-value.
static const array_of_8 blastn_values_2_7[]
Karlin-Altschul parameter values for substitution scores 2 and -7.
struct MatrixInfo MatrixInfo
Used to temporarily store matrix values for retrieval.
static const array_of_8 blastn_values_1_3[]
Karlin-Altschul parameter values for substitution scores 1 and -3.
static array_of_8 blosum90_values[8]
Supported values (gap-existence, extension, etc.) for BLOSUM90.
#define BLOSUM80_VALUES_MAX
Number of different combinations supported for BLOSUM80.
static double BlastKarlinLHtoK(Blast_ScoreFreq *sfp, double lambda, double H)
The following procedure computes K.
static Int4 pam250_prefs[16]
Quality values for PAM250 matrix, each element corresponds to same element number in array pam250_val...
Int2 Blast_ScoreBlkKbpUngappedCalc(EBlastProgramType program, BlastScoreBlk *sbp, Uint1 *query, const BlastQueryInfo *query_info, Blast_Message **blast_message)
Calculate and fill the ungapped Karlin-Altschul parameters in the BlastScoreBlk structure (fields kbp...
static const array_of_8 blastn_values_2_5[]
Karlin-Altschul parameter values for substitution scores 2 and -5.
struct SRombergCbackArgs SRombergCbackArgs
Internal data structure used by Romberg integration callbacks.
Int2 Blast_ResFreqStdComp(const BlastScoreBlk *sbp, Blast_ResFreq *rfp)
Calculates residue frequencies given a standard distribution.
static const array_of_8 blastn_values_3_2[]
Karlin-Altschul parameter values for substitution scores 3 and -2.
static Int2 s_BuildCompressedScoreMatrix(BlastScoreBlk *sbp, SCompressedAlphabet *new_alphabet, double matrix_scale_factor, CompressedReverseLookup rev_table)
Compute a (non-square) score matrix for a compressed alphabet.
char * BLAST_PrintAllowedValues(const char *matrix_name, Int4 gap_open, Int4 gap_extend)
Prints a message about the allowed gap open, etc. values for the given matrix; BlastKarlinBlkGappedFill sh...
Int2 Blast_KarlinBlkGappedLoadFromTables(Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, const char *matrix_name, Boolean standard_only)
Attempts to fill KarlinBlk for given gap opening, extensions etc.
static Int2 s_AdjustGapParametersByGcd(array_of_8 *normal, array_of_8 *linear, int size, Int4 *gap_existence_max, Int4 *gap_extend_max, int divisor)
Adjust Lambda and H if reward and penalty have a non-1 gcd.
static Int4 blosum62_prefs[12]
Quality values for BLOSUM62 matrix, each element corresponds to same element number in array blosum62...
static Int2 Blast_ResFreqClr(const BlastScoreBlk *sbp, Blast_ResFreq *rfp)
Sets prob elements of Blast_ResFreq to zero.
static const array_of_8 blastn_values_1_4[]
Karlin-Altschul parameter values for substitution scores 1 and -4.
Int2 Blast_GetStdAlphabet(Uint1 alphabet_code, Uint1 *residues, Uint4 residues_size)
Fills a buffer with the 'standard' alphabet (given by STD_AMINO_ACID_FREQS[index]....
SCompressedAlphabet * SCompressedAlphabetNew(BlastScoreBlk *sbp, Int4 compressed_alphabet_size, double matrix_scale_factor)
Allocate a new compressed alphabet and score matrix.
Blast_ScoreFreq * Blast_ScoreFreqFree(Blast_ScoreFreq *sfp)
Deallocates the score frequencies structure.
static BLAST_LetterProb nt_prob[]
nucleotide probabilities (25% each letter)
static Int2 BlastResCompStr(const BlastScoreBlk *sbp, Blast_ResComp *rcp, char *str, Int4 length)
Store the composition of a (query) string.
Int2 Blast_GumbelBlkCalc(Blast_GumbelBlk *gbp, Int4 gap_open, Int4 gap_extend, const char *matrix_name, Blast_Message **error_return)
Fills in gumbel parameters to estimate p-value using FSC.
static Blast_ResComp * BlastResCompNew(const BlastScoreBlk *sbp)
Allocates the Blast_ResComp* for a given alphabet.
static const char * s_alphabet10
23-to-10 letter compressed alphabet.
static const array_of_8 blastn_values_1_5[]
Supported substitution and gap costs with corresponding quality values for nucleotide sequence compar...
static Int4 BlastKarlinEtoS_simple(double E, const Blast_KarlinBlk *kbp, Int8 searchsp)
Calculates score from expect value and search space.
#define BLAST_KARLIN_K_SUMLIMIT_DEFAULT
K_SUMLIMIT_DEFAULT == sumlimit used in BlastKarlinLHtoK()
SBlastScoreMatrix * SBlastScoreMatrixNew(size_t ncols, size_t nrows)
Allocates a new SBlastScoreMatrix structure of the specified dimensions.
static array_of_8 pam70_values[9]
Supported values (gap-existence, extension, etc.) for PAM70.
Int2 Blast_KarlinBlkNuclGappedCalc(Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, Int4 reward, Int4 penalty, Blast_KarlinBlk *kbp_ungap, Boolean *round_down, Blast_Message **error_return)
Retrieves Karlin-Altschul parameters from precomputed tables, given the substitution and gap scores.
static double s_BlastSumPCalc(int r, double s)
Evaluate the following double integral, where r = number of segments.
Blast_ResFreq * Blast_ResFreqNew(const BlastScoreBlk *sbp)
Allocates a new Blast_ResFreq structure and fills in the prob element based upon the contents of sbp.
#define STD_AMINO_ACID_FREQS
points to the standard amino acid frequencies to use.
double BLAST_KarlinStoE_simple(Int4 S, Blast_KarlinBlk *kbp, Int8 searchsp)
Calculates the Expect value based upon the search space and some Karlin-Altschul parameters.
static array_of_8 pam30_values[11]
Supported values (gap-existence, extension, etc.) for PAM30.
int BlastScoreBlkCheck(BlastScoreBlk *sbp)
Check that score blk is valid, returns zero if it is.
static Int2 Blast_ResFreqNormalize(const BlastScoreBlk *sbp, Blast_ResFreq *rfp, double norm)
Normalizes all the residue frequencies and then normalizes them to "norm".
Int2 Blast_ScoreBlkKbpIdealCalc(BlastScoreBlk *sbp)
Calculates the Karlin-Altschul parameters assuming standard residue compositions for the query and su...
static Blast_GumbelBlk * s_BlastGumbelBlkFree(Blast_GumbelBlk *gbp)
static array_of_8 blosum45_values[14]
Supported values (gap-existence, extension, etc.) for BLOSUM45.
Boolean BLAST_CheckRewardPenaltyScores(Int4 reward, Int4 penalty)
Check the validity of the reward and penalty scores.
double BLAST_UnevenGapSumE(Int4 query_start_points, Int4 subject_start_points, Int2 num, double xsum, Int4 query_length, Int4 subject_length, Int8 searchsp_eff, double weight_divisor)
Calculates the e-value of a collection of multiple distinct alignments with asymmetric gaps between the ...
static double s_OuterIntegralCback(double x, void *vp)
Callback for the Romberg integration function.
void BLAST_GetAlphaBeta(const char *matrixName, double *alpha, double *beta, Boolean gapped, Int4 gap_open, Int4 gap_extend, const Blast_KarlinBlk *kbp_ungapped)
Extract the alpha and beta settings for this matrixName, and these gap open and gap extension costs.
static double RPSfindUngappedLambda(const char *matrixName)
Gets the ungapped lambda calculated for the matrix in question given standard residue composition for...
Blast_ScoreFreq * Blast_ScoreFreqNew(Int4 score_min, Int4 score_max)
Creates a new structure to keep track of score frequencies for a scoring system.
Int2 BLAST_Cutoffs(Int4 *S, double *E, Blast_KarlinBlk *kbp, Int8 searchsp, Boolean dodecay, double gap_decay_rate)
Calculate the cutoff score from the expected number of HSPs or vice versa.
static Int2 Blast_ResFreqString(const BlastScoreBlk *sbp, Blast_ResFreq *rfp, char *string, Int4 length)
Fills in residue frequencies for a given sequence.
static Int2 BlastKarlinReportAllowedValues(const char *matrix_name, Blast_Message **error_return)
Fills in error_return with strings describing the allowed values.
static Int2 BlastScoreBlkNucleotideMatrixRead(BlastScoreBlk *sbp, FILE *fp)
Read in a custom nucleotide matrix from the FILE *fp.
static const array_of_8 blastn_values_2_3[]
Karlin-Altschul parameter values for substitution scores 2 and -3.
static double NlmKarlinLambdaNR(double *probs, Int4 d, Int4 low, Int4 high, double lambda0, double tolx, Int4 itmax, Int4 maxNewton, Int4 *itn)
Find positive solution to.
static void s_BuildCompressedTranslation(const char *trans_string, Uint1 *table, Int4 compressed_alphabet_size, CompressedReverseLookup rev_table)
parse the string defining the conversion between the ordinary protein alphabet and a compressed alpha...
SPsiBlastScoreMatrix * SPsiBlastScoreMatrixNew(size_t ncols)
Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.
Int2 Blast_KarlinBlkCopy(Blast_KarlinBlk *kbp_to, Blast_KarlinBlk *kbp_from)
Copies contents of one Karlin block to another.
Int2 BLAST_GetNucleotideGapExistenceExtendParams(Int4 reward, Int4 penalty, Int4 *gap_existence, Int4 *gap_extension)
Extract the recommended gap existence and extension values.
#define PAM250_VALUES_MAX
Number of different combinations supported for PAM250.
#define BLOSUM50_VALUES_MAX
Number of different combinations supported for BLOSUM50.
#define PAM30_VALUES_MAX
Number of different combinations supported for PAM30.
#define BLAST_KARLIN_K_ITER_MAX
upper limit on iterations for BlastKarlinLHtoK
static array_of_8 blosum50_values[16]
Supported values (gap-existence, extension, etc.) for BLOSUM50.
static array_of_8 prot_idenity_values[2]
Int4 BLAST_SpougeEtoS(double e0, Blast_KarlinBlk *kbp, Blast_GumbelBlk *gbp, Int4 m, Int4 n)
Estimate the score