NCBI C++ ToolKit
blast_psi.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* ===========================================================================
2  *
3  * PUBLIC DOMAIN NOTICE
4  * National Center for Biotechnology Information
5  *
6  * This software/database is a "United States Government Work" under the
7  * terms of the United States Copyright Act. It was written as part of
8  * the author's official duties as a United States Government employee and
9  * thus cannot be copyrighted. This software/database is freely available
10  * to the public for use. The National Library of Medicine and the U.S.
11  * Government have not placed any restriction on its use or reproduction.
12  *
13  * Although all reasonable efforts have been taken to ensure the accuracy
14  * and reliability of the software and data, the NLM and the U.S.
15  * Government do not and cannot warrant the performance or results that
16  * may be obtained by using this software or data. The NLM and the U.S.
17  * Government disclaim all warranties, express or implied, including
18  * warranties of performance, merchantability or fitness for any particular
19  * purpose.
20  *
21  * Please cite the author in any work or product based on this material.
22  *
23  * ===========================================================================
24  *
25  * Author: Christiam Camacho
26  *
27  */
28 
29 /** @file blast_psi.c
30  * Implementation of the high level functions of PSI-BLAST's PSSM engine.
31  */
32 
#include <algo/blast/core/blast_psi.h>
#include "blast_psi_priv.h"

/* needed for BLAST_GetStandardAaProbabilities(); */
#include <algo/blast/core/blast_util.h>
39 /****************************************************************************/
40 /* Function prototypes */
41 
42 /** Convenience function to deallocate data structures allocated in
43  * PSICreatePssmWithDiagnostics.
44  * @param pssm PSSM and statistical information [in|out]
45  * @param packed_msa compact multiple sequence alignment structure[in]
46  * @param msa multiple sequence alignment structure[in]
47  * @param aligned_block aligned blocks data structure [in]
48  * @param seq_weights sequence weights data structure [in]
49  * @param internal_pssm PSSM being computed [in]
50  */
51 static void
53  _PSIPackedMsa* packed_msa,
54  _PSIMsa* msa,
55  _PSIAlignedBlock* aligned_block,
56  _PSISequenceWeights* seq_weights,
57  _PSIInternalPssmData* internal_pssm);
58 
59 /** Copies pssm data from internal_pssm and sbp into pssm. None of its
60  * parameters can be NULL.
61  * @param internal_pssm PSSM being computed [in]
62  * @param sbp Score block structure containing the calculated lambda and K
63  * which will be saved in the pssm parameter [in]
64  * @param pssm PSSM and statistical information [in|out]
65  */
66 static void
67 s_PSISavePssm(const _PSIInternalPssmData* internal_pssm,
68  const BlastScoreBlk* sbp,
69  PSIMatrix* pssm);
70 
71 /** Private function which performs the last 2 stages of the PSSM creation:
72  * conversion of PSSM frequency ratios to PSSM and scaling of the PSSM.
73  * @param internal_pssm PSSM being computed, must be already allocated [in|out]
74  * @param query query sequence in ncbistdaa encoding. [in]
75  * @param query_length length of the query sequence above [in]
76  * @param std_prob array containing the standard background residue
77  * probabilities [in]
78  * @param sbp Score block structure where the calculated lambda and K will be
79  * returned [in|out]
80  * @param impala_scaling_factor scaling factor used in IMPALA-style scaling if
81  * its value is NOT kPSSM_NoImpalaScaling (otherwise it performs standard
82  * PSI-BLAST scaling) [in]
83  */
84 static int
86  const Uint1* query,
87  Uint4 query_length,
88  double* std_prob,
89  BlastScoreBlk* sbp,
90  double impala_scaling_factor);
91 
92 /****************************************************************************/
93 
94 int
95 PSICreatePssm(const PSIMsa* msap,
96  const PSIBlastOptions* options,
97  BlastScoreBlk* sbp,
98  PSIMatrix** pssm)
99 {
100  return PSICreatePssmWithDiagnostics(msap, options, sbp, NULL,
101  pssm, NULL);
102 }
103 
104 int
105 PSICreatePssmWithDiagnostics(const PSIMsa* msap, /* [in] */
106  const PSIBlastOptions* options, /* [in] */
107  BlastScoreBlk* sbp, /* [in] */
108  const PSIDiagnosticsRequest* request, /* [in] */
109  PSIMatrix** pssm, /* [out] */
110  PSIDiagnosticsResponse** diagnostics) /* [out] */
111 {
112  _PSIMsa* msa = NULL;
113  _PSIAlignedBlock* aligned_block = NULL;
114  _PSISequenceWeights* seq_weights = NULL;
115  _PSIInternalPssmData* internal_pssm = NULL;
116  _PSIPackedMsa* packed_msa = NULL;
117  int status = 0;
118 
119  if ( !msap || !options || !sbp || !pssm ) {
120  return PSIERR_BADPARAM;
121  }
122 
123  packed_msa = _PSIPackedMsaNew(msap);
124 
125  /*** Run the engine's stages ***/
126 
127  status = _PSIPurgeBiasedSegments(packed_msa);
128  if (status != PSI_SUCCESS) {
129  s_PSICreatePssmCleanUp(pssm, packed_msa, msa, aligned_block,
130  seq_weights, internal_pssm);
131  return status;
132  }
133 
134  /*** Allocate data structures ***/
135  msa = _PSIMsaNew(packed_msa, (Uint4) sbp->alphabet_size);
136  aligned_block = _PSIAlignedBlockNew(msa->dimensions->query_length);
137  seq_weights = _PSISequenceWeightsNew(msa->dimensions, sbp);
138  internal_pssm = _PSIInternalPssmDataNew(msa->dimensions->query_length,
139  (Uint4) sbp->alphabet_size);
140  *pssm = PSIMatrixNew(msa->dimensions->query_length,
141  (Uint4) sbp->alphabet_size);
142  if ( !msa || ! aligned_block || !seq_weights || !internal_pssm || !*pssm ) {
143  s_PSICreatePssmCleanUp(pssm, packed_msa, msa, aligned_block,
144  seq_weights, internal_pssm);
145  return PSIERR_OUTOFMEM;
146  }
147  packed_msa = _PSIPackedMsaFree(packed_msa);
148 
149  /*** Enable structure group customization if needed and validate the
150  * multiple sequence alignment data ***/
151  if (options->nsg_compatibility_mode) {
153  status = _PSIValidateMSA_StructureGroup(msa);
154  } else {
155  status = _PSIValidateMSA(msa, options->ignore_unaligned_positions);
156  }
157  if (status != PSI_SUCCESS) {
158  s_PSICreatePssmCleanUp(pssm, packed_msa, msa, aligned_block,
159  seq_weights, internal_pssm);
160  return status;
161  }
162 
163  status = _PSIComputeAlignmentBlocks(msa, aligned_block);
164  if (status != PSI_SUCCESS) {
165  s_PSICreatePssmCleanUp(pssm, packed_msa, msa, aligned_block,
166  seq_weights, internal_pssm);
167  return status;
168  }
169 
170  status = _PSIComputeSequenceWeights(msa, aligned_block,
171  options->nsg_compatibility_mode,
172  seq_weights);
173  if (status != PSI_SUCCESS) {
174  s_PSICreatePssmCleanUp(pssm, packed_msa, msa, aligned_block,
175  seq_weights, internal_pssm);
176  return status;
177  }
178 
179  status = _PSIComputeFreqRatios(msa, seq_weights, sbp, aligned_block,
180  options->pseudo_count,
181  options->nsg_compatibility_mode,
182  internal_pssm);
183  if (status != PSI_SUCCESS) {
184  s_PSICreatePssmCleanUp(pssm, packed_msa, msa, aligned_block,
185  seq_weights, internal_pssm);
186  return status;
187  }
188 
190  (internal_pssm, msa->query, msa->dimensions->query_length,
191  seq_weights->std_prob, sbp, options->impala_scaling_factor);
192  if (status != PSI_SUCCESS) {
193  s_PSICreatePssmCleanUp(pssm, packed_msa, msa, aligned_block,
194  seq_weights, internal_pssm);
195  return status;
196  }
197  /*** Save the pssm outgoing parameter ***/
198  s_PSISavePssm(internal_pssm, sbp, *pssm);
199 
200 
201  /*** Save diagnostics if required ***/
202  if (request && diagnostics) {
204  (Uint4) sbp->alphabet_size,
205  request);
206  if ( !*diagnostics ) {
207  /* FIXME: This could be changed to return a warning and not
208  * deallocate PSSM data */
209  s_PSICreatePssmCleanUp(pssm, packed_msa, msa, aligned_block,
210  seq_weights, internal_pssm);
211  return PSIERR_OUTOFMEM;
212  }
213  status = _PSISaveDiagnostics(msa, aligned_block, seq_weights,
214  internal_pssm, *diagnostics);
215  if (status != PSI_SUCCESS) {
216  *diagnostics = PSIDiagnosticsResponseFree(*diagnostics);
217  s_PSICreatePssmCleanUp(pssm, packed_msa, msa, aligned_block,
218  seq_weights, internal_pssm);
219  return status;
220  }
221  }
222  s_PSICreatePssmCleanUp(NULL, packed_msa, msa, aligned_block, seq_weights,
223  internal_pssm);
224 
225  return PSI_SUCCESS;
226 }
227 
228 int
229 PSICreatePssmFromCDD(const PSICdMsa* cd_msa, /* [in] */
230  const PSIBlastOptions* options, /* [in] */
231  BlastScoreBlk* sbp, /* [in] */
232  const PSIDiagnosticsRequest* request, /* [in] */
233  PSIMatrix** pssm, /* [out] */
234  PSIDiagnosticsResponse** diagnostics) /* [out] */
235 {
236  _PSISequenceWeights* seq_weights = NULL;
237  _PSIInternalPssmData* internal_pssm = NULL;
238  int status = 0;
239 
240  if ( !cd_msa || !options || !sbp || !pssm ) {
241  return PSIERR_BADPARAM;
242  }
243 
244  /*** Run the engine's stages ***/
245 
246 
247  /*** Allocate data structures ***/
248  seq_weights = _PSISequenceWeightsNew(cd_msa->dimensions, sbp);
249  internal_pssm = _PSIInternalPssmDataNew(cd_msa->dimensions->query_length,
250  (Uint4) sbp->alphabet_size);
251  *pssm = PSIMatrixNew(cd_msa->dimensions->query_length,
252  (Uint4) sbp->alphabet_size);
253  if ( !seq_weights || !internal_pssm || !*pssm ) {
254  s_PSICreatePssmCleanUp(pssm, NULL, NULL, NULL, seq_weights,
255  internal_pssm);
256  return PSIERR_OUTOFMEM;
257  }
258 
259  status = _PSIValidateCdMSA(cd_msa, sbp->alphabet_size);
260  if (status != PSI_SUCCESS) {
261  s_PSICreatePssmCleanUp(pssm, NULL, NULL, NULL, seq_weights,
262  internal_pssm);
263  return status;
264  }
265 
266  status = _PSIComputeFrequenciesFromCDs(cd_msa, sbp, options, seq_weights);
267 
268  if (status != PSI_SUCCESS) {
269  s_PSICreatePssmCleanUp(pssm, NULL, NULL, NULL, seq_weights,
270  internal_pssm);
271  return status;
272  }
273 
274  status = _PSIComputeFreqRatiosFromCDs(cd_msa, seq_weights, sbp,
275  options->pseudo_count,
276  internal_pssm);
277  if (status != PSI_SUCCESS) {
278  s_PSICreatePssmCleanUp(pssm, NULL, NULL, NULL, seq_weights,
279  internal_pssm);
280  return status;
281  }
282 
284  (internal_pssm, cd_msa->query, cd_msa->dimensions->query_length,
285  seq_weights->std_prob, sbp, options->impala_scaling_factor);
286  if (status != PSI_SUCCESS) {
287  s_PSICreatePssmCleanUp(pssm, NULL, NULL, NULL, seq_weights,
288  internal_pssm);
289  return status;
290  }
291  /*** Save the pssm outgoing parameter ***/
292  s_PSISavePssm(internal_pssm, sbp, *pssm);
293 
294 
295  /*** Save diagnostics if required ***/
296  if (request && diagnostics) {
297 
298  *diagnostics = PSIDiagnosticsResponseNew(
299  cd_msa->dimensions->query_length,
300  (Uint4) sbp->alphabet_size,
301  request);
302  if ( !*diagnostics ) {
303  /* FIXME: This could be changed to return a warning and not
304  * deallocate PSSM data */
305  s_PSICreatePssmCleanUp(pssm, NULL, NULL, NULL, seq_weights,
306  internal_pssm);
307  return PSIERR_OUTOFMEM;
308  }
309  status = _PSISaveCDDiagnostics(cd_msa, seq_weights, internal_pssm,
310  *diagnostics);
311  if (status != PSI_SUCCESS) {
312  *diagnostics = PSIDiagnosticsResponseFree(*diagnostics);
313  s_PSICreatePssmCleanUp(pssm, NULL, NULL, NULL, seq_weights,
314  internal_pssm);
315  return status;
316  }
317  }
318  s_PSICreatePssmCleanUp(NULL, NULL, NULL, NULL, seq_weights, internal_pssm);
319 
320  return PSI_SUCCESS;
321 
322 }
323 
324 /** Convenience function to deallocate data structures allocated in
325  * PSICreatePssmFromFrequencyRatios
326  * @param pssm PSSM and statistical information [in|out]
327  * @param internal_pssm PSSM being computed [in]
328  * @param std_prob array containing the standard background residue
329  * probabilities [in]
330  */
331 static void
333  _PSIInternalPssmData* internal_pssm,
334  double* std_prob)
335 {
336  if (pssm) {
337  *pssm = PSIMatrixFree(*pssm);
338  }
339  _PSIInternalPssmDataFree(internal_pssm);
340  sfree(std_prob);
341 }
342 
343 int
345  Uint4 query_length,
346  BlastScoreBlk* sbp,
347  double** freq_ratios,
348  double impala_scaling_factor,
349  PSIMatrix** pssm)
350 {
351  int status = PSI_SUCCESS;
352  double* std_prob = NULL;
353  _PSIInternalPssmData* internal_pssm = NULL;
354 
356  *pssm = PSIMatrixNew(query_length, (Uint4) sbp->alphabet_size);
357  internal_pssm = _PSIInternalPssmDataNew(query_length, sbp->alphabet_size);
358 
359  if ( !std_prob || !*pssm || !internal_pssm ) {
360  s_PSICreatePssmFromFrequencyRatiosCleanUp(pssm, internal_pssm,
361  std_prob);
362  return PSIERR_OUTOFMEM;
363  }
364 
365  _PSICopyMatrix_double(internal_pssm->freq_ratios, freq_ratios,
366  internal_pssm->ncols, internal_pssm->nrows);
367 
368  status = _PSICreateAndScalePssmFromFrequencyRatios(internal_pssm,
369  query, query_length,
370  std_prob, sbp,
371  impala_scaling_factor);
372  if (status != PSI_SUCCESS) {
373  s_PSICreatePssmFromFrequencyRatiosCleanUp(pssm, internal_pssm,
374  std_prob);
375  return status;
376  }
377  /*** Save the pssm outgoing parameter ***/
378  s_PSISavePssm(internal_pssm, sbp, *pssm);
379 
380  s_PSICreatePssmFromFrequencyRatiosCleanUp(NULL, internal_pssm, std_prob);
381  return status;
382 }
383 
384 static int
386  const Uint1* query,
387  Uint4 query_length,
388  double* std_prob,
389  BlastScoreBlk* sbp,
390  double impala_scaling_factor)
391 {
392  int status = PSI_SUCCESS;
393 
394  ASSERT(internal_pssm);
395  ASSERT(query);
396  ASSERT(std_prob);
397  ASSERT(sbp);
398 
399  status = _PSIConvertFreqRatiosToPSSM(internal_pssm, query, sbp, std_prob);
400  if (status != PSI_SUCCESS) {
401  /* clean up is done in calling code */
402  return status;
403  }
404 
405  if (impala_scaling_factor == kPSSM_NoImpalaScaling) {
406  status = _PSIScaleMatrix(query, std_prob, internal_pssm, sbp);
407  } else {
408  status = _IMPALAScaleMatrix(query, std_prob, internal_pssm, sbp,
409  impala_scaling_factor);
410  }
411  if (status != PSI_SUCCESS) {
412  /* clean up is done in calling code */
413  return status;
414  }
415 
416  return status;
417 }
418 
419 /****************************************************************************/
420 
421 static void
423  _PSIPackedMsa* packed_msa,
424  _PSIMsa* msa,
425  _PSIAlignedBlock* aligned_block,
426  _PSISequenceWeights* seq_weights,
427  _PSIInternalPssmData* internal_pssm)
428 {
429  if (pssm) {
430  *pssm = PSIMatrixFree(*pssm);
431  }
432  _PSIPackedMsaFree(packed_msa);
433  _PSIMsaFree(msa);
434  _PSIAlignedBlockFree(aligned_block);
435  _PSISequenceWeightsFree(seq_weights);
436  _PSIInternalPssmDataFree(internal_pssm);
437 }
438 
439 static void
440 s_PSISavePssm(const _PSIInternalPssmData* internal_pssm,
441  const BlastScoreBlk* sbp,
442  PSIMatrix* pssm)
443 {
444  ASSERT(internal_pssm);
445  ASSERT(sbp);
446  ASSERT(pssm);
447 
448  _PSICopyMatrix_int(pssm->pssm, internal_pssm->pssm,
449  pssm->ncols, pssm->nrows);
450 
451  pssm->lambda = sbp->kbp_gap_psi[0]->Lambda;
452  pssm->kappa = sbp->kbp_gap_psi[0]->K;
453  pssm->h = sbp->kbp_gap_psi[0]->H;
454  pssm->ung_lambda = sbp->kbp_psi[0]->Lambda;
455  pssm->ung_kappa = sbp->kbp_psi[0]->K;
456  pssm->ung_h = sbp->kbp_psi[0]->H;
457 }
458 
459 /****************************************************************************/
460 
461 PSIMsa*
462 PSIMsaNew(const PSIMsaDimensions* dimensions)
463 {
464  PSIMsa* retval = NULL;
465 
466  if ( !dimensions ) {
467  return NULL;
468  }
469 
470  retval = (PSIMsa*) calloc(1, sizeof(PSIMsa));
471  if ( !retval ) {
472  return PSIMsaFree(retval);
473  }
474 
475  retval->dimensions = (PSIMsaDimensions*) malloc(sizeof(PSIMsaDimensions));
476  if ( !retval->dimensions ) {
477  return PSIMsaFree(retval);
478  }
479  memcpy((void*) retval->dimensions,
480  (void*) dimensions,
481  sizeof(PSIMsaDimensions));
482 
483  retval->data = (PSIMsaCell**) _PSIAllocateMatrix(dimensions->num_seqs + 1,
484  dimensions->query_length,
485  sizeof(PSIMsaCell));
486  if ( !retval->data ) {
487  return PSIMsaFree(retval);
488  }
489  {
490  Uint4 s = 0; /* index on sequences */
491  Uint4 p = 0; /* index on positions */
492 
493  for (s = 0; s < dimensions->num_seqs + 1; s++) {
494  for (p = 0; p < dimensions->query_length; p++) {
495  retval->data[s][p].letter = 0;
496  retval->data[s][p].is_aligned = FALSE;
497  }
498  }
499  }
500 
501 #ifdef DEBUG_PSSM_ENGINE
502  retval->seqinfo = (PSISeqInfo*) calloc(dimensions->num_seqs + 1,
503  sizeof(PSISeqInfo));
504  if ( !retval->seqinfo ) {
505  return PSIMsaFree(retval);
506  }
507 #endif /* DEBUG_PSSM_ENGINE */
508 
509  return retval;
510 }
511 
512 PSIMsa*
514 {
515  if ( !msa ) {
516  return NULL;
517  }
518 
519  if ( msa->data && msa->dimensions ) {
520  _PSIDeallocateMatrix((void**) msa->data,
521  msa->dimensions->num_seqs + 1);
522  msa->data = NULL;
523  }
524 
525  if ( msa->dimensions ) {
526  sfree(msa->dimensions);
527  }
528 
529 #ifdef DEBUG_PSSM_ENGINE
530  if ( msa->seqinfo ) {
531  sfree(msa->seqinfo);
532  }
533 #endif /* DEBUG_PSSM_ENGINE */
534 
535  sfree(msa);
536 
537  return NULL;
538 }
539 
540 PSIMatrix*
541 PSIMatrixNew(Uint4 query_length, Uint4 alphabet_size)
542 {
543  PSIMatrix* retval = NULL;
544 
545  retval = (PSIMatrix*) malloc(sizeof(PSIMatrix));
546  if ( !retval ) {
547  return NULL;
548  }
549  retval->ncols = query_length;
550  retval->nrows = alphabet_size;
551 
552  retval->pssm = (int**) _PSIAllocateMatrix(query_length, alphabet_size,
553  sizeof(int));
554  if ( !(retval->pssm) ) {
555  return PSIMatrixFree(retval);
556  }
557 
558  retval->lambda = 0.0;
559  retval->kappa = 0.0;
560  retval->h = 0.0;
561  retval->ung_lambda = 0.0;
562  retval->ung_kappa = 0.0;
563  retval->ung_h = 0.0;
564 
565  return retval;
566 }
567 
568 PSIMatrix*
570 {
571  if ( !matrix ) {
572  return NULL;
573  }
574 
575  if (matrix->pssm) {
576  _PSIDeallocateMatrix((void**) matrix->pssm, matrix->ncols);
577  }
578 
579  sfree(matrix);
580 
581  return NULL;
582 }
583 
586 {
587  return calloc(1, sizeof(PSIDiagnosticsRequest));
588 }
589 
592 {
594  if ( !retval ) {
595  return NULL;
596  }
597 
598  retval->frequency_ratios = TRUE;
599  if (save_ascii_pssm) {
600  retval->information_content = TRUE;
602  retval->gapless_column_weights = TRUE;
603  retval->sigma = TRUE;
604  retval->interval_sizes = TRUE;
605  retval->num_matching_seqs = TRUE;
606  }
607  return retval;
608 }
609 
612 {
613  sfree(diags_request);
614  return NULL;
615 }
616 
618 PSIDiagnosticsResponseNew(Uint4 query_length, Uint4 alphabet_size,
619  const PSIDiagnosticsRequest* wants)
620 {
621  PSIDiagnosticsResponse* retval = NULL;
622 
623  if ( !wants ) {
624  return NULL;
625  }
626 
627  /* MUST use calloc to allocate structure because code that uses this
628  * structure assumes that non-NULL members will require to be populated */
629  retval = (PSIDiagnosticsResponse*) calloc(1,
630  sizeof(PSIDiagnosticsResponse));
631  if ( !retval ) {
632  return NULL;
633  }
634 
635  retval->query_length = query_length;
636  retval->alphabet_size = alphabet_size;
637 
638  if (wants->information_content) {
639  retval->information_content = (double*)
640  calloc(query_length, sizeof(double));
641  if ( !(retval->information_content) ) {
642  return PSIDiagnosticsResponseFree(retval);
643  }
644  }
645 
646  if (wants->residue_frequencies) {
647  retval->residue_freqs = (Uint4**) _PSIAllocateMatrix(query_length,
648  alphabet_size,
649  sizeof(Uint4));
650  if ( !(retval->residue_freqs) ) {
651  return PSIDiagnosticsResponseFree(retval);
652  }
653  }
654 
655  if (wants->weighted_residue_frequencies) {
656  retval->weighted_residue_freqs = (double**)
657  _PSIAllocateMatrix(query_length,
658  alphabet_size,
659  sizeof(double));
660  if ( !(retval->weighted_residue_freqs) ) {
661  return PSIDiagnosticsResponseFree(retval);
662  }
663  }
664 
665  if (wants->frequency_ratios) {
666  retval->frequency_ratios = (double**)
667  _PSIAllocateMatrix(query_length,
668  alphabet_size,
669  sizeof(double));
670  if ( !retval->frequency_ratios ) {
671  return PSIDiagnosticsResponseFree(retval);
672  }
673  }
674 
675  if (wants->gapless_column_weights) {
676  retval->gapless_column_weights = (double*)
677  calloc(query_length, sizeof(double));
678  if ( !(retval->gapless_column_weights) ) {
679  return PSIDiagnosticsResponseFree(retval);
680  }
681  }
682 
683  if (wants->sigma) {
684  retval->sigma = (double*) calloc(query_length, sizeof(double));
685  if ( !retval->sigma ) {
686  return PSIDiagnosticsResponseFree(retval);
687  }
688  }
689 
690  if (wants->interval_sizes) {
691  retval->interval_sizes = (Uint4*) calloc(query_length, sizeof(Uint4));
692  if ( !retval->interval_sizes ) {
693  return PSIDiagnosticsResponseFree(retval);
694  }
695  }
696 
697  if (wants->num_matching_seqs) {
698  retval->num_matching_seqs =
699  (Uint4*) calloc(query_length, sizeof(Uint4));
700  if ( !retval->num_matching_seqs ) {
701  return PSIDiagnosticsResponseFree(retval);
702  }
703  }
704 
705  if (wants->independent_observations) {
706  retval->independent_observations =
707  (double*) calloc(query_length, sizeof(double));
708  if ( !retval->independent_observations ) {
709  return PSIDiagnosticsResponseFree(retval);
710  }
711  }
712 
713  return retval;
714 }
715 
718 {
719  if ( !diags )
720  return NULL;
721 
722  if (diags->information_content) {
723  sfree(diags->information_content);
724  }
725 
726  if (diags->residue_freqs) {
727  _PSIDeallocateMatrix((void**) diags->residue_freqs,
728  diags->query_length);
729  }
730 
731  if (diags->weighted_residue_freqs) {
733  diags->query_length);
734  }
735 
736  if (diags->frequency_ratios) {
737  _PSIDeallocateMatrix((void**) diags->frequency_ratios,
738  diags->query_length);
739  }
740 
741  if (diags->gapless_column_weights) {
743  }
744 
745  if (diags->sigma) {
746  sfree(diags->sigma);
747  }
748 
749  if (diags->interval_sizes) {
750  sfree(diags->interval_sizes);
751  }
752 
753  if (diags->num_matching_seqs) {
754  sfree(diags->num_matching_seqs);
755  }
756 
757  if (diags->independent_observations) {
759  }
760 
761  sfree(diags);
762 
763  return NULL;
764 }
765 
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
const double kPSSM_NoImpalaScaling
Value used to indicate that no IMPALA-style scaling should be performed when scaling a PSSM.
Definition: blast_options.c:43
PSIDiagnosticsResponse * PSIDiagnosticsResponseFree(PSIDiagnosticsResponse *diags)
Deallocates the PSIDiagnosticsResponse structure passed in.
Definition: blast_psi.c:717
int PSICreatePssmFromCDD(const PSICdMsa *cd_msa, const PSIBlastOptions *options, BlastScoreBlk *sbp, const PSIDiagnosticsRequest *request, PSIMatrix **pssm, PSIDiagnosticsResponse **diagnostics)
Main entry point to core PSSM engine for computing CDD-based PSSMs.
Definition: blast_psi.c:229
PSIMatrix * PSIMatrixFree(PSIMatrix *matrix)
Deallocates the PSIMatrix structure passed in.
Definition: blast_psi.c:569
PSIDiagnosticsResponse * PSIDiagnosticsResponseNew(Uint4 query_length, Uint4 alphabet_size, const PSIDiagnosticsRequest *wants)
Allocates a new PSI-BLAST diagnostics structure based on which fields of the PSIDiagnosticsRequest st...
Definition: blast_psi.c:618
PSIMsa * PSIMsaFree(PSIMsa *msa)
Deallocates the PSIMsa structure.
Definition: blast_psi.c:513
static void s_PSICreatePssmCleanUp(PSIMatrix **pssm, _PSIPackedMsa *packed_msa, _PSIMsa *msa, _PSIAlignedBlock *aligned_block, _PSISequenceWeights *seq_weights, _PSIInternalPssmData *internal_pssm)
Convenience function to deallocate data structures allocated in PSICreatePssmWithDiagnostics.
Definition: blast_psi.c:422
PSIDiagnosticsRequest * PSIDiagnosticsRequestNew(void)
Allocates a PSIDiagnosticsRequest structure, setting all fields to false.
Definition: blast_psi.c:585
static int _PSICreateAndScalePssmFromFrequencyRatios(_PSIInternalPssmData *internal_pssm, const Uint1 *query, Uint4 query_length, double *std_prob, BlastScoreBlk *sbp, double impala_scaling_factor)
Private function which performs the last 2 stages of the PSSM creation: conversion of PSSM frequency r...
Definition: blast_psi.c:385
int PSICreatePssmFromFrequencyRatios(const Uint1 *query, Uint4 query_length, BlastScoreBlk *sbp, double **freq_ratios, double impala_scaling_factor, PSIMatrix **pssm)
Top-level function to create a PSSM given a matrix of frequency ratios and perform scaling on the res...
Definition: blast_psi.c:344
PSIDiagnosticsRequest * PSIDiagnosticsRequestNewEx(Boolean save_ascii_pssm)
Allocates a PSIDiagnosticsRequest structure, setting fields to their default values for their use in ...
Definition: blast_psi.c:591
PSIMatrix * PSIMatrixNew(Uint4 query_length, Uint4 alphabet_size)
Allocates a new PSIMatrix structure.
Definition: blast_psi.c:541
static void s_PSICreatePssmFromFrequencyRatiosCleanUp(PSIMatrix **pssm, _PSIInternalPssmData *internal_pssm, double *std_prob)
Convenience function to deallocate data structures allocated in PSICreatePssmFromFrequencyRatios.
Definition: blast_psi.c:332
static void s_PSISavePssm(const _PSIInternalPssmData *internal_pssm, const BlastScoreBlk *sbp, PSIMatrix *pssm)
Copies pssm data from internal_pssm and sbp into pssm.
Definition: blast_psi.c:440
int PSICreatePssm(const PSIMsa *msap, const PSIBlastOptions *options, BlastScoreBlk *sbp, PSIMatrix **pssm)
Main entry point to core PSSM engine to calculate the PSSM.
Definition: blast_psi.c:95
PSIMsa * PSIMsaNew(const PSIMsaDimensions *dimensions)
Allocates and initializes the multiple sequence alignment data structure for use as input to the PSSM...
Definition: blast_psi.c:462
int PSICreatePssmWithDiagnostics(const PSIMsa *msap, const PSIBlastOptions *options, BlastScoreBlk *sbp, const PSIDiagnosticsRequest *request, PSIMatrix **pssm, PSIDiagnosticsResponse **diagnostics)
Main entry point to core PSSM engine which allows to request diagnostics information.
Definition: blast_psi.c:105
PSIDiagnosticsRequest * PSIDiagnosticsRequestFree(PSIDiagnosticsRequest *diags_request)
Deallocates the PSIDiagnosticsRequest structure passed in.
Definition: blast_psi.c:611
int _PSIComputeAlignmentBlocks(const _PSIMsa *msa, _PSIAlignedBlock *aligned_blocks)
Main function to compute aligned blocks' properties for each position within multiple alignment (stag...
int _PSIConvertFreqRatiosToPSSM(_PSIInternalPssmData *internal_pssm, const Uint1 *query, const BlastScoreBlk *sbp, const double *std_probs)
Converts the PSSM's frequency ratios obtained in the previous stage to a PSSM of scores.
int _PSIComputeFreqRatios(const _PSIMsa *msa, const _PSISequenceWeights *seq_weights, const BlastScoreBlk *sbp, const _PSIAlignedBlock *aligned_blocks, Int4 pseudo_count, Boolean nsg_compatibility_mode, _PSIInternalPssmData *internal_pssm)
Main function to compute the PSSM's frequency ratios (stage 5).
void ** _PSIAllocateMatrix(unsigned int ncols, unsigned int nrows, unsigned int data_type_sz)
Generic 2 dimensional matrix allocator.
void _PSIStructureGroupCustomization(_PSIMsa *msa)
Enable NCBI structure group customization to discard the query sequence, as this really isn't the res...
int _PSIComputeFreqRatiosFromCDs(const PSICdMsa *cd_msa, const _PSISequenceWeights *seq_weights, const BlastScoreBlk *sbp, Int4 pseudo_count, _PSIInternalPssmData *internal_pssm)
Main function to compute CD-based PSSM's frequency ratios.
_PSISequenceWeights * _PSISequenceWeightsNew(const PSIMsaDimensions *dimensions, const BlastScoreBlk *sbp)
Allocates and initializes the _PSISequenceWeights structure.
_PSIInternalPssmData * _PSIInternalPssmDataNew(Uint4 query_length, Uint4 alphabet_size)
Allocates a new _PSIInternalPssmData structure.
_PSIAlignedBlock * _PSIAlignedBlockNew(Uint4 query_length)
Allocates and initializes the _PSIAlignedBlock structure.
int _PSIComputeSequenceWeights(const _PSIMsa *msa, const _PSIAlignedBlock *aligned_blocks, Boolean nsg_compatibility_mode, _PSISequenceWeights *seq_weights)
Main function to calculate the sequence weights.
int _PSISaveDiagnostics(const _PSIMsa *msa, const _PSIAlignedBlock *aligned_block, const _PSISequenceWeights *seq_weights, const _PSIInternalPssmData *internal_pssm, PSIDiagnosticsResponse *diagnostics)
Collects diagnostic information from the process of creating the PSSM.
int _PSIPurgeBiasedSegments(_PSIPackedMsa *msa)
Main function for keeping only those selected sequences for PSSM construction (stage 2).
int _PSIComputeFrequenciesFromCDs(const PSICdMsa *cd_msa, BlastScoreBlk *sbp, const PSIBlastOptions *options, _PSISequenceWeights *seq_weights)
Main function to calculate CD weights and combine weighted residue counts from matched CDs.
_PSIMsa * _PSIMsaNew(const _PSIPackedMsa *msa, Uint4 alphabet_size)
Allocates and initializes the internal version of the PSIMsa structure (makes a deep copy) for intern...
_PSISequenceWeights * _PSISequenceWeightsFree(_PSISequenceWeights *seq_weights)
Deallocates the _PSISequenceWeights structure.
void ** _PSIDeallocateMatrix(void **matrix, unsigned int ncols)
Generic 2 dimensional matrix deallocator.
void _PSICopyMatrix_int(int **dest, int **src, unsigned int ncols, unsigned int nrows)
Copies src matrix into dest matrix, both of which must be int matrices with dimensions ncols by nrows...
_PSIInternalPssmData * _PSIInternalPssmDataFree(_PSIInternalPssmData *pssm_data)
Deallocates the _PSIInternalPssmData structure.
void _PSICopyMatrix_double(double **dest, double **src, unsigned int ncols, unsigned int nrows)
Copies src matrix into dest matrix, both of which must be double matrices with dimensions ncols by nr...
int _PSIValidateMSA_StructureGroup(const _PSIMsa *msa)
Structure group validation function for multiple sequence alignment structure.
int _PSIScaleMatrix(const Uint1 *query, const double *std_probs, _PSIInternalPssmData *internal_pssm, BlastScoreBlk *sbp)
Scales the PSSM (stage 7)
_PSIMsa * _PSIMsaFree(_PSIMsa *msa)
Deallocates the _PSIMsa data structure.
_PSIAlignedBlock * _PSIAlignedBlockFree(_PSIAlignedBlock *aligned_blocks)
Deallocates the _PSIAlignedBlock structure.
int _PSISaveCDDiagnostics(const PSICdMsa *cd_msa, const _PSISequenceWeights *seq_weights, const _PSIInternalPssmData *internal_pssm, PSIDiagnosticsResponse *diagnostics)
Collects diagnostic information from the process of creating the CDD-based PSSM.
int _PSIValidateCdMSA(const PSICdMsa *cd_msa, Uint4 alphabet_size)
Validation of multiple alignment of conserved domains structure.
_PSIPackedMsa * _PSIPackedMsaNew(const PSIMsa *msa)
Allocates and initializes the compact version of the PSIMsa structure (makes a deep copy) for interna...
int _IMPALAScaleMatrix(const Uint1 *query, const double *std_probs, _PSIInternalPssmData *internal_pssm, BlastScoreBlk *sbp, double scaling_factor)
Provides a similar function to _PSIScaleMatrix but it performs the scaling as IMPALA did,...
_PSIPackedMsa * _PSIPackedMsaFree(_PSIPackedMsa *msa)
Deallocates the _PSIMsa data structure.
int _PSIValidateMSA(const _PSIMsa *msa, Boolean ignore_unaligned_positions)
Main validation function for multiple sequence alignment structure.
Private interface for Position Iterated BLAST API, contains the PSSM generation engine.
#define PSIERR_BADPARAM
Bad parameter used in function.
#define PSIERR_OUTOFMEM
Out of memory.
#define PSI_SUCCESS
Successful operation.
Various auxiliary BLAST utility functions.
double * BLAST_GetStandardAaProbabilities(void)
Get the standard amino acid probabilities.
Definition: blast_util.c:1323
#define NULL
Definition: ncbistd.hpp:225
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
Structure used for scoring calculations.
Definition: blast_stat.h:177
Blast_KarlinBlk ** kbp_psi
K-A parameters for position-based alignments.
Definition: blast_stat.h:213
Int2 alphabet_size
size of alphabet.
Definition: blast_stat.h:181
Blast_KarlinBlk ** kbp_gap_psi
K-A parameters for psi alignments.
Definition: blast_stat.h:215
double K
K value used in statistics.
Definition: blast_stat.h:68
double Lambda
Lambda value used in statistics.
Definition: blast_stat.h:67
double H
H value used in statistics.
Definition: blast_stat.h:70
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST). Some of these possibly should...
Boolean nsg_compatibility_mode
Compatibility option for the NCBI's structure group (note nsg_ prefix, stands for NCBI's structure gr...
double impala_scaling_factor
Scaling factor as used in IMPALA to do the matrix rescaling.
Boolean ignore_unaligned_positions
This turns off a validation for the multiple sequence alignment in the PSSM engine for unaligned posi...
Int4 pseudo_count
Pseudocount constant.
Data structure representing multiple alignment of CDs and query sequence along with data needed for P...
Definition: blast_psi.h:134
PSIMsaDimensions * dimensions
Query length and number of aligned cds.
Definition: blast_psi.h:136
unsigned char * query
Query sequence as Ncbistdaa.
Definition: blast_psi.h:135
Structure to allow requesting various diagnostics data to be collected by PSSM engine.
Definition: blast_psi.h:181
Boolean information_content
request information content
Definition: blast_psi.h:182
Boolean frequency_ratios
request frequency ratios
Definition: blast_psi.h:187
Boolean independent_observations
request number of independent observations
Definition: blast_psi.h:194
Boolean weighted_residue_frequencies
request observed weighted residue frequencies
Definition: blast_psi.h:185
Boolean gapless_column_weights
request gapless column weights
Definition: blast_psi.h:188
Boolean num_matching_seqs
request number of matching sequences
Definition: blast_psi.h:192
Boolean sigma
request sigma
Definition: blast_psi.h:190
Boolean residue_frequencies
request observed residue frequencies
Definition: blast_psi.h:183
Boolean interval_sizes
request interval sizes
Definition: blast_psi.h:191
This structure contains the diagnostics information requested using the PSIDiagnosticsRequest structu...
Definition: blast_psi.h:201
double * information_content
position information content (query_length elements)
Definition: blast_psi.h:202
Uint4 ** residue_freqs
observed residue frequencies per position of the PSSM (Dimensions are query_length by alphabet_size)
Definition: blast_psi.h:204
double ** weighted_residue_freqs
Weighted observed residue frequencies per position of the PSSM.
Definition: blast_psi.h:208
Uint4 * interval_sizes
interval sizes of aligned regions (query_length elements)
Definition: blast_psi.h:218
Uint4 alphabet_size
Specifies length of alphabet.
Definition: blast_psi.h:225
Uint4 query_length
Specifies the number of positions in the PSSM.
Definition: blast_psi.h:223
double * gapless_column_weights
Weights for columns without gaps (query_length elements)
Definition: blast_psi.h:215
double * independent_observations
Effective number of observations per column.
Definition: blast_psi.h:227
Uint4 * num_matching_seqs
number of matching sequences per query position (query_length elements)
Definition: blast_psi.h:220
double * sigma
sigma (query_length elements)
Definition: blast_psi.h:217
double ** frequency_ratios
PSSM's frequency ratios (Dimensions are query_length by alphabet_size)
Definition: blast_psi.h:212
This is the main return value from the PSSM engine.
Definition: blast_psi.h:150
double ung_lambda
Ungapped Lambda Karlin-Altschul parameter.
Definition: blast_psi.h:157
double kappa
Kappa Karlin-Altschul parameter.
Definition: blast_psi.h:155
int ** pssm
Position-specific score matrix.
Definition: blast_psi.h:153
double ung_kappa
Ungapped Kappa Karlin-Altschul parameter.
Definition: blast_psi.h:158
Uint4 ncols
Number of columns in PSSM (query_length)
Definition: blast_psi.h:151
double ung_h
Ungapped H Karlin-Altschul parameter.
Definition: blast_psi.h:159
double lambda
Lambda Karlin-Altschul parameter.
Definition: blast_psi.h:154
Uint4 nrows
Number of rows in PSSM (alphabet_size)
Definition: blast_psi.h:152
double h
H Karlin-Altschul parameter.
Definition: blast_psi.h:156
Structure to describe the characteristics of a position in the multiple sequence alignment data struc...
Definition: blast_psi.h:49
Boolean is_aligned
Is this letter part of the alignment?
Definition: blast_psi.h:52
Uint1 letter
Preferred letter at this position, in ncbistdaa encoding.
Definition: blast_psi.h:50
Structure representing the dimensions of the multiple sequence alignment data structure.
Definition: blast_psi.h:57
Uint4 num_seqs
Number of distinct sequences aligned with the query (does not include the query)
Definition: blast_psi.h:59
Uint4 query_length
Length of the query.
Definition: blast_psi.h:58
Multiple sequence alignment (msa) data structure containing the raw data needed by the PSSM engine to...
Definition: blast_psi.h:75
PSIMsaCell ** data
actual data, dimensions are (dimensions->num_seqs+1) by (dimensions->query_length)
Definition: blast_psi.h:77
PSIMsaDimensions * dimensions
dimensions of the msa
Definition: blast_psi.h:76
This structure keeps track of the regions aligned between the query sequence and those that were not ...
Internal representation of a PSSM in various stages of its creation and its dimensions.
Uint4 nrows
number of rows (alphabet_size)
int ** pssm
PSSM (scores)
Uint4 ncols
number of columns (query_length)
double ** freq_ratios
frequency ratios
Internal multiple alignment data structure used by the PSSM engine.
Uint1 * query
query sequence (length: query_length)
PSIMsaDimensions * dimensions
dimensions of field below
Compact version of PSIMsa structure.
Internal data structure to keep computed sequence weights.
double * std_prob
standard amino acid probabilities
static string query
voidp malloc(uInt size)
voidp calloc(uInt items, uInt size)
Modified on Wed Sep 04 15:03:40 2024 by modify_doxy.py rev. 669887