NCBI C++ ToolKit
|
Private interface for Position Iterated BLAST API, contains the PSSM generation engine. More...
#include <algo/blast/core/ncbi_std.h>
#include <algo/blast/core/blast_stat.h>
#include <algo/blast/core/blast_psi.h>
#include "matrix_freq_ratios.h"
Go to the source code of this file.
Go to the SVN repository for this file.
Classes | |
struct | _PSIPackedMsaCell |
Compact version of the PSIMsaCell structure. More... | |
struct | _PSIPackedMsa |
Compact version of PSIMsa structure. More... | |
struct | _PSIMsaCell |
Internal data structure to represent a position in the multiple sequence alignment data structure. More... | |
struct | _PSIMsa |
Internal multiple alignment data structure used by the PSSM engine. More... | |
struct | _PSIInternalPssmData |
Internal representation of a PSSM in various stages of its creation and its dimensions. More... | |
struct | _PSIAlignedBlock |
This structure keeps track of the regions aligned between the query sequence and those that were not purged. More... | |
struct | _PSISequenceWeights |
Internal data structure to keep computed sequence weights. More... | |
Macros | |
#define | PSI_SUCCESS (0) |
Successful operation. More... | |
#define | PSIERR_BADPARAM (-1) |
Bad parameter used in function. More... | |
#define | PSIERR_OUTOFMEM (-2) |
Out of memory. More... | |
#define | PSIERR_BADSEQWEIGHTS (-3) |
Sequence weights do not add to 1. More... | |
#define | PSIERR_NOFREQRATIOS (-4) |
No frequency ratios were found for the given scoring matrix. More... | |
#define | PSIERR_POSITIVEAVGSCORE (-5) |
Positive average score found when scaling matrix. More... | |
#define | PSIERR_NOALIGNEDSEQS (-6) |
After purge stage of PSSM creation, no sequences are left. More... | |
#define | PSIERR_GAPINQUERY (-7) |
GAP residue found in query sequence. More... | |
#define | PSIERR_UNALIGNEDCOLUMN (-8) |
Found an entire column with no participating sequences. More... | |
#define | PSIERR_COLUMNOFGAPS (-9) |
Found an entire column full of GAP residues. More... | |
#define | PSIERR_STARTINGGAP (-10) |
Found flanking gap at start of alignment. More... | |
#define | PSIERR_ENDINGGAP (-11) |
Found flanking gap at end of alignment. More... | |
#define | PSIERR_BADPROFILE (-12) |
Errors in conserved domain profile. More... | |
#define | PSIERR_UNKNOWN (-255) |
Unknown error. More... | |
Typedefs | |
typedef struct _PSIPackedMsaCell | _PSIPackedMsaCell |
Compact version of the PSIMsaCell structure. More... | |
typedef struct _PSIPackedMsa | _PSIPackedMsa |
Compact version of PSIMsa structure. More... | |
typedef struct _PSIMsaCell | _PSIMsaCell |
Internal data structure to represent a position in the multiple sequence alignment data structure. More... | |
typedef struct _PSIMsa | _PSIMsa |
Internal multiple alignment data structure used by the PSSM engine. More... | |
typedef struct _PSIInternalPssmData | _PSIInternalPssmData |
Internal representation of a PSSM in various stages of its creation and its dimensions. More... | |
typedef struct _PSIAlignedBlock | _PSIAlignedBlock |
This structure keeps track of the regions aligned between the query sequence and those that were not purged. More... | |
typedef struct _PSISequenceWeights | _PSISequenceWeights |
Internal data structure to keep computed sequence weights. More... | |
Functions | |
void ** | _PSIAllocateMatrix (unsigned int ncols, unsigned int nrows, unsigned int data_type_sz) |
Generic 2 dimensional matrix allocator. More... | |
void ** | _PSIDeallocateMatrix (void **matrix, unsigned int ncols) |
Generic 2 dimensional matrix deallocator. More... | |
void | _PSICopyMatrix_int (int **dest, int **src, unsigned int ncols, unsigned int nrows) |
Copies src matrix into dest matrix, both of which must be int matrices with dimensions ncols by nrows. More... | |
void | _PSICopyMatrix_double (double **dest, double **src, unsigned int ncols, unsigned int nrows) |
Copies src matrix into dest matrix, both of which must be double matrices with dimensions ncols by nrows. More... | |
_PSIPackedMsa * | _PSIPackedMsaNew (const PSIMsa *msa) |
Allocates and initializes the compact version of the PSIMsa structure (makes a deep copy) for internal use by the PSSM engine. More... | |
_PSIPackedMsa * | _PSIPackedMsaFree (_PSIPackedMsa *msa) |
Deallocates the _PSIPackedMsa data structure. More... | |
unsigned int | _PSIPackedMsaGetNumberOfAlignedSeqs (const _PSIPackedMsa *msa) |
Retrieve the number of aligned sequences in the compact multiple sequence alignment. More... | |
_PSIMsa * | _PSIMsaNew (const _PSIPackedMsa *packed_msa, Uint4 alphabet_size) |
Allocates and initializes the internal version of the PSIMsa structure (makes a deep copy) for internal use by the PSSM engine. More... | |
_PSIMsa * | _PSIMsaFree (_PSIMsa *msa) |
Deallocates the _PSIMsa data structure. More... | |
_PSIInternalPssmData * | _PSIInternalPssmDataNew (Uint4 query_length, Uint4 alphabet_size) |
Allocates a new _PSIInternalPssmData structure. More... | |
_PSIInternalPssmData * | _PSIInternalPssmDataFree (_PSIInternalPssmData *pssm) |
Deallocates the _PSIInternalPssmData structure. More... | |
_PSIAlignedBlock * | _PSIAlignedBlockNew (Uint4 query_length) |
Allocates and initializes the _PSIAlignedBlock structure. More... | |
_PSIAlignedBlock * | _PSIAlignedBlockFree (_PSIAlignedBlock *aligned_blocks) |
Deallocates the _PSIAlignedBlock structure. More... | |
_PSISequenceWeights * | _PSISequenceWeightsNew (const PSIMsaDimensions *dims, const BlastScoreBlk *sbp) |
Allocates and initializes the _PSISequenceWeights structure. More... | |
_PSISequenceWeights * | _PSISequenceWeightsFree (_PSISequenceWeights *seq_weights) |
Deallocates the _PSISequenceWeights structure. More... | |
int | _PSIPurgeBiasedSegments (_PSIPackedMsa *msa) |
Main function for keeping only those selected sequences for PSSM construction (stage 2). More... | |
int | _PSIValidateMSA (const _PSIMsa *msa, Boolean ignored_unaligned_positions) |
Main validation function for multiple sequence alignment structure. More... | |
int | _PSIComputeAlignmentBlocks (const _PSIMsa *msa, _PSIAlignedBlock *aligned_block) |
Main function to compute aligned blocks' properties for each position within multiple alignment (stage 3) Corresponds to posit.c:posComputeExtents. More... | |
int | _PSIComputeSequenceWeights (const _PSIMsa *msa, const _PSIAlignedBlock *aligned_blocks, Boolean nsg_compatibility_mode, _PSISequenceWeights *seq_weights) |
Main function to calculate the sequence weights. More... | |
int | _PSIComputeFrequenciesFromCDs (const PSICdMsa *cd_msa, BlastScoreBlk *sbp, const PSIBlastOptions *options, _PSISequenceWeights *seq_weights) |
Main function to calculate CD weights and combine weighted residue counts from matched CDs. More... | |
int | _PSIComputeFreqRatios (const _PSIMsa *msa, const _PSISequenceWeights *seq_weights, const BlastScoreBlk *sbp, const _PSIAlignedBlock *aligned_blocks, Int4 pseudo_count, Boolean nsg_compatibility_mode, _PSIInternalPssmData *internal_pssm) |
Main function to compute the PSSM's frequency ratios (stage 5). More... | |
int | _PSIComputeFreqRatiosFromCDs (const PSICdMsa *cd_msa, const _PSISequenceWeights *seq_weights, const BlastScoreBlk *sbp, Int4 pseudo_count, _PSIInternalPssmData *internal_pssm) |
Main function to compute CD-based PSSM's frequency ratios. More... | |
int | _PSIConvertFreqRatiosToPSSM (_PSIInternalPssmData *internal_pssm, const Uint1 *query, const BlastScoreBlk *sbp, const double *std_probs) |
Converts the PSSM's frequency ratios obtained in the previous stage to a PSSM of scores. More... | |
int | _PSIScaleMatrix (const Uint1 *query, const double *std_probs, _PSIInternalPssmData *internal_pssm, BlastScoreBlk *sbp) |
Scales the PSSM (stage 7) More... | |
void | _PSIUpdateLambdaK (const int **pssm, const Uint1 *query, Uint4 query_length, const double *std_probs, BlastScoreBlk *sbp) |
Updates the Karlin-Altschul parameters based on the query sequence and PSSM's score frequencies. More... | |
int | _IMPALAScaleMatrix (const Uint1 *query, const double *std_probs, _PSIInternalPssmData *internal_pssm, BlastScoreBlk *sbp, double scaling_factor) |
Provides a similar function to _PSIScaleMatrix but it performs the scaling as IMPALA did, i.e. More... | |
int | _PSIPurgeAlignedRegion (_PSIPackedMsa *msa, unsigned int seq_index, unsigned int start, unsigned int stop) |
Marks the (start, stop] region corresponding to sequence seq_index in alignment so that it is not further considered for PSSM calculation. More... | |
void | _PSIUpdatePositionCounts (_PSIMsa *msa) |
Counts the number of sequences matching the query per query position (columns of the multiple alignment) as well as the number of residues present in each position of the query. More... | |
Uint4 | _PSISequenceLengthWithoutX (const Uint1 *seq, Uint4 length) |
Calculates the length of the sequence without including any 'X' residues. More... | |
Blast_ScoreFreq * | _PSIComputeScoreProbabilities (const int **pssm, const Uint1 *query, Uint4 query_length, const double *std_probs, const BlastScoreBlk *sbp) |
Compute the probabilities for each score in the PSSM. More... | |
int | _PSISaveDiagnostics (const _PSIMsa *msa, const _PSIAlignedBlock *aligned_block, const _PSISequenceWeights *seq_weights, const _PSIInternalPssmData *internal_pssm, PSIDiagnosticsResponse *diagnostics) |
Collects diagnostic information from the process of creating the PSSM. More... | |
int | _PSISaveCDDiagnostics (const PSICdMsa *msa, const _PSISequenceWeights *seq_weights, const _PSIInternalPssmData *internal_pssm, PSIDiagnosticsResponse *diagnostics) |
Collects diagnostic information from the process of creating the CDD-based PSSM. More... | |
double * | _PSICalculateInformationContentFromScoreMatrix (Int4 **score_mat, const double *std_prob, const Uint1 *query, Uint4 query_length, Uint4 alphabet_sz, double lambda) |
Calculates the information content from the scoring matrix. More... | |
double * | _PSICalculateInformationContentFromFreqRatios (double **freq_ratios, const double *std_prob, Uint4 query_length, Uint4 alphabet_sz) |
Calculates the information content from the residue frequencies calculated in stage 5 of the PSSM creation algorithm Corresponds to posit.c:posFreqsToInformation. More... | |
void | _PSIStructureGroupCustomization (_PSIMsa *msa) |
Enable NCBI structure group customization to discard the query sequence, as this really isn't the result of a PSI-BLAST iteration, but rather an artificial consensus sequence of the multiple sequence alignment constructed by them. More... | |
int | _PSIValidateMSA_StructureGroup (const _PSIMsa *msa) |
Structure group validation function for multiple sequence alignment structure. More... | |
int | _PSIValidateCdMSA (const PSICdMsa *cd_msa, Uint4 alphabet_size) |
Validation of multiple alignment of conserved domains structure. More... | |
Variables | |
const double | kPSINearIdentical |
Percent identity threshold for discarding near-identical matches. More... | |
const double | kPSIIdentical |
Percent identity threshold for discarding identical matches. More... | |
const unsigned int | kQueryIndex |
Index into multiple sequence alignment structure for the query sequence. More... | |
const double | kEpsilon |
Small constant to test against 0. More... | |
const int | kPSIScaleFactor |
Successor to POSIT_SCALE_FACTOR. More... | |
const double | kPositScalingPercent |
Constant used in scaling PSSM routines: Successor to POSIT_PERCENT. More... | |
const Uint4 | kPositScalingNumIterations |
Constant used in scaling PSSM routines: Successor to POSIT_NUM_ITERATIONS. More... | |
Private interface for Position Iterated BLAST API, contains the PSSM generation engine.
Calculating PSSMs from Seq-aligns is a multi-stage process. These stages include:
1) Processing the Seq-align: examine the alignment and extract information about aligned characters (performed at the API level).
2) Purge biased sequences: construct the multiple sequence alignment M as described in page 3395[1] (performed at the core level; custom selection of sequences should be performed at the API level).
3) Compute extents of the alignment: M sub C as described in page 3395[1].
4) Compute sequence weights.
5) Compute residue frequencies.
6) Convert residue frequencies to PSSM.
7) Scale the resulting PSSM.
Definition in file blast_psi_priv.h.
#define PSI_SUCCESS (0) |
Successful operation.
Definition at line 366 of file blast_psi_priv.h.
#define PSIERR_BADPARAM (-1) |
Bad parameter used in function.
Definition at line 368 of file blast_psi_priv.h.
#define PSIERR_BADPROFILE (-12) |
Errors in conserved domain profile.
Definition at line 390 of file blast_psi_priv.h.
#define PSIERR_BADSEQWEIGHTS (-3) |
Sequence weights do not add to 1.
Definition at line 372 of file blast_psi_priv.h.
#define PSIERR_COLUMNOFGAPS (-9) |
Found an entire column full of GAP residues.
Definition at line 384 of file blast_psi_priv.h.
#define PSIERR_ENDINGGAP (-11) |
Found flanking gap at end of alignment.
Definition at line 388 of file blast_psi_priv.h.
#define PSIERR_GAPINQUERY (-7) |
GAP residue found in query sequence.
Definition at line 380 of file blast_psi_priv.h.
#define PSIERR_NOALIGNEDSEQS (-6) |
After purge stage of PSSM creation, no sequences are left.
Definition at line 378 of file blast_psi_priv.h.
#define PSIERR_NOFREQRATIOS (-4) |
No frequency ratios were found for the given scoring matrix.
Definition at line 374 of file blast_psi_priv.h.
#define PSIERR_OUTOFMEM (-2) |
Out of memory.
Definition at line 370 of file blast_psi_priv.h.
#define PSIERR_POSITIVEAVGSCORE (-5) |
Positive average score found when scaling matrix.
Definition at line 376 of file blast_psi_priv.h.
#define PSIERR_STARTINGGAP (-10) |
Found flanking gap at start of alignment.
Definition at line 386 of file blast_psi_priv.h.
#define PSIERR_UNALIGNEDCOLUMN (-8) |
Found an entire column with no participating sequences.
Definition at line 382 of file blast_psi_priv.h.
#define PSIERR_UNKNOWN (-255) |
Unknown error.
Definition at line 392 of file blast_psi_priv.h.
typedef struct _PSIAlignedBlock _PSIAlignedBlock |
This structure keeps track of the regions aligned between the query sequence and those that were not purged.
It is used when calculating the sequence weights (replaces posExtents in old code)
typedef struct _PSIInternalPssmData _PSIInternalPssmData |
Internal representation of a PSSM in various stages of its creation and its dimensions.
typedef struct _PSIMsaCell _PSIMsaCell |
Internal data structure to represent a position in the multiple sequence alignment data structure.
typedef struct _PSIPackedMsa _PSIPackedMsa |
Compact version of PSIMsa structure.
typedef struct _PSIPackedMsaCell _PSIPackedMsaCell |
Compact version of the PSIMsaCell structure.
typedef struct _PSISequenceWeights _PSISequenceWeights |
Internal data structure to keep computed sequence weights.
int _IMPALAScaleMatrix | ( | const Uint1 * | query, |
const double * | std_probs, | ||
_PSIInternalPssmData * | internal_pssm, | ||
BlastScoreBlk * | sbp, | ||
double | scaling_factor | ||
) |
Provides a similar function to _PSIScaleMatrix, but it performs the scaling as IMPALA did, i.e., allowing the specification of a scaling factor and, when calculating the score probabilities, the query length includes 'X' residues.
Definition at line 2599 of file blast_psi_priv.c.
References _PSICopyMatrix_int(), _PSIInternalPssmData::freq_ratios, Kappa_compactSearchItemsFree(), Kappa_compactSearchItemsNew(), Kappa_impalaScaling(), Kappa_posSearchItemsFree(), Kappa_posSearchItemsNew(), BlastScoreBlk::name, _PSIInternalPssmData::ncols, _PSIInternalPssmData::nrows, NULL, PSI_SUCCESS, _PSIInternalPssmData::pssm, query, _PSIInternalPssmData::scaled_pssm, and TRUE.
Referenced by _PSICreateAndScalePssmFromFrequencyRatios().
_PSIAlignedBlock* _PSIAlignedBlockFree | ( | _PSIAlignedBlock * | aligned_blocks | ) |
Deallocates the _PSIAlignedBlock structure.
aligned_blocks | data structure to deallocate [in] |
Definition at line 532 of file blast_psi_priv.c.
References NULL, _PSIAlignedBlock::pos_extnt, sfree, and _PSIAlignedBlock::size.
Referenced by _PSIAlignedBlockNew(), Deleter< _PSIAlignedBlock >::Delete(), and s_PSICreatePssmCleanUp().
_PSIAlignedBlock* _PSIAlignedBlockNew | ( | Uint4 | query_length | ) |
Allocates and initializes the _PSIAlignedBlock structure.
query_length | length of the query sequence of the multiple sequence alignment [in] |
Definition at line 501 of file blast_psi_priv.c.
References _PSIAlignedBlockFree(), calloc(), i, SSeqRange::left, malloc(), NULL, _PSIAlignedBlock::pos_extnt, SSeqRange::right, and _PSIAlignedBlock::size.
Referenced by BOOST_AUTO_TEST_CASE(), and PSICreatePssmWithDiagnostics().
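void** _PSIAllocateMatrix | ( | unsigned int | ncols, |
unsigned int | nrows, | ||
unsigned int | data_type_sz | ||
) |
Generic 2 dimensional matrix allocator.
Allocates a ncols by nrows matrix with cells of size data_type_sz. Must be freed using _PSIDeallocateMatrix.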
ncols | number of columns in matrix [in] |
nrows | number of rows in matrix [in] |
data_type_sz | size of the data type (in bytes) to allocate for each element in the matrix [in] |
Definition at line 66 of file blast_psi_priv.c.
References _PSIDeallocateMatrix(), calloc(), i, malloc(), and NULL.
Referenced by _PSIInternalPssmDataNew(), _PSIMatrixFrequencyRatiosNew(), _PSIMsaNew(), _PSIPackedMsaNew(), _PSISequenceWeightsNew(), Kappa_posSearchItemsNew(), CRedoAlignmentTestFixture::loadPssmFromFile(), PSIDiagnosticsResponseNew(), PSIMatrixNew(), PSIMsaNew(), RPSRescalePssm(), s_RPSComputeTraceback(), s_RPSFillFreqRatiosInPsiMatrix(), SBlastScoreMatrixNew(), and SPsiBlastScoreMatrixNew().
double* _PSICalculateInformationContentFromFreqRatios | ( | double ** | freq_ratios, |
const double * | std_prob, | ||
Uint4 | query_length, | ||
Uint4 | alphabet_sz | ||
) |
Calculates the information content from the residue frequencies calculated in stage 5 of the PSSM creation algorithm. Corresponds to posit.c:posFreqsToInformation.
freq_ratios | matrix of frequency ratios (dimensions: query_length x alphabet_sz) (const) [in] |
std_prob | standard residue probabilities [in] |
query_length | length of the query [in] |
alphabet_sz | length of the alphabet used by the query [in] |
Definition at line 2341 of file blast_psi_priv.c.
References calloc(), kEpsilon, log, NCBIMATH_LN2, NULL, and r().
Referenced by _PSISaveCDDiagnostics(), and _PSISaveDiagnostics().
double* _PSICalculateInformationContentFromScoreMatrix | ( | Int4 ** | score_mat, |
const double * | std_prob, | ||
const Uint1 * | query, | ||
Uint4 | query_length, | ||
Uint4 | alphabet_sz, | ||
double | lambda | ||
) |
Calculates the information content from the scoring matrix.
score_mat | alphabet by alphabet_sz matrix of scores (const) [in] |
std_prob | standard residue probabilities [in] |
query | query sequence [in] |
query_length | length of the query [in] |
alphabet_sz | length of the alphabet used by the query [in] |
lambda | lambda parameter [in] FIXME documentation |
Definition at line 2299 of file blast_psi_priv.c.
References calloc(), kEpsilon, lambda(), log, NCBIMATH_LN2, NULL, query, r(), and tmp.
int _PSIComputeAlignmentBlocks | ( | const _PSIMsa * | msa, |
_PSIAlignedBlock * | aligned_block | ||
) |
Main function to compute aligned blocks' properties for each position within the multiple alignment (stage 3). Corresponds to posit.c:posComputeExtents.
msa | multiple sequence alignment data structure [in] |
aligned_block | data structure describing the aligned blocks' properties for each position of the multiple sequence alignment [out] |
Definition at line 1297 of file blast_psi_priv.c.
References _PSIComputeAlignedRegionLengths(), _PSIComputePositionExtents(), _PSIGetLeftExtents(), _PSIGetRightExtents(), _PSIMsa::dimensions, kQueryIndex, PSIMsaDimensions::num_seqs, PSI_SUCCESS, and PSIERR_BADPARAM.
Referenced by BOOST_AUTO_TEST_CASE(), and PSICreatePssmWithDiagnostics().
int _PSIComputeFreqRatios | ( | const _PSIMsa * | msa, |
const _PSISequenceWeights * | seq_weights, | ||
const BlastScoreBlk * | sbp, | ||
const _PSIAlignedBlock * | aligned_blocks, | ||
Int4 | pseudo_count, | ||
Boolean | nsg_compatibility_mode, | ||
_PSIInternalPssmData * | internal_pssm | ||
) |
Main function to compute the PSSM's frequency ratios (stage 5).
Implements formula 2 in Nucleic Acids Research, 2001, Vol 29, No 14. Corresponds to posit.c:posComputePseudoFreqs
msa | multiple sequence alignment data structure [in] |
seq_weights | data structure containing the data needed to compute the sequence weights [in] |
sbp | score block structure initialized for the scoring system used with the query sequence [in] |
aligned_blocks | data structure describing the aligned blocks' properties for each position of the multiple sequence alignment [in] |
pseudo_count | pseudo count constant [in] |
nsg_compatibility_mode | set to true to emulate the structure group's use of PSSM engine in the cddumper application. By default should be FALSE [in] |
internal_pssm | PSSM being computed [out] |
Definition at line 2071 of file blast_psi_priv.c.
References _PSIMatrixFrequencyRatiosFree(), _PSIMatrixFrequencyRatiosNew(), BlastScoreBlk::alphabet_size, _PSIMsa::alphabet_size, AMINOACID_TO_NCBISTDAA, ASSERT, Blast_GetMatrixBackgroundFreq(), BLAST_SCORE_MIN, _PSIMsa::cell, SBlastScoreMatrix::data, SFreqRatios::data, _PSIMsa::dimensions, _PSIInternalPssmData::freq_ratios, i, _PSISequenceWeights::independent_observations, kEpsilon, kQueryIndex, _PSIMsaCell::letter, _PSISequenceWeights::match_weights, BlastScoreBlk::matrix, MAX_IND_OBSERVATIONS, BlastScoreBlk::name, NULL, PSEUDO_MAX, _PSIInternalPssmData::pseudocounts, PSI_SUCCESS, PSIERR_BADPARAM, PSIERR_UNKNOWN, PSIMsaDimensions::query_length, r(), s_columnSpecificPseudocounts(), s_effectiveObservations(), s_initializeExpNumObservations(), and _PSISequenceWeights::std_prob.
Referenced by BOOST_AUTO_TEST_CASE(), and PSICreatePssmWithDiagnostics().
int _PSIComputeFreqRatiosFromCDs | ( | const PSICdMsa * | cd_msa, |
const _PSISequenceWeights * | seq_weights, | ||
const BlastScoreBlk * | sbp, | ||
Int4 | pseudo_count, | ||
_PSIInternalPssmData * | internal_pssm | ||
) |
Main function to compute CD-based PSSM's frequency ratios.
cd_msa | multiple alignment of CDs [in] |
seq_weights | contains weighted residue frequencies and effective number of observations [in] |
sbp | initialized score block data structure [in] |
pseudo_count | pseudo count constant [in] |
internal_pssm | PSSM [out] |
Definition at line 2185 of file blast_psi_priv.c.
References _PSIMatrixFrequencyRatiosFree(), _PSIMatrixFrequencyRatiosNew(), BlastScoreBlk::alphabet_size, AMINOACID_TO_NCBISTDAA, ASSERT, Blast_GetMatrixBackgroundFreq(), BLAST_SCORE_MIN, SBlastScoreMatrix::data, SFreqRatios::data, PSICdMsa::dimensions, _PSIInternalPssmData::freq_ratios, i, _PSISequenceWeights::independent_observations, kEpsilon, _PSISequenceWeights::match_weights, BlastScoreBlk::matrix, MAX, BlastScoreBlk::name, NULL, PSEUDO_MAX, PSI_SUCCESS, PSIERR_BADPARAM, PSIERR_OUTOFMEM, PSICdMsa::query, PSIMsaDimensions::query_length, r(), s_columnSpecificPseudocounts(), and _PSISequenceWeights::std_prob.
Referenced by BOOST_AUTO_TEST_CASE(), PSICreatePssmFromCDD(), and s_TestCreatePssmFromFreqs().
int _PSIComputeFrequenciesFromCDs | ( | const PSICdMsa * | cd_msa, |
BlastScoreBlk * | sbp, | ||
const PSIBlastOptions * | options, | ||
_PSISequenceWeights * | seq_weights | ||
) |
Main function to calculate CD weights and combine weighted residue counts from matched CDs.
cd_msa | multiple alignment of conserved domains data structure [in] |
sbp | BLAST score block [in] |
options | CDD-related options [in] |
seq_weights | data structure with CD frequencies [out] |
Definition at line 1651 of file blast_psi_priv.c.
References BlastScoreBlk::alphabet_size, AMINOACID_TO_NCBISTDAA, ASSERT, PSICdMsaCell::data, PSICdMsa::dimensions, fabs, _PSISequenceWeights::independent_observations, PSICdMsaCellData::iobsr, PSICdMsaCell::is_aligned, malloc(), _PSISequenceWeights::match_weights, MIN, PSICdMsa::msa, NULL, PSIMsaDimensions::num_seqs, PSI_SUCCESS, PSIERR_BADPARAM, PSIERR_OUTOFMEM, PSICdMsa::query, PSIMsaDimensions::query_length, s_PSIComputeFrequenciesFromCDsCleanup(), and PSICdMsaCellData::wfreqs.
Referenced by BOOST_AUTO_TEST_CASE(), and PSICreatePssmFromCDD().
Blast_ScoreFreq* _PSIComputeScoreProbabilities | ( | const int ** | pssm, |
const Uint1 * | query, | ||
Uint4 | query_length, | ||
const double * | std_probs, | ||
const BlastScoreBlk * | sbp | ||
) |
Compute the probabilities for each score in the PSSM.
This is only valid for protein sequences. FIXME: Should this be moved to blast_stat.[hc]? used in kappa.c in notposfillSfp()
pssm | PSSM for which to compute the score probabilities [in] |
query | query sequence for the PSSM above in ncbistdaa encoding [in] |
query_length | length of the query sequence above [in] |
std_probs | array containing the standard background residue probabilities [in] |
sbp | score block structure initialized for the scoring system used with the query sequence [in] |
Definition at line 2647 of file blast_psi_priv.c.
References _PSISequenceLengthWithoutX(), BlastScoreBlk::alphabet_code, AMINOACID_TO_NCBISTDAA, ASSERT, Blast_GetStdAlphabet(), BLAST_SCORE_MAX, BLAST_SCORE_MIN, Blast_ScoreFreqNew(), BLASTAA_SEQ_CODE, BLASTAA_SIZE, kScore, MAX, MIN, NULL, Blast_ScoreFreq::obs_max, Blast_ScoreFreq::obs_min, query, r(), Blast_ScoreFreq::score_avg, and Blast_ScoreFreq::sprob.
Referenced by _PSIUpdateLambdaK().
int _PSIComputeSequenceWeights | ( | const _PSIMsa * | msa, |
const _PSIAlignedBlock * | aligned_blocks, | ||
Boolean | nsg_compatibility_mode, | ||
_PSISequenceWeights * | seq_weights | ||
) |
Main function to calculate the sequence weights.
Should be called with the return value of _PSIComputeAlignmentBlocks (stage 4). Corresponds to posit.c:posComputeSequenceWeights.
msa | multiple sequence alignment data structure [in] |
aligned_blocks | data structure describing the aligned blocks' properties for each position of the multiple sequence alignment [in] |
nsg_compatibility_mode | set to true to emulate the structure group's use of PSSM engine in the cddumper application. By default should be FALSE [in] |
seq_weights | data structure containing the data needed to compute the sequence weights [out] |
Definition at line 1552 of file blast_psi_priv.c.
References _PSICalculateMatchWeights(), _PSICalculateNormalizedSequenceWeights(), _PSICheckSequenceWeights(), _PSIGetAlignedSequencesForPosition(), _PSISpreadGapWeights(), ASSERT, _PSIMsa::dimensions, DynamicUint4Array_AreEqual(), DynamicUint4Array_Copy(), DynamicUint4Array_Dup(), DynamicUint4ArrayFree(), DynamicUint4ArrayNewEx(), EFFECTIVE_ALPHABET, _PSISequenceWeights::norm_seq_weights, _PSIMsa::num_matching_seqs, PSIMsaDimensions::num_seqs, SDynamicUint4Array::num_used, _PSISequenceWeights::posDistinctDistrib, _PSISequenceWeights::posNumParticipating, PSI_SUCCESS, PSIERR_BADPARAM, PSIERR_OUTOFMEM, PSIMsaDimensions::query_length, _PSISequenceWeights::row_sigma, _PSISequenceWeights::sigma, and _PSIAlignedBlock::size.
Referenced by BOOST_AUTO_TEST_CASE(), and PSICreatePssmWithDiagnostics().
int _PSIConvertFreqRatiosToPSSM | ( | _PSIInternalPssmData * | internal_pssm, |
const Uint1 * | query, | ||
const BlastScoreBlk * | sbp, | ||
const double * | std_probs | ||
) |
Converts the PSSM's frequency ratios obtained in the previous stage to a PSSM of scores (stage 6).
internal_pssm | PSSM being computed [in|out] |
query | query sequence in ncbistdaa encoding. The length of this sequence is read from internal_pssm->ncols [in] |
sbp | score block structure initialized for the scoring system used with the query sequence [in] |
std_probs | array containing the standard residue probabilities [in] |
Definition at line 2392 of file blast_psi_priv.c.
References _PSIMatrixFrequencyRatiosFree(), _PSIMatrixFrequencyRatiosNew(), BlastScoreBlk::alphabet_size, AMINOACID_TO_NCBISTDAA, SFreqRatios::bit_scale_factor, BLAST_Nint(), BLAST_SCORE_MIN, SBlastScoreMatrix::data, SFreqRatios::data, FALSE, _PSIInternalPssmData::freq_ratios, i, int, BlastScoreBlk::kbp_ideal, kEpsilon, kPSIScaleFactor, Blast_KarlinBlk::Lambda, log, BlastScoreBlk::matrix, BlastScoreBlk::name, NCBIMATH_LN2, _PSIInternalPssmData::ncols, NULL, PSI_SUCCESS, PSIERR_BADPARAM, _PSIInternalPssmData::pssm, query, _PSIInternalPssmData::scaled_pssm, tmp, and TRUE.
Referenced by _PSICreateAndScalePssmFromFrequencyRatios(), BOOST_AUTO_TEST_CASE(), s_ScalePosMatrix(), and s_TestCreatePssmFromFreqs().
void _PSICopyMatrix_double | ( | double ** | dest, |
double ** | src, | ||
unsigned int | ncols, | ||
unsigned int | nrows | ||
) |
Copies src matrix into dest matrix, both of which must be double matrices with dimensions ncols by nrows.
dest | Destination matrix [out] |
src | Source matrix [in] |
ncols | Number of columns to copy [in] |
nrows | Number of rows to copy [in] |
Definition at line 124 of file blast_psi_priv.c.
Referenced by PSICreatePssmFromFrequencyRatios(), and s_ScalePosMatrix().
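void _PSICopyMatrix_int | ( | int ** | dest, |
int ** | src, | ||
unsigned int | ncols, | ||
unsigned int | nrows | ||
) |
Copies src matrix into dest matrix, both of which must be int matrices with dimensions ncols by nrows.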
dest | Destination matrix [out] |
src | Source matrix [in] |
ncols | Number of columns to copy [in] |
nrows | Number of rows to copy [in] |
Definition at line 123 of file blast_psi_priv.c.
Referenced by _IMPALAScaleMatrix(), s_PSISavePssm(), s_ScalePosMatrix(), and CRedoAlignmentTestFixture::setupPositionBasedBlastScoreBlk().
void** _PSIDeallocateMatrix | ( | void ** | matrix, |
unsigned int | ncols | ||
) |
Generic 2 dimensional matrix deallocator.
Deallocates the memory allocated by _PSIAllocateMatrix.
matrix | matrix to deallocate [in] |
ncols | number of columns in the matrix [in] |
Definition at line 88 of file blast_psi_priv.c.
References i, NULL, and sfree.
Referenced by _PSIAllocateMatrix(), _PSIInternalPssmDataFree(), _PSIMatrixFrequencyRatiosFree(), _PSIMsaFree(), _PSIPackedMsaFree(), _PSISequenceWeightsFree(), Kappa_posSearchItemsFree(), CRedoAlignmentTestFixture::loadPssmFromFile(), PSIDiagnosticsResponseFree(), PSIMatrixFree(), PSIMsaFree(), s_RPSComputeTraceback(), SBlastScoreMatrixFree(), CRedoAlignmentTestFixture::setupPositionBasedBlastScoreBlk(), and SPsiBlastScoreMatrixFree().
_PSIInternalPssmData* _PSIInternalPssmDataFree | ( | _PSIInternalPssmData * | pssm | ) |
Deallocates the _PSIInternalPssmData structure.
pssm | data structure to deallocate [in] |
Definition at line 468 of file blast_psi_priv.c.
References _PSIDeallocateMatrix(), _PSIInternalPssmData::freq_ratios, _PSIInternalPssmData::ncols, NULL, _PSIInternalPssmData::pseudocounts, _PSIInternalPssmData::pssm, _PSIInternalPssmData::scaled_pssm, and sfree.
Referenced by _PSIInternalPssmDataNew(), Deleter< _PSIInternalPssmData >::Delete(), s_PSICreatePssmCleanUp(), s_PSICreatePssmFromFrequencyRatiosCleanUp(), and s_ScalePosMatrix().
_PSIInternalPssmData* _PSIInternalPssmDataNew | ( | Uint4 | query_length, |
Uint4 | alphabet_size | ||
) |
Allocates a new _PSIInternalPssmData structure.
query_length | number of columns for the PSSM [in] |
alphabet_size | number of rows for the PSSM [in] |
Definition at line 425 of file blast_psi_priv.c.
References _PSIAllocateMatrix(), _PSIInternalPssmDataFree(), calloc(), _PSIInternalPssmData::freq_ratios, _PSIInternalPssmData::ncols, _PSIInternalPssmData::nrows, NULL, _PSIInternalPssmData::pseudocounts, _PSIInternalPssmData::pssm, and _PSIInternalPssmData::scaled_pssm.
Referenced by BOOST_AUTO_TEST_CASE(), PSICreatePssmFromCDD(), PSICreatePssmFromFrequencyRatios(), PSICreatePssmWithDiagnostics(), s_ScalePosMatrix(), and s_TestCreatePssmFromFreqs().
_PSIMsa* _PSIMsaFree | ( | _PSIMsa * | msa | ) |
Deallocates the _PSIMsa data structure.
msa | multiple sequence alignment data structure to deallocate [in] |
Definition at line 389 of file blast_psi_priv.c.
References _PSIDeallocateMatrix(), _PSIMsa::cell, _PSIMsa::dimensions, NULL, _PSIMsa::num_matching_seqs, PSIMsaDimensions::num_seqs, _PSIMsa::query, PSIMsaDimensions::query_length, _PSIMsa::residue_counts, and sfree.
Referenced by _PSIMsaNew(), Deleter< _PSIMsa >::Delete(), and s_PSICreatePssmCleanUp().
_PSIMsa* _PSIMsaNew | ( | const _PSIPackedMsa * | packed_msa, |
Uint4 | alphabet_size | ||
) |
Allocates and initializes the internal version of the PSIMsa structure (makes a deep copy) for internal use by the PSSM engine.
packed_msa | compact multiple sequence alignment data structure [in] |
alphabet_size | number of elements in the alphabet that makes up the aligned characters in the multiple sequence alignment [in] |
Definition at line 308 of file blast_psi_priv.c.
References _PSIAllocateMatrix(), _PSIMsaFree(), _PSIPackedMsaGetNumberOfAlignedSeqs(), _PSIUpdatePositionCounts(), _PSIMsa::alphabet_size, ASSERT, calloc(), _PSIMsa::cell, _PSIPackedMsa::data, _PSIPackedMsa::dimensions, _PSIMsa::dimensions, _PSIMsaCell::extents, _PSIPackedMsaCell::is_aligned, _PSIMsaCell::is_aligned, IS_residue, kQueryIndex, SSeqRange::left, _PSIPackedMsaCell::letter, _PSIMsaCell::letter, malloc(), NULL, _PSIMsa::num_matching_seqs, PSIMsaDimensions::num_seqs, _PSIMsa::query, PSIMsaDimensions::query_length, _PSIMsa::residue_counts, SSeqRange::right, and _PSIPackedMsa::use_sequence.
Referenced by BOOST_AUTO_TEST_CASE(), and PSICreatePssmWithDiagnostics().
_PSIPackedMsa* _PSIPackedMsaFree | ( | _PSIPackedMsa * | msa | ) |
Deallocates the _PSIPackedMsa data structure.
msa | multiple sequence alignment data structure to deallocate [in] |
Definition at line 183 of file blast_psi_priv.c.
References _PSIDeallocateMatrix(), _PSIPackedMsa::data, _PSIPackedMsa::dimensions, NULL, PSIMsaDimensions::num_seqs, sfree, and _PSIPackedMsa::use_sequence.
Referenced by _PSIPackedMsaNew(), Deleter< _PSIPackedMsa >::Delete(), PSICreatePssmWithDiagnostics(), and s_PSICreatePssmCleanUp().
unsigned int _PSIPackedMsaGetNumberOfAlignedSeqs | ( | const _PSIPackedMsa * | msa | ) |
Retrieve the number of aligned sequences in the compact multiple sequence alignment.
msa | compact multiple sequence alignment data structure to examine [in] |
Definition at line 209 of file blast_psi_priv.c.
References _PSIPackedMsa::dimensions, i, PSIMsaDimensions::num_seqs, and _PSIPackedMsa::use_sequence.
Referenced by _PSIMsaNew().
_PSIPackedMsa* _PSIPackedMsaNew | ( | const PSIMsa * | msa | ) |
Allocates and initializes the compact version of the PSIMsa structure (makes a deep copy) for internal use by the PSSM engine.
msa | multiple sequence alignment data structure provided by the user [in] |
Definition at line 129 of file blast_psi_priv.c.
References _PSIAllocateMatrix(), _PSIPackedMsaFree(), ASSERT, BLASTAA_SIZE, Boolean, calloc(), _PSIPackedMsa::data, _PSIPackedMsa::dimensions, _PSIPackedMsaCell::is_aligned, _PSIPackedMsaCell::letter, malloc(), NULL, TRUE, and _PSIPackedMsa::use_sequence.
Referenced by BOOST_AUTO_TEST_CASE(), and PSICreatePssmWithDiagnostics().
int _PSIPurgeAlignedRegion | ( | _PSIPackedMsa * | msa, |
unsigned int | seq_index, | ||
unsigned int | start, | ||
unsigned int | stop | ||
) |
Marks the (start, stop] region corresponding to sequence seq_index in alignment so that it is not further considered for PSSM calculation.
Note that the query sequence cannot be purged.
msa | multiple sequence alignment data [in|out] |
seq_index | index of the sequence of interest in alignment [in] |
start | start of the region to remove [in] |
stop | stop of the region to remove [in] |
Definition at line 2781 of file blast_psi_priv.c.
References _PSIPackedMsa::data, _PSIPackedMsa::dimensions, FALSE, i, _PSIPackedMsaCell::is_aligned, _PSIPackedMsaCell::letter, NULL, PSIMsaDimensions::num_seqs, PSI_SUCCESS, PSIERR_BADPARAM, PSIMsaDimensions::query_length, and s_PSIDiscardIfUnused().
Referenced by _handleNeitherAligned().
int _PSIPurgeBiasedSegments | ( | _PSIPackedMsa * | msa | ) |
Main function for keeping only those selected sequences for PSSM construction (stage 2).
After this function the multiple sequence alignment data will not be modified.
msa | multiple sequence alignment data structure [in] |
Definition at line 948 of file blast_psi_priv.c.
References PSI_SUCCESS, PSIERR_BADPARAM, s_PSIPurgeNearIdenticalAlignments(), and s_PSIPurgeSelfHits().
Referenced by BOOST_AUTO_TEST_CASE(), and PSICreatePssmWithDiagnostics().
int _PSISaveCDDiagnostics | ( | const PSICdMsa * | msa, |
const _PSISequenceWeights * | seq_weights, | ||
const _PSIInternalPssmData * | internal_pssm, | ||
PSIDiagnosticsResponse * | diagnostics | ||
) |
Collects diagnostic information from the process of creating the CDD-based PSSM.
cd_msa | multiple alignment of CDs data structure [in] |
seq_weights | sequence weights data structure [in] |
internal_pssm | structure containing PSSM's frequency ratios [in] |
diagnostics | output parameter [out] |
Definition at line 2912 of file blast_psi_priv.c.
References _PSICalculateInformationContentFromFreqRatios(), PSIDiagnosticsResponse::alphabet_size, ASSERT, PSICdMsa::dimensions, _PSIInternalPssmData::freq_ratios, PSIDiagnosticsResponse::frequency_ratios, PSIDiagnosticsResponse::independent_observations, _PSISequenceWeights::independent_observations, info, PSIDiagnosticsResponse::information_content, _PSISequenceWeights::match_weights, PSI_SUCCESS, PSIERR_BADPARAM, PSIERR_OUTOFMEM, PSIMsaDimensions::query_length, PSIDiagnosticsResponse::query_length, r(), sfree, _PSISequenceWeights::std_prob, and PSIDiagnosticsResponse::weighted_residue_freqs.
Referenced by PSICreatePssmFromCDD().
int _PSISaveDiagnostics | ( | const _PSIMsa * | msa, |
const _PSIAlignedBlock * | aligned_block, | ||
const _PSISequenceWeights * | seq_weights, | ||
const _PSIInternalPssmData * | internal_pssm, | ||
PSIDiagnosticsResponse * | diagnostics | ||
) |
Collects diagnostic information from the process of creating the PSSM.
msa | multiple sequence alignment data structure [in] |
aligned_block | aligned regions' extents [in] |
seq_weights | sequence weights data structure [in] |
internal_pssm | structure containing PSSM's frequency ratios [in] |
diagnostics | output parameter [out] |
Definition at line 2809 of file blast_psi_priv.c.
References _PSICalculateInformationContentFromFreqRatios(), PSIDiagnosticsResponse::alphabet_size, AMINOACID_TO_NCBISTDAA, ASSERT, _PSIMsa::cell, _PSIMsa::dimensions, _PSIInternalPssmData::freq_ratios, PSIDiagnosticsResponse::frequency_ratios, PSIDiagnosticsResponse::gapless_column_weights, _PSISequenceWeights::gapless_column_weights, PSIDiagnosticsResponse::independent_observations, _PSISequenceWeights::independent_observations, info, PSIDiagnosticsResponse::information_content, PSIDiagnosticsResponse::interval_sizes, _PSIMsaCell::letter, _PSISequenceWeights::match_weights, PSIDiagnosticsResponse::num_matching_seqs, _PSIMsa::num_matching_seqs, _PSIInternalPssmData::pseudocounts, PSI_SUCCESS, PSIERR_BADPARAM, PSIERR_OUTOFMEM, PSIMsaDimensions::query_length, PSIDiagnosticsResponse::query_length, r(), _PSIMsa::residue_counts, PSIDiagnosticsResponse::residue_freqs, sfree, PSIDiagnosticsResponse::sigma, _PSISequenceWeights::sigma, _PSIAlignedBlock::size, _PSISequenceWeights::std_prob, and PSIDiagnosticsResponse::weighted_residue_freqs.
Referenced by PSICreatePssmWithDiagnostics().
int _PSIScaleMatrix | ( | const Uint1 * | query, |
const double * | std_probs, | ||
_PSIInternalPssmData * | internal_pssm, | ||
BlastScoreBlk * | sbp | ||
) |
Scales the PSSM (stage 7)
query | query sequence in ncbistdaa encoding. The length of this sequence is read from internal_pssm->ncols [in] |
std_probs | array containing the standard background residue probabilities [in] |
internal_pssm | PSSM being computed [in|out] |
sbp | score block structure initialized for the scoring system used with the query sequence [in|out] |
Definition at line 2481 of file blast_psi_priv.c.
References _PSIUpdateLambdaK(), ASSERT, BLAST_Nint(), BLAST_SCORE_MIN, FALSE, i, int, BlastScoreBlk::kbp_ideal, BlastScoreBlk::kbp_psi, kPositScalingNumIterations, kPositScalingPercent, kPSIScaleFactor, Blast_KarlinBlk::Lambda, _PSIInternalPssmData::ncols, _PSIInternalPssmData::nrows, NULL, PSI_SUCCESS, PSIERR_BADPARAM, PSIERR_POSITIVEAVGSCORE, _PSIInternalPssmData::pssm, query, _PSIInternalPssmData::scaled_pssm, and TRUE.
Referenced by _PSICreateAndScalePssmFromFrequencyRatios(), and BOOST_AUTO_TEST_CASE().
Calculates the length of the sequence without including any 'X' residues.
Used in kappa.c.
seq | sequence to examine [in] |
length | length of the sequence above [in] |
Definition at line 2629 of file blast_psi_priv.c.
References AMINOACID_TO_NCBISTDAA, ASSERT, and i.
Referenced by _PSIComputeScoreProbabilities().
_PSISequenceWeights* _PSISequenceWeightsFree | ( | _PSISequenceWeights * | seq_weights | ) |
Deallocates the _PSISequenceWeights structure.
seq_weights | data structure to deallocate [in] |
Definition at line 627 of file blast_psi_priv.c.
References _PSIDeallocateMatrix(), _PSISequenceWeights::gapless_column_weights, _PSISequenceWeights::independent_observations, _PSISequenceWeights::match_weights, _PSISequenceWeights::match_weights_size, _PSISequenceWeights::norm_seq_weights, NULL, _PSISequenceWeights::posDistinctDistrib, _PSISequenceWeights::posDistinctDistrib_size, _PSISequenceWeights::posNumParticipating, _PSISequenceWeights::row_sigma, sfree, _PSISequenceWeights::sigma, and _PSISequenceWeights::std_prob.
Referenced by _PSISequenceWeightsNew(), Deleter< _PSISequenceWeights >::Delete(), and s_PSICreatePssmCleanUp().
_PSISequenceWeights* _PSISequenceWeightsNew | ( | const PSIMsaDimensions * | dims, |
const BlastScoreBlk * | sbp | ||
) |
Allocates and initializes the _PSISequenceWeights structure.
dims | structure containing the multiple sequence alignment dimensions [in] |
sbp | score block structure initialized for the scoring system used with the query sequence [in] |
Definition at line 552 of file blast_psi_priv.c.
References _PSIAllocateMatrix(), _PSISequenceWeightsFree(), BlastScoreBlk::alphabet_size, ASSERT, BLAST_GetStandardAaProbabilities(), calloc(), EFFECTIVE_ALPHABET, _PSISequenceWeights::gapless_column_weights, _PSISequenceWeights::independent_observations, _PSISequenceWeights::match_weights, _PSISequenceWeights::match_weights_size, _PSISequenceWeights::norm_seq_weights, NULL, PSIMsaDimensions::num_seqs, _PSISequenceWeights::posDistinctDistrib, _PSISequenceWeights::posDistinctDistrib_size, _PSISequenceWeights::posNumParticipating, PSIMsaDimensions::query_length, _PSISequenceWeights::row_sigma, _PSISequenceWeights::sigma, and _PSISequenceWeights::std_prob.
Referenced by BOOST_AUTO_TEST_CASE(), PSICreatePssmFromCDD(), and PSICreatePssmWithDiagnostics().
void _PSIStructureGroupCustomization | ( | _PSIMsa * | msa | ) |
Enable NCBI structure group customization to discard the query sequence, as this really isn't the result of a PSI-BLAST iteration, but rather an artificial consensus sequence of the multiple sequence alignment constructed by them.
This should be called after _PSIPurgeBiasedSegments.
Definition at line 800 of file blast_psi_priv.c.
References _PSIUpdatePositionCounts(), _PSIMsa::cell, _PSIMsa::dimensions, FALSE, i, _PSIMsaCell::is_aligned, kQueryIndex, _PSIMsaCell::letter, and PSIMsaDimensions::query_length.
Referenced by PSICreatePssmWithDiagnostics().
void _PSIUpdateLambdaK | ( | const int ** | pssm, |
const Uint1 * | query, | ||
Uint4 | query_length, | ||
const double * | std_probs, | ||
BlastScoreBlk * | sbp | ||
) |
Updates the Karlin-Altschul parameters based on the query sequence and PSSM's score frequencies.
Port of blastool.c's updateLambdaK
pssm | PSSM [in] |
query | query sequence in ncbistdaa encoding [in] |
query_length | length of the query sequence above [in] |
std_probs | array containing the standard background residue probabilities [in] |
sbp | Score block structure where the calculated lambda and K will be returned [in|out] |
Definition at line 2732 of file blast_psi_priv.c.
References _PSIComputeScoreProbabilities(), ASSERT, Blast_KarlinBlkUngappedCalc(), Blast_ScoreFreqFree(), Blast_KarlinBlk::K, BlastScoreBlk::kbp_gap_psi, BlastScoreBlk::kbp_gap_std, BlastScoreBlk::kbp_ideal, BlastScoreBlk::kbp_psi, log, Blast_KarlinBlk::logK, and query.
Referenced by _PSIScaleMatrix(), and impalaScaleMatrix().
void _PSIUpdatePositionCounts | ( | _PSIMsa * | msa | ) |
Counts the number of sequences matching the query per query position (columns of the multiple alignment) as well as the number of residues present in each position of the query.
Should be called after the multiple alignment data has been purged of biased sequences.
msa | multiple sequence alignment structure [in|out] |
Definition at line 991 of file blast_psi_priv.c.
References _PSIMsa::alphabet_size, ASSERT, _PSIMsa::cell, _PSIMsa::dimensions, _PSIMsaCell::is_aligned, _PSIMsaCell::letter, _PSIMsa::num_matching_seqs, PSIMsaDimensions::num_seqs, PSIMsaDimensions::query_length, and _PSIMsa::residue_counts.
Referenced by _PSIMsaNew(), and _PSIStructureGroupCustomization().
Validation of multiple alignment of conserved domains structure.
cd_msa | multiple alignment of CDs [in] |
alphabet_size | alphabet size [in] |
Definition at line 862 of file blast_psi_priv.c.
References AMINOACID_TO_NCBISTDAA, PSICdMsaCell::data, PSICdMsa::dimensions, fabs, PSICdMsaCellData::iobsr, PSICdMsaCell::is_aligned, kEpsilon, PSICdMsa::msa, PSIMsaDimensions::num_seqs, PSI_SUCCESS, PSIERR_BADPARAM, PSIERR_BADPROFILE, PSIERR_GAPINQUERY, PSICdMsa::query, PSIMsaDimensions::query_length, and PSICdMsaCellData::wfreqs.
Referenced by PSICreatePssmFromCDD().
Main validation function for multiple sequence alignment structure.
Should be called after _PSIPurgeBiasedSegments.
msa | multiple sequence alignment data structure [in] |
ignored_unaligned_positions | determines whether the unaligned positions test should be performed or not [in] |
Definition at line 828 of file blast_psi_priv.c.
References PSI_SUCCESS, PSIERR_BADPARAM, s_PSIValidateAlignedColumns(), s_PSIValidateNoFlankingGaps(), s_PSIValidateNoGapsInQuery(), and s_PSIValidateParticipatingSequences().
Referenced by PSICreatePssmWithDiagnostics().
Structure group validation function for multiple sequence alignment structure.
Should be called after _PSIStructureGroupCustomization.
msa | multiple sequence alignment data structure [in] |
Definition at line 811 of file blast_psi_priv.c.
References PSI_SUCCESS, PSIERR_BADPARAM, and s_PSIValidateParticipatingSequences().
Referenced by PSICreatePssmWithDiagnostics().
|
extern |
Small constant to test against 0.
Definition at line 58 of file blast_psi_priv.c.
Referenced by _PSICalculateInformationContentFromFreqRatios(), _PSICalculateInformationContentFromScoreMatrix(), _PSIComputeFreqRatios(), _PSIComputeFreqRatiosFromCDs(), _PSIConvertFreqRatiosToPSSM(), _PSISpreadGapWeights(), _PSIValidateCdMSA(), CalculateLinkHSPCutoffs(), PsiBlastSetupScoreBlock(), and CSeqGraphicRenderer::ZoomOnRange().
Constant used in scaling PSSM routines: Successor to POSIT_NUM_ITERATIONS.
Definition at line 61 of file blast_psi_priv.c.
Referenced by _PSIScaleMatrix(), and impalaScaleMatrix().
|
extern |
Constant used in scaling PSSM routines: Successor to POSIT_PERCENT.
Definition at line 60 of file blast_psi_priv.c.
Referenced by _PSIScaleMatrix(), and impalaScaleMatrix().
|
extern |
Percent identity threshold for discarding identical matches.
Definition at line 56 of file blast_psi_priv.c.
Referenced by s_PSIPurgeSelfHits().
|
extern |
Percent identity threshold for discarding near-identical matches.
Definition at line 55 of file blast_psi_priv.c.
Referenced by s_PSIPurgeNearIdenticalAlignments(), and CPssmInputTestData::SetupNearIdenticalHits().
Successor to POSIT_SCALE_FACTOR.
Definition at line 59 of file blast_psi_priv.c.
Referenced by _PSIConvertFreqRatiosToPSSM(), _PSIScaleMatrix(), and impalaScaleMatrix().
Index into multiple sequence alignment structure for the query sequence.
Definition at line 57 of file blast_psi_priv.c.
Referenced by _PSICheckSequenceWeights(), _PSIComputeAlignmentBlocks(), _PSIComputeFreqRatios(), _PSIMsaNew(), _PSISpreadGapWeights(), _PSIStructureGroupCustomization(), Blast_Message2TSearchMessages(), BOOST_AUTO_TEST_CASE(), s_ExtractSeqId(), s_PSIPurgeSelfHits(), s_PSIPurgeSimilarAlignments(), s_PSIValidateAlignedColumns(), s_PSIValidateNoGapsInQuery(), CPssmInputTestData::SetupDuplicateHit(), CPsiBlastInputClustalW::x_CopyQueryToMsa(), CPsiBlastInputData::x_CopyQueryToMsa(), CPsiBlastInputClustalW::x_ExtractAlignmentData(), and CPsiBlastInputData::x_ExtractAlignmentData().