34 #ifndef ALGO_BLAST_CORE___BLAST_PSI__H
35 #define ALGO_BLAST_CORE___BLAST_PSI__H
63 #ifdef DEBUG_PSSM_ENGINE
65 typedef struct PSISeqInfo {
80 #ifdef DEBUG_PSSM_ENGINE
142 #ifdef DEBUG_PSSM_ENGINE
144 void PrintMsa(
const char* filename,
const PSIMsa* msa);
146 void PrintMsaFP(FILE*
fp,
const PSIMsa* msa);
362 double** freq_ratios,
363 double impala_scaling_factor,
Defines to provide correct exporting from BLAST DLL in Windows.
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
The structures and functions in blast_options.
PSIDiagnosticsResponse * PSIDiagnosticsResponseFree(PSIDiagnosticsResponse *diags)
Deallocates the PSIDiagnosticsResponse structure passed in.
int PSICreatePssmFromCDD(const PSICdMsa *cd_msa, const PSIBlastOptions *options, BlastScoreBlk *sbp, const PSIDiagnosticsRequest *request, PSIMatrix **pssm, PSIDiagnosticsResponse **diagnostics)
Main entry point to core PSSM engine for computing CDD-based PSSMs.
PSIDiagnosticsResponse * PSIDiagnosticsResponseNew(Uint4 query_length, Uint4 alphabet_size, const PSIDiagnosticsRequest *request)
Allocates a new PSI-BLAST diagnostics structure based on which fields of the PSIDiagnosticsRequest st...
PSIMatrix * PSIMatrixFree(PSIMatrix *matrix)
Deallocates the PSIMatrix structure passed in.
PSIMsa * PSIMsaFree(PSIMsa *msa)
Deallocates the PSIMsa structure.
PSIDiagnosticsRequest * PSIDiagnosticsRequestNew(void)
Allocates a PSIDiagnosticsRequest structure, setting all fields to false.
int PSICreatePssmFromFrequencyRatios(const Uint1 *query, Uint4 query_length, BlastScoreBlk *sbp, double **freq_ratios, double impala_scaling_factor, PSIMatrix **pssm)
Top-level function to create a PSSM given a matrix of frequency ratios and perform scaling on the res...
struct PSIMatrix PSIMatrix
This is the main return value from the PSSM engine.
struct PSICdMsaCell PSICdMsaCell
Alignment cell that represents one column of CD aligned to a position in the query.
struct PSIMsaCell PSIMsaCell
Structure to describe the characteristics of a position in the multiple sequence alignment data struc...
PSIDiagnosticsRequest * PSIDiagnosticsRequestNewEx(Boolean save_ascii_pssm)
Allocates a PSIDiagnosticsRequest structure, setting fields to their default values for their use in ...
struct PSICdMsa PSICdMsa
Data structure representing multiple alignment of CDs and query sequence along with data needed for P...
PSIMatrix * PSIMatrixNew(Uint4 query_length, Uint4 alphabet_size)
Allocates a new PSIMatrix structure.
struct PSIMsa PSIMsa
Multiple sequence alignment (msa) data structure containing the raw data needed by the PSSM engine to...
struct PSICdMsaCellData PSICdMsaCellData
Data needed for PSSM computation stored in MSA cell for single column in CD aligned to a position in ...
struct PSIDiagnosticsRequest PSIDiagnosticsRequest
Structure to allow requesting various diagnostics data to be collected by PSSM engine.
int PSICreatePssm(const PSIMsa *msap, const PSIBlastOptions *options, BlastScoreBlk *sbp, PSIMatrix **pssm)
Main entry point to core PSSM engine to calculate the PSSM.
PSIMsa * PSIMsaNew(const PSIMsaDimensions *dimensions)
Allocates and initializes the multiple sequence alignment data structure for use as input to the PSSM...
int PSICreatePssmWithDiagnostics(const PSIMsa *msap, const PSIBlastOptions *options, BlastScoreBlk *sbp, const PSIDiagnosticsRequest *request, PSIMatrix **pssm, PSIDiagnosticsResponse **diagnostics)
Main entry point to core PSSM engine which allows requesting diagnostics information.
PSIDiagnosticsRequest * PSIDiagnosticsRequestFree(PSIDiagnosticsRequest *diags_request)
Deallocates the PSIDiagnosticsRequest structure passed in.
struct PSIMsaDimensions PSIMsaDimensions
Structure representing the dimensions of the multiple sequence alignment data structure.
struct PSIDiagnosticsResponse PSIDiagnosticsResponse
This structure contains the diagnostics information requested using the PSIDiagnosticsRequest structu...
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
uint8_t Uint1
1-byte (8-bit) unsigned integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
Type and macro definitions from C toolkit that are not defined in C++ toolkit.
Uint1 Boolean
bool replacement for C
Structure used for scoring calculations.
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
Data needed for PSSM computation stored in MSA cell for single column in CD aligned to a position in ...
double iobsr
Effective number of independent observations in a CD column.
double * wfreqs
Frequencies for each residue in CD column.
Alignment cell that represents one column of CD aligned to a position in the query.
Uint1 is_aligned
Does this cell represent column aligned to a CD.
PSICdMsaCellData * data
Data needed for PSSM computation.
Data structure representing multiple alignment of CDs and query sequence along with data needed for P...
PSIMsaDimensions * dimensions
Query length and number of aligned cds.
unsigned char * query
Query sequence as Ncbistdaa.
PSICdMsaCell ** msa
Multiple alignment of CDs.
Structure to allow requesting various diagnostics data to be collected by PSSM engine.
Boolean information_content
request information content
Boolean frequency_ratios
request frequency ratios
Boolean independent_observations
request number of independent observations
Boolean weighted_residue_frequencies
request observed weighted residue frequencies
Boolean gapless_column_weights
request gapless column weights
Boolean num_matching_seqs
request number of matching sequences
Boolean sigma
request sigma
Boolean residue_frequencies
request observed residue frequencies
Boolean interval_sizes
request interval sizes
This structure contains the diagnostics information requested using the PSIDiagnosticsRequest structu...
double * information_content
position information content (query_length elements)
Uint4 ** residue_freqs
observed residue frequencies per position of the PSSM (Dimensions are query_length by alphabet_size)
double ** weighted_residue_freqs
Weighted observed residue frequencies per position of the PSSM.
Uint4 * interval_sizes
interval sizes of aligned regions (query_length elements)
Uint4 alphabet_size
Specifies length of alphabet.
Uint4 query_length
Specifies the number of positions in the PSSM.
double * gapless_column_weights
Weights for columns without gaps (query_length elements)
double * independent_observations
Effective number of observations per column.
Uint4 * num_matching_seqs
number of matching sequences per query position (query_length elements)
double * sigma
sigma (query_length elements)
double ** frequency_ratios
PSSM's frequency ratios (Dimensions are query_length by alphabet_size)
This is the main return value from the PSSM engine.
double ung_lambda
Ungapped Lambda Karlin-Altschul parameter.
double kappa
Kappa Karlin-Altschul parameter.
int ** pssm
Position-specific score matrix.
double ung_kappa
Ungapped Kappa Karlin-Altschul parameter.
Uint4 ncols
Number of columns in PSSM (query_length)
double ung_h
Ungapped H Karlin-Altschul parameter.
double lambda
Lambda Karlin-Altschul parameter.
Uint4 nrows
Number of rows in PSSM (alphabet_size)
double h
H Karlin-Altschul parameter.
Structure to describe the characteristics of a position in the multiple sequence alignment data struc...
Boolean is_aligned
Is this letter part of the alignment?
Uint1 letter
Preferred letter at this position, in ncbistdaa encoding.
Structure representing the dimensions of the multiple sequence alignment data structure.
Uint4 num_seqs
Number of distinct sequences aligned with the query (does not include the query)
Uint4 query_length
Length of the query.
Multiple sequence alignment (msa) data structure containing the raw data needed by the PSSM engine to...
PSIMsaCell ** data
actual data, dimensions are (dimensions->num_seqs+1) by (dimensions->query_length)
PSIMsaDimensions * dimensions
dimensions of the msa