NCBI C++ ToolKit
|
Utilities for doing Smith-Waterman alignments and adjusting the scoring system for each match in blastpgp. More...
#include <float.h>
#include <algo/blast/core/ncbi_math.h>
#include <algo/blast/core/blast_hits.h>
#include <algo/blast/core/blast_kappa.h>
#include <algo/blast/core/blast_util.h>
#include <algo/blast/core/blast_gapalign.h>
#include <algo/blast/core/blast_filter.h>
#include <algo/blast/core/blast_traceback.h>
#include <algo/blast/core/link_hsps.h>
#include <algo/blast/core/gencode_singleton.h>
#include "blast_psi_priv.h"
#include "blast_gapalign_priv.h"
#include "blast_hits_priv.h"
#include "blast_posit.h"
#include "blast_hspstream_mt_utils.h"
#include "blast_traceback_mt_priv.h"
#include <algo/blast/composition_adjustment/nlm_linear_algebra.h>
#include <algo/blast/composition_adjustment/compo_heap.h>
#include <algo/blast/composition_adjustment/redo_alignment.h>
#include <algo/blast/composition_adjustment/matrix_frequency_data.h>
#include <algo/blast/composition_adjustment/unified_pvalues.h>
Go to the source code of this file.
Go to the SVN repository for this file.
Classes | |
struct | BlastKappa_SequenceInfo |
BLAST-specific information that is associated with a BlastCompo_MatchingSequence. More... | |
struct | BlastKappa_GappingParamsContext |
Data and data-structures needed to perform a gapped alignment. More... | |
struct | BlastKappa_SavedParameters |
A BlastKappa_SavedParameters holds the value of certain search parameters on entry to RedoAlignmentCore. More... | |
Macros | |
#define | KAPPA_BLASTP_NO_SEG_SEQUENCE 0 |
Compile-time option; if set to a true value, then blastp runs that use Blast_RedoAlignmentCore to compute the traceback will not SEG the subject sequence. More... | |
#define | KAPPA_TBLASTN_NO_SEG_SEQUENCE 0 |
Compile-time option; if set to a true value, then tblastn runs that use Blast_RedoAlignmentCore to compute the traceback will not SEG the subject sequence. More... | |
#define | SCALING_FACTOR 32 |
SCALING_FACTOR is a multiplicative factor used to get more bits of precision in the integer matrix scores. More... | |
#define | BLASTP_MASK_RESIDUE 21 |
NCBIstdaa encoding for 'X' character. More... | |
#define | BLASTP_MASK_INSTRUCTIONS "S 10 1.8 2.1" |
Default instructions and mask residue for SEG filtering. More... | |
#define | NEAR_IDENTICAL_BITS_PER_POSITION (1.74) |
Typedefs | |
typedef struct BlastKappa_SequenceInfo | BlastKappa_SequenceInfo |
BLAST-specific information that is associated with a BlastCompo_MatchingSequence. More... | |
typedef struct BlastKappa_GappingParamsContext | BlastKappa_GappingParamsContext |
Data and data-structures needed to perform a gapped alignment. More... | |
typedef struct BlastKappa_SavedParameters | BlastKappa_SavedParameters |
A BlastKappa_SavedParameters holds the value of certain search parameters on entry to RedoAlignmentCore. More... | |
Functions | |
static void | s_HSPListNormalizeScores (BlastHSPList *hsp_list, double lambda, double logK, double scoreDivisor) |
Given a list of HSPs with (possibly) high-precision scores, rescale the scores to have standard precision and set the scale-independent bit scores. More... | |
static void | s_AdjustEvaluesForComposition (BlastHSPList *hsp_list, double comp_p_value, const BlastSeqSrc *seqSrc, Int4 subject_length, const BlastContextInfo *query_context, double LambdaRatio, int subject_id) |
Adjusts the E-values in a BLAST_HitList to be composites of a composition-based P-value and a score/alignment-based P-value. More... | |
static void | s_HitlistReapContained (BlastHSP *hsp_array[], Int4 *hspcnt) |
Remove from a hitlist all HSPs that are completely contained in an HSP that occurs earlier in the list and that: More... | |
static void | s_FreeEditScript (void *edit_script) |
A callback used to free an EditScript that has been stored in a BlastCompo_Alignment. More... | |
static int | s_HSPListFromDistinctAlignments (BlastHSPList *hsp_list, BlastCompo_Alignment **alignments, int oid, const BlastQueryInfo *queryInfo, int frame) |
Converts a list of objects of type BlastCompo_Alignment to a new object of type BlastHSPList and returns the result. More... | |
Int4 | s_GetSubjectLength (Int4 total_subj_length, EBlastProgramType program_number) |
static int | s_HitlistEvaluateAndPurge (int *pbestScore, double *pbestEvalue, BlastHSPList *hsp_list, const BlastSeqSrc *seqSrc, int subject_length, EBlastProgramType program_number, const BlastQueryInfo *queryInfo, int context_index, BlastScoreBlk *sbp, const BlastHitSavingParameters *hitParams, double pvalueForThisPair, double LambdaRatio, int subject_id) |
Add E-values to a list of HSPs and remove those that do not have a sufficiently good (low) E-value. More... | |
static void | s_ComputeNumIdentities (const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, BLAST_SequenceBlk *subject_blk, const BlastSeqSrc *seq_src, BlastHSPList *hsp_list, const BlastScoringOptions *scoring_options, const Uint1 *gen_code_string, const BlastScoreBlk *sbp, BlastSeqSrcSetRangesArg *ranges) |
Compute the number of identities for the HSPs in the hsp_list. More... | |
static double | s_CalcLambda (double probs[], int min_score, int max_score, double lambda0) |
A callback routine: compute lambda for the given score probabilities. More... | |
static int | s_GetPosBasedStartFreqRatios (double **returnRatios, Int4 numPositions, Uint1 *query, const char *matrixName, double **startNumerator) |
Fill a two-dimensional array with the frequency ratios that underlie a position specific score matrix (PSSM). More... | |
static int | s_GetStartFreqRatios (double **returnRatios, const char *matrixName) |
Fill a two-dimensional array with the frequency ratios that underlie the named score matrix. More... | |
static int | s_ScalePosMatrix (int **fillPosMatrix, const char *matrixName, double **posFreqs, Uint1 *query, int queryLength, BlastScoreBlk *sbp, double scale_factor) |
Produce a scaled-up version of the position-specific matrix with a given set of position-specific residue frequencies. More... | |
static int | s_ResultHspToDistinctAlign (BlastCompo_Alignment **self, int *numAligns, BlastHSP *hsp_array[], Int4 hspcnt, int init_context, const BlastQueryInfo *queryInfo, double localScalingFactor) |
Convert an array of HSPs to a list of BlastCompo_Alignment objects. More... | |
static void | s_SWFindFinalEndsUsingXdrop (BlastCompo_SequenceData *query, Int4 queryStart, Int4 queryEnd, BlastCompo_SequenceData *subject, Int4 matchStart, Int4 matchEnd, BlastGapAlignStruct *gap_align, const BlastScoringParameters *scoringParams, Int4 score, Int4 *queryAlignmentExtent, Int4 *matchAlignmentExtent, Int4 *newScore) |
Redo a Smith-Waterman alignment using an x-drop alignment. More... | |
static void | s_MatchingSequenceRelease (BlastCompo_MatchingSequence *self) |
Release the resources associated with a matching sequence. More... | |
static int | s_ExtendRight (Uint1 *query_seq, int query_len, Uint1 *subject_seq, int subject_len, int max_shift, int *query_ext_len, int *subject_ext_len, int *align_len) |
Do a simple gapped extension to the right from the beginning of query and subject ranges examining only matches and mismatches. More... | |
static int | s_ExtendLeft (Uint1 *query_seq, int query_len, Uint1 *subject_seq, int subject_len, int max_shift, int *query_ext_len, int *subject_ext_len, int *align_len) |
Extend left from the end of the sequence and subject ranges and count identities. More... | |
static Uint8 | s_GetHash (const Uint1 *data, int word_size) |
Get hash for a word of word_size residues assuming 28-letter alphabet. More... | |
static int | s_FindNumIdentical (Uint1 *query_seq, const Uint8 *query_hashes, int query_len, Uint1 *subject_seq, int subject_len, int max_shift) |
Find a local number of identical residues in two aligned sequences by finding word matches and doing simple gapped extensions from the word hits. More... | |
static Boolean | s_TestNearIdentical (const BlastCompo_SequenceData *seqData, const int seqOffset, const BlastCompo_SequenceData *queryData, const int queryOffset, const Uint8 *query_words, const BlastCompo_Alignment *align) |
Test whether the aligned parts of two sequences that have a high-scoring gapless alignment are nearly identical. More... | |
static int | s_MatchingSequenceInitialize (BlastCompo_MatchingSequence *self, EBlastProgramType program_number, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, Int4 subject_index, BlastSeqSrcSetRangesArg *ranges) |
Initialize a new matching sequence, obtaining information about the sequence from the search. More... | |
static int | s_DoSegSequenceData (BlastCompo_SequenceData *seqData, EBlastProgramType program_name, Boolean *is_seq_biased) |
Filter low complexity regions from the sequence data; uses the SEG algorithm. More... | |
static int | s_SequenceGetTranslatedRange (const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased) |
Obtain a string of translated data. More... | |
static int | s_SequenceGetProteinRange (const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased) |
Get a string of protein data from a protein sequence. More... | |
static int | s_SequenceGetRange (const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *s_range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceData *query, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased) |
Obtain the sequence data that lies within the given range. More... | |
static BlastCompo_Alignment * | s_NewAlignmentFromGapAlign (BlastGapAlignStruct *gap_align, GapEditScript **edit_script, BlastCompo_SequenceRange *query_range, BlastCompo_SequenceRange *subject_range, EMatrixAdjustRule matrix_adjust_rule) |
Reads a BlastGapAlignStruct that has been used to compute a traceback, and return a BlastCompo_Alignment representing the alignment. More... | |
static int | s_NewAlignmentUsingXdrop (BlastCompo_Alignment **pnewAlign, Int4 *pqueryEnd, Int4 *pmatchEnd, Int4 queryStart, Int4 matchStart, Int4 score, BlastCompo_SequenceData *query, BlastCompo_SequenceRange *query_range, Int4 ccat_query_length, BlastCompo_SequenceData *subject, BlastCompo_SequenceRange *subject_range, Int4 full_subject_length, BlastCompo_GappingParams *gapping_params, EMatrixAdjustRule matrix_adjust_rule) |
A callback used when performing SmithWaterman alignments: Calculate the traceback for one alignment by performing an x-drop alignment in the forward direction, possibly increasing the x-drop parameter until the desired score is attained. More... | |
static BlastCompo_Alignment * | s_RedoOneAlignment (BlastCompo_Alignment *in_align, EMatrixAdjustRule matrix_adjust_rule, BlastCompo_SequenceData *query_data, BlastCompo_SequenceRange *query_range, int ccat_query_length, BlastCompo_SequenceData *subject_data, BlastCompo_SequenceRange *subject_range, int full_subject_length, BlastCompo_GappingParams *gapping_params) |
A callback: calculate the traceback for one alignment by performing an x-drop alignment in both directions. More... | |
static void | s_SavedParametersFree (BlastKappa_SavedParameters **searchParams) |
Release the data associated with a BlastKappa_SavedParameters and delete the object. More... | |
static BlastKappa_SavedParameters * | s_SavedParametersNew (Int4 rows, Int4 numQueries, ECompoAdjustModes compo_adjust_mode, Boolean positionBased) |
Create a new instance of BlastKappa_SavedParameters. More... | |
static int | s_RecordInitialSearch (BlastKappa_SavedParameters *searchParams, BlastScoreBlk *sbp, const BlastScoringParameters *scoring, int query_length, ECompoAdjustModes compo_adjust_mode, Boolean positionBased) |
Record the initial value of the search parameters that are to be adjusted. More... | |
static void | s_RescaleSearch (BlastScoreBlk *sbp, BlastScoringParameters *sp, int num_queries, double scale_factor) |
Rescale the search parameters in the search object and options object to obtain more precision. More... | |
static void | s_RestoreSearch (BlastScoreBlk *sbp, BlastScoringParameters *scoring, const BlastKappa_SavedParameters *searchParams, int query_length, Boolean positionBased, ECompoAdjustModes compo_adjust_mode) |
Restore the parameters that were adjusted to their original values. More... | |
static int | s_MatrixInfoInit (Blast_MatrixInfo *self, BLAST_SequenceBlk *queryBlk, BlastScoreBlk *sbp, double scale_factor, const char *matrixName) |
Initialize an object of type Blast_MatrixInfo. More... | |
static int | s_CreateWordArray (const Uint1 *seq_data, Int4 seq_len, Uint8 **words) |
static void | s_FreeBlastCompo_QueryInfoArray (BlastCompo_QueryInfo **query_info, int num_queries) |
static BlastCompo_QueryInfo * | s_GetQueryInfo (Uint1 *query_data, const BlastQueryInfo *blast_query_info, Boolean skip) |
Save information about all queries in an array of objects of type BlastCompo_QueryInfo. More... | |
static BlastCompo_GappingParams * | s_GappingParamsNew (BlastKappa_GappingParamsContext *context, const BlastExtensionParameters *extendParams, int num_queries) |
Create a new object of type BlastCompo_GappingParams. More... | |
static Blast_RedoAlignParams * | s_GetAlignParams (BlastKappa_GappingParamsContext *context, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, const BlastHitSavingParameters *hitParams, const BlastExtensionParameters *extendParams) |
Read the parameters required for the Blast_RedoOneMatch* functions from the corresponding parameters in standard BLAST datatypes. More... | |
static void | s_FillResultsFromCompoHeaps (BlastHSPResults *results, BlastCompo_Heap heaps[], Int4 hitlist_size) |
Convert an array of BlastCompo_Heap objects to a BlastHSPResults structure. More... | |
static void | s_ClearHeap (BlastCompo_Heap *self) |
Remove all matches from a BlastCompo_Heap. More... | |
static void | s_BlastGapAlignStruct_Free (BlastGapAlignStruct *copy) |
Free a BlastGapAlignStruct copy created by s_BlastGapAlignStruct_Copy. More... | |
static BlastGapAlignStruct * | s_BlastGapAlignStruct_Copy (BlastGapAlignStruct *orig, BlastScoreBlk *sbp) |
Create a "deep" copy of a BlastGapAlignStruct structure. More... | |
static void | s_BlastScoreBlk_Free (BlastScoreBlk **copy) |
Free a BlastScoreBlk copy created by s_BlastScoreBlk_Copy. More... | |
static BlastScoreBlk * | s_BlastScoreBlk_Copy (EBlastProgramType program, BlastScoreBlk *orig, Uint1 alphabet_code, Int4 number_of_contexts) |
Create a "deep" copy of a BlastScoreBlk structure. More... | |
Int2 | Blast_RedoAlignmentCore (EBlastProgramType program_number, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results) |
Recompute alignments for each match found by the gapped BLAST algorithm. More... | |
Int2 | Blast_RedoAlignmentCore_MT (EBlastProgramType program_number, Uint4 num_threads, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results) |
Recompute alignments for each match found by the gapped BLAST algorithm. More... | |
Variables | |
static const Blast_RedoAlignCallbacks | redo_align_callbacks |
Callbacks used by the Blast_RedoOneMatch* routines. More... | |
Utilities for doing Smith-Waterman alignments and adjusting the scoring system for each match in blastpgp.
Definition in file blast_kappa.c.
#define BLASTP_MASK_INSTRUCTIONS "S 10 1.8 2.1" |
Default instructions and mask residue for SEG filtering.
Definition at line 1416 of file blast_kappa.c.
#define BLASTP_MASK_RESIDUE 21 |
NCBIstdaa encoding for 'X' character.
Definition at line 1414 of file blast_kappa.c.
#define KAPPA_BLASTP_NO_SEG_SEQUENCE 0 |
Compile-time option; if set to a true value, then blastp runs that use Blast_RedoAlignmentCore to compute the traceback will not SEG the subject sequence.
Definition at line 77 of file blast_kappa.c.
#define KAPPA_TBLASTN_NO_SEG_SEQUENCE 0 |
Compile-time option; if set to a true value, then tblastn runs that use Blast_RedoAlignmentCore to compute the traceback will not SEG the subject sequence.
Definition at line 85 of file blast_kappa.c.
#define NEAR_IDENTICAL_BITS_PER_POSITION (1.74) |
Definition at line 2399 of file blast_kappa.c.
#define SCALING_FACTOR 32 |
SCALING_FACTOR is a multiplicative factor used to get more bits of precision in the integer matrix scores.
It cannot be arbitrarily large because we do not want total alignment scores to exceed -(BLAST_SCORE_MIN).
Definition at line 676 of file blast_kappa.c.
typedef struct BlastKappa_GappingParamsContext BlastKappa_GappingParamsContext |
Data and data-structures needed to perform a gapped alignment.
typedef struct BlastKappa_SavedParameters BlastKappa_SavedParameters |
A BlastKappa_SavedParameters holds the value of certain search parameters on entry to RedoAlignmentCore.
These values are restored on exit.
typedef struct BlastKappa_SequenceInfo BlastKappa_SequenceInfo |
BLAST-specific information that is associated with a BlastCompo_MatchingSequence.
Int2 Blast_RedoAlignmentCore | ( | EBlastProgramType | program_number, |
BLAST_SequenceBlk * | queryBlk, | ||
const BlastQueryInfo * | queryInfo, | ||
BlastScoreBlk * | sbp, | ||
BLAST_SequenceBlk * | subjectBlk, | ||
const BlastSeqSrc * | seqSrc, | ||
Int4 | default_db_genetic_code, | ||
BlastHSPList * | thisMatch, | ||
BlastHSPStream * | hsp_stream, | ||
BlastScoringParameters * | scoringParams, | ||
const BlastExtensionParameters * | extendParams, | ||
const BlastHitSavingParameters * | hitParams, | ||
const PSIBlastOptions * | psiOptions, | ||
BlastHSPResults * | results | ||
) |
Recompute alignments for each match found by the gapped BLAST algorithm.
Top level routine to recompute alignments for each match found by the gapped BLAST algorithm (single-thread prototype). This prototype is an adapter to the multi-thread prototype with num_threads set to 1.
Single-thread adapter to Blast_RedoAlignmentCore_MT.
Definition at line 2942 of file blast_kappa.c.
References Blast_RedoAlignmentCore_MT(), and results.
Referenced by CRedoAlignmentTestFixture::runRedoAlignmentCoreUnitTest(), and s_RPSComputeTraceback().
Int2 Blast_RedoAlignmentCore_MT | ( | EBlastProgramType | program_number, |
Uint4 | num_threads, | ||
BLAST_SequenceBlk * | queryBlk, | ||
const BlastQueryInfo * | queryInfo, | ||
BlastScoreBlk * | sbp, | ||
BLAST_SequenceBlk * | subjectBlk, | ||
const BlastSeqSrc * | seqSrc, | ||
Int4 | default_db_genetic_code, | ||
BlastHSPList * | thisMatch, | ||
BlastHSPStream * | hsp_stream, | ||
BlastScoringParameters * | scoringParams, | ||
const BlastExtensionParameters * | extendParams, | ||
const BlastHitSavingParameters * | hitParams, | ||
const PSIBlastOptions * | psiOptions, | ||
BlastHSPResults * | results | ||
) |
Recompute alignments for each match found by the gapped BLAST algorithm.
Top level routine to recompute alignments for each match found by the gapped BLAST algorithm (multi-thread prototype). A linked list of alignments is returned (param hitList); the alignments are sorted according to the lowest E-value of the best alignment for each matching sequence; alignments for the same matching sequence are in the list consecutively regardless of the E-value of the secondary alignments.
Definition at line 2981 of file blast_kappa.c.
References BlastScoreBlk::alphabet_code, ASSERT, b, BlastHSPList::best_evalue, Blast_CompositionWorkspaceFree(), Blast_CompositionWorkspaceInit(), Blast_CompositionWorkspaceNew(), Blast_ForbiddenRangesInitialize(), Blast_ForbiddenRangesRelease(), Blast_FrequencyDataIsAvailable(), BLAST_GapAlignStructFree(), BLAST_GapAlignStructNew(), Blast_HitListFree(), Blast_HSPListFree(), Blast_HSPListNew(), Blast_HSPListSwap(), Blast_HSPResultsFree(), Blast_HSPResultsNew(), Blast_RedoAlignParamsFree(), Blast_RedoOneMatch(), Blast_RedoOneMatchSmithWaterman(), BLAST_SetupPartialFetching(), BLASTAA_SIZE, BlastCompo_AlignmentsFree(), BlastCompo_EarlyTermination(), BlastCompo_HeapInitialize(), BlastCompo_HeapInsert(), BlastCompo_HeapRelease(), BlastCompo_HeapWouldInsert(), BlastHSPStreamRead(), BlastHSPStreamTBackClose(), BlastSeqSrcCopy(), BlastSeqSrcFree(), BlastSeqSrcGetMaxSeqLen(), BlastSeqSrcGetSupportsPartialFetching(), Boolean, calloc(), BlastExtensionOptions::compositionBasedStats, SBlastScoreMatrix::data, eBlastTypeBlastp, eBlastTypeBlastx, eBlastTypePsiBlast, eBlastTypeRpsBlast, eBlastTypeRpsTblastn, eBlastTypeTblastn, eCompositionBasedStats, eNoCompositionBasedStats, eSmithWatermanTbck, BlastExtensionOptions::eTbackExt, FALSE, BlastKappa_GappingParamsContext::gap_align, GenCodeSingletonFind(), head, SThreadLocalData::hit_params, BlastHSPResults::hitlist_array, BlastHitSavingOptions::hitlist_size, BlastHSPList::hsp_array, BlastHSPList::hspcnt, i, PSIBlastOptions::inclusion_ethresh, BlastCompo_MatchingSequence::index, kBlastHSPStream_Eof, BlastScoreBlk::kbp_gap, Blast_KarlinBlk::Lambda, BlastQueryInfo::last_context, BlastCompo_MatchingSequence::length, BLAST_SequenceBlk::length, BlastCompo_MatchingSequence::local_data, BlastKappa_GappingParamsContext::localScalingFactor, Blast_KarlinBlk::logK, match(), BlastScoringOptions::matrix, BlastScoreBlk::matrix, BlastQueryInfo::max_length, SBlastScoreMatrix::ncols, next(), NULL, BlastHSPResults::num_queries, 
BlastQueryInfo::num_queries, BlastScoreBlk::number_of_contexts, BlastHSPList::oid, BlastExtensionParameters::options, BlastHitSavingParameters::options, BlastScoringParameters::options, BlastKappa_GappingParamsContext::prog_number, PSI_INCLUSION_ETHRESH, BlastScoreBlk::psi_matrix, SPsiBlastScoreMatrix::pssm, BlastHSPList::query_index, SThreadLocalData::results, results, s_BlastGapAlignStruct_Copy(), s_BlastGapAlignStruct_Free(), s_BlastScoreBlk_Copy(), s_BlastScoreBlk_Free(), s_ClearHeap(), s_ComputeNumIdentities(), s_FillResultsFromCompoHeaps(), s_FreeBlastCompo_QueryInfoArray(), s_FreeEditScript(), s_GetAlignParams(), s_GetQueryInfo(), s_HitlistEvaluateAndPurge(), s_HitlistReapContained(), s_HSPListFromDistinctAlignments(), s_HSPListNormalizeScores(), s_MatchingSequenceInitialize(), s_MatchingSequenceRelease(), s_RecordInitialSearch(), s_RescaleSearch(), s_RestoreSearch(), s_ResultHspToDistinctAlign(), s_SavedParametersFree(), s_SavedParametersNew(), BlastGapAlignStruct::sbp, BlastKappa_GappingParamsContext::sbp, SCALING_FACTOR, BlastKappa_GappingParamsContext::scoringParams, BLAST_SequenceBlk::sequence, sfree, SThreadLocalDataArrayConsolidateResults(), SThreadLocalDataArrayNew(), SThreadLocalDataFree(), util::strcmp(), SThreadLocalDataArray::tld, TRUE, and BlastExtensionOptions::unifiedP.
Referenced by BLAST_ComputeTraceback_MT(), and Blast_RedoAlignmentCore().
|
static |
Adjusts the E-values in a BLAST_HitList to be composites of a composition-based P-value and a score/alignment-based P-value.
hsp_list | the hitlist whose E-values need to be adjusted |
comp_p_value | P-value from sequence composition |
seqSrc | a source of sequence data |
subject_length | length of database sequence |
query_context | info about this query context; needed when multiple queries are being used |
LambdaRatio | the ratio between the observed value of Lambda and the predicted value of lambda (used to print diagnostics) |
subject_id | the subject id of this sequence (used to print diagnostics) |
Definition at line 135 of file blast_kappa.c.
References BlastHSPList::best_evalue, BLAST_KarlinEtoP(), BLAST_KarlinPtoE(), Blast_Overall_P_Value(), BlastContextInfo::eff_searchsp, BlastHSP::evalue, for(), BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastContextInfo::length_adjustment, MAX, and BlastContextInfo::query_length.
Referenced by s_HitlistEvaluateAndPurge().
|
static |
Create a "deep" copy of a BlastGapAlignStruct structure.
Non-pointer structure members are copied. Pointers to data which will only be read are copied. For data which will be changing, memory for copies will be allocated and new pointers will be assigned to them. The process repeats down the structure hierarchy until all pointers are dealt with.
orig | Pointer to BlastGapAlignStruct structure to be copied |
sbp | Pointer to BlastScoreBlk structure, required to set copy->sbp |
Definition at line 2604 of file blast_kappa.c.
References calloc(), copy(), GapPrelimEditBlock::edit_ops, i, GapStateArrayStruct::length, GapStateArrayStruct::next, NULL, GapEditScript::num, GapPrelimEditScript::num, GapPrelimEditBlock::num_ops_allocated, GapEditScript::op_type, GapPrelimEditScript::op_type, orig, GapEditScript::size, and GapStateArrayStruct::state_array.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Free a BlastGapAlignStruct copy created by s_BlastGapAlignStruct_Copy.
copy | Pointer to BlastGapAlignStruct to be freed |
Definition at line 2532 of file blast_kappa.c.
References copy(), NULL, sfree, and GapStateArrayStruct::state_array.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Create a "deep" copy of a BlastScoreBlk structure.
Non-pointer structure members are copied. Pointers to data which will only be read are copied. For data which will be changing, memory for copies will be allocated and new pointers will be assigned to them. The process repeats down the structure hierarchy until all pointers are dealt with.
program | The program type |
orig | Pointer to BlastScoreBlk structure to be copied |
alphabet_code | Alphabet code |
number_of_contexts | Number of contexts |
Definition at line 2765 of file blast_kappa.c.
References Blast_KarlinBlkCopy(), Blast_KarlinBlkNew(), Blast_QueryIsPssm(), Blast_ScoreFreqNew(), BlastScoreBlkFree(), BlastScoreBlkNew(), calloc(), copy(), ctx, SBlastScoreMatrix::data, SPsiBlastScoreMatrix::freq_ratios, SBlastScoreMatrix::freqs, i, SPsiBlastScoreMatrix::kbp, SBlastScoreMatrix::lambda, SBlastScoreMatrix::ncols, SBlastScoreMatrix::nrows, NULL, orig, SPsiBlastScoreMatrix::pssm, r(), SPsiBlastScoreMatrixNew(), and strdup.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Free a BlastScoreBlk copy created by s_BlastScoreBlk_Copy.
BlastScoreBlk* pointer "bsb_ptr" should be passed as (&bsb_ptr); this function will set bsb_ptr to NULL before returning.
copy | Pointer to (pointer to BlastScoreBlk to be freed) |
Definition at line 2743 of file blast_kappa.c.
References BlastScoreBlkFree(), copy(), and NULL.
Referenced by Blast_RedoAlignmentCore_MT().
A callback routine: compute lambda for the given score probabilities.
Definition at line 551 of file blast_kappa.c.
References Blast_KarlinLambdaNR(), i, Blast_ScoreFreq::obs_max, Blast_ScoreFreq::obs_min, Blast_ScoreFreq::score_avg, Blast_ScoreFreq::score_max, Blast_ScoreFreq::score_min, Blast_ScoreFreq::sprob, and Blast_ScoreFreq::sprob0.
|
static |
Remove all matches from a BlastCompo_Heap.
Definition at line 2518 of file blast_kappa.c.
References Blast_HSPListFree(), BlastCompo_HeapPop(), and NULL.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Compute the number of identities for the HSPs in the hsp_list.
query_blk | the query sequence data [in] |
query_info | structure describing the query_blk structure [in] |
seq_src | source of subject sequence data [in] |
hsp_list | list of HSPs to be processed [in|out] |
scoring_options | scoring options [in] |
gen_code_string | Genetic code for tblastn [in] |
Definition at line 459 of file blast_kappa.c.
References ASSERT, Blast_HSPGetNumIdentitiesAndPositives(), Blast_HSPGetTargetTranslation(), Blast_TracebackGetEncoding(), BlastSeqSrcGetSequence(), BlastSeqSrcReleaseSequence(), BlastSequenceBlkFree(), BlastTargetTranslationFree(), BlastTargetTranslationNew(), CODON_LENGTH, BlastHSP::context, context, BlastQueryInfo::contexts, eBlastTypeBlastx, eBlastTypeTblastn, BlastHSPList::hsp_array, BlastHSPList::hspcnt, i, BlastScoringOptions::is_ooframe, NULL, BlastHSPList::oid, BLAST_SequenceBlk::oof_sequence, BlastScoringOptions::program_number, query, BlastContextInfo::query_offset, BlastSeqSrcGetSeqArg::ranges, BLAST_SequenceBlk::sequence, BLAST_SequenceBlk::sequence_nomask, subject, and TRUE.
Referenced by Blast_RedoAlignmentCore_MT().
Definition at line 2244 of file blast_kappa.c.
References calloc(), i, mask, NCBI_CONST_UINT8, and s_GetHash().
Referenced by s_GetQueryInfo().
|
static |
Filter low complexity regions from the sequence data; uses the SEG algorithm.
seqData | data to be filtered |
program_name | type of search being performed |
Definition at line 1428 of file blast_kappa.c.
References Blast_MaskTheResidues(), BlastFilteringOptionsFromString(), BLASTP_MASK_INSTRUCTIONS, BlastSeqLocFree(), BlastSetUp_Filter(), BlastCompo_SequenceData::data, FALSE, BlastCompo_SequenceData::length, NULL, and SBlastFilterOptionsFree().
Referenced by s_SequenceGetProteinRange(), and s_SequenceGetTranslatedRange().
|
static |
Extend left from the end of the sequence and subject ranges and count identities.
The extension stops when there are more than max_shift mismatches, or when mismatches or gaps are not followed by two identical matches. See description for s_ExtendRight for more details.
query_seq | Query sequence [in] |
query_len | Query length [in] |
subject_seq | Subject sequence [in] |
subject_len | Subject length [in] |
max_shift | Maximum number of mismatches or gaps, extension stops if this number is reached [in] |
query_ext_len | Extension length on the query [out] |
subject_ext_len | Extension length on the subject [out] |
align_len | Alignment length [out] |
Definition at line 1039 of file blast_kappa.c.
Referenced by s_FindNumIdentical(), and s_TestNearIdentical().
|
static |
Do a simple gapped extension to the right from the beginning of query and subject ranges examining only matches and mismatches.
The extension stops when there are more than max_shift mismatches, or when mismatches or gaps are not followed by two identical matches. This is a simplified version of Danielle and Jean Thierry-Mieg's jumper alignment as implemented in NCBI Magic: https://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/Download/Downloads.html
query_seq | Query sequence [in] |
query_len | Query length [in] |
subject_seq | Subject sequence [in] |
subject_len | Subject length [in] |
max_shift | Maximum number of mismatches or gaps, extension stops if this number is reached [in] |
query_ext_len | Extension length on the query [out] |
subject_ext_len | Extension length on the subject [out] |
align_len | Alignment length [out] |
Definition at line 944 of file blast_kappa.c.
Referenced by s_FindNumIdentical(), and s_TestNearIdentical().
|
static |
Convert an array of BlastCompo_Heap objects to a BlastHSPResults structure.
results | BLAST core external results structure (pre-SeqAlign) [out] |
heaps | an array of BlastCompo_Heap objects |
hitlist_size | size of each list in the results structure above [in] |
Definition at line 2493 of file blast_kappa.c.
References Blast_HitListNew(), Blast_HitListUpdate(), Blast_HSPResultsReverseOrder(), BlastCompo_HeapPop(), heap, NULL, and results.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Find a local number of identical residues in two aligned sequences by finding word matches and doing simple gapped extensions from the word hits.
query_seq | Query sequence [in] |
query_hashes | Array of query words with index of each word corresponding to word position in the query [in] |
query_len | Query length [in] |
subject_seq | Subject sequence [in] |
subject_len | Subject length [in] |
max_shift | Maximum number of local mismatches or gaps for extensions [in] |
Definition at line 1143 of file blast_kappa.c.
References FALSE, mask, match(), NCBI_CONST_UINT8, s_ExtendLeft(), s_ExtendRight(), s_GetHash(), and TRUE.
Referenced by s_TestNearIdentical().
|
static |
Definition at line 2279 of file blast_kappa.c.
References free(), i, and NULL.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
A callback used to free an EditScript that has been stored in a BlastCompo_Alignment.
Definition at line 283 of file blast_kappa.c.
References GapEditScriptDelete(), and NULL.
Referenced by Blast_RedoAlignmentCore_MT(), and s_HSPListFromDistinctAlignments().
|
static |
Create a new object of type BlastCompo_GappingParams.
The new object contains the parameters needed by the composition adjustment library to compute a gapped alignment.
context | the data structures needed by callback functions that perform the gapped alignments. |
extendParams | parameters used for a gapped extension |
num_queries | the number of queries in the concatenated query |
Definition at line 2355 of file blast_kappa.c.
References BlastCompo_GappingParams::context, context, BlastCompo_GappingParams::gap_extend, BlastScoringParameters::gap_extend, BlastCompo_GappingParams::gap_open, BlastScoringParameters::gap_open, BlastExtensionOptions::gap_x_dropoff_final, BlastExtensionParameters::gap_x_dropoff_final, i, malloc(), MAX, NCBIMATH_LN2, NULL, BlastExtensionParameters::options, and BlastCompo_GappingParams::x_dropoff.
Referenced by s_GetAlignParams().
|
static |
Read the parameters required for the Blast_RedoOneMatch* functions from the corresponding parameters in standard BLAST datatypes.
Return a new object representing these parameters.
Definition at line 2407 of file blast_kappa.c.
References ASSERT, Blast_MatrixInfoNew(), Blast_RedoAlignParamsNew(), BLASTAA_SIZE, Boolean, BlastExtensionOptions::compositionBasedStats, context, BlastQueryInfo::contexts, BlastHitSavingParameters::cutoff_score_min, BlastHitSavingParameters::do_sum_stats, eBlastTypeBlastx, eBlastTypeRpsTblastn, eBlastTypeTblastn, BlastHitSavingOptions::expect_value, BlastQueryInfo::first_context, for(), int, BlastContextInfo::is_valid, BlastQueryInfo::last_context, BlastHitSavingParameters::link_hsp_params, BlastQueryInfo::max_length, NCBIMATH_LN2, NEAR_IDENTICAL_BITS_PER_POSITION, NULL, BlastExtensionParameters::options, BlastHitSavingParameters::options, redo_align_callbacks, s_GappingParamsNew(), and s_MatrixInfoInit().
Referenced by Blast_RedoAlignmentCore_MT().
Get hash for a word of word_size residues assuming 28-letter alphabet.
data | Sequence [in] |
word_size | Word size [in] |
Definition at line 1117 of file blast_kappa.c.
References data.
Referenced by s_CreateWordArray(), and s_FindNumIdentical().
|
static |
Fill a two-dimensional array with the frequency ratios that underlie a position specific score matrix (PSSM).
returnRatios | a two-dimensional array with BLASTAA_SIZE columns |
numPositions | the number of rows in returnRatios |
query | query sequence data, of length numPositions |
matrixName | the name of the position independent matrix corresponding to this PSSM |
startNumerator | position-specific data used to generate the PSSM |
Definition at line 592 of file blast_kappa.c.
References _PSIMatrixFrequencyRatiosFree(), _PSIMatrixFrequencyRatiosNew(), BLAST_GetStandardAaProbabilities(), BLASTAA_SIZE, SFreqRatios::data, eStopChar, eXchar, i, kPosEpsilon, NULL, query, and sfree.
Referenced by s_MatrixInfoInit().
|
static |
Save information about all queries in an array of objects of type BlastCompo_QueryInfo.
query_data | query sequence data |
blast_query_info | information about all queries, as an internal blast data structure |
Definition at line 2309 of file blast_kappa.c.
References Blast_ReadAaComposition(), BLASTAA_SIZE, calloc(), BlastCompo_QueryInfo::composition, BlastQueryInfo::contexts, BlastCompo_SequenceData::data, BlastCompo_QueryInfo::eff_search_space, BlastContextInfo::eff_searchsp, i, BlastQueryInfo::last_context, BlastCompo_SequenceData::length, NULL, BlastCompo_QueryInfo::origin, BlastContextInfo::query_length, BlastContextInfo::query_offset, s_CreateWordArray(), BlastCompo_QueryInfo::seq, and BlastCompo_QueryInfo::words.
Referenced by Blast_RedoAlignmentCore_MT().
Fill a two-dimensional array with the frequency ratios that underlie the named score matrix.
returnRatios | a two-dimensional array of size BLASTAA_SIZE x BLASTAA_SIZE |
matrixName | the name of a matrix |
Definition at line 649 of file blast_kappa.c.
References _PSIMatrixFrequencyRatiosFree(), _PSIMatrixFrequencyRatiosNew(), BLASTAA_SIZE, SFreqRatios::data, i, and NULL.
Referenced by s_MatrixInfoInit().
Int4 s_GetSubjectLength | ( | Int4 | total_subj_length, |
EBlastProgramType | program_number | ||
) |
Definition at line 364 of file blast_kappa.c.
References eBlastTypeRpsTblastn, and GET_NUCL_LENGTH.
Referenced by s_HitlistEvaluateAndPurge().
|
static |
Add evalues to a list of HSPs and remove those that do not have a sufficiently good (low) evalue.
*pbestScore | best (highest) score in the list |
*pbestEvalue | best (lowest) evalue in the list |
hsp_list | the list |
seqSrc | a source of sequence data |
subject_length | length of the subject sequence |
program_number | the type of BLAST search being performed |
queryInfo | information about the queries |
context_index | the index of the query corresponding to the HSPs in hsp_list |
sbp | the score block for this search |
hitParams | parameters used to assign evalues and decide whether to save hits. |
pvalueForThisPair | composition p-value |
LambdaRatio | lambda ratio, if available |
subject_id | index of subject |
Definition at line 395 of file blast_kappa.c.
References BlastHSPList::best_evalue, Blast_HSPListGetEvalues(), Blast_HSPListReapByEvalue(), BLAST_LinkHsps(), BlastQueryInfo::contexts, BlastHitSavingParameters::do_sum_stats, eBlastTypeBlastp, eBlastTypeBlastx, FALSE, BlastHSPList::hsp_array, BlastHSPList::hspcnt, BlastHitSavingParameters::link_hsp_params, BlastHitSavingParameters::options, s_AdjustEvaluesForComposition(), s_GetSubjectLength(), BlastHSP::score, and TRUE.
Referenced by Blast_RedoAlignmentCore_MT().
Remove from a hitlist all HSPs that are completely contained in an HSP that occurs earlier in the list and that:
hsp_array | array to be reaped |
hspcnt | length of hsp_array |
Definition at line 224 of file blast_kappa.c.
References Blast_HSPFree(), CONTAINED_IN_HSP, BlastSeg::end, BlastSeg::frame, NULL, BlastSeg::offset, BlastHSP::query, BlastHSP::score, and BlastHSP::subject.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Converts a list of objects of type BlastCompo_Alignment to a new object of type BlastHSPList and returns the result.
Conversion in this direction is lossless. The list passed to this routine is freed to ensure that there is no aliasing of fields between the list of BlastCompo_Alignments and the new hitlist.
hsp_list | The hsp_list to populate |
alignments | A list of distinct alignments; freed before return [in] |
oid | Ordinal id of a database sequence [in] |
queryInfo | information about all queries in this search [in] |
frame | query frame |
Definition at line 305 of file blast_kappa.c.
References Blast_HSPInit(), Blast_HSPListFree(), Blast_HSPListSaveHSP(), Blast_HSPListSortByScore(), BlastCompo_AlignmentsFree(), BlastHSP::comp_adjustment_method, BlastCompo_Alignment::context, eCompoScaleOldMatrix, eCompositionBasedStats, eCompositionMatrixAdjust, eDontAdjustMatrix, eNoCompositionBasedStats, BlastCompo_Alignment::frame, BlastCompo_Alignment::matchEnd, BlastCompo_Alignment::matchStart, BlastCompo_Alignment::matrix_adjust_rule, BlastCompo_Alignment::next, NULL, BlastHSP::num_ident, BlastHSPList::oid, BlastCompo_Alignment::queryEnd, BlastCompo_Alignment::queryIndex, BlastCompo_Alignment::queryStart, s_FreeEditScript(), and BlastCompo_Alignment::score.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Given a list of HSPs with (possibly) high-precision scores, rescale the scores to have standard precision and set the scale-independent bit scores.
This routine does *not* re-sort the list; it is assumed that the list is already sorted according to e-values that have been computed using the initial, higher-precision scores.
hsp_list | the HSP list |
logK | Karlin-Altschul statistical parameter [in] |
lambda | Karlin-Altschul statistical parameter [in] |
scoreDivisor | the value by which reported scores are to be divided |
Definition at line 102 of file blast_kappa.c.
References BlastHSP::bit_score, BLAST_Nint(), BlastHSPList::hsp_array, BlastHSPList::hspcnt, lambda(), NCBIMATH_LN2, and BlastHSP::score.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Initialize a new matching sequence, obtaining information about the sequence from the search.
self | object to be initialized |
seqSrc | A pointer to a source from which sequence data may be obtained |
program_number | identifies the type of blast search being performed. |
default_db_genetic_code | default genetic code to use when subject sequences are translated and there is no other guidance on what code to use |
subject_index | index of the matching sequence in the database |
Definition at line 1357 of file blast_kappa.c.
References ASSERT, Blast_SubjectIsTranslated(), BlastSeqSrcGetSeqLen(), BlastSeqSrcGetSequence(), BlastSeqSrcGetSeqArg::check_oid_exclusion, eBlastEncodingNcbi4na, eBlastEncodingProtein, eBlastTypeTblastn, BlastSeqSrcGetSeqArg::encoding, BLAST_SequenceBlk::gen_code_string, GenCodeSingletonFind(), malloc(), NULL, BlastSeqSrcGetSeqArg::oid, BlastKappa_SequenceInfo::prog_number, BlastSeqSrcGetSeqArg::ranges, s_MatchingSequenceRelease(), BlastSeqSrcGetSeqArg::seq, BlastKappa_SequenceInfo::seq_arg, BlastKappa_SequenceInfo::seq_src, and TRUE.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Release the resources associated with a matching sequence.
Definition at line 908 of file blast_kappa.c.
References BlastSeqSrcReleaseSequence(), BlastSequenceBlkFree(), free(), NULL, BlastSeqSrcGetSeqArg::seq, BlastKappa_SequenceInfo::seq_arg, and BlastKappa_SequenceInfo::seq_src.
Referenced by Blast_RedoAlignmentCore_MT(), and s_MatchingSequenceInitialize().
|
static |
Initialize an object of type Blast_MatrixInfo.
self | object being initialized |
queryBlk | the query sequence data |
sbp | score block for this search |
scale_factor | amount by which ungapped parameters should be scaled |
matrixName | name of the matrix |
Definition at line 2199 of file blast_kappa.c.
References Blast_Int4MatrixFromFreq(), SPsiBlastScoreMatrix::freq_ratios, BlastScoreBlk::kbp_ideal, BlastScoreBlk::kbp_psi, Blast_KarlinBlk::Lambda, BLAST_SequenceBlk::length, malloc(), NULL, BlastScoreBlk::psi_matrix, s_GetPosBasedStartFreqRatios(), s_GetStartFreqRatios(), s_ScalePosMatrix(), and BLAST_SequenceBlk::sequence.
Referenced by s_GetAlignParams().
|
static |
Reads a BlastGapAlignStruct that has been used to compute a traceback, and return a BlastCompo_Alignment representing the alignment.
The BlastGapAlignStruct is in coordinates local to the ranges being aligned; the resulting alignment is in coordinates w.r.t. the whole query and subject.
gap_align | the BlastGapAlignStruct |
*edit_script | the edit script from the alignment; on exit NULL. The edit_script is usually gap_align->edit_script, but we don't want an implicit side effect on the gap_align. |
query_range | the range of the query used in this alignment |
subject_range | the range of the subject used in this alignment |
matrix_adjust_rule | the rule used to compute the scoring matrix |
Definition at line 1748 of file blast_kappa.c.
References BlastCompo_SequenceRange::begin, BlastCompo_AlignmentNew(), BlastCompo_SequenceRange::context, NULL, BlastGapAlignStruct::query_start, BlastGapAlignStruct::query_stop, BlastGapAlignStruct::score, BlastGapAlignStruct::subject_start, and BlastGapAlignStruct::subject_stop.
Referenced by s_RedoOneAlignment().
|
static |
A callback used when performing SmithWaterman alignments: Calculate the traceback for one alignment by performing an x-drop alignment in the forward direction, possibly increasing the x-drop parameter until the desired score is attained.
The start, end and score of the alignment should be obtained using the Smith-Waterman algorithm before this routine is called.
*pnewAlign | the new alignment |
*pqueryEnd | on entry, the end of the alignment in the query, as computed by the Smith-Waterman algorithm. On exit, the end as computed by the x-drop algorithm |
*pmatchEnd | like *pqueryEnd, but for the subject sequence |
queryStart | the starting point in the query |
matchStart | the starting point in the subject |
score | the score of the alignment, as computed by the Smith-Waterman algorithm |
query | query sequence data |
query_range | range of this query in the concatenated query |
ccat_query_length | total length of the concatenated query |
subject | subject sequence data |
subject_range | range of subject_data in the translated query, in amino acid coordinates |
full_subject_length | length of the full subject sequence |
gapping_params | parameters used to compute gapped alignments |
matrix_adjust_rule | the rule used to compute the scoring matrix |
Definition at line 1813 of file blast_kappa.c.
References BlastCompo_SequenceRange::begin, Blast_PrelimEditBlockToGapEditScript(), BlastCompo_AlignmentNew(), BlastCompo_GappingParams::context, BlastCompo_SequenceRange::context, context, BlastGapAlignStruct::fwd_prelim_tback, BlastGapAlignStruct::gap_x_dropoff, GapEditScriptDelete(), NULL, query, BlastGapAlignStruct::rev_prelim_tback, s_SWFindFinalEndsUsingXdrop(), subject, and BlastCompo_GappingParams::x_dropoff.
|
static |
Record the initial value of the search parameters that are to be adjusted.
searchParams | holds the recorded values [out] |
sbp | a score block [in] |
scoring | gapped alignment parameters [in] |
query_length | length of the concatenated query [in] |
compo_adjust_mode | composition adjustment mode [in] |
positionBased | is this search position-based [in] |
Definition at line 2060 of file blast_kappa.c.
References Blast_KarlinBlkCopy(), Blast_KarlinBlkNew(), BLASTAA_SIZE, SBlastScoreMatrix::data, eNoCompositionBasedStats, BlastScoringParameters::gap_extend, BlastScoringParameters::gap_open, BlastKappa_SavedParameters::gap_open, BlastKappa_SavedParameters::gapExtend, i, BlastScoreBlk::kbp_gap, BlastKappa_SavedParameters::kbp_gap_orig, BlastScoreBlk::matrix, NULL, BlastKappa_SavedParameters::num_queries, BlastKappa_SavedParameters::origMatrix, BlastScoreBlk::psi_matrix, SPsiBlastScoreMatrix::pssm, BlastScoringParameters::scale_factor, and BlastKappa_SavedParameters::scale_factor.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
A callback: calculate the traceback for one alignment by performing an x-drop alignment in both directions.
in_align | the existing alignment, without traceback |
matrix_adjust_rule | the rule used to compute the scoring matrix |
query_data | query sequence data |
query_range | range of this query in the concatenated query |
ccat_query_length | total length of the concatenated query |
subject_data | subject sequence data |
subject_range | range of subject_data in the translated query, in amino acid coordinates |
full_subject_length | length of the full subject sequence |
gapping_params | parameters used to compute gapped alignments |
Definition at line 1899 of file blast_kappa.c.
References BlastCompo_SequenceRange::begin, BLAST_GappedAlignmentWithTraceback(), BlastCompo_Alignment::context, BlastCompo_GappingParams::context, context, BlastCompo_SequenceData::data, BlastGapAlignStruct::edit_script, FALSE, BlastGapAlignStruct::gap_x_dropoff, BlastSeg::gapped_start, BlastCompo_SequenceData::length, NULL, BlastHSP::query, s_NewAlignmentFromGapAlign(), BlastHSP::subject, and BlastCompo_GappingParams::x_dropoff.
|
static |
Rescale the search parameters in the search object and options object to obtain more precision.
sbp | score block to be rescaled |
sp | scoring parameters to be rescaled |
num_queries | number of queries in this search |
scale_factor | amount by which to scale this search |
Definition at line 2117 of file blast_kappa.c.
References BLAST_Nint(), BlastScoringParameters::gap_extend, BlastScoringParameters::gap_open, i, Blast_KarlinBlk::K, BlastScoreBlk::kbp_gap, Blast_KarlinBlk::Lambda, log, Blast_KarlinBlk::logK, NULL, and BlastScoringParameters::scale_factor.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Restore the parameters that were adjusted to their original values.
sbp | the score block to be restored |
scoring | the scoring parameters to be restored |
searchParams | the initial recorded values of the parameters |
query_length | the concatenated query length |
positionBased | is this search position-based |
compo_adjust_mode | mode of composition adjustment |
Definition at line 2148 of file blast_kappa.c.
References Blast_KarlinBlkCopy(), BLASTAA_SIZE, SBlastScoreMatrix::data, eNoCompositionBasedStats, BlastScoringParameters::gap_extend, BlastScoringParameters::gap_open, BlastKappa_SavedParameters::gap_open, BlastKappa_SavedParameters::gapExtend, i, BlastScoreBlk::kbp_gap, BlastKappa_SavedParameters::kbp_gap_orig, BlastScoreBlk::matrix, NULL, BlastKappa_SavedParameters::num_queries, BlastKappa_SavedParameters::origMatrix, BlastScoreBlk::psi_matrix, SPsiBlastScoreMatrix::pssm, BlastScoringParameters::scale_factor, and BlastKappa_SavedParameters::scale_factor.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Convert an array of HSPs to a list of BlastCompo_Alignment objects.
The context field of each BlastCompo_Alignment is set to point to the corresponding HSP.
self | the array of alignments to be filled |
self | the array of alignment to be filled |
numAligns | number of alignments |
hsp_array | an array of HSPs |
hspcnt | the length of hsp_array |
init_context | the initial context to process |
queryInfo | information about the concatenated query |
localScalingFactor | the amount by which this search is scaled |
Definition at line 770 of file blast_kappa.c.
References ASSERT, BlastCompo_AlignmentNew(), BlastHSP::context, eDontAdjustMatrix, BlastSeg::end, BlastSeg::frame, BlastCompo_Alignment::next, NULL, BlastSeg::offset, BlastHSP::query, BlastHSP::score, and BlastHSP::subject.
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Release the data associated with a BlastKappa_SavedParameters and delete the object.
searchParams | the object to be deleted [in][out] |
Definition at line 1978 of file blast_kappa.c.
References Blast_KarlinBlkFree(), free(), i, BlastKappa_SavedParameters::kbp_gap_orig, Nlm_Int4MatrixFree(), NULL, BlastKappa_SavedParameters::num_queries, BlastKappa_SavedParameters::origMatrix, and sfree.
Referenced by Blast_RedoAlignmentCore_MT(), and s_SavedParametersNew().
|
static |
Create a new instance of BlastKappa_SavedParameters.
rows | number of rows in the scoring matrix |
numQueries | number of queries in this search |
compo_adjust_mode | if >0, use composition-based statistics |
positionBased | if true, the search is position-based |
Definition at line 2009 of file blast_kappa.c.
References BLASTAA_SIZE, calloc(), eNoCompositionBasedStats, i, BlastKappa_SavedParameters::kbp_gap_orig, malloc(), Nlm_Int4MatrixNew(), NULL, BlastKappa_SavedParameters::num_queries, BlastKappa_SavedParameters::origMatrix, and s_SavedParametersFree().
Referenced by Blast_RedoAlignmentCore_MT().
|
static |
Produce a scaled-up version of the position-specific matrix with a given set of position-specific residue frequencies.
fillPosMatrix | is the matrix to be filled |
matrixName | name of the standard substitution matrix [in] |
posFreqs | PSSM's frequency ratios [in] |
query | Query sequence data [in] |
queryLength | Length of the query sequence above [in] |
sbp | stores various parameters of the search |
scale_factor | amount by which ungapped parameters should be scaled. |
Definition at line 694 of file blast_kappa.c.
References _PSIConvertFreqRatiosToPSSM(), _PSICopyMatrix_double(), _PSICopyMatrix_int(), _PSIInternalPssmDataFree(), _PSIInternalPssmDataNew(), BLASTAA_SIZE, cleanup(), FALSE, _PSIInternalPssmData::freq_ratios, Kappa_compactSearchItemsFree(), Kappa_compactSearchItemsNew(), Kappa_impalaScaling(), Kappa_posSearchItemsFree(), Kappa_posSearchItemsNew(), _PSIInternalPssmData::ncols, _PSIInternalPssmData::nrows, NULL, Kappa_posSearchItems::posFreqs, Kappa_posSearchItems::posMatrix, Kappa_posSearchItems::posPrivateMatrix, _PSIInternalPssmData::pssm, query, _PSIInternalPssmData::scaled_pssm, and Kappa_compactSearchItems::standardProb.
Referenced by s_MatrixInfoInit().
|
static |
Get a string of protein data from a protein sequence.
self | a protein sequence [in] |
range | the range to get [in] |
seqData | the resulting data [out] |
queryData | the query sequence [in] |
queryOffset | offset for align if there are multiple queries |
align | information about the alignment between query and subject [in] |
shouldTestIdentical | did alignment pass a preliminary test in redo_alignment.c that indicates the sequence pieces may be near identical [in] |
Definition at line 1573 of file blast_kappa.c.
References BlastCompo_SequenceRange::begin, BlastCompo_SequenceData::buffer, calloc(), BlastCompo_SequenceData::data, eBlastTypeBlastp, f, BlastCompo_Alignment::frame, free(), GET_NUCL_LENGTH, GET_SEQ_FRAME, GET_TRANSLATED_LENGTH, i, KAPPA_BLASTP_NO_SEG_SEQUENCE, BlastCompo_SequenceData::length, NULL, offsets, compile_time_bits::range(), s_DoSegSequenceData(), s_TestNearIdentical(), BlastSeqSrcGetSeqArg::seq, BlastKappa_SequenceInfo::seq_arg, and BLAST_SequenceBlk::sequence.
Referenced by s_SequenceGetRange().
|
static |
Obtain the sequence data that lies within the given range.
self | sequence information [in] |
range | range specifying the range of data [in] |
seqData | the sequence data obtained [out] |
seqData | the resulting data [out] |
queryData | the query sequence [in] |
queryOffset | offset for align if there are multiple queries |
align | information about the alignment between query and subject |
shouldTestIdentical | did alignment pass a preliminary test in redo_alignment.c that indicates the sequence pieces may be near identical |
Definition at line 1671 of file blast_kappa.c.
References BlastCompo_SequenceRange::begin, BlastCompo_SequenceData::buffer, calloc(), BlastCompo_SequenceData::data, eBlastTypeTblastn, BlastCompo_SequenceRange::end, BlastCompo_SequenceData::length, BlastKappa_SequenceInfo::prog_number, query, s_SequenceGetProteinRange(), and s_SequenceGetTranslatedRange().
|
static |
Obtain a string of translated data.
self | the sequence from which to obtain the data [in] |
range | the range and translation frame to get [in] |
seqData | the resulting data [out] |
queryData | the query sequence [in] |
queryOffset | offset for align if there are multiple queries |
align | information about the alignment between query and subject |
shouldTestIdentical | did alignment pass a preliminary test in redo_alignment.c that indicates the sequence pieces may be near identical |
Definition at line 1475 of file blast_kappa.c.
References ABS, BlastCompo_SequenceRange::begin, Blast_GetPartialTranslation(), BlastCompo_SequenceData::buffer, BlastCompo_SequenceData::data, eBlastTypeTblastn, free(), BLAST_SequenceBlk::gen_code_string, KAPPA_TBLASTN_NO_SEG_SEQUENCE, BlastCompo_SequenceData::length, NULL, compile_time_bits::range(), s_DoSegSequenceData(), s_TestNearIdentical(), BlastSeqSrcGetSeqArg::seq, BlastKappa_SequenceInfo::seq_arg, and BLAST_SequenceBlk::sequence_start.
Referenced by s_SequenceGetRange().
|
static |
Redo a S-W alignment using an x-drop alignment.
The result will usually be the same as the S-W alignment. The call to ALIGN_EX attempts to force the endpoints of the alignment to match the optimal endpoints determined by the Smith-Waterman algorithm. ALIGN_EX is used so that if the data structures for storing BLAST alignments are changed, the code will not break.
query | the query data |
queryStart | start of the alignment in the query sequence |
queryEnd | end of the alignment in the query sequence, as computed by the Smith-Waterman algorithm |
subject | the subject (database) sequence |
matchStart | start of the alignment in the subject sequence |
matchEnd | end of the alignment in the subject sequence, as computed by the Smith-Waterman algorithm |
gap_align | parameters for a gapped alignment |
scoringParams | Settings for gapped alignment.[in] |
score | score computed by the Smith-Waterman algorithm |
queryAlignmentExtent | length of the alignment in the query sequence, as computed by the x-drop algorithm |
matchAlignmentExtent | length of the alignment in the subject sequence, as computed by the x-drop algorithm |
newScore | alignment score computed by the x-drop algorithm |
Definition at line 844 of file blast_kappa.c.
References ALIGN_EX(), FALSE, BlastGapAlignStruct::fwd_prelim_tback, BlastGapAlignStruct::gap_x_dropoff, GapPrelimEditBlockReset(), NULL, query, BlastGapAlignStruct::rev_prelim_tback, and subject.
Referenced by s_NewAlignmentUsingXdrop().
|
static |
Test whether the aligned parts of two sequences that have a high-scoring gapless alignment are nearly identical.
First extend from the left end of the query and subject ranges and stop if there are too many mismatches. Then extend from the right end. Then for the remaining portion of the sequences find matching words and extend left and right from the word hit. Repeat the last step until the whole alignment ranges are processed.
seqData | Subject sequence [in] |
seqOffset | Starting offset of the subject sequence in alignment data [in] |
queryData | Query sequence [in] |
queryOffset | Starting offset of the query sequence in alignment data [in] |
query_words | Array of query words with word index corresponding to word's position in the query [in] |
align | Alignment data [in] |
Definition at line 1259 of file blast_kappa.c.
References ASSERT, BlastCompo_SequenceData::data, FALSE, BlastCompo_Alignment::matchEnd, BlastCompo_Alignment::matchStart, MIN, BlastCompo_Alignment::queryEnd, BlastCompo_Alignment::queryStart, s_ExtendLeft(), s_ExtendRight(), s_FindNumIdentical(), and TRUE.
Referenced by s_SequenceGetProteinRange(), and s_SequenceGetTranslatedRange().
|
static |
Callbacks used by the Blast_RedoOneMatch* routines.
Definition at line 2391 of file blast_kappa.c.
Referenced by s_GetAlignParams().