NCBI C++ ToolKit
|
Various auxiliary BLAST utility functions. More...
#include <algo/blast/core/ncbi_std.h>
#include <algo/blast/core/blast_program.h>
#include <algo/blast/core/blast_def.h>
#include <algo/blast/core/blast_query_info.h>
#include <algo/blast/core/blast_encoding.h>
Go to the source code of this file.
Go to the SVN repository for this file.
Macros | |
#define | IS_residue(x) (x <= 250) |
Does character encode a residue? More... | |
#define | NCBI2NA_MASK 0x03 |
Bit mask for obtaining a single base from a byte in ncbi2na format. More... | |
#define | NCBI2NA_UNPACK_BASE(x, N) (((x)>>(2*(N))) & NCBI2NA_MASK) |
Macro to extract base N from a byte x (N >= 0, N < 4) More... | |
#define | MAX_FULL_TRANSLATION 2100 |
Maximal unpacked subject sequence length for which full translation is performed up front. More... | |
#define | FENCE_SENTRY 201 |
This sentry value is used as a 'fence' around the valid portions of partially decoded sequences. More... | |
Functions | |
void | BlastSequenceBlkClean (BLAST_SequenceBlk *seq_blk) |
Deallocate memory only for the sequence in the sequence block. More... | |
BLAST_SequenceBlk * | BlastSequenceBlkFree (BLAST_SequenceBlk *seq_blk) |
Deallocate memory for a sequence block. More... | |
void | BlastSequenceBlkCopy (BLAST_SequenceBlk **copy, BLAST_SequenceBlk *src) |
Copies contents of the source sequence block without copying sequence buffers; sets all "field_allocated" booleans to FALSE, to make sure fields are not freed on the call to BlastSequenceBlkFree. More... | |
Int2 | BlastProgram2Number (const char *program, EBlastProgramType *number) |
Set number for a given program type. More... | |
Int2 | BlastNumber2Program (EBlastProgramType number, char **program) |
Return string name for program given a number. More... | |
Int2 | BlastSetUp_SeqBlkNew (const Uint1 *buffer, Int4 length, BLAST_SequenceBlk **seq_blk, Boolean buffer_allocated) |
Allocates memory for *seq_blk and then populates it. More... | |
Int2 | BlastSeqBlkNew (BLAST_SequenceBlk **retval) |
Allocates a new sequence block structure. More... | |
Int2 | BlastSeqBlkSetSequence (BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen) |
Stores the sequence in the sequence block structure. More... | |
Int2 | BlastSeqBlkSetCompressedSequence (BLAST_SequenceBlk *seq_blk, const Uint1 *sequence) |
Stores the compressed nucleotide sequence in the sequence block structure for the subject sequence when BLASTing 2 sequences. More... | |
Int2 | BlastSeqBlkSetSeqRanges (BLAST_SequenceBlk *seq_blk, SSeqRange *seq_ranges, Uint4 num_seq_ranges, Boolean copy_seq_ranges, ESubjectMaskingType mask_type) |
Sets the seq_range and related fields appropriately in the BLAST_SequenceBlk structure. More... | |
Int2 | BlastCompressBlastnaSequence (BLAST_SequenceBlk *seq_blk) |
Adds a specialized representation of sequence data to a sequence block. More... | |
Int4 | BLAST_GetTranslation (const Uint1 *query_seq, const Uint1 *query_seq_rev, Int4 nt_length, Int2 frame, Uint1 *buffer, const Uint1 *genetic_code) |
Gets the translation of the nucleotide sequence. More... | |
Int4 | BLAST_TranslateCompressedSequence (Uint1 *translation, Int4 length, const Uint1 *nt_seq, Int2 frame, Uint1 *prot_seq) |
Translate a nucleotide sequence without ambiguity codes. More... | |
Int2 | GetReverseNuclSequence (const Uint1 *sequence, Int4 length, Uint1 **rev_sequence_ptr) |
Reverse a nucleotide sequence in the blastna encoding, adding sentinel bytes on both ends. More... | |
Int1 | BLAST_ContextToFrame (EBlastProgramType prog_number, Uint4 context_number) |
This function translates the context number of a context into the frame of the sequence. More... | |
Int2 | BLAST_PackDNA (const Uint1 *buffer, Int4 length, EBlastEncoding encoding, Uint1 **packed_seq) |
Convert a sequence in ncbi4na or blastna encoding into a packed sequence in ncbi2na encoding. More... | |
size_t | BLAST_GetTranslatedProteinLength (size_t nucleotide_length, unsigned int context) |
Calculates the length of frame for a translated protein. More... | |
Int2 | BLAST_CreateMixedFrameDNATranslation (BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info) |
Initialize the mixed-frame sequence for out-of-frame gapped extension. More... | |
Int2 | BLAST_GetAllTranslations (const Uint1 *nucl_seq, EBlastEncoding encoding, Int4 nucl_length, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Uint4 **frame_offsets_ptr, Uint1 **mixed_seq_ptr) |
Translate nucleotide into 6 frames. More... | |
int | Blast_GetPartialTranslation (const Uint1 *nucl_seq, Int4 nucl_length, Int2 frame, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Int4 *protein_length, Uint1 **mixed_seq_ptr) |
Get one frame translation - needed when only parts of subject sequences are translated. More... | |
Int4 | BLAST_FrameToContext (Int2 frame, EBlastProgramType program) |
Convert translation frame or strand into a context number suitable for indexing into the BlastQueryInfo::contexts array. More... | |
Int4 | BSearchInt4 (Int4 n, Int4 *A, Int4 size) |
The following binary search routine assumes that array A is filled. More... | |
double * | BLAST_GetStandardAaProbabilities (void) |
Get the standard amino acid probabilities. More... | |
char * | BLAST_StrToUpper (const char *string) |
Returns a copy of the input string with all its characters turned to uppercase. More... | |
unsigned int | BLAST_GetNumberOfContexts (EBlastProgramType program) |
Get the number of contexts for a given program. More... | |
SBlastTargetTranslation * | BlastTargetTranslationFree (SBlastTargetTranslation *target_t) |
Free SBlastTargetTranslation. More... | |
Int2 | BlastTargetTranslationNew (BLAST_SequenceBlk *subject_blk, const Uint1 *gen_code_string, EBlastProgramType program_number, Boolean is_ooframe, SBlastTargetTranslation **target) |
Sets up structure for target translation. More... | |
Various auxiliary BLAST utility functions.
Definition in file blast_util.h.
#define FENCE_SENTRY 201 |
This sentry value is used as a 'fence' around the valid portions of partially decoded sequences.
If an alignment finds this value in a subject sequence, the fence_hit flag should be used to request a refetch of the whole sequence, and the alignment restarted.
Definition at line 364 of file blast_util.h.
#define IS_residue | ( | x | ) | (x <= 250) |
Does character encode a residue?
Definition at line 48 of file blast_util.h.
#define MAX_FULL_TRANSLATION 2100 |
Maximal unpacked subject sequence length for which full translation is performed up front.
Definition at line 356 of file blast_util.h.
#define NCBI2NA_MASK 0x03 |
Bit mask for obtaining a single base from a byte in ncbi2na format.
Definition at line 52 of file blast_util.h.
#define NCBI2NA_UNPACK_BASE | ( | x, | |
N | |||
) | (((x)>>(2*(N))) & NCBI2NA_MASK) |
Macro to extract base N from a byte x (N >= 0, N < 4)
Definition at line 55 of file blast_util.h.
Int1 BLAST_ContextToFrame | ( | EBlastProgramType | prog_number, |
Uint4 | context_number | ||
) |
This function translates the context number of a context into the frame of the sequence.
prog_number | Integer corresponding to the BLAST program |
context_number | Context number |
Definition at line 839 of file blast_util.c.
References abort(), Blast_QueryIsProtein(), eBlastTypeBlastn, eBlastTypeBlastx, eBlastTypeMapping, eBlastTypePhiBlastn, eBlastTypeRpsTblastn, eBlastTypeTblastx, INT1_MAX, NUM_FRAMES, and NUM_STRANDS.
Referenced by BLAST_GetAllTranslations(), Blast_GetSeqLocInfoVector(), BlastHSPStreamMerge(), BlastMaskLocDNAToProtein(), BlastMaskLocProteinToDNA(), BlastQueryInfoNew(), BlastTargetTranslationNew(), BOOST_AUTO_TEST_CASE(), OffsetArrayToContextOffsets(), s_AddMask(), s_BlastSearchEngineCore(), and x_TestGetSeqLocInfoVector().
Int2 BLAST_CreateMixedFrameDNATranslation | ( | BLAST_SequenceBlk * | query_blk, |
const BlastQueryInfo * | query_info | ||
) |
Initialize the mixed-frame sequence for out-of-frame gapped extension.
query_blk | Sequence block containing the concatenated frames of the query. The mixed-frame sequence is saved here. [in] [out] |
query_info | Query information structure containing offsets into the concatenated sequence. [in] |
Definition at line 931 of file blast_util.c.
References buffer, CODON_LENGTH, context, BlastQueryInfo::contexts, i, BlastQueryInfo::last_context, malloc(), NULL, NULLB, offset, BLAST_SequenceBlk::oof_sequence, BLAST_SequenceBlk::oof_sequence_allocated, BlastContextInfo::query_length, BlastContextInfo::query_offset, QueryInfo_GetSeqBufLen(), BLAST_SequenceBlk::sequence, and TRUE.
Referenced by BLAST_MainSetUp(), and BOOST_AUTO_TEST_CASE().
Int4 BLAST_FrameToContext | ( | Int2 | frame, |
EBlastProgramType | program | ||
) |
Convert translation frame or strand into a context number suitable for indexing into the BlastQueryInfo::contexts array.
frame | Frame (allowed values: 1,2,3,-1,-2,-3, 0) [in] |
program | Type of BLAST program [in] |
Definition at line 1211 of file blast_util.c.
References ASSERT, Blast_QueryIsNucleotide(), Blast_QueryIsTranslated(), Blast_SubjectIsNucleotide(), and Blast_SubjectIsTranslated().
Referenced by BLAST_GetGappedScore(), Blast_HSPGetTargetTranslation(), BLAST_SmithWatermanGetGappedScore(), Blast_TracebackFromHSPList(), BlastRPSWordFinder(), and s_BlastHSPListRPSUpdate().
Int2 BLAST_GetAllTranslations | ( | const Uint1 * | nucl_seq, |
EBlastEncoding | encoding, | ||
Int4 | nucl_length, | ||
const Uint1 * | genetic_code, | ||
Uint1 ** | translation_buffer_ptr, | ||
Uint4 ** | frame_offsets_ptr, | ||
Uint1 ** | mixed_seq_ptr | ||
) |
Translate nucleotide into 6 frames.
All frames are put into a translation buffer, with sentinel NULLB bytes in between. Array of offsets into the translation buffer is also returned. For out-of-frame gapping option, a mixed frame sequence is created.
nucl_seq | The nucleotide sequence [in] |
encoding | Sequence encoding: ncbi2na or ncbi4na [in] |
nucl_length | Length of the nucleotide sequence [in] |
genetic_code | The genetic code to be used for translations, in ncbistdaa encoding [in] |
translation_buffer_ptr | Buffer to hold the frames of the translated sequence. [out] |
frame_offsets_ptr | Offsets into the translation buffer for each frame. [out] |
mixed_seq_ptr | Pointer to buffer for the mixed frame sequence [out] |
Definition at line 1045 of file blast_util.c.
References BLAST_ContextToFrame(), BLAST_GetTranslation(), BLAST_TranslateCompressedSequence(), CODON_LENGTH, context, eBlastEncodingNcbi2na, eBlastEncodingNcbi4na, eBlastTypeBlastx, FALSE, free(), GetReverseNuclSequence(), i, malloc(), NULL, NULLB, NUM_FRAMES, offset, s_BlastGetTranslationTable(), sfree, and TRUE.
Referenced by BlastTargetTranslationNew(), BOOST_AUTO_TEST_CASE(), and s_BlastSearchEngineCore().
unsigned int BLAST_GetNumberOfContexts | ( | EBlastProgramType | program | ) |
Get the number of contexts for a given program.
This corresponds to the number of translation frames or strands whenever applicable.
Definition at line 1373 of file blast_util.c.
References Blast_ProgramIsValid(), Blast_QueryIsNucleotide(), Blast_QueryIsTranslated(), NUM_FRAMES, and NUM_STRANDS.
Referenced by BlastHSPStreamMerge(), BlastQueryInfoGetEffSearchSpace(), BlastQueryInfoGetQueryLength(), BlastQueryInfoNew(), BlastQueryInfoSetEffSearchSpace(), BlastSetUp_GetFilteringLocations(), CBlastAncillaryData::CBlastAncillaryData(), GetNumberOfContexts(), and CBlastQueryFilteredFrames::GetNumFrames().
int Blast_GetPartialTranslation | ( | const Uint1 * | nucl_seq, |
Int4 | nucl_length, | ||
Int2 | frame, | ||
const Uint1 * | genetic_code, | ||
Uint1 ** | translation_buffer_ptr, | ||
Int4 * | protein_length, | ||
Uint1 ** | mixed_seq_ptr | ||
) |
Get one frame translation - needed when only parts of subject sequences are translated.
nucl_seq | Pointer to start of nucleotide sequence to be translated [in] |
nucl_length | Length of nucleotide sequence to be translated [in] |
frame | What frame to translate into [in] |
genetic_code | What genetic code to use? [in] |
translation_buffer_ptr | Pointer to buffer with translated sequence [out] |
protein_length | Length of the translation buffer [out] |
mixed_seq_ptr | Pointer to buffer with mixed frame sequence, in case of out-of-frame gapping; buffer filled only if argument not NULL. [out] |
Definition at line 1141 of file blast_util.c.
References BLAST_GetTranslation(), CODON_LENGTH, GetReverseNuclSequence(), malloc(), NULL, offset, and sfree.
Referenced by Blast_HSPGetPartialSubjectTranslation(), and s_SequenceGetTranslatedRange().
double* BLAST_GetStandardAaProbabilities | ( | void | ) |
Get the standard amino acid probabilities.
This is basically a wrapper for BlastScoreBlkNew() and Blast_ResFreqStdComp() from blast_stat.c with a more intention-revealing name :) Caller is responsible for deallocating return value via sfree().
Definition at line 1323 of file blast_util.c.
References BlastScoreBlk::alphabet_code, BlastScoreBlk::alphabet_size, Blast_ResFreqFree(), Blast_ResFreqNew(), Blast_ResFreqStdComp(), BLASTAA_SEQ_CODE, BLASTAA_SIZE, i, malloc(), NULL, Blast_ResFreq::prob, BlastScoreBlk::protein_alphabet, and TRUE.
Referenced by _PSISequenceWeightsNew(), CConservationScoringMethod::CConservationScoringMethod(), ColumnResidueProfile::getBackgroundResFreq(), GetStandardProbability(), Kappa_compactSearchItemsNew(), PSICreatePssmFromFrequencyRatios(), s_GetPosBasedStartFreqRatios(), and CConservationScoringMethod::x_Init().
size_t BLAST_GetTranslatedProteinLength | ( | size_t | nucleotide_length, |
unsigned int | context | ||
) |
Calculates the length of frame for a translated protein.
nucleotide_length | Length of the nucleotide sequence translated [in] |
context | Index of the translated frame (values: 0 to 5, inclusive) [in] |
Definition at line 923 of file blast_util.c.
References CODON_LENGTH, and context.
Referenced by SetupQueryInfo_OMF().
Int4 BLAST_GetTranslation | ( | const Uint1 * | query_seq, |
const Uint1 * | query_seq_rev, | ||
Int4 | nt_length, | ||
Int2 | frame, | ||
Uint1 * | buffer, | ||
const Uint1 * | genetic_code | ||
) |
Gets the translation of the nucleotide sequence in the appropriate frame and with the appropriate genetic code.
The translation is written into the preallocated buffer supplied by the caller (see the buffer parameter below); the function does not allocate the output itself. The first and last positions of this buffer contain NULLB's.
query_seq | Forward strand of the nucleotide sequence [in] |
query_seq_rev | Reverse strand of the nucleotide sequence [in] |
nt_length | Length of the nucleotide sequence [in] |
frame | What frame to translate into? [in] |
buffer | Preallocated buffer for the translated sequence [in][out] |
genetic_code | Genetic code to use for translation, in ncbistdaa encoding [in] |
Definition at line 428 of file blast_util.c.
References ABS, CODON_LENGTH, FENCE_SENTRY, IS_residue, NULLB, and s_CodonToAA().
Referenced by BLAST_GetAllTranslations(), Blast_GetPartialTranslation(), Blast_HSPGetTargetTranslation(), BlastTargetTranslationNew(), and SetupQueries_OMF().
Int2 BLAST_PackDNA | ( | const Uint1 * | buffer, |
Int4 | length, | ||
EBlastEncoding | encoding, | ||
Uint1 ** | packed_seq | ||
) |
Convert a sequence in ncbi4na or blastna encoding into a packed sequence in ncbi2na encoding.
Needed for 2 sequences BLASTn comparison.
buffer | original sequence data (one base per byte) [in] |
length | length of the sequence data above [in] |
encoding | source encoding of the sequence data above [in] |
packed_seq | output buffer containing compressed sequence. Its length will be (length/COMPRESSION_RATIO + 1), caller is responsible for deallocating it [out] |
Definition at line 870 of file blast_util.c.
References abort(), buffer, COMPRESSION_RATIO, eBlastEncodingNucleotide, malloc(), NCBI2NA_MASK, and NCBI4NA_TO_BLASTNA.
char* BLAST_StrToUpper | ( | const char * | string | ) |
Returns a copy of the input string with all its characters turned to uppercase.
Useful for saving score matrix names. Caller is responsible for deallocating return value.
string | string to copy [in] |
Definition at line 1352 of file blast_util.c.
References NULL, NULLB, strdup, and toupper().
Referenced by Blast_ScoreBlkMatrixInit(), and BLAST_ValidateOptions().
Int4 BLAST_TranslateCompressedSequence | ( | Uint1 * | translation, |
Int4 | length, | ||
const Uint1 * | nt_seq, | ||
Int2 | frame, | ||
Uint1 * | prot_seq | ||
) |
Translate a nucleotide sequence without ambiguity codes.
This is used for the first-pass translation of the database. The genetic code to be used is determined by the translation_table. This function translates a packed (ncbi2na) nucleotide alphabet. It views a basepair as being in one of four sets of 2-bits: |0|1|2|3||0|1|2|3||0|1|2|3||...
1st byte | 2nd byte | 3rd byte...
A codon that starts at the beginning of the above sequence starts in state "0" and includes basepairs 0, 1, and 2. The next codon, in the same frame, after that starts in state "3" and includes 3, 0, and 1.
Optimization: changed the single main loop to
translation | The translation table [in] |
length | Length of the nucleotide sequence [in] |
nt_seq | The original nucleotide sequence [in] |
frame | What frame to translate to? [in] |
prot_seq | Preallocated buffer for the (translated) protein sequence, with NULLB sentinels on either end. [out] |
Definition at line 508 of file blast_util.c.
References ABS, CODON_LENGTH, NULL, and NULLB.
Referenced by BLAST_GetAllTranslations().
Int2 BlastCompressBlastnaSequence | ( | BLAST_SequenceBlk * | seq_blk | ) |
Adds a specialized representation of sequence data to a sequence block.
In the specialized representation, the byte at offset i packs together nucleotide bases i to i+3.
seq_blk | structure containing sequence data. Data is assumed to be in blastna format [in][out] |
Definition at line 459 of file blast_util.c.
References BLAST_SequenceBlk::compressed_nuc_seq, BLAST_SequenceBlk::compressed_nuc_seq_start, i, len, BLAST_SequenceBlk::length, malloc(), MIN, and BLAST_SequenceBlk::sequence.
Referenced by s_BlastSmallNaLookupFinalize().
Int2 BlastNumber2Program | ( | EBlastProgramType | number, |
char ** | program | ||
) |
Return string name for program given a number.
Return is zero on success.
number | Enumerated value of program [in] |
program | string name of program (memory should be deallocated by caller) [out] |
Definition at line 312 of file blast_util.c.
References eBlastTypeBlastn, eBlastTypeBlastp, eBlastTypeBlastx, eBlastTypeMapping, eBlastTypePhiBlastn, eBlastTypePhiBlastp, eBlastTypePsiBlast, eBlastTypePsiTblastn, eBlastTypeRpsBlast, eBlastTypeRpsTblastn, eBlastTypeTblastn, eBlastTypeTblastx, NULL, number, and strdup.
Referenced by Blast_ProgramNameFromType(), CRedoAlignmentTestFixture::runRedoAlignmentCoreUnitTest(), and s_ImportSearchStrategy().
Int2 BlastProgram2Number | ( | const char * | program, |
EBlastProgramType * | number | ||
) |
Set number for a given program type.
Return is zero on success.
program | string name of program [in] |
number | Enumerated value of program [out] |
Definition at line 278 of file blast_util.c.
References eBlastTypeBlastn, eBlastTypeBlastp, eBlastTypeBlastx, eBlastTypeMapping, eBlastTypePhiBlastn, eBlastTypePhiBlastp, eBlastTypePsiBlast, eBlastTypePsiTblastn, eBlastTypeRpsBlast, eBlastTypeRpsTblastn, eBlastTypeTblastn, eBlastTypeTblastx, eBlastTypeUndefined, NULL, number, and strcasecmp.
Referenced by NetworkProgram2BlastProgramType().
Int2 BlastSeqBlkNew | ( | BLAST_SequenceBlk ** | retval | ) |
Allocates a new sequence block structure.
retval | Pointer to where the sequence block structure will be allocated [out] |
Definition at line 133 of file blast_util.c.
References calloc().
Referenced by AascanTestFixture::AascanTestFixture(), BlastSetUp_SeqBlkNew(), BOOST_AUTO_TEST_CASE(), CompressedAascanTestFixture::CompressedAascanTestFixture(), CompressedAalookupTestFixture::GetSeqBlk(), AalookupTestFixture::GetSeqBlk(), InitializeBlastScoreBlk(), s_SetupSequencesForGappedReevaluateTest(), s_SetupSequencesForUngappedReevaluateNucl(), s_SetupSequencesForUngappedReevaluateTransl(), SetupQueries_OMF(), NtlookupTestFixture::SetUpQuery(), TestFixture::SetUpQuery(), NuclWordFinderTextFixture::setupSequences(), TestFixture::SetUpSubject(), SetupSubjects_OMF(), CPssmEngine::x_InitializeScoreBlock(), CRandomlyFailMockBlastSeqSrc::x_PopulateBLAST_SequenceBlk(), and CPhiblastTestFixture::x_SetupSequenceBlk().
Int2 BlastSeqBlkSetCompressedSequence | ( | BLAST_SequenceBlk * | seq_blk, |
const Uint1 * | sequence | ||
) |
Stores the compressed nucleotide sequence in the sequence block structure for the subject sequence when BLASTing 2 sequences.
This sequence should be encoded in eBlastEncodingNcbi2na and NOT have sentinel bytes (as this encoding doesn't allow them).
seq_blk | The sequence block structure to modify [in/out] |
sequence | Actual sequence buffer. [in] |
Definition at line 167 of file blast_util.c.
References NULL, BLAST_SequenceBlk::oof_sequence, BLAST_SequenceBlk::sequence, BLAST_SequenceBlk::sequence_allocated, and TRUE.
Referenced by BOOST_AUTO_TEST_CASE(), NuclWordFinderTextFixture::setupSequences(), TestFixture::SetUpSubject(), and SetupSubjects_OMF().
Int2 BlastSeqBlkSetSeqRanges | ( | BLAST_SequenceBlk * | seq_blk, |
SSeqRange * | seq_ranges, | ||
Uint4 | num_seq_ranges, | ||
Boolean | copy_seq_ranges, | ||
ESubjectMaskingType | mask_type | ||
) |
Sets the seq_range and related fields appropriately in the BLAST_SequenceBlk structure.
seq_blk | The sequence block structure to modify [in/out] |
seq_ranges | sequence ranges to copy [in] |
num_seq_ranges | number of elements in array above [in] |
copy_seq_ranges | set to TRUE if seq_ranges should be copied to the BLAST_SequenceBlk and assume its ownership, set to FALSE if the pointer should be copied and the ownership of the seq_ranges remains in the caller's possession. [in] |
mask_type | either kSoftDBMask or kHardDBMask [in] |
Definition at line 182 of file blast_util.c.
References ASSERT, calloc(), FALSE, BLAST_SequenceBlk::length, BLAST_SequenceBlk::mask_type, BLAST_SequenceBlk::num_seq_ranges, s_BlastSequenceBlkFreeSeqRanges(), BLAST_SequenceBlk::seq_ranges, BLAST_SequenceBlk::seq_ranges_allocated, tmp, and TRUE.
Referenced by AascanTestFixture::AascanTestFixture(), BOOST_AUTO_TEST_CASE(), CompressedAascanTestFixture::CompressedAascanTestFixture(), s_SeqDbGetSequence(), SetupSubjects_OMF(), and TestFixture::SkipMaskedRangesCore().
Int2 BlastSeqBlkSetSequence | ( | BLAST_SequenceBlk * | seq_blk, |
const Uint1 * | sequence, | ||
Int4 | seqlen | ||
) |
Stores the sequence in the sequence block structure.
seq_blk | The sequence block structure to modify [in/out] |
sequence | Actual sequence buffer. The first byte must be a sentinel byte [in] |
seqlen | Length of the sequence buffer above [in] |
Definition at line 147 of file blast_util.c.
References FALSE, BLAST_SequenceBlk::length, BLAST_SequenceBlk::nomask_allocated, NULL, BLAST_SequenceBlk::oof_sequence, BLAST_SequenceBlk::sequence, BLAST_SequenceBlk::sequence_nomask, BLAST_SequenceBlk::sequence_start, BLAST_SequenceBlk::sequence_start_allocated, BLAST_SequenceBlk::sequence_start_nomask, and TRUE.
Referenced by AascanTestFixture::AascanTestFixture(), BOOST_AUTO_TEST_CASE(), CompressedAascanTestFixture::CompressedAascanTestFixture(), CompressedAalookupTestFixture::GetSeqBlk(), AalookupTestFixture::GetSeqBlk(), InitializeBlastScoreBlk(), s_SetupSequencesForGappedReevaluateTest(), s_SetupSequencesForUngappedReevaluateNucl(), s_SetupSequencesForUngappedReevaluateTransl(), SetupQueries_OMF(), NtlookupTestFixture::SetUpQuery(), TestFixture::SetUpQuery(), NuclWordFinderTextFixture::setupSequences(), SetupSubjects_OMF(), CPssmEngine::x_InitializeScoreBlock(), CRandomlyFailMockBlastSeqSrc::x_PopulateBLAST_SequenceBlk(), and CPhiblastTestFixture::x_SetupSequenceBlk().
void BlastSequenceBlkClean | ( | BLAST_SequenceBlk * | seq_blk | ) |
Deallocate memory only for the sequence in the sequence block.
Definition at line 220 of file blast_util.c.
References FALSE, BLAST_SequenceBlk::nomask_allocated, BLAST_SequenceBlk::oof_sequence, BLAST_SequenceBlk::oof_sequence_allocated, s_BlastSequenceBlkFreeSeqRanges(), BLAST_SequenceBlk::sequence, BLAST_SequenceBlk::sequence_allocated, BLAST_SequenceBlk::sequence_start, BLAST_SequenceBlk::sequence_start_allocated, BLAST_SequenceBlk::sequence_start_nomask, and sfree.
Referenced by BlastSequenceBlkFree(), BOOST_AUTO_TEST_CASE(), s_SeqDbGetSequence(), and s_VDBSRC_GetSequence().
void BlastSequenceBlkCopy | ( | BLAST_SequenceBlk ** | copy, |
BLAST_SequenceBlk * | src | ||
) |
Copies contents of the source sequence block without copying sequence buffers; sets all "field_allocated" booleans to FALSE, to make sure fields are not freed on the call to BlastSequenceBlkFree.
copy | New sequence block [out] |
src | Input sequence block [in] |
Definition at line 259 of file blast_util.c.
References ASSERT, BlastMemDup(), copy(), and FALSE.
Referenced by s_MultiSeqGetSequence(), and s_QueryFactoryGetSequence().
BLAST_SequenceBlk* BlastSequenceBlkFree | ( | BLAST_SequenceBlk * | seq_blk | ) |
Deallocate memory for a sequence block.
Definition at line 245 of file blast_util.c.
References BlastMaskLocFree(), BlastSequenceBlkClean(), BLAST_SequenceBlk::compressed_nuc_seq_start, BLAST_SequenceBlk::lcase_mask, BLAST_SequenceBlk::lcase_mask_allocated, NULL, and sfree.
Referenced by BLAST_ComputeTraceback_MT(), BLAST_PreliminarySearchEngine(), BOOST_AUTO_TEST_CASE(), NaHashLookupThreadDataFree(), CRandomlyFailMockBlastSeqSrc::ReleaseSequence(), CSeedTop::Run(), s_checkDbSeqSrcFunctions(), s_ComputeNumIdentities(), s_MatchingSequenceRelease(), s_RPSComputeTraceback(), s_RPSPreliminarySearchEngine(), s_ScanSubjectForWordCounts(), SetupSubjects_OMF(), TestFixture::TearDownQuery(), TestFixture::TearDownSubject(), AalookupTestFixture::~AalookupTestFixture(), AascanTestFixture::~AascanTestFixture(), CMultiSeqInfo::~CMultiSeqInfo(), CompressedAalookupTestFixture::~CompressedAalookupTestFixture(), CompressedAascanTestFixture::~CompressedAascanTestFixture(), CQueryFactoryInfo::~CQueryFactoryInfo(), NtlookupTestFixture::~NtlookupTestFixture(), and NuclWordFinderTextFixture::~NuclWordFinderTextFixture().
Int2 BlastSetUp_SeqBlkNew | ( | const Uint1 * | buffer, |
Int4 | length, | ||
BLAST_SequenceBlk ** | seq_blk, | ||
Boolean | buffer_allocated | ||
) |
Allocates memory for *seq_blk and then populates it.
buffer | start of sequence [in] |
length | query sequence length [in] |
seq_blk | SequenceBlk to be allocated and filled in [out] |
buffer_allocated | Is the buffer allocated? If yes, 'sequence_start' is the start of the sequence, otherwise it is 'sequence'. [in] |
Definition at line 101 of file blast_util.c.
References ASSERT, BlastSeqBlkNew(), buffer, FALSE, NULL, and TRUE.
Referenced by NtlookupTestFixture::debruijnInit(), AalookupTestFixture::GetSeqBlk(), s_SeqDbGetSequence(), and s_VDBSRC_GetSequence().
SBlastTargetTranslation* BlastTargetTranslationFree | ( | SBlastTargetTranslation * | target_t | ) |
Free SBlastTargetTranslation.
target_t | object to be freed [in] |
Definition at line 1248 of file blast_util.c.
References NULL, SBlastTargetTranslation::num_frames, SBlastTargetTranslation::range, sfree, and SBlastTargetTranslation::translations.
Referenced by Blast_HSPListReevaluateUngapped(), Blast_TracebackFromHSPList(), BOOST_AUTO_TEST_CASE(), and s_ComputeNumIdentities().
Int2 BlastTargetTranslationNew | ( | BLAST_SequenceBlk * | subject_blk, |
const Uint1 * | gen_code_string, | ||
EBlastProgramType | program_number, | ||
Boolean | is_ooframe, | ||
SBlastTargetTranslation ** | target | ||
) |
Sets up structure for target translation.
subject_blk | Target sequence information [in] |
gen_code_string | Genetic code translation information [in] |
program_number | BLAST program [in] |
is_ooframe | Out-of-frame translation if true [in] |
target | Structure being set up. [out] |
Definition at line 1268 of file blast_util.c.
References BLAST_ContextToFrame(), BLAST_GetAllTranslations(), BLAST_GetTranslation(), calloc(), context, eBlastEncodingNcbi4na, eBlastTypeBlastx, SBlastTargetTranslation::gen_code_string, GetReverseNuclSequence(), BLAST_SequenceBlk::length, malloc(), NULL, NUM_FRAMES, SBlastTargetTranslation::num_frames, BLAST_SequenceBlk::oof_sequence, BLAST_SequenceBlk::oof_sequence_allocated, SBlastTargetTranslation::partial, SBlastTargetTranslation::program_number, SBlastTargetTranslation::range, BLAST_SequenceBlk::sequence_start, sfree, SBlastTargetTranslation::subject_blk, SBlastTargetTranslation::translations, and TRUE.
Referenced by Blast_HSPListReevaluateUngapped(), Blast_TracebackFromHSPList(), BOOST_AUTO_TEST_CASE(), and s_ComputeNumIdentities().
Int4 BSearchInt4 | ( | Int4 | n, |
Int4 * | A, | ||
Int4 | size | ||
) |
The following binary search routine assumes that array A is filled.
Definition at line 1231 of file blast_util.c.
References A, b, n, and size.
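Int2 GetReverseNuclSequence | ( | const Uint1 * | sequence, |
Int4 | length, | ||
Uint1 ** | rev_sequence_ptr | ||
) |
Reverse a nucleotide sequence in the blastna encoding, adding sentinel bytes on both ends.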
sequence | Forward strand of the sequence [in] |
length | Length of the sequence plus 1 for the sentinel byte [in] |
rev_sequence_ptr | Reverse strand of the sequence [out] |
Definition at line 807 of file blast_util.c.
References FENCE_SENTRY, malloc(), and NULLB.
Referenced by BLAST_GetAllTranslations(), Blast_GetPartialTranslation(), Blast_HSPGetTargetTranslation(), and BlastTargetTranslationNew().