33 #ifndef ALGO_BLAST_CORE__BLAST_UTIL__H
34 #define ALGO_BLAST_CORE__BLAST_UTIL__H
/**
 * Residue test: evaluates to nonzero when x is less than or equal to 250.
 *
 * The argument is wrapped in parentheses so that an expression built from
 * operators that bind more loosely than `<=` (for example `a & b` or
 * `c ? d : e`) is compared as a whole instead of being split by precedence.
 * x is evaluated exactly once.
 */
#define IS_residue(x) ((x) <= 250)
/** Value with only the two low-order bits set; isolates one 2-bit field. */
#define NCBI2NA_MASK 0x03
/**
 * Extract the N-th 2-bit field of x, counting from the least significant
 * end: N = 0 selects bits 0-1, N = 1 selects bits 2-3, and so on.
 * Both arguments are evaluated exactly once.
 */
#define NCBI2NA_UNPACK_BASE(x, N) (NCBI2NA_MASK & ((x) >> ((N) * 2)))
118 const Uint1* sequence,
130 const Uint1* sequence);
150 Uint4 num_seq_ranges,
180 const Uint1* genetic_code);
223 Uint1** rev_sequence_ptr);
290 Int4 nucl_length,
const Uint1* genetic_code,
291 Uint1** translation_buffer_ptr,
Uint4** frame_offsets_ptr,
292 Uint1** mixed_seq_ptr);
310 Uint1** translation_buffer_ptr,
Int4* protein_length,
311 Uint1** mixed_seq_ptr);
/** Integer constant 2100.
 *  NOTE(review): the name suggests a length threshold related to translating
 *  a sequence in full; the unit and the comparison it is used in are not
 *  visible here -- confirm against the code that uses this macro. */
356 #define MAX_FULL_TRANSLATION 2100
/** Integer constant 201.
 *  NOTE(review): presumably a sentinel ("fence") value stored in sequence
 *  data; where it is written and checked is not visible here -- confirm
 *  against the code that uses this macro. */
364 #define FENCE_SENTRY 201
390 const Uint1* gen_code_string,
Definitions used throughout BLAST.
ESubjectMaskingType
Define the possible subject masking types.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definitions for various programs supported by core BLAST.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definitions and functions associated with the BlastQueryInfo structure.
Int4 BLAST_FrameToContext(Int2 frame, EBlastProgramType program)
Convert translation frame or strand into a context number suitable for indexing into the BlastQueryIn...
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Int2 BlastSeqBlkSetSeqRanges(BLAST_SequenceBlk *seq_blk, SSeqRange *seq_ranges, Uint4 num_seq_ranges, Boolean copy_seq_ranges, ESubjectMaskingType mask_type)
Sets the seq_range and related fields appropriately in the BLAST_SequenceBlk structure.
Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen)
Stores the sequence in the sequence block structure.
size_t BLAST_GetTranslatedProteinLength(size_t nucleotide_length, unsigned int context)
Calculates the length of frame for a translated protein.
Int2 BLAST_CreateMixedFrameDNATranslation(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info)
Initialize the mixed-frame sequence for out-of-frame gapped extension.
Int2 BlastNumber2Program(EBlastProgramType number, char **program)
Return string name for program given a number.
Int2 BlastSeqBlkSetCompressedSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence)
Stores the compressed nucleotide sequence in the sequence block structure for the subject sequence wh...
int Blast_GetPartialTranslation(const Uint1 *nucl_seq, Int4 nucl_length, Int2 frame, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Int4 *protein_length, Uint1 **mixed_seq_ptr)
Get one frame translation - needed when only parts of subject sequences are translated.
Int2 BlastTargetTranslationNew(BLAST_SequenceBlk *subject_blk, const Uint1 *gen_code_string, EBlastProgramType program_number, Boolean is_ooframe, SBlastTargetTranslation **target)
Sets up structure for target translation.
Int2 BlastSetUp_SeqBlkNew(const Uint1 *buffer, Int4 length, BLAST_SequenceBlk **seq_blk, Boolean buffer_allocated)
Allocates memory for *seq_blk and then populates it.
Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)
This function translates the context number of a context into the frame of the sequence.
Int2 BLAST_PackDNA(const Uint1 *buffer, Int4 length, EBlastEncoding encoding, Uint1 **packed_seq)
Convert a sequence in ncbi4na or blastna encoding into a packed sequence in ncbi2na encoding.
SBlastTargetTranslation * BlastTargetTranslationFree(SBlastTargetTranslation *target_t)
Free SBlastTargetTranslation.
Int2 BlastCompressBlastnaSequence(BLAST_SequenceBlk *seq_blk)
Adds a specialized representation of sequence data to a sequence block.
void BlastSequenceBlkClean(BLAST_SequenceBlk *seq_blk)
Deallocate memory only for the sequence in the sequence block.
Int2 BLAST_GetAllTranslations(const Uint1 *nucl_seq, EBlastEncoding encoding, Int4 nucl_length, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Uint4 **frame_offsets_ptr, Uint1 **mixed_seq_ptr)
Translate nucleotide into 6 frames.
Int4 BLAST_GetTranslation(const Uint1 *query_seq, const Uint1 *query_seq_rev, Int4 nt_length, Int2 frame, Uint1 *buffer, const Uint1 *genetic_code)
GetTranslation to get the translation of the nucl.
Int2 BlastProgram2Number(const char *program, EBlastProgramType *number)
Set number for a given program type.
Int2 GetReverseNuclSequence(const Uint1 *sequence, Int4 length, Uint1 **rev_sequence_ptr)
Reverse a nucleotide sequence in the blastna encoding, adding sentinel bytes on both ends.
Int2 BlastSeqBlkNew(BLAST_SequenceBlk **retval)
Allocates a new sequence block structure.
unsigned int BLAST_GetNumberOfContexts(EBlastProgramType program)
Get the number of contexts for a given program.
Int4 BSearchInt4(Int4 n, Int4 *A, Int4 size)
The following binary search routine assumes that array A is filled.
double * BLAST_GetStandardAaProbabilities(void)
Get the standard amino acid probabilities.
char * BLAST_StrToUpper(const char *string)
Returns a copy of the input string with all its characters turned to uppercase.
void BlastSequenceBlkCopy(BLAST_SequenceBlk **copy, BLAST_SequenceBlk *src)
Copies contents of the source sequence block without copying sequence buffers; sets all "field_alloca...
Int4 BLAST_TranslateCompressedSequence(Uint1 *translation, Int4 length, const Uint1 *nt_seq, Int2 frame, Uint1 *prot_seq)
Translate a nucleotide sequence without ambiguity codes.
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int8_t Int1
1-byte (8-bit) signed integer
const struct ncbi::grid::netcache::search::fields::SIZE size
Type and macro definitions from C toolkit that are not defined in C++ toolkit.
Uint1 Boolean
bool replacement for C
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Structure to hold a sequence.
The query related information.
Information about target translations.
A structure containing two integers, used e.g.
static CS_CONTEXT * context