60 Int4 m = 0,
b = 0, e = 0;
62 if (ranges ==
NULL || num_ranges <= 0) {
70 if (ranges[m].left > target) {
78 if ( (target > ranges[
b].right) && (
b < (num_ranges-1) ) ) {
105 if (*seq_blk ==
NULL) {
110 ASSERT(seq_blk && *seq_blk);
112 if (buffer_allocated) {
113 (*seq_blk)->sequence_start_allocated =
TRUE;
116 (*seq_blk)->sequence = (*seq_blk)->sequence_start+1;
120 (*seq_blk)->sequence_start =
NULL;
123 (*seq_blk)->sequence_start_nomask = (*seq_blk)->sequence_start;
124 (*seq_blk)->sequence_nomask = (*seq_blk)->sequence;
125 (*seq_blk)->nomask_allocated =
FALSE;
127 (*seq_blk)->length = length;
128 (*seq_blk)->bases_offset = 0;
148 const Uint1* sequence,
168 const Uint1* sequence)
184 Uint4 num_seq_ranges,
190 if ( !seq_blk || !seq_ranges ) {
194 ASSERT(num_seq_ranges >= 1);
197 if (copy_seq_ranges) {
201 if ( !
tmp ) {
return -1; }
204 num_seq_ranges *
sizeof(*seq_ranges));
213 tmp[num_seq_ranges - 1].right = seq_blk->
length;
271 (*copy)->sequence_allocated =
FALSE;
272 (*copy)->sequence_start_allocated =
FALSE;
273 (*copy)->oof_sequence_allocated =
FALSE;
274 (*copy)->lcase_mask_allocated =
FALSE;
275 (*copy)->seq_ranges_allocated =
FALSE;
294 else if (
strcasecmp(
"rpsblast", program) == 0)
296 else if (
strcasecmp(
"rpstblastn", program) == 0)
298 else if (
strcasecmp(
"psiblast", program) == 0)
300 else if (
strcasecmp(
"psitblastn", program) == 0)
302 else if (
strcasecmp(
"phiblastn", program) == 0)
304 else if (
strcasecmp(
"phiblastp", program) == 0)
320 *program =
strdup(
"blastn");
323 *program =
strdup(
"blastp");
326 *program =
strdup(
"blastx");
329 *program =
strdup(
"tblastn");
332 *program =
strdup(
"tblastx");
335 *program =
strdup(
"rpsblast");
338 *program =
strdup(
"rpstblastn");
341 *program =
strdup(
"psiblast");
344 *program =
strdup(
"psitblastn");
347 *program =
strdup(
"phiblastp");
350 *program =
strdup(
"phiblastn");
353 *program =
strdup(
"mapper");
356 *program =
strdup(
"unknown");
374 register int i, j, k, index0, index1, index2;
375 static Uint1 mapping[4] = { 8,
385 if ((codon[0] | codon[1] | codon[2]) > 15) {
394 for (
i = 0;
i < 4;
i++) {
395 if (codon[0] & mapping[
i]) {
397 for (j = 0; j < 4; j++) {
398 if (codon[1] & mapping[j]) {
399 index1 = index0 + (j * 4);
400 for (k = 0; k < 4; k++) {
401 if (codon[2] & mapping[k]) {
432 Int4 index, index_prot;
436 nucl_seq = (frame >= 0 ? (
Uint1 *)query_seq : (
Uint1 *)(query_seq_rev+1));
443 codon[0] = nucl_seq[index];
444 codon[1] = nucl_seq[index+1];
445 codon[2] = nucl_seq[index+2];
449 prot_seq[index_prot] = residue;
453 prot_seq[index_prot] =
NULLB;
455 return index_prot - 1;
473 new_seq[-1] = new_seq[-2] = new_seq[-3] = 0;
474 new_seq[
len-3] = new_seq[
len-2] = new_seq[
len-1] = 0;
480 for (
i = 0;
i < max_start;
i++) {
481 curr_letter = curr_letter << 2 | (old_seq[
i] & 3);
482 new_seq[
i - max_start] = curr_letter;
488 for (;
i <
len;
i++) {
489 curr_letter = curr_letter << 2 | (old_seq[
i] & 3);
490 new_seq[
i - max_start] = curr_letter;
496 for (
i = 0;
i < max_start;
i++) {
497 curr_letter = curr_letter << 2;
498 new_seq[
len - (max_start -
i)] = curr_letter;
512 Int2 total_remainder;
514 int byte_value, codon=-1;
515 Uint1 last_remainder, last_byte, remainder;
516 Uint1* nt_seq_end,* nt_seq_start;
517 Uint1* prot_seq_start;
518 int byte_value1,byte_value2,byte_value3,byte_value4,byte_value5;
521 if (nt_seq ==
NULL || prot_seq ==
NULL ||
529 prot_seq_start = prot_seq;
531 remainder = length%4;
534 nt_seq_end = (
Uint1 *) (nt_seq + (length)/4 - 1);
535 last_remainder = (4*(length/4) - frame + 1)%
CODON_LENGTH;
536 total_remainder = last_remainder+remainder;
539 byte_value = *nt_seq;
542 while (nt_seq < nt_seq_end) {
545 codon = (byte_value >> 2);
546 *prot_seq = translation[codon];
550 codon = ((byte_value & 3) << 4);
552 byte_value = *nt_seq;
553 codon += (byte_value >> 4);
554 *prot_seq = translation[codon];
556 if (nt_seq >= nt_seq_end) {
562 codon = ((byte_value & 15) << 2);
564 byte_value = *nt_seq;
565 codon += (byte_value >> 6);
566 *prot_seq = translation[codon];
568 if (nt_seq >= nt_seq_end) {
574 codon = byte_value & 63;
575 *prot_seq = translation[codon];
578 byte_value = *nt_seq;
586 while (nt_seq < (nt_seq_end-10)) {
587 byte_value1 = *(++nt_seq);
588 byte_value2 = *(++nt_seq);
589 byte_value3 = *(++nt_seq);
591 codon = (byte_value >> 2);
592 *prot_seq = translation[codon];
596 codon = ((byte_value & 3) << 4);
597 codon += (byte_value1 >> 4);
598 *prot_seq = translation[codon];
601 byte_value4 = *(++nt_seq);
603 codon = ((byte_value1 & 15) << 2);
605 codon += (byte_value2 >> 6);
606 *prot_seq = translation[codon];
609 codon = byte_value2 & 63;
610 byte_value5 = *(++nt_seq);
611 *prot_seq = translation[codon];
615 codon = (byte_value3 >> 2);
616 *prot_seq = translation[codon];
619 byte_value = *(++nt_seq);
620 codon = ((byte_value3 & 3) << 4);
621 codon += (byte_value4 >> 4);
622 *prot_seq = translation[codon];
625 codon = ((byte_value4 & 15) << 2);
626 codon += (byte_value5 >> 6);
627 *prot_seq = translation[codon];
630 codon = byte_value5 & 63;
631 *prot_seq = translation[codon];
641 byte_value = *nt_seq;
642 codon = byte_value & 63;
644 *prot_seq = translation[codon];
646 }
else if (
state == 0) {
647 byte_value = *nt_seq;
648 codon = ((byte_value) >> 2);
650 *prot_seq = translation[codon];
655 byte_value = *(nt_seq_end);
656 last_byte = *(nt_seq_end+1);
658 codon = (last_byte >> 2);
659 }
else if (
state == 2) {
660 codon = ((byte_value & 15) << 2);
661 codon += (last_byte >> 6);
662 }
else if (
state == 3) {
663 codon = ((byte_value & 3) << 4);
664 codon += (last_byte >> 4);
666 *prot_seq = translation[codon];
670 nt_seq_start = (
Uint1 *) nt_seq;
672 state = remainder+frame;
679 codon = (last_byte >> 6);
680 byte_value = *nt_seq;
681 codon += ((byte_value & 15) << 2);
683 }
else if (
state == 1) {
684 codon = (last_byte >> 4);
685 byte_value = *nt_seq;
686 codon += ((byte_value & 3) << 4);
688 }
else if (
state == 2) {
689 codon = (last_byte >> 2);
692 *prot_seq = translation[codon];
695 state = 3 + (remainder + frame + 1);
699 byte_value = *nt_seq;
702 while (nt_seq > nt_seq_start) {
705 codon = (byte_value & 63);
706 *prot_seq = translation[codon];
710 codon = (byte_value >> 6);
712 byte_value = *nt_seq;
713 codon += ((byte_value & 15) << 2);
714 *prot_seq = translation[codon];
716 if (nt_seq <= nt_seq_start) {
722 codon = (byte_value >> 4);
724 byte_value = *nt_seq;
725 codon += ((byte_value & 3) << 4);
726 *prot_seq = translation[codon];
728 if (nt_seq <= nt_seq_start) {
734 codon = (byte_value >> 2);
735 *prot_seq = translation[codon];
738 byte_value = *nt_seq;
746 while (nt_seq > (nt_seq_start+10)) {
747 byte_value1 = *(--nt_seq);
748 byte_value2 = *(--nt_seq);
749 byte_value3 = *(--nt_seq);
751 codon = (byte_value & 63);
752 *prot_seq = translation[codon];
754 codon = (byte_value >> 6);
755 codon += ((byte_value1 & 15) << 2);
756 *prot_seq = translation[codon];
758 byte_value4 = *(--nt_seq);
759 codon = (byte_value1 >> 4);
760 codon += ((byte_value2 & 3) << 4);
761 *prot_seq = translation[codon];
763 codon = (byte_value2 >> 2);
764 *prot_seq = translation[codon];
766 byte_value5 = *(--nt_seq);
768 codon = (byte_value3 & 63);
769 *prot_seq = translation[codon];
771 byte_value = *(--nt_seq);
772 codon = (byte_value3 >> 6);
773 codon += ((byte_value4 & 15) << 2);
774 *prot_seq = translation[codon];
776 codon = (byte_value4 >> 4);
777 codon += ((byte_value5 & 3) << 4);
778 *prot_seq = translation[codon];
780 codon = (byte_value5 >> 2);
781 *prot_seq = translation[codon];
788 byte_value = *nt_seq;
790 codon = (byte_value & 63);
791 *prot_seq = translation[codon];
793 }
else if (
state == 2) {
794 codon = (byte_value >> 2);
795 *prot_seq = translation[codon];
802 return (
Int4)(prot_seq - prot_seq_start);
808 Uint1** rev_sequence_ptr)
814 Uint1 conversion_table[16] = {
821 if (!rev_sequence_ptr)
826 rev_sequence[0] = rev_sequence[length+1] =
NULLB;
828 for (index = 0; index < length; ++index) {
832 rev_sequence[length-index] = conversion_table[sequence[index]];
835 *rev_sequence_ptr = rev_sequence;
856 switch (context_number) {
857 case 0: frame = 1;
break;
858 case 1: frame = 2;
break;
859 case 2: frame = 3;
break;
860 case 3: frame = -1;
break;
861 case 4: frame = -2;
break;
862 case 5: frame = -3;
break;
863 default:
abort();
break;
875 Int4 index, new_index;
882 for (index=0, new_index=0; new_index < new_length-1;
885 new_buffer[new_index] =
891 new_buffer[new_index] =
903 for (; index < length; index++) {
905 case 0: shift = 6;
break;
906 case 1: shift = 4;
break;
907 case 2: shift = 2;
break;
913 new_buffer[new_index] |=
917 *packed_seq = new_buffer;
993 Int2 index1, index2, index3, bp1, bp2, bp3;
999 static Uint1 mapping[4] = {2,
1004 if (genetic_code ==
NULL)
1008 if (translation ==
NULL)
1011 for (index1=0; index1<4; index1++)
1013 for (index2=0; index2<4; index2++)
1015 for (index3=0; index3<4; index3++)
1027 codon = (mapping[bp1]<<4) + (mapping[bp2]<<2) + (mapping[bp3]);
1028 translation[(index3<<4) + (index2<<2) + index1] =
1029 genetic_code[codon];
1033 codon = (mapping[index1]<<4) + (mapping[index2]<<2) +
1035 translation[(index1<<4) + (index2<<2) + index3] =
1036 genetic_code[codon];
1046 Int4 nucl_length,
const Uint1* genetic_code,
1047 Uint1** translation_buffer_ptr,
Uint4** frame_offsets_ptr,
1048 Uint1** mixed_seq_ptr)
1050 Uint1* translation_buffer,* mixed_seq;
1051 Uint1* translation_table =
NULL,* translation_table_rc =
NULL;
1052 Uint1* nucl_seq_rev;
1055 Uint4* frame_offsets;
1058 Uint4 buffer_length =2*(nucl_length+1)+2;
1063 if ((translation_buffer =
1078 frame_offsets[0] = 0;
1086 nucl_length, nucl_seq, frame, translation_buffer+
offset);
1090 nucl_length, nucl_seq, frame, translation_buffer+
offset);
1095 nucl_length, frame, translation_buffer+
offset, genetic_code);
1105 sfree(nucl_seq_rev);
1107 free(translation_table);
1108 sfree(translation_table_rc);
1113 if (mixed_seq_ptr) {
1117 *mixed_seq_ptr = mixed_seq = (
Uint1*)
malloc(2*nucl_length+3);
1120 for (
i = 0;
i <= nucl_length; ++
i) {
1123 *seq++ = translation_buffer[frame_offsets[index+
context]+
offset];
1128 if (translation_buffer_ptr)
1129 *translation_buffer_ptr = translation_buffer;
1131 sfree(translation_buffer);
1133 if (frame_offsets_ptr)
1134 *frame_offsets_ptr = frame_offsets;
1136 sfree(frame_offsets);
1143 Uint1** translation_buffer_ptr,
Int4* protein_length,
1144 Uint1** mixed_seq_ptr)
1146 Uint1* translation_buffer;
1155 if (!mixed_seq_ptr) {
1156 if ((translation_buffer =
1159 sfree(nucl_seq_rev);
1165 nucl_length, frame, translation_buffer,
1168 *protein_length = length;
1171 Int2 frame_sign = ((frame < 0) ? -1 : 1);
1178 sfree(nucl_seq_rev);
1185 nucl_length, (
short)(frame_sign*index),
1186 translation_buffer+
offset, genetic_code);
1187 frame_offsets[index-1] =
offset;
1193 *protein_length = nucl_length;
1194 for (index = 0, seq = *mixed_seq_ptr; index <= nucl_length;
1196 *seq = translation_buffer[frame_offsets[index%
CODON_LENGTH] +
1201 sfree(nucl_seq_rev);
1202 if (translation_buffer_ptr)
1203 *translation_buffer_ptr = translation_buffer;
1205 sfree(translation_buffer);
1215 ASSERT(frame >= -3 && frame <= 3 && frame != 0);
1223 ASSERT(frame == 1 || frame == -1);
1224 return frame == 1 ? 0 : 1;
1256 for (index=0; index<target_t->
num_frames; index++)
1260 if (target_t->
range)
1269 const Uint1* gen_code_string,
1282 retval->
partial = !is_ooframe;
1309 sfree(nucl_seq_rev);
1327 double* retval =
NULL;
1345 retval[
i] = standard_probabilities->
prob[
i];
1354 char* retval =
NULL;
1366 for (p = retval; *p !=
NULLB; p++) {
1367 *p =
toupper((
unsigned char)(*p));
1399 if ( !progress_info ) {
1402 sfree(progress_info);
1408 if ( !progress_info ) {
ESubjectMaskingType
Define the possible subject masking types.
#define COMPRESSION_RATIO
Compression ratio of nucleotide bases (4 bases in 1 byte)
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
#define CODON_LENGTH
Codons are always of length 3.
#define NUM_STRANDS
Number of frames in a nucleotide sequence.
#define NUM_FRAMES
Number of frames to which we translate in translating searches.
@ ePrelimSearch
Preliminary stage.
BLAST filtering functions.
BlastMaskLoc * BlastMaskLocFree(BlastMaskLoc *mask_loc)
Deallocate memory for a BlastMaskLoc structure as well as the BlastSeqLoc's pointed to.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
Boolean Blast_QueryIsNucleotide(EBlastProgramType p)
Returns true if the query is nucleotide.
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
Boolean Blast_ProgramIsValid(EBlastProgramType p)
Returns true if program is not undefined.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Uint4 QueryInfo_GetSeqBufLen(const BlastQueryInfo *qinfo)
Get the number of bytes required for the concatenated sequence buffer, given a query info structure.
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
Blast_ResFreq * Blast_ResFreqFree(Blast_ResFreq *rfp)
Deallocates Blast_ResFreq and prob0 element.
Int2 Blast_ResFreqStdComp(const BlastScoreBlk *sbp, Blast_ResFreq *rfp)
Calculates residues frequencies given a standard distribution.
Blast_ResFreq * Blast_ResFreqNew(const BlastScoreBlk *sbp)
Allocates a new Blast_ResFreq structure and fills in the prob element based upon the contents of sbp.
Int4 BLAST_FrameToContext(Int2 frame, EBlastProgramType program)
Convert translation frame or strand into a context number suitable for indexing into the BlastQueryIn...
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
static void s_BlastSequenceBlkFreeSeqRanges(BLAST_SequenceBlk *seq_blk)
Auxiliary function to free the BLAST_SequenceBlk::seq_ranges field if applicable.
Int2 BlastSeqBlkSetSeqRanges(BLAST_SequenceBlk *seq_blk, SSeqRange *seq_ranges, Uint4 num_seq_ranges, Boolean copy_seq_ranges, ESubjectMaskingType mask_type)
Sets the seq_range and related fields appropriately in the BLAST_SequenceBlk structure.
void SBlastProgressReset(SBlastProgress *progress_info)
Resets the progress structure to its original state (as if newly allocated) for a fresh start without...
Int4 SSeqRangeArrayLessThanOrEqual(const SSeqRange *ranges, Int4 num_ranges, Int4 target)
Returns the index of the range, such that this element is the first range that either contains the ta...
Int2 BlastSeqBlkSetSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence, Int4 seqlen)
Stores the sequence in the sequence block structure.
SBlastProgress * SBlastProgressNew(void *user_data)
Allocates and initializes a new SBlastProgress structure.
SSeqRange SSeqRangeNew(Int4 start, Int4 stop)
Create a new SSeqRange structure with both fields initialized.
size_t BLAST_GetTranslatedProteinLength(size_t nucleotide_length, unsigned int context)
Calculates the length of frame for a translated protein.
SBlastProgress * SBlastProgressFree(SBlastProgress *progress_info)
Deallocates a SBlastProgress structure.
Int2 BLAST_CreateMixedFrameDNATranslation(BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info)
Initialize the mixed-frame sequence for out-of-frame gapped extension.
Int2 BlastNumber2Program(EBlastProgramType number, char **program)
Return string name for program given a number.
static Uint1 s_CodonToAA(Uint1 *codon, const Uint1 *codes)
Translate 3 nucleotides into an amino acid; MUST have 'X' as the unknown amino acid.
Int2 BlastSeqBlkSetCompressedSequence(BLAST_SequenceBlk *seq_blk, const Uint1 *sequence)
Stores the compressed nucleotide sequence in the sequence block structure for the subject sequence wh...
int Blast_GetPartialTranslation(const Uint1 *nucl_seq, Int4 nucl_length, Int2 frame, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Int4 *protein_length, Uint1 **mixed_seq_ptr)
Get one frame translation - needed when only parts of subject sequences are translated.
Int2 BlastTargetTranslationNew(BLAST_SequenceBlk *subject_blk, const Uint1 *gen_code_string, EBlastProgramType program_number, Boolean is_ooframe, SBlastTargetTranslation **target)
Sets up structure for target translation.
Int2 BlastSetUp_SeqBlkNew(const Uint1 *buffer, Int4 length, BLAST_SequenceBlk **seq_blk, Boolean buffer_allocated)
Allocates memory for *sequence_blk and then populates it.
Int1 BLAST_ContextToFrame(EBlastProgramType prog_number, Uint4 context_number)
This function translates the context number of a context into the frame of the sequence.
Int2 BLAST_PackDNA(const Uint1 *buffer, Int4 length, EBlastEncoding encoding, Uint1 **packed_seq)
Convert a sequence in ncbi4na or blastna encoding into a packed sequence in ncbi2na encoding.
SBlastTargetTranslation * BlastTargetTranslationFree(SBlastTargetTranslation *target_t)
Free SBlastTargetTranslation.
Int2 BlastCompressBlastnaSequence(BLAST_SequenceBlk *seq_blk)
Adds a specialized representation of sequence data to a sequence block.
Int4 BLAST_GetTranslation(const Uint1 *query_seq, const Uint1 *query_seq_rev, Int4 nt_length, Int2 frame, Uint1 *prot_seq, const Uint1 *genetic_code)
GetTranslation to get the translation of the nucl.
void BlastSequenceBlkClean(BLAST_SequenceBlk *seq_blk)
Deallocate memory only for the sequence in the sequence block.
Int2 BLAST_GetAllTranslations(const Uint1 *nucl_seq, EBlastEncoding encoding, Int4 nucl_length, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Uint4 **frame_offsets_ptr, Uint1 **mixed_seq_ptr)
Translate nucleotide into 6 frames.
unsigned int BLAST_GetNumberOfContexts(EBlastProgramType p)
Get the number of contexts for a given program.
Int2 BlastProgram2Number(const char *program, EBlastProgramType *number)
Set number for a given program type.
Int2 GetReverseNuclSequence(const Uint1 *sequence, Int4 length, Uint1 **rev_sequence_ptr)
Reverse a nucleotide sequence in the blastna encoding, adding sentinel bytes on both ends.
Int2 BlastSeqBlkNew(BLAST_SequenceBlk **retval)
Allocates a new sequence block structure.
Int4 BSearchInt4(Int4 n, Int4 *A, Int4 size)
The following binary search routine assumes that array A is filled.
static Uint1 * s_BlastGetTranslationTable(const Uint1 *genetic_code, Boolean reverse_complement)
Gets the translation array for a given genetic code.
char * BLAST_StrToUpper(const char *string)
Returns a copy of the input string with all its characters turned to uppercase.
void BlastSequenceBlkCopy(BLAST_SequenceBlk **copy, BLAST_SequenceBlk *src)
Copies contents of the source sequence block without copying sequence buffers; sets all "field_alloca...
void __sfree(void **x)
Implemented in blast_util.c.
double * BLAST_GetStandardAaProbabilities()
Get the standard amino acid probabilities.
Int4 BLAST_TranslateCompressedSequence(Uint1 *translation, Int4 length, const Uint1 *nt_seq, Int2 frame, Uint1 *prot_seq)
Translate a nucleotide sequence without ambiguity codes.
Various auxiliary BLAST utility functions.
#define NCBI2NA_MASK
Bit mask for obtaining a single base from a byte in ncbi2na format.
#define FENCE_SENTRY
This sentry value is used as a 'fence' around the valid portions of partially decoded sequences.
#define IS_residue(x)
Does character encode a residue?
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
#define BLASTAA_SIZE
Size of aminoacid alphabet.
const Uint1 NCBI4NA_TO_BLASTNA[]
Translates between ncbi4na and blastna.
#define BLASTAA_SEQ_CODE
== Seq_code_ncbistdaa
const Uint1 AMINOACID_TO_NCBISTDAA[]
Translates between ncbieaa and ncbistdaa.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
@ eBlastEncodingNcbi2na
NCBI2na.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int8_t Int1
1-byte (8-bit) signed integer
const struct ncbi::grid::netcache::search::fields::SIZE size
#define INT1_MAX
largest number representable by a signed one-byte integer
#define MIN(a, b)
returns smaller of a and b.
void * BlastMemDup(const void *orig, size_t size)
Copies memory using memcpy and malloc.
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define ABS(a)
returns absolute value of a (|a|)
#define NULLB
terminating byte of a char* string.
#define ASSERT
macro for assert.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Structure to hold a sequence.
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Uint1 * compressed_nuc_seq_start
start of compressed_nuc_seq
Uint4 num_seq_ranges
Number of elements in seq_ranges.
Boolean sequence_allocated
TRUE if memory has been allocated for sequence.
BlastMaskLoc * lcase_mask
Locations to be masked from operations on this sequence: lookup table for query; scanning for subject...
SSeqRange * seq_ranges
Ranges of the sequence to search.
Boolean lcase_mask_allocated
TRUE if memory has been allocated for lcase_mask.
Int4 length
Length of sequence.
ESubjectMaskingType mask_type
type of subject masking
Uint1 * sequence_nomask
Start of query sequence without masking.
Boolean seq_ranges_allocated
TRUE if memory has been allocated for seq_ranges.
Uint1 * sequence_start_nomask
Query sequence without masking.
Uint1 * sequence
Sequence used for search (could be translation).
Boolean oof_sequence_allocated
TRUE if memory has been allocated for oof_sequence.
Boolean nomask_allocated
If false the two above are just pointers to sequence and sequence_start.
Uint1 * compressed_nuc_seq
4-to-1 compressed version of sequence
Boolean sequence_start_allocated
TRUE if memory has been allocated for sequence_start.
Uint1 * oof_sequence
Mixed-frame protein representation of a nucleotide sequence for out-of-frame alignment.
Int4 query_length
Length of this query, strand or frame.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
The query related information.
BlastContextInfo * contexts
Information per context.
Int4 last_context
Index of the last element of the context array.
Structure used for scoring calculations.
Boolean protein_alphabet
TRUE if alphabet_code is for a protein alphabet (e.g., ncbistdaa etc.), FALSE for nt.
Int2 alphabet_size
size of alphabet.
Uint1 alphabet_code
NCBI alphabet code.
Stores the letter frequency of a sequence or database.
double * prob
letter probs, (possible) non-zero offset.
Progress monitoring structure.
EBlastStage stage
Stage of the BLAST search currently in progress.
void * user_data
Pointer to user-provided data.
Information about target translations.
EBlastProgramType program_number
Program being run.
Int4 * range
start and stop of translated sequences.
Int4 num_frames
how many frames, one dimension of translation_buffer.
const Uint1 * gen_code_string
Genetic code string for translation.
BLAST_SequenceBlk * subject_blk
target sequence being translated.
Uint1 ** translations
two dimensional array for translations.
Boolean partial
specifies that nucleotide sequence is too long to be translated.
A structure containing two integers, used e.g.
Int4 left
left endpoint of range (zero based)
Int4 right
right endpoint of range (zero based)
static CS_CONTEXT * context
static Uint4 reverse_complement(Uint4 seq, Uint1 size)
voidp calloc(uInt items, uInt size)