139 lookup_header =
info->lookup_header;
147 lookup->alphabet_size = 26;
149 lookup->alphabet_size = 28;
169 for (
i = 0;
i <
lookup->backbone_size;
i++) {
170 if (
lookup->rps_backbone[
i].num_used > 0) {
177 profile_header =
info->profile_header;
184 num_pssm_rows =
lookup->rps_seq_offsets[
lookup->num_profiles];
188 for (
i = 0;
i < num_pssm_rows + 1;
i++) {
189 lookup->rps_pssm[
i] = pssm_start;
190 pssm_start +=
lookup->alphabet_size;
200 for (
i = 0;
i <
lookup->num_buckets;
i++) {
217 for (
i = 0;
i <
lookup->num_buckets;
i++)
239 for (
i = 0;
i <
lookup->word_length;
i++)
270 Int4 overflow_cells_needed = 0;
271 Int4 overflow_cursor = 0;
272 Int4 longest_chain = 0;
276 #ifdef LOOKUP_VERBOSE
277 Int4 backbone_occupancy = 0;
278 Int4 thick_backbone_occupancy = 0;
279 Int4 num_overflows = 0;
283 for (
i = 0;
i <
lookup->backbone_size;
i++) {
284 if (
lookup->thin_backbone[
i]) {
285 #ifdef LOOKUP_VERBOSE
286 backbone_occupancy++;
289 #ifdef LOOKUP_VERBOSE
292 overflow_cells_needed +=
lookup->thin_backbone[
i][1];
294 if (
lookup->thin_backbone[
i][1] > longest_chain)
295 longest_chain =
lookup->thin_backbone[
i][1];
298 lookup->overflow_size = overflow_cells_needed;
299 lookup->longest_chain = longest_chain;
301 #ifdef LOOKUP_VERBOSE
302 thick_backbone_occupancy = backbone_occupancy - num_overflows;
303 printf(
"backbone size: %d\n",
lookup->backbone_size);
304 printf(
"backbone occupancy: %d (%f%%)\n", backbone_occupancy,
305 100.0 * backbone_occupancy /
lookup->backbone_size);
306 printf(
"thick_backbone occupancy: %d (%f%%)\n",
307 thick_backbone_occupancy,
308 100.0 * thick_backbone_occupancy /
lookup->backbone_size);
309 printf(
"num_overflows: %d\n", num_overflows);
310 printf(
"overflow size: %d\n", overflow_cells_needed);
311 printf(
"longest chain: %d\n", longest_chain);
312 printf(
"exact matches: %d\n",
lookup->exact_matches);
313 printf(
"neighbor matches: %d\n",
lookup->neighbor_matches);
317 lookup->bone_type = bone_type;
332 if (overflow_cells_needed > 0) {
338 for (
i = 0;
i <
lookup->backbone_size;
i++) {
340 if (
lookup->thin_backbone[
i] ) {
353 dest += overflow_cursor;
354 overflow_cursor +=
lookup->thin_backbone[
i][1];
356 for (j=0; j <
lookup->thin_backbone[
i][1]; j++)
357 dest[j] =
lookup->thin_backbone[
i][j + 2];
379 if (overflow_cells_needed > 0) {
385 for (
i = 0;
i <
lookup->backbone_size;
i++) {
386 if (
lookup->thin_backbone[
i] ) {
394 dest=((
Uint2 *) (
lookup->overflow))+overflow_cursor;
395 overflow_cursor +=
lookup->thin_backbone[
i][1];
397 for (j=0; j <
lookup->thin_backbone[
i][1]; j++)
398 dest[j] =
lookup->thin_backbone[
i][j + 2];
433 Int4 **exact_backbone;
440 for (
i = 0;
i <
lookup->alphabet_size;
i++) {
441 row_max[
i] = matrix[
i][0];
442 for (j = 1; j <
lookup->alphabet_size; j++)
443 row_max[
i] =
MAX(row_max[
i], matrix[
i][j]);
461 for (
i = 0;
i <
lookup->backbone_size;
i++) {
462 if (exact_backbone[
i] !=
NULL) {
464 exact_backbone[
i], query_bias, row_max);
469 sfree(exact_backbone);
482 #ifdef LOOKUP_VERBOSE
483 lookup->exact_matches += offset_list[1];
490 w =
query + offset_list[2];
494 score = matrix[w[0]][w[0]];
495 for (
i = 1;
i <
lookup->word_length;
i++)
496 score += matrix[w[
i]][w[
i]];
504 if (
lookup->threshold == 0 || score < lookup->threshold) {
505 for (
i = 0;
i < offset_list[1];
i++) {
508 query_bias + offset_list[
i + 2]);
511 #ifdef LOOKUP_VERBOSE
512 lookup->neighbor_matches -= offset_list[1];
518 if (
lookup->threshold == 0)
525 info.subject_word = s;
529 info.matrix = matrix;
530 info.row_max = row_max;
531 info.offset_list = offset_list;
533 info.query_bias = query_bias;
539 score = row_max[w[0]];
540 for (
i = 1;
i <
lookup->word_length;
i++)
541 score += row_max[w[
i]];
549 Int4 alphabet_size =
info->alphabet_size;
552 Uint1 *subject_word =
info->subject_word;
562 score -=
info->row_max[query_word[current_pos]];
563 row =
info->matrix[query_word[current_pos]];
565 if (current_pos ==
info->wordsize - 1) {
573 Int4 *offset_list =
info->offset_list;
580 for (
i = 0;
i < alphabet_size;
i++) {
581 if (score +
row[
i] >= threshold) {
582 subject_word[current_pos] =
i;
583 for (j = 0; j < offset_list[1]; j++) {
585 charsize, subject_word,
586 query_bias + offset_list[j + 2]);
588 #ifdef LOOKUP_VERBOSE
589 lookup->neighbor_matches += offset_list[1];
600 for (
i = 0;
i < alphabet_size;
i++) {
601 if (score +
row[
i] >= threshold) {
602 subject_word[current_pos] =
i;
633 for (
i = 0;
i < wordsize - 1;
i++) {
634 row_max[
i] =
row[
i][0];
635 for (j = 1; j <
lookup->alphabet_size; j++)
643 row_max[wordsize - 1] =
row[wordsize - 1][0];
644 for (
i = 1;
i <
lookup->alphabet_size;
i++)
645 row_max[wordsize - 1] =
MAX(row_max[wordsize - 1],
646 row[wordsize - 1][
i]);
655 for (
i = 0;
i < wordsize - 1;
i++)
656 row_max[
i] = row_max[
i + 1];
675 info.subject_word = s;
679 info.matrix = matrix;
680 info.row_max = row_max;
690 for (
i = 1;
i <
lookup->word_length;
i++)
699 Int4 alphabet_size =
info->alphabet_size;
701 Uint1 *subject_word =
info->subject_word;
711 score -=
info->row_max[current_pos];
712 row =
info->matrix[current_pos];
714 if (current_pos ==
info->wordsize - 1) {
727 for (
i = 0;
i < alphabet_size;
i++) {
728 if (score +
row[
i] >= threshold) {
729 subject_word[current_pos] =
i;
731 charsize, subject_word,
offset);
732 #ifdef LOOKUP_VERBOSE
733 lookup->neighbor_matches++;
744 for (
i = 0;
i < alphabet_size;
i++) {
745 if (score +
row[
i] >= threshold) {
746 subject_word[current_pos] =
i;
759 if (
lookup->curr_overflow_cell ==
762 Int4 bank_idx =
lookup->curr_overflow_bank + 1;
768 lookup->curr_overflow_bank++;
769 lookup->curr_overflow_cell = 0;
772 return lookup->overflow_banks[
lookup->curr_overflow_bank] +
773 lookup->curr_overflow_cell++;
790 switch (num_entries) {
833 if (cell_index == 0 ) {
866 static const Int4 W7p1[] = { 0, 10, 20, 30, 40, 50, 60, 70, 80, 90};
867 static const Int4 W7p2[] = { 0, 100, 200, 300, 400, 500, 600, 700, 800,
869 static const Int4 W7p3[] = { 0, 1000, 2000, 3000, 4000, 5000, 6000,
871 static const Int4 W7p4[] = { 0, 10000, 20000, 30000, 40000, 50000, 60000,
872 70000, 80000, 90000};
873 static const Int4 W7p5[] = { 0, 100000, 200000, 300000, 400000, 500000,
874 600000, 700000, 800000, 900000};
875 static const Int4 W7p6[] = { 0, 1000000, 2000000, 3000000, 4000000,
876 5000000, 6000000, 7000000, 8000000, 9000000};
878 static const Int4 W6p1[] = { 0, 15, 30, 45, 60, 75, 90, 105, 120, 135,
879 150, 165, 180, 195, 210};
880 static const Int4 W6p2[] = { 0, 225, 450, 675, 900, 1125, 1350, 1575,
881 1800, 2025, 2250, 2475, 2700, 2925, 3150};
882 static const Int4 W6p3[] = { 0, 3375, 6750, 10125, 13500, 16875, 20250,
883 23625, 27000, 30375, 33750, 37125, 40500,
885 static const Int4 W6p4[] = { 0, 50625, 101250, 151875, 202500, 253125,
886 303750, 354375, 405000, 455625, 506250,
887 556875, 607500, 658125, 708750};
888 static const Int4 W6p5[] = { 0, 759375, 1518750, 2278125, 3037500,
889 3796875, 4556250, 5315625, 6075000, 6834375,
890 7593750, 8353125, 9112500, 9871875, 10631250};
894 switch (
lookup->word_length) {
896 index = w[0] + W6p1[w[1]] + W6p2[w[2]] + W6p3[w[3]] + W6p4[w[4]];
899 index = w[0] + W6p1[w[1]] + W6p2[w[2]] + W6p3[w[3]] + W6p4[w[4]] + W6p5[w[5]];
902 index = w[0] + W7p1[w[1]] + W7p2[w[2]] + W7p3[w[3]] + W7p4[w[4]] + W7p5[w[5]] + W7p6[w[6]];
926 lookup->compressed_alphabet_size,
975 Int4 longChar, shortChar;
977 for (longChar = 0; longChar <
BLASTAA_SIZE; longChar++) {
978 for (shortChar = 0; shortChar <
979 info->compressed_alphabet_size; shortChar++) {
981 sortTable[shortChar].
diff =
info->row_max[longChar] -
982 info->matrix[longChar][shortChar];
983 sortTable[shortChar].
letter = shortChar;
986 qsort(sortTable,
info->compressed_alphabet_size,
989 for (
i = 0;
i <
info->compressed_alphabet_size;
i++) {
1007 Uint1 *subject_word =
info->subject_word;
1011 Int4 currQueryChar = query_word[current_pos];
1019 score -=
info->row_max[currQueryChar];
1020 rowSorted =
info->matrixSorted[currQueryChar];
1021 charSorted =
info->matrixSortedChar[currQueryChar];
1023 if (current_pos ==
info->wordsize - 1) {
1031 Int4 query_offset =
info->query_offset;
1033 for (
i = 0;
i <
info->compressed_alphabet_size &&
1034 (score + rowSorted[
i] >=
info->threshold);
i++) {
1035 subject_word[current_pos] = charSorted[
i];
1038 #ifdef LOOKUP_VERBOSE
1039 lookup->neighbor_matches++;
1049 for (
i = 0;
i <
info->compressed_alphabet_size &&
1050 (score + rowSorted[
i] >=
info->threshold);
i++) {
1051 subject_word[current_pos] = charSorted[
i];
1071 #ifdef LOOKUP_VERBOSE
1078 for (
i = 0;
i <
lookup->word_length;
i++) {
1079 int c =
lookup->compress_table[w[
i]];
1081 if (c >=
lookup->compressed_alphabet_size)
1084 score +=
info->matrix[w[
i]][c];
1093 if (
lookup->threshold == 0 || score < lookup->threshold) {
1097 #ifdef LOOKUP_VERBOSE
1098 lookup->neighbor_matches--;
1104 if (
lookup->threshold == 0)
1109 info->query_word = w;
1110 info->subject_word = s;
1111 info->query_offset = query_offset;
1117 score =
info->row_max[w[0]];
1118 for (
i = 1;
i <
lookup->word_length;
i++)
1119 score +=
info->row_max[w[
i]];
1136 Int4 ** compressed_matrix,
1150 for (
i = 0;
i <
lookup->alphabet_size;
i++) {
1151 info.row_max[
i] = compressed_matrix[
i][0];
1152 for (j = 1; j <
lookup->compressed_alphabet_size; j++)
1158 info.compressed_alphabet_size =
lookup->compressed_alphabet_size;
1160 info.matrix = compressed_matrix;
1185 Int4 longest_chain = 0;
1189 const Int4 kTargetPVBytes = 262144;
1190 #ifdef LOOKUP_VERBOSE
1192 Int4 histogram[HISTSIZE] = {0};
1193 Int4 backbone_occupancy = 0;
1194 Int4 num_overflows = 0;
1200 if (
lookup->backbone[
i].num_used)
1213 if (count <= 0.01 * lookup->backbone_size) {
1214 pv_array_bts +=
ilog2(
lookup->backbone_size / (8 * kTargetPVBytes));
1218 (
lookup->backbone_size >> pv_array_bts) + 1,
1220 lookup->pv_array_bts = pv_array_bts;
1224 for (
i = 0;
i <
lookup->backbone_size;
i++) {
1230 longest_chain =
MAX(
count, longest_chain);
1232 #ifdef LOOKUP_VERBOSE
1236 if (
count >= HISTSIZE)
1241 #ifdef LOOKUP_VERBOSE
1246 lookup->longest_chain = longest_chain;
1248 #ifdef LOOKUP_VERBOSE
1249 backbone_occupancy =
lookup->backbone_size - histogram[0];
1251 printf(
"backbone size: %d\n",
lookup->backbone_size);
1252 printf(
"backbone occupancy: %d (%f%%)\n", backbone_occupancy,
1253 100.0 * backbone_occupancy /
lookup->backbone_size);
1254 printf(
"num_overflows: %d\n", num_overflows);
1255 printf(
"longest chain: %d\n", longest_chain);
1256 printf(
"exact matches: %d\n",
lookup->exact_matches);
1257 printf(
"neighbor matches: %d\n",
lookup->neighbor_matches);
1258 printf(
"banks allocated: %d\n",
lookup->curr_overflow_bank + 1);
1259 printf(
"PV array: %d entries per bit\n", 1 << (
lookup->pv_array_bts -
1261 printf(
"Lookup table histogram:\n");
1262 for (
i = 0;
i < HISTSIZE;
i++) {
1263 printf(
"%d\t%d\n",
i, histogram[
i]);
1278 const double kMatrixScale = 100.0;
1286 ASSERT(word_size == 5 || word_size == 6 || word_size == 7);
1291 lookup->word_length = word_size;
1294 if (word_size == 6 || word_size == 5) {
1295 lookup->compressed_alphabet_size = 15;
1296 lookup->reciprocal_alphabet_size = 286331154;
1299 lookup->compressed_alphabet_size = 10;
1300 lookup->reciprocal_alphabet_size = 429496730;
1309 lookup->compressed_alphabet_size,
1311 if (new_alphabet ==
NULL)
1330 lookup->curr_overflow_bank = -1;
1339 table_scale =
iexp(
lookup->compressed_alphabet_size, word_size - 1);
1345 lookup->scaled_compress_table[
i] = -1;
1364 for (
i = 0;
i <=
lookup->curr_overflow_bank;
i++) {
static void s_CompressedLookupAddEncoded(BlastCompressedAaLookupTable *lookup, Uint1 *w, Int4 query_offset)
Add a single query offset to the compressed lookup table.
static void s_CompressedAddNeighboringWords(BlastCompressedAaLookupTable *lookup, Int4 **compressed_matrix, BLAST_SequenceBlk *query, BlastSeqLoc *location)
Index a query sequence.
static Int4 s_CompressedLookupFinalize(BlastCompressedAaLookupTable *lookup)
Complete the construction of a compressed protein lookup table.
struct LetterAndScoreDifferencePair LetterAndScoreDifferencePair
Structure used as a helper for sorting matrix according to substitution score.
static void s_AddPSSMWordHitsCore(NeighborInfo *info, Int4 score, Int4 current_pos)
Add neighboring words to the lookup table in case of a position-specific matrix, using NeighborInfo s...
Int4 BlastCompressedAaLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastCompressedAaLookupTable **lut, const LookupTableOptions *opt, BlastScoreBlk *sbp)
Create a new compressed protein lookup table.
static void s_CompressedAddWordHits(CompressedNeighborInfo *info, Uint1 *query, Int4 query_offset)
Add neighboring words to the lookup table (compressed alphabet).
static void s_AddWordHits(BlastAaLookupTable *lookup, Int4 **matrix, Uint1 *query, Int4 *offset_list, Int4 query_bias, Int4 *row_max)
Add neighboring words to the lookup table.
BlastCompressedAaLookupTable * BlastCompressedAaLookupTableDestruct(BlastCompressedAaLookupTable *lookup)
Free the compressed lookup table.
BlastAaLookupTable * BlastAaLookupTableDestruct(BlastAaLookupTable *lookup)
Free the lookup table.
static CompressedOverflowCell * s_CompressedListGetNewCell(BlastCompressedAaLookupTable *lookup)
Fetch next vacant cell from a bank.
static void s_CompressedLookupAddWordHit(BlastCompressedAaLookupTable *lookup, Int4 index, Int4 query_offset)
Add a single query offset to the compressed alphabet protein lookup table.
BlastRPSLookupTable * RPSLookupTableDestruct(BlastRPSLookupTable *lookup)
Free the lookup table.
static void s_AddPSSMNeighboringWords(BlastAaLookupTable *lookup, Int4 **matrix, Int4 query_bias, BlastSeqLoc *location)
A position-specific version of AddNeighboringWords.
Int2 RPSLookupTableNew(const BlastRPSInfo *info, BlastRPSLookupTable **lut)
Create a new RPS blast lookup table.
struct CompressedNeighborInfo CompressedNeighborInfo
Structure containing information needed for adding neighboring words (specific to compressed lookup t...
static void s_CompressedLookupAddUnencoded(BlastCompressedAaLookupTable *lookup, Uint1 *w, Int4 query_offset)
Add a single query offset to the compressed lookup table.
static void s_AddWordHitsCore(NeighborInfo *info, Int4 score, Int4 current_pos)
Add neighboring words to the lookup table using NeighborInfo structure.
static void s_CompressedAddWordHitsCore(CompressedNeighborInfo *info, Int4 score, Int4 current_pos)
Very similar to s_AddWordHitsCore.
void BlastAaLookupIndexQuery(BlastAaLookupTable *lookup, Int4 **matrix, BLAST_SequenceBlk *query, BlastSeqLoc *location, Int4 query_bias)
Index a protein query.
Int4 BlastAaLookupFinalize(BlastAaLookupTable *lookup, EBoneType bone_type)
Pack the data structures comprising a protein lookup table into their final form.
static void s_AddPSSMWordHits(BlastAaLookupTable *lookup, Int4 **matrix, Int4 query_bias, Int4 *row_max)
Add neighboring words to the lookup table in case of a position-specific matrix.
static void s_loadSortedMatrix(CompressedNeighborInfo *info)
Prepare a "score sorted" version of the substitution matrix.
static int ScoreDifferenceSort(const void *a, const void *b)
Comparison callback for the "sort".
Int4 BlastAaLookupTableNew(const LookupTableOptions *opt, BlastAaLookupTable **lut)
Create a new protein lookup table.
static void s_AddNeighboringWords(BlastAaLookupTable *lookup, Int4 **matrix, BLAST_SequenceBlk *query, Int4 query_bias, BlastSeqLoc *location)
Index a query sequence.
struct NeighborInfo NeighborInfo
Structure containing information needed for adding neighboring words.
Routines for creating protein BLAST lookup tables.
#define COMPRESSED_OVERFLOW_MAX_BANKS
The maximum number of banks (usually less than 10 are needed; memory will run out before this is insu...
#define COMPRESSED_HITS_PER_BACKBONE_CELL
number of query offsets to store in a backbone cell
#define COMPRESSED_HITS_CELL_MASK
#define RPS_BUCKET_SIZE
The number of regions into which the concatenated RPS blast database is split via bucket sorting.
#define AA_HITS_PER_CELL
maximum number of hits in one lookup table cell
#define COMPRESSED_OVERFLOW_CELLS_IN_BANK
number of cells in one bank of cells
static NCBI_INLINE Int4 s_ComputeCompressedIndex(Int4 wordsize, const Uint1 *word, Int4 compressed_alphabet_size, Int4 *skip, BlastCompressedAaLookupTable *lookup)
Convert a word to use a compressed alphabet.
#define sfree(x)
Safely free a pointer; belongs to a higher-level header.
Declarations of static arrays used to define some NCBI encodings to be used in a toolkit independent ...
void BlastLookupAddWordHit(Int4 **backbone, Int4 wordsize, Int4 charsize, Uint1 *seq, Int4 query_offset)
Add a single query offset to a generic lookup table.
#define PV_ARRAY_BTS
bits-to-shift from lookup_index to pv_array index.
void BlastLookupIndexQueryExactMatches(Int4 **backbone, Int4 word_length, Int4 charsize, Int4 lut_word_length, BLAST_SequenceBlk *query, BlastSeqLoc *locations)
Add all applicable query offsets to a generic lookup table.
#define PV_SET(lookup, index, shift)
Set the bit at position 'index' in the PV array bitfield within 'lookup'.
#define PV_ARRAY_TYPE
The pv_array 'native' type.
#define BLAST_WORDSIZE_PROT
length of word to trigger an extension.
#define RPS_MAGIC_NUM_28
Version number for 28-letter alphabet.
#define RPS_MAGIC_NUM
RPS blast version number.
SCompressedAlphabet * SCompressedAlphabetFree(SCompressedAlphabet *alphabet)
Free a compressed alphabet and score matrix.
SCompressedAlphabet * SCompressedAlphabetNew(BlastScoreBlk *sbp, Int4 compressed_alphabet_size, double scale_factor)
Allocate a new compressed alphabet and score matrix.
static int lookup(const char *name, const struct lookup_int *table)
static const char location[]
#define BLASTAA_SIZE
Size of the amino acid alphabet.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint16_t Uint2
2-byte (16-bit) unsigned integer
Utility functions for lookup table generation.
Int4 iexp(Int4 x, Int4 n)
Integer exponentiation using right to left binary algorithm.
Int4 ilog2(Int8 x)
Integer base two logarithm.
#define ASSERT
macro for assert.
#define MAX(a, b)
returns larger of a and b.
#define row(bind, expected)
structure defining one cell of the compacted lookup table
union AaLookupBackboneCell::@3 payload
union that specifies either entries stored right on the backbone if fewer than AA_HITS_PER_CELL are p...
Int4 entries[3]
if the number of hits for this cell is AA_HITS_PER_CELL or less, the hits are all stored directly in ...
Int4 overflow_cursor
integer offset into the overflow array where the list of hits for this cell begins
Int4 num_used
number of hits stored for this cell
structure defining one cell of the small (i.e., use short) lookup table
union AaLookupSmallboneCell::@4 payload
union that specifies either entries stored right on the backbone if fewer than AA_HITS_PER_CELL are p...
Uint2 num_used
number of hits stored for this cell
Int4 overflow_cursor
integer offset into the overflow array where the list of hits for this cell begins
Uint2 entries[3]
if the number of hits for this cell is AA_HITS_PER_CELL or less, the hits are all stored directly in ...
Structure to hold a sequence.
The basic lookup table structure for blastp searches.
The lookup table structure for protein searches using a compressed alphabet.
The RPS engine uses this structure to access all of the RPS blast related data (assumed to be collect...
The basic lookup table structure for RPS blast searches.
Structure used for scoring calculations.
Used to hold a set of positions, mostly used for filtering.
SSeqRange * ssr
location data on the sequence.
struct BlastSeqLoc * next
next in linked list
structure for hashtable of indexed query offsets
Int4 query_offsets[4]
storage for query offsets local to the backbone cell
CompressedMixedOffsets overflow_list
storage for remote query offsets
union CompressedLookupBackboneCell::@5 payload
structure for holding the list of query offsets
Int4 query_offsets[4 -2]
the query offsets stored locally
CompressedOverflowCell * head
head of linked list of cells of query offsets stored off the backbone
Structure containing information needed for adding neighboring words (specific to compressed lookup t...
Int4 matrixSorted[BLASTAA_SIZE][BLASTAA_SIZE]
version of substitution matrix whose rows are sorted by score
BlastCompressedAaLookupTable * lookup
Lookup table.
Int4 compressed_alphabet_size
for use with compressed alphabet
Int4 wordsize
number of residues in a word
Uint1 * subject_word
the computed neighboring word
Int4 ** matrix
the substitution matrix
Int4 query_offset
a single query offset to index
Uint1 matrixSortedChar[BLASTAA_SIZE][BLASTAA_SIZE]
matrix with the letters permuted identically to that of matrixSorted
Int4 threshold
the score threshold for neighboring words
Int4 row_max[BLASTAA_SIZE]
maximum possible score for each row of the matrix
Uint1 * query_word
the word whose neighbors we are computing
cell in list for holding query offsets
struct CompressedOverflowCell * next
pointer to next cell
Int4 query_offsets[4]
the query offsets stored in the cell
Structure used as a helper for sorting matrix according to substitution score.
Int4 diff
score difference from row maximum
Uint1 letter
given protein letter
Options needed to construct a lookup table. Also needed: query sequence and query length.
Int4 word_size
Determines the size of the lookup table.
double threshold
Score threshold for putting words in a lookup table (fractional values are allowed,...
Structure containing information needed for adding neighboring words.
Int4 ** matrix
the substitution matrix
Uint1 * subject_word
the computed neighboring word
Int4 alphabet_size
number of letters in the alphabet
Uint1 * query_word
the word whose neighbors we are computing
Int4 threshold
the score threshold for neighboring words
Int4 charsize
number of bits in a residue
Int4 query_bias
bias all stored offsets for multiple queries
BlastAaLookupTable * lookup
Lookup table.
Int4 * row_max
maximum possible score for each row of the matrix
Int4 * offset_list
list of offsets where the word occurs in the query
Int4 wordsize
number of residues in a word
structure defining one cell of the RPS lookup table
structure used for bucket sorting offsets retrieved from the RPS blast lookup table.
Int4 num_filled
number of offset pairs currently in bucket
Int4 num_alloc
max number of offset pairs bucket can hold
BlastOffsetPair * offset_pairs
list of offset pairs
int ** data
actual scoring matrix data, stored in row-major form
Scoring matrix data used for compressed protein alphabets.
Uint1 * compress_table
translation table (AA->compressed)
SBlastScoreMatrix * matrix
score matrix
Int4 left
left endpoint of range (zero based)
Int4 right
right endpoint of range (zero based)
This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...
static Uint4 letter(char c)
voidp calloc(uInt items, uInt size)