65 if (q_off == q_pos)
return TRUE;
90 if (index == q_pos)
return TRUE;
91 if (index == -1 || index >= 0)
return FALSE;
94 index = overflow[src_off++];
96 if (index == q_pos)
return TRUE;
97 index = overflow[src_off++];
125 num_hits =
lookup->thick_backbone[index].num_used;
128 lookup->thick_backbone[index].payload.entries :
129 lookup->overflow +
lookup->thick_backbone[index].payload.overflow_cursor;
131 for (
i=0;
i<num_hits; ++
i) {
132 if (lookup_pos[
i] == q_pos)
return TRUE;
157 Uint1 *subject0, *sf, *q_beg, *q_end, *s, *start;
158 Int2 remainder, base;
159 Int4 q_avail, s_avail;
161 base = 3 - (s_off % 4);
164 q_avail =
query->length - q_off;
165 s_avail =
subject->length - s_off;
167 q = q_beg = q_end =
query->sequence + q_off;
189 while ((s > start) || (s == start && base < remainder)) {
201 }
else if (sum < X) {
207 ungapped_data->
s_start = s_off - (q_off - ungapped_data->
q_start);
209 if (q_avail < s_avail) {
218 q =
query->sequence + q_off;
225 while (s < sf || (s == sf && base > remainder)) {
230 X_current = (-score > X) ? -score : X;
232 }
else if (sum < X_current)
241 ungapped_data->
length = (
Int4)(q_end - q_beg);
242 ungapped_data->
score = score;
266 const Int4 * score_table,
Int4 reduced_cutoff)
292 for (
i = 0;
i <
len; s--, q -= 4,
i++) {
293 Uint1 s_byte = s[-1];
294 Uint1 q_byte = (q[-4] << 6) | (q[-3] << 4) | (q[-2] << 2) | q[-1];
296 sum += score_table[q_byte ^ s_byte];
310 ungapped_data->
s_start = s_ext - (q_ext - ungapped_data->
q_start);
322 for (
i = 0;
i <
len; s++, q += 4,
i++) {
324 Uint1 q_byte = (q[0] << 6) | (q[1] << 4) | (q[2] << 2) | q[3];
326 sum += score_table[q_byte ^ s_byte];
337 if (score >= reduced_cutoff) {
341 s_off, X, ungapped_data);
345 ungapped_data->
score = score;
372 if (
table->chain[index].diag == diag) {
373 *level =
table->chain[index].level;
374 *hit_len =
table->chain[index].hit_len;
375 *hit_saved =
table->chain[index].hit_saved;
379 index =
table->chain[index].next;
409 if (
table->chain[index].diag == diag) {
410 table->chain[index].level = level;
412 table->chain[index].hit_saved = hit_saved;
419 table->chain[index].diag = diag;
420 table->chain[index].level = level;
422 table->chain[index].hit_saved = hit_saved;
426 index =
table->chain[index].next;
433 table->capacity *= 2;
462 Int4 lut_word_length,
473 index = (s[0] << 24 | s[1] << 16 | s[2] << 8) >> shift;
479 index = (s[0] << 24 | s[1] << 16 ) >> shift;
485 index = (s[0] << 24 | s[1] << 16 | s[2] << 8 | s[3]) >> shift;
489 (lookup_wrap, index, q_pos));
515 Uint4 lut_word_length,
520 Int4 context, q_range;
521 Int4 ext_to, ext_max;
522 Int4 q_end = *q_off + word_length;
523 Int4 s_end = *s_off + word_length;
531 if (word_length == lut_word_length)
return 1;
542 s_end - lut_word_length,
544 q_end - lut_word_length))
return 0;
547 for (;
TRUE; ++(*s_off), ++(*q_off)) {
549 *s_off, lut_word_length, *q_off))
break;
553 ext_to = word_length - (q_end - (*q_off));
554 ext_max =
MIN(q_range - q_end, s_range - s_end);
557 if (ext_to || locations) {
559 if (ext_to > ext_max)
return 0;
563 for (s_pos = s_end - lut_word_length,
564 q_pos = q_end - lut_word_length;
566 s_pos -= lut_word_length,
567 q_pos -= lut_word_length) {
569 s_pos, lut_word_length, q_pos))
return 0;
572 (*extended) = ext_to;
576 if (!check_double)
return 1;
581 ext_to += word_length;
582 ext_max =
MIN(ext_max, ext_to);
585 for (s_pos = s_end, q_pos = q_end;
586 *extended + lut_word_length <= ext_max;
587 s_pos += lut_word_length,
588 q_pos += lut_word_length,
589 (*extended) += lut_word_length) {
591 lut_word_length, q_pos))
break;
595 s_pos -= (lut_word_length - 1);
596 q_pos -= (lut_word_length - 1);
597 while (*extended < ext_max) {
599 lut_word_length, q_pos))
return 1;
605 return ((ext_max == ext_to) ? 2 : 1);
637 Int4 word_length,
Int4 lut_word_length,
645 Int4 diag, real_diag;
646 Int4 s_end, s_off_pos, s_end_pos;
653 Int4 last_hit, hit_saved;
664 real_diag = diag & diag_table->
diag_mask;
665 last_hit = hit_level_array[real_diag].
last_hit;
666 hit_saved = hit_level_array[real_diag].
flag;
667 s_end = s_off + word_length;
668 s_off_pos = s_off + diag_table->
offset;
669 s_end_pos = s_end + diag_table->
offset;
672 if (s_off_pos < last_hit)
return 0;
674 if (two_hits && (hit_saved || s_end_pos > last_hit +
window_size )) {
678 query_mask, query_info, s_range,
679 word_length, lut_word_length, lut,
TRUE, &extended);
680 if (!word_type)
return 0;
683 s_end_pos += extended;
686 if (word_type == 1) {
690 Int4 s_b = s_end_pos - 2 * word_length;
692 if (Delta < 0) Delta = 0;
698 && off_s_end -
delta >= s_a
699 && off_s_end - off_s_l <= s_b) {
704 off_s_end = hit_level_array[off_diag].
last_hit;
708 && off_s_end - off_s_l +
delta <= s_b) {
718 }
else if (check_masks) {
721 query_mask, query_info, s_range,
722 word_length, lut_word_length, lut,
FALSE, &extended))
return 0;
725 s_end_pos += extended;
731 cutoffs = word_params->
cutoffs + context;
732 ungapped_data = &dummy_ungapped_data;
744 s_off, -(cutoffs->
x_dropoff), ungapped_data);
755 *final_data = *ungapped_data;
763 ungapped_data =
NULL;
768 hit_level_array[real_diag].
last_hit = s_end_pos;
769 hit_level_array[real_diag].
flag = hit_ready;
771 diag_table->
hit_len_array[real_diag] = (hit_ready) ? 0 : s_end_pos - s_off_pos;
805 Int4 word_length,
Int4 lut_word_length,
814 Int4 s_end, s_off_pos, s_end_pos, s_l;
821 Int4 last_hit, hit_saved = 0;
828 diag = s_off - q_off;
829 s_end = s_off + word_length;
830 s_off_pos = s_off + hash_table->
offset;
831 s_end_pos = s_end + hash_table->
offset;
836 if(!rc) last_hit = 0;
839 if (s_off_pos < last_hit)
return 0;
841 if (two_hits && (hit_saved || s_end_pos > last_hit +
window_size )) {
845 query_mask, query_info, s_range,
846 word_length, lut_word_length, lut,
TRUE, &extended);
847 if (!word_type)
return 0;
850 s_end_pos += extended;
853 if (word_type == 1) {
856 Int4 s_b = s_end_pos - 2 * word_length;
858 if (Delta < 0) Delta = 0;
862 Int4 off_hit_saved = 0;
864 &off_s_end, &off_s_l, &off_hit_saved);
867 && off_s_end -
delta >= s_a
868 && off_s_end - off_s_l <= s_b) {
873 &off_s_end, &off_s_l, &off_hit_saved);
877 && off_s_end - off_s_l +
delta <= s_b) {
887 }
else if (check_masks) {
890 query_mask, query_info, s_range,
891 word_length, lut_word_length, lut,
FALSE, &extended))
return 0;
894 s_end_pos += extended;
901 cutoffs = word_params->
cutoffs + context;
902 ungapped_data = &dummy_ungapped_data;
914 s_off, -(cutoffs->
x_dropoff), ungapped_data);
926 *final_data = *ungapped_data;
934 ungapped_data =
NULL;
940 (hit_ready) ? 0 : s_end_pos - s_off_pos,
975 Int4 hits_extended = 0;
983 check_masks = !lut->
stride;
996 for (; index < num_hits; ++index) {
1003 query_info, s_range,
1004 word_length, word_length,
1006 word_params, matrix,
1013 for (; index < num_hits; ++index) {
1020 query_info, s_range,
1021 word_length, word_length,
1023 word_params, matrix,
1029 return hits_extended;
1063 Int4 hits_extended = 0;
1064 Int4 word_length, lut_word_length, ext_to;
1073 check_masks = !lut->
stride;
1081 ext_to = word_length - lut_word_length;
1093 for (; index < num_hits; ++index) {
1102 Int4 s_off = s_offset;
1106 for (; ext_left <
MIN(ext_to, s_offset); ++ext_left) {
1120 if (ext_left < ext_to) {
1122 s_off = s_offset + lut_word_length;
1123 if (s_off + ext_to - ext_left > s_range)
1125 q =
query->sequence + q_offset + lut_word_length;
1128 for (; ext_right < ext_to - ext_left; ++ext_right) {
1139 if (ext_left + ext_right < ext_to)
1143 q_offset -= ext_left;
1144 s_offset -= ext_left;
1153 query_info, s_range,
1154 word_length, lut_word_length,
1156 word_params, matrix,
1164 query_info, s_range,
1165 word_length, lut_word_length,
1167 word_params, matrix,
1173 return hits_extended;
1206 Int4 hits_extended = 0;
1207 Int4 word_length, lut_word_length, ext_to;
1216 check_masks = !lut->
stride;
1224 ext_to = word_length - lut_word_length;
1236 for (; index < num_hits; ++index) {
1246 Int4 ext_max =
MIN(ext_to, s_offset);
1250 for (; ext_left < ext_max; s--, q -= 4, ++ext_left) {
1253 if ((
byte & 3) != q[-1] || ++ext_left == ext_max)
1255 if (((
byte >> 2) & 3) != q[-2] || ++ext_left == ext_max)
1257 if (((
byte >> 4) & 3) != q[-3] || ++ext_left == ext_max)
1259 if ((
byte >> 6) != q[-4])
1267 if (ext_left < ext_to) {
1269 ext_max = ext_to -ext_left;
1270 if (s_offset + lut_word_length + ext_max > s_range)
1272 q =
query->sequence + q_offset + lut_word_length;
1275 for (; ext_right < ext_max; s++, q += 4, ++ext_right) {
1278 if ((
byte >> 6) != q[0] || ++ext_right == ext_max)
1280 if (((
byte >> 4) & 3) != q[1] || ++ext_right == ext_max)
1282 if (((
byte >> 2) & 3) != q[2] || ++ext_right == ext_max)
1284 if ((
byte & 3) != q[3])
1289 if (ext_left + ext_right < ext_to)
1293 q_offset -= ext_left;
1294 s_offset -= ext_left;
1303 query_info, s_range,
1304 word_length, lut_word_length,
1306 word_params, matrix,
1314 query_info, s_range,
1315 word_length, lut_word_length,
1317 word_params, matrix,
1323 return hits_extended;
1331 4, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1332 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1333 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1334 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1335 3, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1336 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1337 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1338 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1339 3, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1340 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1341 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1342 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1343 3, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1344 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1345 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1346 2, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0,
1354 4, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
1355 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1356 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1357 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1392 Int4 hits_extended = 0;
1396 Int4 ext_to = word_length - lut_word_length;
1400 for (; index < num_hits; ++index) {
1415 if ( (s_offset > 0) && (q_offset > 0) ) {
1416 Uint1 q_byte = q[q_offset - 4];
1419 ext_left =
MIN(
MIN(ext_left, ext_to), q_offset - q_start);
1424 if ((ext_left < ext_to) && ((q_offset + lut_word_length) <
query->length)) {
1425 Uint1 q_byte = q[q_offset + lut_word_length];
1428 ext_right =
MIN(
MIN(ext_right, s_range - (s_offset + lut_word_length)),
1429 q_range - (q_offset + lut_word_length));
1430 if (ext_left + ext_right < ext_to)
1434 q_offset -= ext_left;
1435 s_offset -= ext_left;
1441 query_info, s_range,
1442 word_length, lut_word_length,
1444 word_params, matrix,
1453 query_info, s_range,
1454 word_length, lut_word_length,
1456 word_params, matrix,
1462 return hits_extended;
1497 Int4 hits_extended = 0;
1504 for (; index < num_hits; ++index) {
1514 Int4 ext_max =
MIN(
MIN(word_length - lut_word_length, s_offset), q_offset - q_start);
1532 while (ext_left < ext_max) {
1533 Uint1 q_byte = q[q_off - 4];
1542 ext_left =
MIN(ext_left, ext_max);
1549 ext_max =
MIN(
MIN(word_length - ext_left, s_range - s_off), q_range - q_off);
1550 while (ext_right < ext_max) {
1551 Uint1 q_byte = q[q_off];
1560 ext_right =
MIN(ext_right, ext_max);
1562 if (ext_left + ext_right < word_length)
1565 q_offset -= ext_left;
1566 s_offset -= ext_left;
1572 query_info, s_range,
1573 word_length, lut_word_length,
1575 word_params, matrix,
1583 query_info, s_range,
1584 word_length, lut_word_length,
1586 word_params, matrix,
1592 return hits_extended;
1609 Int4 hitsfound, total_hits = 0;
1610 Int4 hits_extended = 0;
1615 Int4 lut_word_length;
1620 word_length =
lookup->word_length;
1621 lut_word_length =
lookup->lut_word_length;
1629 word_length =
lookup->template_length;
1630 lut_word_length =
lookup->template_length;
1632 word_length =
lookup->word_length;
1633 lut_word_length =
lookup->lut_word_length;
1641 word_length =
lookup->word_length;
1642 lut_word_length =
lookup->lut_word_length;
1649 scan_range[2] =
subject->length - lut_word_length;
1666 scan_range[1] =
subject->seq_ranges[0].left + word_length - lut_word_length;
1667 scan_range[2] =
subject->seq_ranges[0].right - lut_word_length;
1675 hitsfound = scansub(lookup_wrap,
subject, offset_pairs, max_hits, &scan_range[1]);
1680 total_hits += hitsfound;
1681 hits_extended += extend(offset_pairs, hitsfound, word_params,
1683 query_info, ewp, init_hitlist, scan_range[2] + lut_word_length);
1689 init_hitlist->
total);
1731 if( check_oid( oid, &last_vol_idx ) ==
eNotIndexed ) {
1733 subject,
query, query_info, lookup_wrap, matrix,word_params,
1734 ewp, offset_pairs, max_hits, init_hitlist, ungapped_stats );
1743 hsp_end = hsp + init_hitlist->
total;
1745 for( ; hsp < hsp_end; ++hsp ) {
1748 diag =
IR_DIAG( q_off, s_off );
1754 cutoffs = word_params->
cutoffs + context;
1757 q_off, s_off + word_size, s_off,
1758 -(cutoffs->
x_dropoff), &dummy_ungapped_data,
1765 *ungapped_data = dummy_ungapped_data;
1766 if( new_hsp != hsp ) *new_hsp = *hsp;
1776 if( new_hsp != hsp ) *new_hsp = *hsp;
1896 for (
i = 1;
i < num_arrays;
i++) {
1946 Int4 hitsfound, total_hits = 0;
1947 Int4 hits_extended = 0;
1951 Int4 lut_word_length;
1957 if (*hsp_list_ptr ==
NULL) {
1962 hsp_list = *hsp_list_ptr;
1972 word_length =
lookup->word_length;
1973 lut_word_length =
lookup->lut_word_length;
1980 word_length =
lookup->template_length;
1981 lut_word_length =
lookup->template_length;
1983 word_length =
lookup->word_length;
1984 lut_word_length =
lookup->lut_word_length;
1992 word_length =
lookup->word_length;
1993 lut_word_length =
lookup->lut_word_length;
1999 word_length =
lookup->word_length;
2000 lut_word_length =
lookup->lut_word_length;
2006 scan_range[2] =
subject->length - lut_word_length;
2018 scan_range[1] =
subject->seq_ranges[0].left + word_length - lut_word_length;
2019 scan_range[2] =
subject->seq_ranges[0].right - lut_word_length;
2029 if (getenv(
"MAPPER_USE_SMALL_WORDS")) {
2035 hitsfound = scansub(lookup_wrap,
subject, offset_pairs, max_hits, &scan_range[1]);
2037 if (hitsfound >= 0) {
2045 for (
i = 0;
i < hitsfound;
i++) {
2051 Int4 diag = s_off - q_off;
2056 word_hits->
last_pos[context] = s_off;
2066 if (last_p != 0 && last_d == diag &&
2067 s_off - last_p < lut_word_length + 1) {
2071 ASSERT(index < word_hits->num_arrays);
2079 word_hits->
num[index],
2080 word_params, score_params,
2087 scan_range[2] + lut_word_length,
2090 word_hits->
num[index] = 0;
2102 total_hits += hitsfound;
2104 word_params, score_params,
2111 scan_range[2] + lut_word_length,
2117 if (!read_is_query) {
2126 if (word_hits->
num[
i] > 0) {
2129 word_params, score_params,
2136 scan_range[2] + lut_word_length,
2140 word_hits->
num[
i] = 0;
2196 if( check_oid( oid, &last_vol_idx ) ==
eNotIndexed ) {
2199 score_params, hit_params, offset_pairs, word_hits, max_hits,
2200 gap_align, init_hitlist, hsp_list, ungapped_stats,
2207 if (*hsp_list ==
NULL) {
2212 if( word_size > 0) {
2216 hsp_end = hsp + init_hitlist->
total;
2218 for( ; hsp < hsp_end; ++hsp ) {
2223 diag =
IR_DIAG( q_off, s_off );
2233 Uint1* query_seq =
query->sequence + query_start;
2237 Int4 num_identical = 0;
2238 Int4 right_ungapped_ext_len = 0;
2245 q_off - query_start,
2250 &right_ungapped_ext_len);
2268 q_off - query_start, s_off,
static int lookup(const char *name, const struct lookup_int *table)
#define COMPRESSION_RATIO
Compression ratio of nucleotide bases (4 bases in 1 byte)
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
void Blast_UngappedStatsUpdate(BlastUngappedStats *ungapped_stats, Int4 total_hits, Int4 extended_hits, Int4 saved_hits)
Fill data in the ungapped hits diagnostics structure.
Boolean BLAST_SaveInitialHit(BlastInitHitList *init_hitlist, Int4 q_off, Int4 s_off, BlastUngappedData *ungapped_data)
Save the initial hit data into the initial hit list structure.
void Blast_InitHitListSortByScore(BlastInitHitList *init_hitlist)
Sort array of initial HSPs by score.
Int2 Blast_ExtendWordExit(Blast_ExtendWord *ewp, Int4 subject_length)
Update the word extension structure after scanning of each subject sequence.
#define DIAGHASH_NUM_BUCKETS
Number of hash buckets in BLAST_DiagHash.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
Int4 BlastHspNumMax(Boolean gapped_calculation, const BlastHitSavingOptions *options)
Calculates the number of HSPs that should be saved.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
BlastHSPMappingInfo * BlastHSPMappingInfoNew(void)
Allocate memory for an HSP's additional data structure.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
#define PV_ARRAY_BTS
bits-to-shift from lookup_index to pv_array index.
#define PV_TEST(lookup, index, shift)
Test the bit at position 'index' in the PV array bitfield within 'lookup'.
#define PV_ARRAY_TYPE
The pv_array 'native' type.
Routines for creating nucleotide BLAST lookup tables.
#define NA_HITS_PER_CELL
maximum number of hits in one lookup table cell
Routines for scanning nucleotide BLAST lookup tables.
void * BlastChooseNucleotideScanSubjectAny(LookupTableWrap *lookup_wrap)
Return the most generic function to scan through nucleotide subject sequences.
Int4(* TNaScanSubjectFunction)(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *subject, BlastOffsetPair *offset_pairs, Int4 max_hits, Int4 *scan_range)
Generic prototype for nucleotide subject scanning routines.
@ eSmallNaLookupTable
lookup table for blastn with small query
@ eMBLookupTable
megablast lookup table (includes both contiguous and discontiguous megablast)
@ eNaHashLookupTable
used for 16-base words
@ eDiagHash
use hash table (blastn only)
Int4 BSearchContextInfo(Int4 n, const BlastQueryInfo *A)
Search BlastContextInfo structures for the specified offset.
Various auxiliary BLAST utility functions.
#define NCBI2NA_UNPACK_BASE(x, N)
Macro to extract base N from a byte x (N >= 0, N < 4)
static void get_results(DBPROCESS *dbproc, int start)
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
ir_diag_hash * ir_hash_create(void)
Hash table constructor.
ir_diag_hash * ir_hash_destroy(ir_diag_hash *hash)
Hash table destructor.
Declarations of structures needed to implement diagonal hash to support ungapped extensions for index...
#define IR_LOCATE(hash, diag, key)
Find a hash table entry for the given diagonal.
#define IR_KEY(diag)
Compute the hash key from a diagonal identifier.
#define IR_DIAG(qoff, soff)
Compute diagonal identifier from subject and query offsets.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
Int4 BlastNaExtendJumper(BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, BlastQueryInfo *query_info, BlastGapAlignStruct *gap_align, BlastHSPList *hsp_list, Uint4 s_range, SubjectIndex *s_index)
Extend a list of word hits.
int JumperGappedAlignmentCompressedWithTraceback(const Uint1 *query, const Uint1 *subject, Int4 query_length, Int4 subject_length, Int4 query_start, Int4 subject_start, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 *num_identical, Int4 *right_ungapped_ext_len)
Jumper gapped alignment with traceback; 1 base per byte in query, 4 bases per byte in subject.
int JumperFindSpliceSignals(BlastHSP *hsp, Int4 query_len, const Uint1 *subject, Int4 subject_len)
Find splice signals at the edges of an HSP and save them in the HSP.
Boolean JumperGoodAlign(const BlastGapAlignStruct *gap_align, const BlastHitSavingParameters *hit_params, Int4 num_identical, BlastContextInfo *context_info)
Test whether an HSP should be saved.
SubjectIndex * SubjectIndexNew(BLAST_SequenceBlk *subject, Int4 width, Int4 word_size)
Index a sequence, used for indexing compressed nucleotide subject sequence.
SubjectIndex * SubjectIndexFree(SubjectIndex *sindex)
Free subject index structure.
JumperEditsBlock * JumperFindEdits(const Uint1 *query, const Uint1 *subject, BlastGapAlignStruct *gap_align)
GapEditScript * JumperPrelimEditBlockToGapEditScript(JumperPrelimEditBlock *rev_prelim_block, JumperPrelimEditBlock *fwd_prelim_block)
Convert Jumper's preliminary edit script to GapEditScript.
#define SUBJECT_INDEX_WORD_LENGTH
for(len=0;yy_str[len];++len)
if(yy_accept[yy_current_state])
Boolean(* T_Lookup_Callback)(const LookupTableWrap *, Int4, Int4)
Function pointer type to check the presence of index->q_off pair.
static NCBI_INLINE Boolean s_DetermineScanningOffsets(const BLAST_SequenceBlk *subject, Int4 word_length, Int4 lut_word_length, Int4 *range)
Determines the scanner's offsets taking the database masking restrictions into account (if any).
Declarations for functions that extract hits from indexed blast databases (specialized for megablast)
#define LAST_VOL_IDX_NULL
int(* T_MB_IdbCheckOid)(Int4 oid, Int4 *last_vol_id)
Function pointer type to check index seeds availability for oid.
unsigned long(* T_MB_IdbGetResults)(Int4 oid, Int4 chunk, BlastInitHitList *init_hitlist)
Function pointer type to retrieve hits from an indexed database.
static NCBI_INLINE Int4 s_BlastDiagHashRetrieve(BLAST_DiagHash *table, Int4 diag, Int4 *level, Int4 *hit_len, Int4 *hit_saved)
Attempt to retrieve information associated with diagonal diag.
static Boolean s_MBLookup(const LookupTableWrap *lookup_wrap, Int4 index, Int4 q_pos)
Check to see if an index->q_pos pair exists in MB lookup table.
static NCBI_INLINE Int4 s_BlastDiagHashInsert(BLAST_DiagHash *table, Int4 diag, Int4 level, Int4 len, Int4 hit_saved, Int4 s_off, Int4 window_size)
Attempt to store information associated with diagonal diag.
static Int4 s_BlastSmallNaExtendAlignedOneByte(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Uint4 s_range)
Perform exact match extensions on the hits retrieved from small-query lookup tables.
static Int4 s_BlastnDiagTableExtendInitialHit(BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 q_off, Int4 s_off, BlastSeqLoc *query_mask, BlastQueryInfo *query_info, Int4 s_range, Int4 word_length, Int4 lut_word_length, const LookupTableWrap *lut, const BlastInitialWordParameters *word_params, Int4 **matrix, BLAST_DiagTable *diag_table, BlastInitHitList *init_hitlist, Boolean check_masks)
Perform ungapped extension given an offset pair, and save the initial hit information if the hit qual...
void BlastChooseNaExtend(LookupTableWrap *lookup_wrap)
Choose the best routine to use for creating ungapped alignments.
static NCBI_INLINE Boolean s_IsSeedMasked(const LookupTableWrap *lookup_wrap, const BLAST_SequenceBlk *subject, Int4 s_off, Int4 lut_word_length, Int4 q_pos)
Test to see if seed->q_off exists in lookup table.
static Int4 s_BlastNaExtendAligned(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Uint4 s_range)
Perform exact match extensions on the hits retrieved from blastn/megablast lookup tables,...
static Int4 s_BlastNaExtend(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Uint4 s_range)
Perform exact match extensions on the hits retrieved from blastn/megablast lookup tables,...
static Int4 s_BlastSmallNaExtend(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Uint4 s_range)
Perform exact match extensions on the hits retrieved from small-query blastn lookup tables,...
static void s_NuclUngappedExtend(BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, Int4 q_off, Int4 s_match_end, Int4 s_off, Int4 X, BlastUngappedData *ungapped_data, const Int4 *score_table, Int4 reduced_cutoff)
Perform ungapped extension of a word hit.
Int2 BlastNaWordFinder(BLAST_SequenceBlk *subject, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, LookupTableWrap *lookup_wrap, Int4 **matrix, const BlastInitialWordParameters *word_params, Blast_ExtendWord *ewp, BlastOffsetPair *offset_pairs, Int4 max_hits, BlastInitHitList *init_hitlist, BlastUngappedStats *ungapped_stats)
Find all words for a given subject sequence and perform ungapped extensions, assuming ordinary blastn...
static Boolean s_NaLookup(const LookupTableWrap *lookup_wrap, Int4 index, Int4 q_pos)
Check to see if an index->q_pos pair exists in Na lookup table.
static void s_NuclUngappedExtendExact(BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, Int4 q_off, Int4 s_off, Int4 X, BlastUngappedData *ungapped_data)
Perform ungapped extension of a word hit, using a score matrix and extending one base at a time.
Int2 JumperNaWordFinder(BLAST_SequenceBlk *subject, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, LookupTableWrap *lookup_wrap, const BlastInitialWordParameters *word_params, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, BlastOffsetPair *offset_pairs, MapperWordHits *word_hits, Int4 max_hits, BlastGapAlignStruct *gap_align, BlastInitHitList *init_hitlist, BlastHSPList **hsp_list_ptr, BlastUngappedStats *ungapped_stats, BlastGappedStats *gapped_stats)
MapperWordHits * MapperWordHitsFree(MapperWordHits *wh)
static const Uint1 s_ExactMatchExtendRight[256]
Entry i of this list gives the number of pairs of bits that are zero in the bit pattern of i,...
static Int4 s_BlastNaExtendDirect(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Uint4 s_range)
Perform ungapped extensions on the hits retrieved from blastn/megablast lookup tables,...
static Int4 s_TypeOfWord(BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 *q_off, Int4 *s_off, BlastSeqLoc *locations, BlastQueryInfo *query_info, Uint4 s_range, Uint4 word_length, Uint4 lut_word_length, const LookupTableWrap *lookup_wrap, Boolean check_double, Int4 *extended)
Check the mini-extended word against masked query regions, and do right extension if necessary.
MapperWordHits * MapperWordHitsNew(const BLAST_SequenceBlk *query, const BlastQueryInfo *query_info)
Int2 ShortRead_IndexedWordFinder(BLAST_SequenceBlk *subject, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, LookupTableWrap *lookup_wrap, const BlastInitialWordParameters *word_params, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, BlastOffsetPair *offset_pairs, MapperWordHits *word_hits, Int4 max_hits, BlastGapAlignStruct *gap_align, BlastInitHitList *init_hitlist, BlastHSPList **hsp_list, BlastUngappedStats *ungapped_stats, BlastGappedStats *gapped_stats)
static const Uint1 s_ExactMatchExtendLeft[256]
Entry i of this list gives the number of pairs of bits that are zero in the bit pattern of i,...
static Boolean s_SmallNaLookup(const LookupTableWrap *lookup_wrap, Int4 index, Int4 q_pos)
Check to see if an index->q_pos pair exists in SmallNa lookup table.
Int2 MB_IndexedWordFinder(BLAST_SequenceBlk *subject, BLAST_SequenceBlk *query, BlastQueryInfo *query_info, LookupTableWrap *lookup_wrap, Int4 **matrix, const BlastInitialWordParameters *word_params, Blast_ExtendWord *ewp, BlastOffsetPair *offset_pairs, Int4 max_hits, BlastInitHitList *init_hitlist, BlastUngappedStats *ungapped_stats)
Finds all runs of a specified number of exact matches between two nucleotide sequences.
static Int4 s_BlastnDiagHashExtendInitialHit(BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 q_off, Int4 s_off, BlastSeqLoc *query_mask, BlastQueryInfo *query_info, Int4 s_range, Int4 word_length, Int4 lut_word_length, const LookupTableWrap *lut, const BlastInitialWordParameters *word_params, Int4 **matrix, BLAST_DiagHash *hash_table, BlastInitHitList *init_hitlist, Boolean check_masks)
Perform ungapped extension given an offset pair, and save the initial hit information if the hit qual...
Nucleotide ungapped extension code.
Int4(* TNaExtendFunction)(const BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 **matrix, BlastQueryInfo *query_info, Blast_ExtendWord *ewp, BlastInitHitList *init_hitlist, Int4 range)
Signature of function used to compute ungapped alignments.
const struct ncbi::grid::netcache::search::fields::KEY key
#define MIN(a, b)
returns smaller of a and b.
#define NCBI_INLINE
"inline" seems to work on our remaining in-house compilers (WorkShop, Compaq, ICC,...
Uint1 Boolean
bool replacement for C
#define ASSERT
macro for assert.
#define INT4_MIN
Smallest (most negative) number represented by signed int.
#define MAX(a, b)
returns larger of a and b.
Int4 delta(size_t dimension_, const Int4 *score_)
Track initial word matches using hashing with chaining.
Int4 offset
"offset" added to query and subject position so that "last_hit" doesn't have to be zeroed out every t...
Structure containing parameters needed for initial word extension.
DiagStruct * hit_level_array
Array to hold latest hits and their lengths for all diagonals.
Int4 diag_array_length
Smallest power of 2 longer than query length.
Int4 diag_mask
Used to mask off everything above min_diag_length (mask = min_diag_length-1).
Uint1 * hit_len_array
Array to hold the length of the latest hit.
Int4 offset
"offset" added to query and subject position so that "last_hit" doesn't have to be zeroed out every t...
Structure to hold a sequence.
Int4 query_length
Length of this query, strand or frame.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
Structure supporting the gapped alignment.
Int4 query_stop
query end offset of current alignment
Int4 subject_start
subject start offset of current alignment
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
JumperGapAlign * jumper
data for jumper alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
Structure containing hit counts from the gapped stage of a BLAST search.
Int4 extensions
Total number of gapped extensions performed.
The structure to hold all HSPs for a given sequence after the gapped alignment.
JumperEditsBlock * edits
Information about mismatches and gaps, used for mapping short reads.
Structure holding all information about an HSP.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastHSPMappingInfo * map_info
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
BlastHitSavingOptions * options
The original (unparsed) options.
Structure to hold the initial HSP information.
BlastUngappedData * ungapped_data
Pointer to a structure holding ungapped alignment information.
BlastOffsetPair offsets
Offsets in query and subject, or, in PHI BLAST, start and end of pattern in subject.
Structure to hold all initial HSPs for a given subject sequence.
Int4 total
Total number of hits currently saved.
BlastInitHSP * init_hsp_array
Array of offset pairs, possibly with scores.
EBlastProgramType program_number
indicates blastn, blastp, etc.
Int4 window_size
Maximal allowed distance between 2 hits in case 2 hits are required to trigger the extension.
Int4 scan_range
Maximal number of gaps allowed between 2 hits.
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it.
BlastUngappedCutoffs * cutoffs
cutoff values (one per context)
Boolean ungapped_extension
Should an ungapped extension be performed?
BlastInitialWordOptions * options
The original (unparsed) options.
Boolean matrix_only_scoring
Use the scoring matrix ( not table ) to score ungapped and gapped alignments -RMH-.
ESeedContainerType container_type
How to store offset pairs for initial seeds?
Int4 nucl_score_table[256]
the combined score of all match/mismatch combinations for aligning four bases
The lookup table structure used for Mega BLAST.
Int4 lut_word_length
number of letters in a lookup table word
Int4 pv_array_bts
The exponent of 2 by which pv_array is smaller than the backbone.
BlastSeqLoc * masked_locations
masked locations, only non-NULL for soft-masking.
Int4 * hashtable
Array of positions.
PV_ARRAY_TYPE * pv_array
Presence vector, used for quick presence check.
Boolean stride
is lookup table created with a stride
Int8 hashsize
= 4^(lut_word_length)
Int4 scan_step
Step size for scanning the database.
Int4 word_length
number of exact letter matches that will trigger an ungapped extension
Boolean discontiguous
Are discontiguous words used?
Int4 * next_pos
Extra positions stored here.
void * extend_callback
function for extending hits
Int4 template_length
Length of the discontiguous word template.
The basic lookup table structure for blastn searches.
Int4 scan_step
number of bases between successive words
BlastSeqLoc * masked_locations
masked locations, only non-NULL for soft-masking.
Int4 lut_word_length
Length in bases of a word indexed by the lookup table.
Int4 word_length
Length in bases of the full word match required to trigger extension.
void * extend_callback
function for extending hits
The query related information.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Int4 last_context
Index of the last element of the context array.
Uint4 max_length
Length of the longest among the concatenated queries.
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
Used to hold a set of positions, mostly used for filtering.
Lookup table structure for blastn searches with small queries.
Int4 scan_step
number of bases between successive words
Int4 word_length
Length in bases of the full word match required to trigger extension.
void * extend_callback
function for extending hits
BlastSeqLoc * masked_locations
masked locations, only non-NULL for soft-masking.
Int4 lut_word_length
Length in bases of a word indexed by the lookup table.
All the ungapped cutoff values that can change from context to context.
Int4 reduced_nucl_cutoff_score
for blastn, a reduced cutoff score for use with approximate ungapped alignments
Int4 cutoff_score
Cutoff score for saving ungapped hits.
Int4 x_dropoff
Raw X-dropoff value used in the ungapped extension.
Structure to hold ungapped alignment information.
Int4 score
Score of the ungapped alignment.
Int4 length
Length of the ungapped alignment.
Int4 q_start
Start of the ungapped alignment in query.
Int4 s_start
Start of the ungapped alignment in subject.
Structure containing hit counts from the ungapped stage of a BLAST search.
Int4 good_init_extends
Number of successful initial extensions, i.e.
Structure for keeping initial word extension information.
BLAST_DiagHash * hash_table
Hash table and related parameters.
BLAST_DiagTable * diag_table
Diagonal array and related parameters.
Structure for keeping last hit information for a diagonal in a hash table, when eRight or eRightAndLe...
Int4 hit_len
The length of last hit.
signed int level
This hit's offset in the subject sequence.
unsigned int hit_saved
Whether or not this hit has been saved.
Uint4 next
Offset of next element in the chain.
Int4 diag
This hit's diagonal.
Structure for keeping last hit information for a diagonal.
signed int last_hit
Offset of the last hit.
unsigned int flag
Reset the next extension?
JumperPrelimEditBlock * left_prelim_block
JumperPrelimEditBlock * right_prelim_block
Wrapper structure for different types of BLAST lookup tables.
void * lookup_callback
function used to look up an index->q_off pair
void * lut
Pointer to the actual lookup table structure.
void * check_index_oid
function used to check if seeds for a given oid are present
ELookupTableType lut_type
What kind of a lookup table it is?
void * read_indexed_db
function used to retrieve hits from an indexed database
Int4 num_arrays
number of pair_arrays
BlastOffsetPair ** pair_arrays
lists of word hits
Int4 * last_diag
diagonal for the last word hit for each query context
Int4 array_size
size of each array
Int4 * last_pos
subject position for the last word hit for each query context
Int4 * num
number of hits in the list
Int4 divisor
divisor used to find pair_arrays index based on query offset
Index for a chunk of a subject sequence.
Uint4 qend
Right end (in the query) of the last seen seed on the diagonal.
Uint4 diag
Diagonal identifier.
This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...
Uint4 s_off
Subject offset.
struct BlastOffsetPair::@6 qs_offsets
Query/subject offset pair.
voidp calloc(uInt items, uInt size)