/**
 * Fetch the base at position `pos` of sequence `seq`.
 *
 * If `compressed` is non-zero, the byte `seq[pos >> 2]` is unpacked with
 * NCBI2NA_UNPACK_BASE using slot `3 - (pos & 3)`; otherwise `seq[pos]` is
 * returned as is.
 *
 * Every argument is parenthesized in the expansion so that expressions such
 * as `buf + offset` or `cond ? a : b` can be passed safely.  `seq` and `pos`
 * still appear more than once, so do not pass arguments with side effects.
 */
#define GET_BASE(seq, pos, compressed) \
    ((compressed) ? NCBI2NA_UNPACK_BASE((seq)[(pos) >> 2], 3 - ((pos) & 3)) \
                  : (seq)[(pos)])
/**
 * Extract the 2-bit base at position `pos` from a sequence that stores four
 * bases per byte, with the first base of each byte in the two highest bits.
 *
 * The byte `seq[pos >> 2]` is shifted right so that the wanted pair of bits
 * lands in the lowest two positions, then masked.  `pos` is evaluated twice.
 */
#define UNPACK_BASE_OLD(seq, pos) \
    (((seq)[(pos) >> 2] >> (6 - 2 * ((pos) & 3))) & 0x03)
/**
 * Extract the base at position `pos` from a packed sequence.
 *
 * Selects the byte `seq[pos / 4]` and hands it to NCBI2NA_UNPACK_BASE with
 * slot number `3 - (pos & 3)`.  `pos` is evaluated twice.
 */
#define UNPACK_BASE(seq, pos)                  \
    (NCBI2NA_UNPACK_BASE((seq)[(pos) / 4],     \
                         3 - ((pos) & 3)))
/**
 * Append operation `op` to an edit block.
 *
 * Stores `op` at `block->edit_ops[block->num_ops]` and then increments
 * `block->num_ops`.  The value of the expression is the stored operation.
 *
 * No capacity check is made here; the caller must ensure there is room.
 * `block` is evaluated twice, so it must not have side effects.
 */
#define JUMPER_EDIT_BLOCK_ADD(block, op) \
    ((block)->edit_ops[(block)->num_ops++] = (op))
/**
 * Translate a jumper edit operation into a gapped-alignment operation type.
 *
 *   op >= 0                 -> eGapAlignSub
 *   op == JUMPER_INSERTION  -> eGapAlignIns
 *   any other negative op   -> eGapAlignDel
 *
 * `op` is parenthesized so that compound expressions (for example `a & b`)
 * are compared as a whole.  It is evaluated up to twice.
 */
#define JOP_TO_OP(op) \
    ((op) >= 0 ? eGapAlignSub \
               : ((op) == JUMPER_INSERTION ? eGapAlignIns : eGapAlignDel))
/**
 * Number of alignment positions represented by one jumper edit operation.
 *
 * A positive `op` yields its own value; zero or a negative `op` yields 1.
 *
 * `op` is parenthesized so that compound expressions (for example `x & 7`)
 * are compared as a whole.  It is evaluated up to twice.
 */
#define JOP_TO_NUM(op) ((op) > 0 ? (op) : 1)
167 for (
i = 0;
i < 256;
i++) {
169 for (k = 0;k < 4; k++) {
170 Uint4 cell = ((
i >> (2 * k)) & 3);
172 case 0:
table[
i] += cell << 3 * 8;
break;
173 case 1:
table[
i] += cell << 2 * 8;
break;
174 case 2:
table[
i] += cell << 1 * 8;
break;
175 case 3:
table[
i] += cell;
break;
191 if (jgap_align->
table) {
219 if (!retval->
table) {
232 if (!left || !right) {
254 ASSERT(edit_index < edit_script->num_ops);
256 if (!edit_script || !query_pos || !subject_pos) {
260 for (j = 0;j < edit_index;j++) {
261 ASSERT(j < edit_script->num_ops);
274 *query_pos += edit_script->
edit_ops[j];
275 *subject_pos += edit_script->
edit_ops[j];
288 Int4 query_offset,
Int4 subject_offset,
289 Int4 query_length,
Int4 subject_length,
296 ASSERT(score && num_identical);
307 while (k < edit_script->num_ops &&
314 for (;k < edit_script->
num_ops;k++) {
324 for (j = k;j < edit_script->
num_ops - 1;j++) {
335 (*score) -= err_score;
339 q_pos = query_offset;
340 s_pos = subject_offset;
366 q_pos = query_offset;
367 s_pos = subject_offset;
371 (*score) -= err_score;
385 q_pos = query_offset;
386 s_pos = subject_offset;
390 while (b < edit_script->edit_ops[k] &&
396 ASSERT(q_pos <= query_length);
397 ASSERT(s_pos <= subject_length);
413 for (j = edit_script->
num_ops - 1;j >
i;j--) {
427 if (
b < num_matches) {
435 for (j = k;j < edit_script->
num_ops - 1;j++) {
459 Int4 query_length,
Int4 subject_length,
460 Int4 err_score,
Int4* num_identical)
481 for (k =
i + 1;k < combined->
num_ops;k++) {
491 subject_length, &gap_align->
score,
492 err_score, num_identical);
495 while (combined->
num_ops > 0 &&
505 gap_align->
score -= err_score;
524 Int4 num_matches = 0;
527 if (jops->
num_ops == 0 || margin == 0) {
540 while (index >= 1 && jops->
edit_ops[index] > 0) {
541 num_matches += jops->
edit_ops[index];
548 while (jops->
num_ops > 1 && num_matches < margin) {
554 (*cp) += (is_right_ext ? -op : op);
555 (*cq) += (is_right_ext ? -op : op);
556 *num_identical -= op;
558 else if (is_right_ext) {
594 while (index >= 1 && jops->
edit_ops[index] > 0) {
595 num_matches += jops->
edit_ops[index];
620 if (rev_prelim_block->
num_ops == 0 && fwd_prelim_block->
num_ops == 0) {
626 last_op = rev_prelim_block->
num_ops > 0 ?
630 for (
i = rev_prelim_block->
num_ops - 2;
i >= 0;
i--) {
636 for (
i = 0;
i < fwd_prelim_block->
num_ops;
i++) {
648 if (rev_prelim_block->
num_ops > 0) {
652 last_op = retval->
op_type[index];
656 if (current_op == last_op) {
663 last_op = current_op;
669 if (index == 0 && retval->
num[index] == 0) {
672 last_op = retval->
op_type[index];
675 for (;
i < fwd_prelim_block->
num_ops;
i++) {
677 if (current_op == last_op) {
684 last_op = current_op;
694 Int4 match_score,
Int4 mismatch_score,
695 Int4 gap_open_score,
Int4 gap_extend_score)
710 score += op * match_score;
713 score += mismatch_score;
716 score += gap_open_score;
717 while (i < edit_script->num_ops && edit_script->
edit_ops[
i] == op) {
718 score += gap_extend_score;
735 int query_length,
int subject_length,
736 Int4 match_score,
Int4 mismatch_score,
737 Int4 gap_open_score,
Int4 gap_extend_score,
739 Int4* query_ext_len,
Int4* subject_ext_len,
741 Int4* ungapped_ext_len)
743 const Uint1 *cp, *cp1, *cpmax, *cpmax4, *cpstop =
NULL;
744 Int4 cq, cq1, cqmax, cqmax4, cqstop = 0;
747 int num_mismatches = 0;
750 Uint4 trace_mask = (1 << max_mismatches) - 1;
751 Int4 score = 0, best_score = 0;
760 cpmax = cp + query_length;
764 cqmax = subject_length;
773 while (cp < cpmax && cq < cqmax && num_mismatches < max_mismatches) {
775 if (!(cq & 3) && cp < cpmax4 && cq < cqmax4) {
804 if (cp1 >= cpmax || cq1 >= cqmax
816 if (
i + cp1 >= cpmax ||
i + cq1 >= cqmax) {
820 if (cp1 >= cpmax || cq1 >= cqmax) {
839 if (new_matches < window) {
840 trace <<= new_matches;
846 *num_identical += new_matches;
847 score += new_matches * match_score;
853 score += mismatch_score * jp->
dcp;
854 if (
trace & trace_mask) {
855 num_mismatches += jp->
dcp;
860 num_mismatches = jp->
dcp;
867 if (is_ungapped && jp->
dcp != jp->
dcq) {
868 *ungapped_ext_len = (
Int4)(cp -
query - 1);
877 if (jp->
ok == 0 && jp->
lng) {
881 *num_identical += jp->
lng;
882 score += jp->
lng * match_score;
886 if (score >= best_score) {
895 *num_identical += new_matches;
896 score += new_matches * match_score;
897 if (score >= best_score) {
904 *subject_ext_len = cqstop;
907 *ungapped_ext_len = *query_ext_len;
917 int query_length,
int subject_length,
918 Int4 match_score,
Int4 mismatch_score,
919 Int4 gap_open_score,
Int4 gap_extend_score,
921 Int4* query_ext_len,
Int4* subject_ext_len,
925 Int4* ungapped_ext_len,
928 const Uint1 *cp, *cp1, *cpmax, *cpmax4;
929 Int4 cq, cq1, cqmax, cqmax4;
932 int num_mismatches = 0;
935 Uint4 trace_mask = (1 << max_mismatches) - 1;
943 cpmax = cp + query_length;
947 cqmax = subject_length;
955 if (!left_extension) {
959 while (cp < cpmax && cq < cqmax && num_mismatches < max_mismatches) {
961 if (!(cq & 3) && cp < cpmax4 && cq < cqmax4) {
990 if (cp1 >= cpmax || cq1 >= cqmax
1002 if (
i + cp1 >= cpmax ||
i + cq1 >= cqmax) {
1006 if (cp1 >= cpmax || cq1 >= cqmax) {
1027 if (new_matches < window) {
1028 trace <<= new_matches;
1034 *num_identical += new_matches;
1039 if (jp->
dcp == jp->
dcq) {
1040 if (
trace & trace_mask) {
1041 num_mismatches += jp->
dcp;
1046 num_mismatches = jp->
dcp;
1049 for (
i = 0;
i < jp->
dcp;
i++) {
1053 }
else if (jp->
dcp > jp->
dcq) {
1077 if (is_ungapped && jp->
dcp != jp->
dcq) {
1078 *ungapped_ext_len = (
Int4)(cp -
query - 1);
1079 is_ungapped =
FALSE;
1087 if (jp->
ok == 0 && jp->
lng) {
1093 *num_identical += jp->
lng;
1104 *num_identical += new_matches;
1108 s_TrimExtension(edit_script, -mismatch_score, &cp, &cq, num_identical,
1112 *subject_ext_len = cq;
1115 *ungapped_ext_len = *query_ext_len;
1120 gap_open_score, gap_extend_score);
1126 int query_length,
int subject_length,
1127 Int4 match_score,
Int4 mismatch_score,
1128 Int4 gap_open_score,
Int4 gap_extend_score,
1129 int max_mismatches,
int window,
1131 Int4* query_ext_len,
Int4* subject_ext_len,
1133 Int4* best_num_identical,
1135 Int4* ungapped_ext_len)
1137 const Uint1 *cp, *cp1, *cpmax, *cpmax4, *cpstop =
NULL;
1138 Int4 cq, cq1, cqmax, cqmax4, cqstop = 0;
1141 int num_mismatches = 0;
1142 int new_matches = 0;
1144 Uint4 trace_mask = (1 << max_mismatches) - 1;
1146 Int4 score = 0, best_score = 0;
1147 Int4 num_ops = 0, num_identical = *best_num_identical;
1150 Int4 last_gap_open = 0;
1158 cpmax = cp + query_length;
1163 cqmax = subject_length;
1171 if (!left_extension) {
1175 while (cp < cpmax && cq < cqmax && num_mismatches < max_mismatches) {
1177 if (!(cq & 3) && cp < cpmax4 && cq < cqmax4) {
1206 if (cp1 >= cpmax || cq1 >= cqmax
1219 if (cp1 >= cpmax ||
i + cq1 >= cqmax) {
1226 if ( cq1 >= cqmax) {
1247 if (new_matches < window) {
1248 trace <<= new_matches;
1254 num_identical += new_matches;
1255 score += new_matches * match_score;
1261 if (score >= best_score) {
1264 num_ops = edit_script->
num_ops;
1266 *best_num_identical = num_identical;
1269 if (best_score - score > x_drop) {
1274 if (jp->
dcp == jp->
dcq) {
1275 score += jp->
dcp * mismatch_score;
1276 if (
trace & trace_mask) {
1277 num_mismatches += jp->
dcp;
1282 num_mismatches = jp->
dcp;
1285 for (
i = 0;
i < jp->
dcp;
i++) {
1289 }
else if (jp->
dcp > jp->
dcq) {
1293 score += gap_extend_score;
1296 score += gap_open_score;
1309 score += gap_extend_score;
1312 score += gap_open_score;
1323 if (is_ungapped && jp->
dcp != jp->
dcq) {
1324 *ungapped_ext_len = (
Int4)(cp -
query - 1);
1325 is_ungapped =
FALSE;
1333 if (cp1 < cpmax && jp->
ok == 0 && jp->
lng) {
1339 num_identical += jp->
lng;
1340 score += jp->
lng * match_score;
1345 if (score >= best_score) {
1348 num_ops = edit_script->
num_ops;
1350 *best_num_identical = num_identical;
1358 num_identical += new_matches;
1359 score += new_matches;
1363 if (score >= best_score) {
1366 num_ops = edit_script->
num_ops;
1368 *best_num_identical = num_identical;
1371 *query_ext_len = (
Int4)(cpstop -
query);
1372 *subject_ext_len = cqstop;
1373 edit_script->
num_ops = num_ops;
1376 *ungapped_ext_len = *query_ext_len;
1385 int query_length,
int subject_length,
1386 Int4 match_score,
Int4 mismatch_score,
1387 Int4 gap_open_score,
Int4 gap_extend_score,
1388 int max_mismatches,
int window,
1389 int* query_ext_len,
int* subject_ext_len,
1393 const Uint1 *cp, *cp1, *cpmax;
1394 Int4 cq, cq1, cqmax;
1397 int score = 0, num_mismatches = 0;
1398 int new_matches = 0;
1400 Uint4 trace_mask = (1 << max_mismatches) - 1;
1408 cpmax = cp + query_length;
1412 cqmax = subject_length;
1417 if (!left_extension) {
1421 while (cp < cpmax && cq < cqmax && num_mismatches < max_mismatches) {
1424 score += match_score;
1449 if (cp1 >= cpmax || cq1 >= cqmax
1461 if (
i + cp1 >= cpmax ||
i + cq1 >= cqmax) {
1465 if (cp1 >= cpmax || cq1 >= cqmax) {
1485 if (new_matches < window) {
1486 trace <<= new_matches;
1502 if (jp->
dcp == jp->
dcq) {
1503 score += mismatch_score * jp->
dcp;
1504 if (
trace & trace_mask) {
1505 num_mismatches += jp->
dcp;
1510 num_mismatches = jp->
dcp;
1514 }
else if (jp->
dcp > jp->
dcq) {
1515 score += gap_open_score + gap_extend_score * (jp->
dcp - jp->
dcq);
1520 score += gap_open_score + gap_extend_score * (jp->
dcq - jp->
dcp);
1530 if (jp->
ok == 0 && jp->
lng) {
1531 score += match_score * jp->
lng;
1546 *subject_ext_len = cq;
1554 int query_length,
int subject_length,
1555 Int4 match_score,
Int4 mismatch_score,
1556 Int4 gap_open_score,
Int4 gap_extend_score,
1557 int max_mismatches,
int window,
1558 Int4* query_ext_len,
Int4* subject_ext_len,
1560 Int4* num_identical,
1562 Int4* ungapped_ext_len,
1565 const Uint1 *cp, *cp1, *cpmax;
1566 Int4 cq, cq1, cqmax;
1569 int num_mismatches = 0;
1570 int new_matches = 0;
1572 Uint4 trace_mask = (1 << max_mismatches) - 1;
1580 cpmax = cp + query_length;
1584 cqmax = subject_length;
1587 if (left_extension) {
1592 while (cp < cpmax && cq < cqmax && num_mismatches < max_mismatches) {
1614 if (cp1 >= cpmax || cq1 >= cqmax
1626 if (
i + cp1 >= cpmax ||
i + cq1 >= cqmax) {
1630 if (cp1 >= cpmax || cq1 >= cqmax) {
1651 if (new_matches < window) {
1652 trace <<= new_matches;
1658 *num_identical += new_matches;
1663 if (jp->
dcp == jp->
dcq) {
1664 if (
trace & trace_mask) {
1665 num_mismatches += jp->
dcp;
1670 num_mismatches = jp->
dcp;
1673 for (
i = 0;
i < jp->
dcp;
i++) {
1677 }
else if (jp->
dcp > jp->
dcq) {
1701 if (is_ungapped && jp->
dcp != jp->
dcq) {
1702 *ungapped_ext_len = (
Int4)(cp -
query - 1);
1703 is_ungapped =
FALSE;
1711 if (jp->
ok == 0 && jp->
lng) {
1717 *num_identical += jp->
lng;
1728 *num_identical += new_matches;
1732 s_TrimExtension(edit_script, -mismatch_score, &cp, &cq, num_identical,
1736 *subject_ext_len = cq;
1739 *ungapped_ext_len = *query_ext_len;
1744 gap_open_score, gap_extend_score);
1750 Int4 query_offset,
Int4 subject_offset,
1751 Int4 match_score,
Int4 mismatch_score,
1752 Int4 gap_open_score,
Int4 gap_extend_score,
1753 int max_mismatches,
int window,
Uint4*
table,
1754 Int4* query_ext_len,
Int4* subject_ext_len,
1755 Int4* num_identical)
1757 const Uint1 *cp, *cp1, *cpmin, *cpmin4, *cpstop =
NULL;
1758 Int4 cq, cq1, cqmin, cqmin4, cqstop = 0;
1761 int num_mismatches = 0;
1762 int new_matches = 0;
1764 Uint4 trace_mask = (1 << max_mismatches) - 1;
1765 Int4 score = 0, best_score = 0;
1772 cp =
query + query_offset;
1776 cq = subject_offset;
1783 while (cp >= cpmin && cq >= cqmin && num_mismatches < max_mismatches) {
1785 if ((cq & 3) == 3 && cp >= cpmin4 && cq >= cqmin4) {
1817 if (cp1 < cpmin || cq1 < cqmin
1829 if (cp1 -
i < cpmin || cq1 -
i < cqmin) {
1833 if (cp1 < cpmin || cq1 < cqmin) {
1852 if (new_matches < window) {
1853 trace <<= new_matches;
1859 *num_identical += new_matches;
1860 score = new_matches * match_score;
1865 if (jp->
dcp == jp->
dcq) {
1866 score += mismatch_score * jp->
dcp;
1867 if (
trace & trace_mask) {
1868 num_mismatches += jp->
dcp;
1873 num_mismatches = jp->
dcp;
1884 if (!jp->
ok && jp->
lng) {
1888 *num_identical += jp->
lng;
1889 score += jp->
lng * match_score;
1893 if (score >= best_score) {
1902 *num_identical += new_matches;
1903 score += new_matches * match_score;
1904 if (score >= best_score) {
1910 *query_ext_len = (
Int4)(
query + query_offset - cpstop);
1911 *subject_ext_len = subject_offset - cqstop;
1919 Int4 query_offset,
Int4 subject_offset,
1920 Int4 match_score,
Int4 mismatch_score,
1921 Int4 gap_open_score,
Int4 gap_extend_score,
1922 int max_mismatches,
int window,
Uint4*
table,
1923 Int4* query_ext_len,
Int4* subject_ext_len,
1925 Int4* num_identical,
1928 const Uint1 *cp, *cp1, *cpmin, *cpmin4;
1929 Int4 cq, cq1, cqmin, cqmin4;
1932 int num_mismatches = 0;
1933 int new_matches = 0;
1935 Uint4 trace_mask = (1 << max_mismatches) - 1;
1941 cp =
query + query_offset;
1945 cq = subject_offset;
1952 while (cp >= cpmin && cq >= cqmin && num_mismatches < max_mismatches) {
1954 if ((cq & 3) == 3 && cp >= cpmin4 && cq >= cqmin4) {
1986 if (cp1 < cpmin || cq1 < cqmin
1998 if (cp1 -
i < cpmin || cq1 -
i < cqmin) {
2002 if (cp1 < cpmin || cq1 < cqmin) {
2023 if (new_matches < window) {
2024 trace <<= new_matches;
2030 *num_identical += new_matches;
2035 if (jp->
dcp == jp->
dcq) {
2036 if (
trace & trace_mask) {
2037 num_mismatches += jp->
dcp;
2042 num_mismatches = jp->
dcp;
2045 for (
i = 0;
i < jp->
dcp;
i++) {
2049 }
else if (jp->
dcp > jp->
dcq) {
2077 if (!jp->
ok && jp->
lng) {
2083 *num_identical += jp->
lng;
2094 *num_identical += new_matches;
2098 s_TrimExtension(edit_script, -mismatch_score, &cp, &cq, num_identical,
2101 *query_ext_len = (
Int4)(
query + query_offset - cp);
2102 *subject_ext_len = subject_offset - cq;
2106 gap_open_score, gap_extend_score);
2112 Int4 query_offset,
Int4 subject_offset,
2113 Int4 match_score,
Int4 mismatch_score,
2114 Int4 gap_open_score,
Int4 gap_extend_score,
2115 int max_mismatches,
int window,
2117 Int4* query_ext_len,
Int4* subject_ext_len,
2119 Int4* best_num_identical)
2121 const Uint1 *cp, *cp1, *cpmin, *cpmin4, *cpstop =
NULL;
2122 Int4 cq, cq1, cqmin, cqmin4, cqstop = 0;
2125 int num_mismatches = 0;
2126 int new_matches = 0;
2128 Uint4 trace_mask = (1 << max_mismatches) - 1;
2129 Int4 score = 0, best_score = 0;
2130 Int4 num_ops = 0, num_identical = *best_num_identical;
2133 Int4 last_gap_open = 0;
2140 cp =
query + query_offset;
2145 cq = subject_offset;
2152 while (cp >= cpmin && cq >= cqmin && num_mismatches < max_mismatches) {
2154 if ((cq & 3) == 3 && cp >= cpmin4 && cq >= cqmin4) {
2186 if (cp1 < cpmin || cq1 < cqmin
2199 if (cp1 <= cpmin || cq1 -
i < cqmin) {
2227 if (new_matches < window) {
2228 trace <<= new_matches;
2234 num_identical += new_matches;
2235 score += new_matches * match_score;
2241 if (score >= best_score) {
2245 num_ops = edit_script->
num_ops;
2246 *best_num_identical = num_identical;
2249 if (best_score - score > x_drop) {
2254 if (jp->
dcp == jp->
dcq) {
2255 score += jp->
dcp * mismatch_score;
2256 if (
trace & trace_mask) {
2257 num_mismatches += jp->
dcp;
2262 num_mismatches = jp->
dcp;
2265 for (
i = 0;
i < jp->
dcp;
i++) {
2269 }
else if (jp->
dcp > jp->
dcq) {
2273 score += gap_extend_score;
2276 score += gap_open_score;
2289 score += gap_extend_score;
2292 score += gap_open_score;
2307 if (cp1 > cpmin && !jp->
ok && jp->
lng) {
2313 num_identical += jp->
lng;
2314 score += jp->
lng * match_score;
2319 if (score >= best_score) {
2323 num_ops = edit_script->
num_ops;
2324 *best_num_identical = num_identical;
2332 num_identical += new_matches;
2333 score += new_matches * match_score;
2337 if (score >= best_score) {
2341 num_ops = edit_script->
num_ops;
2342 *best_num_identical = num_identical;
2345 *query_ext_len = (
Int4)(
query + query_offset - cpstop);
2346 *subject_ext_len = subject_offset - cqstop;
2347 edit_script->
num_ops = num_ops;
2355 Int4 query_offset,
Int4 subject_offset,
2356 Int4 match_score,
Int4 mismatch_score,
2357 Int4 gap_open_score,
Int4 gap_extend_score,
2358 int max_mismatches,
int window,
2359 int* query_ext_len,
int* subject_ext_len,
2362 const Uint1 *cp, *cp1, *cpmin;
2363 Int4 cq, cq1, cqmin;
2366 int score = 0, num_mismatches = 0;
2367 int new_matches = 0;
2369 Uint4 trace_mask = (1 << max_mismatches) - 1;
2376 cp =
query + query_offset;
2380 cq = subject_offset;
2383 while (cp >= cpmin && cq >= cqmin && num_mismatches < max_mismatches) {
2386 score += match_score;
2409 if (cp1 < cpmin || cq1 < cqmin
2421 if (cp1 -
i < cpmin || cq1 -
i < cqmin) {
2425 if (cp1 < cpmin || cq1 < cqmin) {
2445 if (new_matches < window) {
2446 trace <<= new_matches;
2462 if (jp->
dcp == jp->
dcq) {
2463 score += mismatch_score * jp->
dcp;
2464 if (
trace & trace_mask) {
2465 num_mismatches += jp->
dcp;
2470 num_mismatches = jp->
dcp;
2474 }
else if (jp->
dcp > jp->
dcq) {
2475 score += gap_open_score + gap_extend_score * (jp->
dcp - jp->
dcq);
2480 score += gap_open_score + gap_extend_score * (jp->
dcq - jp->
dcp);
2490 if (!jp->
ok && jp->
lng) {
2491 score += match_score * jp->
lng;
2505 *query_ext_len = (
int)(
query + query_offset - cp);
2506 *subject_ext_len = subject_offset - cq;
2514 Int4 query_length,
Int4 subject_length,
2515 Int4 query_start,
Int4 subject_start,
2518 Int4* num_identical,
2519 Int4* right_ungapped_ext_len)
2521 Int4 score_left = 0, score_right = 0;
2522 Int4 q_ext_len, s_ext_len;
2523 Int4 q_length, s_length;
2524 Int4 offset_adjustment;
2526 const Uint1 kBaseN = 14;
2543 if (!*rev_prelim_block || !*fwd_prelim_block ||
2545 2 *
MIN(query_length, subject_length)) {
2560 q_length = query_start + offset_adjustment;
2561 s_length = subject_start + offset_adjustment;
2564 if (query_start > 0 && subject_start > 0) {
2577 &q_ext_len, &s_ext_len,
2582 gap_align->
query_start = q_length - q_ext_len + 1;
2584 left_ext_done =
TRUE;
2593 if (query_start < query_length - 1 && subject_start < subject_length - 1) {
2598 query_length - q_length,
2599 subject_length - s_length,
2608 &q_ext_len, &s_ext_len,
2612 right_ungapped_ext_len);
2614 gap_align->
query_stop = q_length + q_ext_len;
2622 gap_align->
score = score_left + score_right;
2624 if (offset_adjustment && !left_ext_done) {
2625 ASSERT((*rev_prelim_block)->num_ops <
2626 (*rev_prelim_block)->num_allocated);
2629 *num_identical += offset_adjustment;
2630 gap_align->
score += offset_adjustment * score_params->
reward;
2632 if (offset_adjustment && *right_ungapped_ext_len) {
2633 *right_ungapped_ext_len += offset_adjustment;
2640 for (
i = gap_align->
query_start;i < gap_align->query_stop;
i++) {
2641 if (
query[
i] == kBaseN) {
2664 if (100.0 * (
double)num_identical / (
double)align_len
2690 if (score < cutoff_score) {
2694 edit_dist = align_len - num_identical;
2728 if (!retval->
edits) {
2758 const Uint1 kGap = 15;
2782 edit->query_pos = q_pos;
2791 edit->query_pos = q_pos;
2792 edit->query_base = kGap;
2799 edit->query_pos = q_pos;
2801 edit->subject_base = kGap;
2820 edit->query_pos = q_pos;
2829 edit->query_pos = q_pos;
2830 edit->query_base = kGap;
2837 edit->query_pos = q_pos;
2839 edit->subject_base = kGap;
2867 if (!block_ptr || !*block_ptr || !append_ptr) {
2883 if (!block->
edits) {
2902 if (!edit_script_ptr || !*edit_script_ptr || !append_ptr) {
2906 edit_script = *edit_script_ptr;
2920 edit_script->
num = realloc(edit_script->
num,
2923 if (!edit_script->
num) {
2934 edit_script->
size++;
2943 #define NUM_SIGNALS 8 /* NOTE(review): presumably the number of entries in a splice-signal table; its use is not visible here -- confirm */
2982 if (overhangs->
left) {
2986 if (overhangs->
right) {
2999 const Int4 kMinOverhangLength = 0;
3000 const Int4 kMaxSubjectOverhang = query_len < 400 ? 30 : 60;
3003 query_len - hsp->
query.
end < kMinOverhangLength) {
3026 for (
i = 0;
i <
len;
i++) {
3029 overhangs->
left = overhang;
3033 if (hsp->
query.
end <= query_len - kMinOverhangLength) {
3039 if (query_len - hsp->
query.
end + 1 < 6)
3041 MIN(
MAX(query_len - hsp->
query.
end + 1, 2), kMaxSubjectOverhang);
3043 len = kMaxSubjectOverhang;
3050 for (
i = 0;
i <
len;
i++) {
3054 overhangs->
right = overhang;
3069 Int4 first_diag =
first->qs_offsets.s_off -
first->qs_offsets.q_off;
3072 if (first_diag < second_diag) {
3075 if (first_diag > second_diag) {
3116 edit_script->
num[0] = length;
3149 for (
i = 0;
i < length;
i++) {
3150 if ((
query[q_offset +
i] & 0xfc) != 0) {
3164 for (
i = 0;
i < length;
i++) {
3165 if ((
query[q_offset +
i] & 0xfc) != 0) {
3170 edit->query_pos = q_offset +
i;
3197 Int4 num_identical = 0;
3201 if (!getenv(
"MAPPER_NO_GAP_SHIFT")) {
3204 score_params->
penalty, &num_identical);
3225 if (!new_hsp || status) {
3267 Int4 hits_extended = 0;
3268 Int4 word_length, lut_word_length, ext_to;
3271 Int4 num_identical = 0;
3273 Int4 skip_until = 0;
3286 ext_to = word_length - lut_word_length;
3302 for (; index < num_hits; ++index) {
3307 Int4 diag = s_offset - q_offset;
3311 if (diag == last_diag && q_offset < skip_until) {
3320 Int4 s_off = s_offset;
3324 for (; ext_left <
MIN(ext_to, s_offset); ++ext_left) {
3338 if (ext_left < ext_to) {
3340 s_off = s_offset + lut_word_length;
3341 if (s_off + ext_to - ext_left > s_range)
3343 q =
query->sequence + q_offset + lut_word_length;
3346 for (; ext_right < ext_to - ext_left; ++ext_right) {
3357 if (ext_left + ext_right < ext_to)
3361 q_offset -= ext_left;
3362 s_offset -= ext_left;
3367 q_offset -= query_start;
3371 Uint1* query_seq =
query->sequence + query_start;
3372 Int4 right_ungapped_ext_len = 0;
3383 &right_ungapped_ext_len);
3392 skip_until = q_offset + query_start + right_ungapped_ext_len;
3399 Uint1* query_seq =
query->sequence + query_start;
3402 const Int4 kMinSubjectOverhang = 100;
3412 if (!getenv(
"MAPPER_NO_GAP_SHIFT")) {
3415 score_params->
penalty, &num_identical);
3471 if (getenv(
"MAPPER_USE_SMALL_WORDS") &&
3472 query_len - new_hsp->
query.
end < 16 &&
3486 for (
i = 1;
i < query_len - new_hsp->
query.
end;
i++) {
3487 if (query_seq[new_hsp->
query.
end +
i] !=
3496 if (
i > 4 ||
i == query_len - new_hsp->
query.
end) {
3526 if ((query_seq[q +
i] & 0xfc) != 0) {
3542 word = (query_seq[q] << 6) | (query_seq[q + 1] << 4) |
3543 (query_seq[q + 2] << 2) | query_seq[q + 3];
3545 word = (word << 2) | query_seq[q +
i];
3556 MIN((from + kMaxIntronLength),
3557 (
subject->length - (query_len - q + 1))));
3569 while (qt < query_len && st < subject->length &&
3573 (query_seq[qt] & 0xfc) != 0)) {
3580 if (qt == query_len) {
3585 while (qf >= 0 && sf >= 0 &&
3588 (query_seq[qf] & 0xfc) != 0)) {
3599 qf <= new_hsp->
query.offset) {
3607 while (qf < qt && (query_seq[qf] & 0xfc) != 0) {
3612 while (qt > qf && (query_seq[qt - 1] & 0xfc) != 0) {
3647 if (getenv(
"MAPPER_USE_SMALL_WORDS") &&
3693 for (; !found && q >= 0 &&
round < 2;q--,
round++) {
3704 if ((query_seq[q +
i] & 0xfc) != 0) {
3721 word = (query_seq[q] << 6) | (query_seq[q + 1] << 4) |
3722 (query_seq[q + 2] << 2) | query_seq[q + 3];
3724 word = (word << 2) | query_seq[q +
i];
3732 MAX(from - kMaxIntronLength, q + 1));
3744 (query_seq[k] & 0xfc) != 0)) {
3757 while (qt < query_len && st < subject->length &&
3761 (query_seq[qt] & 0xfc) != 0)) {
3777 while (k < qt && (query_seq[k] & 0xfc) != 0) {
3782 while (qt > k && (query_seq[qt] & 0xfc) != 0) {
3818 return hits_extended;
3900 for (k = 0;k < 4;k++) {
3902 (
subject->sequence[
i] >> (2 * (3 - k))) & 3;
3947 for (
i = 0;
i < num_lookups;
i++) {
3949 ssr->
left = width *
i;
3955 query_opt, word_size);
3964 retval->
width = width;
3988 if (!s_index || !s_index->
lookups[0]) {
4014 word = word &
lookup->mask;
4021 lookup->thick_backbone[word].payload.overflow_cursor;
4024 retval->
word = word;
4071 lookup->thick_backbone[it->
word].payload.overflow_cursor;
4116 lookup->thick_backbone[it->
word].payload.overflow_cursor;
4137 #define MAX_NUM_MATCHES 10 /* used below as the upper bound on num_words while query words are collected */
4142 Int4 subject_from,
Int4 subject_to,
4150 Int4 q = query_from;
4154 Int4 num_extensions = 0;
4155 Int4 word_size = 12;
4156 Int4 big_word_size = 0;
4164 Int4 scan_from = subject_from;
4167 Uint4 mask = (1U << (2 * word_size)) - 1;
4169 Int4 best_score = 0;
4170 Int4 num_matches = 0;
4176 Boolean is_right = subject_from < subject_to;
4178 big_word_size =
MIN(
MAX(query_len - query_from - 5, word_size), 24);
4179 scan_step = big_word_size - word_size + 1;
4183 big_word_size =
MIN(
MAX(query_from - 5, word_size), 24);
4184 scan_step = -(big_word_size - word_size + 1);
4187 if ((is_right && (query_len - query_from + 1 < big_word_size ||
4188 scan_to - subject_from < big_word_size)) ||
4189 (!is_right && (query_from < big_word_size ||
4190 subject_from - scan_to < big_word_size))) {
4198 for (; q + big_word_size < query_len && num_words <
MAX_NUM_MATCHES; q++) {
4201 while (q + big_word_size <= query_len) {
4202 for (
i = 0;
i < big_word_size;
i++) {
4203 if ((query_seq[q +
i] & 0xfc) != 0) {
4210 if (
i == big_word_size) {
4218 if (q + big_word_size - 1 >= query_len) {
4223 word[num_words] = (query_seq[q] << 6) | (query_seq[q + 1] << 4) |
4224 (query_seq[q + 2] << 2) | query_seq[q + 3];
4225 for (
i = 4;
i < word_size;
i++) {
4226 word[num_words] = (word[num_words] << 2) | query_seq[q +
i];
4230 if (word[num_words] == 0 || word[num_words] == 0xffffff) {
4234 query_pos[num_words] = q;
4244 for (
i = 0;
i < big_word_size;
i++) {
4245 if ((query_seq[q +
i] & 0xfc) != 0) {
4246 q = q - big_word_size +
i;
4252 if (
i == big_word_size) {
4265 word[num_words] = (query_seq[q] << 6) | (query_seq[q + 1] << 4) |
4266 (query_seq[q + 2] << 2) | query_seq[q + 3];
4267 for (
i = 4;
i < word_size;
i++) {
4268 word[num_words] = (word[num_words] << 2) | query_seq[q +
i];
4272 if (word[num_words] == 0 || word[num_words] == 0xffffff) {
4276 query_pos[num_words] = q;
4282 if (num_words == 0) {
4286 for (
i = scan_from; (scan_from < scan_to &&
i < scan_to) ||
4287 (scan_from > scan_to &&
i > scan_to);
i += scan_step) {
4289 Int4 local_ungapped_ext;
4299 if (num_matches > kMaxNumMatches) {
4305 w = (
Int4)s[0] << 16 | s[1] << 8 | s[2];
4309 for (; last_idx < num_bytes; last_idx++) {
4310 w = w << 8 | s[last_idx];
4314 w = (w << 8) | s[last_idx];
4316 index = (w >> shift) &
mask;
4323 for (k = 0;k < num_words; k++) {
4324 if (index == word[k]) {
4329 if (k >= num_words) {
4333 q_offset = query_pos[k];
4336 for (k = word_size;k < big_word_size;k++) {
4341 if (k < big_word_size) {
4360 &local_ungapped_ext);
4363 if (gap_align->
score <= best_score) {
4367 best_score = gap_align->
score;
4374 query_info, gap_align,
subject,
4375 score_params, hit_params);
4378 if (hsp->
score >= query_len - query_from) {
4407 if (!
query || !
subject || !query_info || !gap_align || !score_params ||
4408 !hit_params || !hsp_stream) {
4422 hsp_list->
oid, word_size);
4427 for (ch = chains; ch; ch = ch->
next) {
4444 query_info, gap_align, score_params,
4445 hit_params, hsp_list);
4460 query_info, gap_align, score_params,
4461 hit_params, hsp_list);
4465 for (h = ch->
hsps; h; h = h->
next) {
4479 #define NUM_DIMERS (1 << 4) /* 16: a dimer is two 2-bit bases packed as (base_1 << 2) | base_2 */
4491 for (
i=0;
i < length - 1;
i++) {
4492 Uint1 base_1 = sequence[
i];
4493 Uint1 base_2 = sequence[
i + 1];
4495 if ((base_1 & 0xfc) == 0 && (base_2 & 0xfc) == 0) {
4496 Int4 dimer = (base_1 << 2) | base_2;
4505 sum += (double)counts[
i] *
log((
double)counts[
i] / num);
4509 return -sum * (1.0 /(
log(16.0))) + 0.5;
4535 const double kMaxFractionOfAmbiguousBases = options->
frac_ambig;
4541 for (
i = 0;
i < length;
i++) {
4542 if (sequence[
i] & 0xfc) {
4547 if ((
double)num / length > kMaxFractionOfAmbiguousBases) {
4553 if (entropy <= options->entropy) {
4565 if (query_length <= 20) {
4566 return query_length;
4568 else if (query_length <= 34) {
4571 else if (query_length < 200) {
4572 return (
Int4)(0.6 * query_length);
#define COMPRESSION_RATIO
Compression ratio of nucleotide bases (4 bases in 1 byte)
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Private interface for blast_gapalign.c.
Structures and API used for saving BLAST hits.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
BlastHSPMappingInfo * BlastHSPMappingInfoNew(void)
Allocate memory for an HSP's additional data structure.
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
int BlastHSPStreamWrite(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list)
Invokes the user-specified write function for this BlastHSPStream implementation.
#define BLASTERR_MEMORY
System error: out of memory condition.
Routines for creating nucleotide BLAST lookup tables.
Int4 BlastNaLookupTableNew(BLAST_SequenceBlk *query, BlastSeqLoc *locations, BlastNaLookupTable **lut, const LookupTableOptions *opt, const QuerySetUpOptions *query_options, Int4 lut_width)
Create a new nucleotide lookup table.
#define NA_HITS_PER_CELL
maximum number of hits in one lookup table cell
BlastNaLookupTable * BlastNaLookupTableDestruct(BlastNaLookupTable *lookup)
Free a nucleotide lookup table.
@ eMBLookupTable
megablast lookup table (includes both contiguous and discontiguous megablast)
Int4 BSearchContextInfo(Int4 n, const BlastQueryInfo *A)
Search BlastContextInfo structures for the specified offset.
static void DLIST_NAME() append(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
static DLIST_TYPE *DLIST_NAME() first(DLIST_LIST_TYPE *list)
static int lookup(const char *name, const struct lookup_int *table)
void GapPrelimEditBlockAdd(GapPrelimEditBlock *edit_block, EGapAlignOpType op_type, Int4 num_ops)
Add a new operation to a preliminary edit block, possibly combining it with the last operation if the...
EGapAlignOpType
Operation types within the edit script.
@ eGapAlignIns
Insertion: a gap in subject.
@ eGapAlignSub
Substitution.
@ eGapAlignDel
Deletion: a gap in query.
GapEditScript * GapEditScriptNew(Int4 size)
Initialize the edit script structure.
GapEditScript * GapEditScriptDelete(GapEditScript *esp)
Free edit script structure.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
#define MT_LOCK_Do(lk, how)
Call "lk->handler(lk->data, how)".
@ eMT_Unlock
unlock critical section
@ eMT_Lock
lock critical section
unsigned int
A callback function used to compare two keys in a database.
Implementation of a number of BlastHSPWriters to save the best chain of RNA-Seq hits to a genome.
HSPChain * FindPartialyCoveredQueries(void *data, Int4 oid, Int4 word_size)
Find HSP chains that do not cover the full extent of queries for a given subject.
<!DOCTYPE HTML >< html > n< header > n< title > PubSeq Gateway Help Page</title > n< style > n table
static int s_GetSeqPositions(JumperPrelimEditBlock *edit_script, Int4 edit_index, Int4 *query_pos, Int4 *subject_pos)
static JumperPrelimEditBlock * JumperPrelimEditBlockFree(JumperPrelimEditBlock *block)
Int4 BlastNaExtendJumper(BlastOffsetPair *offset_pairs, Int4 num_hits, const BlastInitialWordParameters *word_params, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, LookupTableWrap *lookup_wrap, BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, BlastQueryInfo *query_info, BlastGapAlignStruct *gap_align, BlastHSPList *hsp_list, Uint4 s_range, SubjectIndex *s_index)
Extend a list of word hits.
Int4 JumperExtendLeft(const Uint1 *query, const Uint1 *subject, Int4 query_offset, Int4 subject_offset, Int4 match_score, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, int max_mismatches, int window, int *query_ext_len, int *subject_ext_len, GapPrelimEditBlock *edit_script)
static BlastHSP * s_CreateHSPForWordHit(Int4 q_offset, Int4 s_offset, Int4 length, Int4 context, const Uint1 *query, const BlastQueryInfo *query_info, const BLAST_SequenceBlk *subject, Int4 query_len)
int JumperGappedAlignmentCompressedWithTraceback(const Uint1 *query, const Uint1 *subject, Int4 query_length, Int4 subject_length, Int4 query_start, Int4 subject_start, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 *num_identical, Int4 *right_ungapped_ext_len)
Jumper gapped alignment with traceback; 1 base per byte in query, 4 bases per byte in subject.
JumperEditsBlock * JumperEditsBlockFree(JumperEditsBlock *block)
SequenceOverhangs * SequenceOverhangsFree(SequenceOverhangs *overhangs)
int JumperFindSpliceSignals(BlastHSP *hsp, Int4 query_len, const Uint1 *subject, Int4 subject_len)
Find splice signals at the edges of an HSP and save them in the HSP.
#define UNPACK_BASE(seq, pos)
JumperGapAlign * JumperGapAlignNew(Int4 size)
static int s_ShiftGapsRight(JumperPrelimEditBlock *edit_script, const Uint1 *query, const Uint1 *subject, Int4 query_offset, Int4 subject_offset, Int4 query_length, Int4 subject_length, Int4 *score, Int4 err_score, Int4 *num_identical)
SubjectIndexIterator * SubjectIndexIteratorNew(SubjectIndex *s_index, Int4 word, Int4 from, Int4 to)
Create an iterator for locations of a given word.
Int4 SubjectIndexIteratorPrev(SubjectIndexIterator *it)
Return the previous location of a word in an indexed sequence.
Int4 JumperExtendRight(const Uint1 *query, const Uint1 *subject, int query_length, int subject_length, Int4 match_score, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, int max_mismatches, int window, int *query_ext_len, int *subject_ext_len, GapPrelimEditBlock *edit_script, Boolean left_extension)
static Int2 s_MaskSequence(Int4 offset, Int4 length, BlastSeqLoc **seq_locs)
static int s_ShiftGaps(BlastGapAlignStruct *gap_align, const Uint1 *query, const Uint1 *subject, Int4 query_length, Int4 subject_length, Int4 err_score, Int4 *num_identical)
static Int4 s_SaveSubjectOverhangs(BlastHSP *hsp, Uint1 *subject, Int4 query_len)
static void s_ResetJumperPrelimEditBlocks(JumperPrelimEditBlock *left, JumperPrelimEditBlock *right)
static Int4 s_ComputeExtensionScore(JumperPrelimEditBlock *edit_script, Int4 match_score, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score)
JumperEditsBlock * JumperEditsBlockCombine(JumperEditsBlock **block_ptr, JumperEditsBlock **append_ptr)
Int4 JumperPrelimEditBlockAdd(JumperPrelimEditBlock *block, JumperOpType op)
JumperGapAlign * JumperGapAlignFree(JumperGapAlign *jgap_align)
static Int4 s_FindDimerEntropy(Uint1 *sequence, Int4 length)
GapEditScript * GapEditScriptCombine(GapEditScript **edit_script_ptr, GapEditScript **append_ptr)
Int4 JumperExtendLeftCompressedWithTraceback(const Uint1 *query, const Uint1 *subject, Int4 query_offset, Int4 subject_offset, Int4 match_score, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, int max_mismatches, int window, Uint4 *table, Int4 *query_ext_len, Int4 *subject_ext_len, JumperPrelimEditBlock *edit_script, Int4 *num_identical, JUMP *jumper)
JumperEditsBlock * JumperEditsBlockNew(Int4 num)
Boolean JumperGoodAlign(const BlastGapAlignStruct *gap_align, const BlastHitSavingParameters *hit_params, Int4 num_identical, BlastContextInfo *context_info)
Test whether an HSP should be saved.
SubjectIndex * SubjectIndexNew(BLAST_SequenceBlk *subject, Int4 width, Int4 word_size)
Index a sequence, used for indexing compressed nucleotide subject sequence.
Int4 JumperExtendLeftCompressedWithTracebackOptimal(const Uint1 *query, const Uint1 *subject, Int4 query_offset, Int4 subject_offset, Int4 match_score, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, int max_mismatches, int window, Int4 x_drop, Uint4 *table, Int4 *query_ext_len, Int4 *subject_ext_len, JumperPrelimEditBlock *edit_script, Int4 *best_num_identical)
Int2 DoAnchoredSearch(BLAST_SequenceBlk *query, BLAST_SequenceBlk *subject, Int4 word_size, BlastQueryInfo *query_info, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, BlastHSPStream *hsp_stream)
Do a search against a single subject with smaller word size and with no database word frequency filte...
static JumperPrelimEditBlock * JumperPrelimEditBlockNew(Int4 size)
Int4 JumperExtendRightCompressedWithTraceback(const Uint1 *query, const Uint1 *subject, int query_length, int subject_length, Int4 match_score, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, int max_mismatches, int window, Uint4 *table, Int4 *query_ext_len, Int4 *subject_ext_len, JumperPrelimEditBlock *edit_script, Int4 *num_identical, Boolean left_extension, Int4 *ungapped_ext_len, JUMP *jumper)
SubjectIndexIterator * SubjectIndexIteratorFree(SubjectIndexIterator *it)
Free memory reserved for subject index word iterator.
static Int4 DoAnchoredScan(Uint1 *query_seq, Int4 query_len, Int4 query_from, Int4 context, BLAST_SequenceBlk *subject, Int4 subject_from, Int4 subject_to, BlastQueryInfo *query_info, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params, BlastHSPList *hsp_list)
Int4 JumperExtendRightCompressed(const Uint1 *query, const Uint1 *subject, int query_length, int subject_length, Int4 match_score, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, int max_mismatches, int window, Uint4 *table, Int4 *query_ext_len, Int4 *subject_ext_len, Int4 *num_identical, Int4 *ungapped_ext_len)
Int4 JumperExtendRightCompressedWithTracebackOptimal(const Uint1 *query, const Uint1 *subject, int query_length, int subject_length, Int4 match_score, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, int max_mismatches, int window, Int4 x_drop, Uint4 *table, Int4 *query_ext_len, Int4 *subject_ext_len, JumperPrelimEditBlock *edit_script, Int4 *best_num_identical, Boolean left_extension, Int4 *ungapped_ext_len)
SubjectIndex * SubjectIndexFree(SubjectIndex *sindex)
Free subject index structure.
static int s_CompareOffsetPairsByDiagQuery(const void *a, const void *b)
JumperEditsBlock * JumperFindEdits(const Uint1 *query, const Uint1 *subject, BlastGapAlignStruct *gap_align)
Int2 FilterQueriesForMapping(Uint1 *sequence, Int4 length, Int4 offset, const SReadQualityOptions *options, BlastSeqLoc **seq_loc)
Int4 JumperExtendLeftCompressed(const Uint1 *query, const Uint1 *subject, Int4 query_offset, Int4 subject_offset, Int4 match_score, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, int max_mismatches, int window, Uint4 *table, Int4 *query_ext_len, Int4 *subject_ext_len, Int4 *num_identical)
#define JUMPER_EDIT_BLOCK_ADD(block, op)
JumperEditsBlock * JumperEditsBlockDup(const JumperEditsBlock *block)
GapEditScript * JumperPrelimEditBlockToGapEditScript(JumperPrelimEditBlock *rev_prelim_block, JumperPrelimEditBlock *fwd_prelim_block)
Convert Jumper's preliminary edit script to GapEditScript.
static BlastHSP * s_CreateHSP(Uint1 *query_seq, Int4 query_len, Int4 context, BlastQueryInfo *query_info, BlastGapAlignStruct *gap_align, BLAST_SequenceBlk *subject, const BlastScoringParameters *score_params, const BlastHitSavingParameters *hit_params)
Int4 SubjectIndexIteratorNext(SubjectIndexIterator *it)
Return the next location of a word in an indexed sequence.
static void s_CreateTable(Uint4 *table)
static void s_TrimExtension(JumperPrelimEditBlock *jops, int margin, const Uint1 **cp, Int4 *cq, Int4 *num_identical, Boolean is_right_ext)
Int4 GetCutoffScore(Int4 query_length)
Get alignment cutoff score for a given query length.
static void s_SubjectIndexNewCleanup(BLAST_SequenceBlk *sequence, BlastSeqLoc *seqloc, LookupTableOptions *opt, QuerySetUpOptions *query_opt, SubjectIndex *sindex)
Int4 JumperExtendRightWithTraceback(const Uint1 *query, const Uint1 *subject, int query_length, int subject_length, Int4 match_score, Int4 mismatch_score, Int4 gap_open_score, Int4 gap_extend_score, int max_mismatches, int window, Int4 *query_ext_len, Int4 *subject_ext_len, JumperPrelimEditBlock *edit_script, Int4 *num_identical, Boolean left_extension, Int4 *ungapped_ext_len, JUMP *jumper)
Right extension with traceback.
Int2 JumperOpType
Jumper edit script operation.
#define SUBJECT_INDEX_WORD_LENGTH
const struct ncbi::grid::netcache::search::fields::SIZE size
#define MIN(a, b)
returns smaller of a and b.
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define ASSERT
macro for assert.
#define MAX(a, b)
returns larger of a and b.
static SLJIT_INLINE sljit_ins st(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
HSPChain * HSPChainFree(HSPChain *chain_list)
Deallocate a chain or list of chains.
Structure to hold a sequence.
Uint1 * sequence
Sequence used for search (could be translation).
The context related information.
Int4 query_length
Length of this query, strand or frame.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int1 frame
Frame number (-1, -2, -3, 0, 1, 2, or 3)
Structure supporting the gapped alignment.
Int4 gap_x_dropoff
X-dropoff parameter to use.
Int4 query_stop
query end offset of current alignment
Int4 subject_start
subject start offset of current alignment
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
Int4 max_mismatches
Max number of mismatches for jumper.
Int4 mismatch_window
Window size for mismatches for jumper.
JumperGapAlign * jumper
data for jumper alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Uint1 left_edge
Two subject bases before the alignment in the four least significant bits and flags in most significa...
JumperEditsBlock * edits
Information about mismatches and gaps, used for mapping short reads.
SequenceOverhangs * subject_overhangs
Unaligned subject subsequence.
Default implementation of BlastHSPStream.
BlastHSPWriter * writer
writer to be applied when writing
MT_LOCK x_lock
Mutex for writing and reading results.
void * data
data structure
Structure holding all information about an HSP.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
Int4 context
Context number of query.
BlastSeg subject
Subject sequence info.
Int4 score
This HSP's raw score.
BlastHSPMappingInfo * map_info
Int4 longest_intron
The longest distance between HSPs allowed for combining via sum statistics with uneven gaps.
Int4 cutoff_score
The (raw) score cut-off threshold.
Int4 max_edit_distance
Maximum number of mismatches and gaps.
Int4 cutoff_score_fun[2]
Coefficients x100 for the raw score cut-off threshold as a function of query length: x[0] + x[1] * qu...
double percent_identity
The percent identity cut-off threshold.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
BlastHitSavingOptions * options
The original (unparsed) options.
Parameter block that contains a pointer to BlastInitialWordOptions and the values derived from it.
The lookup table structure used for Mega BLAST.
Int4 lut_word_length
number of letters in a lookup table word
Int4 word_length
number of exact letter matches that will trigger an ungapped extension
The basic lookup table structure for blastn searches.
Int4 lut_word_length
Length in bases of a word indexed by the lookup table.
Int4 word_length
Length in bases of the full word match required to trigger extension.
The query related information.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
Int4 gap_extend
Penalty for each gap residue (scaled version)
Int2 penalty
Penalty for a mismatch.
Int4 gap_open
Extra penalty for starting a gap (scaled version)
Int2 reward
Reward for a match.
Used to hold a set of positions, mostly used for filtering.
SSeqRange * ssr
location data on the sequence.
struct BlastSeqLoc * next
next in linked list
Edit script: linked list of correspondences between two sequences.
Int4 * num
Array of number of operations.
Int4 size
Size of above arrays.
EGapAlignOpType * op_type
Array of type of operation.
Preliminary version of GapEditBlock, used directly by the low- level dynamic programming routines.
A chain of HSPs: spliced alignment.
HSPContainer * hsps
A list of HSPs that belong to this chain.
struct HSPChain * next
Pointer to the next chain in a list.
struct HSPContainer * next
Alignment edit script for gapped alignment.
Gapped alignment data needed for jumper.
JumperPrelimEditBlock * left_prelim_block
Uint4 * table
Table used for matching 4 bases in compressed subject to 4 bases in uncompressed query.
JumperPrelimEditBlock * right_prelim_block
Internal alignment edit script.
Options needed to construct a lookup table. Also needed: query sequence and query length.
Int4 word_size
Determines the size of the lookup table.
Wrapper structure for different types of BLAST lookup tables.
void * lut
Pointer to the actual lookup table structure.
ELookupTableType lut_type
What kind of a lookup table it is?
Options required for setting up the query sequence.
Filtering options for mapping next-generation sequences.
double frac_ambig
Fraction of ambiguous bases.
A structure containing two integers, used e.g.
Int4 left
left endpoint of range (zero based)
Int4 right
right endpoint of range (zero based)
Structure to save short unaligned subsequences outside an HSP.
Uint1 * left
Left subsequence.
Uint1 * right
Right subsequence.
Int4 right_len
Length of the right subsequence.
Int4 left_len
Length of the left subsequence.
Iterator over word locations in subject index.
SubjectIndex * subject_index
Index for a chunk of a subject sequence.
Int4 num_lookups
Number of lookup tables used.
Int4 width
Number of bases covered by each lookup table.
BlastNaLookupTable ** lookups
Array of lookup tables.
This symbol enables the verbose option in makeblastdb and other BLAST+ search command line applicatio...
Uint4 s_off
Subject offset.
struct BlastOffsetPair::@6 qs_offsets
Query/subject offset pair.
static CS_CONTEXT * context
voidp calloc(uInt items, uInt size)