60 # define STDERR_COMMA stderr,
76 #ifndef KAPPA_BLASTP_NO_SEG_SEQUENCE
77 #define KAPPA_BLASTP_NO_SEG_SEQUENCE 0
84 #ifndef KAPPA_TBLASTN_NO_SEG_SEQUENCE
85 #define KAPPA_TBLASTN_NO_SEG_SEQUENCE 0
108 for(hsp_index = 0; hsp_index < hsp_list->
hspcnt; hsp_index++) {
145 double best_evalue = DBL_MAX;
153 double query_eff =
MAX((query_length - length_adjustment), 1);
154 double subject_eff =
MAX((subject_length - length_adjustment), 1.0);
155 double dblen_eff = (double) query_context->
eff_searchsp / query_eff;
158 double db_to_sequence_scale = subject_eff / dblen_eff;
161 for (hsp_index = 0; hsp_index < hsp_list->
hspcnt; hsp_index++) {
163 double align_p_value;
164 double combined_p_value;
168 #ifdef KAPPA_PRINT_DIAGNOSTICS
170 double old_e_value = hsp->
evalue;
172 hsp->
evalue *= db_to_sequence_scale;
177 hsp->
evalue /= db_to_sequence_scale;
179 if (hsp->
evalue < best_evalue) {
180 best_evalue = hsp->
evalue;
183 #ifdef KAPPA_PRINT_DIAGNOSTICS
186 Blast_GiList* gi_list;
187 gi_list = BlastSeqSrcGetGis(seqSrc, (
void *) (&subject_id));
188 if ((gi_list) && (gi_list->num_used > 0)) {
189 sequence_gi = gi_list->data[0];
193 printf(
"GI %d Lambda ratio %e comp. p-value %e; "
194 "adjust E-value of query length %d match length "
195 "%d from %e to %e\n",
196 sequence_gi, LambdaRatio, comp_p_value,
197 query_length, subject_length, old_e_value, hsp->
evalue);
198 Blast_GiListFree(gi_list);
230 old_hspcnt = *hspcnt;
232 for (iread = 1; iread < *hspcnt; iread++) {
237 hsp1 = hsp_array[iread];
238 for (ireadBack = 0; ireadBack < iread && hsp1 !=
NULL; ireadBack++) {
243 hsp2 = hsp_array[ireadBack];
268 for (iread = 0; iread < *hspcnt; iread++) {
269 if (hsp_array[iread] !=
NULL) {
270 hsp_array[iwrite++] = hsp_array[iread];
275 for ( ; iwrite < old_hspcnt; iwrite++) {
276 hsp_array[iwrite] =
NULL;
285 if (edit_script !=
NULL)
312 static const int unknown_value = 0;
316 if (hsp_list ==
NULL) {
321 for (align = *alignments;
NULL != align; align = align->
next) {
328 unknown_value, unknown_value,
331 &editScript, &new_hsp);
404 double pvalueForThisPair,
409 *pbestEvalue = DBL_MAX;
431 if ((0 <= pvalueForThisPair) && (pvalueForThisPair <= 1)) {
434 &queryInfo->
contexts[context_index],
435 LambdaRatio, subject_id);
440 if (hsp_list->
hspcnt > 0) {
445 return status == 0 ? 0 : -1;
465 const Uint1* gen_code_string,
480 if ( !hsp_list)
return;
484 memset((
void*) &seq_arg, 0,
sizeof(seq_arg));
485 seq_arg.oid = hsp_list->
oid;
486 seq_arg.encoding = encoding;
487 seq_arg.check_oid_exclusion =
TRUE;
488 seq_arg.ranges = ranges;
494 subject_blk = seq_arg.seq;
503 subject = seq_arg.seq->sequence;
551 s_CalcLambda(
double probs[],
int min_score,
int max_score,
double lambda0)
559 score_range = max_score - min_score + 1;
561 for (
i = 0;
i < score_range;
i++) {
562 avg += (min_score +
i) * probs[
i];
569 freq.
sprob = &probs[-min_score];
595 const char *matrixName,
596 double **startNumerator)
601 double *standardProb;
607 if (stdFreqRatios ==
NULL) {
610 for (
i = 0;
i < numPositions;
i++) {
612 returnRatios[
i][j] = stdFreqRatios->
data[
query[
i]][j];
618 if(standardProb ==
NULL) {
622 for (
i = 0;
i < numPositions;
i++) {
628 returnRatios[
i][j] = startNumerator[
i][j] / standardProb[j];
650 const char *matrixName)
658 if (stdFreqRatios ==
NULL) {
663 returnRatios[
i][j] = stdFreqRatios->
data[
i][j];
676 #define SCALING_FACTOR 32
695 const char * matrixName,
712 fillPosMatrix, posFreqs);
716 if (posSearch ==
NULL || compactSearch ==
NULL || internal_pssm ==
NULL) {
727 internal_pssm->
nrows);
743 scale_factor,
FALSE, sbp);
775 double localScalingFactor)
781 for (frame_index = 0; frame_index < 6; frame_index++) {
782 tail[frame_index] =
NULL;
783 numAligns[frame_index] = 0;
786 for (hsp_index = 0; hsp_index < hspcnt; hsp_index++) {
787 BlastHSP * hsp = hsp_array[hsp_index];
789 frame_index = hsp->
context - init_context;
790 ASSERT(frame_index < 6 && frame_index >= 0);
801 if (new_align ==
NULL)
803 if (tail[frame_index] ==
NULL) {
805 self[frame_index] = new_align;
808 tail[frame_index]->
next = new_align;
810 tail[frame_index] = new_align;
811 numAligns[frame_index]++;
853 Int4 * queryAlignmentExtent,
854 Int4 * matchAlignmentExtent,
857 Int4 XdropAlignScore;
859 Int4 doublingCount = 0;
868 &(
subject->data[matchStart]) - 1,
869 queryEnd - queryStart + 1, matchEnd - matchStart + 1,
870 queryAlignmentExtent,
872 gap_align, scoringParams, queryStart - 1,
FALSE,
FALSE,
877 if((XdropAlignScore < score) && (doublingCount < 3)) {
880 }
while((XdropAlignScore < score) && (doublingCount < 3));
883 *newScore = XdropAlignScore;
911 if (self->index >=0) {
913 if (self->length > 0) {
918 free(self->local_data);
920 self->local_data =
NULL;
945 Uint1* subject_seq,
int subject_len,
947 int* query_ext_len,
int* subject_ext_len,
950 int num_identical = 0;
952 int gaps_in_query = 0;
953 int gaps_in_subject = 0;
956 while (q_pos < query_len && s_pos < subject_len) {
960 while (q_pos < query_len && s_pos < subject_len
961 && query_seq[q_pos] == subject_seq[s_pos]) {
969 for (
n=1;
n < max_shift && q_pos +
n + 1 < query_len
970 && s_pos +
n + 1 < subject_len && !
match;
n++) {
973 if (query_seq[q_pos +
n] == subject_seq[s_pos +
n]
974 && query_seq[q_pos +
n + 1] == subject_seq[s_pos +
n + 1]) {
985 if (!
match && query_seq[q_pos +
n] == subject_seq[s_pos]
986 && query_seq[q_pos +
n + 1] == subject_seq[s_pos + 1]) {
991 gaps_in_subject +=
n;
996 if (!
match && query_seq[q_pos] == subject_seq[s_pos +
n]
997 && query_seq[q_pos + 1] == subject_seq[s_pos +
n + 1]) {
1014 *query_ext_len = q_pos;
1015 *subject_ext_len = s_pos;
1016 *align_len = q_pos > s_pos ? q_pos + gaps_in_query : s_pos + gaps_in_subject;
1018 return num_identical;
1040 Uint1* subject_seq,
int subject_len,
1042 int* query_ext_len,
int* subject_ext_len,
1045 int q_pos = query_len - 1;
1046 int s_pos = subject_len - 1;
1047 int num_identical = 0;
1048 int gaps_in_query = 0;
1049 int gaps_in_subject = 0;
1050 while (q_pos >= 0 && s_pos >= 0) {
1055 while (q_pos > 0 && s_pos > 0 && query_seq[q_pos] == subject_seq[s_pos]) {
1062 for (
n=1;
n < max_shift && q_pos -
n - 1 > 0 && s_pos -
n - 1 > 0
1066 if (query_seq[q_pos -
n] == subject_seq[s_pos -
n]
1067 && query_seq[q_pos -
n - 1] == subject_seq[s_pos -
n - 1]) {
1075 if (!
match && query_seq[q_pos -
n] == subject_seq[s_pos]
1076 && query_seq[q_pos -
n - 1] == subject_seq[s_pos - 1]) {
1080 gaps_in_subject +=
n;
1085 if (!
match && query_seq[q_pos] == subject_seq[s_pos -
n]
1086 && query_seq[q_pos - 1] == subject_seq[s_pos -
n - 1]) {
1101 *query_ext_len = query_len - q_pos - 1;
1102 *subject_ext_len = subject_len - s_pos - 1;
1103 *align_len += *query_ext_len > *subject_ext_len ?
1104 *query_ext_len + gaps_in_query : *subject_ext_len + gaps_in_subject;
1106 return num_identical;
1121 for (k=0;k < word_size;k++) {
1144 const Uint8* query_hashes,
1155 int subject_from = 0;
1158 int num_identical = 0;
1162 if (!query_seq || !query_hashes || !subject_seq
1163 || query_len < word_size || subject_len < word_size) {
1169 for (s_pos = 0; s_pos < subject_len - word_size; s_pos++) {
1173 if (s_pos == 0 ||
match) {
1179 hash += subject_seq[s_pos + word_size - 1];
1184 for (q_pos = query_from;q_pos < query_len - word_size; q_pos++) {
1185 if (query_hashes[q_pos] ==
hash) {
1191 if (q_pos < query_len - word_size) {
1192 int query_start = q_pos;
1193 int subject_start = s_pos;
1195 int query_left_len, query_right_len;
1196 int subject_left_len, subject_right_len;
1197 int align_len_left=0, align_len_right=0;
1200 num_identical += word_size;
1204 query_start - query_from,
1205 subject_seq + subject_from,
1206 subject_start - subject_from,
1208 &query_left_len, &subject_left_len,
1212 num_identical +=
s_ExtendRight(query_seq + query_start + word_size,
1213 query_len - query_start - word_size,
1214 subject_seq + subject_start + word_size,
1215 subject_len - subject_start - word_size,
1217 &query_right_len, &subject_right_len,
1224 query_from = query_start + word_size + query_right_len;
1225 subject_from = subject_start + word_size + subject_right_len;
1227 s_pos = subject_from - 1;
1234 return num_identical;
1260 const int seqOffset,
1262 const int queryOffset,
1263 const Uint8* query_words,
1266 int qStart = align->
queryStart - queryOffset;
1268 int qEnd = align->
queryEnd - queryOffset - 1;
1270 int sEnd = align->
matchEnd - seqOffset - 1;
1271 const double kMinFractionNearIdentical = 0.95;
1274 int query_len = qEnd - qStart + 1;
1275 int subject_len = sEnd - sStart + 1;
1276 int align_len =
MIN(query_len, subject_len);
1278 int query_left_len = 0;
1279 int subject_left_len = 0;
1280 int query_right_len = 0;
1281 int subject_right_len = 0;
1282 int align_left_len = 0;
1283 int align_right_len = 0;
1285 double fraction_identical;
1290 seqData->
data + sStart, subject_len,
1292 &query_right_len, &subject_right_len,
1296 if (query_right_len >= query_len || subject_right_len >= subject_len) {
1297 fraction_identical = (double)num_identical / (
double)align_len;
1298 ASSERT(fraction_identical - 1.0 < 1e-10);
1299 return fraction_identical > kMinFractionNearIdentical;
1305 query_len - query_right_len,
1306 seqData->
data + sStart + subject_right_len,
1307 subject_len - subject_right_len,
1309 &query_left_len, &subject_left_len,
1314 if (query_left_len + query_right_len >= query_len
1315 || subject_left_len + subject_right_len >= subject_len) {
1317 fraction_identical = (double)num_identical / (
double)(align_len);
1318 ASSERT(fraction_identical - 1.0 < 1e-10);
1319 return fraction_identical > kMinFractionNearIdentical;
1325 query_words + qStart + query_right_len,
1326 query_len - query_left_len - query_right_len,
1327 seqData->
data + sStart + subject_right_len,
1328 subject_len - subject_left_len - subject_right_len,
1331 fraction_identical = (double)num_identical / (
double)align_len;
1332 ASSERT(fraction_identical - 1.0 < 1e-10);
1333 if (fraction_identical > kMinFractionNearIdentical) {
1360 Int4 default_db_genetic_code,
1367 self->local_data =
NULL;
1370 if (seq_info !=
NULL) {
1371 self->local_data = seq_info;
1376 memset((
void*) &seq_info->
seq_arg, 0,
sizeof(seq_info->
seq_arg));
1377 seq_info->
seq_arg.
oid =
self->index = subject_index;
1403 if (self->length == 0) {
1414 #define BLASTP_MASK_RESIDUE 21
1416 #define BLASTP_MASK_INSTRUCTIONS "S 10 1.8 2.1"
1438 &filter_options,
NULL);
1441 seqData->
length, 0, filter_options,
1442 &mask_seqloc,
NULL);
1445 if (is_seq_biased) {
1446 *is_seq_biased = (mask_seqloc !=
NULL);
1452 if (mask_seqloc !=
NULL) {
1480 const Uint8* query_words,
1482 const Boolean shouldTestIdentical,
1484 const Boolean isSmithWaterman,
1485 Boolean* subject_maybe_biased)
1491 Uint1 * translation_buffer;
1493 Int4 translated_length;
1494 int translation_frame;
1495 Uint1 * na_sequence;
1496 int translation_start;
1498 int num_nucleotides;
1500 local_data =
self->local_data;
1508 translation_frame =
range->context;
1509 if (translation_frame > 0) {
1510 translation_start = 3 *
range->begin;
1513 self->length - 3 *
range->end + translation_frame + 1;
1516 3 * (
range->end -
range->begin) +
ABS(translation_frame) - 1;
1520 (
Int2) translation_frame,
1522 &translation_buffer,
1526 seqData->
buffer = translation_buffer;
1527 seqData->
data = translation_buffer + 1;
1528 seqData->
length = translated_length;
1531 if (compo_adjust_mode
1532 && (!subject_maybe_biased || *subject_maybe_biased)) {
1534 if ( (!shouldTestIdentical)
1535 || (shouldTestIdentical
1537 queryData, q_range->
begin,
1538 query_words, align)))) {
1541 subject_maybe_biased);
1578 const Uint8* query_words,
1580 const Boolean shouldTestIdentical,
1582 const Boolean isSmithWaterman,
1583 Boolean* subject_maybe_biased)
1593 if (self->local_data ==
NULL)
1606 seqData->
length =
self->length;
1610 if((self->index < 0) && (align->
frame != 0)) {
1611 int i=0, offsets =0;
1617 origData += offsets;
1621 for (idx = 0; idx < seqData->
length; idx++) {
1622 seqData->
data[idx] = origData[idx];
1626 if (compo_adjust_mode
1627 && (!subject_maybe_biased || *subject_maybe_biased)) {
1629 if ( (!shouldTestIdentical)
1630 || (shouldTestIdentical
1632 q_range->
begin, query_words,
1636 subject_maybe_biased);
1642 *seqData->
data++ =
'\0';
1677 const Uint8* query_words,
1679 const Boolean shouldTestIdentical,
1681 const Boolean isSmithWaterman,
1682 Boolean* subject_maybe_biased)
1692 for (idx = 0; idx < queryData->
length; idx++) {
1695 queryData->
data[idx] = (origData[idx] != 24) ? origData[idx] : 3;
1700 q_range, queryData, query_words,
1701 align, shouldTestIdentical,
1702 compo_adjust_mode, isSmithWaterman,
1703 subject_maybe_biased);
1706 q_range, queryData, query_words,
1707 align, shouldTestIdentical,
1708 compo_adjust_mode, isSmithWaterman,
1709 subject_maybe_biased);
1755 int queryStart, queryEnd, queryIndex, matchStart, matchEnd, frame;
1762 queryIndex = query_range->
context;
1765 frame = subject_range->
context;
1768 queryStart, queryEnd, queryIndex,
1769 matchStart, matchEnd, frame,
1772 *edit_script =
NULL;
1818 Int4 ccat_query_length,
1821 Int4 full_subject_length,
1829 Int4 queryExtent, matchExtent;
1842 (void) ccat_query_length;
1843 (void) full_subject_length;
1848 subject, matchStart, *pmatchEnd,
1849 gap_align, scoringParams,
1850 score, &queryExtent, &matchExtent,
1852 *pqueryEnd = queryStart + queryExtent;
1853 *pmatchEnd = matchStart + matchExtent;
1858 if (editScript !=
NULL) {
1860 Int4 aqueryStart = queryStart + query_range->
begin;
1861 Int4 aqueryEnd = *pqueryEnd + query_range->
begin;
1862 Int4 amatchStart = matchStart + subject_range->
begin;
1863 Int4 amatchEnd = *pmatchEnd + subject_range->
begin;
1866 aqueryStart, aqueryEnd,
1868 amatchStart, amatchEnd,
1869 subject_range->
context, editScript);
1876 return obj !=
NULL ? 0 : -1;
1903 int ccat_query_length,
1906 int full_subject_length,
1910 Int4 q_start, s_start;
1921 (void) ccat_query_length;
1922 (void) full_subject_length;
1937 subject_data->
data, gapAlign,
1945 query_range, subject_range,
1946 matrix_adjust_rule);
1995 sfree(*searchParams);
1996 *searchParams =
NULL;
2029 for (
i = 0;
i < numQueries;
i++) {
2033 if (positionBased) {
2089 if (positionBased) {
2091 rows = query_length;
2097 for (
i = 0;
i < rows;
i++) {
2120 double scale_factor)
2123 for (
i = 0;
i < num_queries;
i++) {
2126 kbp->
Lambda /= scale_factor;
2172 if (positionBased) {
2174 rows = query_length;
2179 for (
i = 0;
i < rows;
i++) {
2202 double scale_factor,
2203 const char * matrixName)
2209 lenName = strlen(matrixName);
2210 if (
NULL == (self->matrixName =
malloc(lenName + 1))) {
2213 memcpy(self->matrixName, matrixName, lenName + 1);
2215 if (self->positionBased) {
2225 queryBlk->
length, sbp, scale_factor);
2226 self->ungappedLambda = sbp->
kbp_psi[0]->
Lambda / scale_factor;
2233 self->startFreqRatios,
2234 self->ungappedLambda);
2247 Uint8* query_hashes;
2253 if (!seq_data || !words || seq_len < word_size) {
2257 query_hashes = (
Uint8*)
calloc((seq_len - word_size + 1),
2259 *words = query_hashes;
2261 if (!query_hashes) {
2267 query_hashes[0] =
s_GetHash(&seq_data[0], word_size);
2268 for (
i = 1;
i < seq_len - word_size;
i++) {
2269 query_hashes[
i] = query_hashes[
i - 1];
2270 query_hashes[
i] <<= 5;
2271 query_hashes[
i] &=
mask;
2272 query_hashes[
i] += (
Uint8)seq_data[
i + word_size - 1];
2288 for (
i = 0;
i < num_queries;
i++) {
2289 if ((*query_info)[
i].words) {
2290 free((*query_info)[
i].words);
2319 if (compo_query_info !=
NULL) {
2320 for (
i = 0;
i < num_queries;
i++) {
2332 &query_info->
words);
2340 return compo_query_info;
2360 double min_lambda = DBL_MAX;
2367 if (gapping_params ==
NULL)
2372 gapping_params->
context = context;
2374 for (
i = 0;
i < num_queries;
i++) {
2385 return gapping_params;
2399 #define NEAR_IDENTICAL_BITS_PER_POSITION (1.74)
2418 gapping_params =
NULL;
2437 double near_identical_cutoff=0;
2440 index <= queryInfo->last_context; ++index) {
2443 near_identical_cutoff =
2469 if (gapping_params ==
NULL) {
2474 compo_adjust_mode, positionBased,
2475 query_is_translated,
2476 subject_is_translated,
2479 near_identical_cutoff);
2501 for (query_index = 0; query_index < num_queries; query_index++) {
2535 while (
copy->state_struct !=
NULL) {
2537 copy->state_struct =
copy->state_struct->next;
2548 if (
copy->edit_script->op_type) {
2551 if (
copy->edit_script->num) {
2558 if (
copy->fwd_prelim_tback !=
NULL) {
2559 if (
copy->fwd_prelim_tback->edit_ops) {
2560 sfree(
copy->fwd_prelim_tback->edit_ops);
2566 if (
copy->rev_prelim_tback !=
NULL) {
2567 if (
copy->rev_prelim_tback->edit_ops) {
2568 sfree(
copy->rev_prelim_tback->edit_ops);
2574 if (
copy->greedy_align_mem !=
NULL) {
2623 copy->state_struct = c;
2651 copy->edit_script = c;
2659 for (
i = 0;
i < o->
size; ++
i) {
2672 copy->fwd_prelim_tback = c;
2692 copy->rev_prelim_tback = c;
2712 copy->greedy_align_mem = c;
2768 Uint1 alphabet_code,
2769 Int4 number_of_contexts
2773 orig->alphabet_code,
2774 orig->number_of_contexts
2780 copy->alphabet_start =
orig->alphabet_start;
2791 for (
i = 0;
i <
orig->matrix->ncols; ++
i) {
2794 orig->matrix->data[
i],
2795 m->
nrows *
sizeof(
int)
2802 orig->matrix->freqs,
2803 m->
ncols *
sizeof(
double)
2810 &&
orig->psi_matrix->pssm !=
NULL) {
2819 for (
i = 0;
i <
orig->psi_matrix->pssm->ncols; ++
i) {
2822 orig->psi_matrix->pssm->data[
i],
2823 m->
nrows *
sizeof(
int)
2828 &&
orig->psi_matrix->pssm->freqs !=
NULL) {
2831 orig->psi_matrix->pssm->freqs,
2832 m->
ncols *
sizeof(
double)
2837 &&
orig->psi_matrix->freq_ratios !=
NULL) {
2839 for (
i = 0;
i <
orig->psi_matrix->pssm->ncols; ++
i) {
2842 orig->psi_matrix->freq_ratios[
i],
2843 orig->psi_matrix->pssm->nrows *
sizeof(
double)
2847 if (
orig->psi_matrix->kbp !=
NULL) {
2851 copy->matrix_only_scoring =
orig->matrix_only_scoring;
2852 copy->complexity_adjusted_scoring =
orig->complexity_adjusted_scoring;
2857 copy->read_in_matrix =
orig->read_in_matrix;
2928 memcpy(
copy->ambiguous_res,
orig->ambiguous_res,
orig->ambig_size);
2930 copy->ambig_size =
orig->ambig_size;
2931 copy->ambig_occupy =
orig->ambig_occupy;
2932 copy->round_down =
orig->round_down;
2948 Int4 default_db_genetic_code,
2965 default_db_genetic_code,
2988 Int4 default_db_genetic_code,
2997 int status_code = 0;
3000 double localScalingFactor;
3025 double inclusion_ethresh;
3030 int* numContexts_tld =
NULL;
3031 int* numQueries_tld =
NULL;
3032 int* compositionTestIndex_tld =
NULL;
3058 if (positionBased) {
3061 if ((
int) compo_adjust_mode > 1) {
3069 if ((
int) compo_adjust_mode > 1 &&
3074 inclusion_ethresh = (psiOptions
3077 ASSERT(inclusion_ethresh != 0.0);
3079 int actual_num_threads = 1;
3081 actual_num_threads = num_threads;
3087 compo_adjust_mode, positionBased);
3088 if (savedParams ==
NULL) {
3090 goto function_cleanup;
3096 if (status_code != 0) {
3097 goto function_cleanup;
3107 localScalingFactor = 1.0;
3115 if (status_code != 0) {
3116 return (
Int2) status_code;
3120 if (redoneMatches ==
NULL) {
3122 goto function_cleanup;
3124 for (query_index = 0; query_index < numQueries; query_index++) {
3129 if (status_code != 0) {
3130 goto function_cleanup;
3164 Int4*** matrix_tld =
3181 redo_align_params_tld =
3186 int* status_code_tld =
3226 compositionTestIndex_tld =
3243 for (
i = 0;
i < actual_num_threads; ++
i) {
3249 if (query_info_tld[
i] ==
NULL) {
3251 goto function_cleanup;
3265 if (status_code != 0) {
3266 goto function_cleanup;
3270 numContexts_tld[
i] = numContexts;
3271 numQueries_tld[
i] = numQueries;
3272 compositionTestIndex_tld[
i] = compositionTestIndex;
3276 score_params_tld[
i] = scoringParams;
3280 subjectBlk_tld[
i] = subjectBlk;
3282 redoneMatches_tld[
i] =
3284 if (redoneMatches_tld[
i] ==
NULL) {
3286 goto function_cleanup;
3288 for (query_index = 0; query_index < numQueries; query_index++) {
3293 if (status_code != 0) {
3294 goto function_cleanup;
3313 if (savedParams_tld[
i] ==
NULL) {
3315 goto function_cleanup;
3325 if (status_code != 0) {
3326 goto function_cleanup;
3329 if ((
int) compo_adjust_mode > 1 && !positionBased) {
3335 if (status_code != 0) {
3336 goto function_cleanup;
3340 gapping_params_context_tld[
i].
gap_align = gap_align_tld[
i];
3342 gapping_params_context_tld[
i].
sbp = sbp_tld[
i];
3344 gapping_params_context_tld[
i].
prog_number = program_number;
3346 redo_align_params_tld[
i] =
3348 &gapping_params_context_tld[
i],
3354 if (redo_align_params_tld[
i] ==
NULL) {
3356 goto function_cleanup;
3359 if (positionBased) {
3365 if (matrix_tld[
i] ==
NULL) {
3366 goto function_cleanup;
3378 struct BlastHSPListLinkedList {
3380 struct BlastHSPListLinkedList*
next;
3382 typedef struct BlastHSPListLinkedList BlastHSPListLinkedList;
3386 if (hsp_stream ==
NULL) {
3388 *theseMatches = thisMatch;
3392 BlastHSPListLinkedList*
head =
NULL;
3393 BlastHSPListLinkedList* tail =
NULL;
3400 BlastHSPListLinkedList* entry =
3401 (BlastHSPListLinkedList*)
calloc(
3403 sizeof(BlastHSPListLinkedList)
3405 entry->match = localMatch;
3420 for (
i = 0;
i < numMatches; ++
i) {
3421 theseMatches[
i] =
head->match;
3422 BlastHSPListLinkedList* here =
head;
3429 #pragma omp parallel \
3430 default(none) num_threads(actual_num_threads) \
3431 if(actual_num_threads>1) \
3432 shared(interrupt, seqsrc_tld, score_params_tld, hit_params_tld, \
3433 gap_align_tld, results_tld, \
3434 redoneMatches_tld, \
3436 numMatches, theseMatches, \
3437 numFrames, program_number, subjectBlk_tld, positionBased, \
3438 default_db_genetic_code, localScalingFactor, queryInfo, \
3439 sbp, smithWaterman, numQueries_tld, compositionTestIndex_tld, forbidden_tld, \
3440 NRrecord_tld, actual_num_threads, sbp_tld, \
3441 matrix_tld, query_info_tld, numContexts_tld, \
3442 genetic_code_string, queryBlk, compo_adjust_mode, \
3443 alignments_tld, incoming_align_set_tld, savedParams_tld, \
3444 scoringParams, redo_align_params_tld, \
3448 #pragma omp for schedule(static)
3449 for (
b = 0;
b < numMatches; ++
b) {
3450 #pragma omp flush(interrupt)
3468 void* discarded_aligns =
NULL;
3477 int compositionTestIndex;
3483 double pvalueForThisPair = (-1);
3489 if(actual_num_threads > 1) {
3490 tid = omp_get_thread_num();
3493 seqSrc = seqsrc_tld[tid];
3494 scoringParams = score_params_tld[tid];
3495 hitParams = hit_params_tld[tid];
3496 redoneMatches = redoneMatches_tld[tid];
3497 alignments = alignments_tld[tid];
3498 incoming_align_set = incoming_align_set_tld[tid];
3499 NRrecord = NRrecord_tld[tid];
3501 redo_align_params = redo_align_params_tld[tid];
3502 matrix = matrix_tld[tid];
3503 pStatusCode = &status_code_tld[tid];
3504 query_info = query_info_tld[tid];
3505 numContexts = numContexts_tld[tid];
3506 numQueries = numQueries_tld[tid];
3507 compositionTestIndex = compositionTestIndex_tld[tid];
3508 subjectBlk = subjectBlk_tld[tid];
3509 forbidden = forbidden_tld[tid];
3517 if(actual_num_threads > 1) {
3518 #pragma omp critical(intrpt)
3520 #pragma omp flush(interrupt)
3534 if(actual_num_threads > 1) {
3535 #pragma omp critical(intrpt)
3537 #pragma omp flush(interrupt)
3543 context_index = query_index * numFrames;
3557 matchingSeq.
index = -1;
3564 default_db_genetic_code,
3568 if (*pStatusCode != 0) {
3574 goto match_loop_cleanup;
3586 if (*pStatusCode != 0) {
3587 goto match_loop_cleanup;
3591 for (frame_index = 0;
3592 frame_index < numFrames;
3593 frame_index++, context_index++) {
3594 incoming_aligns = incoming_align_set[frame_index];
3595 if (!incoming_aligns) {
3601 kbp = sbp->
kbp_gap[context_index];
3602 if (smithWaterman) {
3608 numAligns[frame_index],
3620 compositionTestIndex,
3629 numAligns[frame_index],
3639 compositionTestIndex,
3644 if (*pStatusCode != 0) {
3645 goto match_loop_cleanup;
3648 if (alignments[context_index] !=
NULL) {
3649 Int2 qframe = frame_index;
3654 qframe = 2 - qframe;
3659 &alignments[context_index],
3663 goto match_loop_cleanup;
3667 incoming_align_set[frame_index] =
NULL;
3670 if (hsp_list->
hspcnt > 1) {
3680 queryInfo, context_index,
3682 pvalueForThisPair, LambdaRatio,
3684 if (*pStatusCode != 0) {
3685 goto query_loop_cleanup;
3687 if (best_evalue <= hitParams->options->expect_value) {
3690 localScalingFactor);
3698 genetic_code_string,
3703 goto query_loop_cleanup;
3706 &redoneMatches[query_index],
3713 &redoneMatches[query_index],
3720 if (*pStatusCode == 0) {
3728 goto query_loop_cleanup;
3730 if (discarded_aligns !=
NULL) {
3740 localMatch->
oid = hsp_list->
oid;
3744 if (*pStatusCode != 0) {
3745 for (context_index = 0;
3746 context_index < numContexts;
3749 &alignments[context_index],
3756 if ((actual_num_threads > 1) &&
3757 (*pStatusCode != 0 || !seqSrc)) {
3758 #pragma omp critical(intrpt)
3760 #pragma omp flush(interrupt)
3775 for (
i = 0;
i < actual_num_threads; ++
i) {
3776 if (status_code_tld[
i] != 0) {
3777 status_code = status_code_tld[
i];
3780 for (
i = 0;
i < actual_num_threads; ++
i) {
3781 if (seqSrc && status_code == 0) {
3784 redoneMatches_tld[
i],
3787 if (redoneMatches_tld[
i] !=
NULL) {
3789 for (qi = 0; qi < numQueries; ++qi) {
3791 sfree(redoneMatches_tld[
i][qi].heapArray);
3796 if (redoneMatches_tld[
i] !=
NULL) {
3798 for (qi = 0; qi < numQueries; ++qi) {
3800 sfree(redoneMatches_tld[
i][qi].heapArray);
3805 sfree(redoneMatches_tld[
i]);
3807 if (redoneMatches !=
NULL) {
3809 for (qi = 0; qi < numQueries; ++qi) {
3811 sfree(redoneMatches[qi].heapArray);
3816 if (hsp_stream !=
NULL) {
3821 for (
i = 0;
i < actual_num_threads; ++
i) {
3825 hit_params_tld[
i] =
NULL;
3850 for (
i = 0;
i < actual_num_threads; ++
i) {
3853 for (j = 0; j < local_results->
num_queries; ++j) {
3868 if (redoneMatches !=
NULL) {
3869 for (query_index = 0; query_index < numQueries; query_index++) {
3872 sfree(redoneMatches);
3873 redoneMatches =
NULL;
3875 if (gapAlign !=
NULL) {
3879 positionBased, compo_adjust_mode);
3882 for (
i = 0;
i < actual_num_threads; ++
i) {
3887 sfree(alignments_tld[
i]);
3888 sfree(incoming_align_set_tld[
i]);
3897 sfree(alignments_tld);
3898 sfree(compositionTestIndex_tld);
3899 sfree(gap_align_tld);
3900 sfree(gapping_params_context_tld);
3901 sfree(hit_params_tld);
3902 sfree(incoming_align_set_tld);
3904 sfree(NRrecord_tld);
3905 sfree(numContexts_tld);
3906 sfree(numQueries_tld);
3907 sfree(query_info_tld);
3908 sfree(redo_align_params_tld);
3909 sfree(redoneMatches_tld);
3911 sfree(savedParams_tld);
3913 sfree(score_params_tld);
3915 sfree(status_code_tld);
3916 sfree(subjectBlk_tld);
3917 sfree(forbidden_tld);
3918 sfree(theseMatches);
3920 return (
Int2) status_code;
#define sfree(x)
Safely frees a pointer; this macro belongs in a higher-level header.
#define CODON_LENGTH
Codons are always of length 3.
BLAST filtering functions.
void Blast_MaskTheResidues(Uint1 *buffer, Int4 length, Boolean is_na, const BlastSeqLoc *mask_loc, Boolean reverse, Int4 offset)
Masks the letters in buffer.
Int2 BlastFilteringOptionsFromString(EBlastProgramType program_number, const char *instructions, SBlastFilterOptions **filtering_options, Blast_Message **blast_message)
Produces SBlastFilterOptions from a string that has been traditionally supported in blast.
Int2 BlastSetUp_Filter(EBlastProgramType program_number, Uint1 *sequence, Int4 length, Int4 offset, const SBlastFilterOptions *filter_options, BlastSeqLoc **seqloc_retval, Blast_Message **blast_message)
Runs seg filtering functions, according to the filtering options, returns BlastSeqLoc*.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Int4 ALIGN_EX(const Uint1 *A, const Uint1 *B, Int4 M, Int4 N, Int4 *a_offset, Int4 *b_offset, GapPrelimEditBlock *edit_block, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 query_offset, Boolean reversed, Boolean reverse_sequence, Boolean *fence_hit)
Low level function to perform dynamic programming gapped extension with traceback.
GapEditScript * Blast_PrelimEditBlockToGapEditScript(GapPrelimEditBlock *rev_prelim_tback, GapPrelimEditBlock *fwd_prelim_tback)
Convert the initial list of traceback actions from a non-OOF gapped alignment into a blast edit script.
Structures and functions prototypes used for BLAST gapped extension.
Int2 BLAST_GappedAlignmentWithTraceback(EBlastProgramType program, const Uint1 *query, const Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Boolean *fence_hit)
Perform a gapped alignment with traceback.
Int2 BLAST_GapAlignStructNew(const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, Uint4 max_subject_length, BlastScoreBlk *sbp, BlastGapAlignStruct **gap_align_ptr)
Initializes the BlastGapAlignStruct structure.
BlastGapAlignStruct * BLAST_GapAlignStructFree(BlastGapAlignStruct *gap_align)
Deallocates memory in the BlastGapAlignStruct structure.
Private interface for blast_gapalign.c.
Structures and API used for saving BLAST hits.
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
Int2 Blast_HSPGetNumIdentitiesAndPositives(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr, const BlastScoreBlk *sbp)
Calculate number of identities and positives in an HSP and set the BlastHSP::num_ident and BlastHSP::...
BlastHitList * Blast_HitListFree(BlastHitList *hitlist)
Deallocate memory for the hit list.
Int2 Blast_HSPResultsReverseOrder(BlastHSPResults *results)
Reverse order of HSP lists in each hit list in the BLAST results.
BlastHitList * Blast_HitListNew(Int4 hitlist_size)
Allocate memory for a hit list of a given size.
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
Int2 Blast_HSPListGetEvalues(EBlastProgramType program_number, const BlastQueryInfo *query_info, Int4 subject_length, BlastHSPList *hsp_list, Boolean gapped_calculation, Boolean RPS_prelim, const BlastScoreBlk *sbp, double gap_decay_rate, double scaling_factor)
Calculate the expected values for all HSPs in a hit list, without using the sum statistics.
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
const Uint1 * Blast_HSPGetTargetTranslation(SBlastTargetTranslation *target_t, const BlastHSP *hsp, Int4 *translated_length)
Returns a buffer with a protein translated from nucleotide.
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
void Blast_HSPListSwap(BlastHSPList *list1, BlastHSPList *list2)
Swaps the two HSP lists via structure assignment.
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Int2 Blast_HSPListReapByEvalue(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Discard the HSPs above the e-value threshold from the HSP list.
Int2 Blast_HitListUpdate(BlastHitList *hit_list, BlastHSPList *hsp_list)
Insert a new HSP list into the hit list.
Utilities for dealing with BLAST HSPs in the core of BLAST.
#define CONTAINED_IN_HSP(a, b, c, d, e, f)
TRUE if c is between a and b; f between d and e.
const int kBlastHSPStream_Eof
Return value when the end of the stream is reached (applicable to the read method only).
int BlastHSPStreamRead(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list)
Invokes the user-specified read function for this BlastHSPStream implementation.
void BlastHSPStreamTBackClose(BlastHSPStream *hsp_stream, BlastHSPResults *results)
Closes the BlastHSPStream structure after traceback.
Private interfaces to support the multi-threaded traceback in conjunction with the BlastHSPStream.
Int2 Blast_RedoAlignmentCore_MT(EBlastProgramType program_number, Uint4 num_threads, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)
Recompute alignments for each match found by the gapped BLAST algorithm.
struct BlastKappa_SavedParameters BlastKappa_SavedParameters
A BlastKappa_SavedParameters holds the value of certain search parameters on entry to RedoAlignmentCo...
static void s_RestoreSearch(BlastScoreBlk *sbp, BlastScoringParameters *scoring, const BlastKappa_SavedParameters *searchParams, int query_length, Boolean positionBased, ECompoAdjustModes compo_adjust_mode)
Restore the parameters that were adjusted to their original values.
static int s_RecordInitialSearch(BlastKappa_SavedParameters *searchParams, BlastScoreBlk *sbp, const BlastScoringParameters *scoring, int query_length, ECompoAdjustModes compo_adjust_mode, Boolean positionBased)
Record the initial value of the search parameters that are to be adjusted.
#define KAPPA_BLASTP_NO_SEG_SEQUENCE
Compile-time option; if set to a true value, then blastp runs that use Blast_RedoAlignmentCore to com...
#define SCALING_FACTOR
SCALING_FACTOR is a multiplicative factor used to get more bits of precision in the integer matrix sc...
static BlastCompo_Alignment * s_RedoOneAlignment(BlastCompo_Alignment *in_align, EMatrixAdjustRule matrix_adjust_rule, BlastCompo_SequenceData *query_data, BlastCompo_SequenceRange *query_range, int ccat_query_length, BlastCompo_SequenceData *subject_data, BlastCompo_SequenceRange *subject_range, int full_subject_length, BlastCompo_GappingParams *gapping_params)
A callback: calculate the traceback for one alignment by performing an x-drop alignment in both direc...
static BlastGapAlignStruct * s_BlastGapAlignStruct_Copy(BlastGapAlignStruct *orig, BlastScoreBlk *sbp)
Create a "deep" copy of a BlastGapAlignStruct structure.
static int s_MatrixInfoInit(Blast_MatrixInfo *self, BLAST_SequenceBlk *queryBlk, BlastScoreBlk *sbp, double scale_factor, const char *matrixName)
Initialize an object of type Blast_MatrixInfo.
static int s_ScalePosMatrix(int **fillPosMatrix, const char *matrixName, double **posFreqs, Uint1 *query, int queryLength, BlastScoreBlk *sbp, double scale_factor)
Produce a scaled-up version of the position-specific matrix with a given set of position-specific res...
static void s_SWFindFinalEndsUsingXdrop(BlastCompo_SequenceData *query, Int4 queryStart, Int4 queryEnd, BlastCompo_SequenceData *subject, Int4 matchStart, Int4 matchEnd, BlastGapAlignStruct *gap_align, const BlastScoringParameters *scoringParams, Int4 score, Int4 *queryAlignmentExtent, Int4 *matchAlignmentExtent, Int4 *newScore)
Redo a S-W alignment using an x-drop alignment.
static void s_HSPListNormalizeScores(BlastHSPList *hsp_list, double lambda, double logK, double scoreDivisor)
Given a list of HSPs with (possibly) high-precision scores, rescale the scores to have standard preci...
static int s_GetPosBasedStartFreqRatios(double **returnRatios, Int4 numPositions, Uint1 *query, const char *matrixName, double **startNumerator)
Fill a two-dimensional array with the frequency ratios that underlie a position specific score matrix...
static int s_NewAlignmentUsingXdrop(BlastCompo_Alignment **pnewAlign, Int4 *pqueryEnd, Int4 *pmatchEnd, Int4 queryStart, Int4 matchStart, Int4 score, BlastCompo_SequenceData *query, BlastCompo_SequenceRange *query_range, Int4 ccat_query_length, BlastCompo_SequenceData *subject, BlastCompo_SequenceRange *subject_range, Int4 full_subject_length, BlastCompo_GappingParams *gapping_params, EMatrixAdjustRule matrix_adjust_rule)
A callback used when performing SmithWaterman alignments: Calculate the traceback for one alignment b...
static BlastCompo_QueryInfo * s_GetQueryInfo(Uint1 *query_data, const BlastQueryInfo *blast_query_info, Boolean skip)
Save information about all queries in an array of objects of type BlastCompo_QueryInfo.
static void s_BlastGapAlignStruct_Free(BlastGapAlignStruct *copy)
Free a BlastGapAlignStruct copy created by s_BlastGapAlignStruct_Copy.
#define KAPPA_TBLASTN_NO_SEG_SEQUENCE
Compile-time option; if set to a true value, then tblastn runs that use Blast_RedoAlignmentCore to com...
static void s_ComputeNumIdentities(const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, BLAST_SequenceBlk *subject_blk, const BlastSeqSrc *seq_src, BlastHSPList *hsp_list, const BlastScoringOptions *scoring_options, const Uint1 *gen_code_string, const BlastScoreBlk *sbp, BlastSeqSrcSetRangesArg *ranges)
Compute the number of identities for the HSPs in the hsp_list.
static int s_FindNumIdentical(Uint1 *query_seq, const Uint8 *query_hashes, int query_len, Uint1 *subject_seq, int subject_len, int max_shift)
Find a local number of identical residues in two aligned sequences by finding word matches and doing ...
static void s_FillResultsFromCompoHeaps(BlastHSPResults *results, BlastCompo_Heap heaps[], Int4 hitlist_size)
Convert an array of BlastCompo_Heap objects to a BlastHSPResults structure.
static int s_ResultHspToDistinctAlign(BlastCompo_Alignment **self, int *numAligns, BlastHSP *hsp_array[], Int4 hspcnt, int init_context, const BlastQueryInfo *queryInfo, double localScalingFactor)
Convert an array of HSPs to a list of BlastCompo_Alignment objects.
static const Blast_RedoAlignCallbacks redo_align_callbacks
Callbacks used by the Blast_RedoOneMatch* routines.
static int s_SequenceGetRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *s_range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceData *query, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)
Obtain the sequence data that lies within the given range.
static int s_DoSegSequenceData(BlastCompo_SequenceData *seqData, EBlastProgramType program_name, Boolean *is_seq_biased)
Filter low complexity regions from the sequence data; uses the SEG algorithm.
static BlastCompo_GappingParams * s_GappingParamsNew(BlastKappa_GappingParamsContext *context, const BlastExtensionParameters *extendParams, int num_queries)
Create a new object of type BlastCompo_GappingParams.
static void s_SavedParametersFree(BlastKappa_SavedParameters **searchParams)
Release the data associated with a BlastKappa_SavedParameters and delete the object.
static void s_FreeEditScript(void *edit_script)
A callback used to free an EditScript that has been stored in a BlastCompo_Alignment.
static double s_CalcLambda(double probs[], int min_score, int max_score, double lambda0)
A callback routine: compute lambda for the given score probabilities.
static int s_ExtendLeft(Uint1 *query_seq, int query_len, Uint1 *subject_seq, int subject_len, int max_shift, int *query_ext_len, int *subject_ext_len, int *align_len)
Extend left from the end of the sequence and subject ranges and count identities.
static void s_HitlistReapContained(BlastHSP *hsp_array[], Int4 *hspcnt)
Remove from a hitlist all HSPs that are completely contained in an HSP that occurs earlier in the lis...
struct BlastKappa_SequenceInfo BlastKappa_SequenceInfo
BLAST-specific information that is associated with a BlastCompo_MatchingSequence.
static int s_HSPListFromDistinctAlignments(BlastHSPList *hsp_list, BlastCompo_Alignment **alignments, int oid, const BlastQueryInfo *queryInfo, int frame)
Converts a list of objects of type BlastCompo_Alignment to a new object of type BlastHSPList and ret...
static void s_FreeBlastCompo_QueryInfoArray(BlastCompo_QueryInfo **query_info, int num_queries)
#define NEAR_IDENTICAL_BITS_PER_POSITION
static void s_MatchingSequenceRelease(BlastCompo_MatchingSequence *self)
Release the resources associated with a matching sequence.
static int s_ExtendRight(Uint1 *query_seq, int query_len, Uint1 *subject_seq, int subject_len, int max_shift, int *query_ext_len, int *subject_ext_len, int *align_len)
Do a simple gapped extension to the right from the beginning of query and subject ranges examining on...
static Boolean s_TestNearIdentical(const BlastCompo_SequenceData *seqData, const int seqOffset, const BlastCompo_SequenceData *queryData, const int queryOffset, const Uint8 *query_words, const BlastCompo_Alignment *align)
Test whether the aligned parts of two sequences that have a high-scoring gapless alignment are nearly...
static BlastScoreBlk * s_BlastScoreBlk_Copy(EBlastProgramType program, BlastScoreBlk *orig, Uint1 alphabet_code, Int4 number_of_contexts)
Create a "deep" copy of a BlastScoreBlk structure.
struct BlastKappa_GappingParamsContext BlastKappa_GappingParamsContext
Data and data-structures needed to perform a gapped alignment.
static Uint8 s_GetHash(const Uint1 *data, int word_size)
Get hash for a word of word_size residues assuming 28-letter alphabet.
static int s_SequenceGetProteinRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)
Get a string of protein data from a protein sequence.
static void s_BlastScoreBlk_Free(BlastScoreBlk **copy)
Free a BlastScoreBlk copy created by s_BlastScoreBlk_Copy.
static int s_MatchingSequenceInitialize(BlastCompo_MatchingSequence *self, EBlastProgramType program_number, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, Int4 subject_index, BlastSeqSrcSetRangesArg *ranges)
Initialize a new matching sequence, obtaining information about the sequence from the search.
static Blast_RedoAlignParams * s_GetAlignParams(BlastKappa_GappingParamsContext *context, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, const BlastHitSavingParameters *hitParams, const BlastExtensionParameters *extendParams)
Read the parameters required for the Blast_RedoOneMatch* functions from the corresponding parameters ...
Int2 Blast_RedoAlignmentCore(EBlastProgramType program_number, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)
Recompute alignments for each match found by the gapped BLAST algorithm.
static int s_HitlistEvaluateAndPurge(int *pbestScore, double *pbestEvalue, BlastHSPList *hsp_list, const BlastSeqSrc *seqSrc, int subject_length, EBlastProgramType program_number, const BlastQueryInfo *queryInfo, int context_index, BlastScoreBlk *sbp, const BlastHitSavingParameters *hitParams, double pvalueForThisPair, double LambdaRatio, int subject_id)
Add evalues to a list of HSPs and remove those that do not have a sufficiently good (low) evalue.
static int s_CreateWordArray(const Uint1 *seq_data, Int4 seq_len, Uint8 **words)
static void s_AdjustEvaluesForComposition(BlastHSPList *hsp_list, double comp_p_value, const BlastSeqSrc *seqSrc, Int4 subject_length, const BlastContextInfo *query_context, double LambdaRatio, int subject_id)
Adjusts the E-values in a BLAST_HitList to be composites of a composition-based P-value and a score/a...
static int s_GetStartFreqRatios(double **returnRatios, const char *matrixName)
Fill a two-dimensional array with the frequency ratios that underlie the named score matrix.
Int4 s_GetSubjectLength(Int4 total_subj_length, EBlastProgramType program_number)
static BlastCompo_Alignment * s_NewAlignmentFromGapAlign(BlastGapAlignStruct *gap_align, GapEditScript **edit_script, BlastCompo_SequenceRange *query_range, BlastCompo_SequenceRange *subject_range, EMatrixAdjustRule matrix_adjust_rule)
Reads a BlastGapAlignStruct that has been used to compute a traceback, and return a BlastCompo_Alignm...
static BlastKappa_SavedParameters * s_SavedParametersNew(Int4 rows, Int4 numQueries, ECompoAdjustModes compo_adjust_mode, Boolean positionBased)
Create a new instance of BlastKappa_SavedParameters.
static void s_RescaleSearch(BlastScoreBlk *sbp, BlastScoringParameters *sp, int num_queries, double scale_factor)
Rescale the search parameters in the search object and options object to obtain more precision.
static int s_SequenceGetTranslatedRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)
Obtain a string of translated data.
#define BLASTP_MASK_INSTRUCTIONS
Default instructions and mask residue for SEG filtering.
static void s_ClearHeap(BlastCompo_Heap *self)
Remove all matches from a BlastCompo_Heap.
Header file for composition-based statistics.
#define PSI_INCLUSION_ETHRESH
Defaults for PSI-BLAST and DELTA-BLAST options.
SBlastFilterOptions * SBlastFilterOptionsFree(SBlastFilterOptions *filter_options)
Frees SBlastFilterOptions and all subservient structures.
@ eSmithWatermanTbck
Smith-Waterman finds optimal scores, then ALIGN_EX to find alignment.
int Kappa_impalaScaling(Kappa_posSearchItems *posSearch, Kappa_compactSearchItems *compactSearch, double scalingFactor, Boolean doBinarySearch, BlastScoreBlk *sbp)
Copied from posit2.c.
Kappa_compactSearchItems * Kappa_compactSearchItemsNew(const Uint1 *query, unsigned int queryLength, BlastScoreBlk *sbp)
Creates a new Kappa_compactSearchItems structure.
Kappa_posSearchItems * Kappa_posSearchItemsFree(Kappa_posSearchItems *posSearch)
Deallocates the Kappa_posSearchItems structure.
Kappa_compactSearchItems * Kappa_compactSearchItemsFree(Kappa_compactSearchItems *compactSearch)
Deallocates the Kappa_compactSearchItems structure.
Kappa_posSearchItems * Kappa_posSearchItemsNew(unsigned int queryLength, const char *matrix_name, int **posPrivateMatrix, double **posFreqs)
Allocates a new Kappa_posSearchItems structure.
Port of posit.h structures and impalaScaling for implementing composition based statistics for PSI-BL...
Boolean Blast_QueryIsPssm(EBlastProgramType p)
Returns true if the query is PSSM.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
int _PSIConvertFreqRatiosToPSSM(_PSIInternalPssmData *internal_pssm, const Uint1 *query, const BlastScoreBlk *sbp, const double *std_probs)
Converts the PSSM's frequency ratios obtained in the previous stage to a PSSM of scores.
_PSIInternalPssmData * _PSIInternalPssmDataNew(Uint4 query_length, Uint4 alphabet_size)
Allocates a new _PSIInternalPssmData structure.
const double kPosEpsilon
minimum return value of s_computeRelativeEntropy
void _PSICopyMatrix_int(int **dest, int **src, unsigned int ncols, unsigned int nrows)
Copies src matrix into dest matrix, both of which must be int matrices with dimensions ncols by nrows...
_PSIInternalPssmData * _PSIInternalPssmDataFree(_PSIInternalPssmData *pssm_data)
Deallocates the _PSIInternalPssmData structure.
void _PSICopyMatrix_double(double **dest, double **src, unsigned int ncols, unsigned int nrows)
Copies src matrix into dest matrix, both of which must be double matrices with dimensions ncols by nr...
Private interface for Position Iterated BLAST API, contains the PSSM generation engine.
Int4 BlastSeqSrcGetSeqLen(const BlastSeqSrc *seq_src, void *oid)
Retrieve sequence length (number of residues/bases)
void BlastSeqSrcReleaseSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)
Deallocate individual sequence.
BlastSeqSrc * BlastSeqSrcCopy(const BlastSeqSrc *seq_src)
Copy function: needed to guarantee thread safety.
BlastSeqSrc * BlastSeqSrcFree(BlastSeqSrc *seq_src)
Frees the BlastSeqSrc structure by invoking the destructor function set by the user-defined construct...
Int4 BlastSeqSrcGetMaxSeqLen(const BlastSeqSrc *seq_src)
Get the length of the longest sequence in the sequence source.
Boolean BlastSeqSrcGetSupportsPartialFetching(const BlastSeqSrc *seq_src)
Find if the Blast Sequence Source supports partial fetching.
Int2 BlastSeqSrcGetSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)
Retrieve an individual sequence.
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
double Blast_KarlinLambdaNR(Blast_ScoreFreq *sfp, double initialLambdaGuess)
Calculates the parameter Lambda given an initial guess for its value.
double BLAST_KarlinEtoP(double x)
Convert an E-value to a P-value.
Blast_KarlinBlk * Blast_KarlinBlkNew(void)
Callocs a Blast_KarlinBlk.
Blast_KarlinBlk * Blast_KarlinBlkFree(Blast_KarlinBlk *kbp)
Deallocates the KarlinBlk.
double BLAST_KarlinPtoE(double p)
Convert a P-value to an E-value.
Blast_ScoreFreq * Blast_ScoreFreqNew(Int4 score_min, Int4 score_max)
Creates a new structure to keep track of score frequencies for a scoring system.
SPsiBlastScoreMatrix * SPsiBlastScoreMatrixNew(size_t ncols)
Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.
Int2 Blast_KarlinBlkCopy(Blast_KarlinBlk *kbp_to, Blast_KarlinBlk *kbp_from)
Copies contents of one Karlin block to another.
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
Functions to do gapped alignment with traceback.
BlastSeqSrcSetRangesArg * BLAST_SetupPartialFetching(EBlastProgramType program_number, BlastSeqSrc *seq_src, const BlastHSPList **hsp_list, Int4 num_hsplists)
Attempts to set up partial fetching, if it fails (e.g.
EBlastEncoding Blast_TracebackGetEncoding(EBlastProgramType program_number)
Get the subject sequence encoding type for the traceback, given a program number.
SThreadLocalData * SThreadLocalDataFree(SThreadLocalData *tld)
Deallocate the SThreadLocalData structure passed in.
SThreadLocalDataArray * SThreadLocalDataArrayNew(Uint4 num_threads)
Allocate a new SThreadLocalDataArray structure.
BlastHSPResults * SThreadLocalDataArrayConsolidateResults(SThreadLocalDataArray *array)
Extracts a single, consolidated BlastHSPResults structure from its input for single threaded processi...
Private interface to support the multi-threaded traceback.
Various auxiliary BLAST utility functions.
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
int Blast_GetPartialTranslation(const Uint1 *nucl_seq, Int4 nucl_length, Int2 frame, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Int4 *protein_length, Uint1 **mixed_seq_ptr)
Get one frame translation - needed when only parts of subject sequences are translated.
Int2 BlastTargetTranslationNew(BLAST_SequenceBlk *subject_blk, const Uint1 *gen_code_string, EBlastProgramType program_number, Boolean is_ooframe, SBlastTargetTranslation **target)
Sets up structure for target translation.
SBlastTargetTranslation * BlastTargetTranslationFree(SBlastTargetTranslation *target_t)
Free SBlastTargetTranslation.
double * BLAST_GetStandardAaProbabilities(void)
Get the standard amino acid probabilities.
Declares a "heap" data structure that is used to store computed alignments when composition adjustmen...
void * BlastCompo_HeapPop(BlastCompo_Heap *self)
Remove and return the element in the BlastCompo_Heap with largest (worst) evalue; ties are broken acc...
void BlastCompo_HeapRelease(BlastCompo_Heap *self)
Release the storage associated with the fields of a BlastCompo_Heap.
int BlastCompo_HeapInsert(BlastCompo_Heap *self, void *alignments, double eValue, int score, int subject_index, void **discardedAligns)
Try to insert a collection of alignments into a heap.
int BlastCompo_HeapInitialize(BlastCompo_Heap *self, int heapThreshold, double ecutoff)
Initialize a new BlastCompo_Heap; parameters to this function correspond directly to fields in the Bl...
int BlastCompo_HeapWouldInsert(BlastCompo_Heap *self, double eValue, int score, int subject_index)
Return true if self may insert a match that had the given eValue, score and subject_index.
Blast_CompositionWorkspace * Blast_CompositionWorkspaceNew(void)
Create a new Blast_CompositionWorkspace object, allocating memory for all its component arrays.
int Blast_CompositionWorkspaceInit(Blast_CompositionWorkspace *NRrecord, const char *matrixName)
Initialize the fields of a Blast_CompositionWorkspace for a specific underlying scoring matrix.
Blast_MatrixInfo * Blast_MatrixInfoNew(int rows, int cols, int positionBased)
Create a Blast_MatrixInfo object.
void Blast_Int4MatrixFromFreq(int **matrix, int size, double **freq, double Lambda)
Compute an integer-valued amino-acid score matrix from a set of score frequencies.
void Blast_CompositionWorkspaceFree(Blast_CompositionWorkspace **NRrecord)
Free memory associated with a record of type Blast_CompositionWorkspace.
void Blast_ReadAaComposition(Blast_AminoAcidComposition *composition, int alphsize, const Uint1 *sequence, int length)
Compute the true amino acid composition of a sequence, ignoring ambiguity characters and other nonsta...
ECompoAdjustModes
A collection of constants that specify all permissible modes of composition adjustment.
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eNoCompositionBasedStats
Don't use composition based statistics.
@ eCompositionMatrixAdjust
Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence pro...
EMatrixAdjustRule
A collection of constants that specify all rules that may be used to generate a compositionally adju...
static void cleanup(void)
static int heap[2 *(256+1+29)+1]
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
EGapAlignOpType
Operation types within the edit script.
GapEditScript * GapEditScriptDelete(GapEditScript *esp)
Free edit script structure.
void GapPrelimEditBlockReset(GapPrelimEditBlock *edit_block)
Reset a preliminary edit block without freeing it.
Defines the interface to interact with the genetic code singleton object.
Uint1 * GenCodeSingletonFind(Uint4 gen_code_id)
Returns the genetic code string for the requested genetic code id.
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
#define BLASTAA_SIZE
Size of aminoacid alphabet.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingProtein
NCBIstdaa.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
uint64_t Uint8
8-byte (64-bit) unsigned integer
unsigned int
A callback function used to compare two keys in a database.
where both are integers</td>\n<td></td>\n</tr>\n<tr>\n<td>tse</td>\n<td>optional</td>\n<td>String</td>\n<td class=\"description\">TSE option controls what blob is orig
for(len=0;yy_str[len];++len)
Functions to link HSPs using sum statistics.
Int2 BLAST_LinkHsps(EBlastProgramType program_number, BlastHSPList *hsp_list, const BlastQueryInfo *query_info, Int4 subject_length, const BlastScoreBlk *sbp, const BlastLinkHSPParameters *link_hsp_params, Boolean gapped_calculation)
Link HSPs using sum statistics.
SFreqRatios * _PSIMatrixFrequencyRatiosFree(SFreqRatios *freq_ratios)
Deallocate the frequency ratios structure.
SFreqRatios * _PSIMatrixFrequencyRatiosNew(const char *matrix_name)
Retrieve the matrix's frequency ratios.
Definitions used to get joint probabilities for a scoring matrix.
int Blast_FrequencyDataIsAvailable(const char *matrix_name)
Retrieve the background letter probabilities implicitly used in constructing the score matrix matrix_...
range(_Ty, _Ty) -> range< _Ty >
int strcmp(const char *str1, const char *str2)
Prototypes for portable math library (ported from C Toolkit)
#define NCBIMATH_LN2
Natural log(2)
long BLAST_Nint(double x)
Nearest integer.
#define MIN(a, b)
returns smaller of a and b.
Uint1 Boolean
bool replacement for C
#define ABS(a)
returns absolute value of a (|a|)
#define NCBI_CONST_UINT8(v)
#define ASSERT
macro for assert.
#define MAX(a, b)
returns larger of a and b.
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double lambda(size_t dimMatrix_, const Int4 *const *scoreMatrix_, const double *q_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
double f(double x_, const double &y_)
Declarations for several linear algebra routines.
void Nlm_Int4MatrixFree(int ***mat)
Free a matrix created by Nlm_DenseMatrixNew or Nlm_LtriangMatrixNew.
int ** Nlm_Int4MatrixNew(int nrows, int ncols)
Create and return a new Int4 matrix.
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
Definitions used to redo a set of alignments, using either composition matrix adjustment or the Smith...
void BlastCompo_AlignmentsFree(BlastCompo_Alignment **palign, void(*free_context)(void *))
Recursively free all alignments in the singly linked list whose head is *palign.
Blast_RedoAlignParams * Blast_RedoAlignParamsNew(Blast_MatrixInfo **pmatrix_info, BlastCompo_GappingParams **pgapping_params, ECompoAdjustModes compo_adjust_mode, int positionBased, int subject_is_translated, int query_is_translated, int ccat_query_length, int cutoff_s, double cutoff_e, int do_link_hsps, const Blast_RedoAlignCallbacks *callbacks, double near_identical_cutoff)
Create new Blast_RedoAlignParams object.
#define GET_NUCL_LENGTH(l)
#define GET_TRANSLATED_LENGTH(l, f)
int BlastCompo_EarlyTermination(double evalue, BlastCompo_Heap significantMatches[], int numQueries)
Return true if a heuristic determines that it is unlikely to be worthwhile to redo a query-subject pa...
BlastCompo_Alignment * BlastCompo_AlignmentNew(int score, EMatrixAdjustRule whichRule, int queryIndex, int queryStart, int queryEnd, int matchStart, int matchEnd, int frame, void *context)
Create a new BlastCompo_Alignment; parameters to this function correspond directly to fields of Blast...
int Blast_RedoOneMatch(BlastCompo_Alignment **alignments, Blast_RedoAlignParams *params, BlastCompo_Alignment *incoming_aligns, int hspcnt, double Lambda, BlastCompo_MatchingSequence *matchingSeq, int ccat_query_length, BlastCompo_QueryInfo query_info[], int numQueries, int **matrix, int alphsize, Blast_CompositionWorkspace *NRrecord, double *pvalueThisPair, int compositionTestIndex, double *LambdaRatio)
Recompute all alignments for one query/subject pair using composition-based statistics or composition...
void Blast_RedoAlignParamsFree(Blast_RedoAlignParams **pparams)
Free a set of Blast_RedoAlignParams.
int Blast_RedoOneMatchSmithWaterman(BlastCompo_Alignment **alignments, Blast_RedoAlignParams *params, BlastCompo_Alignment *incoming_aligns, int hspcnt, double Lambda, double logK, BlastCompo_MatchingSequence *matchingSeq, BlastCompo_QueryInfo query_info[], int numQueries, int **matrix, int alphsize, Blast_CompositionWorkspace *NRrecord, Blast_ForbiddenRanges *forbidden, BlastCompo_Heap *significantMatches, double *pvalueThisPair, int compositionTestIndex, double *LambdaRatio)
Recompute all alignments for one query/subject pair using the Smith-Waterman algorithm and possibly a...
void Blast_ForbiddenRangesRelease(Blast_ForbiddenRanges *self)
Release the storage associated with the fields of self, but do not delete self.
int Blast_ForbiddenRangesInitialize(Blast_ForbiddenRanges *self, int capacity)
Initialize a new, empty Blast_ForbiddenRanges.
Structure to hold a sequence.
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Int4 length
Length of sequence.
Uint1 * sequence_nomask
Start of query sequence without masking.
Uint1 * sequence
Sequence used for search (could be translation).
Uint1 * oof_sequence
Mixed-frame protein representation of a nucleotide sequence for out-of-frame alignment.
Uint1 * gen_code_string
for nucleotide subject sequences (tblast[nx]), the genetic code used to create a translated protein s...
Within the composition adjustment module, an object of type BlastCompo_Alignment represents a distinc...
int frame
the subject frame
int matchStart
the start of the alignment in the subject
int score
the score of this alignment
int matchEnd
one past the end of the alignment in the subject
int queryStart
the start of the alignment in the query
EMatrixAdjustRule matrix_adjust_rule
how the score matrix was computed
struct BlastCompo_Alignment * next
the next alignment in the list
int queryIndex
index of the query in a concatenated query
int queryEnd
one past the end of the alignment in the query
void * context
traceback info for a gapped alignment
Parameters used to compute gapped alignments.
int x_dropoff
for x-drop algorithms, once a path falls below the best score by this (positive) amount,...
int gap_open
penalty for opening a gap
void * context
a pointer to any additional gapping parameters that may be needed by the calling routine.
int gap_extend
penalty for extending a gapped alignment by one residue
A BlastCompo_Heap represents a collection of alignments between one query sequence and several matchi...
A BlastCompo_MatchingSequence represents a subject sequence to be aligned with the query.
Int4 index
index of this sequence in the database
void * local_data
holds any sort of data that is necessary for callbacks to access the sequence
Int4 length
length of this matching sequence
Collected information about a query.
int origin
origin of the query in a concatenated query
Blast_AminoAcidComposition composition
the composition of the query
BlastCompo_SequenceData seq
sequence data for the query
double eff_search_space
effective search space of searches involving this query
Uint8 * words
list of words in the query, needed for testing whether the query and a subject are nearly identical
BlastCompo_SequenceData - represents a string of amino acids or nucleotides.
int length
the length of data.
Uint1 * buffer
if non-nil, points to memory that must be freed when this instance of BlastCompo_SequenceData is dele...
Uint1 * data
amino acid or nucleotide data
BlastCompo_SequenceRange - a struct whose instances represent a range of data in a sequence.
int begin
the starting index of the range
int end
one beyond the last item in the range
int context
integer identifier for this window, can indicate a translation frame or an index into a set of sequen...
The context related information.
Int4 query_length
Length of this query, strand or frame.
Boolean is_valid
Determine if this context is valid or not.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int4 length_adjustment
Length adjustment for boundary conditions.
Int8 eff_searchsp
Effective search space for this context.
Options used for gapped extension These include: a.
EBlastTbackExt eTbackExt
type of traceback extension.
Int4 unifiedP
Indicates unified P values to be used in blastp or tblastn.
double gap_x_dropoff_final
X-dropoff value for the final gapped extension (in bits)
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used
Computed values used as parameters for gapped alignments.
BlastExtensionOptions * options
The original (unparsed) options.
Int4 gap_x_dropoff_final
X-dropoff value for the final gapped extension (raw)
Structure supporting the gapped alignment.
GapPrelimEditBlock * fwd_prelim_tback
traceback from right extensions
Int4 gap_x_dropoff
X-dropoff parameter to use.
GapPrelimEditBlock * rev_prelim_tback
traceback from left extensions
Int4 query_stop
query end offset of current alignment
Int4 subject_start
subject start offset of current alignment
BlastScoreBlk * sbp
Pointer to the scoring information block.
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
Auxiliary structure for dynamic programming gapped extension.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Int4 hspcnt
Number of HSPs saved.
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
double best_evalue
Smallest e-value for HSPs in this list.
Int4 query_index
Index of the query which this HSPList corresponds to.
The structure to contain all BLAST results, for multiple queries.
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Int4 num_queries
Number of query sequences.
Default implementation of BlastHSPStream.
Structure holding all information about an HSP.
double evalue
This HSP's e-value.
Int4 num_ident
Number of identical base pairs in this HSP.
BlastSeg query
Query sequence info.
Int4 context
Context number of query.
double bit_score
Bit score, calculated from score.
BlastSeg subject
Subject sequence info.
Int2 comp_adjustment_method
which mode of composition adjustment was used; relevant only for blastp and tblastn
Int4 score
This HSP's raw score.
The structure to contain all BLAST results for one query sequence.
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.
Int4 hitlist_size
Maximal number of database sequences to return results for.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Int4 cutoff_score_min
smallest cutoff score across all contexts
Boolean do_sum_stats
TRUE if sum stats will be used.
BlastLinkHSPParameters * link_hsp_params
Parameters for linking HSPs with sum statistics; linking is not done if NULL.
BlastHitSavingOptions * options
The original (unparsed) options.
Data and data-structures needed to perform a gapped alignment.
BlastGapAlignStruct * gap_align
additional parameters for a gapped alignment
EBlastProgramType prog_number
the type of search being performed
BlastScoreBlk * sbp
the score block for this search
const BlastScoringParameters * scoringParams
scoring parameters for a gapped alignment
double localScalingFactor
the amount by which this search has been scaled
A BlastKappa_SavedParameters holds the value of certain search parameters on entry to RedoAlignmentCo...
double scale_factor
the original scale factor
Int4 num_queries
Number of queries in this search.
Int4 gap_open
a penalty for the existence of a gap
double original_expect_value
expect value on entry
Int4 gapExtend
a penalty for each residue in the gap
Blast_KarlinBlk ** kbp_gap_orig
copy of the original gapped Karlin-Altschul block corresponding to the first context
Int4 ** origMatrix
The original matrix values.
BLAST-specific information that is associated with a BlastCompo_MatchingSequence.
const BlastSeqSrc * seq_src
BLAST sequence data source.
EBlastProgramType prog_number
identifies the type of blast search being performed.
BlastSeqSrcGetSeqArg seq_arg
argument to GetSequence method of the BlastSeqSrc
The query related information.
Int4 first_context
Index of the first element of the context array.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Int4 last_context
Index of the last element of the context array.
Uint4 max_length
Length of the longest among the concatenated queries.
Structure used for scoring calculations.
Blast_KarlinBlk ** kbp_psi
K-A parameters for position-based alignments.
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
SPsiBlastScoreMatrix * psi_matrix
PSSM and associated data.
Uint1 alphabet_code
NCBI alphabet code.
Int4 number_of_contexts
Used by sfp and kbp, how large are these.
SBlastScoreMatrix * matrix
scoring matrix data
Blast_KarlinBlk * kbp_ideal
Ideal values (for query with average database composition).
Scoring options block. Used to produce the BlastScoreBlk structure. This structure may be needed for lo...
EBlastProgramType program_number
indicates blastn, blastp, etc.
char * matrix
Name of the matrix containing all scores: needed for finding neighboring words.
Boolean is_ooframe
Should out-of-frame gapping be used in a translated search?
Scoring parameters block. Contains scoring-related information that is actually used for the blast sea...
double scale_factor
multiplier for all cutoff scores
Int4 gap_extend
Penalty for each gap residue (scaled version)
Int4 gap_open
Extra penalty for starting a gap (scaled version)
BlastScoringOptions * options
User-provided values for these params.
Int4 gapped_start
Where the gapped extension started.
Int2 frame
Translation frame.
Used to hold a set of positions, mostly used for filtering.
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
EBlastEncoding encoding
Encoding of sequence, i.e.
Boolean check_oid_exclusion
Check whether an OID is excluded due to overlapping filtering.
BlastSeqSrcSetRangesArg * ranges
BLAST_SequenceBlk * seq
Sequence to return; if NULL, it should be allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...
Structure used as the argument to function SetRanges.
Complete type definition of Blast Sequence Source ADT.
Work arrays used to perform composition-based matrix adjustment.
An instance of Blast_ForbiddenRanges is used by the Smith-Waterman algorithm to represent ranges in t...
Structure to hold the Gumbel parameters (for FSC).
Structure to hold the Karlin-Altschul parameters.
double K
K value used in statistics.
double Lambda
Lambda value used in statistics.
double logK
natural log of K value used in statistics
Information about an amino-acid substitution matrix.
Callbacks used by Blast_RedoOneMatch and Blast_RedoOneMatchSmithWaterman routines.
A parameter block for the Blast_RedoOneMatch and Blast_RedoOneMatchSmithWaterman routines.
Holds score frequencies used in calculation of Karlin-Altschul parameters for an ungapped search.
double * sprob0
arrays for frequency of given score
double score_avg
average score, must be negative for local alignment.
Int4 score_max
highest allowed scores
Int4 obs_min
lowest observed (actual) scores
double * sprob
arrays for frequency of given score, shifted down by score_min.
Int4 score_min
lowest allowed scores
Int4 obs_max
highest observed (actual) scores
Edit script: linked list of correspondences between two sequences.
Int4 * num
Array of number of operations.
Int4 size
Size of above arrays.
EGapAlignOpType * op_type
Array of type of operation.
Preliminary version of GapEditBlock, used directly by the low-level dynamic programming routines.
Int4 num_ops_allocated
size of allocated array
GapPrelimEditScript * edit_ops
array of edit operations
A version of GapEditScript used to store initial results from the gapped alignment routines.
Int4 num
Number of operations.
EGapAlignOpType op_type
Type of operation.
Structure to keep memory for state structure.
struct GapStateArrayStruct * next
Next link in the list.
Int4 length
length of the state_array.
Uint1 * state_array
array to be used.
Structure used to pass data into the scaling routines.
double * standardProb
Array of standard residue probabilities, as those returned by BLAST_GetStandardAaProbabilities.
Structure used to pass data into the scaling routines.
double ** posFreqs
PSSM's frequency ratios [alias].
int ** posPrivateMatrix
Scaled PSSM [alias].
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST). Some of these possibly should...
double inclusion_ethresh
Minimum evalue for inclusion in PSSM calculation.
Scoring matrix used in BLAST.
size_t nrows
number of rows
double lambda
derived value of the matrix lambda -RMH-
double * freqs
array of assumed matrix background frequencies -RMH-
size_t ncols
number of columns
int ** data
actual scoring matrix data, stored in row-major form
Information about target translations.
Stores the frequency ratios along with their bit scale factor.
double ** data
The actual frequency ratios.
All auxiliary memory needed for the greedy extension algorithm.
Scoring matrix data used in PSI-BLAST.
SBlastScoreMatrix * pssm
position-specific score matrix
double ** freq_ratios
PSSM's frequency ratios, dimensions are specified in pssm data above.
Blast_KarlinBlk * kbp
Karlin-Altschul block associated with this PSSM.
Data structure to support MT traceback: this encapsulates the data that each thread modifies.
BlastHitSavingParameters * hit_params
Hit saving parameters.
BlastHSPResults * results
Structure to store results from this thread.
Internal representation of a PSSM in various stages of its creation and its dimensions.
int ** scaled_pssm
scaled PSSM (scores)
Uint4 nrows
number of rows (alphabet_size)
Uint4 ncols
number of columns (query_length)
double ** freq_ratios
frequency ratios
Headers for computing a "composition" p-value of a match, and for computing a unified p-value combini...
double Blast_Overall_P_Value(double p_comp, double p_alignment)
This function implements the method of Fisher, R.
voidp calloc(uInt items, uInt size)