NCBI C++ ToolKit
blast_kappa.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_kappa.c 100164 2023-06-28 13:36:01Z merezhuk $
2  * ==========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Alejandro Schaffer, Mike Gertz (ported to algo/blast by Tom Madden)
27  *
28  */
29 
30 /** @file blast_kappa.c
31  * Utilities for doing Smith-Waterman alignments and adjusting the scoring
32  * system for each match in blastpgp
33  */
34 
35 #include <float.h>
45 #include "blast_psi_priv.h"
46 #include "blast_gapalign_priv.h"
47 #include "blast_hits_priv.h"
48 #include "blast_posit.h"
51 
52 #ifdef _OPENMP
53 #include <omp.h>
54 
55 # ifdef _WIN32
56 /* stderr expands to (__acrt_iob_func(2)), which won't work in an OpenMP
57  * shared(...) list. */
58 # define STDERR_COMMA
59 # else
60 # define STDERR_COMMA stderr,
61 # endif
62 #endif
63 
69 
70 /* Define KAPPA_PRINT_DIAGNOSTICS to turn on printing of
71  * diagnostic information from some routines. */
72 
73 /** Compile-time option; if set to a true value, then blastp runs
74  that use Blast_RedoAlignmentCore to compute the traceback will not
75  SEG the subject sequence */
76 #ifndef KAPPA_BLASTP_NO_SEG_SEQUENCE
77 #define KAPPA_BLASTP_NO_SEG_SEQUENCE 0
78 #endif
79 
80 
81 /** Compile-time option; if set to a true value, then tblastn runs (going by
82  the macro name; this comment previously said blastp) that use
83  Blast_RedoAlignmentCore to compute the traceback will not SEG the subject sequence. Defaults to 0 when not already defined. */
84 #ifndef KAPPA_TBLASTN_NO_SEG_SEQUENCE
85 #define KAPPA_TBLASTN_NO_SEG_SEQUENCE 0
86 #endif
87 
88 
89 /**
90  * Given a list of HSPs with (possibly) high-precision scores, rescale
91  * the scores to have standard precision and set the scale-independent
92  * bit scores. This routine does *not* re-sort the list; it is assumed
93  * that the list is already sorted according to e-values that have been
94  * computed using the initial, higher-precision scores.
95  *
96  * @param hsp_list the HSP list; score and bit_score of every HSP are overwritten [in|out]
97  * @param logK Karlin-Altschul statistical parameter [in]
98  * @param lambda Karlin-Altschul statistical parameter [in]
99  * @param scoreDivisor the value by which reported scores are to be divided [in]
100  */
101 static void /* NOTE(review): listing line 102 (function name and the hsp_list parameter) is missing from this extract */
103  double lambda,
104  double logK,
105  double scoreDivisor)
106 {
107  int hsp_index; /* index into hsp_list->hsp_array */
108  for(hsp_index = 0; hsp_index < hsp_list->hspcnt; hsp_index++) {
109  BlastHSP * hsp = hsp_list->hsp_array[hsp_index];
110 
111  hsp->score = (Int4)BLAST_Nint(((double) hsp->score) / scoreDivisor); /* divide the stored score by scoreDivisor and convert back to Int4 */
112  /* Compute the bit score using the newly computed scaled score.
       The rescaled score is multiplied by lambda*scoreDivisor; presumably
       lambda here applies to unscaled scores -- TODO confirm with callers. */
113  hsp->bit_score = (hsp->score*lambda*scoreDivisor - logK)/NCBIMATH_LN2;
114  }
115 }
116 
117 
118 /**
119  * Adjusts the E-values in a BLAST_HitList to be composites of
120  * a composition-based P-value and a score/alignment-based P-value. The smallest adjusted E-value is stored in hsp_list->best_evalue.
121  *
122  * @param hsp_list the hitlist whose E-values need to be adjusted [in|out]
123  * @param comp_p_value P-value from sequence composition
124  * @param seqSrc a source of sequence data (read only when KAPPA_PRINT_DIAGNOSTICS is defined)
125  * @param subject_length length of database sequence
126  * @param query_context info about this query context; needed when
127  * multiple queries are being used
128  * @param LambdaRatio the ratio between the observed value of Lambda
129  * and the predicted value of lambda (used to print
130  * diagnostics)
131  * @param subject_id the subject id of this sequence (used to print
132  * diagnostics)
133  **/
134 static void /* NOTE(review): listing line 135 (function name) is missing from this extract; the call in s_HitlistEvaluateAndPurge suggests s_AdjustEvaluesForComposition */
136  BlastHSPList *hsp_list,
137  double comp_p_value,
138  const BlastSeqSrc* seqSrc,
139  Int4 subject_length,
140  const BlastContextInfo * query_context,
141  double LambdaRatio,
142  int subject_id)
143 {
144  /* Smallest observed evalue after adjustment */
145  double best_evalue = DBL_MAX;
146 
147  /* True length of the query */
148  int query_length = query_context->query_length;
149  /* Length adjustment to compensate for edge effects */
150  int length_adjustment = query_context->length_adjustment;
151 
152  /* Effective lengths of the query, subject, and database; the query and
       subject effective lengths are clamped to be at least 1 */
153  double query_eff = MAX((query_length - length_adjustment), 1);
154  double subject_eff = MAX((subject_length - length_adjustment), 1.0);
155  double dblen_eff = (double) query_context->eff_searchsp / query_eff;
156 
157  /* Scale factor to convert the database E-value to the sequence E-value */
158  double db_to_sequence_scale = subject_eff / dblen_eff;
159 
160  int hsp_index;
161  for (hsp_index = 0; hsp_index < hsp_list->hspcnt; hsp_index++) {
162  /* for all HSPs */
163  double align_p_value; /* P-value for the alignment score */
164  double combined_p_value; /* combination of two P-values */
165 
166  /* HSP for this iteration */
167  BlastHSP * hsp = hsp_list->hsp_array[hsp_index];
168 #ifdef KAPPA_PRINT_DIAGNOSTICS
169  /* Original E-value, saved if diagnostics are printed. */
170  double old_e_value = hsp->evalue;
171 #endif
172  hsp->evalue *= db_to_sequence_scale; /* database E-value -> sequence E-value */
173 
174  align_p_value = BLAST_KarlinEtoP(hsp->evalue);
175  combined_p_value = Blast_Overall_P_Value(comp_p_value,align_p_value);
176  hsp->evalue = BLAST_KarlinPtoE(combined_p_value);
177  hsp->evalue /= db_to_sequence_scale; /* undo the scaling applied above */
178 
179  if (hsp->evalue < best_evalue) {
180  best_evalue = hsp->evalue;
181  }
182 
183 #ifdef KAPPA_PRINT_DIAGNOSTICS
184  if (seqSrc){
185  int sequence_gi; /*GI of a sequence*/
186  Blast_GiList* gi_list; /*list of GI's for a sequence*/
187  gi_list = BlastSeqSrcGetGis(seqSrc, (void *) (&subject_id));
188  if ((gi_list) && (gi_list->num_used > 0)) {
189  sequence_gi = gi_list->data[0];
190  } else {
191  sequence_gi = (-1);
192  }
193  printf("GI %d Lambda ratio %e comp. p-value %e; "
194  "adjust E-value of query length %d match length "
195  "%d from %e to %e\n",
196  sequence_gi, LambdaRatio, comp_p_value,
197  query_length, subject_length, old_e_value, hsp->evalue);
198  Blast_GiListFree(gi_list);
199  }
200 #endif
201  } /* end for all HSPs */
202 
203  hsp_list->best_evalue = best_evalue; /* remains DBL_MAX when the list holds no HSPs */
204 
205  /* suppress unused parameter warnings if diagnostics are not printed */
206  (void) seqSrc;
207  (void) query_length;
208  (void) LambdaRatio;
209  (void) subject_id;
210 }
211 
212 
213 /**
214  * Remove from a hitlist all HSPs that are completely contained in an
215  * HSP that occurs earlier in the list and that:
216  * - is in the same query frame and subject frame; and
217  * - has equal or greater score.
218  * The hitlist should be sorted by some measure of significance before
219  * this routine is called.
220  * @param hsp_array array to be reaped; removed HSPs are freed, the survivors are moved to the front and the vacated slots are set to NULL [in|out]
221  * @param hspcnt on entry, length of hsp_array; on return, the number of HSPs kept [in|out]
222  */
223 static void
224 s_HitlistReapContained(BlastHSP * hsp_array[], Int4 * hspcnt)
225 {
226  Int4 iread; /* iteration index used to read the hitlist */
227  Int4 iwrite; /* iteration index used to write to the hitlist */
228  Int4 old_hspcnt; /* number of HSPs in the hitlist on entry */
229 
230  old_hspcnt = *hspcnt;
231 
232  for (iread = 1; iread < *hspcnt; iread++) {
233  /* for all HSPs in the hitlist; the first HSP has no predecessor and is never removed */
234  Int4 ireadBack; /* iterator over indices less than iread */
235  BlastHSP *hsp1; /* an HSP that is a candidate for deletion */
236 
237  hsp1 = hsp_array[iread];
238  for (ireadBack = 0; ireadBack < iread && hsp1 != NULL; ireadBack++) {
239  /* for all HSPs before hsp1 in the hitlist and while hsp1
240  * has not been deleted */
241  BlastHSP *hsp2; /* an HSP that occurs earlier in hsp_array
242  * than hsp1 */
243  hsp2 = hsp_array[ireadBack];
244 
245  if( hsp2 == NULL ) { /* hsp2 was deleted in a prior iteration. */
246  continue;
247  }
248  if (hsp2->query.frame == hsp1->query.frame &&
249  hsp2->subject.frame == hsp1->subject.frame) {
250  /* hsp1 and hsp2 are in the same query/subject frame. */
251  if (CONTAINED_IN_HSP
252  (hsp2->query.offset, hsp2->query.end, hsp1->query.offset,
253  hsp2->subject.offset, hsp2->subject.end,
254  hsp1->subject.offset) && /* NOTE(review): listing line 255 is missing here; by symmetry with the test above it presumably reads CONTAINED_IN_HSP -- confirm against the repository */
256  (hsp2->query.offset, hsp2->query.end, hsp1->query.end,
257  hsp2->subject.offset, hsp2->subject.end,
258  hsp1->subject.end) &&
259  hsp1->score <= hsp2->score) {
260  hsp1 = hsp_array[iread] = Blast_HSPFree(hsp_array[iread]); /* the result of Blast_HSPFree is stored in the slot; the loop condition treats it as NULL (deleted) */
261  }
262  } /* end if hsp1 and hsp2 are in the same query/subject frame */
263  } /* end for all HSPs before hsp1 in the hitlist */
264  } /* end for all HSPs in the hitlist */
265 
266  /* Condense the hsp_array, removing any NULL items. */
267  iwrite = 0;
268  for (iread = 0; iread < *hspcnt; iread++) {
269  if (hsp_array[iread] != NULL) {
270  hsp_array[iwrite++] = hsp_array[iread];
271  }
272  }
273  *hspcnt = iwrite;
274  /* Fill the remaining memory in hsp_array with NULL pointers. */
275  for ( ; iwrite < old_hspcnt; iwrite++) {
276  hsp_array[iwrite] = NULL;
277  }
278 }
279 
280 
/**
 * Destructor callback for an edit script that has been stored in a
 * BlastCompo_Alignment.
 *
 * @param edit_script   the edit script to delete; a NULL pointer is
 *                      ignored
 */
static void s_FreeEditScript(void * edit_script)
{
    if (edit_script == NULL) {
        return;
    }
    GapEditScriptDelete(edit_script);
}
288 
289 
290 /**
291  * Converts a list of objects of type BlastCompo_Alignment to an
292  * new object of type BlastHSPList and returns the result. Conversion
293  * in this direction is lossless. The list passed to this routine is
294  * freed to ensure that there is no aliasing of fields between the
295  * list of BlastCompo_Alignments and the new hitlist.
296  *
297  * @param hsp_list The hsp_list to populate
298  * @param alignments A list of distinct alignments; freed before return [in]
299  * @param oid Ordinal id of a database sequence [in]
300  * @param queryInfo information about all queries in this search [in]
301  * @param frame query frame
302  * @return -1 if hsp_list is NULL, otherwise 0 (NOTE(review): a non-zero status from the conversion loop is not returned to the caller)
303  */
304 static int /* NOTE(review): listing line 305 (function name and the hsp_list parameter) is missing from this extract */
306  BlastCompo_Alignment ** alignments,
307  int oid,
308  const BlastQueryInfo* queryInfo,
309  int frame)
310 {
311  int status = 0; /* return code for any routine called */
312  static const int unknown_value = 0; /* dummy constant to use when a
313  parameter value is not known */
314  BlastCompo_Alignment * align; /* an alignment in the list */
315 
316  if (hsp_list == NULL) {
317  return -1;
318  }
319  hsp_list->oid = oid;
320 
321  for (align = *alignments; NULL != align; align = align->next) {
322  BlastHSP * new_hsp = NULL;
323  GapEditScript * editScript = align->context;
324  align->context = NULL; /* the edit script is detached from the alignment before being handed to Blast_HSPInit */
325 
326  status = Blast_HSPInit(align->queryStart, align->queryEnd,
327  align->matchStart, align->matchEnd,
328  unknown_value, unknown_value,
329  align->queryIndex,
330  frame, (Int2) align->frame, align->score,
331  &editScript, &new_hsp);
332  switch (align->matrix_adjust_rule) { /* NOTE(review): listing lines 334, 336, 337 and 340 (the case bodies) are missing from this extract; note that status is only checked after this switch */
333  case eDontAdjustMatrix:
335  break;
338  break;
339  default:
341  break;
342  }
343  if (status != 0)
344  break;
345  /* At this point, the subject and possibly the query sequence have
346  * been filtered; since it is not clear that num_ident of the
347  * filtered sequences, rather than the original, is desired,
348  * explicitly leave num_ident blank. */
349  new_hsp->num_ident = 0;
350 
351  status = Blast_HSPListSaveHSP(hsp_list, new_hsp);
352  if (status != 0)
353  break;
354  }
355  if (status == 0) { /* NOTE(review): listing line 356 is missing here; presumably the call that frees *alignments -- confirm */
357  Blast_HSPListSortByScore(hsp_list);
358  } else {
359  hsp_list = Blast_HSPListFree(hsp_list); /* NOTE(review): if hsp_list is a by-value pointer parameter, the caller's pointer is not updated by this assignment -- confirm */
360  }
361  return 0;
362 }
363 
364 Int4 s_GetSubjectLength(Int4 total_subj_length, EBlastProgramType program_number)
365 {
366  return ((program_number == eBlastTypeRpsTblastn) ?
367  (GET_NUCL_LENGTH(total_subj_length) - 1 ) /3 : total_subj_length);
368 }
369 
370 
371 /**
372  * Adding evalues to a list of HSPs and remove those that do not have
373  * sufficiently good (low) evalue.
374  *
375  * @param *pbestScore best (highest) score in the list
376  * @param *pbestEvalue best (lowest) evalue in the list
377  * @param hsp_list the list
378  * @param seqSrc a source of sequence data
379  * @param subject_length length of the subject sequence
380  * @param program_number the type of BLAST search being performed
381  * @param queryInfo information about the queries
382  * @param context_index the index of the query corresponding to
383  * the HSPs in hsp_list
384  * @param sbp the score block for this search
385  * @param hitParams parameters used to assign evalues and
386  * decide whether to save hits.
387  * @param pvalueForThisPair composition p-value
388  * @param LambdaRatio lambda ratio, if available
389  * @param subject_id index of subject
390  *
391  * @return 0 on success; -1 on failure (can fail because some methods
392  * of generating evalues use auxiliary structures)
393  */
394 static int
395 s_HitlistEvaluateAndPurge(int * pbestScore, double *pbestEvalue,
396  BlastHSPList * hsp_list,
397  const BlastSeqSrc* seqSrc,
398  int subject_length,
399  EBlastProgramType program_number,
400  const BlastQueryInfo* queryInfo,
401  int context_index,
402  BlastScoreBlk* sbp,
403  const BlastHitSavingParameters* hitParams,
404  double pvalueForThisPair,
405  double LambdaRatio,
406  int subject_id)
407 {
408  int status = 0;
409  *pbestEvalue = DBL_MAX;
410  *pbestScore = 0;
411  if (hitParams->do_sum_stats) {
412  status = BLAST_LinkHsps(program_number, hsp_list, queryInfo,
413  subject_length, sbp,
414  hitParams->link_hsp_params, TRUE);
415  } else {
416 
417 
418  status =
419  Blast_HSPListGetEvalues(program_number, queryInfo,
420  s_GetSubjectLength(subject_length, program_number),
421  hsp_list, TRUE, FALSE, sbp,
422  0.0, /* use a non-zero gap decay
423  only when linking HSPs */
424  1.0); /* Use scaling factor equal to
425  1, because both scores and
426  Lambda are scaled, so they
427  will cancel each other. */
428  }
429  if (eBlastTypeBlastp == program_number ||
430  eBlastTypeBlastx == program_number) {
431  if ((0 <= pvalueForThisPair) && (pvalueForThisPair <= 1)) {
432  s_AdjustEvaluesForComposition(hsp_list, pvalueForThisPair, seqSrc,
433  subject_length,
434  &queryInfo->contexts[context_index],
435  LambdaRatio, subject_id);
436  }
437  }
438  if (status == 0) {
439  Blast_HSPListReapByEvalue(hsp_list, hitParams->options);
440  if (hsp_list->hspcnt > 0) {
441  *pbestEvalue = hsp_list->best_evalue;
442  *pbestScore = hsp_list->hsp_array[0]->score;
443  }
444  }
445  return status == 0 ? 0 : -1;
446 }
447 
448 /** Compute the number of identities for the HSPs in the hsp_list
449  * @note Should work for blastp and tblastn now.
450  *
451  * @param query_blk the query sequence data [in]
452  * @param query_info structure describing the query_blk structure [in]
453  * @param seq_src source of subject sequence data; when NULL, subject_blk->sequence is used as the subject instead [in]
454  * @param hsp_list list of HSPs to be processed; a NULL list makes the routine return immediately [in|out]
455  * @param scoring_options scoring options [in]
456  * @param gen_code_string Genetic code for tblastn [in]
457  */
458 static void /* NOTE(review): listing line 459 (function name and the query_blk parameter) is missing from this extract */
460  const BlastQueryInfo* query_info,
461  BLAST_SequenceBlk* subject_blk,
462  const BlastSeqSrc* seq_src,
463  BlastHSPList* hsp_list,
464  const BlastScoringOptions* scoring_options,
465  const Uint1* gen_code_string,
466  const BlastScoreBlk* sbp,
467  BlastSeqSrcSetRangesArg * ranges)
468 {
469  Uint1* query = NULL;
470  Uint1* query_nomask = NULL;
471  Uint1* subject = NULL;
472  const EBlastProgramType program_number = scoring_options->program_number;
473  const Boolean kIsOutOfFrame = scoring_options->is_ooframe;
474  const EBlastEncoding encoding = Blast_TracebackGetEncoding(program_number);
475  BlastSeqSrcGetSeqArg seq_arg;
476  Int2 status = 0;
477  int i;
478  SBlastTargetTranslation* target_t = NULL;
479 
480  if ( !hsp_list) return;
481 
482  /* Initialize the subject */
483  if (seq_src){
484  memset((void*) &seq_arg, 0, sizeof(seq_arg));
485  seq_arg.oid = hsp_list->oid;
486  seq_arg.encoding = encoding;
487  seq_arg.check_oid_exclusion = TRUE;
488  seq_arg.ranges = ranges;
489  status = BlastSeqSrcGetSequence(seq_src, (void*) &seq_arg);
490  ASSERT(status == 0);
491  (void)status; /* to pacify compiler warning */
492 
493  if (program_number == eBlastTypeTblastn) {
494  subject_blk = seq_arg.seq; /* NOTE(review): listing lines 495 and 498 are missing from the call that follows (its callee name and one argument) */
496  subject_blk,
497  gen_code_string,
499  kIsOutOfFrame,
500  &target_t
501  );
502  } else {
503  subject = seq_arg.seq->sequence;
504  }
505  } else {
506  subject = subject_blk->sequence;
507  }
508 
509  for (i = 0; i < hsp_list->hspcnt; i++) {
510  BlastHSP* hsp = hsp_list->hsp_array[i];
511 
512  /* Initialize the query */
513  if (program_number == eBlastTypeBlastx && kIsOutOfFrame) {
514  Int4 context = hsp->context - hsp->context % CODON_LENGTH; /* round the context index down to a multiple of CODON_LENGTH */
515  Int4 context_offset = query_info->contexts[context].query_offset;
516  query = query_blk->oof_sequence + CODON_LENGTH + context_offset;
517  query_nomask = query_blk->oof_sequence + CODON_LENGTH + context_offset; /* same pointer as query on this branch */
518  } else {
519  query = query_blk->sequence +
520  query_info->contexts[hsp->context].query_offset;
521  query_nomask = query_blk->sequence_nomask +
522  query_info->contexts[hsp->context].query_offset;
523  }
524 
525  /* Translate subject if needed. tblastn compares against query,
       every other program against query_nomask. */
526  if (program_number == eBlastTypeTblastn) {
527  const Uint1* target_sequence = Blast_HSPGetTargetTranslation(target_t, hsp, NULL);
528  status = Blast_HSPGetNumIdentitiesAndPositives(query, target_sequence, hsp, scoring_options, 0, sbp);
529  }
530  else
531  status = Blast_HSPGetNumIdentitiesAndPositives(query_nomask, subject, hsp, scoring_options, 0, sbp);
532 
533  ASSERT(status == 0);
534  }
535  target_t = BlastTargetTranslationFree(target_t);
536  if (seq_src) {
537  // ranges are allocated outside of this function, so detach them before releasing seq_arg
538  seq_arg.ranges = NULL;
539  BlastSeqSrcReleaseSequence(seq_src, (void*) &seq_arg);
540  BlastSequenceBlkFree(seq_arg.seq);
541  }
542 }
543 
544 
545 /**
546  * A callback routine: compute lambda for the given score
547  * probabilities.
548  * (@sa calc_lambda_type).
549  */
550 static double
551 s_CalcLambda(double probs[], int min_score, int max_score, double lambda0)
552 {
553 
554  int i; /* loop index */
555  int score_range; /* range of possible scores */
556  double avg; /* expected score of aligning two characters */
557  Blast_ScoreFreq freq; /* score frequency data */
558 
559  score_range = max_score - min_score + 1;
560  avg = 0.0;
561  for (i = 0; i < score_range; i++) {
562  avg += (min_score + i) * probs[i];
563  }
564  freq.score_min = min_score;
565  freq.score_max = max_score;
566  freq.obs_min = min_score;
567  freq.obs_max = max_score;
568  freq.sprob0 = probs;
569  freq.sprob = &probs[-min_score];
570  freq.score_avg = avg;
571 
572  return Blast_KarlinLambdaNR(&freq, lambda0);
573 }
574 
575 
576 /** Fill a two-dimensional array with the frequency ratios that
577  * underlie a position specific score matrix (PSSM).
578  *
579  * @param returnRatios a two-dimensional array with BLASTAA_SIZE
580  * columns
581  * @param numPositions the number of rows in returnRatios
582  * @param query query sequence data, of length numPositions
583  * @param matrixName the name of the position independent matrix
584  * corresponding to this PSSM
585  * @param startNumerator position-specific data used to generate the
586  * PSSM
587  * @return 0 on success; -1 if the named matrix isn't known, or if
588  * there was a memory error
589  * @todo find out what start numerator is.
590  */
591 static int
592 s_GetPosBasedStartFreqRatios(double ** returnRatios,
593  Int4 numPositions,
594  Uint1 * query,
595  const char *matrixName,
596  double **startNumerator)
597 {
598  Int4 i,j; /* loop indices */
599  SFreqRatios * stdFreqRatios = NULL; /* frequency ratios for the
600  named matrix. */
601  double *standardProb; /* probabilities of each
602  letter*/
603  const double kPosEpsilon = 0.0001; /* values below this cutoff
604  are treated specially */
605 
606  stdFreqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
607  if (stdFreqRatios == NULL) {
608  return -1;
609  }
610  for (i = 0; i < numPositions; i++) {
611  for (j = 0; j < BLASTAA_SIZE; j++) {
612  returnRatios[i][j] = stdFreqRatios->data[query[i]][j];
613  }
614  }
615  stdFreqRatios = _PSIMatrixFrequencyRatiosFree(stdFreqRatios);
616 
617  standardProb = BLAST_GetStandardAaProbabilities();
618  if(standardProb == NULL) {
619  return -1;
620  }
621  /*reverse multiplication done in posit.c*/
622  for (i = 0; i < numPositions; i++) {
623  for (j = 0; j < BLASTAA_SIZE; j++) {
624  if ((standardProb[query[i]] > kPosEpsilon) &&
625  (standardProb[j] > kPosEpsilon) &&
626  (j != eStopChar) && (j != eXchar) &&
627  (startNumerator[i][j] > kPosEpsilon)) {
628  returnRatios[i][j] = startNumerator[i][j] / standardProb[j];
629  }
630  }
631  }
632  sfree(standardProb);
633 
634  return 0;
635 }
636 
637 
638 /**
639  * Fill a two-dimensional array with the frequency ratios that underlie the
640  * named score matrix.
641  *
642  * @param returnRatios a two-dimensional array of size
643  * BLASTAA_SIZE x BLASTAA_SIZE
644  * @param matrixName the name of a matrix
645  * @return 0 on success; -1 if the named matrix isn't known, or if
646  * there was a memory error
647  */
648 static int
649 s_GetStartFreqRatios(double ** returnRatios,
650  const char *matrixName)
651 {
652  /* Loop indices */
653  int i,j;
654  /* Frequency ratios for the matrix */
655  SFreqRatios * stdFreqRatios = NULL;
656 
657  stdFreqRatios = _PSIMatrixFrequencyRatiosNew(matrixName);
658  if (stdFreqRatios == NULL) {
659  return -1;
660  }
661  for (i = 0; i < BLASTAA_SIZE; i++) {
662  for (j = 0; j < BLASTAA_SIZE; j++) {
663  returnRatios[i][j] = stdFreqRatios->data[i][j];
664  }
665  }
666  stdFreqRatios = _PSIMatrixFrequencyRatiosFree(stdFreqRatios);
667 
668  return 0;
669 }
670 
671 
672 /** SCALING_FACTOR is a multiplicative factor used to get more bits of
673  * precision in the integer matrix scores. It cannot be arbitrarily
674  * large because we do not want total alignment scores to exceed
675  * -(BLAST_SCORE_MIN) */
676 #define SCALING_FACTOR 32
677 
678 
679 /**
680  * Produce a scaled-up version of the position-specific matrix
681  * with a given set of position-specific residue frequencies.
682  *
683  * @param fillPosMatrix is the matrix to be filled
684  * @param matrixName name of the standard substitution matrix [in]
685  * @param posFreqs PSSM's frequency ratios [in]
686  * @param query Query sequence data [in]
687  * @param queryLength Length of the query sequence above [in]
688  * @param sbp stores various parameters of the search
689  * @param scale_factor amount by which ungapped parameters should be
690  * scaled.
691  * @return 0 on success; -1 if a working structure could not be created; otherwise the non-zero status of the failing conversion or scaling call
692  */
693 static int
694 s_ScalePosMatrix(int ** fillPosMatrix,
695  const char * matrixName,
696  double ** posFreqs,
697  Uint1 * query,
698  int queryLength,
699  BlastScoreBlk* sbp,
700  double scale_factor)
701 {
702  /* Data used by scaling routines */
703  Kappa_posSearchItems *posSearch = NULL;
704  /* A reduced collection of search parameters used by PSI-blast */
705  Kappa_compactSearchItems *compactSearch = NULL;
706  /* Representation of a PSSM internal to PSI-blast */
707  _PSIInternalPssmData* internal_pssm = NULL;
708  /* return code */
709  int status = 0;
710 
711  posSearch = Kappa_posSearchItemsNew(queryLength, matrixName,
712  fillPosMatrix, posFreqs);
713  compactSearch = Kappa_compactSearchItemsNew(query, queryLength, sbp);
714  /* Copy data into new structures */
715  internal_pssm = _PSIInternalPssmDataNew(queryLength, BLASTAA_SIZE);
716  if (posSearch == NULL || compactSearch == NULL || internal_pssm == NULL) {
717  status = -1; /* all three results are checked together; whatever was created is released at cleanup */
718  goto cleanup;
719  }
720  _PSICopyMatrix_int(internal_pssm->pssm, posSearch->posMatrix,
721  internal_pssm->ncols, internal_pssm->nrows);
722  _PSICopyMatrix_int(internal_pssm->scaled_pssm,
723  posSearch->posPrivateMatrix,
724  internal_pssm->ncols, internal_pssm->nrows);
725  _PSICopyMatrix_double(internal_pssm->freq_ratios,
726  posSearch->posFreqs, internal_pssm->ncols,
727  internal_pssm->nrows);
728  status = _PSIConvertFreqRatiosToPSSM(internal_pssm, query, sbp,
729  compactSearch->standardProb);
730  if (status != 0) {
731  goto cleanup;
732  }
733  /* Copy data from new structures to posSearchItems */
734  _PSICopyMatrix_int(posSearch->posMatrix, internal_pssm->pssm,
735  internal_pssm->ncols, internal_pssm->nrows); /* NOTE(review): listing line 736 is missing before the next argument list; by symmetry with the copies above it presumably starts a copy into posSearch->posPrivateMatrix -- confirm */
737  internal_pssm->scaled_pssm,
738  internal_pssm->ncols, internal_pssm->nrows);
739  _PSICopyMatrix_double(posSearch->posFreqs,
740  internal_pssm->freq_ratios,
741  internal_pssm->ncols, internal_pssm->nrows);
742  status = Kappa_impalaScaling(posSearch, compactSearch, (double)
743  scale_factor, FALSE, sbp);
744 cleanup: /* reached on both success and failure */
745  internal_pssm = _PSIInternalPssmDataFree(internal_pssm);
746  posSearch = Kappa_posSearchItemsFree(posSearch);
747  compactSearch = Kappa_compactSearchItemsFree(compactSearch);
748 
749  return status;
750 }
751 
752 
753 /**
754  * Convert an array of HSPs to a list of BlastCompo_Alignment objects.
755  * The context field of each BlastCompo_Alignment is set to point to the
756  * corresponding HSP.
757  *
758  * @param self array of per-frame alignment lists to be filled, indexed by (hsp->context - init_context); only entries that receive at least one HSP are assigned here
759  * @param numAligns number of alignments placed in each of the 6 frame lists [out]
760  * @param hsp_array an array of HSPs
761  * @param hspcnt the length of hsp_array
762  * @param init_context the initial context to process
763  * @param queryInfo information about the concatenated query
764  * @param localScalingFactor the amount by which this search is scaled
765  *
766  * @return 0 on success (including an empty hsp_array); -1 if there is an out-of-memory
767  * error while creating an alignment
768  */
769 static int /* NOTE(review): listing line 770 (function name and the self parameter) is missing from this extract */
771  int *numAligns,
772  BlastHSP * hsp_array[], Int4 hspcnt,
773  int init_context,
774  const BlastQueryInfo* queryInfo,
775  double localScalingFactor)
776 {
777  BlastCompo_Alignment * tail[6]; /* last element in aligns */
778  int hsp_index; /* loop index */
779  int frame_index;
780 
781  for (frame_index = 0; frame_index < 6; frame_index++) {
782  tail[frame_index] = NULL;
783  numAligns[frame_index] = 0;
784  }
785 
786  for (hsp_index = 0; hsp_index < hspcnt; hsp_index++) {
787  BlastHSP * hsp = hsp_array[hsp_index]; /* current HSP */
788  BlastCompo_Alignment * new_align; /* newly-created alignment */
789  frame_index = hsp->context - init_context;
790  ASSERT(frame_index < 6 && frame_index >= 0);
791  /* Incoming alignments will have coordinates of the query
792  portion relative to a particular query context; they must
793  be shifted for use in the composition_adjustment library.
794  */
795  new_align =
796  BlastCompo_AlignmentNew((int) (hsp->score * localScalingFactor), /* NOTE(review): listing line 797 (one argument) is missing after this line */
798  hsp->query.offset, hsp->query.end, hsp->context,
799  hsp->subject.offset, hsp->subject.end,
800  hsp->subject.frame, hsp);
801  if (new_align == NULL) /* out of memory; alignments already linked into self are not released here */
802  return -1;
803  if (tail[frame_index] == NULL) { /* if the list aligns is empty; */
804  /* make new_align the first element in the list */
805  self[frame_index] = new_align;
806  } else {
807  /* otherwise add new_align to the end of the list */
808  tail[frame_index]->next = new_align;
809  }
810  tail[frame_index] = new_align;
811  numAligns[frame_index]++;
812  }
813  return 0;
814 }
815 
816 
817 /**
818  * Redo a S-W alignment using an x-drop alignment. The result will
819  * usually be the same as the S-W alignment. The call to ALIGN_EX
820  * attempts to force the endpoints of the alignment to match the
821  * optimal endpoints determined by the Smith-Waterman algorithm.
822  * ALIGN_EX is used, so that if the data structures for storing BLAST
823  * alignments are changed, the code will not break
824  *
825  * @param query the query data
826  * @param queryStart start of the alignment in the query sequence
827  * @param queryEnd end of the alignment in the query sequence,
828  * as computed by the Smith-Waterman algorithm
829  * @param subject the subject (database) sequence
830  * @param matchStart start of the alignment in the subject sequence
831  * @param matchEnd end of the alignment in the subject sequence,
832  * as computed by the Smith-Waterman algorithm
833  * @param gap_align parameters for a gapped alignment; gap_x_dropoff is changed temporarily and restored before return
834  * @param scoringParams Settings for gapped alignment.[in]
835  * @param score score computed by the Smith-Waterman algorithm
836  * @param queryAlignmentExtent length of the alignment in the query sequence,
837  * as computed by the x-drop algorithm
838  * @param matchAlignmentExtent length of the alignment in the subject
839  * sequence, as computed by the x-drop algorithm
840  * @param newScore alignment score computed by the x-drop
841  * algorithm
842  */
843 static void /* NOTE(review): listing lines 844 and 847 (function name with the query parameter, and the subject parameter) are missing from this extract */
845  Int4 queryStart,
846  Int4 queryEnd,
848  Int4 matchStart,
849  Int4 matchEnd,
850  BlastGapAlignStruct* gap_align,
851  const BlastScoringParameters* scoringParams,
852  Int4 score,
853  Int4 * queryAlignmentExtent,
854  Int4 * matchAlignmentExtent,
855  Int4 * newScore)
856 {
857  Int4 XdropAlignScore; /* alignment score obtained using X-dropoff
858  * method rather than Smith-Waterman */
859  Int4 doublingCount = 0; /* number of times X-dropoff had to be
860  * doubled */
861  Int4 gap_x_dropoff_orig = gap_align->gap_x_dropoff; /* saved so the caller's value can be restored; NOTE(review): listing lines 863-864 are missing after the blank line below */
862 
865  do {
866  XdropAlignScore =
867  ALIGN_EX(&(query->data[queryStart]) - 1,
868  &(subject->data[matchStart]) - 1,
869  queryEnd - queryStart + 1, matchEnd - matchStart + 1,
870  queryAlignmentExtent,
871  matchAlignmentExtent, gap_align->fwd_prelim_tback,
872  gap_align, scoringParams, queryStart - 1, FALSE, FALSE,
873  NULL);
874 
875  gap_align->gap_x_dropoff *= 2; /* a retry, if any, uses twice the dropoff */
876  doublingCount++;
877  if((XdropAlignScore < score) && (doublingCount < 3)) { /* NOTE(review): listing line 878 (the body of this if) is missing from this extract */
879  }
880  } while((XdropAlignScore < score) && (doublingCount < 3)); /* at most three attempts; stops early once the S-W score is reached */
881 
882  gap_align->gap_x_dropoff = gap_x_dropoff_orig;
883  *newScore = XdropAlignScore;
884 }
885 
886 
887 /**
888  * BLAST-specific information that is associated with a
889  * BlastCompo_MatchingSequence.
890  * NOTE(review): listing lines 892 and 903 (presumably the struct's opening and its closing with the typedef name) are missing from this extract; s_MatchingSequenceRelease refers to the type as BlastKappa_SequenceInfo.
890  */
891 typedef struct
893  EBlastProgramType prog_number; /**< identifies the type of blast
894  search being performed. The type
895  of search determines how sequence
896  data should be obtained. */
897  const BlastSeqSrc* seq_src; /**< BLAST sequence data source */
898  BlastSeqSrcGetSeqArg seq_arg; /**< argument to GetSequence method
899  of the BlastSeqSrc (@todo this
900  structure was designed to be
901  allocated on the stack, i.e.: in
902  Kappa_MatchingSequenceInitialize) */
904 
905 
906 /** Release the resources associated with a matching sequence. A NULL
     * argument is ignored. local_data is freed only when self->index >= 0,
     * and is set to NULL in every case. */
907 static void /* NOTE(review): listing line 908 (function name and the self parameter) is missing from this extract */
909 {
910  if (self != NULL) {
911  if (self->index >=0) {
912  BlastKappa_SequenceInfo * local_data = self->local_data;
913  if (self->length > 0) { /* NOTE(review): listing line 914 (start of the call whose last argument follows) is missing from this extract */
915  &local_data->seq_arg);
916  BlastSequenceBlkFree(local_data->seq_arg.seq);
917  }
918  free(self->local_data);
919  }
920  self->local_data = NULL;
921  }
922 }
923 
/**
 * Greedy extension to the right that starts at offset 0 of both sequences
 * and looks only at identities. Runs of identical residues are consumed
 * first; when a run ends, the routine tries to jump over up to
 * max_shift - 1 residues, treating them as mismatches, as a gap in the
 * subject, or as a gap in the query (in that order of preference). A jump is
 * accepted only if it is followed by two identical residue pairs. The
 * extension stops when no jump is accepted or either sequence is exhausted.
 *
 * This is a simplified version of the Danielle and Jean Thierry-Miegs'
 * jumper alignment implemented in NCBI Magic
 * https://www.ncbi.nlm.nih.gov/IEB/Research/Acembly/Download/Downloads.html
 *
 * @param query_seq Query sequence [in]
 * @param query_len Query length [in]
 * @param subject_seq Subject sequence [in]
 * @param subject_len Subject length [in]
 * @param max_shift Upper bound (exclusive) on the number of residues that
 *                  may be skipped in one jump [in]
 * @param query_ext_len Number of query residues covered [out]
 * @param subject_ext_len Number of subject residues covered [out]
 * @param align_len Alignment length estimate [out]
 * @return Number of identical residues
 */
static int s_ExtendRight(Uint1* query_seq, int query_len,
                         Uint1* subject_seq, int subject_len,
                         int max_shift,
                         int* query_ext_len, int* subject_ext_len,
                         int* align_len)
{
    int qi = 0;                 /* next unprocessed query offset */
    int si = 0;                 /* next unprocessed subject offset */
    int identities = 0;
    int query_gap_total = 0;
    int subject_gap_total = 0;

    for (;;) {
        int shift;
        int resumed = 0;        /* set once a jump has been accepted */

        if (qi >= query_len || si >= subject_len) {
            break;
        }

        /* consume the current run of identical residues */
        while (qi < query_len && si < subject_len
               && query_seq[qi] == subject_seq[si]) {
            identities++;
            qi++;
            si++;
        }

        /* look for the smallest jump that lands on two identical pairs;
           the bounds keep every index below inside both sequences */
        for (shift = 1;
             shift < max_shift
                 && qi + shift + 1 < query_len
                 && si + shift + 1 < subject_len;
             shift++) {

            if (query_seq[qi + shift] == subject_seq[si + shift]
                && query_seq[qi + shift + 1] == subject_seq[si + shift + 1]) {
                /* `shift` mismatches followed by two identities */
                qi += shift + 2;
                si += shift + 2;
            }
            else if (query_seq[qi + shift] == subject_seq[si]
                     && query_seq[qi + shift + 1] == subject_seq[si + 1]) {
                /* `shift` query residues face a gap in the subject */
                qi += shift + 2;
                si += 2;
                subject_gap_total += shift;
            }
            else if (query_seq[qi] == subject_seq[si + shift]
                     && query_seq[qi + 1] == subject_seq[si + shift + 1]) {
                /* `shift` subject residues face a gap in the query */
                qi += 2;
                si += shift + 2;
                query_gap_total += shift;
            }
            else {
                continue;
            }

            /* the two residues that validated the jump are identities */
            identities += 2;
            resumed = 1;
            break;
        }

        if (!resumed) {
            break;
        }
    }

    *query_ext_len = qi;
    *subject_ext_len = si;
    if (qi > si) {
        *align_len = qi + query_gap_total;
    }
    else {
        *align_len = si + subject_gap_total;
    }

    return identities;
}
1020 
1021 
/**
 * Extend left from the end of the query and subject ranges and count
 * identities. Runs of identical residues are consumed first; when a run
 * ends, the routine tries to jump over up to max_shift - 1 residues,
 * treating them as mismatches, as a gap in the subject, or as a gap in the
 * query. A jump is accepted only if it is followed (to the left) by two
 * identical residue pairs. See s_ExtendRight for the mirrored procedure.
 *
 * The identity loop stops before index 0 of either sequence, so the residue
 * at index 0 is never counted by that loop.
 *
 * @param query_seq Query sequence [in]
 * @param query_len Query length [in]
 * @param subject_seq Subject sequence [in]
 * @param subject_len Subject length [in]
 * @param max_shift Upper bound (exclusive) on the number of residues that
 *                  may be skipped in one jump [in]
 * @param query_ext_len Extension length on the query [out]
 * @param subject_ext_len Extension length on the subject [out]
 * @param align_len Alignment length; overwritten, the incoming value is
 *                  not read [out]
 * @return Number of identical residues
 */
static int s_ExtendLeft(Uint1* query_seq, int query_len,
                        Uint1* subject_seq, int subject_len,
                        int max_shift,
                        int* query_ext_len, int* subject_ext_len,
                        int* align_len)
{
    int q_pos = query_len - 1;
    int s_pos = subject_len - 1;
    int num_identical = 0;
    int gaps_in_query = 0;
    int gaps_in_subject = 0;

    while (q_pos >= 0 && s_pos >= 0) {
        int n;
        int match = 0;

        /* process identities */
        while (q_pos > 0 && s_pos > 0
               && query_seq[q_pos] == subject_seq[s_pos]) {
            num_identical++;
            q_pos--;
            s_pos--;
        }

        /* try to skip mismatches or gaps; the bounds keep every index used
           below at 1 or above */
        for (n = 1; n < max_shift && q_pos - n - 1 > 0 && s_pos - n - 1 > 0
                 && !match; n++) {

            /* mismatch */
            if (query_seq[q_pos - n] == subject_seq[s_pos - n]
                && query_seq[q_pos - n - 1] == subject_seq[s_pos - n - 1]) {
                q_pos -= n + 2;
                s_pos -= n + 2;
                num_identical += 2;
                match = 1;
            }

            /* gap in subject */
            if (!match && query_seq[q_pos - n] == subject_seq[s_pos]
                && query_seq[q_pos - n - 1] == subject_seq[s_pos - 1]) {
                q_pos -= n + 2;
                s_pos -= 2;
                num_identical += 2;
                gaps_in_subject += n;
                match = 1;
            }

            /* gap in query */
            if (!match && query_seq[q_pos] == subject_seq[s_pos - n]
                && query_seq[q_pos - 1] == subject_seq[s_pos - n - 1]) {
                q_pos -= 2;
                s_pos -= n + 2;
                num_identical += 2;
                gaps_in_query += n;
                match = 1;
            }
        }

        if (!match) {
            break;
        }
    }

    *query_ext_len = query_len - q_pos - 1;
    *subject_ext_len = subject_len - s_pos - 1;
    /* align_len is a pure output, as in s_ExtendRight: assign rather than
       accumulate, so the result does not depend on what the caller left in
       the variable */
    *align_len = *query_ext_len > *subject_ext_len ?
        *query_ext_len + gaps_in_query : *subject_ext_len + gaps_in_subject;

    return num_identical;
}
1108 
1109 
/**
 * Pack the first word_size residues of a sequence into one integer, five
 * bits per residue, with the first residue in the most significant position
 * (assumes a 28-letter alphabet, so each residue fits in five bits).
 *
 * @param data Sequence [in]
 * @param word_size Word size [in]
 * @return Hash value
 */
static Uint8 s_GetHash(const Uint1* data, int word_size)
{
    Uint8 packed = 0;
    const Uint1* residue = data;
    int remaining;

    for (remaining = word_size; remaining > 0; remaining--) {
        packed = (packed << 5) + (Uint8)*residue;
        residue++;
    }
    return packed;
}
1127 
1128 
1129 /**
1130  * Find a local number of identical residues in two aligned sequences by
1131  * finding word matches and doing a simple gapped extensions from the word hits
1132  *
1133  * @param query_seq Query sequence [in]
1134  * @param query_hashes Array of query words with index of each word
1135  * corresponding to word position in the query [in]
1136  * @param query_len Query length [in]
1137  * @param subject_seq Subject sequence [in]
1138  * @param subject_len Subject length [in]
1139  * @param max_shift Maximum number of local mismatches or gaps for extensions
1140  * [in]
1141  * @return Number of identical residues
1142  */
1143 static int s_FindNumIdentical(Uint1* query_seq,
1144  const Uint8* query_hashes,
1145  int query_len,
1146  Uint1* subject_seq,
1147  int subject_len,
1148  int max_shift)
1149 {
1150  int word_size = 8; /* word size for k-mer matching */
1151  Uint8 hash = 0;
1152  Uint8 mask = NCBI_CONST_UINT8(0xFFFFFFFFFF); /* mask for computing hash
1153  values */
1154  int query_from = 0;
1155  int subject_from = 0;
1156 
1157  int s_pos; /* position in the subject sequence */
1158  int num_identical = 0; /* number of identical residues found */
1159  Boolean match = FALSE;
1160 
1161  /* if query or subject length is smaller than word size, exit */
1162  if (!query_seq || !query_hashes || !subject_seq
1163  || query_len < word_size || subject_len < word_size) {
1164 
1165  return 0;
1166  }
1167 
1168  /* for each subject position */
1169  for (s_pos = 0; s_pos < subject_len - word_size; s_pos++) {
1170  int q_pos;
1171 
1172  /* find word hash */
1173  if (s_pos == 0 || match) {
1174  hash = s_GetHash(&subject_seq[s_pos], word_size);
1175  }
1176  else {
1177  hash <<= 5;
1178  hash &= mask;
1179  hash += subject_seq[s_pos + word_size - 1];
1180  }
1181 
1182  /* find matching query word; index of hash is position of the word
1183  the query */
1184  for (q_pos = query_from;q_pos < query_len - word_size; q_pos++) {
1185  if (query_hashes[q_pos] == hash) {
1186  break;
1187  }
1188  }
1189 
1190  /* if match */
1191  if (q_pos < query_len - word_size) {
1192  int query_start = q_pos;
1193  int subject_start = s_pos;
1194 
1195  int query_left_len, query_right_len;
1196  int subject_left_len, subject_right_len;
1197  int align_len_left=0, align_len_right=0;
1198 
1199  match = TRUE;
1200  num_identical += word_size;
1201 
1202  /* extend left from word match */
1203  num_identical += s_ExtendLeft(query_seq + query_from,
1204  query_start - query_from,
1205  subject_seq + subject_from,
1206  subject_start - subject_from,
1207  max_shift,
1208  &query_left_len, &subject_left_len,
1209  &align_len_left);
1210 
1211  /* extend right from word match */
1212  num_identical += s_ExtendRight(query_seq + query_start + word_size,
1213  query_len - query_start - word_size,
1214  subject_seq + subject_start + word_size,
1215  subject_len - subject_start - word_size,
1216  max_shift,
1217  &query_right_len, &subject_right_len,
1218  &align_len_right);
1219 
1220 
1221  /* disregard already matched and extended words when matching
1222  further positions */
1223 
1224  query_from = query_start + word_size + query_right_len;
1225  subject_from = subject_start + word_size + subject_right_len;
1226  /* s_pos will be incremented in the loop */
1227  s_pos = subject_from - 1;
1228  }
1229  else {
1230  match = FALSE;
1231  }
1232  }
1233 
1234  return num_identical;
1235 }
1236 
1237 /**
1238  * Test whether the aligned parts of two sequences that
1239  * have a high-scoring gapless alignment are nearly identical.
1240  *
1241  * First extend from the left end of the query and subject ranges and stop if
1242  * there are too many mismatches. Then extend from the right end. Then for the
1243  * remaining portion of the sequences find matching words and extend left and
1244  * right from the word hit. Repeat the last step until the whole alignment
1245  * ranges are processed.
1246  *
1247  * @param seqData Subject sequence [in]
1248  * @param seqOffset Starting offset of the subject sequence in alignment data
1249  * [in]
1250  * @param queryData Query sequence [in]
1251  * @param queryOffset Starting offset of the query sequence in alignment data
1252  * [in]
1253  * @param query_words Array of query words with word index corresponding to
1254  * word's position in the query [in]
1255  * @param align Alignment data [in]
1256  * @return True if sequence parts are nearly identical, false otherwise
1257  */
/* NOTE(review): the line carrying the function name and its first parameter
 * (used below as `seqData`) is absent from this listing. */
1258 static Boolean
1260  const int seqOffset,
1261  const BlastCompo_SequenceData* queryData,
1262  const int queryOffset,
1263  const Uint8* query_words,
1264  const BlastCompo_Alignment* align)
1265 {
     /* alignment end points converted to inclusive offsets local to
        queryData / seqData */
1266  int qStart = align->queryStart - queryOffset;
1267  /* align->queryEnd points to one position past alignment end */
1268  int qEnd = align->queryEnd - queryOffset - 1;
1269  int sStart = align->matchStart - seqOffset;
1270  int sEnd = align->matchEnd - seqOffset - 1;
     /* the ranges count as near identical when the identity fraction is
        strictly above this threshold */
1271  const double kMinFractionNearIdentical = 0.95;
1272  int max_shift = 8;
1273 
1274  int query_len = qEnd - qStart + 1;
1275  int subject_len = sEnd - sStart + 1;
     /* the shorter of the two ranges is the denominator of every identity
        fraction below.
        NOTE(review): a zero-length range would make this divisor zero;
        confirm callers never pass one. */
1276  int align_len = MIN(query_len, subject_len);
1277 
1278  int query_left_len = 0;
1279  int subject_left_len = 0;
1280  int query_right_len = 0;
1281  int subject_right_len = 0;
1282  int align_left_len = 0;
1283  int align_right_len = 0;
1284 
1285  double fraction_identical;
1286 
1287  /* first find number of identities going from the beginning of the query
1288  and subject ranges */
1289  int num_identical = s_ExtendRight(queryData->data + qStart, query_len,
1290  seqData->data + sStart, subject_len,
1291  max_shift,
1292  &query_right_len, &subject_right_len,
1293  &align_right_len);
1294 
1295  /* if the whole query range was processed return near identical status */
1296  if (query_right_len >= query_len || subject_right_len >= subject_len) {
1297  fraction_identical = (double)num_identical / (double)align_len;
1298  ASSERT(fraction_identical - 1.0 < 1e-10);
1299  return fraction_identical > kMinFractionNearIdentical;
1300  }
1301 
1302  /* find the number of identities going from the end of the query and subject
1303  ranges; only the part not covered by the first extension is passed */
1304  num_identical += s_ExtendLeft(queryData->data + qStart + query_right_len,
1305  query_len - query_right_len,
1306  seqData->data + sStart + subject_right_len,
1307  subject_len - subject_right_len,
1308  max_shift,
1309  &query_left_len, &subject_left_len,
1310  &align_left_len);
1311 
1312  /* if the whole alignment ranges were covered, return the near identical
1313  status */
1314  if (query_left_len + query_right_len >= query_len
1315  || subject_left_len + subject_right_len >= subject_len) {
1316 
1317  fraction_identical = (double)num_identical / (double)(align_len);
1318  ASSERT(fraction_identical - 1.0 < 1e-10);
1319  return fraction_identical > kMinFractionNearIdentical;
1320  }
1321 
1322  /* find the number of identical matches in the middle portion of the
1323  alignment ranges; query_words is offset by the same amount as the query
1324  data so hash index and residue position stay in step */
1324  num_identical += s_FindNumIdentical(queryData->data + qStart + query_right_len,
1325  query_words + qStart + query_right_len,
1326  query_len - query_left_len - query_right_len,
1327  seqData->data + sStart + subject_right_len,
1328  subject_len - subject_left_len - subject_right_len,
1329  max_shift);
1330 
1331  fraction_identical = (double)num_identical / (double)align_len;
1332  ASSERT(fraction_identical - 1.0 < 1e-10);
1333  if (fraction_identical > kMinFractionNearIdentical) {
1334  return TRUE;
1335  }
1336  else {
1337  return FALSE;
1338  }
1339 }
1340 
1341 
1342 /**
1343  * Initialize a new matching sequence, obtaining information about the
1344  * sequence from the search.
1345  *
1346  * @param self object to be initialized
1347  * @param seqSrc A pointer to a source from which sequence data
1348  * may be obtained
1349  * @param program_number identifies the type of blast search being
1350  * performed.
1351  * @param default_db_genetic_code default genetic code to use when
1352  * subject sequences are translated and there is
1353  * no other guidance on what code to use
1354  * @param subject_index index of the matching sequence in the database
1355  */
/* Returns 0 when a sequence of non-zero length was obtained, -1 otherwise.
 * NOTE(review): several lines are absent from this listing: the one carrying
 * the function name and `self` parameter, the bodies of both branches of the
 * program_number test, and the statement that precedes `return -1`. */
1356 static int
1358  EBlastProgramType program_number,
1359  const BlastSeqSrc* seqSrc,
1360  Int4 default_db_genetic_code,
1361  Int4 subject_index,
1362  BlastSeqSrcSetRangesArg * ranges)
1363 {
1364  BlastKappa_SequenceInfo * seq_info; /* BLAST-specific sequence
1365  information */
     /* start from the "nothing obtained" state; length stays 0 unless the
        sequence is fetched below */
1366  self->length = 0;
1367  self->local_data = NULL;
1368 
1369  seq_info = malloc(sizeof(BlastKappa_SequenceInfo));
1370  if (seq_info != NULL) {
1371  self->local_data = seq_info;
1372 
1373  seq_info->seq_src = seqSrc;
1374  seq_info->prog_number = program_number;
1375 
1376  memset((void*) &seq_info->seq_arg, 0, sizeof(seq_info->seq_arg));
     /* self->index is only assigned on this path, i.e. when the allocation
        above succeeded */
1377  seq_info->seq_arg.oid = self->index = subject_index;
1378  seq_info->seq_arg.check_oid_exclusion = TRUE;
1379  seq_info->seq_arg.ranges = ranges;
1380 
1381  if( program_number == eBlastTypeTblastn ) {
1383  } else {
1385  }
     /* a negative return from the sequence source is treated as failure */
1386  if (BlastSeqSrcGetSequence(seqSrc, &seq_info->seq_arg) >= 0) {
1387  self->length =
1388  BlastSeqSrcGetSeqLen(seqSrc, (void*) &seq_info->seq_arg);
1389 
1390  /* If the subject is translated and the BlastSeqSrc implementation
1391  * doesn't provide a genetic code string, use the default genetic
1392  * code for all subjects (as in the C toolkit) */
1393  if (Blast_SubjectIsTranslated(program_number) &&
1394  seq_info->seq_arg.seq->gen_code_string == NULL) {
1395  seq_info->seq_arg.seq->gen_code_string =
1396  GenCodeSingletonFind(default_db_genetic_code);
1397  ASSERT(seq_info->seq_arg.seq->gen_code_string);
1398  }
1399  } else {
1400  self->length = 0;
1401  }
1402  }
     /* length == 0 covers allocation failure, fetch failure and a fetched
        sequence whose reported length is zero */
1403  if (self->length == 0) {
1404  /* Could not obtain the required data */
1406  return -1;
1407  } else {
1408  return 0;
1409  }
1410 }
1411 
1412 
1413 /** NCBIstdaa encoding for 'X' character */
1414 #define BLASTP_MASK_RESIDUE 21
1415 /** Default instructions and mask residue for SEG filtering */
1416 #define BLASTP_MASK_INSTRUCTIONS "S 10 1.8 2.1"
1417 
1418 
1419 /**
1420  * Filter low complexity regions from the sequence data; uses the SEG
1421  * algorithm.
1422  *
1423  * @param seqData data to be filtered
1424  * @param program_name type of search being performed
1425  * @return 0 for success; -1 for out-of-memory
1426  */
/* Masks seqData->data in place and reports through is_seq_biased (when
 * non-NULL) whether any mask location was produced.
 * NOTE(review): the line carrying the function name and the `seqData`
 * parameter, and one argument line of the BlastFilteringOptionsFromString
 * call, are absent from this listing. */
1427 static int
1429  EBlastProgramType program_name,
1430  Boolean* is_seq_biased)
1431 {
1432  int status = 0;
1433  BlastSeqLoc* mask_seqloc = NULL;
1434  SBlastFilterOptions* filter_options = NULL;
1435 
1436  status = BlastFilteringOptionsFromString(program_name,
1438  &filter_options, NULL);
1439  if (status == 0) {
1440  status = BlastSetUp_Filter(program_name, seqData->data,
1441  seqData->length, 0, filter_options,
1442  &mask_seqloc, NULL);
     /* the options are only needed for the call above */
1443  filter_options = SBlastFilterOptionsFree(filter_options);
1444  }
     /* written even when status is non-zero; in that case it reflects
        whatever mask_seqloc holds at this point */
1445  if (is_seq_biased) {
1446  *is_seq_biased = (mask_seqloc != NULL);
1447  }
1448  if (status == 0) {
1449  Blast_MaskTheResidues(seqData->data, seqData->length,
1450  FALSE, mask_seqloc, FALSE, 0);
1451  }
1452  if (mask_seqloc != NULL) {
1453  mask_seqloc = BlastSeqLocFree(mask_seqloc);
1454  }
1455  return status;
1456 }
1457 
1458 
1459 /**
1460  * Obtain a string of translated data
1461  *
1462  * @param self the sequence from which to obtain the data [in]
1463  * @param range the range and translation frame to get [in]
1464  * @param seqData the resulting data [out]
1465  * @param queryData the query sequence [in]
1466  * @param queryOffset offset for align if there are multiple queries
1467  * @param align information about the alignment between query and subject
1468  * @param shouldTestIdentical did alignment pass a preliminary test in
1469  * redo_alignment.c that indicates the sequence
1470  * pieces may be near identical
1471  *
1472  * @return 0 on success; -1 on failure
1473  */
/* NOTE(review): the lines carrying the function name and its first two
 * parameters (used below as `self` and `range`) are absent from this listing.
 * `isSmithWaterman` is not referenced in the visible body. */
1474 static int
1477  BlastCompo_SequenceData * seqData,
1478  const BlastCompo_SequenceRange * q_range,
1479  BlastCompo_SequenceData * queryData,
1480  const Uint8* query_words,
1481  const BlastCompo_Alignment *align,
1482  const Boolean shouldTestIdentical,
1483  const ECompoAdjustModes compo_adjust_mode,
1484  const Boolean isSmithWaterman,
1485  Boolean* subject_maybe_biased)
1486 {
1487  int status = 0;
1488  BlastKappa_SequenceInfo * local_data; /* BLAST-specific
1489  information associated
1490  with the sequence */
1491  Uint1 * translation_buffer; /* a buffer for the translated,
1492  amino-acid sequence */
1493  Int4 translated_length; /* length of the translated sequence */
1494  int translation_frame; /* frame in which to translate */
1495  Uint1 * na_sequence; /* the nucleotide sequence */
1496  int translation_start; /* location in na_sequence to start
1497  translating */
1498  int num_nucleotides; /* the number of nucleotides to be translated */
1499 
1500  local_data = self->local_data;
1501  na_sequence = local_data->seq_arg.seq->sequence_start;
1502 
1503  /* Initialize seqData to nil, in case this routine fails */
1504  seqData->buffer = NULL;
1505  seqData->data = NULL;
1506  seqData->length = 0;
1507 
     /* range->context carries the translation frame; its sign selects how
        the nucleotide start offset is derived from the amino-acid range */
1508  translation_frame = range->context;
1509  if (translation_frame > 0) {
1510  translation_start = 3 * range->begin;
1511  } else {
1512  translation_start =
1513  self->length - 3 * range->end + translation_frame + 1;
1514  }
1515  num_nucleotides =
1516  3 * (range->end - range->begin) + ABS(translation_frame) - 1;
1517 
1518  status = Blast_GetPartialTranslation(na_sequence + translation_start,
1519  num_nucleotides,
1520  (Int2) translation_frame,
1521  local_data->seq_arg.seq->gen_code_string,
1522  &translation_buffer,
1523  &translated_length,
1524  NULL);
1525  if (status == 0) {
     /* data starts one byte into the buffer */
1526  seqData->buffer = translation_buffer;
1527  seqData->data = translation_buffer + 1;
1528  seqData->length = translated_length;
1529 
1530  if ( !(KAPPA_TBLASTN_NO_SEG_SEQUENCE) ) {
     /* SEG is attempted only with composition adjustment on and when the
        caller has not already flagged the subject as unbiased */
1531  if (compo_adjust_mode
1532  && (!subject_maybe_biased || *subject_maybe_biased)) {
1533 
     /* SEG is skipped when the near-identity test was requested and the
        aligned parts turn out to be nearly identical */
1534  if ( (!shouldTestIdentical)
1535  || (shouldTestIdentical
1536  && (!s_TestNearIdentical(seqData, range->begin,
1537  queryData, q_range->begin,
1538  query_words, align)))) {
1539 
1540  status = s_DoSegSequenceData(seqData, eBlastTypeTblastn,
1541  subject_maybe_biased);
1542  if (status != 0) {
     /* filtering failed: release the translation and reset seqData */
1543  free(seqData->buffer);
1544  seqData->buffer = NULL;
1545  seqData->data = NULL;
1546  seqData->length = 0;
1547  }
1548  }
1549  }
1550  }
1551  }
1552  return status;
1553 }
1554 
1555 
1556 /**
1557  * Get a string of protein data from a protein sequence.
1558  *
1559  * @param self a protein sequence [in]
1560  * @param range the range to get [in]
1561  * @param seqData the resulting data [out]
1562  * @param queryData the query sequence [in]
1563  * @param queryOffset offset for align if there are multiple queries
1564  * @param align information about the alignment
1565  * between query and subject [in]
1566  * @param shouldTestIdentical did alignment pass a preliminary test in
1567  * redo_alignment.c that indicates the sequence
1568  * pieces may be near identical [in]
1569  *
1570  * @return 0 on success; -1 on failure
1571  */
/* NOTE(review): the lines carrying the function name and its first two
 * parameters (used below as `self` and `range`) are absent from this listing.
 * `isSmithWaterman` is not referenced in the visible body. */
1572 static int
1575  BlastCompo_SequenceData * seqData,
1576  const BlastCompo_SequenceRange * q_range,
1577  BlastCompo_SequenceData * queryData,
1578  const Uint8* query_words,
1579  const BlastCompo_Alignment *align,
1580  const Boolean shouldTestIdentical,
1581  const ECompoAdjustModes compo_adjust_mode,
1582  const Boolean isSmithWaterman,
1583  Boolean* subject_maybe_biased)
1584 
1585 {
1586  int status = 0; /* return status */
1587  Int4 idx; /* loop index */
1588  Uint1 *origData; /* the unfiltered data for the sequence */
1589  /* BLAST-specific sequence information */
     /* the same local_data pointer is viewed two ways; which view is used
        depends on the sign of self->index (see origData below) */
1590  BlastKappa_SequenceInfo * local_data = self->local_data;
1591  BLAST_SequenceBlk * seq = self->local_data;
1592 
1593  if (self->local_data == NULL)
1594  return -1;
1595 
1596  seqData->data = NULL;
1597  seqData->length = 0;
1598  /* Copy the entire sequence (necessary for SEG filtering.) */
1599  seqData->buffer = calloc((self->length + 2), sizeof(Uint1));
1600  if (seqData->buffer == NULL) {
1601  return -1;
1602  }
1603  /* First and last characters of the buffer MUST be '\0', which is
1604  * true here because the buffer was allocated using calloc. */
1605  seqData->data = seqData->buffer + 1;
1606  seqData->length = self->length;
1607 
1608  origData = (self->index >= 0) ? local_data->seq_arg.seq->sequence
1609  : seq->sequence;
     /* negative index with a non-zero frame: step origData past the
        translations of the preceding frames and use that frame's length */
1610  if((self->index < 0) && (align->frame != 0)) {
1611  int i=0, offsets =0;
1612  int f = GET_SEQ_FRAME(align->frame);
1613  int nucl_length = GET_NUCL_LENGTH(self->length);
1614  seqData->length = GET_TRANSLATED_LENGTH(nucl_length, f);
1615  for(; i < f; i++) {
1616  offsets = GET_TRANSLATED_LENGTH(nucl_length, i) +1;
1617  origData += offsets;
1618  }
1619  }
1620  /* Copy the sequence data */
1621  for (idx = 0; idx < seqData->length; idx++) {
1622  seqData->data[idx] = origData[idx];
1623  }
1624 
1625  if ( !(KAPPA_BLASTP_NO_SEG_SEQUENCE) ) {
     /* SEG is attempted only with composition adjustment on and when the
        caller has not already flagged the subject as unbiased */
1626  if (compo_adjust_mode
1627  && (!subject_maybe_biased || *subject_maybe_biased)) {
1628 
     /* SEG is skipped when the near-identity test was requested and the
        aligned parts turn out to be nearly identical */
1629  if ( (!shouldTestIdentical)
1630  || (shouldTestIdentical
1631  && (!s_TestNearIdentical(seqData, 0, queryData,
1632  q_range->begin, query_words,
1633  align)))) {
1634 
1635  status = s_DoSegSequenceData(seqData, eBlastTypeBlastp,
1636  subject_maybe_biased);
1637  }
1638  }
1639  }
1640  /* Fit the data to the range: point at the byte before range->begin,
1641  zero it as a sentinel, then step onto range->begin. */
1641  seqData ->data = &seqData->data[range->begin - 1];
1642  *seqData->data++ = '\0';
1643  seqData ->length = range->end - range->begin;
1644 
     /* on filtering failure the copy is released; note that length keeps
        the value assigned just above */
1645  if (status != 0) {
1646  free(seqData->buffer);
1647  seqData->buffer = NULL;
1648  seqData->data = NULL;
1649  }
1650  return status;
1651 }
1652 
1653 
1654 /**
1655  * Obtain the sequence data that lies within the given range.
1656  *
1657  * @param self sequence information [in]
1658  * @param range range specifying the range of data [in]
1659  * @param seqData the sequence data obtained [out]
1660  * @param seqData the resulting data [out]
1661  * @param queryData the query sequence [in]
1662  * @param queryOffset offset for align if there are multiple queries
1663  * @param align information about the alignment between query and subject
1664  * @param shouldTestIdentical did alignment pass a preliminary test in
1665  * redo_alignment.c that indicates the sequence
1666  * pieces may be near identical
1667  *
1668  * @return 0 on success; -1 on failure
1669  */
/* Copies the requested query range into queryData, then dispatches to the
 * translated or protein subject getter depending on the program type.
 * NOTE(review): the lines carrying the function name and the `self` and
 * `query` parameters are absent from this listing. */
1670 static int
1672  const BlastCompo_SequenceRange * s_range,
1673  BlastCompo_SequenceData * seqData,
1675  const BlastCompo_SequenceRange * q_range,
1676  BlastCompo_SequenceData * queryData,
1677  const Uint8* query_words,
1678  const BlastCompo_Alignment *align,
1679  const Boolean shouldTestIdentical,
1680  const ECompoAdjustModes compo_adjust_mode,
1681  const Boolean isSmithWaterman,
1682  Boolean* subject_maybe_biased)
1683 {
1684  Int4 idx;
1685  BlastKappa_SequenceInfo * seq_info = self->local_data;
1686  Uint1 *origData = query->data + q_range->begin;
1687  /* Copy the query sequence (necessary for SEG filtering.) */
1688  queryData->length = q_range->end - q_range->begin;
     /* two extra bytes: one before and one after the data, both zero from
        calloc.
        NOTE(review): the calloc result is not checked before it is offset
        and written through below. */
1689  queryData->buffer = calloc((queryData->length + 2), sizeof(Uint1));
1690  queryData->data = queryData->buffer + 1;
1691 
1692  for (idx = 0; idx < queryData->length; idx++) {
1693  /* Copy the sequence data, replacing occurrences of amino acid
1694  * number 24 (Selenocysteine) with number 3 (Cysteine). */
1695  queryData->data[idx] = (origData[idx] != 24) ? origData[idx] : 3;
1696  }
     /* a NULL local_data falls through to the protein getter */
1697  if (seq_info && seq_info->prog_number == eBlastTypeTblastn) {
1698  /* The sequence must be translated. */
1699  return s_SequenceGetTranslatedRange(self, s_range, seqData,
1700  q_range, queryData, query_words,
1701  align, shouldTestIdentical,
1702  compo_adjust_mode, isSmithWaterman,
1703  subject_maybe_biased);
1704  } else {
1705  return s_SequenceGetProteinRange(self, s_range, seqData,
1706  q_range, queryData, query_words,
1707  align, shouldTestIdentical,
1708  compo_adjust_mode, isSmithWaterman,
1709  subject_maybe_biased);
1710  }
1711 }
1712 
1713 
1714 /** Data and data-structures needed to perform a gapped alignment */
/* NOTE(review): the line that opens this struct and the closing typedef line
 * are absent from this listing. Code elsewhere in this file reads these
 * fields through a `BlastKappa_GappingParamsContext *`. */
1716  const BlastScoringParameters*
1717  scoringParams; /**< scoring parameters for a
1718  gapped alignment */
1719  BlastGapAlignStruct * gap_align; /**< additional parameters for a
1720  gapped alignment */
1721  BlastScoreBlk* sbp; /**< the score block for this search */
1722  double localScalingFactor; /**< the amount by which this
1723  search has been scaled */
1724  EBlastProgramType prog_number; /**< the type of search being
1725  performed */
1727 
1728 
1729 /**
1730  * Reads a BlastGapAlignStruct that has been used to compute a
1731  * traceback, and return a BlastCompo_Alignment representing the
1732  * alignment. The BlastGapAlignStruct is in coordinates local to the
1733  * ranges being aligned; the resulting alignment is in coordinates w.r.t.
1734  * the whole query and subject.
1735  *
1736  * @param gap_align the BlastGapAlignStruct
1737  * @param *edit_script the edit script from the alignment; on exit
1738  * NULL. The edit_script is usually
1739  * gap_align->edit_script, but we don't want
1740  * an implicit side effect on the gap_align.
1741  * @param query_range the range of the query used in this alignment
1742  * @param subject_range the range of the subject used in this alignment
1743  * @param matrix_adjust_rule the rule used to compute the scoring matrix
1744  *
1745  * @return the new alignment on success or NULL on error
1746  */
/* NOTE(review): the line carrying the function name and the `gap_align`
 * parameter is absent from this listing. */
1747 static BlastCompo_Alignment *
1749  GapEditScript ** edit_script,
1750  BlastCompo_SequenceRange * query_range,
1751  BlastCompo_SequenceRange * subject_range,
1752  EMatrixAdjustRule matrix_adjust_rule)
1753 {
1754  /* parameters to BlastCompo_AlignmentNew */
1755  int queryStart, queryEnd, queryIndex, matchStart, matchEnd, frame;
1756  BlastCompo_Alignment * obj; /* the new alignment */
1757 
1758  /* In the composition_adjustment library, the query start/end are
1759  indices into the concatenated query, and so must be shifted. */
1760  queryStart = gap_align->query_start + query_range->begin;
1761  queryEnd = gap_align->query_stop + query_range->begin;
1762  queryIndex = query_range->context;
     /* subject coordinates are shifted by the start of the subject range in
        the same way */
1763  matchStart = gap_align->subject_start + subject_range->begin;
1764  matchEnd = gap_align->subject_stop + subject_range->begin;
1765  frame = subject_range->context;
1766 
1767  obj = BlastCompo_AlignmentNew(gap_align->score, matrix_adjust_rule,
1768  queryStart, queryEnd, queryIndex,
1769  matchStart, matchEnd, frame,
1770  *edit_script);
     /* the caller's pointer is cleared only when the alignment was created;
        on failure *edit_script is left untouched */
1771  if (obj != NULL) {
1772  *edit_script = NULL;
1773  }
1774  return obj;
1775 }
1776 
1777 
1778 /** A callback used when performing SmithWaterman alignments:
1779  * Calculate the traceback for one alignment by performing an x-drop
1780  * alignment in the forward direction, possibly increasing the x-drop
1781  * parameter until the desired score is attained.
1782  *
1783  * The start, end and score of the alignment should be obtained
1784  * using the Smith-Waterman algorithm before this routine is called.
1785  *
1786  * @param *pnewAlign the new alignment
1787  * @param *pqueryEnd on entry, the end of the alignment in the
1788  * query, as computed by the Smith-Waterman
1789  * algorithm. On exit, the end as computed by
1790  * the x-drop algorithm
1791  * @param *pmatchEnd same as *pqueryEnd, but for the subject
1792  * sequence
1793  * @param queryStart the starting point in the query
1794  * @param matchStart the starting point in the subject
1795  * @param score the score of the alignment, as computed by
1796  * the Smith-Waterman algorithm
1797  * @param query query sequence data
1798  * @param query_range range of this query in the concatenated
1799  * query
1800  * @param ccat_query_length total length of the concatenated query
1801  * @param subject subject sequence data
1802  * @param subject_range range of subject_data in the translated
1803  * query, in amino acid coordinates
1804  * @param full_subject_length length of the full subject sequence
1805  * @param gapping_params parameters used to compute gapped
1806  * alignments
1807  * @param matrix_adjust_rule the rule used to compute the scoring matrix
1808  *
1809  * @returns 0 on success; -1 if the new alignment could not be created
1810  * @sa new_xdrop_align_type
1811  */
/* Returns 0 when *pnewAlign was set to a new alignment, -1 when it was set
 * to NULL.
 * NOTE(review): the lines carrying the function name with the `pnewAlign`
 * parameter, the `query` and `subject` parameters, and the first line of the
 * call that produces editScript are absent from this listing. */
1812 static int
1814  Int4 * pqueryEnd, Int4 *pmatchEnd,
1815  Int4 queryStart, Int4 matchStart, Int4 score,
1817  BlastCompo_SequenceRange * query_range,
1818  Int4 ccat_query_length,
1820  BlastCompo_SequenceRange * subject_range,
1821  Int4 full_subject_length,
1822  BlastCompo_GappingParams * gapping_params,
1823  EMatrixAdjustRule matrix_adjust_rule)
1824 {
1825  Int4 newScore;
1826  /* Extent of the alignment as computed by an x-drop alignment
1827  * (usually the same as (queryEnd - queryStart) and (matchEnd -
1828  * matchStart)) */
1829  Int4 queryExtent, matchExtent;
1830  BlastCompo_Alignment * obj = NULL; /* the new object */
1831  /* BLAST-specific parameters needed to compute an X-drop alignment */
1832  BlastKappa_GappingParamsContext * context = gapping_params->context;
1833  /* Auxiliary structure for computing gapped alignments */
1834  BlastGapAlignStruct * gap_align = context->gap_align;
1835  /* Scoring parameters for gapped alignments */
1836  const BlastScoringParameters* scoringParams = context->scoringParams;
1837  /* A structure containing the traceback of a gapped alignment */
1838  GapEditScript* editScript = NULL;
1839 
1840  /* suppress unused parameter warnings; this is a callback
1841  function, so these parameters cannot be deleted */
1842  (void) ccat_query_length;
1843  (void) full_subject_length;
1844 
     /* the shared gap_align structure is given the x-dropoff supplied with
        the gapping parameters before the extension runs */
1845  gap_align->gap_x_dropoff = gapping_params->x_dropoff;
1846 
1847  s_SWFindFinalEndsUsingXdrop(query, queryStart, *pqueryEnd,
1848  subject, matchStart, *pmatchEnd,
1849  gap_align, scoringParams,
1850  score, &queryExtent, &matchExtent,
1851  &newScore);
     /* report the end points found by the x-drop extension back to the
        caller */
1852  *pqueryEnd = queryStart + queryExtent;
1853  *pmatchEnd = matchStart + matchExtent;
1854 
1855  editScript =
1857  gap_align->fwd_prelim_tback);
1858  if (editScript != NULL) {
1859  /* Shifted values of the endpoints */
1860  Int4 aqueryStart = queryStart + query_range->begin;
1861  Int4 aqueryEnd = *pqueryEnd + query_range->begin;
1862  Int4 amatchStart = matchStart + subject_range->begin;
1863  Int4 amatchEnd = *pmatchEnd + subject_range->begin;
1864 
1865  obj = BlastCompo_AlignmentNew(newScore, matrix_adjust_rule,
1866  aqueryStart, aqueryEnd,
1867  query_range->context,
1868  amatchStart, amatchEnd,
1869  subject_range->context, editScript);
     /* the edit script is deleted here only when no alignment object was
        created to receive it */
1870  if (obj == NULL) {
1871  GapEditScriptDelete(editScript);
1872  }
1873  }
1874  *pnewAlign = obj;
1875 
1876  return obj != NULL ? 0 : -1;
1877 }
1878 
1879 
1880 /**
1881  * A callback: calculate the traceback for one alignment by
1882  * performing an x-drop alignment in both directions
1883  *
1884  * @param in_align the existing alignment, without traceback
1885  * @param matrix_adjust_rule the rule used to compute the scoring matrix
1886  * @param query_data query sequence data
1887  * @param query_range range of this query in the concatenated
1888  * query
1889  * @param ccat_query_length total length of the concatenated query
1890  * @param subject_data subject sequence data
1891  * @param subject_range range of subject_data in the translated
1892  * query, in amino acid coordinates
1893  * @param full_subject_length length of the full subject sequence
1894  * @param gapping_params parameters used to compute gapped
1895  * alignments
  * @return the result of s_NewAlignmentFromGapAlign when the gapped
  * alignment call reports status 0; NULL otherwise
1896  * @sa redo_one_alignment_type
1897  */
1898 static BlastCompo_Alignment *
/* NOTE(review): the embedded listing numbers jump from 1898 to 1900, so the
 * line that names this function and presumably declares `in_align` is
 * absent from this copy. Restore it from the upstream file. */
1900  EMatrixAdjustRule matrix_adjust_rule,
1901  BlastCompo_SequenceData * query_data,
1902  BlastCompo_SequenceRange * query_range,
1903  int ccat_query_length,
1904  BlastCompo_SequenceData * subject_data,
1905  BlastCompo_SequenceRange * subject_range,
1906  int full_subject_length,
1907  BlastCompo_GappingParams * gapping_params)
1908 {
1909  int status; /* return code */
1910  Int4 q_start, s_start; /* starting point in query and subject */
1911  /* BLAST-specific parameters needed to compute a gapped alignment */
1912  BlastKappa_GappingParamsContext * context = gapping_params->context;
1913  /* Auxiliary structure for computing gapped alignments */
1914  BlastGapAlignStruct* gapAlign = context->gap_align;
1915  /* The preliminary gapped HSP that we are recomputing */
1916  BlastHSP * hsp = in_align->context;
      /* Passed by address as the last argument of the alignment call; its
         value is not examined afterwards in this function. */
1917  Boolean fence_hit = FALSE;
1918 
1919  /* suppress unused parameter warnings; this is a callback
1920  function, so these parameters cannot be deleted */
1921  (void) ccat_query_length;
1922  (void) full_subject_length;
1923 
1924  /* Use the starting point supplied by the HSP, made relative to the
         begin of each sequence range. */
1925  q_start = hsp->query.gapped_start - query_range->begin;
1926  s_start = hsp->subject.gapped_start - subject_range->begin;
1927 
1928  gapAlign->gap_x_dropoff = gapping_params->x_dropoff;
1929 
1930  /*
1931  * Previously, last argument was NULL which could cause problems for
1932  * tblastn.
1933  */
1934  status =
      /* NOTE(review): listing line 1935 is absent; it held the name of the
         called function and its first argument(s). Restore from upstream. */
1936  query_data->data,
1937  subject_data->data, gapAlign,
1938  context->scoringParams,
1939  q_start, s_start,
1940  query_data->length,
1941  subject_data->length,
1942  &fence_hit);
1943  if (status == 0) {
1944  return s_NewAlignmentFromGapAlign(gapAlign, &gapAlign->edit_script,
1945  query_range, subject_range,
1946  matrix_adjust_rule);
1947  } else {
1948  return NULL;
1949  }
1950 }
1951 
1952 
1953 /**
1954  * A BlastKappa_SavedParameters holds the value of certain search
1955  * parameters on entry to RedoAlignmentCore. These values are
1956  * restored on exit.
1957  */
/* NOTE(review): listing line 1958 is absent; it held the opening of this
 * structure definition. Restore it from the upstream file. */
1959  Int4 gap_open; /**< a penalty for the existence of a gap */
1960  Int4 gapExtend; /**< a penalty for each residue in the
1961  gap */
1962  double scale_factor; /**< the original scale factor */
1963  Int4 **origMatrix; /**< The original matrix values */
1964  double original_expect_value; /**< expect value on entry */
1965  /** copy of the original gapped Karlin-Altschul block
1966  * corresponding to the first context */
/* NOTE(review): listing line 1967 is absent; presumably it declared the
 * kbp_gap_orig member that other functions in this file index once per
 * query (kbp_gap_orig[i]) -- confirm against upstream. */
1968  Int4 num_queries; /**< Number of queries in this search */
/* NOTE(review): listing line 1969 is absent; it held the close of this
 * structure definition. */
1970 
1971 
1972 /**
1973  * Release the data associated with a BlastKappa_SavedParameters and
1974  * delete the object
1975  * @param searchParams the object to be deleted [in][out]; set to NULL
  * before returning. A NULL object (*searchParams == NULL) is accepted.
1976  */
1977 static void
/* NOTE(review): listing line 1978 is absent; it held the function name and
 * the `searchParams` parameter declaration. Restore from upstream. */
1979 {
1980  /* for convenience, remove one level of indirection from searchParams */
1981  BlastKappa_SavedParameters *sp = *searchParams;
1982 
1983  if (sp != NULL) {
1984  if (sp->kbp_gap_orig != NULL) {
1985  int i;
      /* visit every per-query slot, then release the array itself */
1986  for (i = 0; i < sp->num_queries; i++) {
1987  if (sp->kbp_gap_orig[i] != NULL)
      /* NOTE(review): listing line 1988 is absent; it held the statement
         guarded by the test above (presumably the release of
         kbp_gap_orig[i]) -- confirm against upstream. */
1989  }
1990  free(sp->kbp_gap_orig);
1991  }
1992  if (sp->origMatrix != NULL)
      /* NOTE(review): listing line 1993 is absent; it held the statement
         guarded by the test above (presumably the release of origMatrix)
         -- confirm against upstream. */
1994  }
1995  sfree(*searchParams);
1996  *searchParams = NULL;
1997 }
1998 
1999 
2000 /**
2001  * Create a new instance of BlastKappa_SavedParameters
2002  *
2003  * @param rows number of rows in the scoring matrix
2004  * @param numQueries number of queries in this search
2005  * @param compo_adjust_mode if >0, use composition-based statistics
2006  * @param positionBased if true, the search is position-based
  * @return the new object, or NULL if any allocation fails (everything
  * allocated up to that point is released through s_SavedParametersFree)
2007  */
/* NOTE(review): listing lines 2008-2009 are absent; they held the return
 * type, the function name and presumably the `rows` parameter. Restore
 * them from the upstream file. */
2010  Int4 numQueries,
2011  ECompoAdjustModes compo_adjust_mode,
2012  Boolean positionBased)
2013 {
2014  int i;
2015  BlastKappa_SavedParameters *sp; /* the new object */
2016  sp = malloc(sizeof(BlastKappa_SavedParameters));
2017 
2018  if (sp == NULL) {
2019  goto error_return;
2020  }
      /* Both pointers are cleared first so that the error path can hand a
         partially built object to s_SavedParametersFree. */
2021  sp->kbp_gap_orig = NULL;
2022  sp->origMatrix = NULL;
2023 
2024  sp->kbp_gap_orig = calloc(numQueries, sizeof(Blast_KarlinBlk*));
2025  if (sp->kbp_gap_orig == NULL) {
2026  goto error_return;
2027  }
2028  sp->num_queries = numQueries;
2029  for (i = 0; i < numQueries; i++) {
2030  sp->kbp_gap_orig[i] = NULL;
2031  }
      /* origMatrix is only allocated when composition adjustment is on */
2032  if (compo_adjust_mode != eNoCompositionBasedStats) {
2033  if (positionBased) {
      /* NOTE(review): listing line 2034 is absent; it held the
         position-based assignment to sp->origMatrix. */
2035  } else {
      /* NOTE(review): listing line 2036 is absent; it held the
         non-position-based assignment to sp->origMatrix. */
2037  }
2038  if (sp->origMatrix == NULL)
2039  goto error_return;
2040  }
2041  return sp;
2042 error_return:
2043  s_SavedParametersFree(&sp);
2044  return NULL;
2045 }
2046 
2047 
2048 /**
2049  * Record the initial value of the search parameters that are to be
2050  * adjusted.
2051  *
2052  * @param searchParams holds the recorded values [out]
2053  * @param sbp a score block [in]
2054  * @param scoring gapped alignment parameters [in]
2055  * @param query_length length of the concatenated query [in]
2056  * @param compo_adjust_mode composition adjustment mode [in]
2057  * @param positionBased is this search position-based [in]
  * @return 0 on success, -1 if Blast_KarlinBlkNew returns NULL for one of
  * the queries
2058  */
2059 static int
/* NOTE(review): listing line 2060 is absent; it held the function name and
 * the `searchParams` parameter declaration. Restore from upstream. */
2061  BlastScoreBlk* sbp,
2062  const BlastScoringParameters* scoring,
2063  int query_length,
2064  ECompoAdjustModes compo_adjust_mode,
2065  Boolean positionBased)
2066 {
2067  int i;
2068 
2069  searchParams->gap_open = scoring->gap_open;
2070  searchParams->gapExtend = scoring->gap_extend;
2071  searchParams->scale_factor = scoring->scale_factor;
2072 
2073  for (i = 0; i < searchParams->num_queries; i++) {
2074  if (sbp->kbp_gap[i] != NULL) {
2075  /* There is a kbp_gap for query i and it must be copied */
2076  searchParams->kbp_gap_orig[i] = Blast_KarlinBlkNew();
2077  if (searchParams->kbp_gap_orig[i] == NULL) {
2078  return -1;
2079  }
2080  Blast_KarlinBlkCopy(searchParams->kbp_gap_orig[i],
2081  sbp->kbp_gap[i]);
2082  }
2083  }
2084 
      /* The matrix is only saved when composition adjustment is enabled;
         origMatrix must already be allocated with at least `rows` rows of
         BLASTAA_SIZE entries, since it is written without any check. */
2085  if (compo_adjust_mode != eNoCompositionBasedStats) {
2086  Int4 **matrix; /* scoring matrix */
2087  int j; /* iteration index */
2088  int rows; /* number of rows in matrix */
2089  if (positionBased) {
2090  matrix = sbp->psi_matrix->pssm->data;
2091  rows = query_length;
2092  } else {
2093  matrix = sbp->matrix->data;
2094  rows = BLASTAA_SIZE;
2095  }
2096 
2097  for (i = 0; i < rows; i++) {
2098  for (j = 0; j < BLASTAA_SIZE; j++) {
2099  searchParams->origMatrix[i][j] = matrix[i][j];
2100  }
2101  }
2102  }
2103  return 0;
2104 }
2105 
2106 
2107 /**
2108  * Rescale the search parameters in the search object and options
2109  * object to obtain more precision.
2110  *
2111  * @param sbp score block to be rescaled
2112  * @param sp scoring parameters to be rescaled
2113  * @param num_queries number of queries in this search
2114  * @param scale_factor amount by which to scale this search
2115  */
2116 static void
/* NOTE(review): listing lines 2117-2118 are absent; they held the function
 * name and presumably the `sbp` and `sp` parameter declarations. Restore
 * them from the upstream file. */
2119  int num_queries,
2120  double scale_factor)
2121 {
2122  int i;
2123  for (i = 0; i < num_queries; i++) {
2124  if (sbp->kbp_gap[i] != NULL) {
2125  Blast_KarlinBlk * kbp = sbp->kbp_gap[i];
      /* Lambda is divided by the scale factor; logK is recomputed from
         K, which itself is left unchanged. */
2126  kbp->Lambda /= scale_factor;
2127  kbp->logK = log(kbp->K);
2128  }
2129  }
2130 
      /* Gap penalties are multiplied by the scale factor and rounded with
         BLAST_Nint; the factor itself is recorded in sp. */
2131  sp->gap_open = (Int4)BLAST_Nint(sp->gap_open * scale_factor);
2132  sp->gap_extend = (Int4)BLAST_Nint(sp->gap_extend * scale_factor);
2133  sp->scale_factor = scale_factor;
2134 }
2135 
2136 
2137 /**
2138  * Restore the parameters that were adjusted to their original values.
2139  *
2140  * @param sbp the score block to be restored
2141  * @param scoring the scoring parameters to be restored
2142  * @param searchParams the initial recorded values of the parameters
2143  * @param query_length the concatenated query length
2144  * @param positionBased is this search position-based
2145  * @param compo_adjust_mode mode of composition adjustment
2146  */
2147 static void
/* NOTE(review): listing line 2148 is absent; it held the function name and
 * presumably the `sbp` parameter declaration. Restore from upstream. */
2149  BlastScoringParameters* scoring,
2150  const BlastKappa_SavedParameters * searchParams,
2151  int query_length,
2152  Boolean positionBased,
2153  ECompoAdjustModes compo_adjust_mode)
2154 {
2155  int i;
2156 
2157  scoring->gap_open = searchParams->gap_open;
2158  scoring->gap_extend = searchParams->gapExtend;
2159  scoring->scale_factor = searchParams->scale_factor;
2160 
2161  for (i = 0; i < searchParams->num_queries; i++) {
2162  if (sbp->kbp_gap[i] != NULL) {
      /* NOTE(review): listing line 2163 is absent; it held the start of the
         call whose last argument is searchParams->kbp_gap_orig[i]
         (presumably the copy back into sbp->kbp_gap[i]) -- confirm. */
2164  searchParams->kbp_gap_orig[i]);
2165  }
2166  }
      /* The matrix is only written back when composition adjustment is
         enabled; the row count is chosen exactly as when it was saved. */
2167  if(compo_adjust_mode != eNoCompositionBasedStats) {
2168  int j; /* iteration index */
2169  Int4 ** matrix; /* matrix to be restored */
2170  int rows; /* number of rows in the matrix */
2171 
2172  if (positionBased) {
2173  matrix = sbp->psi_matrix->pssm->data;
2174  rows = query_length;
2175  } else {
2176  matrix = sbp->matrix->data;
2177  rows = BLASTAA_SIZE;
2178  }
2179  for (i = 0; i < rows; i++) {
2180  for (j = 0; j < BLASTAA_SIZE; j++) {
2181  matrix[i][j] = searchParams->origMatrix[i][j];
2182  }
2183  }
2184  }
2185 }
2186 
2187 
2188 /**
2189  * Initialize an object of type Blast_MatrixInfo.
2190  *
2191  * @param self object being initialized
2192  * @param queryBlk the query sequence data
2193  * @param sbp score block for this search
2194  * @param scale_factor amount by which ungapped parameters should be
2195  * scaled
2196  * @param matrixName name of the matrix
  * @return 0 on success; -1 if the copy of matrixName cannot be allocated;
  * otherwise the nonzero status reported by the helper that failed
2197  */
2198 static int
/* NOTE(review): listing line 2199 is absent; it held the function name and
 * the `self` parameter declaration. Restore from upstream. */
2200  BLAST_SequenceBlk* queryBlk,
2201  BlastScoreBlk* sbp,
2202  double scale_factor,
2203  const char * matrixName)
2204 {
2205  int status = 0; /* return status */
2206  size_t lenName; /* length of matrixName as a string */
2207 
2208  /* copy the matrix name (strdup is not standard C); matrixName is
         passed to strlen unchecked, so it must not be NULL */
2209  lenName = strlen(matrixName);
2210  if (NULL == (self->matrixName = malloc(lenName + 1))) {
2211  return -1;
2212  }
      /* lenName + 1 bytes: the terminating NUL is copied as well */
2213  memcpy(self->matrixName, matrixName, lenName + 1);
2214 
2215  if (self->positionBased) {
2216  status = s_GetPosBasedStartFreqRatios(self->startFreqRatios,
2217  queryBlk->length,
2218  queryBlk->sequence,
2219  matrixName,
2220  sbp->psi_matrix->freq_ratios);
2221  if (status == 0) {
2222  status = s_ScalePosMatrix(self->startMatrix, matrixName,
2223  sbp->psi_matrix->freq_ratios,
2224  queryBlk->sequence,
2225  queryBlk->length, sbp, scale_factor);
      /* ungappedLambda is assigned even when s_ScalePosMatrix reported
         a nonzero status */
2226  self->ungappedLambda = sbp->kbp_psi[0]->Lambda / scale_factor;
2227  }
2228  } else {
2229  self->ungappedLambda = sbp->kbp_ideal->Lambda / scale_factor;
2230  status = s_GetStartFreqRatios(self->startFreqRatios, matrixName);
2231  if (status == 0) {
2232  Blast_Int4MatrixFromFreq(self->startMatrix, self->cols,
2233  self->startFreqRatios,
2234  self->ungappedLambda);
2235  }
2236  }
2237  return status;
2238 }
2239 
2240 
2241 /* Create an array of 8-mers for a sequence, such that index of each 8-mer
2242  is the same as its position in the query */
2243 static int
2244 s_CreateWordArray(const Uint1* seq_data, Int4 seq_len, Uint8** words)
2245 {
2246  int word_size = 8; /* word size for k-mer matching */
2247  Uint8* query_hashes; /* list of hashes for query words */
2248  Uint8 mask = NCBI_CONST_UINT8(0xFFFFFFFFFF); /* mask for computing hash
2249  values */
2250  int i;
2251 
2252  /* if query or subject length is smaller than word size, exit */
2253  if (!seq_data || !words || seq_len < word_size) {
2254  return -1;
2255  }
2256 
2257  query_hashes = (Uint8*)calloc((seq_len - word_size + 1),
2258  sizeof(Uint8));
2259  *words = query_hashes;
2260 
2261  if (!query_hashes) {
2262  return -1;
2263  }
2264 
2265 
2266  /* find query word hashes */
2267  query_hashes[0] = s_GetHash(&seq_data[0], word_size);
2268  for (i = 1; i < seq_len - word_size; i++) {
2269  query_hashes[i] = query_hashes[i - 1];
2270  query_hashes[i] <<= 5;
2271  query_hashes[i] &= mask;
2272  query_hashes[i] += (Uint8)seq_data[i + word_size - 1];
2273  }
2274 
2275  return 0;
2276 }
2277 
2278 
/* Release an array of per-query records: free each element's `words`
 * buffer, then the array itself, and set the caller's pointer to NULL.
 *
 * NOTE(review): listing line 2279 is absent; it held the return type, the
 * function name and the `query_info` parameter declaration. Restore it
 * from the upstream file. */
2280  int num_queries)
2281 {
2282  int i;
2283 
      /* Only the outer pointer is tested. NOTE(review): *query_info is
         dereferenced in the loop below without a NULL check, so a NULL
         array with num_queries > 0 would fault -- confirm callers never
         pass that. */
2284  if (!query_info) {
2285  return;
2286  }
2287 
2288  for (i = 0;i < num_queries;i++) {
2289  if ((*query_info)[i].words) {
2290  free((*query_info)[i].words);
2291  }
2292  }
2293 
2294  free(*query_info);
2295  *query_info = NULL;
2296 }
2297 
2298 /**
2299  * Save information about all queries in an array of objects of type
2300  * BlastCompo_QueryInfo.
2301  *
2302  * @param query_data query sequence data
2303  * @param blast_query_info information about all queries, as an
2304  * internal blast data structure
  * @param skip when true, the per-query call guarded by `if (! skip)`
  * in the body is not made
2305  *
2306  * @return the new array on success, or NULL on error
2307  */
2308 static BlastCompo_QueryInfo *
2309 s_GetQueryInfo(Uint1 * query_data, const BlastQueryInfo * blast_query_info, Boolean skip)
2310 {
2311  int i; /* loop index */
      /* NOTE(review): listing line 2312 is absent; it held the type of the
         compo_query_info declaration that continues on the next line. */
2313  compo_query_info; /* the new array */
2314  int num_queries; /* the number of queries/elements in
2315  compo_query_info */
2316 
      /* one element per context, contexts being numbered 0..last_context */
2317  num_queries = blast_query_info->last_context + 1;
2318  compo_query_info = calloc(num_queries, sizeof(BlastCompo_QueryInfo));
2319  if (compo_query_info != NULL) {
2320  for (i = 0; i < num_queries; i++) {
2321  BlastCompo_QueryInfo * query_info = &compo_query_info[i];
2322  const BlastContextInfo * query_context = &blast_query_info->contexts[i];
2323 
2324  query_info->eff_search_space =
2325  (double) query_context->eff_searchsp;
2326  query_info->origin = query_context->query_offset;
      /* seq.data points into the caller's query_data buffer at this
         query's offset; no copy is made */
2327  query_info->seq.data = &query_data[query_info->origin];
2328  query_info->seq.length = query_context->query_length;
2329  query_info->words = NULL;
2330 
      /* The return value is ignored: when s_CreateWordArray rejects the
         sequence (e.g. shorter than one word) `words` stays NULL. */
2331  s_CreateWordArray(query_info->seq.data, query_info->seq.length,
2332  &query_info->words);
2333  if (! skip) {
      /* NOTE(review): listing line 2334 is absent; it held the name and
         first argument(s) of the call that receives this query's data and
         length below. Restore it from upstream. */
2335  query_info->seq.data,
2336  query_info->seq.length);
2337  }
2338  }
2339  }
2340  return compo_query_info;
2341 }
2342 
2343 
2344 /**
2345  * Create a new object of type BlastCompo_GappingParams. The new
2346  * object contains the parameters needed by the composition adjustment
2347  * library to compute a gapped alignment.
2348  *
2349  * @param context the data structures needed by callback functions
2350  * that perform the gapped alignments.
2351  * @param extendParams parameters used for a gapped extension
2352  * @param num_queries the number of queries in the concatenated query
  * @return the new object, or NULL if it cannot be allocated
2353  */
2354 static BlastCompo_GappingParams *
/* NOTE(review): listing line 2355 is absent; it held the function name and
 * the `context` parameter declaration. Restore from upstream. */
2356  const BlastExtensionParameters* extendParams,
2357  int num_queries)
2358 {
2359  int i;
2360  double min_lambda = DBL_MAX; /* smallest gapped Lambda */
2361  const BlastScoringParameters * scoring = context->scoringParams;
2362  const BlastExtensionOptions * options = extendParams->options;
2363  /* The new object */
2364  BlastCompo_GappingParams * gapping_params = NULL;
2365 
2366  gapping_params = malloc(sizeof(BlastCompo_GappingParams));
2367  if (gapping_params == NULL)
2368  return NULL;
2369 
2370  gapping_params->gap_open = scoring->gap_open;
2371  gapping_params->gap_extend = scoring->gap_extend;
      /* the context pointer is stored, not copied */
2372  gapping_params->context = context;
2373 
      /* Find the smallest Lambda among the gapped Karlin blocks that
         exist; min_lambda stays DBL_MAX if none does. */
2374  for (i = 0; i < num_queries; i++) {
2375  if (context->sbp->kbp_gap[i] != NULL &&
2376  context->sbp->kbp_gap[i]->Lambda < min_lambda) {
2377  min_lambda = context->sbp->kbp_gap[i]->Lambda;
2378  }
2379  }
      /* x_dropoff is the larger of the option value multiplied by ln 2 and
         divided by min_lambda, and extendParams->gap_x_dropoff_final,
         truncated to Int4. The same value is also written into the shared
         gap_align structure. */
2380  gapping_params->x_dropoff = (Int4)
2381  MAX(options->gap_x_dropoff_final*NCBIMATH_LN2 / min_lambda,
2382  extendParams->gap_x_dropoff_final);
2383  context->gap_align->gap_x_dropoff = gapping_params->x_dropoff;
2384 
2385  return gapping_params;
2386 }
2387 
2388 
2389 /** Callbacks used by the Blast_RedoOneMatch* routines */
2390 static const Blast_RedoAlignCallbacks
/* NOTE(review): listing lines 2391-2393 are absent; they held the variable
 * name and the initializer list. Elsewhere in this file the object is
 * passed as &redo_align_callbacks. Restore the lines from upstream. */
2394 };
2395 
2396 
2397 /* Bit score per alignment position threshold for the preliminary
2398  near-identical test */
2399 #define NEAR_IDENTICAL_BITS_PER_POSITION (1.74)
2400 
2401 /**
2402  * Read the parameters required for the Blast_RedoOneMatch* functions from
2403  * the corresponding parameters in standard BLAST datatypes. Return a new
2404  * object representing these parameters.
  *
  * @return the object built by Blast_RedoAlignParamsNew, or NULL if
  * s_MatrixInfoInit reports a nonzero status or s_GappingParamsNew
  * returns NULL
2405  */
2406 static Blast_RedoAlignParams *
/* NOTE(review): listing line 2407 is absent; it held the function name and
 * the `context` parameter declaration. Restore from upstream. */
2408  BLAST_SequenceBlk * queryBlk,
2409  const BlastQueryInfo* queryInfo,
2410  const BlastHitSavingParameters* hitParams,
2411  const BlastExtensionParameters* extendParams)
2412 {
2413  int status = 0; /* status code */
2414  int rows; /* number of rows in the scoring matrix */
2415  int cutoff_s; /* cutoff score for saving an alignment */
2416  double cutoff_e; /* cutoff evalue for saving an alignment */
      /* NOTE(review): listing line 2417 is absent; it held the type of the
         gapping_params declaration that continues on the next line. */
2418  gapping_params = NULL; /* parameters needed to compute a gapped
2419  alignment */
      /* NOTE(review): listing line 2420 is absent; it held the type of the
         scaledMatrixInfo declaration that continues on the next line. */
2421  scaledMatrixInfo; /* information about the scoring matrix */
2422  /* does this kind of search translate the database sequence */
2423  int subject_is_translated = (context->prog_number == eBlastTypeTblastn) || (context->prog_number == eBlastTypeRpsTblastn);
2424  int query_is_translated = context->prog_number == eBlastTypeBlastx;
2425  /* is this a position-based search */
2426  Boolean positionBased = (Boolean) (context->sbp->psi_matrix != NULL);
2427  /* will BLAST_LinkHsps be called to assign e-values */
2428  Boolean do_link_hsps = (hitParams->do_sum_stats);
2429  ECompoAdjustModes compo_adjust_mode =
      /* NOTE(review): listing line 2430 is absent; it held the initializer
         expression for compo_adjust_mode. Restore it from upstream. */
2431 
2432  /* per position bit score cutoff for testing whether sequences are
2433  near identical */
2434  double near_identical_cutoff_bits = NEAR_IDENTICAL_BITS_PER_POSITION;
2435 
2436  /* score block is already scaled by context->localScalingFactor */
2437  double near_identical_cutoff=0;
2438  Int4 index;
      /* Convert the bit cutoff to a raw score using the gapped Lambda of
         the first valid context; the cutoff stays 0 if no context is valid.
         NOTE(review): kbp_gap[index] is dereferenced without a NULL check
         -- confirm a valid context always has a gapped Karlin block. */
2439  for (index = queryInfo->first_context;
2440  index <= queryInfo->last_context; ++index) {
2441 
2442  if ((queryInfo->contexts[index].is_valid)) {
2443  near_identical_cutoff =
2444  (near_identical_cutoff_bits * NCBIMATH_LN2)
2445  / context->sbp->kbp_gap[index]->Lambda;
2446  break;
2447  }
2448  }
2449 
2450  if (do_link_hsps) {
2451  ASSERT(hitParams->link_hsp_params != NULL);
2452  cutoff_s =
2453  (int) (hitParams->cutoff_score_min * context->localScalingFactor);
2454  } else {
2455  /* There is no cutoff score; we consider e-values instead */
2456  cutoff_s = 1;
2457  }
2458  cutoff_e = hitParams->options->expect_value;
2459  rows = positionBased ? queryInfo->max_length : BLASTAA_SIZE;
      /* NOTE(review): the result of Blast_MatrixInfoNew is passed straight
         to s_MatrixInfoInit, which dereferences it, without a NULL check. */
2460  scaledMatrixInfo = Blast_MatrixInfoNew(rows, BLASTAA_SIZE, positionBased);
2461  status = s_MatrixInfoInit(scaledMatrixInfo, queryBlk, context->sbp,
2462  context->localScalingFactor,
2463  context->scoringParams->options->matrix);
2464  if (status != 0) {
      /* NOTE(review): scaledMatrixInfo is not released on this path --
         looks like a leak; confirm. */
2465  return NULL;
2466  }
2467  gapping_params = s_GappingParamsNew(context, extendParams,
2468  queryInfo->last_context + 1);
2469  if (gapping_params == NULL) {
      /* NOTE(review): scaledMatrixInfo is not released on this path
         either -- confirm. */
2470  return NULL;
2471  } else {
      /* scaledMatrixInfo and gapping_params are passed by address;
         presumably the new object takes them over -- confirm against
         Blast_RedoAlignParamsNew. */
2472  return
2473  Blast_RedoAlignParamsNew(&scaledMatrixInfo, &gapping_params,
2474  compo_adjust_mode, positionBased,
2475  query_is_translated,
2476  subject_is_translated,
2477  queryInfo->max_length, cutoff_s, cutoff_e,
2478  do_link_hsps, &redo_align_callbacks,
2479  near_identical_cutoff);
2480  }
2481 }
2482 
2483 
2484 /**
2485  * Convert an array of BlastCompo_Heap objects to a BlastHSPResults structure.
2486  *
2487  * @param results BLAST core external results structure (pre-SeqAlign)
2488  * [out]
2489  * @param heaps an array of BlastCompo_Heap objects; every heap is emptied
  * by this call
2490  * @param hitlist_size size of each list in the results structure above [in]
2491  */
2492 static void
/* NOTE(review): listing line 2493 is absent; it held the function name and
 * the `results` parameter declaration. Restore from upstream. */
2494  BlastCompo_Heap heaps[],
2495  Int4 hitlist_size)
2496 {
2497  int query_index; /* loop index */
2498  int num_queries; /* Number of queries in this search */
2499 
2500  num_queries = results->num_queries;
2501  for (query_index = 0; query_index < num_queries; query_index++) {
2502  BlastHSPList* hsp_list;
2503  BlastHitList* hitlist;
2504  BlastCompo_Heap * heap = &heaps[query_index];
2505 
      /* A new hit list is stored in the slot for this query; whatever
         pointer the slot held before is overwritten. */
2506  results->hitlist_array[query_index] = Blast_HitListNew(hitlist_size);
2507  hitlist = results->hitlist_array[query_index];
2508 
      /* Pop until the heap reports NULL, moving each list into hitlist. */
2509  while (NULL != (hsp_list = BlastCompo_HeapPop(heap))) {
2510  Blast_HitListUpdate(hitlist, hsp_list);
2511  }
2512  }
      /* NOTE(review): listing line 2513 is absent; it held a statement that
         runs after all queries have been processed. Restore from upstream. */
2514 }
2515 
2516 
2517 /** Remove all matches from a BlastCompo_Heap. */
2518 static void s_ClearHeap(BlastCompo_Heap * self)
2519 {
2520  BlastHSPList* hsp_list = NULL; /* an element of the heap */
2521 
2522  while (NULL != (hsp_list = BlastCompo_HeapPop(self))) {
2523  hsp_list = Blast_HSPListFree(hsp_list);
2524  }
2525 }
2526 
2527 /**
2528  * Free a BlastGapAlignStruct copy created by s_BlastGapAlignStruct_Copy
2529  *
2530  * @param copy Pointer to BlastGapAlignStruct to be freed; it is
  * dereferenced unconditionally, so it must not be NULL
2531  */
/* NOTE(review): listing line 2532 is absent; it held the return type, the
 * function name and the `copy` parameter declaration. Restore it from the
 * upstream file. */
2533 {
2534  {
      /* Walk the state_struct list, unlinking each node before its
         state_array and the node itself are released. */
2535  while (copy->state_struct != NULL) {
2536  GapStateArrayStruct* cur = copy->state_struct;
2537  copy->state_struct = copy->state_struct->next;
2538  if (cur->state_array) {
2539  sfree(cur->state_array);
2540  }
      /* cur was already dereferenced above, so this test always holds */
2541  if (cur) {
2542  sfree(cur);
2543  }
2544  }
2545  }
2546  {
      /* edit_script: its two arrays first, then the script itself */
2547  if (copy->edit_script != NULL) {
2548  if (copy->edit_script->op_type) {
2549  sfree(copy->edit_script->op_type);
2550  }
2551  if (copy->edit_script->num) {
2552  sfree(copy->edit_script->num);
2553  }
2554  sfree(copy->edit_script);
2555  }
2556  }
2557  {
2558  if (copy->fwd_prelim_tback != NULL) {
2559  if (copy->fwd_prelim_tback->edit_ops) {
2560  sfree(copy->fwd_prelim_tback->edit_ops);
2561  }
2562  sfree(copy->fwd_prelim_tback);
2563  }
2564  }
2565  {
2566  if (copy->rev_prelim_tback != NULL) {
2567  if (copy->rev_prelim_tback->edit_ops) {
2568  sfree(copy->rev_prelim_tback->edit_ops);
2569  }
2570  sfree(copy->rev_prelim_tback);
2571  }
2572  }
2573  {
      /* Only the top-level greedy_align_mem allocation is released here. */
2574  if (copy->greedy_align_mem != NULL) {
2575  sfree(copy->greedy_align_mem);
2576  }
2577  }
2578  {
2579  if (copy->dp_mem != NULL) {
2580  sfree(copy->dp_mem);
2581  }
2582  }
2583  {
      /* NOTE(review): s_BlastGapAlignStruct_Copy stores the caller-supplied
         sbp pointer in copy->sbp; sfree here releases only that top-level
         allocation. Confirm who owns the score block and that it is not
         freed a second time by the caller. */
2584  if (copy->sbp != NULL) {
2585  sfree(copy->sbp);
2586  }
2587  }
2588  sfree(copy);
2589 }
2590 
2591 /**
2592  * Create a "deep" copy of a BlastGapAlignStruct structure.
2593  *
2594  * Non-pointer structure members are copied. Pointers to data which will
2595  * only be read are copied. For data which will be changing, memory for copies
2596  * will be allocated and new pointers will be assigned to them. The process
2597  * repeats down the structure hierarchy until all pointers are dealt with.
2598  *
2599  * @param orig Pointer to BlastGapAlignStruct structure to be copied
2600  * @param sbp Pointer to BlastScoreBlk structure, required to set copy->sbp
2601  *
2602  * @return Pointer to copy of original BlastGapAlignStruct structure
2603  */
/* NOTE(review): listing lines 2604-2605 are absent; they held the return
 * type, the function name and the `orig` parameter declaration. Restore
 * them from the upstream file. */
2606  BlastScoreBlk* sbp
2607 )
2608 {
      /* NOTE(review): listing lines 2609-2610 are absent; they held the
         declaration and allocation of `copy`. None of the calloc results
         in the visible code below is checked before use -- confirm whether
         the missing lines check the allocation of `copy`. */
2611 
2612  // Copy plain old data (ints, doubles, booleans, ...).
2613  // Any pointer members will be processed separately.
      // After this memcpy every pointer member of copy still aliases
      // orig's data until the sections below replace it.
2614  memcpy(copy, orig, sizeof(BlastGapAlignStruct));
2615 
2616  {
      /* state_struct: duplicate the linked list node by node, giving
         each node its own state_array of `length` bytes */
2617  GapStateArrayStruct* o = orig->state_struct;
2618  if (o != NULL) {
      /* NOTE(review): listing line 2619 is absent; it held the declaration
         of `c` and the start of the calloc call completed below. */
2620  1,
2621  sizeof(GapStateArrayStruct)
2622  );
2623  copy->state_struct = c;
2624  memcpy(c, o, sizeof(GapStateArrayStruct));
2625  c->state_array = (Uint1*) calloc(c->length, sizeof(Uint1));
2626  int i;
2627  for (i = 0; i < c->length; ++i) {
2628  c->state_array[i] = o->state_array[i];
2629  }
2630  while (o->next != NULL) {
2631  c->next = (GapStateArrayStruct*)
2632  calloc(1, sizeof(GapStateArrayStruct));
2633  c = c->next;
2634  o = o->next;
      /* the memcpy also copies o->next into c->next; that alias is
         replaced on the next iteration, or is already NULL at the
         end of the list */
2635  memcpy(c, o, sizeof(GapStateArrayStruct));
2636  c->state_array = (Uint1*) calloc(c->length, sizeof(Uint1));
2637  int i;
2638  for (i = 0; i < c->length; ++i) {
2639  c->state_array[i] = o->state_array[i];
2640  }
2641  }
2642  }
2643  }
2644  {
      /* edit_script: new script with its own op_type and num arrays of
         o->size elements */
2645  GapEditScript* o = orig->edit_script;
2646  if (o != NULL) {
      /* NOTE(review): listing line 2647 is absent; it held the declaration
         of `c` and the start of the calloc call completed below. */
2648  1,
2649  sizeof(GapEditScript)
2650  );
2651  copy->edit_script = c;
2652  memcpy(c, o, sizeof(GapEditScript));
2653  c->op_type = (EGapAlignOpType*) calloc(
2654  o->size,
2655  sizeof(EGapAlignOpType)
2656  );
2657  c->num = (Int4*) calloc(o->size, sizeof(Int4));
2658  int i;
2659  for (i = 0; i < o->size; ++i) {
2660  c->op_type[i] = o->op_type[i];
2661  c->num[i] = o->num[i];
2662  }
2663  }
2664  }
2665  {
      /* fwd_prelim_tback: all num_ops_allocated entries are copied */
2666  GapPrelimEditBlock* o = orig->fwd_prelim_tback;
2667  if (o != NULL) {
      /* NOTE(review): listing line 2668 is absent; it held the declaration
         of `c` and the start of the calloc call completed below. */
2669  1,
2670  sizeof(GapPrelimEditBlock)
2671  );
2672  copy->fwd_prelim_tback = c;
2673  memcpy(c, o, sizeof(GapPrelimEditBlock));
2674  c->edit_ops = calloc(
2675  o->num_ops_allocated,
2676  sizeof(GapPrelimEditScript)
2677  );
2678  int i;
2679  for (i = 0; i < o->num_ops_allocated; ++i) {
2680  c->edit_ops[i].op_type = o->edit_ops[i].op_type;
2681  c->edit_ops[i].num = o->edit_ops[i].num;
2682  }
2683  }
2684  }
2685  {
      /* rev_prelim_tback: same treatment as fwd_prelim_tback */
2686  GapPrelimEditBlock* o = orig->rev_prelim_tback;
2687  if (o != NULL) {
      /* NOTE(review): listing line 2688 is absent; it held the declaration
         of `c` and the start of the calloc call completed below. */
2689  1,
2690  sizeof(GapPrelimEditBlock)
2691  );
2692  copy->rev_prelim_tback = c;
2693  memcpy(c, o, sizeof(GapPrelimEditBlock));
2694  c->edit_ops = calloc(
2695  o->num_ops_allocated,
2696  sizeof(GapPrelimEditScript)
2697  );
2698  int i;
2699  for (i = 0; i < o->num_ops_allocated; ++i) {
2700  c->edit_ops[i].op_type = o->edit_ops[i].op_type;
2701  c->edit_ops[i].num = o->edit_ops[i].num;
2702  }
2703  }
2704  }
2705  {
      /* greedy_align_mem: only the top-level structure is duplicated;
         any pointers it contains still refer to orig's data.
         NOTE(review): confirm this shallow copy is intended. */
2706  SGreedyAlignMem* o = orig->greedy_align_mem;
2707  if (o != NULL) {
      /* NOTE(review): listing line 2708 is absent; it held the declaration
         of `c` and the start of the calloc call completed below. */
2709  1,
2710  sizeof(SGreedyAlignMem)
2711  );
2712  copy->greedy_align_mem = c;
2713  memcpy(c, o, sizeof(SGreedyAlignMem));
2714  }
2715  }
2716  {
      /* dp_mem: a flat array of dp_mem_alloc elements, copied wholesale */
2717  BlastGapDP* o = orig->dp_mem;
2718  if (o != NULL) {
2719  BlastGapDP* c = (BlastGapDP*) calloc(
2720  orig->dp_mem_alloc,
2721  sizeof(BlastGapDP)
2722  );
2723  copy->dp_mem = c;
2724  memcpy(c, o, orig->dp_mem_alloc * sizeof(BlastGapDP));
2725  }
2726  }
2727  {
      /* the score block is not duplicated here: the caller-supplied
         pointer is stored as is */
2728  copy->sbp = sbp;
2729  }
2730 
2731  return copy;
2732 }
2733 
2734 /**
2735  * Free a BlastScoreBlk copy created by s_BlastScoreBlk_Copy
2736  *
2737  * BlastScoreBlk* pointer "bsb_ptr" should be passed as (&bsb_ptr);
2738  * this function will set bsb_ptr to NULL before returning.
2739  *
2740  * @param copy Pointer to (pointer to BlastScoreBlk to be freed)
2741  */
2742 static
/* NOTE(review): listing line 2743 is absent; it held the return type, the
 * function name and the `copy` parameter declaration. Restore it from the
 * upstream file. */
2744 {
      /* NOTE(review): listing line 2745 is absent; it held the statement
         that releases *copy before the pointer is cleared below. */
2746  *copy = NULL;
2747 }
2748 
2749 /**
2750  * Create a "deep" copy of a BlastScoreBlk structure.
2751  *
2752  * Non-pointer structure members are copied. Pointers to data which will
2753  * only be read are copied. For data which will be changing, memory for copies
2754  * will be allocated and new pointers will be assigned to them. The process
2755  * repeats down the structure hierarchy until all pointers are dealt with.
2756  *
2757  * @param program The program type
2758  * @param orig Pointer to BlastScoreBlk structure to be copied
2759  * @param alphabet_code Alphabet code
2760  * @param number_of_contexts Number of contexts
2761  *
2762  * @return Pointer to copy of original BlastScoreBlk structure, or NULL if
  * the initial allocation fails; on later failures the partial copy is
  * handed to BlastScoreBlkFree and that call's result is returned
2763  */
2764 static
/* NOTE(review): listing line 2765 is absent; it held the return type and
 * the function name. Restore it from the upstream file. */
2766  EBlastProgramType program,
/* NOTE(review): listing line 2767 is absent; presumably it declared the
 * `orig` parameter used throughout the body -- confirm against upstream. */
2768  Uint1 alphabet_code,
2769  Int4 number_of_contexts
2770 )
2771 {
      /* NOTE(review): listing line 2772 is absent; it held the declaration
         of `copy` and the start of the call completed below. The visible
         arguments are orig->alphabet_code and orig->number_of_contexts;
         the alphabet_code and number_of_contexts parameters are not
         referenced anywhere in the visible body. */
2773  orig->alphabet_code,
2774  orig->number_of_contexts
2775  );
2776  if (copy == NULL) {
2777  return NULL;
2778  }
2779 
2780  copy->alphabet_start = orig->alphabet_start;
      /* NOTE(review): orig->name is passed to strdup unchecked, and the
         strdup result is not checked either -- confirm name is never NULL. */
2781  copy->name = strdup(orig->name);
      /* comments is shared with orig: only the pointer is copied */
2782  copy->comments = orig->comments;
2783  /* Deep-copy orig->matrix */
2784  if (orig->matrix != NULL) {
2785  if (copy->matrix == NULL) {
2786  return BlastScoreBlkFree(copy);
2787  }
2788  SBlastScoreMatrix* m = copy->matrix;
2789  if (m->data != NULL && orig->matrix->data != NULL) {
2790  int i;
      /* NOTE(review): the loop runs over orig's ncols while each memcpy
         moves m->nrows ints; the two only coincide when the matrix is
         square and both objects have equal dimensions -- confirm. */
2791  for (i = 0; i < orig->matrix->ncols; ++i) {
2792  memcpy(
2793  m->data[i],
2794  orig->matrix->data[i],
2795  m->nrows * sizeof(int)
2796  );
2797  }
2798  }
2799  if (m->freqs != NULL && orig->matrix->freqs != NULL) {
2800  memcpy(
2801  m->freqs,
2802  orig->matrix->freqs,
2803  m->ncols * sizeof(double)
2804  );
2805  }
2806  m->lambda = orig->matrix->lambda;
2807  }
2808  /* Deep-copy orig->psi_matrix */
2809  if (orig->psi_matrix != NULL
2810  && orig->psi_matrix->pssm != NULL) {
2811  copy->psi_matrix = SPsiBlastScoreMatrixNew(orig->psi_matrix->pssm->ncols);
2812  if (copy->psi_matrix == NULL) {
2813  return BlastScoreBlkFree(copy);
2814  }
2815  SPsiBlastScoreMatrix* pm = copy->psi_matrix;
2816  SBlastScoreMatrix* m = pm->pssm;
2817  if (m->data != NULL && orig->psi_matrix->pssm->data != NULL) {
2818  int i;
2819  for (i = 0; i < orig->psi_matrix->pssm->ncols; ++i) {
2820  memcpy(
2821  m->data[i],
2822  orig->psi_matrix->pssm->data[i],
2823  m->nrows * sizeof(int)
2824  );
2825  }
2826  }
2827  if (m->freqs != NULL
2828  && orig->psi_matrix->pssm->freqs != NULL) {
2829  memcpy(
2830  m->freqs,
2831  orig->psi_matrix->pssm->freqs,
2832  m->ncols * sizeof(double)
2833  );
2834  }
2835  m->lambda = orig->psi_matrix->pssm->lambda;
2836  if (pm->freq_ratios != NULL
2837  && orig->psi_matrix->freq_ratios != NULL) {
2838  int i;
2839  for (i = 0; i < orig->psi_matrix->pssm->ncols; ++i) {
2840  memcpy(
2841  pm->freq_ratios[i],
2842  orig->psi_matrix->freq_ratios[i],
2843  orig->psi_matrix->pssm->nrows * sizeof(double)
2844  );
2845  }
2846  }
      /* NOTE(review): pm->kbp is written without a NULL check; presumably
         SPsiBlastScoreMatrixNew allocates it -- confirm. */
2847  if (orig->psi_matrix->kbp != NULL) {
2848  memcpy(pm->kbp, orig->psi_matrix->kbp, sizeof(Blast_KarlinBlk));
2849  }
2850  }
2851  copy->matrix_only_scoring = orig->matrix_only_scoring;
2852  copy->complexity_adjusted_scoring = orig->complexity_adjusted_scoring;
2853  copy->loscore = orig->loscore;
2854  copy->hiscore = orig->hiscore;
2855  copy->penalty = orig->penalty;
2856  copy->reward = orig->reward;
2857  copy->read_in_matrix = orig->read_in_matrix;
      /* kbp and kbp_gap are aliases: they point at the psi arrays for a
         PSSM query and at the std arrays otherwise */
2858  if (Blast_QueryIsPssm(program)) {
2859  copy->kbp = copy->kbp_psi;
2860  copy->kbp_gap = copy->kbp_gap_psi;
2861  } else {
2862  copy->kbp = copy->kbp_std;
2863  copy->kbp_gap = copy->kbp_gap_std;
2864  }
      /* NOTE(review): copy->gbp is written without a NULL check --
         confirm it is allocated by the constructor call above. */
2865  if (orig->gbp != NULL) {
2866  memcpy(copy->gbp, orig->gbp, sizeof(Blast_GumbelBlk));
2867  }
2868  int ctx;
      /* Per-context data: score frequencies and the four Karlin block
         arrays. A failed copy aborts the whole operation. */
2869  for (ctx = 0; ctx < orig->number_of_contexts; ++ctx) {
2870  if (orig->sfp != NULL && orig->sfp[ctx] != NULL) {
2871  copy->sfp[ctx] = Blast_ScoreFreqNew(
2872  orig->sfp[ctx]->score_min,
2873  orig->sfp[ctx]->score_max
2874  );
2875  if (copy->sfp[ctx] == NULL) {
2876  return BlastScoreBlkFree(copy);
2877  }
2878  copy->sfp[ctx]->obs_min = orig->sfp[ctx]->obs_min;
2879  copy->sfp[ctx]->obs_max = orig->sfp[ctx]->obs_max;
2880  copy->sfp[ctx]->score_avg = orig->sfp[ctx]->score_avg;
      /* r = number of scores in [score_min, score_max]; that many
         probabilities are copied from sprob0 */
2881  int r = orig->sfp[ctx]->score_max - orig->sfp[ctx]->score_min + 1;
2882  memcpy(
2883  copy->sfp[ctx]->sprob0,
2884  orig->sfp[ctx]->sprob0,
2885  r * sizeof(double)
2886  );
2887  }
      /* The Blast_KarlinBlkNew results below are not tested directly; a
         failure is only detected through the nonzero return of
         Blast_KarlinBlkCopy. */
2888  if (orig->kbp_std != NULL && orig->kbp_std[ctx] != NULL) {
2889  copy->kbp_std[ctx] = Blast_KarlinBlkNew();
2890  if (Blast_KarlinBlkCopy(copy->kbp_std[ctx], orig->kbp_std[ctx]) != 0) {
2891  return BlastScoreBlkFree(copy);
2892  }
2893  }
2894  if (orig->kbp_gap_std != NULL && orig->kbp_gap_std[ctx] != NULL) {
2895  copy->kbp_gap_std[ctx] = Blast_KarlinBlkNew();
2896  if (Blast_KarlinBlkCopy(copy->kbp_gap_std[ctx], orig->kbp_gap_std[ctx]) != 0) {
2897  return BlastScoreBlkFree(copy);
2898  }
2899  }
2900  if (orig->kbp_psi != NULL && orig->kbp_psi[ctx] != NULL) {
2901  copy->kbp_psi[ctx] = Blast_KarlinBlkNew();
2902  if (Blast_KarlinBlkCopy(copy->kbp_psi[ctx], orig->kbp_psi[ctx]) != 0) {
2903  return BlastScoreBlkFree(copy);
2904  }
2905  }
2906  if (orig->kbp_gap_psi != NULL && orig->kbp_gap_psi[ctx] != NULL) {
2907  copy->kbp_gap_psi[ctx] = Blast_KarlinBlkNew();
2908  if (Blast_KarlinBlkCopy(copy->kbp_gap_psi[ctx], orig->kbp_gap_psi[ctx]) != 0) {
2909  return BlastScoreBlkFree(copy);
2910  }
2911  }
      /* kbp/kbp_gap already alias the psi or std arrays (set before the
         loop), so these assignments store each element onto itself. */
2912  if (Blast_QueryIsPssm(program)) {
2913  copy->kbp[ctx] = copy->kbp_psi[ctx];
2914  copy->kbp_gap[ctx] = copy->kbp_gap_psi[ctx];
2915  } else {
2916  copy->kbp[ctx] = copy->kbp_std[ctx];
2917  copy->kbp_gap[ctx] = copy->kbp_gap_std[ctx];
2918  }
2919  }
2920  if (orig->kbp_ideal != NULL) {
2921  copy->kbp_ideal = Blast_KarlinBlkNew();
2922  if (Blast_KarlinBlkCopy(copy->kbp_ideal, orig->kbp_ideal) != 0) {
2923  return BlastScoreBlkFree(copy);
2924  }
2925  }
      /* NOTE(review): the calloc result is not checked before the memcpy,
         and any ambiguous_res the constructor may already have set is
         overwritten here -- confirm. */
2926  copy->ambiguous_res = (Uint1*) calloc(orig->ambig_size, sizeof(Uint1));
2927  if (orig->ambiguous_res != NULL) {
2928  memcpy(copy->ambiguous_res, orig->ambiguous_res, orig->ambig_size);
2929  }
2930  copy->ambig_size = orig->ambig_size;
2931  copy->ambig_occupy = orig->ambig_occupy;
2932  copy->round_down = orig->round_down;
2933 
2934  return copy;
2935 }
2936 
2937 /**
2938  * Recompute alignments for each match found by the gapped BLAST
2939  * algorithm. Single-thread adapter to Blast_RedoAlignmentCore_MT.
2940  */
2941 Int2
2943  BLAST_SequenceBlk * queryBlk,
2944  const BlastQueryInfo* queryInfo,
2945  BlastScoreBlk* sbp,
2946  BLAST_SequenceBlk * subjectBlk,
2947  const BlastSeqSrc* seqSrc,
2948  Int4 default_db_genetic_code,
2949  BlastHSPList * thisMatch,
2950  BlastHSPStream* hsp_stream,
2951  BlastScoringParameters* scoringParams,
2952  const BlastExtensionParameters* extendParams,
2953  const BlastHitSavingParameters* hitParams,
2954  const PSIBlastOptions* psiOptions,
2955  BlastHSPResults* results)
2956 {
2958  program_number,
2959  1, /* number of threads */
2960  queryBlk,
2961  queryInfo,
2962  sbp,
2963  subjectBlk,
2964  seqSrc,
2965  default_db_genetic_code,
2966  thisMatch,
2967  hsp_stream,
2968  scoringParams,
2969  extendParams,
2970  hitParams,
2971  psiOptions,
2972  results
2973  );
2974 }
2975 
2976 /**
2977  * Recompute alignments for each match found by the gapped BLAST
2978  * algorithm.
2979  */
2980 Int2
2982  Uint4 num_threads,
2983  BLAST_SequenceBlk * queryBlk,
2984  const BlastQueryInfo* queryInfo,
2985  BlastScoreBlk* sbp,
2986  BLAST_SequenceBlk * subjectBlk,
2987  const BlastSeqSrc* seqSrc,
2988  Int4 default_db_genetic_code,
2989  BlastHSPList * thisMatch,
2990  BlastHSPStream* hsp_stream,
2991  BlastScoringParameters* scoringParams,
2992  const BlastExtensionParameters* extendParams,
2993  const BlastHitSavingParameters* hitParams,
2994  const PSIBlastOptions* psiOptions,
2995  BlastHSPResults* results)
2996 {
2997  int status_code = 0; /* return value code */
2998  /* the factor by which to scale the scoring system in order to
2999  * obtain greater precision */
3000  double localScalingFactor;
3001  /* forbidden ranges for each database position (used in
3002  * Smith-Waterman alignments) */
3003  Blast_ForbiddenRanges forbidden = {0,};
3004  /* a collection of alignments for each query sequence with
3005  * sequences from the database */
3006  BlastCompo_Heap* redoneMatches = NULL;
3007  /* stores all fields needed for computing a compositionally
3008  * adjusted score matrix using Newton's method */
3009  Blast_CompositionWorkspace** NRrecord_tld = NULL;
3010  /* loop index */
3011  int query_index;
3012  /* number of queries in the concatenated query */
3013  int numQueries = queryInfo->num_queries;
3014  /* number of contexts in the concatenated query */
3015  int numContexts = queryInfo->last_context + 1;
3016  /* number of contexts within a query */
3017  int numFrames = (program_number == eBlastTypeBlastx) ? 6:1;
3018  /* keeps track of gapped alignment params */
3019  BlastGapAlignStruct* gapAlign = NULL;
3020  /* the values of the search parameters that will be recorded, altered
3021  * in the search structure in this routine, and then restored before
3022  * the routine exits. */
3023  BlastKappa_SavedParameters *savedParams = NULL;
3024  /* All alignments above this value will be reported, no matter how many. */
3025  double inclusion_ethresh;
3026 
3027  BlastHSPResults* local_results = NULL;
3028 
3029  BlastCompo_QueryInfo** query_info_tld = NULL;
3030  int* numContexts_tld = NULL;
3031  int* numQueries_tld = NULL;
3032  int* compositionTestIndex_tld = NULL;
3033  Blast_RedoAlignParams** redo_align_params_tld = NULL;
3034  BLAST_SequenceBlk** subjectBlk_tld = NULL;
3035  Boolean positionBased = (Boolean) (sbp->psi_matrix != NULL);
3036  ECompoAdjustModes compo_adjust_mode =
3038  Boolean smithWaterman =
3039  (Boolean) (extendParams->options->eTbackExt == eSmithWatermanTbck);
3040 
3041  /* which test function do we use to see if a composition-adjusted
3042  p-value is desired; value needs to be passed in eventually*/
3043  int compositionTestIndex = extendParams->options->unifiedP;
3044  Uint1* genetic_code_string = GenCodeSingletonFind(default_db_genetic_code);
3045 
3046  ASSERT(program_number == eBlastTypeBlastp ||
3047  program_number == eBlastTypeTblastn ||
3048  program_number == eBlastTypeBlastx ||
3049  program_number == eBlastTypePsiBlast ||
3050  program_number == eBlastTypeRpsBlast ||
3051  program_number == eBlastTypeRpsTblastn);
3052 
3053  if (0 == strcmp(scoringParams->options->matrix, "BLOSUM62_20") &&
3054  compo_adjust_mode == eNoCompositionBasedStats) {
3055  return -1; /* BLOSUM62_20 only makes sense if
3056  * compo_adjust_mode is on */
3057  }
3058  if (positionBased) {
3059  /* Position based searches can only use traditional
3060  * composition based stats */
3061  if ((int) compo_adjust_mode > 1) {
3062  compo_adjust_mode = eCompositionBasedStats;
3063  }
3064  /* A position-based search can only have one query */
3065  ASSERT(queryInfo->num_queries == 1);
3066  ASSERT(queryBlk->length == (Int4)sbp->psi_matrix->pssm->ncols);
3067  }
3068 
3069  if ((int) compo_adjust_mode > 1 &&
3070  !Blast_FrequencyDataIsAvailable(scoringParams->options->matrix)) {
3071  return -1; /* Unsupported matrix */
3072  }
3073  /*****************/
3074  inclusion_ethresh = (psiOptions /* this can be NULL for CBl2Seq */
3075  ? psiOptions->inclusion_ethresh
3077  ASSERT(inclusion_ethresh != 0.0);
3078 
3079  int actual_num_threads = 1;
3080 #ifdef _OPENMP
3081  actual_num_threads = num_threads;
3082 #endif
3083 
3084  /* Initialize savedParams */
3085  savedParams =
3086  s_SavedParametersNew(queryInfo->max_length, numContexts,
3087  compo_adjust_mode, positionBased);
3088  if (savedParams == NULL) {
3089  status_code = -1;
3090  goto function_cleanup;
3091  }
3092  status_code =
3093  s_RecordInitialSearch(savedParams, sbp, scoringParams,
3094  queryInfo->max_length, compo_adjust_mode,
3095  positionBased);
3096  if (status_code != 0) {
3097  goto function_cleanup;
3098  }
3099 
3100  if (compo_adjust_mode != eNoCompositionBasedStats) {
3101  if((0 == strcmp(scoringParams->options->matrix, "BLOSUM62_20"))) {
3102  localScalingFactor = SCALING_FACTOR / 10;
3103  } else {
3104  localScalingFactor = SCALING_FACTOR;
3105  }
3106  } else {
3107  localScalingFactor = 1.0;
3108  }
3109  s_RescaleSearch(sbp, scoringParams, numContexts, localScalingFactor);
3110  status_code =
3111  BLAST_GapAlignStructNew(scoringParams, extendParams,
3112  (seqSrc) ? BlastSeqSrcGetMaxSeqLen(seqSrc)
3113  : subjectBlk->length,
3114  sbp, &gapAlign);
3115  if (status_code != 0) {
3116  return (Int2) status_code;
3117  }
3118 
3119  redoneMatches = calloc(numQueries, sizeof(BlastCompo_Heap));
3120  if (redoneMatches == NULL) {
3121  status_code = -1;
3122  goto function_cleanup;
3123  }
3124  for (query_index = 0; query_index < numQueries; query_index++) {
3125  status_code =
3126  BlastCompo_HeapInitialize(&redoneMatches[query_index],
3127  hitParams->options->hitlist_size,
3128  inclusion_ethresh);
3129  if (status_code != 0) {
3130  goto function_cleanup;
3131  }
3132  }
3133 
3134  BlastCompo_Heap** redoneMatches_tld =
3135  (BlastCompo_Heap**) calloc(
3136  actual_num_threads,
3137  sizeof(BlastCompo_Heap*)
3138  );
3139  BlastCompo_Alignment*** alignments_tld =
3141  actual_num_threads,
3142  sizeof(BlastCompo_Alignment**)
3143  );
3144  BlastCompo_Alignment*** incoming_align_set_tld =
3146  actual_num_threads,
3147  sizeof(BlastCompo_Alignment**)
3148  );
3149  BlastKappa_SavedParameters** savedParams_tld =
3151  actual_num_threads,
3153  );
3154  BlastScoreBlk** sbp_tld =
3155  (BlastScoreBlk**) calloc(
3156  actual_num_threads,
3157  sizeof(BlastScoreBlk*)
3158  );
3159  BlastKappa_GappingParamsContext* gapping_params_context_tld =
3161  actual_num_threads,
3163  );
3164  Int4*** matrix_tld =
3165  (Int4***) calloc(
3166  actual_num_threads,
3167  sizeof(Int4**)
3168  );
3169 
3170  NRrecord_tld =
3172  actual_num_threads,
3174  );
3175 
3176  subjectBlk_tld =
3178  actual_num_threads,
3179  sizeof(BLAST_SequenceBlk*)
3180  );
3181  redo_align_params_tld =
3183  actual_num_threads,
3184  sizeof(Blast_RedoAlignParams*)
3185  );
3186  int* status_code_tld =
3187  (int*) calloc(
3188  actual_num_threads,
3189  sizeof(int)
3190  );
3191  BlastSeqSrc** seqsrc_tld =
3192  (BlastSeqSrc**) calloc(
3193  actual_num_threads,
3194  sizeof(BlastSeqSrc*)
3195  );
3196  BlastGapAlignStruct** gap_align_tld =
3198  actual_num_threads,
3199  sizeof(BlastGapAlignStruct*)
3200  );
3201  BlastScoringParameters** score_params_tld =
3203  actual_num_threads,
3204  sizeof(BlastScoringParameters*)
3205  );
3206  BlastHitSavingParameters** hit_params_tld =
3208  actual_num_threads,
3209  sizeof(BlastHitSavingParameters*)
3210  );
3211  BlastHSPResults** results_tld =
3212  (BlastHSPResults**) calloc(
3213  actual_num_threads,
3214  sizeof(BlastHSPResults*)
3215  );
3216  query_info_tld =
3218  actual_num_threads,
3219  sizeof(BlastCompo_QueryInfo*)
3220  );
3221  numContexts_tld =
3222  (int*) calloc(
3223  actual_num_threads,
3224  sizeof(int)
3225  );
3226  compositionTestIndex_tld =
3227  (int*) calloc(
3228  actual_num_threads,
3229  sizeof(int)
3230  );
3231  numQueries_tld =
3232  (int*) calloc(
3233  actual_num_threads,
3234  sizeof(int)
3235  );
3236  Blast_ForbiddenRanges** forbidden_tld =
3238  actual_num_threads,
3239  sizeof(Blast_ForbiddenRanges*)
3240  );
3241 
3242  int i;
3243  for (i = 0; i < actual_num_threads; ++i) {
3244  query_info_tld[i] = s_GetQueryInfo(
3245  queryBlk->sequence,
3246  queryInfo,
3247  (program_number == eBlastTypeBlastx)
3248  );
3249  if (query_info_tld[i] == NULL) {
3250  status_code = -1;
3251  goto function_cleanup;
3252  }
3253 
3254  sbp_tld[i] = s_BlastScoreBlk_Copy(
3255  program_number,
3256  sbp,
3257  sbp->alphabet_code,
3258  sbp->number_of_contexts
3259  );
3260 
3261  if(smithWaterman) {
3262  forbidden_tld[i] = calloc(1,sizeof(Blast_ForbiddenRanges));
3263  status_code =
3264  Blast_ForbiddenRangesInitialize(forbidden_tld[i], queryInfo->max_length);
3265  if (status_code != 0) {
3266  goto function_cleanup;
3267  }
3268  }
3269 
3270  numContexts_tld[i] = numContexts;
3271  numQueries_tld[i] = numQueries;
3272  compositionTestIndex_tld[i] = compositionTestIndex;
3273  seqsrc_tld[i] = BlastSeqSrcCopy(seqSrc);
3274  gap_align_tld[i] =
3275  s_BlastGapAlignStruct_Copy(gapAlign, sbp_tld[i]);
3276  score_params_tld[i] = scoringParams;
3277  hit_params_tld[i] = (BlastHitSavingParameters*) hitParams;
3278  results_tld[i] =
3279  Blast_HSPResultsNew(queryInfo->num_queries);
3280  subjectBlk_tld[i] = subjectBlk;
3281 
3282  redoneMatches_tld[i] =
3283  (BlastCompo_Heap*) calloc(numQueries, sizeof(BlastCompo_Heap));
3284  if (redoneMatches_tld[i] == NULL) {
3285  status_code = -1;
3286  goto function_cleanup;
3287  }
3288  for (query_index = 0; query_index < numQueries; query_index++) {
3289  status_code =
3290  BlastCompo_HeapInitialize(&redoneMatches_tld[i][query_index],
3291  hitParams->options->hitlist_size,
3292  inclusion_ethresh);
3293  if (status_code != 0) {
3294  goto function_cleanup;
3295  }
3296  }
3297 
3298  alignments_tld[i] = (BlastCompo_Alignment**) calloc(
3299  numContexts,
3300  sizeof(BlastCompo_Alignment*)
3301  );
3302  incoming_align_set_tld[i] = (BlastCompo_Alignment**) calloc(
3303  numFrames,
3304  sizeof(BlastCompo_Alignment*)
3305  );
3306 
3307  savedParams_tld[i] = s_SavedParametersNew(
3308  queryInfo->max_length,
3309  numContexts,
3310  compo_adjust_mode,
3311  positionBased
3312  );
3313  if (savedParams_tld[i] == NULL) {
3314  status_code = -1;
3315  goto function_cleanup;
3316  }
3317  status_code = s_RecordInitialSearch(
3318  savedParams_tld[i],
3319  sbp,
3320  scoringParams,
3321  queryInfo->max_length,
3322  compo_adjust_mode,
3323  positionBased
3324  );
3325  if (status_code != 0) {
3326  goto function_cleanup;
3327  }
3328 
3329  if ((int) compo_adjust_mode > 1 && !positionBased) {
3330  NRrecord_tld[i] = Blast_CompositionWorkspaceNew();
3331  status_code = Blast_CompositionWorkspaceInit(
3332  NRrecord_tld[i],
3333  scoringParams->options->matrix
3334  );
3335  if (status_code != 0) {
3336  goto function_cleanup;
3337  }
3338  }
3339 
3340  gapping_params_context_tld[i].gap_align = gap_align_tld[i];
3341  gapping_params_context_tld[i].scoringParams = score_params_tld[i];
3342  gapping_params_context_tld[i].sbp = sbp_tld[i];
3343  gapping_params_context_tld[i].localScalingFactor = localScalingFactor;
3344  gapping_params_context_tld[i].prog_number = program_number;
3345 
3346  redo_align_params_tld[i] =
3348  &gapping_params_context_tld[i],
3349  queryBlk,
3350  queryInfo,
3351  hitParams,
3352  extendParams
3353  );
3354  if (redo_align_params_tld[i] == NULL) {
3355  status_code = -1;
3356  goto function_cleanup;
3357  }
3358 
3359  if (positionBased) {
3360  matrix_tld[i] = sbp_tld[i]->psi_matrix->pssm->data;
3361  } else {
3362  matrix_tld[i] = sbp_tld[i]->matrix->data;
3363  }
3364  /**** Validate parameters *************/
3365  if (matrix_tld[i] == NULL) {
3366  goto function_cleanup;
3367  }
3368  }
3369 
3370  /*
3371  * There are two use cases here.
3372  * (1) hsp_stream == NULL, so single match is passed in thisMatch.
3373  * Also, seqSrc == NULL and subjectBlk are != NULL.
3374  * (2) hsp_stream != NULL, so one or more matches are taken from
3375  * hsp_stream, and thisMatch is (probably) NULL.
3376  * Also, seqSrc != NULL, subjectBlk and thisMatch are == NULL.
3377  */
3378  struct BlastHSPListLinkedList {
3380  struct BlastHSPListLinkedList* next;
3381  };
3382  typedef struct BlastHSPListLinkedList BlastHSPListLinkedList;
3383 
3384  BlastHSPList** theseMatches = NULL;
3385  int numMatches = 0;
3386  if (hsp_stream == NULL) {
3387  theseMatches = (BlastHSPList**) calloc(1, sizeof(BlastHSPList*));
3388  *theseMatches = thisMatch;
3389  numMatches = 1;
3390  } else {
3391  BlastHSPList* localMatch = NULL;
3392  BlastHSPListLinkedList* head = NULL;
3393  BlastHSPListLinkedList* tail = NULL;
3394  /*
3395  * Collect matches from stream into linked list, counting them
3396  * along the way.
3397  */
3398  while (BlastHSPStreamRead(hsp_stream, &localMatch)
3399  != kBlastHSPStream_Eof) {
3400  BlastHSPListLinkedList* entry =
3401  (BlastHSPListLinkedList*) calloc(
3402  1,
3403  sizeof(BlastHSPListLinkedList)
3404  );
3405  entry->match = localMatch;
3406  if (head == NULL) {
3407  head = entry;
3408  } else {
3409  tail->next = entry;
3410  }
3411  tail = entry;
3412  ++numMatches;
3413  }
3414  /*
3415  * Convert linked list of matches into array.
3416  */
3417  theseMatches =
3418  (BlastHSPList**) calloc(numMatches, sizeof(BlastHSPList*));
3419  int i;
3420  for (i = 0; i < numMatches; ++i) {
3421  theseMatches[i] = head->match;
3422  BlastHSPListLinkedList* here = head;
3423  head = head->next;
3424  sfree(here);
3425  }
3426  }
3427 
3428  Boolean interrupt = FALSE;
3429 #pragma omp parallel \
3430  default(none) num_threads(actual_num_threads) \
3431  if(actual_num_threads>1) \
3432  shared(interrupt, seqsrc_tld, score_params_tld, hit_params_tld, \
3433  gap_align_tld, results_tld, \
3434  redoneMatches_tld, \
3435  STDERR_COMMA \
3436  numMatches, theseMatches, \
3437  numFrames, program_number, subjectBlk_tld, positionBased, \
3438  default_db_genetic_code, localScalingFactor, queryInfo, \
3439  sbp, smithWaterman, numQueries_tld, compositionTestIndex_tld, forbidden_tld, \
3440  NRrecord_tld, actual_num_threads, sbp_tld, \
3441  matrix_tld, query_info_tld, numContexts_tld, \
3442  genetic_code_string, queryBlk, compo_adjust_mode, \
3443  alignments_tld, incoming_align_set_tld, savedParams_tld, \
3444  scoringParams, redo_align_params_tld, \
3445  status_code_tld)
3446  {
3447  int b;
3448 #pragma omp for schedule(static)
3449  for (b = 0; b < numMatches; ++b) {
3450 #pragma omp flush(interrupt)
3451  if (!interrupt) {
3452  BlastCompo_Alignment** alignments = NULL;
3453  BlastCompo_Alignment** incoming_align_set = NULL;
3454  Blast_CompositionWorkspace* NRrecord = NULL;
3455  BlastCompo_QueryInfo* query_info = NULL;
3456 
3457  int numAligns[6];
3458  Blast_KarlinBlk* kbp = NULL;
3459  BlastCompo_MatchingSequence matchingSeq = {0,};
3460  BlastHSPList* hsp_list = NULL;
3461  BlastCompo_Alignment* incoming_aligns = NULL;
3462  Blast_RedoAlignParams* redo_align_params;
3463  double best_evalue;
3464  Int4 best_score;
3465  int query_index;
3466  int context_index;
3467  int frame_index;
3468  void* discarded_aligns = NULL;
3469  BlastSeqSrc* seqSrc;
3470  BlastScoringParameters* scoringParams;
3471  BlastHitSavingParameters* hitParams;
3472  BlastCompo_Heap* redoneMatches;
3473  BlastScoreBlk* sbp;
3474  BLAST_SequenceBlk* subjectBlk;
3475  int numContexts;
3476  int numQueries;
3477  int compositionTestIndex;
3478  /* existing alignments for a match */
3479  Int4** matrix; /* score matrix */
3480  int* pStatusCode;
3481  Blast_ForbiddenRanges* forbidden = NULL;
3482 
3483  double pvalueForThisPair = (-1); /* p-value for this match
3484  for composition; -1 == no adjustment*/
3485  double LambdaRatio; /*lambda ratio*/
3486 
3487  int tid = 0;
3488 #ifdef _OPENMP
3489  if(actual_num_threads > 1) {
3490  tid = omp_get_thread_num();
3491  }
3492 #endif
3493  seqSrc = seqsrc_tld[tid];
3494  scoringParams = score_params_tld[tid];
3495  hitParams = hit_params_tld[tid];
3496  redoneMatches = redoneMatches_tld[tid];
3497  alignments = alignments_tld[tid];
3498  incoming_align_set = incoming_align_set_tld[tid];
3499  NRrecord = NRrecord_tld[tid];
3500  sbp = sbp_tld[tid];
3501  redo_align_params = redo_align_params_tld[tid];
3502  matrix = matrix_tld[tid];
3503  pStatusCode = &status_code_tld[tid];
3504  query_info = query_info_tld[tid];
3505  numContexts = numContexts_tld[tid];
3506  numQueries = numQueries_tld[tid];
3507  compositionTestIndex = compositionTestIndex_tld[tid];
3508  subjectBlk = subjectBlk_tld[tid];
3509  forbidden = forbidden_tld[tid];
3510 
3511  BlastHSPList* localMatch = theseMatches[b];
3512 
3513  if (localMatch->hsp_array == NULL) {
3514  if (seqSrc) {
3515  continue;
3516  }
3517  if(actual_num_threads > 1) {
3518 #pragma omp critical(intrpt)
3519  interrupt = TRUE;
3520 #pragma omp flush(interrupt)
3521  continue;
3522  }
3523  }
3524 
3526  localMatch->best_evalue,
3527  redoneMatches,
3528  numQueries
3529  )) {
3530  Blast_HSPListFree(localMatch);
3531  if (seqSrc) {
3532  continue;
3533  }
3534  if(actual_num_threads > 1) {
3535 #pragma omp critical(intrpt)
3536  interrupt = TRUE;
3537 #pragma omp flush(interrupt)
3538  continue;
3539  }
3540  }
3541 
3542  query_index = localMatch->query_index;
3543  context_index = query_index * numFrames;
3544  BlastSeqSrcSetRangesArg * ranges = NULL;
3545  /* Get the sequence for this match */
3546  if (seqSrc && BlastSeqSrcGetSupportsPartialFetching(seqSrc)) {
3547  ranges = BLAST_SetupPartialFetching(
3548  program_number,
3549  (BlastSeqSrc*) seqSrc,
3550  (const BlastHSPList**)&localMatch,
3551  1
3552  );
3553  }
3554 
3555  if (subjectBlk) {
3556  matchingSeq.length = subjectBlk->length;
3557  matchingSeq.index = -1;
3558  matchingSeq.local_data = subjectBlk;
3559  } else {
3560  *pStatusCode = s_MatchingSequenceInitialize(
3561  &matchingSeq,
3562  program_number,
3563  seqSrc,
3564  default_db_genetic_code,
3565  localMatch->oid,
3566  ranges
3567  );
3568  if (*pStatusCode != 0) {
3569  /*
3570  * some sequences may have been excluded by membit filtering
3571  * so this is not really an exception
3572  */
3573  *pStatusCode = 0;
3574  goto match_loop_cleanup;
3575  }
3576  }
3577  *pStatusCode = s_ResultHspToDistinctAlign(
3578  incoming_align_set, /* o */
3579  numAligns, /* o */
3580  localMatch->hsp_array, /* i */
3581  localMatch->hspcnt, /* i */
3582  context_index, /* i */
3583  queryInfo, /* i */
3584  localScalingFactor /* i */
3585  );
3586  if (*pStatusCode != 0) {
3587  goto match_loop_cleanup;
3588  }
3589 
3590  hsp_list = Blast_HSPListNew(0);
3591  for (frame_index = 0;
3592  frame_index < numFrames;
3593  frame_index++, context_index++) {
3594  incoming_aligns = incoming_align_set[frame_index];
3595  if (!incoming_aligns) {
3596  continue;
3597  }
3598  /*
3599  * All alignments in thisMatch should be to the same query
3600  */
3601  kbp = sbp->kbp_gap[context_index];
3602  if (smithWaterman) {
3603  *pStatusCode =
3605  alignments,
3606  redo_align_params,
3607  incoming_aligns,
3608  numAligns[frame_index],
3609  kbp->Lambda,
3610  kbp->logK,
3611  &matchingSeq,
3612  query_info,
3613  numQueries,
3614  matrix,
3615  BLASTAA_SIZE,
3616  NRrecord,
3617  forbidden,
3618  redoneMatches,
3619  &pvalueForThisPair,
3620  compositionTestIndex,
3621  &LambdaRatio
3622  );
3623  } else {
3624  *pStatusCode =
3626  alignments, // thread-local
3627  redo_align_params, // thread-local
3628  incoming_aligns, // thread-local
3629  numAligns[frame_index], // local
3630  kbp->Lambda, // thread-local
3631  &matchingSeq, // thread-local
3632  -1, // const
3633  query_info, // thread-local
3634  numContexts, // thread-local
3635  matrix, // thread-local
3636  BLASTAA_SIZE, // const
3637  NRrecord, // thread-local
3638  &pvalueForThisPair, // local
3639  compositionTestIndex, // thread-local
3640  &LambdaRatio // local
3641  );
3642  }
3643 
3644  if (*pStatusCode != 0) {
3645  goto match_loop_cleanup;
3646  }
3647 
3648  if (alignments[context_index] != NULL) {
3649  Int2 qframe = frame_index;
3650  if (program_number == eBlastTypeBlastx) {
3651  if (qframe < 3) {
3652  qframe++;
3653  } else {
3654  qframe = 2 - qframe;
3655  }
3656  }
3657  *pStatusCode =
3659  &alignments[context_index],
3660  matchingSeq.index,
3661  queryInfo, qframe);
3662  if (*pStatusCode) {
3663  goto match_loop_cleanup;
3664  }
3665  }
3666  BlastCompo_AlignmentsFree(&incoming_aligns, NULL);
3667  incoming_align_set[frame_index] = NULL;
3668  }
3669 
3670  if (hsp_list->hspcnt > 1) {
3672  &hsp_list->hspcnt);
3673  }
3674  *pStatusCode =
3675  s_HitlistEvaluateAndPurge(&best_score, &best_evalue,
3676  hsp_list,
3677  seqSrc,
3678  matchingSeq.length,
3679  program_number,
3680  queryInfo, context_index,
3681  sbp, hitParams,
3682  pvalueForThisPair, LambdaRatio,
3683  matchingSeq.index);
3684  if (*pStatusCode != 0) {
3685  goto query_loop_cleanup;
3686  }
3687  if (best_evalue <= hitParams->options->expect_value) {
3688  /* The best alignment is significant */
3689  s_HSPListNormalizeScores(hsp_list, kbp->Lambda, kbp->logK,
3690  localScalingFactor);
3692  queryBlk,
3693  queryInfo,
3694  subjectBlk,
3695  seqSrc,
3696  hsp_list,
3697  scoringParams->options,
3698  genetic_code_string,
3699  sbp,
3700  ranges
3701  );
3702  if (!seqSrc) {
3703  goto query_loop_cleanup;
3704  }
3706  &redoneMatches[query_index],
3707  best_evalue,
3708  best_score,
3709  localMatch->oid
3710  )) {
3711  *pStatusCode =
3713  &redoneMatches[query_index],
3714  hsp_list,
3715  best_evalue,
3716  best_score,
3717  localMatch->oid,
3718  &discarded_aligns
3719  );
3720  if (*pStatusCode == 0) {
3721  hsp_list = NULL;
3722  }
3723  } else {
3724  hsp_list = Blast_HSPListFree(hsp_list);
3725  }
3726 
3727  if (*pStatusCode) {
3728  goto query_loop_cleanup;
3729  }
3730  if (discarded_aligns != NULL) {
3731  Blast_HSPListFree(discarded_aligns);
3732  }
3733  }
3734 query_loop_cleanup:
3735 match_loop_cleanup:
3736  if (seqSrc) {
3737  localMatch = Blast_HSPListFree(localMatch);
3738  } else {
3739  Blast_HSPListSwap(localMatch, hsp_list);
3740  localMatch->oid = hsp_list->oid;
3741  }
3742  hsp_list = Blast_HSPListFree(hsp_list);
3743 
3744  if (*pStatusCode != 0) {
3745  for (context_index = 0;
3746  context_index < numContexts;
3747  context_index++) {
3749  &alignments[context_index],
3751  );
3752  }
3753  }
3754  s_MatchingSequenceRelease(&matchingSeq);
3755  BlastCompo_AlignmentsFree(&incoming_aligns, NULL);
3756  if ((actual_num_threads > 1) &&
3757  (*pStatusCode != 0 || !seqSrc)) {
3758 #pragma omp critical(intrpt)
3759  interrupt = TRUE;
3760 #pragma omp flush(interrupt)
3761  continue;
3762  }
3763 
3764  } /* end of if(!interrupt) */
3765  }
3766 #pragma omp barrier
3767 
3768  /*
3769  * end of omp parallel section
3770  */
3771  }
3772 
3773 function_cleanup:
3774 
3775  for (i = 0; i < actual_num_threads; ++i) {
3776  if (status_code_tld[i] != 0) {
3777  status_code = status_code_tld[i];
3778  }
3779  }
3780  for (i = 0; i < actual_num_threads; ++i) {
3781  if (seqSrc && status_code == 0) {
3783  results_tld[i],
3784  redoneMatches_tld[i],
3785  hitParams->options->hitlist_size
3786  );
3787  if (redoneMatches_tld[i] != NULL) {
3788  int qi;
3789  for (qi = 0; qi < numQueries; ++qi) {
3790  sfree(redoneMatches_tld[i][qi].array);
3791  sfree(redoneMatches_tld[i][qi].heapArray);
3792  }
3793  s_ClearHeap(redoneMatches_tld[i]);
3794  }
3795  } else {
3796  if (redoneMatches_tld[i] != NULL) {
3797  int qi;
3798  for (qi = 0; qi < numQueries; ++qi) {
3799  sfree(redoneMatches_tld[i][qi].array);
3800  sfree(redoneMatches_tld[i][qi].heapArray);
3801  }
3802  s_ClearHeap(redoneMatches_tld[i]);
3803  }
3804  }
3805  sfree(redoneMatches_tld[i]);
3806  }
3807  if (redoneMatches != NULL) {
3808  int qi;
3809  for (qi = 0; qi < numQueries; ++qi) {
3810  sfree(redoneMatches[qi].array);
3811  sfree(redoneMatches[qi].heapArray);
3812  }
3813  s_ClearHeap(redoneMatches);
3814  }
3815 
3816  if (hsp_stream != NULL) {
3817  /* Reduce results from all threads and continue with business as usual */
3818  SThreadLocalDataArray* thread_data =
3819  SThreadLocalDataArrayNew(actual_num_threads);
3820  int i;
3821  for (i = 0; i < actual_num_threads; ++i) {
3822  SThreadLocalData* tdi = thread_data->tld[i];
3823  BlastHSPResults* rdi = results_tld[i];
3824  tdi->hit_params = hit_params_tld[i];
3825  hit_params_tld[i] = NULL;
3826  tdi->results =
3827  (BlastHSPResults*) calloc(1, sizeof(BlastHSPResults));
3828  tdi->results->num_queries = rdi->num_queries;
3829  tdi->results->hitlist_array =
3830  (BlastHitList**) calloc(
3831  tdi->results->num_queries,
3832  sizeof(BlastHitList*)
3833  );
3834  int j;
3835  for (j = 0; j < tdi->results->num_queries; ++j) {
3836  tdi->results->hitlist_array[j] = rdi->hitlist_array[j];
3837  rdi->hitlist_array[j] = NULL;
3838  }
3839  }
3840  local_results = SThreadLocalDataArrayConsolidateResults(thread_data);
3841  ASSERT(local_results);
3842 
3843  /* post-traceback pipes */
3844  BlastHSPStreamTBackClose(hsp_stream, local_results);
3845 
3846  for (i = 0; i < local_results->num_queries; ++i) {
3847  results->hitlist_array[i] = local_results->hitlist_array[i];
3848  local_results->hitlist_array[i] = NULL;
3849  }
3850  for (i = 0; i < actual_num_threads; ++i) {
3851  thread_data->tld[i]->hit_params = NULL;
3852  int j;
3853  for (j = 0; j < local_results->num_queries; ++j) {
3854  thread_data->tld[i]->results->hitlist_array[j] =
3856  thread_data->tld[i]->results->hitlist_array[j]
3857  );
3858  }
3859  sfree(thread_data->tld[i]->results->hitlist_array);
3860  sfree(thread_data->tld[i]->results);
3861  thread_data->tld[i] = SThreadLocalDataFree(thread_data->tld[i]);
3862  }
3863  sfree(thread_data->tld);
3864  sfree(thread_data);
3865  Blast_HSPResultsFree(local_results);
3866  }
3867 
3868  if (redoneMatches != NULL) {
3869  for (query_index = 0; query_index < numQueries; query_index++) {
3870  BlastCompo_HeapRelease(&redoneMatches[query_index]);
3871  }
3872  sfree(redoneMatches);
3873  redoneMatches = NULL;
3874  }
3875  if (gapAlign != NULL) {
3876  gapAlign = BLAST_GapAlignStructFree(gapAlign);
3877  }
3878  s_RestoreSearch(sbp, scoringParams, savedParams, queryBlk->length,
3879  positionBased, compo_adjust_mode);
3880  s_SavedParametersFree(&savedParams);
3881 
3882  for (i = 0; i < actual_num_threads; ++i) {
3883  s_BlastScoreBlk_Free(&sbp_tld[i]);
3884  gap_align_tld[i]->sbp = NULL;
3885  s_BlastGapAlignStruct_Free(gap_align_tld[i]);
3886  Blast_RedoAlignParamsFree(&redo_align_params_tld[i]);
3887  sfree(alignments_tld[i]);
3888  sfree(incoming_align_set_tld[i]);
3889  Blast_CompositionWorkspaceFree(&NRrecord_tld[i]);
3890  s_SavedParametersFree(&savedParams_tld[i]);
3891  BlastSeqSrcFree(seqsrc_tld[i]);
3892  results_tld[i] = Blast_HSPResultsFree(results_tld[i]);
3893  s_FreeBlastCompo_QueryInfoArray(&query_info_tld[i], numContexts);
3894  if (smithWaterman)
3895  Blast_ForbiddenRangesRelease(forbidden_tld[i]);
3896  }
3897  sfree(alignments_tld);
3898  sfree(compositionTestIndex_tld);
3899  sfree(gap_align_tld);
3900  sfree(gapping_params_context_tld);
3901  sfree(hit_params_tld);
3902  sfree(incoming_align_set_tld);
3903  sfree(matrix_tld);
3904  sfree(NRrecord_tld);
3905  sfree(numContexts_tld);
3906  sfree(numQueries_tld);
3907  sfree(query_info_tld);
3908  sfree(redo_align_params_tld);
3909  sfree(redoneMatches_tld);
3910  sfree(results_tld);
3911  sfree(savedParams_tld);
3912  sfree(sbp_tld);
3913  sfree(score_params_tld);
3914  sfree(seqsrc_tld);
3915  sfree(status_code_tld);
3916  sfree(subjectBlk_tld);
3917  sfree(forbidden_tld);
3918  sfree(theseMatches);
3919 
3920  return (Int2) status_code;
3921 }
3922 
3923 
3924 
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
#define CODON_LENGTH
Codons are always of length 3.
Definition: blast_def.h:63
BLAST filtering functions.
void Blast_MaskTheResidues(Uint1 *buffer, Int4 length, Boolean is_na, const BlastSeqLoc *mask_loc, Boolean reverse, Int4 offset)
Masks the letters in buffer.
Int2 BlastFilteringOptionsFromString(EBlastProgramType program_number, const char *instructions, SBlastFilterOptions **filtering_options, Blast_Message **blast_message)
Produces SBlastFilterOptions from a string that has been traditionally supported in blast.
Definition: blast_filter.c:436
Int2 BlastSetUp_Filter(EBlastProgramType program_number, Uint1 *sequence, Int4 length, Int4 offset, const SBlastFilterOptions *filter_options, BlastSeqLoc **seqloc_retval, Blast_Message **blast_message)
Runs seg filtering functions, according to the filtering options, returns BlastSeqLoc*.
BlastSeqLoc * BlastSeqLocFree(BlastSeqLoc *loc)
Deallocate all BlastSeqLoc objects in a chain.
Definition: blast_filter.c:737
Int4 ALIGN_EX(const Uint1 *A, const Uint1 *B, Int4 M, Int4 N, Int4 *a_offset, Int4 *b_offset, GapPrelimEditBlock *edit_block, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 query_offset, Boolean reversed, Boolean reverse_sequence, Boolean *fence_hit)
Low level function to perform dynamic programming gapped extension with traceback.
GapEditScript * Blast_PrelimEditBlockToGapEditScript(GapPrelimEditBlock *rev_prelim_tback, GapPrelimEditBlock *fwd_prelim_tback)
Convert the initial list of traceback actions from a non-OOF gapped alignment into a blast edit scrip...
Structures and functions prototypes used for BLAST gapped extension.
Int2 BLAST_GappedAlignmentWithTraceback(EBlastProgramType program, const Uint1 *query, const Uint1 *subject, BlastGapAlignStruct *gap_align, const BlastScoringParameters *score_params, Int4 q_start, Int4 s_start, Int4 query_length, Int4 subject_length, Boolean *fence_hit)
Perform a gapped alignment with traceback.
Int2 BLAST_GapAlignStructNew(const BlastScoringParameters *score_params, const BlastExtensionParameters *ext_params, Uint4 max_subject_length, BlastScoreBlk *sbp, BlastGapAlignStruct **gap_align_ptr)
Initializes the BlastGapAlignStruct structure.
BlastGapAlignStruct * BLAST_GapAlignStructFree(BlastGapAlignStruct *gap_align)
Deallocates memory in the BlastGapAlignStruct structure.
Private interface for blast_gapalign.c.
Structures and API used for saving BLAST hits.
BlastHSPResults * Blast_HSPResultsFree(BlastHSPResults *results)
Deallocate memory for BLAST results.
Definition: blast_hits.c:3358
Int2 Blast_HSPInit(Int4 query_start, Int4 query_end, Int4 subject_start, Int4 subject_end, Int4 query_gapped_start, Int4 subject_gapped_start, Int4 query_context, Int2 query_frame, Int2 subject_frame, Int4 score, GapEditScript **gap_edit, BlastHSP **ret_hsp)
Allocates BlastHSP and inits with information from input.
Definition: blast_hits.c:151
Int2 Blast_HSPGetNumIdentitiesAndPositives(const Uint1 *query, const Uint1 *subject, BlastHSP *hsp, const BlastScoringOptions *score_options, Int4 *align_length_ptr, const BlastScoreBlk *sbp)
Calculate number of identities and positives in an HSP and set the BlastHSP::num_ident and BlastHSP::...
Definition: blast_hits.c:966
BlastHitList * Blast_HitListFree(BlastHitList *hitlist)
Deallocate memory for the hit list.
Definition: blast_hits.c:3131
Int2 Blast_HSPResultsReverseOrder(BlastHSPResults *results)
Reverse order of HSP lists in each hit list in the BLAST results.
Definition: blast_hits.c:3412
BlastHitList * Blast_HitListNew(Int4 hitlist_size)
Allocate memory for a hit list of a given size.
Definition: blast_hits.c:3117
BlastHSPList * Blast_HSPListNew(Int4 hsp_max)
Creates HSP list structure with a default size HSP array.
Definition: blast_hits.c:1558
BlastHSPResults * Blast_HSPResultsNew(Int4 num_queries)
Initialize the results structure.
Definition: blast_hits.c:3338
Int2 Blast_HSPListGetEvalues(EBlastProgramType program_number, const BlastQueryInfo *query_info, Int4 subject_length, BlastHSPList *hsp_list, Boolean gapped_calculation, Boolean RPS_prelim, const BlastScoreBlk *sbp, double gap_decay_rate, double scaling_factor)
Calculate the expected values for all HSPs in a hit list, without using the sum statistics.
Definition: blast_hits.c:1811
BlastHSP * Blast_HSPFree(BlastHSP *hsp)
Deallocate memory for an HSP structure.
Definition: blast_hits.c:130
const Uint1 * Blast_HSPGetTargetTranslation(SBlastTargetTranslation *target_t, const BlastHSP *hsp, Int4 *translated_length)
Returns a buffer with a protein translated from nucleotide.
Definition: blast_hits.c:1147
Int2 Blast_HSPListSaveHSP(BlastHSPList *hsp_list, BlastHSP *hsp)
Saves HSP information into a BlastHSPList structure.
Definition: blast_hits.c:1754
BlastHSPList * Blast_HSPListFree(BlastHSPList *hsp_list)
Deallocate memory for an HSP list structure as well as all its components.
Definition: blast_hits.c:1542
void Blast_HSPListSwap(BlastHSPList *list1, BlastHSPList *list2)
Swaps the two HSP lists via structure assignment.
Definition: blast_hits.c:1614
void Blast_HSPListSortByScore(BlastHSPList *hsp_list)
Sort the HSPs in an HSP list by score.
Definition: blast_hits.c:1374
Int2 Blast_HSPListReapByEvalue(BlastHSPList *hsp_list, const BlastHitSavingOptions *hit_options)
Discard the HSPs above the e-value threshold from the HSP list.
Definition: blast_hits.c:1976
Int2 Blast_HitListUpdate(BlastHitList *hit_list, BlastHSPList *hsp_list)
Insert a new HSP list into the hit list.
Definition: blast_hits.c:3235
Utilities for dealing with BLAST HSPs in the core of BLAST.
#define CONTAINED_IN_HSP(a, b, c, d, e, f)
TRUE if c is between a and b; f between d and e.
const int kBlastHSPStream_Eof
Return value when the end of the stream is reached (applicable to read method only)
int BlastHSPStreamRead(BlastHSPStream *hsp_stream, BlastHSPList **hsp_list)
Invokes the user-specified read function for this BlastHSPStream implementation.
void BlastHSPStreamTBackClose(BlastHSPStream *hsp_stream, BlastHSPResults *results)
Closes the BlastHSPStream structure after traceback.
Private interfaces to support the multi-threaded traceback in conjunction with the BlastHSPStream.
Int2 Blast_RedoAlignmentCore_MT(EBlastProgramType program_number, Uint4 num_threads, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)
Recompute alignments for each match found by the gapped BLAST algorithm.
Definition: blast_kappa.c:2981
struct BlastKappa_SavedParameters BlastKappa_SavedParameters
A BlastKappa_SavedParameters holds the value of certain search parameters on entry to RedoAlignmentCo...
static void s_RestoreSearch(BlastScoreBlk *sbp, BlastScoringParameters *scoring, const BlastKappa_SavedParameters *searchParams, int query_length, Boolean positionBased, ECompoAdjustModes compo_adjust_mode)
Restore the parameters that were adjusted to their original values.
Definition: blast_kappa.c:2148
static int s_RecordInitialSearch(BlastKappa_SavedParameters *searchParams, BlastScoreBlk *sbp, const BlastScoringParameters *scoring, int query_length, ECompoAdjustModes compo_adjust_mode, Boolean positionBased)
Record the initial value of the search parameters that are to be adjusted.
Definition: blast_kappa.c:2060
#define KAPPA_BLASTP_NO_SEG_SEQUENCE
Compile-time option; if set to a true value, then blastp runs that use Blast_RedoAlignmentCore to com...
Definition: blast_kappa.c:77
#define SCALING_FACTOR
SCALING_FACTOR is a multiplicative factor used to get more bits of precision in the integer matrix sc...
Definition: blast_kappa.c:676
static BlastCompo_Alignment * s_RedoOneAlignment(BlastCompo_Alignment *in_align, EMatrixAdjustRule matrix_adjust_rule, BlastCompo_SequenceData *query_data, BlastCompo_SequenceRange *query_range, int ccat_query_length, BlastCompo_SequenceData *subject_data, BlastCompo_SequenceRange *subject_range, int full_subject_length, BlastCompo_GappingParams *gapping_params)
A callback: calculate the traceback for one alignment by performing an x-drop alignment in both direc...
Definition: blast_kappa.c:1899
static BlastGapAlignStruct * s_BlastGapAlignStruct_Copy(BlastGapAlignStruct *orig, BlastScoreBlk *sbp)
Create a "deep" copy of a BlastGapAlignStruct structure.
Definition: blast_kappa.c:2604
static int s_MatrixInfoInit(Blast_MatrixInfo *self, BLAST_SequenceBlk *queryBlk, BlastScoreBlk *sbp, double scale_factor, const char *matrixName)
Initialize an object of type Blast_MatrixInfo.
Definition: blast_kappa.c:2199
static int s_ScalePosMatrix(int **fillPosMatrix, const char *matrixName, double **posFreqs, Uint1 *query, int queryLength, BlastScoreBlk *sbp, double scale_factor)
Produce a scaled-up version of the position-specific matrix with a given set of position-specific res...
Definition: blast_kappa.c:694
static void s_SWFindFinalEndsUsingXdrop(BlastCompo_SequenceData *query, Int4 queryStart, Int4 queryEnd, BlastCompo_SequenceData *subject, Int4 matchStart, Int4 matchEnd, BlastGapAlignStruct *gap_align, const BlastScoringParameters *scoringParams, Int4 score, Int4 *queryAlignmentExtent, Int4 *matchAlignmentExtent, Int4 *newScore)
Redo an S-W alignment using an x-drop alignment.
Definition: blast_kappa.c:844
static void s_HSPListNormalizeScores(BlastHSPList *hsp_list, double lambda, double logK, double scoreDivisor)
Given a list of HSPs with (possibly) high-precision scores, rescale the scores to have standard preci...
Definition: blast_kappa.c:102
static int s_GetPosBasedStartFreqRatios(double **returnRatios, Int4 numPositions, Uint1 *query, const char *matrixName, double **startNumerator)
Fill a two-dimensional array with the frequency ratios that underlie a position specific score matrix...
Definition: blast_kappa.c:592
static int s_NewAlignmentUsingXdrop(BlastCompo_Alignment **pnewAlign, Int4 *pqueryEnd, Int4 *pmatchEnd, Int4 queryStart, Int4 matchStart, Int4 score, BlastCompo_SequenceData *query, BlastCompo_SequenceRange *query_range, Int4 ccat_query_length, BlastCompo_SequenceData *subject, BlastCompo_SequenceRange *subject_range, Int4 full_subject_length, BlastCompo_GappingParams *gapping_params, EMatrixAdjustRule matrix_adjust_rule)
A callback used when performing SmithWaterman alignments: Calculate the traceback for one alignment b...
Definition: blast_kappa.c:1813
static BlastCompo_QueryInfo * s_GetQueryInfo(Uint1 *query_data, const BlastQueryInfo *blast_query_info, Boolean skip)
Save information about all queries in an array of objects of type BlastCompo_QueryInfo.
Definition: blast_kappa.c:2309
static void s_BlastGapAlignStruct_Free(BlastGapAlignStruct *copy)
Free a BlastGapAlignStruct copy created by s_BlastGapAlignStruct_Copy.
Definition: blast_kappa.c:2532
#define KAPPA_TBLASTN_NO_SEG_SEQUENCE
Compile-time option; if set to a true value, then tblastn runs that use Blast_RedoAlignmentCore to com...
Definition: blast_kappa.c:85
static void s_ComputeNumIdentities(const BLAST_SequenceBlk *query_blk, const BlastQueryInfo *query_info, BLAST_SequenceBlk *subject_blk, const BlastSeqSrc *seq_src, BlastHSPList *hsp_list, const BlastScoringOptions *scoring_options, const Uint1 *gen_code_string, const BlastScoreBlk *sbp, BlastSeqSrcSetRangesArg *ranges)
Compute the number of identities for the HSPs in the hsp_list.
Definition: blast_kappa.c:459
static int s_FindNumIdentical(Uint1 *query_seq, const Uint8 *query_hashes, int query_len, Uint1 *subject_seq, int subject_len, int max_shift)
Find a local number of identical residues in two aligned sequences by finding word matches and doing ...
Definition: blast_kappa.c:1143
static void s_FillResultsFromCompoHeaps(BlastHSPResults *results, BlastCompo_Heap heaps[], Int4 hitlist_size)
Convert an array of BlastCompo_Heap objects to a BlastHSPResults structure.
Definition: blast_kappa.c:2493
static int s_ResultHspToDistinctAlign(BlastCompo_Alignment **self, int *numAligns, BlastHSP *hsp_array[], Int4 hspcnt, int init_context, const BlastQueryInfo *queryInfo, double localScalingFactor)
Convert an array of HSPs to a list of BlastCompo_Alignment objects.
Definition: blast_kappa.c:770
static const Blast_RedoAlignCallbacks redo_align_callbacks
Callbacks used by the Blast_RedoOneMatch* routines.
Definition: blast_kappa.c:2391
static int s_SequenceGetRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *s_range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceData *query, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)
Obtain the sequence data that lies within the given range.
Definition: blast_kappa.c:1671
static int s_DoSegSequenceData(BlastCompo_SequenceData *seqData, EBlastProgramType program_name, Boolean *is_seq_biased)
Filter low complexity regions from the sequence data; uses the SEG algorithm.
Definition: blast_kappa.c:1428
static BlastCompo_GappingParams * s_GappingParamsNew(BlastKappa_GappingParamsContext *context, const BlastExtensionParameters *extendParams, int num_queries)
Create a new object of type BlastCompo_GappingParams.
Definition: blast_kappa.c:2355
static void s_SavedParametersFree(BlastKappa_SavedParameters **searchParams)
Release the data associated with a BlastKappa_SavedParameters and delete the object.
Definition: blast_kappa.c:1978
static void s_FreeEditScript(void *edit_script)
A callback used to free an EditScript that has been stored in a BlastCompo_Alignment.
Definition: blast_kappa.c:283
static double s_CalcLambda(double probs[], int min_score, int max_score, double lambda0)
A callback routine: compute lambda for the given score probabilities.
Definition: blast_kappa.c:551
static int s_ExtendLeft(Uint1 *query_seq, int query_len, Uint1 *subject_seq, int subject_len, int max_shift, int *query_ext_len, int *subject_ext_len, int *align_len)
Extend left from the end of the sequence and subject ranges and count identities.
Definition: blast_kappa.c:1039
static void s_HitlistReapContained(BlastHSP *hsp_array[], Int4 *hspcnt)
Remove from a hitlist all HSPs that are completely contained in an HSP that occurs earlier in the lis...
Definition: blast_kappa.c:224
struct BlastKappa_SequenceInfo BlastKappa_SequenceInfo
BLAST-specific information that is associated with a BlastCompo_MatchingSequence.
static int s_HSPListFromDistinctAlignments(BlastHSPList *hsp_list, BlastCompo_Alignment **alignments, int oid, const BlastQueryInfo *queryInfo, int frame)
Converts a list of objects of type BlastCompo_Alignment to a new object of type BlastHSPList and ret...
Definition: blast_kappa.c:305
static void s_FreeBlastCompo_QueryInfoArray(BlastCompo_QueryInfo **query_info, int num_queries)
Definition: blast_kappa.c:2279
#define NEAR_IDENTICAL_BITS_PER_POSITION
Definition: blast_kappa.c:2399
static void s_MatchingSequenceRelease(BlastCompo_MatchingSequence *self)
Release the resources associated with a matching sequence.
Definition: blast_kappa.c:908
static int s_ExtendRight(Uint1 *query_seq, int query_len, Uint1 *subject_seq, int subject_len, int max_shift, int *query_ext_len, int *subject_ext_len, int *align_len)
Do a simple gapped extension to the right from the beginning of query and subject ranges examining on...
Definition: blast_kappa.c:944
static Boolean s_TestNearIdentical(const BlastCompo_SequenceData *seqData, const int seqOffset, const BlastCompo_SequenceData *queryData, const int queryOffset, const Uint8 *query_words, const BlastCompo_Alignment *align)
Test whether the aligned parts of two sequences that have a high-scoring gapless alignment are nearly...
Definition: blast_kappa.c:1259
static BlastScoreBlk * s_BlastScoreBlk_Copy(EBlastProgramType program, BlastScoreBlk *orig, Uint1 alphabet_code, Int4 number_of_contexts)
Create a "deep" copy of a BlastScoreBlk structure.
Definition: blast_kappa.c:2765
struct BlastKappa_GappingParamsContext BlastKappa_GappingParamsContext
Data and data-structures needed to perform a gapped alignment.
static Uint8 s_GetHash(const Uint1 *data, int word_size)
Get hash for a word of word_size residues assuming 28-letter alphabet.
Definition: blast_kappa.c:1117
static int s_SequenceGetProteinRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)
Get a string of protein data from a protein sequence.
Definition: blast_kappa.c:1573
static void s_BlastScoreBlk_Free(BlastScoreBlk **copy)
Free a BlastScoreBlk copy created by s_BlastScoreBlk_Copy.
Definition: blast_kappa.c:2743
static int s_MatchingSequenceInitialize(BlastCompo_MatchingSequence *self, EBlastProgramType program_number, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, Int4 subject_index, BlastSeqSrcSetRangesArg *ranges)
Initialize a new matching sequence, obtaining information about the sequence from the search.
Definition: blast_kappa.c:1357
static Blast_RedoAlignParams * s_GetAlignParams(BlastKappa_GappingParamsContext *context, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, const BlastHitSavingParameters *hitParams, const BlastExtensionParameters *extendParams)
Read the parameters required for the Blast_RedoOneMatch* functions from the corresponding parameters ...
Definition: blast_kappa.c:2407
Int2 Blast_RedoAlignmentCore(EBlastProgramType program_number, BLAST_SequenceBlk *queryBlk, const BlastQueryInfo *queryInfo, BlastScoreBlk *sbp, BLAST_SequenceBlk *subjectBlk, const BlastSeqSrc *seqSrc, Int4 default_db_genetic_code, BlastHSPList *thisMatch, BlastHSPStream *hsp_stream, BlastScoringParameters *scoringParams, const BlastExtensionParameters *extendParams, const BlastHitSavingParameters *hitParams, const PSIBlastOptions *psiOptions, BlastHSPResults *results)
Recompute alignments for each match found by the gapped BLAST algorithm.
Definition: blast_kappa.c:2942
static int s_HitlistEvaluateAndPurge(int *pbestScore, double *pbestEvalue, BlastHSPList *hsp_list, const BlastSeqSrc *seqSrc, int subject_length, EBlastProgramType program_number, const BlastQueryInfo *queryInfo, int context_index, BlastScoreBlk *sbp, const BlastHitSavingParameters *hitParams, double pvalueForThisPair, double LambdaRatio, int subject_id)
Add evalues to a list of HSPs and remove those that do not have a sufficiently good (low) evalue.
Definition: blast_kappa.c:395
static int s_CreateWordArray(const Uint1 *seq_data, Int4 seq_len, Uint8 **words)
Definition: blast_kappa.c:2244
static void s_AdjustEvaluesForComposition(BlastHSPList *hsp_list, double comp_p_value, const BlastSeqSrc *seqSrc, Int4 subject_length, const BlastContextInfo *query_context, double LambdaRatio, int subject_id)
Adjusts the E-values in a BLAST_HitList to be composites of a composition-based P-value and a score/a...
Definition: blast_kappa.c:135
static int s_GetStartFreqRatios(double **returnRatios, const char *matrixName)
Fill a two-dimensional array with the frequency ratios that underlie the named score matrix.
Definition: blast_kappa.c:649
Int4 s_GetSubjectLength(Int4 total_subj_length, EBlastProgramType program_number)
Definition: blast_kappa.c:364
static BlastCompo_Alignment * s_NewAlignmentFromGapAlign(BlastGapAlignStruct *gap_align, GapEditScript **edit_script, BlastCompo_SequenceRange *query_range, BlastCompo_SequenceRange *subject_range, EMatrixAdjustRule matrix_adjust_rule)
Reads a BlastGapAlignStruct that has been used to compute a traceback, and return a BlastCompo_Alignm...
Definition: blast_kappa.c:1748
static BlastKappa_SavedParameters * s_SavedParametersNew(Int4 rows, Int4 numQueries, ECompoAdjustModes compo_adjust_mode, Boolean positionBased)
Create a new instance of BlastKappa_SavedParameters.
Definition: blast_kappa.c:2009
static void s_RescaleSearch(BlastScoreBlk *sbp, BlastScoringParameters *sp, int num_queries, double scale_factor)
Rescale the search parameters in the search object and options object to obtain more precision.
Definition: blast_kappa.c:2117
static int s_SequenceGetTranslatedRange(const BlastCompo_MatchingSequence *self, const BlastCompo_SequenceRange *range, BlastCompo_SequenceData *seqData, const BlastCompo_SequenceRange *q_range, BlastCompo_SequenceData *queryData, const Uint8 *query_words, const BlastCompo_Alignment *align, const Boolean shouldTestIdentical, const ECompoAdjustModes compo_adjust_mode, const Boolean isSmithWaterman, Boolean *subject_maybe_biased)
Obtain a string of translated data.
Definition: blast_kappa.c:1475
#define BLASTP_MASK_INSTRUCTIONS
Default instructions and mask residue for SEG filtering.
Definition: blast_kappa.c:1416
static void s_ClearHeap(BlastCompo_Heap *self)
Remove all matches from a BlastCompo_Heap.
Definition: blast_kappa.c:2518
Header file for composition-based statistics.
#define PSI_INCLUSION_ETHRESH
Defaults for PSI-BLAST and DELTA-BLAST options.
SBlastFilterOptions * SBlastFilterOptionsFree(SBlastFilterOptions *filter_options)
Frees SBlastFilterOptions and all subservient structures.
@ eSmithWatermanTbck
Smith-Waterman finds optimal scores, then ALIGN_EX to find alignment.
int Kappa_impalaScaling(Kappa_posSearchItems *posSearch, Kappa_compactSearchItems *compactSearch, double scalingFactor, Boolean doBinarySearch, BlastScoreBlk *sbp)
Copied from posit2.c.
Definition: blast_posit.c:393
Kappa_compactSearchItems * Kappa_compactSearchItemsNew(const Uint1 *query, unsigned int queryLength, BlastScoreBlk *sbp)
Creates a new Kappa_compactSearchItems structure.
Definition: blast_posit.c:101
Kappa_posSearchItems * Kappa_posSearchItemsFree(Kappa_posSearchItems *posSearch)
Deallocates the Kappa_posSearchItems structure.
Definition: blast_posit.c:75
Kappa_compactSearchItems * Kappa_compactSearchItemsFree(Kappa_compactSearchItems *compactSearch)
Deallocates the Kappa_compactSearchItems structure.
Definition: blast_posit.c:140
Kappa_posSearchItems * Kappa_posSearchItemsNew(unsigned int queryLength, const char *matrix_name, int **posPrivateMatrix, double **posFreqs)
Allocates a new Kappa_posSearchItems structure.
Definition: blast_posit.c:44
Port of posit.h structures and impalaScaling for implementing composition based statistics for PSI-BL...
Boolean Blast_QueryIsPssm(EBlastProgramType p)
Returns true if the query is PSSM.
Definition: blast_program.c:46
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastx
Definition: blast_program.h:75
@ eBlastTypeRpsTblastn
Definition: blast_program.h:85
@ eBlastTypePsiBlast
Definition: blast_program.h:82
@ eBlastTypeRpsBlast
Definition: blast_program.h:84
@ eBlastTypeTblastn
Definition: blast_program.h:77
@ eBlastTypeBlastp
Definition: blast_program.h:73
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Definition: blast_program.c:63
int _PSIConvertFreqRatiosToPSSM(_PSIInternalPssmData *internal_pssm, const Uint1 *query, const BlastScoreBlk *sbp, const double *std_probs)
Converts the PSSM's frequency ratios obtained in the previous stage to a PSSM of scores.
_PSIInternalPssmData * _PSIInternalPssmDataNew(Uint4 query_length, Uint4 alphabet_size)
Allocates a new _PSIInternalPssmData structure.
const double kPosEpsilon
minimum return value of s_computeRelativeEntropy
void _PSICopyMatrix_int(int **dest, int **src, unsigned int ncols, unsigned int nrows)
Copies src matrix into dest matrix, both of which must be int matrices with dimensions ncols by nrows...
_PSIInternalPssmData * _PSIInternalPssmDataFree(_PSIInternalPssmData *pssm_data)
Deallocates the _PSIInternalPssmData structure.
void _PSICopyMatrix_double(double **dest, double **src, unsigned int ncols, unsigned int nrows)
Copies src matrix into dest matrix, both of which must be double matrices with dimensions ncols by nr...
Private interface for Position Iterated BLAST API, contains the PSSM generation engine.
Int4 BlastSeqSrcGetSeqLen(const BlastSeqSrc *seq_src, void *oid)
Retrieve sequence length (number of residues/bases)
Definition: blast_seqsrc.c:281
void BlastSeqSrcReleaseSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)
Deallocate individual sequence.
Definition: blast_seqsrc.c:289
BlastSeqSrc * BlastSeqSrcCopy(const BlastSeqSrc *seq_src)
Copy function: needed to guarantee thread safety.
Definition: blast_seqsrc.c:138
BlastSeqSrc * BlastSeqSrcFree(BlastSeqSrc *seq_src)
Frees the BlastSeqSrc structure by invoking the destructor function set by the user-defined construct...
Definition: blast_seqsrc.c:112
Int4 BlastSeqSrcGetMaxSeqLen(const BlastSeqSrc *seq_src)
Get the length of the longest sequence in the sequence source.
Definition: blast_seqsrc.c:193
Boolean BlastSeqSrcGetSupportsPartialFetching(const BlastSeqSrc *seq_src)
Find if the Blast Sequence Source supports partial fetching.
Definition: blast_seqsrc.c:251
Int2 BlastSeqSrcGetSequence(const BlastSeqSrc *seq_src, BlastSeqSrcGetSeqArg *getseq_arg)
Retrieve an individual sequence.
Definition: blast_seqsrc.c:271
BlastScoreBlk * BlastScoreBlkFree(BlastScoreBlk *sbp)
Deallocates BlastScoreBlk as well as all associated structures.
Definition: blast_stat.c:965
double Blast_KarlinLambdaNR(Blast_ScoreFreq *sfp, double initialLambdaGuess)
Calculates the parameter Lambda given an initial guess for its value.
Definition: blast_stat.c:2566
double BLAST_KarlinEtoP(double x)
Convert an E-value to a P-value.
Definition: blast_stat.c:4172
Blast_KarlinBlk * Blast_KarlinBlkNew(void)
Callocs a Blast_KarlinBlk.
Definition: blast_stat.c:2860
Blast_KarlinBlk * Blast_KarlinBlkFree(Blast_KarlinBlk *kbp)
Deallocates the KarlinBlk.
Definition: blast_stat.c:956
double BLAST_KarlinPtoE(double p)
Convert a P-value to an E-value.
Definition: blast_stat.c:4158
Blast_ScoreFreq * Blast_ScoreFreqNew(Int4 score_min, Int4 score_max)
Creates a new structure to keep track of score frequencies for a scoring system.
Definition: blast_stat.c:2112
SPsiBlastScoreMatrix * SPsiBlastScoreMatrixNew(size_t ncols)
Allocates a new SPsiBlastScoreMatrix structure of dimensions ncols by BLASTAA_SIZE.
Definition: blast_stat.c:805
Int2 Blast_KarlinBlkCopy(Blast_KarlinBlk *kbp_to, Blast_KarlinBlk *kbp_from)
Copies contents of one Karlin block to another.
Definition: blast_stat.c:2870
BlastScoreBlk * BlastScoreBlkNew(Uint1 alphabet, Int4 number_of_contexts)
Allocates and initializes BlastScoreBlk.
Definition: blast_stat.c:884
Functions to do gapped alignment with traceback.
BlastSeqSrcSetRangesArg * BLAST_SetupPartialFetching(EBlastProgramType program_number, BlastSeqSrc *seq_src, const BlastHSPList **hsp_list, Int4 num_hsplists)
Attempts to set up partial fetching, if it fails (e.g.
EBlastEncoding Blast_TracebackGetEncoding(EBlastProgramType program_number)
Get the subject sequence encoding type for the traceback, given a program number.
SThreadLocalData * SThreadLocalDataFree(SThreadLocalData *tld)
Deallocate the SThreadLocalData structure passed in.
SThreadLocalDataArray * SThreadLocalDataArrayNew(Uint4 num_threads)
Allocate a new SThreadLocalDataArray structure.
BlastHSPResults * SThreadLocalDataArrayConsolidateResults(SThreadLocalDataArray *array)
Extracts a single, consolidated BlastHSPResults structure from its input for single threaded processi...
Private interface to support the multi-threaded traceback.
Various auxiliary BLAST utility functions.
BLAST_SequenceBlk * BlastSequenceBlkFree(BLAST_SequenceBlk *seq_blk)
Deallocate memory for a sequence block.
Definition: blast_util.c:245
int Blast_GetPartialTranslation(const Uint1 *nucl_seq, Int4 nucl_length, Int2 frame, const Uint1 *genetic_code, Uint1 **translation_buffer_ptr, Int4 *protein_length, Uint1 **mixed_seq_ptr)
Get one frame translation - needed when only parts of subject sequences are translated.
Definition: blast_util.c:1141
Int2 BlastTargetTranslationNew(BLAST_SequenceBlk *subject_blk, const Uint1 *gen_code_string, EBlastProgramType program_number, Boolean is_ooframe, SBlastTargetTranslation **target)
Sets up structure for target translation.
Definition: blast_util.c:1268
SBlastTargetTranslation * BlastTargetTranslationFree(SBlastTargetTranslation *target_t)
Free SBlastTargetTranslation.
Definition: blast_util.c:1248
double * BLAST_GetStandardAaProbabilities(void)
Get the standard amino acid probabilities.
Definition: blast_util.c:1323
ncbi::TMaskedQueryRegions mask
Declares a "heap" data structure that is used to store computed alignments when composition adjustmen...
void * BlastCompo_HeapPop(BlastCompo_Heap *self)
Remove and return the element in the BlastCompo_Heap with largest (worst) evalue; ties are broken acc...
Definition: compo_heap.c:444
void BlastCompo_HeapRelease(BlastCompo_Heap *self)
Release the storage associated with the fields of a BlastCompo_Heap.
Definition: compo_heap.c:432
int BlastCompo_HeapInsert(BlastCompo_Heap *self, void *alignments, double eValue, int score, int subject_index, void **discardedAligns)
Try to insert a collection of alignments into a heap.
Definition: compo_heap.c:330
int BlastCompo_HeapInitialize(BlastCompo_Heap *self, int heapThreshold, double ecutoff)
Initialize a new BlastCompo_Heap; parameters to this function correspond directly to fields in the Bl...
Definition: compo_heap.c:414
int BlastCompo_HeapWouldInsert(BlastCompo_Heap *self, double eValue, int score, int subject_index)
Return true if self may insert a match that had the given eValue, score and subject_index.
Definition: compo_heap.c:252
Blast_CompositionWorkspace * Blast_CompositionWorkspaceNew(void)
Create a new Blast_CompositionWorkspace object, allocating memory for all its component arrays.
int Blast_CompositionWorkspaceInit(Blast_CompositionWorkspace *NRrecord, const char *matrixName)
Initialize the fields of a Blast_CompositionWorkspace for a specific underlying scoring matrix.
Blast_MatrixInfo * Blast_MatrixInfoNew(int rows, int cols, int positionBased)
Create a Blast_MatrixInfo object.
void Blast_Int4MatrixFromFreq(int **matrix, int size, double **freq, double Lambda)
Compute an integer-valued amino-acid score matrix from a set of score frequencies.
void Blast_CompositionWorkspaceFree(Blast_CompositionWorkspace **NRrecord)
Free memory associated with a record of type Blast_CompositionWorkspace.
void Blast_ReadAaComposition(Blast_AminoAcidComposition *composition, int alphsize, const Uint1 *sequence, int length)
Compute the true amino acid composition of a sequence, ignoring ambiguity characters and other nonsta...
ECompoAdjustModes
A collection of constants that specify all permissible modes of composition adjustment.
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eNoCompositionBasedStats
Don't use composition based statistics.
@ eCompositionMatrixAdjust
Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence pro...
EMatrixAdjustRule
A collection of constants that specify all rules that may be used to generate a compositionally adju...
@ eDontAdjustMatrix
@ eCompoScaleOldMatrix
static void cleanup(void)
Definition: ct_dynamic.c:30
#define head
Definition: ct_nlmzip_i.h:138
static int heap[2 *(256+1+29)+1]
CS_CONTEXT * ctx
Definition: t0006.c:12
static DLIST_TYPE *DLIST_NAME() next(DLIST_LIST_TYPE *list, DLIST_TYPE *item)
Definition: dlist.tmpl.h:56
EGapAlignOpType
Operation types within the edit script.
Definition: gapinfo.h:44
GapEditScript * GapEditScriptDelete(GapEditScript *esp)
Free edit script structure.
Definition: gapinfo.c:75
void GapPrelimEditBlockReset(GapPrelimEditBlock *edit_block)
Reset a preliminary edit block without freeing it.
Definition: gapinfo.c:213
Defines the interface to interact with the genetic code singleton object.
Uint1 * GenCodeSingletonFind(Uint4 gen_code_id)
Returns the genetic code string for the requested genetic code id.
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
#define BLASTAA_SIZE
Size of aminoacid alphabet.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingProtein
NCBIstdaa.
#define NULL
Definition: ncbistd.hpp:225
#define Boolean
Definition: ncbistd.hpp:136
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
where boath are integers</td > n< td ></td > n</tr > n< tr > n< td > tse</td > n< td > optional</td > n< td > String</td > n< td class=\"description\"> TSE option controls what blob is orig
for(len=0;yy_str[len];++len)
int i
yy_size_t n
SFreqRatios * _PSIMatrixFrequencyRatiosFree(SFreqRatios *freq_ratios)
Deallocate the frequency ratios structure.
SFreqRatios * _PSIMatrixFrequencyRatiosNew(const char *matrix_name)
Retrieve the matrix's frequency ratios.
Definitions used to get joint probabilities for a scoring matrix.
int Blast_FrequencyDataIsAvailable(const char *matrix_name)
Retrieve the background letter probabilities implicitly used in constructing the score matrix matrix_...
range(_Ty, _Ty) -> range< _Ty >
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
#define strdup
Definition: ncbi_ansi_ext.h:70
Prototypes for portable math library (ported from C Toolkit)
#define NCBIMATH_LN2
Natural log(2)
Definition: ncbi_math.h:161
long BLAST_Nint(double x)
Nearest integer.
Definition: ncbi_math.c:437
#define MIN(a, b)
returns smaller of a and b.
Definition: ncbi_std.h:112
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define ABS(a)
returns absolute value of a (|a|)
Definition: ncbi_std.h:122
#define NCBI_CONST_UINT8(v)
Definition: ncbi_std.h:196
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
#define MAX(a, b)
returns larger of a and b.
Definition: ncbi_std.h:117
double r(size_t dimension_, const Int4 *score_, const double *prob_, double theta_)
double lambda(size_t dimMatrix_, const Int4 *const *scoreMatrix_, const double *q_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
Declarations for several linear algebra routines.
void Nlm_Int4MatrixFree(int ***mat)
Free a matrix created by Nlm_DenseMatrixNew or Nlm_LtriangMatrixNew.
int ** Nlm_Int4MatrixNew(int nrows, int ncols)
Create and return a new Int4 matrix.
static int match(register const pcre_uchar *eptr, register const pcre_uchar *ecode, const pcre_uchar *mstart, int offset_top, match_data *md, eptrblock *eptrb, unsigned int rdepth)
Definition: pcre_exec.c:513
Definitions used to redo a set of alignments, using either composition matrix adjustment or the Smith...
void BlastCompo_AlignmentsFree(BlastCompo_Alignment **palign, void(*free_context)(void *))
Recursively free all alignments in the singly linked list whose head is *palign.
Blast_RedoAlignParams * Blast_RedoAlignParamsNew(Blast_MatrixInfo **pmatrix_info, BlastCompo_GappingParams **pgapping_params, ECompoAdjustModes compo_adjust_mode, int positionBased, int subject_is_translated, int query_is_translated, int ccat_query_length, int cutoff_s, double cutoff_e, int do_link_hsps, const Blast_RedoAlignCallbacks *callbacks, double near_identical_cutoff)
Create new Blast_RedoAlignParams object.
#define GET_NUCL_LENGTH(l)
#define GET_TRANSLATED_LENGTH(l, f)
int BlastCompo_EarlyTermination(double evalue, BlastCompo_Heap significantMatches[], int numQueries)
Return true if a heuristic determines that it is unlikely to be worthwhile to redo a query-subject pa...
BlastCompo_Alignment * BlastCompo_AlignmentNew(int score, EMatrixAdjustRule whichRule, int queryIndex, int queryStart, int queryEnd, int matchStart, int matchEnd, int frame, void *context)
Create a new BlastCompo_Alignment; parameters to this function correspond directly to fields of Blast...
int Blast_RedoOneMatch(BlastCompo_Alignment **alignments, Blast_RedoAlignParams *params, BlastCompo_Alignment *incoming_aligns, int hspcnt, double Lambda, BlastCompo_MatchingSequence *matchingSeq, int ccat_query_length, BlastCompo_QueryInfo query_info[], int numQueries, int **matrix, int alphsize, Blast_CompositionWorkspace *NRrecord, double *pvalueThisPair, int compositionTestIndex, double *LambdaRatio)
Recompute all alignments for one query/subject pair using composition-based statistics or composition...
void Blast_RedoAlignParamsFree(Blast_RedoAlignParams **pparams)
Free a set of Blast_RedoAlignParams.
int Blast_RedoOneMatchSmithWaterman(BlastCompo_Alignment **alignments, Blast_RedoAlignParams *params, BlastCompo_Alignment *incoming_aligns, int hspcnt, double Lambda, double logK, BlastCompo_MatchingSequence *matchingSeq, BlastCompo_QueryInfo query_info[], int numQueries, int **matrix, int alphsize, Blast_CompositionWorkspace *NRrecord, Blast_ForbiddenRanges *forbidden, BlastCompo_Heap *significantMatches, double *pvalueThisPair, int compositionTestIndex, double *LambdaRatio)
Recompute all alignments for one query/subject pair using the Smith-Waterman algorithm and possibly a...
#define GET_SEQ_FRAME(f)
void Blast_ForbiddenRangesRelease(Blast_ForbiddenRanges *self)
Release the storage associated with the fields of self, but do not delete self.
int Blast_ForbiddenRangesInitialize(Blast_ForbiddenRanges *self, int capacity)
Initialize a new, empty Blast_ForbiddenRanges.
Structure to hold a sequence.
Definition: blast_def.h:242
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Definition: blast_def.h:244
Int4 length
Length of sequence.
Definition: blast_def.h:246
Uint1 * sequence_nomask
Start of query sequence without masking.
Definition: blast_def.h:256
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
Uint1 * oof_sequence
Mixed-frame protein representation of a nucleotide sequence for out-of-frame alignment.
Definition: blast_def.h:259
Uint1 * gen_code_string
for nucleotide subject sequences (tblast[nx]), the genetic code used to create a translated protein s...
Definition: blast_def.h:272
Within the composition adjustment module, an object of type BlastCompo_Alignment represents a distinc...
int frame
the subject frame
int matchStart
the start of the alignment in the subject
int score
the score of this alignment
int matchEnd
one past the end of the alignment in the subject
int queryStart
the start of the alignment in the query
EMatrixAdjustRule matrix_adjust_rule
how the score matrix was computed
struct BlastCompo_Alignment * next
the next alignment in the list
int queryIndex
index of the query in a concatenated query
int queryEnd
one past the end of the alignment in the query
void * context
traceback info for a gapped alignment
Parameters used to compute gapped alignments.
int x_dropoff
for x-drop algorithms, once a path falls below the best score by this (positive) amount,...
int gap_open
penalty for opening a gap
void * context
a pointer to any additional gapping parameters that may be needed by the calling routine.
int gap_extend
penalty for extending a gapped alignment by one residue
A BlastCompo_Heap represents a collection of alignments between one query sequence and several matchi...
Definition: compo_heap.h:82
A BlastCompo_MatchingSequence represents a subject sequence to be aligned with the query.
Int4 index
index of this sequence in the database
void * local_data
holds any sort of data that is necessary for callbacks to access the sequence
Int4 length
length of this matching sequence
Collected information about a query.
int origin
origin of the query in a concatenated query
Blast_AminoAcidComposition composition
the composition of the query
BlastCompo_SequenceData seq
sequence data for the query
double eff_search_space
effective search space of searches involving this query
Uint8 * words
list of words in the query, needed for testing whether the query and a subject are nearly identical
BlastCompo_SequenceData - represents a string of amino acids or nucleotides.
int length
the length of data.
Uint1 * buffer
if non-nil, points to memory that must be freed when this instance of BlastCompo_SequenceData is dele...
Uint1 * data
amino acid or nucleotide data
BlastCompo_SequenceRange - a struct whose instances represent a range of data in a sequence.
int begin
the starting index of the range
int end
one beyond the last item in the range
int context
integer identifier for this window, can indicate a translation frame or an index into a set of sequen...
The context related information.
Int4 query_length
Length of this query, strand or frame.
Boolean is_valid
Determine if this context is valid or not.
Int4 query_offset
Offset of this query, strand or frame in the concatenated super-query.
Int4 length_adjustment
Length adjustment for boundary conditions.
Int8 eff_searchsp
Effective search space for this context.
Options used for gapped extension These include: a.
EBlastTbackExt eTbackExt
type of traceback extension.
Int4 unifiedP
Indicates unified P values to be used in blastp or tblastn.
double gap_x_dropoff_final
X-dropoff value for the final gapped extension (in bits)
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used
Computed values used as parameters for gapped alignments.
BlastExtensionOptions * options
The original (unparsed) options.
Int4 gap_x_dropoff_final
X-dropoff value for the final gapped extension (raw)
Structure supporting the gapped alignment.
GapPrelimEditBlock * fwd_prelim_tback
traceback from right extensions
Int4 gap_x_dropoff
X-dropoff parameter to use.
GapPrelimEditBlock * rev_prelim_tback
traceback from left extensions
Int4 query_stop
query end offset of current alignment
Int4 subject_start
subject start offset of current alignment
BlastScoreBlk * sbp
Pointer to the scoring information block.
Int4 query_start
query start offset of current alignment
Int4 subject_stop
subject end offset of current alignment
Int4 score
Return value: alignment score.
GapEditScript * edit_script
The traceback (gap) information.
Auxiliary structure for dynamic programming gapped extension.
The structure to hold all HSPs for a given sequence after the gapped alignment.
Definition: blast_hits.h:153
Int4 oid
The ordinal id of the subject sequence this HSP list is for.
Definition: blast_hits.h:154
Int4 hspcnt
Number of HSPs saved.
Definition: blast_hits.h:158
BlastHSP ** hsp_array
Array of pointers to individual HSPs.
Definition: blast_hits.h:157
double best_evalue
Smallest e-value for HSPs in this list.
Definition: blast_hits.h:162
Int4 query_index
Index of the query which this HSPList corresponds to.
Definition: blast_hits.h:155
The structure to contain all BLAST results, for multiple queries.
Definition: blast_hits.h:183
BlastHitList ** hitlist_array
Array of results for individual query sequences.
Definition: blast_hits.h:185
Int4 num_queries
Number of query sequences.
Definition: blast_hits.h:184
Default implementation of BlastHSPStream.
Structure holding all information about an HSP.
Definition: blast_hits.h:126
double evalue
This HSP's e-value.
Definition: blast_hits.h:130
Int4 num_ident
Number of identical base pairs in this HSP.
Definition: blast_hits.h:128
BlastSeg query
Query sequence info.
Definition: blast_hits.h:131
Int4 context
Context number of query.
Definition: blast_hits.h:133
double bit_score
Bit score, calculated from score.
Definition: blast_hits.h:129
BlastSeg subject
Subject sequence info.
Definition: blast_hits.h:132
Int2 comp_adjustment_method
which mode of composition adjustment was used; relevant only for blastp and tblastn
Definition: blast_hits.h:139
Int4 score
This HSP's raw score.
Definition: blast_hits.h:127
The structure to contain all BLAST results for one query sequence.
Definition: blast_hits.h:169
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.
Int4 hitlist_size
Maximal number of database sequences to return results for.
Parameter block that contains a pointer to BlastHitSavingOptions and the values derived from it.
Int4 cutoff_score_min
smallest cutoff score across all contexts
Boolean do_sum_stats
TRUE if sum stats will be used.
BlastLinkHSPParameters * link_hsp_params
Parameters for linking HSPs with sum statistics; linking is not done if NULL.
BlastHitSavingOptions * options
The original (unparsed) options.
Data and data-structures needed to perform a gapped alignment.
Definition: blast_kappa.c:1715
BlastGapAlignStruct * gap_align
additional parameters for a gapped alignment
Definition: blast_kappa.c:1719
EBlastProgramType prog_number
the type of search being performed
Definition: blast_kappa.c:1724
BlastScoreBlk * sbp
the score block for this search
Definition: blast_kappa.c:1721
const BlastScoringParameters * scoringParams
scoring parameters for a gapped alignment
Definition: blast_kappa.c:1717
double localScalingFactor
the amount by which this search has been scaled
Definition: blast_kappa.c:1722
A BlastKappa_SavedParameters holds the value of certain search parameters on entry to RedoAlignmentCo...
Definition: blast_kappa.c:1958
double scale_factor
the original scale factor
Definition: blast_kappa.c:1962
Int4 num_queries
Number of queries in this search.
Definition: blast_kappa.c:1968
Int4 gap_open
a penalty for the existence of a gap
Definition: blast_kappa.c:1959
double original_expect_value
expect value on entry
Definition: blast_kappa.c:1964
Int4 gapExtend
a penalty for each residue in the gap
Definition: blast_kappa.c:1960
Blast_KarlinBlk ** kbp_gap_orig
copy of the original gapped Karlin-Altschul block corresponding to the first context
Definition: blast_kappa.c:1967
Int4 ** origMatrix
The original matrix values.
Definition: blast_kappa.c:1963
BLAST-specific information that is associated with a BlastCompo_MatchingSequence.
Definition: blast_kappa.c:892
const BlastSeqSrc * seq_src
BLAST sequence data source.
Definition: blast_kappa.c:897
EBlastProgramType prog_number
identifies the type of blast search being performed.
Definition: blast_kappa.c:893
BlastSeqSrcGetSeqArg seq_arg
argument to GetSequence method of the BlastSeqSrc (
Definition: blast_kappa.c:898
The query related information.
Int4 first_context
Index of the first element of the context array.
BlastContextInfo * contexts
Information per context.
int num_queries
Number of query sequences.
Int4 last_context
Index of the last element of the context array.
Uint4 max_length
Length of the longest among the concatenated queries.
Structure used for scoring calculations.
Definition: blast_stat.h:177
Blast_KarlinBlk ** kbp_psi
K-A parameters for position-based alignments.
Definition: blast_stat.h:213
Blast_KarlinBlk ** kbp_gap
K-A parameters for gapped alignments.
Definition: blast_stat.h:208
SPsiBlastScoreMatrix * psi_matrix
PSSM and associated data.
Definition: blast_stat.h:186
Uint1 alphabet_code
NCBI alphabet code.
Definition: blast_stat.h:180
Int4 number_of_contexts
Used by sfp and kbp, how large are these.
Definition: blast_stat.h:217
SBlastScoreMatrix * matrix
scoring matrix data
Definition: blast_stat.h:185
Blast_KarlinBlk * kbp_ideal
Ideal values (for query with average database composition).
Definition: blast_stat.h:216
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
EBlastProgramType program_number
indicates blastn, blastp, etc.
char * matrix
Name of the matrix containing all scores: needed for finding neighboring words.
Boolean is_ooframe
Should out-of-frame gapping be used in a translated search?
Scoring parameters block Contains scoring-related information that is actually used for the blast sea...
double scale_factor
multiplier for all cutoff scores
Int4 gap_extend
Penalty for each gap residue (scaled version)
Int4 gap_open
Extra penalty for starting a gap (scaled version)
BlastScoringOptions * options
User-provided values for these params.
Int4 end
End of hsp.
Definition: blast_hits.h:99
Int4 gapped_start
Where the gapped extension started.
Definition: blast_hits.h:100
Int2 frame
Translation frame.
Definition: blast_hits.h:97
Int4 offset
Start of hsp.
Definition: blast_hits.h:98
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Definition: blast_seqsrc.h:257
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Definition: blast_seqsrc.h:259
EBlastEncoding encoding
Encoding of sequence, i.e.
Definition: blast_seqsrc.h:263
Boolean check_oid_exclusion
Check whether an OID is excluded due to overlapping filtering.
Definition: blast_seqsrc.h:279
BlastSeqSrcSetRangesArg * ranges
Definition: blast_seqsrc.h:286
BLAST_SequenceBlk * seq
Sequence to return; if NULL, it should be allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...
Definition: blast_seqsrc.h:284
Structure used as the argument to function SetRanges.
Definition: blast_seqsrc.h:208
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Work arrays used to perform composition-based matrix adjustment.
An instance of Blast_ForbiddenRanges is used by the Smith-Waterman algorithm to represent ranges in t...
Structure to hold the Gumbel parameters (for FSC).
Definition: blast_stat.h:94
Structure to hold the Karlin-Altschul parameters.
Definition: blast_stat.h:66
double K
K value used in statistics.
Definition: blast_stat.h:68
double Lambda
Lambda value used in statistics.
Definition: blast_stat.h:67
double logK
natural log of K value used in statistics
Definition: blast_stat.h:69
Information about an amino-acid substitution matrix.
Callbacks used by Blast_RedoOneMatch and Blast_RedoOneMatchSmithWaterman routines.
A parameter block for the Blast_RedoOneMatch and Blast_RedoOneMatchSmithWaterman routines.
Holds score frequencies used in calculation of Karlin-Altschul parameters for an ungapped search.
Definition: blast_stat.h:128
double * sprob0
arrays for frequency of given score
Definition: blast_stat.h:134
double score_avg
average score, must be negative for local alignment.
Definition: blast_stat.h:133
Int4 score_max
highest allowed scores
Definition: blast_stat.h:130
Int4 obs_min
lowest observed (actual) scores
Definition: blast_stat.h:131
double * sprob
arrays for frequency of given score, shifted down by score_min.
Definition: blast_stat.h:135
Int4 score_min
lowest allowed scores
Definition: blast_stat.h:129
Int4 obs_max
highest observed (actual) scores
Definition: blast_stat.h:132
Edit script: linked list of correspondences between two sequences.
Definition: gapinfo.h:57
Int4 * num
Array of number of operations.
Definition: gapinfo.h:59
Int4 size
Size of above arrays.
Definition: gapinfo.h:60
EGapAlignOpType * op_type
Array of type of operation.
Definition: gapinfo.h:58
Preliminary version of GapEditBlock, used directly by the low- level dynamic programming routines.
Definition: gapinfo.h:73
Int4 num_ops_allocated
size of allocated array
Definition: gapinfo.h:75
GapPrelimEditScript * edit_ops
array of edit operations
Definition: gapinfo.h:74
A version of GapEditScript used to store initial results from the gapped alignment routines.
Definition: gapinfo.h:65
Int4 num
Number of operations.
Definition: gapinfo.h:67
EGapAlignOpType op_type
Type of operation.
Definition: gapinfo.h:66
Structure to keep memory for state structure.
Definition: gapinfo.h:81
struct GapStateArrayStruct * next
Next link in the list.
Definition: gapinfo.h:85
Int4 length
length of the state_array.
Definition: gapinfo.h:82
Uint1 * state_array
array to be used.
Definition: gapinfo.h:84
Structure used to pass data into the scaling routines.
Definition: blast_posit.h:73
double * standardProb
Array of standard residue probabilities, as those returned by BLAST_GetStandardAaProbabilities.
Definition: blast_posit.h:98
Structure used to pass data into the scaling routines.
Definition: blast_posit.h:56
int ** posMatrix
PSSM.
Definition: blast_posit.h:58
double ** posFreqs
PSSM's frequency ratios [alias].
Definition: blast_posit.h:62
int ** posPrivateMatrix
Scaled PSSM [alias].
Definition: blast_posit.h:60
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
double inclusion_ethresh
Minimum evalue for inclusion in PSSM calculation.
All filtering options.
Scoring matrix used in BLAST.
Definition: blast_stat.h:139
size_t nrows
number of rows
Definition: blast_stat.h:143
double lambda
derived value of the matrix lambda -RMH-
Definition: blast_stat.h:145
double * freqs
array of assumed matrix background frequencies -RMH-
Definition: blast_stat.h:144
size_t ncols
number of columns
Definition: blast_stat.h:142
int ** data
actual scoring matrix data, stored in row-major form
Definition: blast_stat.h:140
Information about target translations.
Definition: blast_def.h:311
Stores the frequency ratios along with their bit scale factor.
double ** data
The actual frequency ratios.
All auxiliary memory needed for the greedy extension algorithm.
Definition: greedy_align.h:89
Scoring matrix data used in PSI-BLAST.
Definition: blast_stat.h:149
SBlastScoreMatrix * pssm
position-specific score matrix
Definition: blast_stat.h:150
double ** freq_ratios
PSSM's frequency ratios, dimensions are specified in pssm data above.
Definition: blast_stat.h:151
Blast_KarlinBlk * kbp
Karlin-Altschul block associated with this PSSM.
Definition: blast_stat.h:153
Data structure to support MT traceback: this encapsulates the data that each thread modifies.
BlastHitSavingParameters * hit_params
Hit saving parameters.
BlastHSPResults * results
Structure to store results from this thread.
Internal representation of a PSSM in various stages of its creation and its dimensions.
int ** scaled_pssm
scaled PSSM (scores)
Uint4 nrows
number of rows (alphabet_size)
int ** pssm
PSSM (scores)
Uint4 ncols
number of columns (query_length)
double ** freq_ratios
frequency ratios
static string subject
static string query
Definition: _hash_fun.h:40
@ FALSE
Definition: testodbc.c:27
@ TRUE
Definition: testodbc.c:27
Headers for computing a "composition" p-value of a match, and for computing a unified p-value combini...
double Blast_Overall_P_Value(double p_comp, double p_alignment)
This function implements the method of Fisher, R.
void free(voidpf ptr)
voidp malloc(uInt size)
voidp calloc(uInt items, uInt size)
Modified on Sat Dec 09 04:46:32 2023 by modify_doxy.py rev. 669887