NCBI C++ ToolKit
blast_options.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_options.c 101569 2024-01-05 18:31:16Z camacho $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  */
26 
27 /** @file blast_options.c
28  * The structures and functions in blast_options.[ch] should be used to specify
29  * user preferences. The options structures should not be changed by the BLAST code
30  * but rather be read to determine user preferences. When possible these structures
31  * should be passed in as "const".
32  *
33  */
34 
42 
43 const double kPSSM_NoImpalaScaling = 1.0;
44 
45 /** Declared in blast_def.h as extern const. */
46 const int kDustLevel = 20; /* value SDustOptionsNew stores in the level field */
47 const int kDustWindow = 64; /* value SDustOptionsNew stores in the window field */
48 const int kDustLinker = 1; /* value SDustOptionsNew stores in the linker field */
49 
51 {
52  if (dust_options)
53  sfree(dust_options);
54  return NULL;
55 }
56 
58 {
59  if (dust_options == NULL)
60  return 1;
61 
62  *dust_options = (SDustOptions*) malloc(sizeof(SDustOptions));
63  (*dust_options)->level = kDustLevel;
64  (*dust_options)->window = kDustWindow;
65  (*dust_options)->linker = kDustLinker;
66 
67  return 0;
68 }
69 
71 {
72  if (seg_options)
73  sfree(seg_options);
74  return NULL;
75 }
76 
78 {
79  if (seg_options == NULL)
80  return 1;
81 
82  *seg_options = (SSegOptions*) malloc(sizeof(SSegOptions));
83  (*seg_options)->window = kSegWindow;
84  (*seg_options)->locut = kSegLocut;
85  (*seg_options)->hicut = kSegHicut;
86 
87  return 0;
88 }
89 
91 {
92  if (winmask_options) {
93  *winmask_options = (SWindowMaskerOptions*) calloc(1, sizeof(SWindowMaskerOptions));
94  if (*winmask_options == NULL)
95  return BLASTERR_MEMORY;
96 
97  (*winmask_options)->taxid = 0;
98  (*winmask_options)->database = NULL;
99  return 0;
100  }
101  return 1;
102 }
103 
105 {
106  if (winmask_options)
107  {
108  if (winmask_options->database)
109  {
110  sfree(winmask_options->database);
111  }
112  sfree(winmask_options);
113  }
114  return NULL;
115 }
116 
118 {
119  if (repeat_options)
120  {
121  sfree(repeat_options->database);
122  sfree(repeat_options);
123  }
124  return NULL;
125 }
126 
128 {
129 
130  if (repeat_options == NULL)
131  return 1;
132 
133  *repeat_options = (SRepeatFilterOptions*) calloc(1, sizeof(SRepeatFilterOptions));
134  if (*repeat_options == NULL)
135  return BLASTERR_MEMORY;
136 
137  (*repeat_options)->database = strdup(kDefaultRepeatFilterDb);
138 
139  return 0;
140 }
141 
142 Int2 SRepeatFilterOptionsResetDB(SRepeatFilterOptions* *repeat_options, const char* db)
143 {
144  Int2 status=0;
145 
146  if (*repeat_options == NULL)
147  status = SRepeatFilterOptionsNew(repeat_options);
148 
149  if (status)
150  return status;
151 
152  sfree((*repeat_options)->database);
153  (*repeat_options)->database = strdup(db);
154 
155  return status;
156 }
157 
158 Int2 SWindowMaskerOptionsResetDB(SWindowMaskerOptions ** winmask_options, const char* db)
159 {
160  Int2 status=0;
161 
162  if (*winmask_options == NULL)
163  status = SWindowMaskerOptionsNew(winmask_options);
164 
165  if (status)
166  return status;
167 
168  sfree((*winmask_options)->database);
169 
170  if (db) {
171  (*winmask_options)->database = strdup(db);
172  }
173 
174  return status;
175 }
176 
178  SReadQualityOptions* read_quality_options)
179 {
180  if (read_quality_options) {
181  free(read_quality_options);
182  }
183 
184  return NULL;
185 }
186 
188 {
189  if (!read_quality_options) {
190  return 1;
191  }
192 
193  *read_quality_options = calloc(1, sizeof(SReadQualityOptions));
194  if (!*read_quality_options) {
195  return 1;
196  }
197 
198  (*read_quality_options)->frac_ambig = 0.5;
199  (*read_quality_options)->entropy = 16;
200 
201  return 0;
202 }
203 
205 {
206  if (filter_options)
207  {
208  filter_options->dustOptions =
209  SDustOptionsFree(filter_options->dustOptions);
210  filter_options->segOptions =
211  SSegOptionsFree(filter_options->segOptions);
212  filter_options->repeatFilterOptions =
214  filter_options->windowMaskerOptions =
216  filter_options->readQualityOptions =
218  sfree(filter_options);
219  }
220 
221  return NULL;
222 }
223 
225 {
226  Int2 status = 0;
227 
228  if (filter_options)
229  {
230  *filter_options = (SBlastFilterOptions*) calloc(1, sizeof(SBlastFilterOptions));
231  (*filter_options)->mask_at_hash = FALSE;
232  if (type == eSeg)
233  SSegOptionsNew(&((*filter_options)->segOptions));
234  if (type == eDust || type == eDustRepeats)
235  SDustOptionsNew(&((*filter_options)->dustOptions));
236  if (type == eRepeats || type == eDustRepeats)
237  SRepeatFilterOptionsNew(&((*filter_options)->repeatFilterOptions));
238  }
239  else
240  status = 1;
241 
242  return status;
243 }
244 
245 
246 /** Merges together two sets of dust options, choosing the most non-default one.
247  *
248  * @param opt1 first set to be merged [in]
249  * @param opt2 second set to be merged [in]
250  * @return the merged options.
251  */
252 static SDustOptions* s_MergeDustOptions(const SDustOptions* opt1, const SDustOptions* opt2)
253 {
254  SDustOptions* retval = NULL;
255 
256  if (!opt1 && !opt2)
257  return NULL;
258 
259  SDustOptionsNew(&retval);
260 
261  if (opt1 && !opt2)
262  {
263  retval->level = opt1->level;
264  retval->window = opt1->window;
265  retval->linker = opt1->linker;
266  }
267  else if (!opt1 && opt2)
268  {
269  retval->level = opt2->level;
270  retval->window = opt2->window;
271  retval->linker = opt2->linker;
272  }
273  else
274  {
275  retval->level = (opt1->level != kDustLevel) ? opt1->level : opt2->level;
276  retval->window = (opt1->window != kDustWindow) ? opt1->window : opt2->window;
277  retval->linker = (opt1->linker != kDustLinker) ? opt1->linker : opt2->linker;
278  }
279 
280  return retval;
281 }
282 
283 
284 /** Merges together two sets of SEG options, choosing the most non-default one.
285  *
286  * @param opt1 first set to be merged [in]
287  * @param opt2 second set to be merged [in]
288  * @return the merged options.
289  */
290 static SSegOptions* s_MergeSegOptions(const SSegOptions* opt1, const SSegOptions* opt2)
291 {
292  SSegOptions* retval = NULL;
293 
294  if (!opt1 && !opt2)
295  return NULL;
296 
297  SSegOptionsNew(&retval);
298 
299  if (opt1 && !opt2)
300  {
301  retval->window = opt1->window;
302  retval->locut = opt1->locut;
303  retval->hicut = opt1->hicut;
304  }
305  else if (!opt1 && opt2)
306  {
307  retval->window = opt2->window;
308  retval->locut = opt2->locut;
309  retval->hicut = opt2->hicut;
310  }
311  else
312  {
313  retval->window = (opt1->window != kSegWindow) ? opt1->window : opt2->window;
314  retval->locut = (opt1->locut != kSegLocut) ? opt1->locut : opt2->locut;
315  retval->hicut = (opt1->hicut != kSegHicut) ? opt1->hicut : opt2->hicut;
316  }
317  return retval;
318 }
319 
320 /** Merges together two sets of repeat filter options, choosing the most non-default one.
321  *
322  * @param opt1 first set to be merged [in]
323  * @param opt2 second set to be merged [in]
324  * @return the merged options.
325  */
327 {
328  SRepeatFilterOptions* retval = NULL;
329 
330  if (!opt1 && !opt2)
331  return NULL;
332 
333  SRepeatFilterOptionsNew(&retval);
334 
335  if (opt1 && !opt2)
336  {
337  SRepeatFilterOptionsResetDB(&retval, opt1->database);
338  }
339  else if (!opt1 && opt2)
340  {
341  SRepeatFilterOptionsResetDB(&retval, opt2->database);
342  }
343  else
344  { /* TODO : handle different db's. */
345  SRepeatFilterOptionsResetDB(&retval, opt2->database);
346  }
347  return retval;
348 }
349 
350 /** Merges together two sets of window masker options, choosing the most non-default one.
351  *
352  * @param opt1 first set to be merged [in]
353  * @param opt2 second set to be merged [in]
354  * @return the merged options.
355  */
356 static SWindowMaskerOptions*
358  const SWindowMaskerOptions* opt2)
359 {
360  SWindowMaskerOptions* retval = NULL;
361  const SWindowMaskerOptions* src = NULL;
362  Boolean have1 = FALSE, have2 = FALSE;
363 
364  have1 = opt1 && (opt1->database || opt1->taxid);
365  have2 = opt2 && (opt2->database || opt2->taxid);
366 
367  if (! (have1 || have2))
368  return NULL;
369 
370  if (have1 && ! have2) {
371  src = opt1;
372  } else if (! have1 && have2) {
373  src = opt2;
374  } else {
375  // We have data structures with some kind of content, so
376  // prefer structure 2 as repeat filter options do.
377  src = opt2;
378  }
379 
380  ASSERT(src);
381  ASSERT(src->database || src->taxid);
382 
383  SWindowMaskerOptionsNew(&retval);
384  SWindowMaskerOptionsResetDB(& retval, src->database);
385  retval->taxid = src->taxid;
386 
387  return retval;
388 }
389 
391  const SBlastFilterOptions* opt2)
392 {
393  SBlastFilterOptions* retval = NULL;
394  Int2 status = 0;
395 
396  *combined = NULL;
397 
398  if (opt1 == NULL && opt2 == NULL)
399  return 0;
400 
401  status = SBlastFilterOptionsNew(&retval, eEmpty);
402  if (status != 0)
403  return status;
404 
405  *combined = retval;
406 
407  if ((opt1 && opt1->mask_at_hash) || (opt2 && opt2->mask_at_hash))
408  retval->mask_at_hash = TRUE;
409 
410  retval->dustOptions =
411  s_MergeDustOptions(opt1 ? opt1->dustOptions : NULL, opt2 ? opt2->dustOptions : NULL);
412  retval->segOptions =
413  s_MergeSegOptions(opt1 ? opt1->segOptions : NULL, opt2 ? opt2->segOptions : NULL);
414  retval->repeatFilterOptions =
415  s_MergeRepeatOptions(opt1 ? opt1->repeatFilterOptions : NULL, opt2 ? opt2->repeatFilterOptions : NULL);
416  retval->windowMaskerOptions =
418 
419  return 0;
420 }
421 
423 {
424  if (filter_options == NULL)
425  return TRUE;
426 
427  return filter_options->dustOptions == NULL &&
428  filter_options->segOptions == NULL &&
429  filter_options->repeatFilterOptions == NULL &&
430  filter_options->windowMaskerOptions == NULL;
431 }
432 
434 {
435  if (filter_options == NULL)
436  return FALSE;
437 
438  return filter_options->mask_at_hash;
439 }
440 
441 Int2 SBlastFilterOptionsValidate(EBlastProgramType program_number, const SBlastFilterOptions* filter_options, Blast_Message* *blast_message)
442 {
443  Int2 status = 0;
444 
445  if (filter_options == NULL)
446  {
448  "SBlastFilterOptionsValidate: NULL filter_options");
449  return BLASTERR_INVALIDPARAM;
450  }
451 
452  if (filter_options->repeatFilterOptions)
453  {
454  if (program_number != eBlastTypeBlastn &&
455  program_number != eBlastTypeMapping)
456  {
457  if (blast_message)
459  "SBlastFilterOptionsValidate: Repeat filtering only supported with blastn");
461  }
462  if (filter_options->repeatFilterOptions->database == NULL ||
463  strlen(filter_options->repeatFilterOptions->database) == 0)
464  {
465  if (blast_message)
467  "SBlastFilterOptionsValidate: No repeat database specified for repeat filtering");
468  return BLASTERR_INVALIDPARAM;
469  }
470  }
471 
472  if (filter_options->dustOptions)
473  {
474  if (program_number != eBlastTypeBlastn &&
475  program_number != eBlastTypeMapping)
476  {
477  if (blast_message)
479  "SBlastFilterOptionsValidate: Dust filtering only supported with blastn");
481  }
482  }
483 
484  if (filter_options->segOptions)
485  {
486  if (program_number == eBlastTypeBlastn &&
487  program_number != eBlastTypeMapping)
488  {
489  if (blast_message)
491  "SBlastFilterOptionsValidate: SEG filtering is not supported with blastn");
493  }
494  }
495 
496  return status;
497 }
498 
499 
502 
503 {
504  if (options)
505  {
506  sfree(options->filter_string);
508  sfree(options);
509  }
510  return NULL;
511 }
512 
513 Int2
515 {
516  Int2 status = 0;
517 
518  if (options == NULL)
519  return BLASTERR_INVALIDPARAM;
520 
521  *options = (QuerySetUpOptions*) calloc(1, sizeof(QuerySetUpOptions));
522 
523  if (*options == NULL)
524  return BLASTERR_MEMORY;
525 
526  (*options)->genetic_code = BLAST_GENETIC_CODE;
527 
528  /** @todo the code below should be deprecated */
529  status = SBlastFilterOptionsNew(&((*options)->filtering_options), eEmpty);
530 
531  return status;
532 }
533 
535  EBlastProgramType program, const char *filter_string, Uint1 strand_option)
536 {
537  Int2 status = 0;
538 
539  if (options == NULL)
540  return BLASTERR_INVALIDPARAM;
541 
542  if (strand_option &&
543  (program == eBlastTypeBlastn || program == eBlastTypePhiBlastn ||
544  program == eBlastTypeBlastx || program == eBlastTypeTblastx ||
545  program == eBlastTypeMapping)) {
546  options->strand_option = strand_option;
547  }
548 
549  if (filter_string) {
550  /* Free whatever filter string has been set before. */
551  sfree(options->filter_string);
552  /* Free whatever filtering options have been set. */
554  /* Parse the filter_string for options, do not save the string. */
555  status = BlastFilteringOptionsFromString(program, filter_string,
556  &options->filtering_options, NULL);
557  }
558  return status;
559 }
560 
563 
564 {
565 
566  sfree(options);
567 
568  return NULL;
569 }
570 
571 
572 Int2
574  BlastInitialWordOptions* *options)
575 {
576  *options =
578  if (*options == NULL)
579  return BLASTERR_MEMORY;
580 
581  if (/*program != eBlastTypeBlastn &&
582  program != eBlastTypePhiBlastn */
583  !Blast_ProgramIsNucleotide(program)) { /* protein-protein options. */
584  (*options)->window_size = BLAST_WINDOW_SIZE_PROT;
585  (*options)->x_dropoff = BLAST_UNGAPPED_X_DROPOFF_PROT;
586  (*options)->gap_trigger = BLAST_GAP_TRIGGER_PROT;
587  } else {
588  (*options)->window_size = BLAST_WINDOW_SIZE_NUCL;
589  (*options)->scan_range = BLAST_SCAN_RANGE_NUCL;
590  (*options)->gap_trigger = BLAST_GAP_TRIGGER_NUCL;
591  (*options)->x_dropoff = BLAST_UNGAPPED_X_DROPOFF_NUCL;
592  }
593 
594  (*options)->program_number = program;
595 
596  return 0;
597 }
598 
599 
600 Int2
602  const BlastInitialWordOptions* options,
603  Blast_Message* *blast_msg)
604 {
605 
606  ASSERT(options);
607 
608  /* PHI-BLAST has no ungapped extension phase. Megablast may not have it,
609  but generally does now. */
610  if (program_number != eBlastTypeBlastn &&
611  program_number != eBlastTypeMapping &&
612  (!Blast_ProgramIsPhiBlast(program_number)) &&
613  options->x_dropoff <= 0.0)
614  {
616  "x_dropoff must be greater than zero");
618  }
619 
620  if (program_number == eBlastTypeBlastn &&
621  options->scan_range && !options->window_size)
622  {
624  "off_diagonal_range is only useful in 2-hit algorithm");
626  }
627 
628 
629  return 0;
630 }
631 
632 
633 Int2
636  double xdrop_ungapped)
637 {
638  if (!options)
639  return BLASTERR_INVALIDPARAM;
640 
641  if (window_size != 0)
642  options->window_size = window_size;
643  if (xdrop_ungapped != 0)
644  options->x_dropoff = xdrop_ungapped;
645 
646  return 0;
647 }
648 
651 
652 {
653 
654  sfree(options);
655 
656  return NULL;
657 }
658 
659 Int2
661 
662 {
663  *options = (BlastExtensionOptions*)
664  calloc(1, sizeof(BlastExtensionOptions));
665 
666  if (*options == NULL)
667  return BLASTERR_MEMORY;
668 
669  if (/* program != eBlastTypeBlastn &&
670  program != eBlastTypePhiBlastn*/
671  !Blast_ProgramIsNucleotide(program)) /* protein-protein options. */
672  {
673  (*options)->gap_x_dropoff = BLAST_GAP_X_DROPOFF_PROT;
674  (*options)->gap_x_dropoff_final =
676  } else {
677  (*options)->gap_x_dropoff = BLAST_GAP_X_DROPOFF_NUCL;
678  (*options)->gap_x_dropoff_final = BLAST_GAP_X_DROPOFF_FINAL_NUCL;
679  }
680 
681  (*options)->ePrelimGapExt = eDynProgScoreOnly;
682  (*options)->eTbackExt = eDynProgTbck;
683  (*options)->compositionBasedStats = eNoCompositionBasedStats;
684 
685  /** @todo how to determine this for PSI-BLAST bootstrap run (i.e. when
686  * program is blastp? */
687  if (gapped && (Blast_QueryIsPssm(program) && ! Blast_SubjectIsTranslated(program))) {
688  (*options)->compositionBasedStats = eCompositionBasedStats;
689  }
690 
691  (*options)->max_mismatches = 5;
692  (*options)->mismatch_window = 10;
693  (*options)->program_number = program;
694 
695  return 0;
696 }
697 
698 Int2
700  EBlastProgramType program, Int4 greedy, double x_dropoff,
701  double x_dropoff_final)
702 {
703  if (!options)
704  return BLASTERR_INVALIDPARAM;
705 
706  if (/*program == eBlastTypeBlastn || program == eBlastTypePhiBlastn*/
707  Blast_ProgramIsNucleotide(program)) {
708  if (greedy) {
711  options->ePrelimGapExt = eGreedyScoreOnly;
712  options->eTbackExt = eGreedyTbck;
713  } else {
716  options->ePrelimGapExt = eDynProgScoreOnly;
717  options->eTbackExt = eDynProgTbck;
718  }
719  }
720 
721  if (Blast_QueryIsPssm(program) && ! Blast_SubjectIsTranslated(program)) {
723  }
724 
725  if (x_dropoff)
726  options->gap_x_dropoff = x_dropoff;
727  if (x_dropoff_final) {
728  options->gap_x_dropoff_final = x_dropoff_final;
729  } else {
730  /* Final X-dropoff can't be smaller than preliminary X-dropoff */
731  options->gap_x_dropoff_final =
732  MAX(options->gap_x_dropoff_final, x_dropoff);
733  }
734 
735  return 0;
736 
737 }
738 
739 Int2
741  const BlastExtensionOptions* options, Blast_Message* *blast_msg)
742 
743 {
744  if (options == NULL)
745  return BLASTERR_INVALIDPARAM;
746 
747  if (program_number != eBlastTypeBlastn &&
748  program_number != eBlastTypeMapping &&
749  (options->ePrelimGapExt == eGreedyScoreOnly ||
750  options->eTbackExt == eGreedyTbck))
751  {
754  "Greedy extension only supported for BLASTN");
756  }
757 
758  if ((options->ePrelimGapExt == eSmithWatermanScoreOnly &&
759  options->eTbackExt != eSmithWatermanTbckFull) ||
760  (options->ePrelimGapExt != eSmithWatermanScoreOnly &&
761  options->eTbackExt == eSmithWatermanTbckFull))
762  {
765  "Score-only and traceback Smith-Waterman must "
766  "both be specified");
768  }
769 
770  return 0;
771 }
772 
775 
776 {
777  if (options == NULL)
778  return NULL;
779 
780  sfree(options->matrix);
781  sfree(options->matrix_path);
782  sfree(options);
783 
784  return NULL;
785 }
786 
787 Int2
789 {
790  *options = (BlastScoringOptions*) calloc(1, sizeof(BlastScoringOptions));
791 
792  if (*options == NULL)
793  return BLASTERR_INVALIDPARAM;
794 
795  if (/*program_number != eBlastTypeBlastn &&
796  program_number != eBlastTypePhiBlastn*/
797  !Blast_ProgramIsNucleotide(program_number)) {/*protein-protein options.*/
798  (*options)->shift_pen = INT2_MAX;
799  (*options)->is_ooframe = FALSE;
800  (*options)->gap_open = BLAST_GAP_OPEN_PROT;
801  (*options)->gap_extend = BLAST_GAP_EXTN_PROT;
802  (*options)->matrix = strdup(BLAST_DEFAULT_MATRIX);
803  } else { /* nucleotide-nucleotide options. */
804  (*options)->penalty = BLAST_PENALTY;
805  (*options)->reward = BLAST_REWARD;
806  /* This is correct except when greedy extension is used. In that case
807  these values would have to be reset. */
808  (*options)->gap_open = BLAST_GAP_OPEN_NUCL;
809  (*options)->gap_extend = BLAST_GAP_EXTN_NUCL;
810  }
811  if (program_number != eBlastTypeTblastx) {
812  (*options)->gapped_calculation = TRUE;
813  }
814  (*options)->program_number = program_number;
815  /* By default cross_match-like complexity adjusted scoring is
816  turned off. RMBlastN is currently the only program to use this. -RMH */
817  (*options)->complexity_adjusted_scoring = FALSE;
818 
819  return 0;
820 }
821 
822 Int2
824  EBlastProgramType program_number, Boolean greedy_extension, Int4 penalty, Int4 reward,
825  const char *matrix, Int4 gap_open, Int4 gap_extend)
826 {
827  if (!options)
828  return BLASTERR_INVALIDPARAM;
829 
830  if (/*program_number != eBlastTypeBlastn &&
831  program_number != eBlastTypePhiBlastn*/
832  !Blast_ProgramIsNucleotide(program_number)) {/* protein-protein options. */
833  /* If matrix name is not provided, keep the default "BLOSUM62" value filled in
834  BlastScoringOptionsNew, otherwise reset it. */
835  if (matrix)
836  BlastScoringOptionsSetMatrix(options, matrix);
837  } else { /* nucleotide-nucleotide options. */
838  if (penalty)
839  options->penalty = penalty;
840  if (reward)
841  options->reward = reward;
842 
843  if (greedy_extension) {
846  } else {
847  options->gap_open = BLAST_GAP_OPEN_NUCL;
848  options->gap_extend = BLAST_GAP_EXTN_NUCL;
849  }
850  }
851  if (gap_open >= 0)
852  options->gap_open = gap_open;
853  if (gap_extend >= 0)
854  options->gap_extend = gap_extend;
855 
856  options->program_number = program_number;
857 
858  return 0;
859 }
860 
861 Int2
863  const BlastScoringOptions* options, Blast_Message* *blast_msg)
864 
865 {
866  if (options == NULL)
867  return BLASTERR_INVALIDPARAM;
868 
869  if (program_number == eBlastTypeTblastx && options->gapped_calculation)
870  {
872  "Gapped search is not allowed for tblastx");
874  }
875 
876  if (/*program_number == eBlastTypeBlastn || program_number == eBlastTypePhiBlastn*/
877  Blast_ProgramIsNucleotide(program_number))
878  {
879  // A penalty/reward of 0/0 is a signal that this is rmblastn
880  // which allows specification of penalties as positive integers.
881  if ( ! ( options->penalty == 0 && options->reward == 0 ) )
882  {
883  if (options->penalty >= 0)
884  {
886  "BLASTN penalty must be negative");
888  }
889 
890  /* !!! this is temporary until there is jumper or mapping options handle */
891  if (0 && options->gapped_calculation &&
892  !Blast_ProgramIsMapping(program_number) &&
893  !BLAST_CheckRewardPenaltyScores(options->reward, options->penalty))
894  {
896  "BLASTN reward/penalty combination not supported for gapped search");
898  }
899  }
900 
901  if (options->gapped_calculation && options->gap_open > 0 && options->gap_extend == 0)
902  {
904  "BLASTN gap extension penalty cannot be 0");
906  }
907  }
908  else
909  {
910  if (options->gapped_calculation && !Blast_ProgramIsRpsBlast(program_number))
911  {
912  Int2 status=0;
913  Boolean std_matrix_only =
914  (program_number != eBlastTypeBlastp &&
915  program_number != eBlastTypeTblastn);
916  if ((status=Blast_KarlinBlkGappedLoadFromTables(NULL, options->gap_open,
917  options->gap_extend, options->matrix, std_matrix_only)) != 0)
918  {
919  if (status == 1)
920  {
921  char* buffer;
922 
924  std_matrix_only);
925 
927  sfree(buffer);
929 
930  }
931  else if (status == 2)
932  {
933  char* buffer;
934 
936  options->gap_open, options->gap_extend);
938  sfree(buffer);
940  }
941  }
942  }
943  }
944 
945  if (program_number != eBlastTypeBlastx && program_number != eBlastTypeTblastn && options->is_ooframe)
946  {
948  "Out-of-frame only permitted for blastx and tblastn");
950  }
951 
952  return 0;
953 }
954 
955 Int2
957 {
958  if (old_opt == NULL || new_opt == NULL)
959  return BLASTERR_INVALIDPARAM;
960 
961  *new_opt = (BlastScoringOptions*) BlastMemDup(old_opt, sizeof(BlastScoringOptions));
962  if (*new_opt == NULL)
963  return BLASTERR_MEMORY;
964 
965  if (old_opt->matrix)
966  (*new_opt)->matrix = strdup(old_opt->matrix);
967 
968  if (old_opt->matrix_path)
969  (*new_opt)->matrix_path = strdup(old_opt->matrix_path);
970 
971  return 0;
972 }
973 
975  const char* matrix_name)
976 {
977  Uint4 i;
978 
979  if (matrix_name) {
980  sfree(opts->matrix);
981  opts->matrix = strdup(matrix_name);
982  /* Make it all upper case */
983  for (i=0; i<strlen(opts->matrix); ++i)
984  opts->matrix[i] = toupper((unsigned char) opts->matrix[i]);
985  }
986  return 0;
987 }
988 
991 
992 {
993  if (options == NULL)
994  return NULL;
995 
996  sfree(options->searchsp_eff);
997  sfree(options);
998  return NULL;
999 }
1000 
1001 
1002 Int2
1004 
1005 {
1006  if (options == NULL) {
1007  return BLASTERR_INVALIDPARAM;
1008  }
1009 
1010  *options = (BlastEffectiveLengthsOptions*)
1012 
1013  if (*options == NULL)
1014  return BLASTERR_MEMORY;
1015 
1016  return 0;
1017 }
1018 
1019 Boolean
1022  options)
1023 {
1024  int i;
1025  if ( !options || options->searchsp_eff == NULL) {
1026  return FALSE;
1027  }
1028 
1029  for (i = 0; i < options->num_searchspaces; i++) {
1030  if (options->searchsp_eff[i] != 0) {
1031  return TRUE;
1032  }
1033  }
1034  return FALSE;
1035 }
1036 
1037 Int2
1039  Int4 dbseq_num, Int8 db_length, Int8* searchsp_eff, Int4 num_searchsp)
1040 {
1041  Int4 index;
1042  if (!options)
1043  return BLASTERR_INVALIDPARAM;
1044 
1045  if (num_searchsp > options->num_searchspaces) {
1046  options->num_searchspaces = num_searchsp;
1047  options->searchsp_eff = (Int8 *)realloc(options->searchsp_eff,
1048  num_searchsp * sizeof(Int8));
1049  if (options->searchsp_eff == NULL)
1050  return BLASTERR_MEMORY;
1051  }
1052 
1053  for (index = 0; index < options->num_searchspaces; index++)
1054  options->searchsp_eff[index] = searchsp_eff[index];
1055 
1056  options->dbseq_num = dbseq_num;
1057  options->db_length = db_length;
1058 
1059  return 0;
1060 }
1061 
1064 
1065 {
1066 
1067  if (options == NULL)
1068  return NULL;
1069 
1070  sfree(options->phi_pattern);
1071 
1072  sfree(options);
1073  return NULL;
1074 }
1075 
1076 Int2
1078 {
1079  *options = (LookupTableOptions*) calloc(1, sizeof(LookupTableOptions));
1080 
1081  if (*options == NULL)
1082  return BLASTERR_INVALIDPARAM;
1083 
1084  switch (program_number) {
1085  case eBlastTypeMapping:
1086  (*options)->max_db_word_count = MAX_DB_WORD_COUNT_MAPPER;
1087  case eBlastTypeBlastn:
1088  /* Blastn default is megablast. */
1089  (*options)->word_size = BLAST_WORDSIZE_MEGABLAST;
1090  (*options)->lut_type = eMBLookupTable;
1091  break;
1093  (*options)->word_size = BLAST_WORDSIZE_PROT;
1094  (*options)->lut_type = eRPSLookupTable;
1095 
1096  if (program_number == eBlastTypeRpsBlast)
1097  (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTP;
1098  else
1099  (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTN;
1100  break;
1101  case eBlastTypePhiBlastn:
1102  (*options)->lut_type = ePhiNaLookupTable;
1103  break;
1104  case eBlastTypePhiBlastp:
1105  (*options)->lut_type = ePhiLookupTable;
1106  break;
1107  default:
1108  (*options)->word_size = BLAST_WORDSIZE_PROT;
1109  (*options)->lut_type = eAaLookupTable;
1110 
1111  if (program_number == eBlastTypeBlastp)
1112  (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTP;
1113  else if (program_number == eBlastTypeBlastx)
1114  (*options)->threshold = BLAST_WORD_THRESHOLD_BLASTX;
1115  else if (program_number == eBlastTypeTblastn)
1116  (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTN;
1117  else if (program_number == eBlastTypeTblastx)
1118  (*options)->threshold = BLAST_WORD_THRESHOLD_TBLASTX;
1119  break;
1120  }
1121 
1122  (*options)->program_number = program_number;
1123  (*options)->stride = 0;
1124 
1125  return 0;
1126 }
1127 
1128 Int2
1130  EBlastProgramType program_number, Boolean is_megablast,
1131  double threshold, Int4 word_size)
1132 {
1133  if (!options)
1134  return BLASTERR_INVALIDPARAM;
1135 
1136  if (program_number == eBlastTypeBlastn) {
1137 
1138  if (is_megablast) {
1139  options->lut_type = eMBLookupTable;
1141  } else {
1142  options->lut_type = eNaLookupTable;
1143  options->word_size = BLAST_WORDSIZE_NUCL;
1144  }
1145  } else if (program_number == eBlastTypeMapping) {
1146  options->lut_type = eNaHashLookupTable;
1147  options->word_size = BLAST_WORDSIZE_MAPPER;
1149  } else {
1150  options->lut_type = eAaLookupTable;
1151  }
1152 
1153  /* if the supplied threshold is negative, disable neighboring words */
1154  if (threshold < 0)
1155  options->threshold = 0;
1156 
1157  /* if the supplied threshold is > 0, use it otherwise, use the default */
1158  if (threshold > 0)
1159  options->threshold = threshold;
1160 
1161  if (Blast_ProgramIsRpsBlast(program_number))
1162  options->lut_type = eRPSLookupTable;
1163  if (word_size)
1164  options->word_size = word_size;
1165  if ((program_number == eBlastTypeTblastn ||
1166  program_number == eBlastTypeBlastp ||
1167  program_number == eBlastTypeBlastx) &&
1168  word_size > 5)
1170 
1171  return 0;
1172 }
1173 
1174 Int2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char* matrixName, double* threshold)
1175 {
1176 
1177  const double kB62_threshold = 11;
1178 
1179  if (program_number == eBlastTypeBlastn ||
1180  program_number == eBlastTypeMapping)
1181  return 0;
1182 
1183  if (matrixName == NULL)
1184  return BLASTERR_INVALIDPARAM;
1185 
1186  if(strcasecmp(matrixName, "BLOSUM62") == 0)
1187  *threshold = kB62_threshold;
1188  else if(strcasecmp(matrixName, "BLOSUM45") == 0)
1189  *threshold = 14;
1190  else if(strcasecmp(matrixName, "BLOSUM62_20") == 0)
1191  *threshold = 100;
1192  else if(strcasecmp(matrixName, "BLOSUM80") == 0)
1193  *threshold = 12;
1194  else if(strcasecmp(matrixName, "PAM30") == 0)
1195  *threshold = 16;
1196  else if(strcasecmp(matrixName, "PAM70") == 0)
1197  *threshold = 14;
1198  else if(strcasecmp(matrixName, "IDENTITY") == 0)
1199  *threshold = 27;
1200  else
1201  *threshold = kB62_threshold;
1202 
1203  if (Blast_SubjectIsTranslated(program_number) == TRUE)
1204  *threshold += 2; /* Covers tblastn, tblastx, psi-tblastn rpstblastn. */
1205  else if (Blast_QueryIsTranslated(program_number) == TRUE)
1206  *threshold += 1;
1207 
1208  return 0;
1209 }
1210 
1211 Int2 BLAST_GetSuggestedWindowSize(EBlastProgramType program_number, const char* matrixName, Int4* window_size)
1212 {
1213  const Int4 kB62_windowsize = 40;
1214 
1215  if (program_number == eBlastTypeBlastn ||
1216  program_number == eBlastTypeMapping)
1217  return 0;
1218 
1219  if (matrixName == NULL)
1220  return BLASTERR_INVALIDPARAM;
1221 
1222  if(strcasecmp(matrixName, "BLOSUM62") == 0)
1223  *window_size = kB62_windowsize;
1224  else if(strcasecmp(matrixName, "BLOSUM45") == 0)
1225  *window_size = 60;
1226  else if(strcasecmp(matrixName, "BLOSUM80") == 0)
1227  *window_size = 25;
1228  else if(strcasecmp(matrixName, "PAM30") == 0)
1229  *window_size = 15;
1230  else if(strcasecmp(matrixName, "PAM70") == 0)
1231  *window_size = 20;
1232  else
1233  *window_size = kB62_windowsize;
1234 
1235  return 0;
1236 }
1237 
1238 /** Validate options for the discontiguous word megablast
1239  * Word size must be 11 or 12; template length 16, 18 or 21;
1240  * template type 0, 1 or 2.
1241  * @param word_size Word size option [in]
1242  * @param template_length Discontiguous template length [in]
1243  * @param template_type Discontiguous template type [in]
1244  * @param blast_msg Used for storing error messages [in][out]
1245  * @return TRUE if options combination valid.
1246  */
1247 static Boolean
1248 s_DiscWordOptionsValidate(Int4 word_size, Uint1 template_length,
1249  Uint1 template_type,
1250  Blast_Message** blast_msg)
1251 {
/* A template length of 0 is accepted immediately: none of the word size,
 * template length or template type checks below are run in that case. */
1252  if (template_length == 0)
1253  return TRUE;
1254 
1255 
/* NOTE(review): listing lines 1257, 1265 and 1274 are missing from this
 * extract. Each one is the opening of the call that takes the string
 * literal below it as its last argument -- presumably Blast_MessageWrite on
 * blast_msg; TODO confirm against the repository before compiling. */
1256  if (word_size != 11 && word_size != 12) {
1258  "Invalid discontiguous template parameters: word "
1259  "size must be either 11 or 12");
1260  return FALSE;
1261  }
1262 
1263  if (template_length != 16 && template_length != 18 &&
1264  template_length != 21) {
1266  "Invalid discontiguous template parameters: "
1267  "template length must be 16, 18, or 21");
1268  return FALSE;
1269  }
1270 
/* template_type is unsigned (Uint1), so only the upper bound is tested. */
1271  if (template_type > 2) {
1272  /* should never fail coming from the C++ APIs as we represent these as
1273  * strings */
1275  "Invalid discontiguous template parameters: "
1276  "template type must be 0, 1, or 2");
1277  return FALSE;
1278  }
1279 
1280  return TRUE;
1281 }
1282 
/** Validate LookupTableOptions.
 *
 * Checks, in order: PHI pattern vs. program, threshold, word size limits per
 * program, lookup table type vs. program and word size, discontiguous
 * megablast template settings, and the db_filter (limit_lookup) option.
 * Returns BLASTERR_INVALIDPARAM when options is NULL and 0 when every
 * visible check passes.
 *
 * NOTE(review): this extract dropped listing line 1284 (function name and
 * first parameter) and, inside most error branches, the line that opens the
 * message call and the line that returns the error code. The cross-reference
 * index gives the prototype as
 *   Int2 LookupTableOptionsValidate(EBlastProgramType program_number,
 *        const LookupTableOptions *options, Blast_Message **blast_msg);
 * the error codes returned by the dropped lines are not visible here.
 */
1283 Int2
1285  const LookupTableOptions* options, Blast_Message* *blast_msg)
1286 
1287 {
1288  const Boolean kPhiBlast = Blast_ProgramIsPhiBlast(program_number);
1289 
1290  if (options == NULL)
1291  return BLASTERR_INVALIDPARAM;
1292 
1293  if (options->phi_pattern && !kPhiBlast) {
1295  "PHI pattern can be specified only for blastp and blastn");
1297  }
1298 
1299  /* For PHI BLAST, the subsequent word size tests are not needed. */
1300  if (kPhiBlast)
1301  return 0;
1302 
/* A positive threshold is required for every program except blastn,
 * mapping and the RPS BLAST programs. */
1303  if (program_number != eBlastTypeBlastn &&
1304  program_number != eBlastTypeMapping &&
1305  (!Blast_ProgramIsRpsBlast(program_number)) &&
1306  options->threshold <= 0)
1307  {
1309  "Non-zero threshold required");
1311  }
1312 
/* Word size checks. A non-positive word size is tolerated only for the
 * RPS BLAST programs; the remaining branches apply per-program limits. */
1313  if (options->word_size <= 0)
1314  {
1315  if ( !Blast_ProgramIsRpsBlast(program_number)) {
1317  "Word-size must be greater than zero");
1319  }
1320  } else if (/*program_number == eBlastTypeBlastn*/
1321  Blast_ProgramIsNucleotide(program_number) &&
1322  !Blast_QueryIsPattern(program_number) && options->word_size < 4)
1323  {
1325  "Word-size must be 4 or greater for nucleotide comparison");
1327  } else if (program_number == eBlastTypeBlastn &&
1328  options->word_size > DBSEQ_CHUNK_OVERLAP) {
/* The message is formatted into a local buffer because it embeds the
 * numeric value of DBSEQ_CHUNK_OVERLAP. */
1329  char buffer[256];
1330  int bytes_written = snprintf(buffer, DIM(buffer),
1331  "Word-size must be less than or equal to %d", DBSEQ_CHUNK_OVERLAP);
1332  ASSERT(bytes_written < DIM(buffer));
1335  } else if (program_number != eBlastTypeBlastn &&
1336  program_number != eBlastTypeMapping && options->word_size > 4)
1337  {
1338  if (program_number == eBlastTypeBlastp ||
1339  program_number == eBlastTypeTblastn ||
1340  program_number == eBlastTypeBlastx)
1341  {
1342  if (options->word_size > 7) {
1343  Blast_MessageWrite(blast_msg, eBlastSevError,
1345  "Word-size must be less than "
1346  "8 for a tblastn, blastp or blastx search");
1348  }
1349  }
1350  else if (program_number == eBlastTypePsiBlast &&
1351  options->word_size > 4) {
1352  Blast_MessageWrite(blast_msg, eBlastSevError,
1354  "Word-size must be less "
1355  "than 5 for psiblast");
1357  }
/* NOTE(review): this branch is reached with word_size > 4, yet the message
 * says "less than 6"; whether a dropped line narrows the condition cannot
 * be determined from this extract -- confirm against the repository. */
1358  else {
1359  Blast_MessageWrite(blast_msg, eBlastSevError,
1361  "Word-size must be less "
1362  "than 6 for protein comparison");
1364  }
1365  }
1366 
1367  if (program_number != eBlastTypeBlastn &&
1368  program_number != eBlastTypeMapping &&
1369  options->lut_type == eMBLookupTable)
1370  {
1372  "Megablast lookup table only supported with blastn");
1374  }
1375 
/* For blastp, tblastn and blastx the compressed-alphabet lookup table and
 * word sizes 5-7 are tied together: word size > 5 requires the compressed
 * table, and the compressed table requires word size 5, 6 or 7. */
1376  if (program_number == eBlastTypeBlastp ||
1377  program_number == eBlastTypeTblastn ||
1378  program_number == eBlastTypeBlastx)
1379  {
1380  if (options->word_size > 5 &&
1381  options->lut_type != eCompressedAaLookupTable) {
1384  "Blastp, Blastx or Tblastn with word size"
1385  " > 5 requires a "
1386  "compressed alphabet lookup table");
1388  }
1389  else if (options->lut_type == eCompressedAaLookupTable &&
1390  options->word_size != 5 && options->word_size != 6 &&
1391  options->word_size != 7) {
1393  "Compressed alphabet lookup table requires "
1394  "word size 5, 6 or 7");
1396  }
1397  }
1398 
/* Discontiguous megablast: validate the template parameters, then require
 * the megablast lookup table type. */
1399  if (/*program_number == eBlastTypeBlastn &&*/
1400  Blast_ProgramIsNucleotide(program_number) &&
1401  !Blast_QueryIsPattern(program_number) &&
1402  options->mb_template_length > 0) {
1403  if (!s_DiscWordOptionsValidate(options->word_size,
1404  options->mb_template_length,
1405  options->mb_template_type,
1406  blast_msg)) {
1408  } else if (options->lut_type != eMBLookupTable) {
1410  "Invalid lookup table type for discontiguous Mega BLAST");
1412  }
1413  }
1414 
/* db_filter is reported to the user as the "limit_lookup" option. */
1415  if (!Blast_ProgramIsNucleotide(program_number) && options->db_filter) {
1417  "The limit_lookup option can only be used for "
1418  "nucleotide searches");
1420  }
1421 
1422  if (options->db_filter && options->word_size < 16) {
1424  "The limit_lookup option can only be used with "
1425  "word size >= 16");
1427  }
1428 
1429  return 0;
1430 }
1431 
1434 
1435 {
/* Deallocate memory for BlastHitSavingOptions; always returns NULL.
 * NOTE(review): the signature (listing lines 1432-1433) and the statement
 * inside the if-block below (listing line 1437) were dropped from this
 * extract. The index gives the prototype as
 *   BlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options);
 * what the dropped statement releases is not visible here. */
1436  if (options) {
1438  }
1439  sfree(options);
1440  return NULL;
1441 }
1442 
1443 
1445  BlastHitSavingOptions** options,
1446  Boolean gapped_calculation)
1447 {
1448  *options = (BlastHitSavingOptions*) calloc(1, sizeof(BlastHitSavingOptions));
1449 
1450  if (*options == NULL)
1451  return BLASTERR_INVALIDPARAM;
1452 
1453  (*options)->hitlist_size = BLAST_HITLIST_SIZE;
1454  (*options)->expect_value = BLAST_EXPECT_VALUE;
1455  (*options)->program_number = program_number;
1456 
1457  // Initialize mask_level parameter -RMH-
1458  (*options)->mask_level = 101;
1459 
1460  /* By default, sum statistics is used for all translated searches
1461  * (except RPS BLAST), and for all ungapped searches.
1462  */
1463  if (program_number == eBlastTypeRpsTblastn) {
1464  (*options)->do_sum_stats = FALSE;
1465  } else if (!gapped_calculation ||
1466  Blast_QueryIsTranslated(program_number) ||
1467  Blast_SubjectIsTranslated(program_number)) {
1468  (*options)->do_sum_stats = TRUE;
1469  } else {
1470  (*options)->do_sum_stats = FALSE;
1471  }
1472 
1473  (*options)->hsp_filt_opt = NULL;
1474 
1475  (*options)->max_edit_distance = INT4_MAX;
1476 
1477  return 0;
1478 
1479 }
1480 
1481 Int2
1483  double evalue, Int4 hitlist_size,
1484  Boolean is_gapped, Int4 culling_limit,
1485  Int4 min_diag_separation)
1486 {
1487  if (!options)
1488  return BLASTERR_INVALIDPARAM;
1489 
1490  if (hitlist_size)
1491  options->hitlist_size = hitlist_size;
1492  if (evalue)
1493  options->expect_value = evalue;
1494  if (min_diag_separation)
1495  options->min_diag_separation = min_diag_separation;
1496  options->culling_limit = culling_limit;
1497  options->hsp_filt_opt = NULL;
1498  options->max_edit_distance = INT4_MAX;
1499 
1500  return 0;
1501 
1502 }
1503 
/** Validate BlastHitSavingOptions.
 *
 * Visible checks: options must be non-NULL; hitlist_size must be at least 1;
 * expect_value and cutoff_score must not both be non-positive; a non-zero
 * longest_intron is accepted only for tblastn, psitblastn, blastx and
 * mapping; culling_limit must be non-negative; and hsp_filt_opt, when
 * present, must pass BlastHSPFilteringOptionsValidate.
 *
 * NOTE(review): listing line 1505 (function name and first parameter) and,
 * inside every error branch, the lines that open the message call and return
 * the error code were dropped from this extract. The index gives the
 * prototype as
 *   Int2 BlastHitSavingOptionsValidate(EBlastProgramType program_number,
 *        const BlastHitSavingOptions *options, Blast_Message **blast_msg);
 * the returned error codes are not visible here.
 */
1504 Int2
1506  const BlastHitSavingOptions* options, Blast_Message* *blast_msg)
1507 {
1508  if (options == NULL)
1509  return BLASTERR_INVALIDPARAM;
1510 
1511  if (options->hitlist_size < 1)
1512  {
1514  "No hits are being saved");
1516  }
1517 
/* Only fails when BOTH cutoffs are non-positive; one valid cutoff suffices. */
1518  if (options->expect_value <= 0.0 && options->cutoff_score <= 0)
1519  {
1521  "expect value or cutoff score must be greater than zero");
1523  }
1524 
/* The condition also lets eBlastTypeMapping through, although the message
 * text below names only blastx, tblastn and psitblastn. */
1525  if (options->longest_intron != 0 &&
1526  program_number != eBlastTypeTblastn &&
1527  program_number != eBlastTypePsiTblastn &&
1528  program_number != eBlastTypeBlastx &&
1529  program_number != eBlastTypeMapping) {
1531  "Uneven gap linking of HSPs is allowed for blastx, "
1532  "tblastn, and psitblastn only");
1534  }
1535 
1536  if (options->culling_limit < 0)
1537  {
1539  "culling limit must be greater than or equal to zero");
1541  }
1542 
1543  if (options->hsp_filt_opt) {
1544  if (BlastHSPFilteringOptionsValidate(options->hsp_filt_opt) != 0) {
1546  "HSP Filtering options invalid");
1548  }
1549  }
1550 
1551  return 0;
1552 }
1553 
1555 {
/* Initialize default options for PSI BLAST: allocates a zeroed
 * PSIBlastOptions, stores it in *psi_options and sets the defaults below.
 * Returns BLASTERR_INVALIDPARAM if psi_options is NULL, BLASTERR_MEMORY if
 * the allocation fails, 0 otherwise.
 * NOTE(review): the signature (listing line 1554) and listing lines 1566,
 * 1567 and 1571 were dropped from this extract, so further default
 * assignments may exist that are not visible here. The index gives the
 * prototype as  Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options); */
1556  PSIBlastOptions* options = NULL;
1557 
1558  if ( !psi_options )
1559  return BLASTERR_INVALIDPARAM;
1560 
1561  options = (PSIBlastOptions*)calloc(1, sizeof(PSIBlastOptions));
1562  if ( !options )
1563  return BLASTERR_MEMORY;
1564 
1565  *psi_options = options;
1568  options->use_best_alignment = TRUE;
1569 
1570  options->nsg_compatibility_mode = FALSE;
1572  options->ignore_unaligned_positions = FALSE;
1573 
1574  return 0;
1575 }
1576 
1578  Blast_Message** blast_msg)
1579 {
/* Validates the PSI BLAST options: returns 1 when psi_options is NULL, when
 * pseudo_count is negative, or when inclusion_ethresh is not positive;
 * returns 0 otherwise.
 * NOTE(review): the first signature line (listing line 1577) and the lines
 * that open the two message calls (1587, 1593) were dropped from this
 * extract. The index gives the prototype as
 *   Int2 PSIBlastOptionsValidate(const PSIBlastOptions *psi_options,
 *        Blast_Message **blast_msg); */
1580  Int2 retval = 1; /* assume failure */
1581 
1582  if ( !psi_options ) {
1583  return retval;
1584  }
1585 
1586  if (psi_options->pseudo_count < 0) {
1588  "Pseudo count must be greater than or equal to 0");
1589  return retval;
1590  }
1591 
1592  if (psi_options->inclusion_ethresh <= 0.0) {
1594  "Inclusion threshold must be greater than 0");
1595  return retval;
1596  }
1597 
/* Every check passed: flip the pessimistic default to success. */
1598  retval = 0;
1599  return retval;
1600 }
1601 
1603 {
1604  sfree(psi_options);
1605  return NULL;
1606 }
1607 
1609 {
1610  BlastDatabaseOptions* options = NULL;
1611 
1612  if ( !db_options ) {
1613  return BLASTERR_INVALIDPARAM;
1614  }
1615 
1616  options = (BlastDatabaseOptions*) calloc(1, sizeof(BlastDatabaseOptions));
1617  if ( !options ) {
1618  return BLASTERR_MEMORY;
1619  }
1620 
1621  options->genetic_code = BLAST_GENETIC_CODE;
1622  *db_options = options;
1623 
1624  return 0;
1625 }
1626 
1629 {
1630 
1631  if (db_options == NULL)
1632  return NULL;
1633 
1634  sfree(db_options);
1635  return NULL;
1636 }
1637 
1639  LookupTableOptions** lookup_options,
1640  QuerySetUpOptions** query_setup_options,
1641  BlastInitialWordOptions** word_options,
1642  BlastExtensionOptions** ext_options,
1643  BlastHitSavingOptions** hit_options,
1644  BlastScoringOptions** score_options,
1645  BlastEffectiveLengthsOptions** eff_len_options,
1646  PSIBlastOptions** psi_options,
1647  BlastDatabaseOptions** db_options)
1648 {
1649  Int2 status;
1650 
1651  if ((status = LookupTableOptionsNew(program_number, lookup_options)))
1652  return status;
1653 
1654  if ((status=BlastQuerySetUpOptionsNew(query_setup_options)))
1655  return status;
1656 
1657  if ((status=BlastInitialWordOptionsNew(program_number, word_options)))
1658  return status;
1659 
1660  if ((status=BlastScoringOptionsNew(program_number, score_options)))
1661  return status;
1662 
1663  if ((status = BlastExtensionOptionsNew(program_number, ext_options,
1664  (*score_options)->gapped_calculation)))
1665  return status;
1666 
1667  if ((status=BlastHitSavingOptionsNew(program_number, hit_options,
1668  (*score_options)->gapped_calculation)))
1669  return status;
1670 
1671  if ((status=BlastEffectiveLengthsOptionsNew(eff_len_options)))
1672  return status;
1673 
1674  if ((status=PSIBlastOptionsNew(psi_options)))
1675  return status;
1676 
1677  if ((status=BlastDatabaseOptionsNew(db_options)))
1678  return status;
1679 
1680  return 0;
1681 
1682 }
1683 
1684 /** Checks that the extension and scoring options are consistent with each other
1685  * @param program_number identifies the program [in]
1686  * @param ext_options the extension options [in]
1687  * @param score_options the scoring options [in]
1688  * @param blast_msg returns a message on errors. [in|out]
1689  * @return zero on success, an error code otherwise.
1690  */
/* NOTE(review): listing line 1691 (return type, name and first parameter)
 * and, inside every error branch, the lines that open the message call and
 * return the error code were dropped from this extract. The index gives the
 * prototype as
 *   static Int2 s_BlastExtensionScoringOptionsValidate(
 *        EBlastProgramType program_number,
 *        const BlastExtensionOptions *ext_options,
 *        const BlastScoringOptions *score_options,
 *        Blast_Message **blast_msg);
 * the returned error codes are not visible here. */
1692  const BlastExtensionOptions* ext_options,
1693  const BlastScoringOptions* score_options,
1694  Blast_Message* *blast_msg)
1695 {
1696  if (ext_options == NULL || score_options == NULL)
1697  return BLASTERR_INVALIDPARAM;
1698 
/* blastn with both gap costs at zero: the error branch is taken only when
 * neither the preliminary extension nor the traceback is a greedy one. */
1699  if (program_number == eBlastTypeBlastn)
1700  {
1701  if (score_options->gap_open == 0 && score_options->gap_extend == 0)
1702  {
1703  if (ext_options->ePrelimGapExt != eGreedyScoreOnly &&
1704  ext_options->eTbackExt != eGreedyTbck)
1705  {
1708  "Greedy extension must be used if gap existence and extension options are zero");
1710  }
1711  }
1712  }
1713 
1714  if (program_number == eBlastTypeMapping) {
1715  if (ext_options->ePrelimGapExt != eJumperWithTraceback) {
1716 
1719  "Jumper extension must be used for mapping");
1720 
1722  }
1723  }
1724 
/* Composition-based statistics: the program test below accepts PSSM-query
 * programs plus tblastn, blastp, blastx, rpsblast, rpstblastn and psiblast
 * (a longer list than the message text names), and a gapped search is
 * required in addition. */
1725  if (ext_options->compositionBasedStats != eNoCompositionBasedStats)
1726  {
1727  if (!Blast_QueryIsPssm(program_number) && program_number != eBlastTypeTblastn &&
1728  program_number != eBlastTypeBlastp &&
1729  program_number != eBlastTypeBlastx &&
1730  program_number != eBlastTypeRpsBlast &&
1731  program_number != eBlastTypeRpsTblastn &&
1732  program_number != eBlastTypePsiBlast) {
1734  "Compositional adjustments are only supported with blastp, blastx, or tblastn");
1736  }
1737  if (!score_options->gapped_calculation) {
1739  "Compositional adjustments are only supported for gapped searches");
1741  }
1742 
1743  }
1744 
1745  return 0;
1746 }
1747 
1748 
1750  const BlastExtensionOptions* ext_options,
1751  const BlastScoringOptions* score_options,
1752  const LookupTableOptions* lookup_options,
1753  const BlastInitialWordOptions* word_options,
1754  const BlastHitSavingOptions* hit_options,
1755  Blast_Message* *blast_msg)
1756 {
/* Validate all options: runs the per-structure validators in the order
 * below, returning the first non-zero status, then applies two cross-option
 * checks (identity matrix vs. word size, and culling / HSP filtering with
 * the RPS programs).
 * NOTE(review): the first signature line (listing line 1749) and the lines
 * that open the message calls and return the error codes in the two
 * cross-option checks were dropped from this extract. The index gives the
 * prototype as
 *   Int2 BLAST_ValidateOptions(EBlastProgramType program_number,
 *        const BlastExtensionOptions *ext_options,
 *        const BlastScoringOptions *score_options,
 *        const LookupTableOptions *lookup_options,
 *        const BlastInitialWordOptions *word_options,
 *        const BlastHitSavingOptions *hit_options,
 *        Blast_Message **blast_msg); */
1757  Int2 status = 0;
1758 
1759  if ((status = BlastExtensionOptionsValidate(program_number, ext_options,
1760  blast_msg)) != 0)
1761  return status;
1762  if ((status = BlastScoringOptionsValidate(program_number, score_options,
1763  blast_msg)) != 0)
1764  return status;
1765  if ((status = LookupTableOptionsValidate(program_number,
1766  lookup_options, blast_msg)) != 0)
1767  return status;
1768  if ((status = BlastInitialWordOptionsValidate(program_number,
1769  word_options, blast_msg)) != 0)
1770  return status;
1771  if ((status = BlastHitSavingOptionsValidate(program_number, hit_options,
1772  blast_msg)) != 0)
1773  return status;
1774  if ((status = s_BlastExtensionScoringOptionsValidate(program_number, ext_options,
1775  score_options, blast_msg)) != 0)
1776  return status;
1777 
1778  /* Word sizes larger than 5 are not supported for IDENTITY scoring matrix.
1779  Identity matrix is only supported for blastp and tblastn. */
1780  if (program_number == eBlastTypeBlastp ||
1781  program_number == eBlastTypeTblastn) {
1782 
/* NOTE(review): matrix is passed to strcmp before the NULL test a few lines
 * below; if BLAST_StrToUpper can return NULL this dereferences a null
 * pointer -- confirm its contract and, if so, test before comparing. */
1783  char* matrix = BLAST_StrToUpper(score_options->matrix);
1784  Boolean is_identity = strcmp(matrix, "IDENTITY") == 0;
1785 
1786  if (matrix) {
1787  free(matrix);
1788  }
1789 
1790  if (lookup_options->word_size > 5 && is_identity) {
1791 
1794  "Word size larger than 5 is not supported for "
1795  "the identity scoring matrix");
1796 
1798  }
1799  }
1800 
1801  if (program_number == eBlastTypeRpsBlast ||
1802  program_number == eBlastTypeRpsTblastn) {
1803  if((hit_options->culling_limit != 0) || (hit_options->hsp_filt_opt != NULL)) {
1806  "Culling or best hit filtering is not supported");
1808  }
1809  }
1810 
1811  return status;
1812 }
1813 
/** Allocate and initialize a BlastHSPBestHitOptions structure, storing the
 * given overhang and score_edge values in it.
 * NOTE(review): listing line 1817 -- the initializer of retval, i.e. the
 * allocation -- was dropped from this extract. No NULL test on retval is
 * visible before it is dereferenced; confirm against the repository whether
 * an allocation failure is handled. */
1814 BlastHSPBestHitOptions* BlastHSPBestHitOptionsNew(double overhang, double score_edge)
1815 {
1816  BlastHSPBestHitOptions* retval =
1818  retval->overhang = overhang;
1819  retval->score_edge = score_edge;
1820  return retval;
1821 }
1822 
1823 Int2
1825 {
1826  Int2 retval = 0; /* assume success */
1827  BlastHSPBestHitOptions* best_hit = opts->best_hit;
1828 
1829  if ( !best_hit ) {
1830  return retval;
1831  }
1832 
1833  if (best_hit->overhang <= kBestHit_OverhangMin ||
1834  best_hit->overhang >= kBestHit_OverhangMax) {
1835  return -1;
1836  }
1837 
1838  if (best_hit->score_edge <= kBestHit_ScoreEdgeMin ||
1839  best_hit->score_edge >= kBestHit_ScoreEdgeMax) {
1840  return -1;
1841  }
1842 
1843  return retval;
1844 }
1845 
1847 {
1848  if ( !opt ) {
1849  return NULL;
1850  }
1851  sfree(opt);
1852  return NULL;
1853 }
1854 
1856 {
/* Allocate a new object for culling options and store max in max_hits.
 * NOTE(review): the signature (listing line 1855) and listing line 1858 --
 * the initializer of retval, i.e. the allocation -- were dropped from this
 * extract. The index gives the prototype as
 *   BlastHSPCullingOptions * BlastHSPCullingOptionsNew(int max);
 * No NULL test on retval is visible before it is dereferenced; confirm
 * against the repository whether an allocation failure is handled. */
1857  BlastHSPCullingOptions* retval =
1859  retval->max_hits = max;
1860  return retval;
1861 }
1862 
1863 Int2
1865 {
1866  Int2 retval = 0;
1867  BlastHSPCullingOptions* culling_opts = opts->culling_opts;
1868  if (!culling_opts)
1869  return retval;
1870 
1871  if (culling_opts->max_hits < 0)
1872  return -1;
1873 
1874  return retval;
1875 }
1876 
1879 {
1880  if (!culling_opts)
1881  return NULL;
1882 
1883  sfree(culling_opts);
1884  return NULL;
1885 }
1886 
1887 
1889 {
1890  return (BlastHSPFilteringOptions*)calloc(1,
1891  sizeof(BlastHSPFilteringOptions));
1892 }
1893 
1894 Int2
1896  BlastHSPBestHitOptions** best_hit,
1897  EBlastStage stage)
1898 {
1899  if ( filt_opts == NULL || best_hit == NULL || *best_hit == NULL) {
1900  return 1;
1901  }
1902 
1903  filt_opts->best_hit = *best_hit;
1904  *best_hit = NULL;
1905  filt_opts->best_hit_stage = stage;
1906 
1907  return 0;
1908 }
1909 
1910 Int2
1912  BlastHSPCullingOptions** culling,
1913  EBlastStage stage)
1914 {
1915  if ( filt_opts == NULL || culling == NULL || *culling == NULL) {
1916  return 1;
1917  }
1918 
1919  filt_opts->culling_opts = *culling;
1920  *culling = NULL;
1921  filt_opts->culling_stage = stage;
1922 
1923  return 0;
1924 }
1925 
1926 Int2
1928 {
1929  Int2 retval = 0; /* assume success */
1930  Boolean writer_found = FALSE;
1931 
1932  if ( (retval = BlastHSPBestHitOptionsValidate(opts)) != 0) {
1933  return retval;
1934  }
1935  if (opts->best_hit_stage & ePrelimSearch) {
1936  writer_found = TRUE;
1937  }
1938 
1939  if ( (retval = BlastHSPCullingOptionsValidate(opts)) != 0) {
1940  return retval;
1941  }
1942  if ((opts->culling_stage & ePrelimSearch) && writer_found) {
1943  return 1;
1944  }
1945 
1946  return retval;
1947 }
1948 
1951 {
/* Deallocate a BlastHSPFilteringOptions structure; a NULL argument returns
 * NULL immediately.
 * NOTE(review): the signature (listing lines 1949-1950) and listing lines
 * 1955-1957 were dropped from this extract; they presumably release the
 * owned sub-structures -- TODO confirm. The index gives the prototype as
 *   BlastHSPFilteringOptions * BlastHSPFilteringOptionsFree(BlastHSPFilteringOptions *opts);
 * NOTE(review): the final statement returns opts after sfree(opts); this
 * yields NULL only if the sfree macro resets its argument -- confirm against
 * blast_def.h. */
1952  if ( !opts ) {
1953  return NULL;
1954  }
1958  sfree(opts);
1959  return opts;
1960 }
1961 
1964 {
/* Allocate a new object for subject besthit options; the isProtein flag
 * selects between two initializations and retval is returned.
 * NOTE(review): the signature (listing lines 1962-1963), the declaration of
 * retval (1965-1966) and the body of both branches (1968, 1971) were dropped
 * from this extract, so what each branch assigns is not visible here. The
 * index gives the prototype as
 *   BlastHSPSubjectBestHitOptions * BlastHSPSubjectBestHitOptionsNew(Boolean isProtein); */
1967  if(isProtein){
1969  }
1970  else {
1972  }
1973  return retval;
1974 }
1975 
/** Validate subject besthit options.
 * As visible here the function returns 0 on every path: both when no
 * subject besthit options are present and when they are.
 * NOTE(review): listing line 1977 (function name and parameter) and listing
 * line 1980 (the declaration of besthit_opts) were dropped from this
 * extract. The index gives the prototype as
 *   Int2 BlastHSPSubjectBestHitOptionsValidate(const BlastHSPFilteringOptions *opts);
 */
1976 Int2
1978 {
1979  Int2 retval = 0;
1981  if (!besthit_opts)
1982  return retval;
1983 
1984  return retval;
1985 }
1986 
1989 {
1990  if (!subject_besthit_opts)
1991  return NULL;
1992 
1993  sfree(subject_besthit_opts);
1994  return NULL;
1995 }
1996 
1997 Int2
1999  BlastHSPSubjectBestHitOptions** subject_besthit)
2000 {
2001  if ( filt_opts == NULL || subject_besthit == NULL || *subject_besthit == NULL) {
2002  return 1;
2003  }
2004 
2005  filt_opts->subject_besthit_opts = *subject_besthit;
2006  *subject_besthit = NULL;
2007 
2008  return 0;
2009 }
2010 
2011 
2012 
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
const double kSegLocut
Locut parameter for SEG.
Definition: blast_seg.c:46
const int kSegWindow
Window that SEG examines at once.
Definition: blast_seg.c:45
const double kSegHicut
Hicut parameter for SEG.
Definition: blast_seg.c:47
EBlastStage
Enumeration for the stages in the BLAST search.
Definition: blast_def.h:324
@ ePrelimSearch
Preliminary stage.
Definition: blast_def.h:328
BLAST filtering functions.
Int2 BlastFilteringOptionsFromString(EBlastProgramType program_number, const char *instructions, SBlastFilterOptions **filtering_options, Blast_Message **blast_message)
Produces SBlastFilterOptions from a string that has been traditionally supported in blast.
Definition: blast_filter.c:436
#define DBSEQ_CHUNK_OVERLAP
By how much should the chunks of a subject sequence overlap if it is too long and has to be split.
Definition: blast_hits.h:192
@ eBlastSevError
Definition: blast_message.h:58
@ eBlastSevWarning
Definition: blast_message.h:57
#define BLASTERR_OPTION_PROGRAM_INVALID
The option is not supported with the specified program.
#define BLASTERR_OPTION_VALUE_INVALID
The value of the option is not supported (e.g., word size too small)
Int2 Blast_MessageWrite(Blast_Message **blast_msg, EBlastSeverity severity, int context, const char *message)
Writes a message to a structure.
const int kBlastMessageNoContext
Declared in blast_message.h as extern const.
Definition: blast_message.c:36
#define BLASTERR_MEMORY
System error: out of memory condition.
#define BLASTERR_INVALIDPARAM
Invalid parameter: possible programmer error or pre-condition not met.
PSIBlastOptions * PSIBlastOptionsFree(PSIBlastOptions *psi_options)
Deallocate PSI BLAST options.
Int2 BLAST_FillQuerySetUpOptions(QuerySetUpOptions *options, EBlastProgramType program, const char *filter_string, Uint1 strand_option)
Fill non-default contents of the QuerySetUpOptions.
BlastHSPCullingOptions * BlastHSPCullingOptionsNew(int max)
Allocate a new object for culling options.
Int2 BlastDatabaseOptionsNew(BlastDatabaseOptions **db_options)
Allocates the BlastDatabase options structure and sets the default database genetic code value (BLAST...
Int2 SWindowMaskerOptionsResetDB(SWindowMaskerOptions **winmask_options, const char *db)
Resets name of db for window masker filtering.
Int2 SRepeatFilterOptionsNew(SRepeatFilterOptions **repeat_options)
Allocates memory for SRepeatFilterOptions, fills in defaults.
Int2 PSIBlastOptionsValidate(const PSIBlastOptions *psi_options, Blast_Message **blast_msg)
Validates the PSI BLAST options so that they have sane values.
Int2 BlastHSPBestHitOptionsValidate(const BlastHSPFilteringOptions *opts)
Validate the best hit algorithm parameters (if any) in the BlastHSPFilteringOptions structure.
Int2 BLAST_ValidateOptions(EBlastProgramType program_number, const BlastExtensionOptions *ext_options, const BlastScoringOptions *score_options, const LookupTableOptions *lookup_options, const BlastInitialWordOptions *word_options, const BlastHitSavingOptions *hit_options, Blast_Message **blast_msg)
Validate all options.
Int2 BlastHSPFilteringOptions_AddCulling(BlastHSPFilteringOptions *filt_opts, BlastHSPCullingOptions **culling, EBlastStage stage)
Add the culling options.
BlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options)
Deallocate memory for BlastHitSavingOptions.
Int2 BlastScoringOptionsValidate(EBlastProgramType program_number, const BlastScoringOptions *options, Blast_Message **blast_msg)
Validate contents of BlastScoringOptions.
Int2 BlastQuerySetUpOptionsNew(QuerySetUpOptions **options)
Allocate memory for QuerySetUpOptions and fill with default values.
Int2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char *matrixName, double *threshold)
Get thresholds for word-finding suggested by Stephen Altschul.
SDustOptions * SDustOptionsFree(SDustOptions *dust_options)
Frees SDustOptions.
Definition: blast_options.c:50
Int2 BLAST_FillScoringOptions(BlastScoringOptions *options, EBlastProgramType program_number, Boolean greedy_extension, Int4 penalty, Int4 reward, const char *matrix, Int4 gap_open, Int4 gap_extend)
Fill non-default values in the BlastScoringOptions structure.
BlastHSPFilteringOptions * BlastHSPFilteringOptionsFree(BlastHSPFilteringOptions *opts)
Deallocate a BlastHSPFilteringOptions structure.
Int2 SRepeatFilterOptionsResetDB(SRepeatFilterOptions **repeat_options, const char *db)
Resets name of db for repeat filtering.
Int2 BlastScoringOptionsNew(EBlastProgramType program_number, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
Int2 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions **options)
Allocate memory for BlastEffectiveLengthsOptions* and fill with default values.
SReadQualityOptions * SReadQualityOptionsFree(SReadQualityOptions *read_quality_options)
Frees memory for SReadQualityOptions.
Boolean SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions *filter_options)
Queries whether masking should be done only for the lookup table or for the entire search.
Boolean SBlastFilterOptionsNoFiltering(const SBlastFilterOptions *filter_options)
Queries whether no masking is required.
Int2 BLAST_GetSuggestedWindowSize(EBlastProgramType program_number, const char *matrixName, Int4 *window_size)
Get window sizes for two hit algorithm suggested by Stephen Altschul.
Int2 SBlastFilterOptionsValidate(EBlastProgramType program_number, const SBlastFilterOptions *filter_options, Blast_Message **blast_message)
Validates filter options to ensure that program and options are consistent and that options have vali...
const int kDustLinker
Parameter used by dust to link together close low-complexity segments.
Definition: blast_options.c:48
BlastHSPSubjectBestHitOptions * BlastHSPSubjectBestHitOptionsFree(BlastHSPSubjectBestHitOptions *subject_besthit_opts)
Deallocates subject besthit structure.
Int2 BLAST_InitDefaultOptions(EBlastProgramType program_number, LookupTableOptions **lookup_options, QuerySetUpOptions **query_setup_options, BlastInitialWordOptions **word_options, BlastExtensionOptions **ext_options, BlastHitSavingOptions **hit_options, BlastScoringOptions **score_options, BlastEffectiveLengthsOptions **eff_len_options, PSIBlastOptions **psi_options, BlastDatabaseOptions **db_options)
Initialize all the BLAST search options structures with the default values.
SRepeatFilterOptions * SRepeatFilterOptionsFree(SRepeatFilterOptions *repeat_options)
Frees SRepeatFilterOptions.
BlastInitialWordOptions * BlastInitialWordOptionsFree(BlastInitialWordOptions *options)
Deallocate memory for BlastInitialWordOptions.
Int2 BlastHitSavingOptionsNew(EBlastProgramType program_number, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
Int2 BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions *options, Int4 dbseq_num, Int8 db_length, Int8 *searchsp_eff, Int4 num_searchsp)
Fill the non-default values in the BlastEffectiveLengthsOptions structure.
Int2 SSegOptionsNew(SSegOptions **seg_options)
Allocates memory for SSegOptions, fills in defaults.
Definition: blast_options.c:77
SWindowMaskerOptions * SWindowMaskerOptionsFree(SWindowMaskerOptions *winmask_options)
Frees SWindowMaskerOptions.
BlastEffectiveLengthsOptions * BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions *options)
Deallocate memory for BlastEffectiveLengthsOptions*.
SBlastFilterOptions * SBlastFilterOptionsFree(SBlastFilterOptions *filter_options)
Frees SBlastFilterOptions and all subservient structures.
Int2 SWindowMaskerOptionsNew(SWindowMaskerOptions **winmask_options)
Allocates memory for SWindowMaskerOptions, fills in defaults.
Definition: blast_options.c:90
Int2 BlastScoringOptionsSetMatrix(BlastScoringOptions *opts, const char *matrix_name)
Resets matrix name option.
static SDustOptions * s_MergeDustOptions(const SDustOptions *opt1, const SDustOptions *opt2)
Merges together two sets of dust options, choosing the most non-default one.
Int2 SBlastFilterOptionsMerge(SBlastFilterOptions **combined, const SBlastFilterOptions *opt1, const SBlastFilterOptions *opt2)
Merges two sets of options together, taking the non-default one as preferred.
BlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)
Deallocate memory for BlastExtensionOptions.
Int2 BLAST_FillInitialWordOptions(BlastInitialWordOptions *options, EBlastProgramType program, Int4 window_size, double xdrop_ungapped)
Fill non-default values in the BlastInitialWordOptions structure.
Int2 SDustOptionsNew(SDustOptions **dust_options)
Allocates memory for SDustOptions, fills in defaults.
Definition: blast_options.c:57
Int2 BlastInitialWordOptionsValidate(EBlastProgramType program_number, const BlastInitialWordOptions *options, Blast_Message **blast_msg)
Validate correctness of the initial word options.
Int2 BLAST_FillExtensionOptions(BlastExtensionOptions *options, EBlastProgramType program, Int4 greedy, double x_dropoff, double x_dropoff_final)
Fill non-default values in the BlastExtensionOptions structure.
Int2 LookupTableOptionsValidate(EBlastProgramType program_number, const LookupTableOptions *options, Blast_Message **blast_msg)
Validate LookupTableOptions.
Int2 BlastHitSavingOptionsValidate(EBlastProgramType program_number, const BlastHitSavingOptions *options, Blast_Message **blast_msg)
Validate BlastHitSavingOptions.
const double kPSSM_NoImpalaScaling
Value used to indicate that no IMPALA-style scaling should be performed when scaling a PSSM.
Definition: blast_options.c:43
BlastHSPCullingOptions * BlastHSPCullingOptionsFree(BlastHSPCullingOptions *culling_opts)
Deallocates culling options structure.
Int2 BLAST_FillHitSavingOptions(BlastHitSavingOptions *options, double evalue, Int4 hitlist_size, Boolean is_gapped, Int4 culling_limit, Int4 min_diag_separation)
Fill non-default values in the BlastHitSavingOptions structure.
SSegOptions * SSegOptionsFree(SSegOptions *seg_options)
Frees SSegOptions.
Definition: blast_options.c:70
Int2 BlastHSPCullingOptionsValidate(const BlastHSPFilteringOptions *opts)
Validate culling options.
Int2 BlastHSPSubjectBestHitOptionsValidate(const BlastHSPFilteringOptions *opts)
Validate subject besthit options.
static Boolean s_DiscWordOptionsValidate(Int4 word_size, Uint1 template_length, Uint1 template_type, Blast_Message **blast_msg)
Validate options for the discontiguous word megablast Word size must be 11 or 12; template length 16,...
BlastHSPFilteringOptions * BlastHSPFilteringOptionsNew()
Allocate and initialize a BlastHSPFilteringOptions structure.
Int2 LookupTableOptionsNew(EBlastProgramType program_number, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
BlastDatabaseOptions * BlastDatabaseOptionsFree(BlastDatabaseOptions *db_options)
Deallocate database options.
const int kDustLevel
Declared in blast_def.h as extern const.
Definition: blast_options.c:46
BlastHSPSubjectBestHitOptions * BlastHSPSubjectBestHitOptionsNew(Boolean isProtein)
Allocate a new object for subject besthit options.
const int kDustWindow
Window parameter used by dust.
Definition: blast_options.c:47
Int2 BLAST_FillLookupTableOptions(LookupTableOptions *options, EBlastProgramType program_number, Boolean is_megablast, double threshold, Int4 word_size)
Allocate memory for lookup table options and fill with default values.
Int2 SReadQualityOptionsNew(SReadQualityOptions **read_quality_options)
Allocates memory for SReadQualityOptions, fills in defaults.
Int2 BlastHSPFilteringOptions_AddSubjectBestHit(BlastHSPFilteringOptions *filt_opts, BlastHSPSubjectBestHitOptions **subject_besthit)
static SWindowMaskerOptions * s_MergeWindowMaskerOptions(const SWindowMaskerOptions *opt1, const SWindowMaskerOptions *opt2)
Merges together two sets of window masker options, choosing the most non-default one.
BlastHSPBestHitOptions * BlastHSPBestHitOptionsNew(double overhang, double score_edge)
Allocate and initialize a BlastHSPBestHitOptions structure.
Int2 SBlastFilterOptionsNew(SBlastFilterOptions **filter_options, EFilterOptions type)
Allocates memory for SBlastFilterOptions and.
static Int2 s_BlastExtensionScoringOptionsValidate(EBlastProgramType program_number, const BlastExtensionOptions *ext_options, const BlastScoringOptions *score_options, Blast_Message **blast_msg)
Checks that the extension and scoring options are consistent with each other.
Boolean BlastEffectiveLengthsOptions_IsSearchSpaceSet(const BlastEffectiveLengthsOptions *options)
Return true if the search space is set for any of the queries in the search.
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
static SSegOptions * s_MergeSegOptions(const SSegOptions *opt1, const SSegOptions *opt2)
Merges together two sets of SEG options, choosing the most non-default one.
Int2 BlastHSPFilteringOptions_AddBestHit(BlastHSPFilteringOptions *filt_opts, BlastHSPBestHitOptions **best_hit, EBlastStage stage)
Add the best hit options.
static SRepeatFilterOptions * s_MergeRepeatOptions(const SRepeatFilterOptions *opt1, const SRepeatFilterOptions *opt2)
Merges together two sets of repeat filter options, choosing the most non-default one.
Int2 BlastInitialWordOptionsNew(EBlastProgramType program, BlastInitialWordOptions **options)
Allocate memory for BlastInitialWordOptions and fill with default values.
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
Int2 BlastExtensionOptionsValidate(EBlastProgramType program_number, const BlastExtensionOptions *options, Blast_Message **blast_msg)
Validate contents of BlastExtensionOptions.
BlastHSPBestHitOptions * BlastHSPBestHitOptionsFree(BlastHSPBestHitOptions *opt)
Deallocate a BlastHSPBestHitOptions structure.
LookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)
Deallocates memory for LookupTableOptions*.
Int2 BlastScoringOptionsDup(BlastScoringOptions **new_opt, const BlastScoringOptions *old_opt)
Produces copy of "old" options, with new memory allocated.
QuerySetUpOptions * BlastQuerySetUpOptionsFree(QuerySetUpOptions *options)
Deallocate memory for QuerySetUpOptions.
Int2 BlastHSPFilteringOptionsValidate(const BlastHSPFilteringOptions *opts)
Validates the BlastHSPFilteringOptions structure.
Int2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)
Allocate memory for BlastExtensionOptions and fill with default values.
The structures and functions in blast_options.[ch] should be used to specify user preferences.
#define PSI_INCLUSION_ETHRESH
Defaults for PSI-BLAST and DELTA-BLAST options.
#define BLAST_GAP_X_DROPOFF_NUCL
default dropoff for non-greedy nucleotide gapped extensions
#define BLAST_HITLIST_SIZE
Number of database sequences to save hits for.
#define BLAST_GAP_OPEN_MEGABLAST
default gap open penalty (megablast with greedy gapped alignment)
Definition: blast_options.h:87
#define BLAST_UNGAPPED_X_DROPOFF_NUCL
ungapped dropoff score for blastn (and megablast)
#define BLAST_WORD_THRESHOLD_BLASTX
default threshold (blastx)
#define BLAST_UNGAPPED_X_DROPOFF_PROT
default dropoff for ungapped extension; ungapped extensions will stop when the score for the extensio...
#define BLAST_WORDSIZE_MAPPER
default word size for mapping rna-seq to a genome
Definition: blast_options.h:73
#define BLAST_GAP_EXTN_MEGABLAST
default gap extension penalty (megablast with greedy gapped alignment)
Definition: blast_options.h:95
#define DEFAULT_SUBJECT_BESTHIT_PROT_MAX_RANGE_DIFF
@ eJumperWithTraceback
Jumper extension (mapping)
@ eDynProgScoreOnly
standard affine gapping
@ eGreedyScoreOnly
Greedy extension (megaBlast)
@ eSmithWatermanScoreOnly
Score-only smith-waterman.
#define BLAST_GAP_OPEN_PROT
Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
Definition: blast_options.h:84
#define BLAST_WORDSIZE_MEGABLAST
default word size (contiguous megablast; for discontig megablast the word size is explicitly overridd...
Definition: blast_options.h:68
#define DEFAULT_SUBJECT_BESTHIT_NUCL_MAX_RANGE_DIFF
#define BLAST_GAP_OPEN_NUCL
default gap open penalty (blastn)
Definition: blast_options.h:86
#define BLAST_GAP_TRIGGER_NUCL
default bit score that will trigger a gapped extension for blastn
#define BLAST_GAP_EXTN_PROT
cost to extend a gap.
Definition: blast_options.h:92
#define BLAST_GAP_X_DROPOFF_FINAL_NUCL
default dropoff for final nucleotide gapped extensions
#define MAX_DB_WORD_COUNT_MAPPER
Default max frequency for a database word.
#define BLAST_DEFAULT_MATRIX
Default matrix name: BLOSUM62.
Definition: blast_options.h:77
#define BLAST_EXPECT_VALUE
Default parameters for saving hits.
#define BLAST_SCAN_RANGE_NUCL
default scan range (blastn)
Definition: blast_options.h:63
#define BLAST_WINDOW_SIZE_NUCL
default window size (blastn)
Definition: blast_options.h:58
#define BLAST_GAP_X_DROPOFF_GREEDY
default dropoff for greedy nucleotide gapped extensions
#define BLAST_WORD_THRESHOLD_TBLASTX
default threshold (tblastx)
#define BLAST_WORD_THRESHOLD_BLASTP
neighboring word score thresholds; a threshold of zero means that only query and subject words that m...
#define BLAST_GAP_EXTN_NUCL
default gap extension penalty (blastn)
Definition: blast_options.h:94
#define BLAST_GAP_TRIGGER_PROT
default bit score that will trigger gapped extension
#define BLAST_GAP_X_DROPOFF_PROT
default dropoff for preliminary gapped extensions
#define PSI_PSEUDO_COUNT_CONST
Pseudo-count constant for PSI-BLAST.
#define BLAST_REWARD
default nucleotide match score
@ eGreedyTbck
Greedy extension (megaBlast)
@ eDynProgTbck
standard affine gapping
@ eSmithWatermanTbckFull
Smith-waterman to find all alignments.
#define BLAST_PENALTY
default reward and penalty (only applies to blastn/megablast)
#define BLAST_WINDOW_SIZE_PROT
Some default values (used when creating blast options block and for command-line program defaults).
Definition: blast_options.h:57
EFilterOptions
Types of filtering options.
@ eDustRepeats
Repeat and dust filtering for nucleotides.
@ eRepeats
Repeat filtering for nucleotides.
@ eDust
low-complexity for nucleotides.
@ eEmpty
no filtering at all.
@ eSeg
low-complexity for proteins.
#define BLAST_GAP_X_DROPOFF_FINAL_PROT
default dropoff for the final gapped extension with traceback
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
#define BLAST_WORD_THRESHOLD_TBLASTN
default neighboring threshold (tblastn/rpstblastn)
#define BLAST_WORDSIZE_PROT
length of word to trigger an extension.
Definition: blast_options.h:66
#define BLAST_WORDSIZE_NUCL
default word size (blastn)
Definition: blast_options.h:67
#define kDefaultRepeatFilterDb
Default value for repeats database filtering.
@ eNaLookupTable
blastn lookup table
@ eMBLookupTable
megablast lookup table (includes both contiguous and discontiguous megablast)
@ ePhiNaLookupTable
nucleotide lookup table for phi-blast
@ eAaLookupTable
standard protein (blastp) lookup table
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
@ ePhiLookupTable
protein lookup table specialized for phi-blast
@ eRPSLookupTable
RPS lookup table (rpsblast and rpstblastn)
@ eNaHashLookupTable
used for 16-base words
Boolean Blast_ProgramIsMapping(EBlastProgramType p)
Definition: blast_program.c:76
Boolean Blast_QueryIsPssm(EBlastProgramType p)
Returns true if the query is PSSM.
Definition: blast_program.c:46
Boolean Blast_ProgramIsPhiBlast(EBlastProgramType p)
Returns true if program is PHI-BLAST (i.e.
Definition: blast_program.c:70
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Definition: blast_program.c:60
Boolean Blast_ProgramIsNucleotide(EBlastProgramType p)
Definition: blast_program.c:82
Boolean Blast_QueryIsPattern(EBlastProgramType p)
Definition: blast_program.c:79
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST (i.e.
Definition: blast_program.c:73
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastn
Definition: blast_program.h:74
@ eBlastTypeBlastx
Definition: blast_program.h:75
@ eBlastTypePsiTblastn
Definition: blast_program.h:83
@ eBlastTypeRpsTblastn
Definition: blast_program.h:85
@ eBlastTypePhiBlastn
Definition: blast_program.h:87
@ eBlastTypeMapping
Definition: blast_program.h:88
@ eBlastTypeTblastx
Definition: blast_program.h:79
@ eBlastTypePsiBlast
Definition: blast_program.h:82
@ eBlastTypePhiBlastp
Definition: blast_program.h:86
@ eBlastTypeRpsBlast
Definition: blast_program.h:84
@ eBlastTypeTblastn
Definition: blast_program.h:77
@ eBlastTypeBlastp
Definition: blast_program.h:73
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Definition: blast_program.c:63
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
char * BLAST_PrintAllowedValues(const char *matrix, Int4 gap_open, Int4 gap_extend)
Prints a message about the allowed open etc values for the given matrix, BlastKarlinBlkGappedFill sh...
Definition: blast_stat.c:3787
Int2 Blast_KarlinBlkGappedLoadFromTables(Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, const char *matrix_name, Boolean standard_only)
Attempts to fill KarlinBlk for given gap opening, extensions etc.
Definition: blast_stat.c:3577
char * BLAST_PrintMatrixMessage(const char *matrix, Boolean standard_only)
Prints a message about the allowed matrices, BlastKarlinBlkGappedFill should return 1 before this is...
Definition: blast_stat.c:3760
Boolean BLAST_CheckRewardPenaltyScores(Int4 reward, Int4 penalty)
Check the validity of the reward and penalty scores.
Definition: blast_stat.c:3454
Various auxiliary BLAST utility functions.
char * BLAST_StrToUpper(const char *string)
Returns a copy of the input string with all its characters turned to uppercase.
Definition: blast_util.c:1352
Constants used in compositional score matrix adjustment.
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eNoCompositionBasedStats
Don't use composition based statistics.
static ulg window_size
#define NULL
Definition: ncbistd.hpp:225
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
Implementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
#define kBestHit_OverhangMax
Maximum value for overhang.
#define kBestHit_OverhangMin
Minimum value for overhang.
#define kBestHit_ScoreEdgeMin
Minimum value for score_edge.
#define kBestHit_ScoreEdgeMax
Maximum value for score_edge.
Implementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
int i
if(yy_accept[yy_current_state])
int strcmp(const char *str1, const char *str2)
Definition: odbc_utils.hpp:160
#define strdup
Definition: ncbi_ansi_ext.h:70
#define strcasecmp
#define DIM(A)
dimension of an array.
Definition: ncbi_std.h:176
#define INT4_MAX
largest number represented by signed int
Definition: ncbi_std.h:141
void * BlastMemDup(const void *orig, size_t size)
Copies memory using memcpy and malloc.
Definition: ncbi_std.c:35
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define INT2_MAX
largest number represented by signed (two byte) short
Definition: ncbi_std.h:156
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
#define MAX(a, b)
returns larger of a and b.
Definition: ncbi_std.h:117
int toupper(Uchar c)
Definition: ncbictype.hpp:73
T max(T x_, T y_)
static pcre_uint8 * buffer
Definition: pcretest.c:1051
Options used to create the ReadDBFILE structure Include database name and various information for res...
Int4 genetic_code
Genetic code to use for translation, tblast[nx] only.
Options for setting up effective lengths and search spaces.
Int8 * searchsp_eff
Search space to be used for statistical calculations (one such per query context)
Int8 db_length
Database length to be used for statistical calculations.
Int4 dbseq_num
Number of database sequences to be used for statistical calculations.
Int4 num_searchspaces
Number of elements in searchsp_eff, this must be equal to the number of contexts in the search.
Options used for gapped extension These include: a.
EBlastTbackExt eTbackExt
type of traceback extension.
EBlastPrelimGapExt ePrelimGapExt
type of preliminary gapped extension (normally) for calculating score.
double gap_x_dropoff_final
X-dropoff value for the final gapped extension (in bits)
double gap_x_dropoff
X-dropoff value for gapped extension (in bits)
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used
Options for the Best Hit HSP collection algorithm.
Options for the HSP culling algorithm.
int max_hits
Maximum number of hits per area of query.
Structure containing the HSP filtering/writing options.
BlastHSPBestHitOptions * best_hit
Best Hit algorithm.
BlastHSPSubjectBestHitOptions * subject_besthit_opts
Subject Culling.
BlastHSPCullingOptions * culling_opts
culling algorithm
Options used when evaluating and saving hits These include: a.
Int4 culling_limit
If the query range of an HSP is contained in at least this many higher-scoring HSPs,...
Int4 longest_intron
The longest distance between HSPs allowed for combining via sum statistics with uneven gaps.
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.
Int4 cutoff_score
The (raw) score cut-off threshold.
Int4 hitlist_size
Maximal number of database sequences to return results for.
Int4 min_diag_separation
How many diagonals separate a hit from a substantial alignment before it's not blocked out.
Int4 max_edit_distance
Maximum number of mismatches and gaps.
BlastHSPFilteringOptions * hsp_filt_opt
Contains options to configure the HSP filtering/writing structures If not set, the default HSP filt...
Options needed for initial word finding and processing.
double x_dropoff
X-dropoff value (in bits) for the ungapped extension.
Int4 window_size
Maximal allowed distance between 2 hits in case 2 hits are required to trigger the extension.
Int4 scan_range
Maximal number of gaps allowed between 2 hits.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Int2 penalty
Penalty for a mismatch.
EBlastProgramType program_number
indicates blastn, blastp, etc.
Int4 gap_open
Extra penalty for starting a gap.
Int4 gap_extend
Penalty for each gap residue.
Int2 reward
Reward for a match.
Boolean gapped_calculation
gap-free search if FALSE
char * matrix_path
Directory path to where matrices are stored.
char * matrix
Name of the matrix containing all scores: needed for finding neighboring words.
Boolean is_ooframe
Should out-of-frame gapping be used in a translated search?
Structure to hold a message from the core of the BLAST engine.
Definition: blast_message.h:70
Options needed to construct a lookup table Also needed: query sequence and query length.
Int4 word_size
Determines the size of the lookup table.
char * phi_pattern
PHI-BLAST pattern.
Uint1 max_db_word_count
words with larger frequency in the database will be masked in the lookup table, if the db_filter opto...
Boolean db_filter
scan the database and include only words that appear in the database between 1 and 9 times (currently...
double threshold
Score threshold for putting words in a lookup table (fractional values are allowed,...
Int4 mb_template_type
Type of a discontiguous word template.
ELookupTableType lut_type
What kind of lookup table to construct?
Int4 mb_template_length
Length of the discontiguous words.
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
Boolean nsg_compatibility_mode
Compatibility option for the NCBI's structure group (note nsg_ prefix, stands for NCBI's structure gr...
double impala_scaling_factor
Scaling factor as used in IMPALA to do the matrix rescaling.
double inclusion_ethresh
Minimum evalue for inclusion in PSSM calculation.
Boolean ignore_unaligned_positions
This turns off a validation for the multiple sequence alignment in the PSSM engine for unaligned posi...
Int4 pseudo_count
Pseudocount constant.
Boolean use_best_alignment
If set to TRUE, use the best alignment when multiple HSPs are found in a query-subject alignment (i....
Options required for setting up the query sequence.
Uint1 strand_option
In blastn: which strand to search: 1 = forward; 2 = reverse; 3 = both.
char * filter_string
DEPRECATED, filtering options above.
SBlastFilterOptions * filtering_options
structured options for all filtering offered from algo/blast/core for BLAST.
All filtering options.
SRepeatFilterOptions * repeatFilterOptions
for organism specific repeat filtering.
SSegOptions * segOptions
low-complexity filtering for protein sequences (includes translated nucleotides).
SReadQualityOptions * readQualityOptions
quality filtering for mapping next-generation sequences
Boolean mask_at_hash
mask query only for lookup table creation
SWindowMaskerOptions * windowMaskerOptions
organism specific filtering with window masker.
SDustOptions * dustOptions
low-complexity filtering for nucleotides.
Options for dust algorithm, applies only to nucl.
int linker
min distance to link segments.
Filtering options for mapping next-generation sequences.
Filtering options for organism-specific repeat filtering.
char * database
Nucleotide database for mini BLAST search.
Options for SEG algorithm, applies only to protein-protein comparisons.
int window
initial window to trigger further work.
Filtering options for organism-specific filtering with Window Masker.
const char * database
Use winmasker database at this location.
int taxid
Select masking database for this TaxID.
Definition: type.c:6
@ FALSE
Definition: testodbc.c:27
@ TRUE
Definition: testodbc.c:27
void free(voidpf ptr)
voidp malloc(uInt size)
voidp calloc(uInt items, uInt size)
Modified on Wed Feb 28 07:12:13 2024 by modify_doxy.py rev. 669887