NCBI C++ ToolKit
blast_options.h
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_options.h 99040 2023-02-07 13:36:00Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Tom Madden
27  *
28  */
29 
30 /** @file blast_options.h
31  * The structures and functions in blast_options.[ch] should be used to specify
32  * user preferences. The options structures should not be changed by the BLAST code
33  * but rather be read to determine user preferences. When possible these structures
34  * should be passed in as "const".
35  */
36 
37 #ifndef __BLASTOPTIONS__
38 #define __BLASTOPTIONS__
39 
45 
46 #ifdef __cplusplus
47 extern "C" {
48 #endif
49 
50 
51 /** Some default values (used when creating blast options block and for
52  * command-line program defaults). When changing these defaults, please
53  * remember to update the defaults in the command-line programs.
54  */
55 
56 /** "window" between hits to trigger an extension. */
57 #define BLAST_WINDOW_SIZE_PROT 40 /**< default window (all protein searches) */
58 #define BLAST_WINDOW_SIZE_NUCL 0 /**< default window size (blastn) */
59 #define BLAST_WINDOW_SIZE_MEGABLAST 0 /**< default window size
60  (contiguous megablast) */
61 #define BLAST_WINDOW_SIZE_DISC 40 /**< default window size
62  (discontiguous megablast) */
63 #define BLAST_SCAN_RANGE_NUCL 0 /**< default scan range (blastn) */
64 
65 /** length of word to trigger an extension. */
66 #define BLAST_WORDSIZE_PROT 3 /**< default word size (all protein searches) */
67 #define BLAST_WORDSIZE_NUCL 11 /**< default word size (blastn) */
68 #define BLAST_WORDSIZE_MEGABLAST 28 /**< default word size (contiguous
69  megablast; for discontig megablast
70  the word size is explicitly
71  overridden) */
72 
73 #define BLAST_WORDSIZE_MAPPER 18 /**< default word size for mapping rna-seq
74  to a genome */
75 
76 /** Default matrix name: BLOSUM62 */
77 #define BLAST_DEFAULT_MATRIX "BLOSUM62"
78 
79 /** Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
80  * More gap costs are listed in BLASTOptionSetGapParams
81  */
82 
83 /** cost for the existence of a gap.*/
84 #define BLAST_GAP_OPEN_PROT 11 /**< default gap open penalty (all
85  protein searches) */
86 #define BLAST_GAP_OPEN_NUCL 5 /**< default gap open penalty (blastn) */
87 #define BLAST_GAP_OPEN_MEGABLAST 0 /**< default gap open penalty (megablast
88  with greedy gapped alignment) */
89 #define BLAST_GAP_OPEN_MAPPER 0 /**< default gap open penalty (mapper) */
90 
91 /** cost to extend a gap. */
92 #define BLAST_GAP_EXTN_PROT 1 /**< default gap extension penalty (all
93  protein searches) */
94 #define BLAST_GAP_EXTN_NUCL 2 /**< default gap extension penalty (blastn) */
95 #define BLAST_GAP_EXTN_MEGABLAST 0 /**< default gap extension penalty (megablast
96  with greedy gapped alignment) */
97
98 #define BLAST_GAP_EXTN_MAPPER 4 /**< default gap extension penalty (mapper) */
99 
100 /** neighboring word score thresholds; a threshold of zero
101  * means that only query and subject words that match exactly
102  * will go into the BLAST lookup table when it is generated
103  */
104 #define BLAST_WORD_THRESHOLD_BLASTP 11 /**< default neighboring threshold
105  (blastp and for rpsblast at RPS-BLAST
106  database creation time) */
107 #define BLAST_WORD_THRESHOLD_BLASTP_FAST 19.3 /**< neighboring threshold for word size 5 (blastp/x-fast) */
108 #define BLAST_WORD_THRESHOLD_BLASTP_WD_SZ_6 21 /**< neighboring threshold for word size 6 */
109 #define BLAST_WORD_THRESHOLD_BLASTP_WD_SZ_7 20.25 /**< neighboring threshold for word size 7 */
110
111 #define BLAST_WORD_THRESHOLD_BLASTN 0 /**< default threshold (blastn) */
112 #define BLAST_WORD_THRESHOLD_BLASTX 12 /**< default threshold (blastx) */
113 #define BLAST_WORD_THRESHOLD_TBLASTN 13 /**< default neighboring threshold
114  (tblastn/rpstblastn) */
115 #define BLAST_WORD_THRESHOLD_TBLASTX 13 /**< default threshold (tblastx) */
116 #define BLAST_WORD_THRESHOLD_MEGABLAST 0 /**< default threshold (megablast) */
117 
118 /** default dropoff for ungapped extension; ungapped extensions
119  * will stop when the score for the extension has dropped from
120  * the current best score by at least this much
121  */
122 #define BLAST_UNGAPPED_X_DROPOFF_PROT 7 /**< ungapped dropoff score for all
123  searches except blastn */
124 #define BLAST_UNGAPPED_X_DROPOFF_NUCL 20 /**< ungapped dropoff score for
125  blastn (and megablast) */
126 
127 /** default dropoff for preliminary gapped extensions */
128 #define BLAST_GAP_X_DROPOFF_PROT 15 /**< default dropoff (all protein-
129  based gapped extensions) */
130 #define BLAST_GAP_X_DROPOFF_NUCL 30 /**< default dropoff for non-greedy
131  nucleotide gapped extensions */
132 #define BLAST_GAP_X_DROPOFF_GREEDY 25 /**< default dropoff for greedy
133  nucleotide gapped extensions */
134 #define BLAST_GAP_X_DROPOFF_TBLASTX 0 /**< default dropoff for tblastx */
135 
136 /** default bit score that will trigger gapped extension */
137 #define BLAST_GAP_TRIGGER_PROT 22.0 /**< default bit score that will trigger
138  a gapped extension for all protein-
139  based searches */
140 #define BLAST_GAP_TRIGGER_NUCL 27.0 /**< default bit score that will trigger
141  a gapped extension for blastn */
142 
143 /** default dropoff for the final gapped extension with traceback */
144 #define BLAST_GAP_X_DROPOFF_FINAL_PROT 25 /**< default dropoff (all protein-
145  based gapped extensions) */
146 #define BLAST_GAP_X_DROPOFF_FINAL_NUCL 100 /**< default dropoff for nucleotide
147  gapped extensions */
148 #define BLAST_GAP_X_DROPOFF_FINAL_TBLASTX 0 /**< default dropoff for tblastx */
149 
150 /** default reward and penalty (only applies to blastn/megablast) */
151 #define BLAST_PENALTY -3 /**< default nucleotide mismatch score */
152 #define BLAST_REWARD 1 /**< default nucleotide match score */
153
154 #define BLAST_PENALTY_MAPPER -4 /**< default nucleotide mismatch score (mapper) */
155 #define BLAST_REWARD_MAPPER 1 /**< default nucleotide match score (mapper) */
156 
157 /** Default parameters for saving hits */
158 #define BLAST_EXPECT_VALUE 10.0 /**< by default, alignments whose expect
159  value exceeds this number are discarded */
160 #define BLAST_HITLIST_SIZE 500 /**< Number of database sequences to save hits
161  for */
162 /** Defaults for PSI-BLAST and DELTA-BLAST options */
163 #define PSI_INCLUSION_ETHRESH 0.002 /**< Inclusion threshold for PSI BLAST */
164 #define PSI_PSEUDO_COUNT_CONST 0 /**< Pseudo-count constant for PSI-BLAST */
165 #define DELTA_INCLUSION_ETHRESH 0.05 /**< Inclusion threshold for DELTA-BLAST */
166 
167 /** Default genetic code for query and/or database */
168 #define BLAST_GENETIC_CODE 1 /**< Use the standard genetic code for converting
169  groups of three nucleotide bases to protein
170  letters */
171 
172 /** Default max frequency for a database word. Words with higher frequency
173  will be masked in the lookup table. */
174 #define MAX_DB_WORD_COUNT_MAPPER 30
175 
176 /** Default maximum insert size: distance on the subject between reads that
177  belong to a pair, for spliced and non-spliced alignments */
178 #define MAGICBLAST_MAX_INSERT_SIZE_SPLICED 1000000
179 #define MAGICBLAST_MAX_INSERT_SIZE_NONSPLICED 100000
180 
181 
182 /** Value used to indicate that no IMPALA-style scaling should be performed
183  * when scaling a PSSM */
184 extern const double kPSSM_NoImpalaScaling;
185 
186 /** Types of the lookup table */
187 typedef enum {
188  eMBLookupTable, /**< megablast lookup table (includes both
189  contiguous and discontiguous megablast) */
190  eSmallNaLookupTable, /**< lookup table for blastn with small query*/
191  eNaLookupTable, /**< blastn lookup table */
192  eAaLookupTable, /**< standard protein (blastp) lookup table */
193  eCompressedAaLookupTable, /**< compressed alphabet (blastp) lookup table */
194  ePhiLookupTable, /**< protein lookup table specialized for phi-blast */
195  ePhiNaLookupTable, /**< nucleotide lookup table for phi-blast */
196  eRPSLookupTable, /**< RPS lookup table (rpsblast and rpstblastn) */
197  eIndexedMBLookupTable, /**< use database index as a lookup structure */
198  eMixedMBLookupTable, /**< use when some volumes are searched with index and
199  some are not */
200  eNaHashLookupTable /**< used for 16-base words */
202 
203 /** Options needed to construct a lookup table.
204  * Also needed: query sequence and query length.
205  */
206 typedef struct LookupTableOptions {
207  double threshold; /**< Score threshold for putting words in a lookup table
208  (fractional values are allowed, and could be
209  important if there is scaling involved) */
210  ELookupTableType lut_type; /**< What kind of lookup table to construct? */
211  Int4 word_size; /**< Determines the size of the lookup table */
212  Int4 mb_template_length; /**< Length of the discontiguous words */
213  Int4 mb_template_type; /**< Type of a discontiguous word template */
214  char* phi_pattern; /**< PHI-BLAST pattern */
215  EBlastProgramType program_number; /**< indicates blastn, blastp, etc. */
216  Uint4 stride; /**< number of words to skip after collecting each word */
217  Boolean db_filter; /**< scan the database and include only words that appear
218  in the database between 1 and 9 times
219  (currently implemented only for MB lookuptable
220  and lookup table word size 16) */
221  Uint1 max_db_word_count; /**< words with larger frequency in the database
222  will be masked in the lookup table, if the
223  db_filter option is on */
225 
226 /** Options for dust algorithm, applies only to nucl.-nucl. comparisons.
227  * A value of less than zero means the default value will be applied.
228  */
229 typedef struct SDustOptions {
230  int level; /**< NOTE(review): undocumented; presumably the dust score threshold -- confirm */
231  int window; /**< NOTE(review): undocumented; presumably the dust window size -- confirm */
232  int linker; /**< min distance to link segments. */
234 
235 
236 /** Options for SEG algorithm, applies only to protein-protein comparisons.
237  * A value of less than zero means the default value will be applied.
238  */
239 typedef struct SSegOptions {
240  int window; /**< initial window to trigger further work. */
241  double locut; /**< NOTE(review): undocumented; presumably the SEG low complexity cutoff -- confirm */
242  double hicut; /**< NOTE(review): undocumented; presumably the SEG high complexity cutoff -- confirm */
244 
245 /// Default value for repeats database filtering
246 #define kDefaultRepeatFilterDb "repeat/repeat_9606"
247 
248 /** Filtering options for organism-specific repeats filtering.
249  Currently this consists of only the db name but could be expanded
250  in the future to include other types of filtering or other options.
251  */
252 typedef struct SRepeatFilterOptions {
253  char* database; /**< Nucleotide database for mini BLAST search. */
255 
256 /** Filtering options for organism-specific filtering with Window
257  Masker. The taxid and filename are alternative means of choosing
258  which Window Masker database to use.
259  */
260 typedef struct SWindowMaskerOptions {
261  int taxid; /**< Select masking database for this TaxID. */
262  const char * database; /**< Use winmasker database at this location. */
264 
265 /** Filtering options for mapping next-generation sequences */
266 typedef struct SReadQualityOptions {
267  double frac_ambig; /**< Fraction of ambiguous bases */
268  int entropy; /**< Dimer entropy */
270 
271 /** All filtering options */
272 typedef struct SBlastFilterOptions {
273  Boolean mask_at_hash; /**< mask query only for lookup table creation */
274  SDustOptions* dustOptions; /**< low-complexity filtering for nucleotides. */
275  SSegOptions* segOptions; /**< low-complexity filtering for protein sequences
276  (includes translated nucleotides). */
277  SRepeatFilterOptions* repeatFilterOptions; /**< for organism-specific repeat filtering. */
278  SWindowMaskerOptions* windowMaskerOptions; /**< organism-specific filtering with window masker. */
279
280  SReadQualityOptions* readQualityOptions; /**< quality filtering for mapping next-generation sequences */
282 
283 
284 /** Options required for setting up the query sequence */
285 typedef struct QuerySetUpOptions {
286  SBlastFilterOptions* filtering_options; /**< structured options for all filtering
287  offered from algo/blast/core for BLAST. */
288  char* filter_string; /**< DEPRECATED, filtering options above. */
289 
290  Uint1 strand_option; /**< In blastn: which strand to search: 1 = forward;
291  2 = reverse; 3 = both */
292  Int4 genetic_code; /**< Genetic code to use for translation,
293  [t]blastx only */
295 
296 /** Options needed for initial word finding and processing */
297 typedef struct BlastInitialWordOptions {
298  double gap_trigger; /**< Score in bits for starting gapped extension */
299  Int4 window_size; /**< Maximal allowed distance between 2 hits in case 2
300  hits are required to trigger the extension */
301  Int4 scan_range; /**< Maximal number of gaps allowed between 2 hits */
302  double x_dropoff; /**< X-dropoff value (in bits) for the ungapped
303  extension */
304  EBlastProgramType program_number; /**< indicates blastn, blastp, etc. */
306 
307 /** The algorithm to be used for preliminary
308  * gapped extensions
309  */
310 typedef enum EBlastPrelimGapExt {
311  eDynProgScoreOnly, /**< standard affine gapping */
312  eGreedyScoreOnly, /**< Greedy extension (megaBlast) */
313  eJumperWithTraceback, /**< Jumper extension (mapping) */
314  eSmithWatermanScoreOnly /**< Score-only smith-waterman */
316 
317 /** The algorithm to be used for final gapped
318  * extensions with traceback
319  */
320 typedef enum EBlastTbackExt {
321  eDynProgTbck, /**< standard affine gapping */
322  eGreedyTbck, /**< Greedy extension (megaBlast) */
323  eSmithWatermanTbck, /**< Smith-waterman finds optimal scores, then
324  ALIGN_EX to find alignment. */
325  eSmithWatermanTbckFull /**< Smith-waterman to find all alignments */
327 
328 /** Options used for gapped extension
329  * These include:
330  * a. Penalties for various types of gapping;
331  * b. Drop-off values for the extension algorithm's tree exploration;
332  * c. Parameters identifying what kind of extension algorithm(s) should
333  * be used.
334  */
335 typedef struct BlastExtensionOptions {
336  double gap_x_dropoff; /**< X-dropoff value for gapped extension (in bits) */
337  double gap_x_dropoff_final;/**< X-dropoff value for the final gapped
338  extension (in bits) */
339  EBlastPrelimGapExt ePrelimGapExt; /**< type of preliminary gapped extension (normally) for calculating
340  score. */
341  EBlastTbackExt eTbackExt; /**< type of traceback extension. */
342  Int4 compositionBasedStats; /**< mode of compositional adjustment to use;
343  if zero then compositional adjustment is
344  not used */
345  Int4 unifiedP; /**< Indicates unified P values to be used in blastp or tblastn */
346
347  Int4 max_mismatches; /**< Maximum number of mismatches allowed for Jumper */
348
349  Int4 mismatch_window; /**< Window for counting mismatches for Jumper */
350
351  EBlastProgramType program_number; /**< indicates blastn, blastp, etc. */
353 
354 /** Options for the Best Hit HSP collection algorithm */
355 typedef struct BlastHSPBestHitOptions {
356  double overhang; /**< NOTE(review): undocumented; presumably the best-hit overhang value -- confirm meaning and valid range */
357  double score_edge; /**< NOTE(review): undocumented; presumably the best-hit score edge value -- confirm meaning and valid range */
359 
360 /** Options for the HSP culling algorithm */
361 typedef struct BlastHSPCullingOptions {
362  int max_hits; /**< Maximum number of hits per area of query. */
364 
366  unsigned int max_range_diff;
368 
369 /** Structure containing the HSP filtering/writing options */
370 typedef struct BlastHSPFilteringOptions {
371  /** Best Hit algorithm */
373  EBlastStage best_hit_stage; /**< when to apply the best hit algorithm */
374
375  /** culling algorithm */
377  EBlastStage culling_stage; /**< when to apply the culling algorithm */
378
379  /** Subject Culling */
382 
383 /** Options used when evaluating and saving hits
384  * These include:
385  * a. Restrictions on the number of hits to be saved;
386  * b. Restrictions on the quality and positions of hits to be saved;
387  * c. Parameters used to evaluate the quality of hits.
388  */
389 typedef struct BlastHitSavingOptions {
390  double expect_value; /**< The expect value cut-off threshold for an HSP, or
391  a combined hit if sum statistics is used */
392  Int4 cutoff_score; /**< The (raw) score cut-off threshold */
393  Int4 cutoff_score_fun[2]; /**< Coefficients x100 for the raw score cut-off
394  threshold as a function of query length:
395  x[0] + x[1] * query_length*/
396  double percent_identity; /**< The percent identity cut-off threshold */
397 
398  Int4 max_edit_distance; /**< Maximum number of mismatches and gaps */
399 
400  Int4 hitlist_size;/**< Maximal number of database sequences to return
401  results for */
402  Int4 hsp_num_max; /**< Maximal number of HSPs to save for one database
403  sequence */
404  Int4 total_hsp_limit; /**< Maximal total number of HSPs to keep */
405  Int4 culling_limit; /**< If the query range of an HSP is contained in
406  at least this many higher-scoring HSPs, throw
407  away the HSP as redundant (turned off if zero) */
408  Int4 mask_level; /**< Only keep the highest scoring HSP when more than
409  one HSP overlaps the same region of the query by
410  more than or equal to mask_level %. -RMH- */
411 
412  /********************************************************************/
413  /* Merge all these in a structure for clarity? */
414  Boolean do_sum_stats; /**< Force sum statistics to be used to combine HSPs,
415  TRUE by default for all ungapped searches and translated
416  gapped searches (except RPS-BLAST) */
417  Int4 longest_intron; /**< The longest distance between HSPs allowed for
418  combining via sum statistics with uneven gaps */
419  /********************************************************************/
420 
421  Int4 min_hit_length; /**< optional minimum alignment length; alignments
422  not at least this long are discarded */
423  Int4 min_diag_separation; /**< How many diagonals separate a hit from a substantial alignment
424  before it's not blocked out. Must be > 0 to be used. */
425  EBlastProgramType program_number; /**< indicates blastn, blastp, etc. */
426 
427  /** Contains options to configure the HSP filtering/writing structures.
428  * If not set, the default HSP filtering strategy is used.
429  */
431 
432  /** Low-score option. Do not pass ungapped alignments on for later processing if
433  * the hitlist is already full of other alignments unless the ungapped alignment
434  * is above the fraction X of the least significant database match.
435  * Zero should turn this off.
436  */
438 
439  double query_cov_hsp_perc; /**< Min query coverage hsp percentage */
440 
441  /* Used by default hsp filtering strategy, num of best hsps to keep per subject
442  * seq for each query. Note that hsp_num_max should be used only to reduce memory footprint,
443  * it does not guarantee best hsp per query due to query concatenation
444  */
446 
447  /**< Queries are paired reads, for mapping */
449  /**< Splice HSPs for each query (for mapping RNA-Seq to a genome) */
451 
453 
454 /** Scoring options block
455  * Used to produce the BlastScoreBlk structure
456  * This structure may be needed for lookup table construction (proteins only),
457  * and for evaluating alignments.
458  */
459 typedef struct BlastScoringOptions {
460  char* matrix; /**< Name of the matrix containing all scores: needed for
461  finding neighboring words */
462  char* matrix_path; /**< Directory path to where matrices are stored. */
463  Int2 reward; /**< Reward for a match */
464  Int2 penalty; /**< Penalty for a mismatch */
465  Boolean gapped_calculation; /**< gap-free search if FALSE */
466  Boolean complexity_adjusted_scoring; /**< Use cross_match-like complexity
467  adjustment on raw scores. -RMH- */
468  Int4 gap_open; /**< Extra penalty for starting a gap */
469  Int4 gap_extend; /**< Penalty for each gap residue */
470 
471  /* only blastx and tblastn (When query & subj are diff) */
472  Boolean is_ooframe; /**< Should out-of-frame gapping be used in a translated
473  search? */
474  Int4 shift_pen; /**< Penalty for shifting a frame in out-of-frame
475  gapping */
476  EBlastProgramType program_number; /**< indicates blastn, blastp, etc. */
478 
479 /** Options for setting up effective lengths and search spaces.
480  * The values are those the user has specified to override the real sizes.
481  */
483  Int8 db_length; /**< Database length to be used for statistical
484  calculations */
485  Int4 dbseq_num; /**< Number of database sequences to be used for
486  statistical calculations */
487  Int4 num_searchspaces; /**< Number of elements in searchsp_eff, this must be
488  equal to the number of contexts in the search */
489  Int8 *searchsp_eff; /**< Search space to be used for statistical
490  calculations (one such per query context) */
492 
493 /** Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST).
494  * Some of these possibly should be transferred elsewhere.
495  */
496 typedef struct PSIBlastOptions {
497  /** Pseudocount constant. Needed for computing the PSSM residue
498  * frequencies */
500 
501  /*** The following options are used at the API layer to specify how the
502  * multiple sequence alignment is built from pairwise alignments. These
503  * could go in their own structure in the future. */
504 
505  /** Minimum evalue for inclusion in PSSM calculation. Needed for the
506  * conversion of Seq-align into a multiple sequence alignment and for
507  * composition based statistics */
509 
510  /** If set to TRUE, use the best alignment when multiple HSPs are found
511  * in a query-subject alignment (i.e.: HSP with the lowest e-value), else
512  * use all HSPs in a query-subject alignment. This option does not apply to
513  * the PSSM engine, it applies to the processing of pairwise sequence
514  * alignments to build a multiple sequence alignment structure
515  * @sa CPsiBlastInputData (to be implemented)
516  */
518 
519  /** Compatibility option for the NCBI's structure group (note
520  * nsg_ prefix, stands for NCBI's structure group). When set to true, the
521  * PSSM engine will function in the same way the C toolkit PSSM engine did
522  * in the structure group's cddumper application. This option should be
523  * set to FALSE by default as it enables the following behavior in the
524  * PSSM engine:
525  * <pre>
526  * 1) Ignores the query sequence (on certain stages of PSSM creation only)
527  * 2) Skips validation of multiple sequence alignment data
528  * 3) Disables assertions and validation in _PSICheckSequenceWeights
529  * 4) If no aligned sequences are provided in the multiple sequence
530  * alignment, NULL PSSM frequency ratios are returned and the PSSM is built
531  * based on the underlying substitution matrix.
532  * </pre>
533  * Do not set this to TRUE unless you know what you are doing.
534  */
536 
537  /** Scaling factor as used in IMPALA to do the matrix rescaling. Default
538  * value of 1.0 means not to use it. Makemat/formatrpsdb set this value to
539  * 100 by default, Kappa_RedoAlignmentCore uses 32. Provided so that the
540  * NCBI structure group can create scaled PSSMs as the output of the PSSM
541  * engine. Do not change this unless you know what you are doing.
542  */
544 
545  /** This turns off a validation for the multiple sequence alignment in the
546  * PSSM engine for unaligned positions. Needed when a multiple sequence
547  * alignment is provided on the command line (e.g.: -in_msa option).
548  */
550 
552 
553 
554 /** Options used to create the ReadDBFILE structure
555  * Include database name and various information for restricting the database
556  * to a subset.
557  */
558 typedef struct BlastDatabaseOptions {
559  Int4 genetic_code; /**< Genetic code to use for translation,
560  tblast[nx] only */
562 
563 /********************************************************************************
564 
565  Functions to create options blocks with default values
566  and free them after use.
567 
568 *********************************************************************************/
569 
570 /** Frees SDustOptions.
571  * @param dust_options object to free
572  * @return NULL pointer
573  */
576 
577 /** Allocates memory for SDustOptions, fills in defaults.
578  * @param dust_options options that are being returned [in|out]
579  * @return zero on success
580  */
582 Int2 SDustOptionsNew(SDustOptions* *dust_options);
583 
584 /** Frees SSegOptions.
585  * @param seg_options object to free [in]
586  * @return NULL pointer
587  */
590 
591 /** Allocates memory for SSegOptions, fills in defaults.
592  * @param seg_options options that are being returned [in|out]
593  * @return zero on success
594  */
596 Int2 SSegOptionsNew(SSegOptions* *seg_options);
597 
598 /** Resets name of db for repeat filtering.
599  * @param repeat_options already allocated options containing field to be reset [in|out]
600  * @param dbname name of the database(s) [in]
601  * @return zero on success
602  */
604 Int2 SRepeatFilterOptionsResetDB(SRepeatFilterOptions* *repeat_options, const char* dbname);
605 
606 /** Resets name of db for window masker filtering.
607  * @param winmask_options options block containing field to be reset [in|out]
608  * @param dbname name of the database(s) [in]
609  * @return zero on success
610  */
613  const char * dbname);
614 
615 /** Frees SRepeatFilterOptions.
616  * @param repeat_options object to free [in]
617  * @return NULL pointer
618  */
621 
622 /** Frees SWindowMaskerOptions.
623  * @param winmask_options object to free [in]
624  * @return NULL pointer
625  */
628 
629 /** Allocates memory for SRepeatFilterOptions, fills in defaults.
630  * @param repeat_options options that are being returned [in|out]
631  * @return zero on success
632  */
635 
636 /** Allocates memory for SWindowMaskerOptions, fills in defaults.
637  * @param winmask_options options that are being returned [in|out]
638  * @return zero on success
639  */
642 
643 /** Allocates memory for SReadQualityOptions, fills in defaults.
644  * @param read_quality_options options that are being returned [in|out]
645  * @return zero on success
646  */
648 Int2 SReadQualityOptionsNew(SReadQualityOptions ** read_quality_options);
649 
650 /** Frees memory for SReadQualityOptions */
653  SReadQualityOptions * read_quality_options);
654 
655 
656 /** Frees SBlastFilterOptions and all subservient structures.
657  * @param filter_options object to free
658  * @return NULL pointer
659  */
662 
663 /** Merges two sets of options together, taking the non-default one as preferred. if
664  * both are non-default then one or the other is taken.
665  * @param combined object that is returned [in|out]
666  * @param opt1 first set of options [in]
667  * @param opt2 second set of options [in]
668  * @return zero on success.
669  */
672  const SBlastFilterOptions* opt2);
673 
674 /** Types of filtering options. */
675 typedef enum EFilterOptions {
676  eSeg, /**< low-complexity for proteins. */
677  eDust, /**< low-complexity for nucleotides. */
678  eRepeats, /**< Repeat filtering for nucleotides. */
679  eDustRepeats, /**< Repeat and dust filtering for nucleotides. */
680  eEmpty /**< no filtering at all. */
682 
683 /** Allocates memory for SBlastFilterOptions.
684  * @param filter_options options that are being returned [in|out]
685  * @param type specify either dust or seg (now) with EFilterOptions [in]
686  * @return zero on success
687  */
690 
691 /** Queries whether no masking is required
692  * @param filter_options the object to be queried [in]
693  * @return TRUE if no filtering is required or argument is NULL, FALSE
694  * otherwise
695  */
698 
699 /** Queries whether masking should be done only for the lookup table or for the entire search.
700  * @param filter_options the object to be queried [in]
701  * @return TRUE or FALSE, FALSE if filter_options is NULL.
702  */
705 
706 /** Validates filter options to ensure that program and options are consistent
707  * and that options have valid values.
708  * @param program_number Program number (blastn, blastp, etc.) [in]
709  * @param filter_options options to validate [in]
710  * @param blast_message error or warning (optional) [out]
711  * @return zero on success
712  */
714 Int2 SBlastFilterOptionsValidate(EBlastProgramType program_number, const SBlastFilterOptions* filter_options,
715  Blast_Message* *blast_message);
716 
717 
718 /** Deallocate memory for QuerySetUpOptions.
719  * @param options Structure to free [in]
720  */
723 
724 
725 /** Allocate memory for QuerySetUpOptions and fill with default values.
726  * @param options The options that are being returned [out]
727  */
730 
731 /** Fill non-default contents of the QuerySetUpOptions.
732  * @param options The options structure [in] [out]
733  * @param program Program number (blastn, blastp, etc.) [in]
734  * @param filter_string Parsable string of filtering options [in]
735  * @param strand_option which strand to search [in]
736 */
739  EBlastProgramType program, const char *filter_string, Uint1 strand_option);
740 
741 
742 /** Deallocate memory for BlastInitialWordOptions.
743  * @param options Structure to free [in]
744  */
748 
749 /** Allocate memory for BlastInitialWordOptions and fill with default values.
750  * @param program Program number (blastn, blastp, etc.) [in]
751  * @param options The options that are being returned [out]
752 */
754 Int2
756  BlastInitialWordOptions* *options);
757 
758 /** Validate correctness of the initial word options.
759  * @param program_number Type of BLAST program [in]
760  * @param options Initial word options [in]
761  * @param blast_msg Describes any validation problems found [out]
762  * @return Validation status
763  */
765 Int2
767  const BlastInitialWordOptions* options,
768  Blast_Message* *blast_msg);
769 
770 /** Fill non-default values in the BlastInitialWordOptions structure.
771  * @param options The options structure [in] [out]
772  * @param program Program number (blastn, blastp, etc.) [in]
773  * @param window_size Size of a largest window between 2 words for the two-hit
774  * version [in]
775  * @param xdrop_ungapped The value of the X-dropoff for ungapped extensions [in]
776 */
778 Int2
780  EBlastProgramType program,
781  Int4 window_size, double xdrop_ungapped);
782 
783 /** Deallocate memory for BlastExtensionOptions.
784  * @param options Structure to free [in]
785  */
789 
790 /** Allocate memory for BlastExtensionOptions and fill with default values.
791  * @param program Program number (blastn, blastp, etc.) [in]
792  * @param options The options that are being returned [out]
793  * @param gapped The search is gapped [in]
794 */
796 Int2
798 
799 /** Fill non-default values in the BlastExtensionOptions structure.
800  * @param options The options structure [in] [out]
801  * @param program Program number (blastn, blastp, etc.) [in]
802  * @param greedy In how many stages of the search greedy alignment is
803  * used (values 0, 1, 2)? FIXME [in]
804  * @param x_dropoff X-dropoff parameter value for preliminary gapped
805  * extensions [in]
806  * @param x_dropoff_final X-dropoff parameter value for final gapped
807  * extensions with traceback [in]
808  * @todo the greedy parameter to this function is tied to the blast_driver's
809  * command line argument for greedy... couldn't this be EBlastPrelimGapExt?
810 */
812 Int2
814  EBlastProgramType program, Int4 greedy, double x_dropoff,
815  double x_dropoff_final);
816 
817 
818 /** Validate contents of BlastExtensionOptions.
819  * @param program_number Type of BLAST program [in]
820  * @param options Options to be validated [in]
821  * @param blast_msg Describes any validation problems found [out]
822 */
825  const BlastExtensionOptions* options, Blast_Message* *blast_msg);
826 
827 /** Deallocate memory for BlastScoringOptions.
828  * @param options Structure to free [in]
829  */
832 
833 /** Allocate memory for BlastScoringOptions and fill with default values.
834  * @param program Program number (blastn, blastp, etc.) [in]
835  * @param options The options that are being returned [out]
836 */
839 
840 /** Fill non-default values in the BlastScoringOptions structure.
841  * @param options The options structure [in] [out]
842  * @param program Program number (blastn, blastp, etc.) [in]
843  * @param greedy_extension Is greedy extension algorithm used? [in]
844  * @param penalty Mismatch penalty score (blastn only) [in]
845  * @param reward Match reward score (blastn only) [in]
846  * @param matrix Name of the BLAST matrix (all except blastn) [in]
847  * @param gap_open Extra cost for opening a gap [in]
848  * @param gap_extend Cost of a gap [in]
849 */
851 Int2
853  Boolean greedy_extension, Int4 penalty, Int4 reward, const char *matrix,
854  Int4 gap_open, Int4 gap_extend);
855 
856 /** Validate contents of BlastScoringOptions.
857  * @param program_number Type of BLAST program [in]
858  * @param options Options to be validated [in]
859  * @param blast_msg Describes any validation problems found [out]
860 */
862 Int2
864  const BlastScoringOptions* options, Blast_Message* *blast_msg);
865 
866 /** Produces copy of "old" options, with new memory allocated.
867  * @param new_opt Contains copied BlastScoringOptions upon return [out]
868  * @param old_opt BlastScoringOptions to be copied [in]
869 */
872 
873 /** Resets matrix name option. Automatically converts the name to upper case.
874  * @param opts Options structure to update. [in] [out]
875  * @param matrix_name New matrix name. If NULL, old matrix name is left
876  * as is. [in]
877  */
880  const char* matrix_name);
881 
882 
883 /** Deallocate memory for BlastEffectiveLengthsOptions*.
884  * @param options Structure to free [in]
885  */
889 
890 /** Allocate memory for BlastEffectiveLengthsOptions* and fill with
891  * default values.
892  * @param options The options that are being returned [out]
893  */
896 
897 /** Return true if the search space is set for any of the queries in the
898  * search
899  * @param options The options to examine [in]
900  */
902 Boolean
905  options);
906 
907 /** Fill the non-default values in the BlastEffectiveLengthsOptions structure.
908  * @param options The options [in] [out]
909  * @param dbseq_num Number of sequences in the database (if zero real value will be used) [in]
910  * @param db_length Total length of the database (if zero real value will be used) [in]
911  * @param searchsp_eff Array of effective search spaces (the real value
912  * will be used for elements that are 0). If array
913  * contains one element, all contexts use this value.
914  * If array has multiple elements, the number must match
915  * the number of contexts in the search [in]
916  * @param num_searchsp The number of elements in searchsp_eff [in]
917  */
919 Int2
921  Int4 dbseq_num, Int8 db_length,
922  Int8 *searchsp_eff, Int4 num_searchsp);
923 
924 
925 /** Allocate memory for lookup table options and fill with default values.
926  * @param program Program number (blastn, blastp, etc.) [in]
927  * @param options The options that are being returned [out]
928  */
931 
932 
933 /** Fill non-default values in the LookupTableOptions structure.
934  * @param options The options [in] [out]
935  * @param program Program number (blastn, blastp, etc.) [in]
936  * @param is_megablast Megablast (instead of blastn) if TRUE [in]
937  * @param threshold Threshold value for finding neighboring words
938  (fractional values are allowed, though unless
939  the engine scales up alignment scores a fractional
940  threshold will be rounded down) [in]
941  * @param word_size Number of matched residues in an initial word [in]
942  */
944 Int2
946  EBlastProgramType program, Boolean is_megablast, double threshold,
947  Int4 word_size);
948 
949 
950 /** Deallocates memory for LookupTableOptions*.
951  * @param options Structure to free [in]
952  */
956 
957 /** Validate LookupTableOptions.
958  * @param program_number BLAST program [in]
959  * @param options The options to be validated [in]
960  * @param blast_msg Describes any validation problems found [out]
961 */
963 Int2
965  const LookupTableOptions* options, Blast_Message* *blast_msg);
966 
967 /** Deallocate memory for BlastHitSavingOptions.
968  * @param options Structure to free [in]
969  */
973 
974 /** Validate BlastHitSavingOptions
975  * @param program_number BLAST program [in]
976  * @param options The options to be validated [in]
977  * @param blast_msg Describes any validation problems found [out]
978 */
979 
981 Int2
983  const BlastHitSavingOptions* options, Blast_Message* *blast_msg);
984 
985 /** Allocate memory for BlastHitSavingOptions.
986  * @param program Program number (blastn, blastp, etc.) [in]
987  * @param options The options that are being returned [out]
988  * @param gapped_calculation is this search gapped? [in]
989 */
992  BlastHitSavingOptions** options,
993  Boolean gapped_calculation);
994 
995 /** Fill non-default values in the BlastHitSavingOptions structure.
996  * @param options The options [in] [out]
997  * @param evalue The expected value threshold [in]
998  * @param hitlist_size How many database sequences to save per query? [in]
999  * @param is_gapped is this a gapped alignment? [in]
1000  * @param culling_limit Number of higher-scoring HSPs that must contain
1001  * the query range of an HSP before that HSP is declared
1002  * to be redundant (ignored if zero) [in]
1003  * @param min_diag_separation Delete HSPs whose endpoints are at most this
1004  * many diagonals from a higher-scoring HSP. If zero,
1005  * delete HSPs whose query and subject ranges are
1006  * enveloped by those of a higher-scoring HSP [in]
1007 */
1009 Int2
1011  double evalue, Int4 hitlist_size,
1012  Boolean is_gapped,
1013  Int4 culling_limit,
1014  Int4 min_diag_separation);
1015 
1016 /** Initialize default options for PSI BLAST
1017  * @param psi_options pointer to pointer where structure will be allocated [out]
1018  * @return 1 in case of memory allocation failure or if psi_options is NULL, 0
1019  * in case of success
1020  */
1022 Int2 PSIBlastOptionsNew(PSIBlastOptions** psi_options);
1023 
1024 /** Validates the PSI BLAST options so that they have sane values.
1025  * @param psi_options structure to validate [in]
1026  * @param blast_msg Describes any validation problems found [out]
1027  * @return 0 on success 1 on failure
1028  */
1029 Int2 PSIBlastOptionsValidate(const PSIBlastOptions* psi_options,
1030  Blast_Message** blast_msg);
1031 
1032 /** Deallocate PSI BLAST options */
1035 
1036 /** Allocate and initialize a BlastHSPBestHitOptions structure */
1039  double score_edge);
1040 
1041 /** Validate the best hit algorithm parameters (if any) in the HSP filtering options.
1042  * @param opts BlastHSPFilteringOptions structure
1043  * @return 0 on success, else non-zero
1044  */
1046 Int2
1048 
1049 /** Deallocate a BlastHSPBestHitOptions structure
1050  * @param opt object to be deallocated. [in]
1051  */
1054 
1055 /** Allocate a new object for culling options.
1056  * @param max number of HSPs that may be aligned to one part of query [in]
1057  */
1060 
1061 /** Validate culling options.
1062  * @param opts BlastHSPFilteringOptions structure
1063  * @return 0 on success, else non-zero
1064  */
1066 Int2
1068 
1069 /** Deallocates culling options structure.
1070  * @param culling_opts object to be deallocated. [in]
1071  */
1075 
1076 /** Allocate and initialize a BlastHSPFilteringOptions structure */
1079 
1080 /** Add the best hit options. Responsibility for opts is taken over by the
1081  * BlastHSPFilteringOptions
1082  * @param filt_opts HSP filtering options [in]
1083  * @param opts Best Hit algorithm options. Ownership of this is taken by
1084  * the BlastHSPFilteringOptions structure [in|out]
1085  */
1087 Int2
1089  BlastHSPBestHitOptions** opts,
1090  EBlastStage stage);
1091 /** Add the culling options to the BlastHSPFilteringOptions structure */
1093 Int2
1095  BlastHSPCullingOptions** opts,
1096  EBlastStage stage);
1097 
1098 /** Validates the BlastHSPFilteringOptions structure */
1100 Int2
1102 
1103 /** Deallocate a BlastHSPFilteringOptions structure */
1107 
1108 /** Allocates the BlastDatabase options structure and sets the default
1109  * database genetic code value (BLAST_GENETIC_CODE). Genetic code string in
1110  * ncbistdaa must be populated by client code */
1113 
1114 /** Deallocate database options */
1118 
1119 /** Initialize all the BLAST search options structures with the default
1120  * values.
1121  * @param blast_program Type of blast program (blastn, blastp, blastx,
1122  * tblastn, tblastx) [in]
1123  * @param lookup_options Lookup table options [out]
1124  * @param query_setup_options Query options [out]
1125  * @param word_options Initial word processing options [out]
1126  * @param ext_options Extension options [out]
1127  * @param hit_options Hit saving options [out]
1128  * @param score_options Scoring options [out]
1129  * @param eff_len_options Effective length options [out]
1130  * @param protein_options Protein BLAST options [out]
1131  * @param db_options BLAST database options [out]
1132  */
1135  LookupTableOptions** lookup_options,
1136  QuerySetUpOptions** query_setup_options,
1137  BlastInitialWordOptions** word_options,
1138  BlastExtensionOptions** ext_options,
1139  BlastHitSavingOptions** hit_options,
1140  BlastScoringOptions** score_options,
1141  BlastEffectiveLengthsOptions** eff_len_options,
1142  PSIBlastOptions** protein_options,
1143  BlastDatabaseOptions** db_options);
1144 
1145 /** Validate all options */
1148  const BlastExtensionOptions* ext_options,
1149  const BlastScoringOptions* score_options,
1150  const LookupTableOptions* lookup_options,
1151  const BlastInitialWordOptions* word_options,
1152  const BlastHitSavingOptions* hit_options,
1153  Blast_Message* *blast_msg);
1154 
1155 
1156 
1157 /** Get thresholds for word-finding suggested by Stephen Altschul.
1158  *
1159  * @param program_number Type of blast program (blastn, blastp, blastx,
1160  * tblastn, tblastx) [in]
1161  * @param matrixName matrix, e.g., BLOSUM62 [in]
1162  * @param threshold returns suggested value [in|out]
1163  * @return zero on success
1164  */
1167  const char* matrixName,
1168  double* threshold);
1169 
1170 /** Get window sizes for two hit algorithm suggested by Stephen Altschul.
1171  *
1172  * @param program_number Type of blast program (blastn, blastp, blastx,
1173  * tblastn, tblastx) [in]
1174  * @param matrixName matrix, e.g., BLOSUM62 [in]
1175  * @param window_size returns suggested value [in|out]
1176  * @return zero on success
1177  */
1180  const char* matrixName,
1181  Int4* window_size);
1182 
1183 /** Allocate a new object for subject besthit options.
1184  * @param isProtein true if protein alignment [in]
1185  */
1188 
1189 /** Validate subject besthit options.
1190  * @param opts BlastHSPFilteringOptions structure
1191  * @return 0 on success, else non-zero
1192  */
1194 Int2
1196 
1197 /** Deallocates subject besthit structure.
1198  * @param subject_besthit object to be deallocated. [in]
1199  */
1203 
1205 Int2
1207  BlastHSPSubjectBestHitOptions** subject_besthit);
1208 
1209 #define DEFAULT_SUBJECT_BESTHIT_PROT_MAX_RANGE_DIFF 3
1210 #define DEFAULT_SUBJECT_BESTHIT_NUCL_MAX_RANGE_DIFF 3
1211 
1212 #ifdef __cplusplus
1213 }
1214 #endif
1215 #endif /* !__BLASTOPTIONS__ */
1216 
Definitions used throughout BLAST.
EBlastStage
Enumeration for the stages in the BLAST search.
Definition: blast_def.h:324
Defines to provide correct exporting from BLAST DLL in Windows.
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
Structures for BLAST messages.
PSIBlastOptions * PSIBlastOptionsFree(PSIBlastOptions *psi_options)
Deallocate PSI BLAST options.
Int2 BLAST_FillQuerySetUpOptions(QuerySetUpOptions *options, EBlastProgramType program, const char *filter_string, Uint1 strand_option)
Fill non-default contents of the QuerySetUpOptions.
BlastHSPCullingOptions * BlastHSPCullingOptionsNew(int max)
Allocate a new object for culling options.
Int2 BlastDatabaseOptionsNew(BlastDatabaseOptions **db_options)
Allocates the BlastDatabase options structure and sets the default database genetic code value (BLAST...
struct BlastHitSavingOptions BlastHitSavingOptions
Options used when evaluating and saving hits These include: a.
struct SRepeatFilterOptions SRepeatFilterOptions
Filtering options for organism-specific repeats filtering.
Int2 SRepeatFilterOptionsNew(SRepeatFilterOptions **repeat_options)
Allocates memory for SRepeatFilterOptions, fills in defaults.
Int2 PSIBlastOptionsValidate(const PSIBlastOptions *psi_options, Blast_Message **blast_msg)
Validates the PSI BLAST options so that they have sane values.
Int2 BLAST_InitDefaultOptions(EBlastProgramType blast_program, LookupTableOptions **lookup_options, QuerySetUpOptions **query_setup_options, BlastInitialWordOptions **word_options, BlastExtensionOptions **ext_options, BlastHitSavingOptions **hit_options, BlastScoringOptions **score_options, BlastEffectiveLengthsOptions **eff_len_options, PSIBlastOptions **protein_options, BlastDatabaseOptions **db_options)
Initialize all the BLAST search options structures with the default values.
Int2 BlastHSPBestHitOptionsValidate(const BlastHSPFilteringOptions *opts)
Validate the best hit algorithm parameters (if any) in the HSP filtering options.
Int2 BLAST_ValidateOptions(EBlastProgramType program_number, const BlastExtensionOptions *ext_options, const BlastScoringOptions *score_options, const LookupTableOptions *lookup_options, const BlastInitialWordOptions *word_options, const BlastHitSavingOptions *hit_options, Blast_Message **blast_msg)
Validate all options.
BlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options)
Deallocate memory for BlastHitSavingOptions.
struct PSIBlastOptions PSIBlastOptions
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
Int2 BlastScoringOptionsValidate(EBlastProgramType program_number, const BlastScoringOptions *options, Blast_Message **blast_msg)
Validate contents of BlastScoringOptions.
Int2 BlastQuerySetUpOptionsNew(QuerySetUpOptions **options)
Allocate memory for QuerySetUpOptions and fill with default values.
Int2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char *matrixName, double *threshold)
Get thresholds for word-finding suggested by Stephen Altschul.
SDustOptions * SDustOptionsFree(SDustOptions *dust_options)
Frees SDustOptions.
Definition: blast_options.c:50
EBlastPrelimGapExt
The algorithm to be used for preliminary gapped extensions.
@ eJumperWithTraceback
Jumper extension (mapping)
@ eDynProgScoreOnly
standard affine gapping
@ eGreedyScoreOnly
Greedy extension (megaBlast)
@ eSmithWatermanScoreOnly
Score-only smith-waterman.
BlastHSPFilteringOptions * BlastHSPFilteringOptionsFree(BlastHSPFilteringOptions *opts)
Deallocate a BlastHSPFilteringOptions structure.
Int2 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions **options)
Allocate memory for BlastEffectiveLengthsOptions* and fill with default values.
SReadQualityOptions * SReadQualityOptionsFree(SReadQualityOptions *read_quality_options)
Frees memory for SReadQualityOptions.
Boolean SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions *filter_options)
Queries whether masking should be done only for the lookup table or for the entire search.
Boolean SBlastFilterOptionsNoFiltering(const SBlastFilterOptions *filter_options)
Queries whether no masking is required.
Int2 BLAST_GetSuggestedWindowSize(EBlastProgramType program_number, const char *matrixName, Int4 *window_size)
Get window sizes for two hit algorithm suggested by Stephen Altschul.
struct BlastHSPSubjectBestHitOptions BlastHSPSubjectBestHitOptions
Int2 SBlastFilterOptionsValidate(EBlastProgramType program_number, const SBlastFilterOptions *filter_options, Blast_Message **blast_message)
Validates filter options to ensure that program and options are consistent and that options have vali...
struct LookupTableOptions LookupTableOptions
Options needed to construct a lookup table Also needed: query sequence and query length.
Int2 BLAST_FillScoringOptions(BlastScoringOptions *options, EBlastProgramType program, Boolean greedy_extension, Int4 penalty, Int4 reward, const char *matrix, Int4 gap_open, Int4 gap_extend)
Fill non-default values in the BlastScoringOptions structure.
BlastHSPSubjectBestHitOptions * BlastHSPSubjectBestHitOptionsFree(BlastHSPSubjectBestHitOptions *subject_besthit_opts)
Deallocates subject besthit structure.
Int2 SRepeatFilterOptionsResetDB(SRepeatFilterOptions **repeat_options, const char *dbname)
Resets name of db for repeat filtering.
SRepeatFilterOptions * SRepeatFilterOptionsFree(SRepeatFilterOptions *repeat_options)
Frees SRepeatFilterOptions.
BlastInitialWordOptions * BlastInitialWordOptionsFree(BlastInitialWordOptions *options)
Deallocate memory for BlastInitialWordOptions.
Int2 BlastHSPFilteringOptions_AddCulling(BlastHSPFilteringOptions *filt_opts, BlastHSPCullingOptions **opts, EBlastStage stage)
Add the culling options to the BlastHSPFilteringOptions structure.
Int2 BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions *options, Int4 dbseq_num, Int8 db_length, Int8 *searchsp_eff, Int4 num_searchsp)
Fill the non-default values in the BlastEffectiveLengthsOptions structure.
struct BlastScoringOptions BlastScoringOptions
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Int2 SSegOptionsNew(SSegOptions **seg_options)
Allocates memory for SSegOptions, fills in defaults.
Definition: blast_options.c:77
SWindowMaskerOptions * SWindowMaskerOptionsFree(SWindowMaskerOptions *winmask_options)
Frees SWindowMaskerOptions.
Int2 BlastScoringOptionsNew(EBlastProgramType program, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
BlastEffectiveLengthsOptions * BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions *options)
Deallocate memory for BlastEffectiveLengthsOptions*.
SBlastFilterOptions * SBlastFilterOptionsFree(SBlastFilterOptions *filter_options)
Frees SBlastFilterOptions and all subservient structures.
Int2 BLAST_FillLookupTableOptions(LookupTableOptions *options, EBlastProgramType program, Boolean is_megablast, double threshold, Int4 word_size)
Fill non-default values in the LookupTableOptions structure.
Int2 LookupTableOptionsNew(EBlastProgramType program, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
Int2 SWindowMaskerOptionsNew(SWindowMaskerOptions **winmask_options)
Allocates memory for SWindowMaskerOptions, fills in defaults.
Definition: blast_options.c:90
Int2 BlastScoringOptionsSetMatrix(BlastScoringOptions *opts, const char *matrix_name)
Resets matrix name option.
Int2 SBlastFilterOptionsMerge(SBlastFilterOptions **combined, const SBlastFilterOptions *opt1, const SBlastFilterOptions *opt2)
Merges two sets of options together, taking the non-default one as preferred.
BlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)
Deallocate memory for BlastExtensionOptions.
struct BlastEffectiveLengthsOptions BlastEffectiveLengthsOptions
Options for setting up effective lengths and search spaces.
Int2 BLAST_FillInitialWordOptions(BlastInitialWordOptions *options, EBlastProgramType program, Int4 window_size, double xdrop_ungapped)
Fill non-default values in the BlastInitialWordOptions structure.
struct BlastHSPBestHitOptions BlastHSPBestHitOptions
Options for the Best Hit HSP collection algorithm.
struct SBlastFilterOptions SBlastFilterOptions
All filtering options.
struct SWindowMaskerOptions SWindowMaskerOptions
Filtering options for organism-specific filtering with Window Masker.
struct SSegOptions SSegOptions
Options for SEG algorithm, applies only to protein-protein comparisons.
Int2 SDustOptionsNew(SDustOptions **dust_options)
Allocates memory for SDustOptions, fills in defaults.
Definition: blast_options.c:57
Int2 BlastInitialWordOptionsValidate(EBlastProgramType program_number, const BlastInitialWordOptions *options, Blast_Message **blast_msg)
Validate correctness of the initial word options.
Int2 BLAST_FillExtensionOptions(BlastExtensionOptions *options, EBlastProgramType program, Int4 greedy, double x_dropoff, double x_dropoff_final)
Fill non-default values in the BlastExtensionOptions structure.
Int2 BlastHitSavingOptionsNew(EBlastProgramType program, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
struct QuerySetUpOptions QuerySetUpOptions
Options required for setting up the query sequence.
Int2 LookupTableOptionsValidate(EBlastProgramType program_number, const LookupTableOptions *options, Blast_Message **blast_msg)
Validate LookupTableOptions.
Int2 SWindowMaskerOptionsResetDB(SWindowMaskerOptions **winmask_options, const char *dbname)
Resets name of db for window masker filtering.
Int2 BlastHitSavingOptionsValidate(EBlastProgramType program_number, const BlastHitSavingOptions *options, Blast_Message **blast_msg)
Validate BlastHitSavingOptions.
const double kPSSM_NoImpalaScaling
Value used to indicate that no IMPALA-style scaling should be performed when scaling a PSSM.
Definition: blast_options.c:43
BlastHSPCullingOptions * BlastHSPCullingOptionsFree(BlastHSPCullingOptions *culling_opts)
Deallocates culling options structure.
Int2 BLAST_FillHitSavingOptions(BlastHitSavingOptions *options, double evalue, Int4 hitlist_size, Boolean is_gapped, Int4 culling_limit, Int4 min_diag_separation)
Fill non-default values in the BlastHitSavingOptions structure.
SSegOptions * SSegOptionsFree(SSegOptions *seg_options)
Frees SSegOptions.
Definition: blast_options.c:70
struct BlastExtensionOptions BlastExtensionOptions
Options used for gapped extension These include: a.
Int2 BlastHSPCullingOptionsValidate(const BlastHSPFilteringOptions *opts)
Validate culling options.
Int2 BlastHSPSubjectBestHitOptionsValidate(const BlastHSPFilteringOptions *opts)
Validate subject besthit options.
struct BlastHSPCullingOptions BlastHSPCullingOptions
Options for the HSP culling algorithm.
EBlastTbackExt
The algorithm to be used for final gapped extensions with traceback.
@ eGreedyTbck
Greedy extension (megaBlast)
@ eSmithWatermanTbck
Smith-waterman finds optimal scores, then ALIGN_EX to find alignment.
@ eDynProgTbck
standard affine gapping
@ eSmithWatermanTbckFull
Smith-waterman to find all alignments.
BlastHSPFilteringOptions * BlastHSPFilteringOptionsNew()
Allocate and initialize a BlastHSPFilteringOptions structure.
BlastDatabaseOptions * BlastDatabaseOptionsFree(BlastDatabaseOptions *db_options)
Deallocate database options.
EFilterOptions
Types of filtering options.
@ eDustRepeats
Repeat and dust filtering for nucleotides.
@ eRepeats
Repeat filtering for nucleotides.
@ eDust
low-complexity for nucleotides.
@ eEmpty
no filtering at all.
@ eSeg
low-complexity for proteins.
BlastHSPSubjectBestHitOptions * BlastHSPSubjectBestHitOptionsNew(Boolean isProtein)
Allocate a new object for subject besthit options.
Int2 SReadQualityOptionsNew(SReadQualityOptions **read_quality_options)
Allocates memory for SReadQualityOptions, fills in defaults.
Int2 BlastHSPFilteringOptions_AddSubjectBestHit(BlastHSPFilteringOptions *filt_opts, BlastHSPSubjectBestHitOptions **subject_besthit)
BlastHSPBestHitOptions * BlastHSPBestHitOptionsNew(double overhang, double score_edge)
Allocate and initialize a BlastHSPBestHitOptions structure.
Int2 SBlastFilterOptionsNew(SBlastFilterOptions **filter_options, EFilterOptions type)
Allocates memory for SBlastFilterOptions and.
struct SReadQualityOptions SReadQualityOptions
Filtering options for mapping next-generation sequences.
Boolean BlastEffectiveLengthsOptions_IsSearchSpaceSet(const BlastEffectiveLengthsOptions *options)
Return true if the search space is set for any of the queries in the search.
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
struct BlastInitialWordOptions BlastInitialWordOptions
Options needed for initial word finding and processing.
struct SDustOptions SDustOptions
Options for dust algorithm, applies only to nucl.
struct BlastDatabaseOptions BlastDatabaseOptions
Options used to create the ReadDBFILE structure Include database name and various information for res...
Int2 BlastInitialWordOptionsNew(EBlastProgramType program, BlastInitialWordOptions **options)
Allocate memory for BlastInitialWordOptions and fill with default values.
ELookupTableType
Types of the lookup table.
@ eSmallNaLookupTable
lookup table for blastn with small query
@ eMixedMBLookupTable
use when some volumes are searched with index and some are not
@ eNaLookupTable
blastn lookup table
@ eMBLookupTable
megablast lookup table (includes both contiguous and discontiguous megablast)
@ eIndexedMBLookupTable
use database index as a lookup structure
@ ePhiNaLookupTable
nucleotide lookup table for phi-blast
@ eAaLookupTable
standard protein (blastp) lookup table
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
@ ePhiLookupTable
protein lookup table specialized for phi-blast
@ eRPSLookupTable
RPS lookup table (rpsblast and rpstblastn)
@ eNaHashLookupTable
used for 16-base words
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
Int2 BlastExtensionOptionsValidate(EBlastProgramType program_number, const BlastExtensionOptions *options, Blast_Message **blast_msg)
Validate contents of BlastExtensionOptions.
BlastHSPBestHitOptions * BlastHSPBestHitOptionsFree(BlastHSPBestHitOptions *opt)
Deallocate a BlastHSPBestHitOptions structure.
LookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)
Deallocates memory for LookupTableOptions*.
Int2 BlastHSPFilteringOptions_AddBestHit(BlastHSPFilteringOptions *filt_opts, BlastHSPBestHitOptions **opts, EBlastStage stage)
Add the best hit options.
Int2 BlastScoringOptionsDup(BlastScoringOptions **new_opt, const BlastScoringOptions *old_opt)
Produces copy of "old" options, with new memory allocated.
QuerySetUpOptions * BlastQuerySetUpOptionsFree(QuerySetUpOptions *options)
Deallocate memory for QuerySetUpOptions.
struct BlastHSPFilteringOptions BlastHSPFilteringOptions
Structure containing the HSP filtering/writing options.
Int2 BlastHSPFilteringOptionsValidate(const BlastHSPFilteringOptions *opts)
Validates the BlastHSPFilteringOptions structure.
Int2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)
Allocate memory for BlastExtensionOptions and fill with default values.
Definitions for various programs supported by core BLAST.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
static ulg window_size
uint8_t Uint1
1-byte (8-bit) unsigned integer
Definition: ncbitype.h:99
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
Type and macro definitions from C toolkit that are not defined in C++ toolkit.
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
T max(T x_, T y_)
Options used to create the ReadDBFILE structure Include database name and various information for res...
Int4 genetic_code
Genetic code to use for translation, tblast[nx] only.
Options for setting up effective lengths and search spaces.
Int8 * searchsp_eff
Search space to be used for statistical calculations (one such per query context)
Int8 db_length
Database length to be used for statistical calculations.
Int4 dbseq_num
Number of database sequences to be used for statistical calculations.
Int4 num_searchspaces
Number of elements in searchsp_eff, this must be equal to the number of contexts in the search.
Options used for gapped extension These include: a.
EBlastTbackExt eTbackExt
type of traceback extension.
Int4 unifiedP
Indicates unified P values to be used in blastp or tblastn.
Int4 max_mismatches
Maximum number of mismatches allowed for Jumper.
Int4 mismatch_window
Window for counting mismatches for Jumper.
EBlastPrelimGapExt ePrelimGapExt
type of preliminary gapped extension (normally) for calculating score.
double gap_x_dropoff_final
X-dropoff value for the final gapped extension (in bits)
double gap_x_dropoff
X-dropoff value for gapped extension (in bits)
EBlastProgramType program_number
indicates blastn, blastp, etc.
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used
Options for the Best Hit HSP collection algorithm.
Options for the HSP culling algorithm.
int max_hits
Maximum number of hits per area of query.
Structure containing the HSP filtering/writing options.
BlastHSPBestHitOptions * best_hit
Best Hit algorithm.
BlastHSPSubjectBestHitOptions * subject_besthit_opts
Subject Culling.
BlastHSPCullingOptions * culling_opts
culling algorithm
Options used when evaluating and saving hits These include: a.
Int4 culling_limit
If the query range of an HSP is contained in at least this many higher-scoring HSPs,...
EBlastProgramType program_number
indicates blastn, blastp, etc.
Int4 longest_intron
The longest distance between HSPs allowed for combining via sum statistics with uneven gaps.
Int4 max_hsps_per_subject
Queries are paired reads, for mapping.
double low_score_perc
Low-score option.
Int4 total_hsp_limit
Maximal total number of HSPs to keep.
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.
Int4 cutoff_score
The (raw) score cut-off threshold.
Int4 hsp_num_max
Maximal number of HSPs to save for one database sequence.
Boolean paired
Splice HSPs for each query (for mapping RNA-Seq to a genome)
Int4 mask_level
Only keep the highest scoring HSP when more than one HSP overlaps the same region of the query by mor...
Boolean do_sum_stats
Force sum statistics to be used to combine HSPs, TRUE by default for all ungapped searches and transl...
Int4 hitlist_size
Maximal number of database sequences to return results for.
Int4 min_diag_separation
How many diagonals separate a hit from a substantial alignment before it's not blocked out.
Int4 max_edit_distance
Maximum number of mismatches and gaps.
Int4 min_hit_length
optional minimum alignment length; alignments not at least this long are discarded
Int4 cutoff_score_fun[2]
Coefficients x100 for the raw score cut-off threshold as a function of query length: x[0] + x[1] * qu...
double query_cov_hsp_perc
Min query coverage hsp percentage.
BlastHSPFilteringOptions * hsp_filt_opt
Contains options to configure the HSP filtering/writing structures If not set, the default HSP filt...
double percent_identity
The percent identity cut-off threshold.
Options needed for initial word finding and processing.
double gap_trigger
Score in bits for starting gapped extension.
EBlastProgramType program_number
indicates blastn, blastp, etc.
double x_dropoff
X-dropoff value (in bits) for the ungapped extension.
Int4 window_size
Maximal allowed distance between 2 hits in case 2 hits are required to trigger the extension.
Int4 scan_range
Maximal number of gaps allowed between 2 hits.
Scoring options block. Used to produce the BlastScoreBlk structure. This structure may be needed for lo...
Int2 penalty
Penalty for a mismatch.
EBlastProgramType program_number
indicates blastn, blastp, etc.
Int4 gap_open
Extra penalty for starting a gap.
Int4 gap_extend
Penalty for each gap residue.
Int2 reward
Reward for a match.
Boolean gapped_calculation
gap-free search if FALSE
char * matrix_path
Directory path to where matrices are stored.
Int4 shift_pen
Penalty for shifting a frame in out-of-frame gapping.
char * matrix
Name of the matrix containing all scores: needed for finding neighboring words.
Boolean is_ooframe
Should out-of-frame gapping be used in a translated search?
Boolean complexity_adjusted_scoring
Use cross_match-like complexity adjustment on raw scores.
Structure to hold a message from the core of the BLAST engine.
Definition: blast_message.h:70
Options needed to construct a lookup table. Also needed: query sequence and query length.
Int4 word_size
Determines the size of the lookup table.
char * phi_pattern
PHI-BLAST pattern.
Uint1 max_db_word_count
words with larger frequency in the database will be masked in the lookup table, if the db_filter opto...
Boolean db_filter
scan the database and include only words that appear in the database between 1 and 9 times (currently...
EBlastProgramType program_number
indicates blastn, blastp, etc.
double threshold
Score threshold for putting words in a lookup table (fractional values are allowed,...
Int4 mb_template_type
Type of a discontiguous word template.
ELookupTableType lut_type
What kind of lookup table to construct?
Uint4 stride
number of words to skip after collecting each word
Int4 mb_template_length
Length of the discontiguous words.
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST). Some of these possibly should...
Boolean nsg_compatibility_mode
Compatibility option for the NCBI's structure group (note nsg_ prefix, stands for NCBI's structure gr...
double impala_scaling_factor
Scaling factor as used in IMPALA to do the matrix rescaling.
double inclusion_ethresh
Minimum evalue for inclusion in PSSM calculation.
Boolean ignore_unaligned_positions
This turns off a validation for the multiple sequence alignment in the PSSM engine for unaligned posi...
Int4 pseudo_count
Pseudocount constant.
Boolean use_best_alignment
If set to TRUE, use the best alignment when multiple HSPs are found in a query-subject alignment (i....
Options required for setting up the query sequence.
Uint1 strand_option
In blastn: which strand to search: 1 = forward; 2 = reverse; 3 = both.
char * filter_string
DEPRECATED, filtering options above.
SBlastFilterOptions * filtering_options
structured options for all filtering offered from algo/blast/core for BLAST.
Int4 genetic_code
Genetic code to use for translation, [t]blastx only.
All filtering options.
SRepeatFilterOptions * repeatFilterOptions
for organism specific repeat filtering.
SSegOptions * segOptions
low-complexity filtering for protein sequences (includes translated nucleotides).
SReadQualityOptions * readQualityOptions
quality filtering for mapping next-generation sequences
Boolean mask_at_hash
mask query only for lookup table creation
SWindowMaskerOptions * windowMaskerOptions
organism specific filtering with window masker.
SDustOptions * dustOptions
low-complexity filtering for nucleotides.
Options for dust algorithm, applies only to nucleotide-nucleotide comparisons.
int linker
min distance to link segments.
Filtering options for mapping next-generation sequences.
double frac_ambig
Fraction of ambiguous bases.
int entropy
Dimer entropy.
Filtering options for organism-specific repeat filtering.
char * database
Nucleotide database for mini BLAST search.
Options for SEG algorithm, applies only to protein-protein comparisons.
int window
initial window to trigger further work.
Filtering options for organism-specific filtering with Window Masker.
const char * database
Use winmasker database at this location.
int taxid
Select masking database for this TaxID.
Definition: type.c:6
Modified on Mon Mar 04 05:13:33 2024 by modify_doxy.py rev. 669887