59 if (dust_options ==
NULL)
79 if (seg_options ==
NULL)
92 if (winmask_options) {
94 if (*winmask_options ==
NULL)
97 (*winmask_options)->taxid = 0;
98 (*winmask_options)->database =
NULL;
112 sfree(winmask_options);
122 sfree(repeat_options);
130 if (repeat_options ==
NULL)
134 if (*repeat_options ==
NULL)
146 if (*repeat_options ==
NULL)
152 sfree((*repeat_options)->database);
153 (*repeat_options)->database =
strdup(db);
162 if (*winmask_options ==
NULL)
168 sfree((*winmask_options)->database);
171 (*winmask_options)->database =
strdup(db);
180 if (read_quality_options) {
181 free(read_quality_options);
189 if (!read_quality_options) {
194 if (!*read_quality_options) {
198 (*read_quality_options)->frac_ambig = 0.5;
199 (*read_quality_options)->entropy = 16;
218 sfree(filter_options);
231 (*filter_options)->mask_at_hash =
FALSE;
267 else if (!opt1 && opt2)
305 else if (!opt1 && opt2)
339 else if (!opt1 && opt2)
367 if (! (have1 || have2))
370 if (have1 && ! have2) {
372 }
else if (! have1 && have2) {
424 if (filter_options ==
NULL)
435 if (filter_options ==
NULL)
445 if (filter_options ==
NULL)
448 "SBlastFilterOptionsValidate: NULL filter_options");
459 "SBlastFilterOptionsValidate: Repeat filtering only supported with blastn");
467 "SBlastFilterOptionsValidate: No repeat database specified for repeat filtering");
479 "SBlastFilterOptionsValidate: Dust filtering only supported with blastn");
491 "SBlastFilterOptionsValidate: SEG filtering is not supported with blastn");
523 if (*options ==
NULL)
578 if (*options ==
NULL)
594 (*options)->program_number = program;
616 "x_dropoff must be greater than zero");
624 "off_diagonal_range is only useful in 2-hit algorithm");
636 double xdrop_ungapped)
643 if (xdrop_ungapped != 0)
666 if (*options ==
NULL)
674 (*options)->gap_x_dropoff_final =
691 (*options)->max_mismatches = 5;
692 (*options)->mismatch_window = 10;
693 (*options)->program_number = program;
701 double x_dropoff_final)
727 if (x_dropoff_final) {
754 "Greedy extension only supported for BLASTN");
765 "Score-only and traceback Smith-Waterman must "
766 "both be specified");
792 if (*options ==
NULL)
799 (*options)->is_ooframe =
FALSE;
812 (*options)->gapped_calculation =
TRUE;
814 (*options)->program_number = program_number;
817 (*options)->complexity_adjusted_scoring =
FALSE;
825 const char *matrix,
Int4 gap_open,
Int4 gap_extend)
843 if (greedy_extension) {
872 "Gapped search is not allowed for tblastx");
886 "BLASTN penalty must be negative");
896 "BLASTN reward/penalty combination not supported for gapped search");
904 "BLASTN gap extension penalty cannot be 0");
931 else if (status == 2)
948 "Out-of-frame only permitted for blastx and tblastn");
958 if (old_opt ==
NULL || new_opt ==
NULL)
962 if (*new_opt ==
NULL)
975 const char* matrix_name)
1006 if (options ==
NULL) {
1013 if (*options ==
NULL)
1048 num_searchsp *
sizeof(
Int8));
1067 if (options ==
NULL)
1081 if (*options ==
NULL)
1084 switch (program_number) {
1122 (*options)->program_number = program_number;
1123 (*options)->stride = 0;
1131 double threshold,
Int4 word_size)
1177 const double kB62_threshold = 11;
1183 if (matrixName ==
NULL)
1187 *threshold = kB62_threshold;
1188 else if(
strcasecmp(matrixName,
"BLOSUM45") == 0)
1190 else if(
strcasecmp(matrixName,
"BLOSUM62_20") == 0)
1192 else if(
strcasecmp(matrixName,
"BLOSUM80") == 0)
1194 else if(
strcasecmp(matrixName,
"PAM30") == 0)
1196 else if(
strcasecmp(matrixName,
"PAM70") == 0)
1198 else if(
strcasecmp(matrixName,
"IDENTITY") == 0)
1201 *threshold = kB62_threshold;
1213 const Int4 kB62_windowsize = 40;
1219 if (matrixName ==
NULL)
1224 else if(
strcasecmp(matrixName,
"BLOSUM45") == 0)
1226 else if(
strcasecmp(matrixName,
"BLOSUM80") == 0)
1228 else if(
strcasecmp(matrixName,
"PAM30") == 0)
1230 else if(
strcasecmp(matrixName,
"PAM70") == 0)
1249 Uint1 template_type,
1252 if (template_length == 0)
1256 if (word_size != 11 && word_size != 12) {
1258 "Invalid discontiguous template parameters: word "
1259 "size must be either 11 or 12");
1263 if (template_length != 16 && template_length != 18 &&
1264 template_length != 21) {
1266 "Invalid discontiguous template parameters: "
1267 "template length must be 16, 18, or 21");
1271 if (template_type > 2) {
1275 "Invalid discontiguous template parameters: "
1276 "template type must be 0, 1, or 2");
1290 if (options ==
NULL)
1295 "PHI pattern can be specified only for blastp and blastn");
1309 "Non-zero threshold required");
1317 "Word-size must be greater than zero");
1325 "Word-size must be 4 or greater for nucleotide comparison");
1345 "Word-size must be less than "
1346 "8 for a tblastn, blastp or blastx search");
1354 "Word-size must be less "
1355 "than 5 for psiblast");
1361 "Word-size must be less "
1362 "than 6 for protein comparison");
1372 "Megablast lookup table only supported with blastn");
1384 "Blastp, Blastx or Tblastn with word size"
1386 "compressed alphabet lookup table");
1393 "Compressed alphabet lookup table requires "
1394 "word size 5, 6 or 7");
1410 "Invalid lookup table type for discontiguous Mega BLAST");
1417 "The limit_lookup option can only be used for "
1418 "nucleotide searches");
1424 "The limit_lookup option can only be used with "
1450 if (*options ==
NULL)
1455 (*options)->program_number = program_number;
1458 (*options)->mask_level = 101;
1464 (*options)->do_sum_stats =
FALSE;
1465 }
else if (!gapped_calculation ||
1468 (*options)->do_sum_stats =
TRUE;
1470 (*options)->do_sum_stats =
FALSE;
1473 (*options)->hsp_filt_opt =
NULL;
1475 (*options)->max_edit_distance =
INT4_MAX;
1483 double evalue,
Int4 hitlist_size,
1485 Int4 min_diag_separation)
1494 if (min_diag_separation)
1508 if (options ==
NULL)
1514 "No hits are being saved");
1521 "expect value or cutoff score must be greater than zero");
1531 "Uneven gap linking of HSPs is allowed for blastx, "
1532 "tblastn, and psitblastn only");
1539 "culling limit must be greater than or equal to zero");
1546 "HSP Filtering options invalid");
1565 *psi_options = options;
1582 if ( !psi_options ) {
1588 "Pseudo count must be greater than or equal to 0");
1594 "Inclusion threshold must be greater than 0");
1612 if ( !db_options ) {
1622 *db_options = options;
1631 if (db_options ==
NULL)
1664 (*score_options)->gapped_calculation)))
1668 (*score_options)->gapped_calculation)))
1696 if (ext_options ==
NULL || score_options ==
NULL)
1708 "Greedy extension must be used if gap existence and extension options are zero");
1719 "Jumper extension must be used for mapping");
1734 "Compositional adjustments are only supported with blastp, blastx, or tblastn");
1739 "Compositional adjustments are only supported for gapped searches");
1766 lookup_options, blast_msg)) != 0)
1769 word_options, blast_msg)) != 0)
1775 score_options, blast_msg)) != 0)
1790 if (lookup_options->
word_size > 5 && is_identity) {
1794 "Word size larger than 5 is not supported for "
1795 "the identity scoring matrix");
1806 "Culling or best hit filtering is not supported");
1883 sfree(culling_opts);
1899 if ( filt_opts ==
NULL || best_hit ==
NULL || *best_hit ==
NULL) {
1915 if ( filt_opts ==
NULL || culling ==
NULL || *culling ==
NULL) {
1936 writer_found =
TRUE;
1990 if (!subject_besthit_opts)
1993 sfree(subject_besthit_opts);
2001 if ( filt_opts ==
NULL || subject_besthit ==
NULL || *subject_besthit ==
NULL) {
2006 *subject_besthit =
NULL;
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
const double kSegLocut
Locut parameter for SEG.
const int kSegWindow
Window that SEG examines at once.
const double kSegHicut
Hicut parameter for SEG.
EBlastStage
Enumeration for the stages in the BLAST search.
@ ePrelimSearch
Preliminary stage.
BLAST filtering functions.
Int2 BlastFilteringOptionsFromString(EBlastProgramType program_number, const char *instructions, SBlastFilterOptions **filtering_options, Blast_Message **blast_message)
Produces SBlastFilterOptions from a string that has been traditionally supported in blast.
#define DBSEQ_CHUNK_OVERLAP
By how much should the chunks of a subject sequence overlap if it is too long and has to be split.
#define BLASTERR_OPTION_PROGRAM_INVALID
The option is not supported with the specified program.
#define BLASTERR_OPTION_VALUE_INVALID
The value of the option is not supported (e.g., word size too small)
Int2 Blast_MessageWrite(Blast_Message **blast_msg, EBlastSeverity severity, int context, const char *message)
Writes a message to a structure.
const int kBlastMessageNoContext
Declared in blast_message.h as extern const.
#define BLASTERR_MEMORY
System error: out of memory condition.
#define BLASTERR_INVALIDPARAM
Invalid parameter: possible programmer error or pre-condition not met.
PSIBlastOptions * PSIBlastOptionsFree(PSIBlastOptions *psi_options)
Deallocate PSI BLAST options.
Int2 BLAST_FillQuerySetUpOptions(QuerySetUpOptions *options, EBlastProgramType program, const char *filter_string, Uint1 strand_option)
Fill non-default contents of the QuerySetUpOptions.
BlastHSPCullingOptions * BlastHSPCullingOptionsNew(int max)
Allocate a new object for culling options.
Int2 BlastDatabaseOptionsNew(BlastDatabaseOptions **db_options)
Allocates the BlastDatabase options structure and sets the default database genetic code value (BLAST...
Int2 SWindowMaskerOptionsResetDB(SWindowMaskerOptions **winmask_options, const char *db)
Resets name of db for window masker filtering.
Int2 SRepeatFilterOptionsNew(SRepeatFilterOptions **repeat_options)
Allocates memory for SRepeatFilterOptions, fills in defaults.
Int2 PSIBlastOptionsValidate(const PSIBlastOptions *psi_options, Blast_Message **blast_msg)
Validates the PSI BLAST options so that they have sane values.
Int2 BlastHSPBestHitOptionsValidate(const BlastHSPFilteringOptions *opts)
Validate the best hit algorithm parameters (if any) in the BlastHSPFilteringOptions structure.
Int2 BLAST_ValidateOptions(EBlastProgramType program_number, const BlastExtensionOptions *ext_options, const BlastScoringOptions *score_options, const LookupTableOptions *lookup_options, const BlastInitialWordOptions *word_options, const BlastHitSavingOptions *hit_options, Blast_Message **blast_msg)
Validate all options.
Int2 BlastHSPFilteringOptions_AddCulling(BlastHSPFilteringOptions *filt_opts, BlastHSPCullingOptions **culling, EBlastStage stage)
Add the culling options.
BlastHitSavingOptions * BlastHitSavingOptionsFree(BlastHitSavingOptions *options)
Deallocate memory for BlastHitSavingOptions.
Int2 BlastScoringOptionsValidate(EBlastProgramType program_number, const BlastScoringOptions *options, Blast_Message **blast_msg)
Validate contents of BlastScoringOptions.
Int2 BlastQuerySetUpOptionsNew(QuerySetUpOptions **options)
Allocate memory for QuerySetUpOptions and fill with default values.
Int2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char *matrixName, double *threshold)
Get thresholds for word-finding suggested by Stephen Altschul.
SDustOptions * SDustOptionsFree(SDustOptions *dust_options)
Frees SDustOptions.
Int2 BLAST_FillScoringOptions(BlastScoringOptions *options, EBlastProgramType program_number, Boolean greedy_extension, Int4 penalty, Int4 reward, const char *matrix, Int4 gap_open, Int4 gap_extend)
Fill non-default values in the BlastScoringOptions structure.
BlastHSPFilteringOptions * BlastHSPFilteringOptionsFree(BlastHSPFilteringOptions *opts)
Deallocate a BlastHSPFilteringOptions structure.
Int2 SRepeatFilterOptionsResetDB(SRepeatFilterOptions **repeat_options, const char *db)
Resets name of db for repeat filtering.
Int2 BlastScoringOptionsNew(EBlastProgramType program_number, BlastScoringOptions **options)
Allocate memory for BlastScoringOptions and fill with default values.
Int2 BlastEffectiveLengthsOptionsNew(BlastEffectiveLengthsOptions **options)
Allocate memory for BlastEffectiveLengthsOptions* and fill with default values.
SReadQualityOptions * SReadQualityOptionsFree(SReadQualityOptions *read_quality_options)
Frees memory for SReadQualityOptions.
Boolean SBlastFilterOptionsMaskAtHash(const SBlastFilterOptions *filter_options)
Queries whether masking should be done only for the lookup table or for the entire search.
Boolean SBlastFilterOptionsNoFiltering(const SBlastFilterOptions *filter_options)
Queries whether no masking is required.
Int2 BLAST_GetSuggestedWindowSize(EBlastProgramType program_number, const char *matrixName, Int4 *window_size)
Get window sizes for two hit algorithm suggested by Stephen Altschul.
Int2 SBlastFilterOptionsValidate(EBlastProgramType program_number, const SBlastFilterOptions *filter_options, Blast_Message **blast_message)
Validates filter options to ensure that program and options are consistent and that options have vali...
const int kDustLinker
Parameter used by dust to link together close low-complexity segments.
BlastHSPSubjectBestHitOptions * BlastHSPSubjectBestHitOptionsFree(BlastHSPSubjectBestHitOptions *subject_besthit_opts)
Deallocates subject besthit structure.
Int2 BLAST_InitDefaultOptions(EBlastProgramType program_number, LookupTableOptions **lookup_options, QuerySetUpOptions **query_setup_options, BlastInitialWordOptions **word_options, BlastExtensionOptions **ext_options, BlastHitSavingOptions **hit_options, BlastScoringOptions **score_options, BlastEffectiveLengthsOptions **eff_len_options, PSIBlastOptions **psi_options, BlastDatabaseOptions **db_options)
Initialize all the BLAST search options structures with the default values.
SRepeatFilterOptions * SRepeatFilterOptionsFree(SRepeatFilterOptions *repeat_options)
Frees SRepeatFilterOptions.
BlastInitialWordOptions * BlastInitialWordOptionsFree(BlastInitialWordOptions *options)
Deallocate memory for BlastInitialWordOptions.
Int2 BlastHitSavingOptionsNew(EBlastProgramType program_number, BlastHitSavingOptions **options, Boolean gapped_calculation)
Allocate memory for BlastHitSavingOptions.
Int2 BLAST_FillEffectiveLengthsOptions(BlastEffectiveLengthsOptions *options, Int4 dbseq_num, Int8 db_length, Int8 *searchsp_eff, Int4 num_searchsp)
Fill the non-default values in the BlastEffectiveLengthsOptions structure.
Int2 SSegOptionsNew(SSegOptions **seg_options)
Allocates memory for SSegOptions, fills in defaults.
SWindowMaskerOptions * SWindowMaskerOptionsFree(SWindowMaskerOptions *winmask_options)
Frees SWindowMaskerOptions.
BlastEffectiveLengthsOptions * BlastEffectiveLengthsOptionsFree(BlastEffectiveLengthsOptions *options)
Deallocate memory for BlastEffectiveLengthsOptions*.
SBlastFilterOptions * SBlastFilterOptionsFree(SBlastFilterOptions *filter_options)
Frees SBlastFilterOptions and all subservient structures.
Int2 SWindowMaskerOptionsNew(SWindowMaskerOptions **winmask_options)
Allocates memory for SWindowMaskerOptions, fills in defaults.
Int2 BlastScoringOptionsSetMatrix(BlastScoringOptions *opts, const char *matrix_name)
Resets matrix name option.
static SDustOptions * s_MergeDustOptions(const SDustOptions *opt1, const SDustOptions *opt2)
Merges together two sets of dust options, choosing the most non-default one.
Int2 SBlastFilterOptionsMerge(SBlastFilterOptions **combined, const SBlastFilterOptions *opt1, const SBlastFilterOptions *opt2)
Merges two sets of options together, taking the non-default one as preferred.
BlastExtensionOptions * BlastExtensionOptionsFree(BlastExtensionOptions *options)
Deallocate memory for BlastExtensionOptions.
Int2 BLAST_FillInitialWordOptions(BlastInitialWordOptions *options, EBlastProgramType program, Int4 window_size, double xdrop_ungapped)
Fill non-default values in the BlastInitialWordOptions structure.
Int2 SDustOptionsNew(SDustOptions **dust_options)
Allocates memory for SDustOptions, fills in defaults.
Int2 BlastInitialWordOptionsValidate(EBlastProgramType program_number, const BlastInitialWordOptions *options, Blast_Message **blast_msg)
Validate correctness of the initial word options.
Int2 BLAST_FillExtensionOptions(BlastExtensionOptions *options, EBlastProgramType program, Int4 greedy, double x_dropoff, double x_dropoff_final)
Fill non-default values in the BlastExtensionOptions structure.
Int2 LookupTableOptionsValidate(EBlastProgramType program_number, const LookupTableOptions *options, Blast_Message **blast_msg)
Validate LookupTableOptions.
Int2 BlastHitSavingOptionsValidate(EBlastProgramType program_number, const BlastHitSavingOptions *options, Blast_Message **blast_msg)
Validate BlastHitSavingOptions.
const double kPSSM_NoImpalaScaling
Value used to indicate that no IMPALA-style scaling should be performed when scaling a PSSM.
BlastHSPCullingOptions * BlastHSPCullingOptionsFree(BlastHSPCullingOptions *culling_opts)
Deallocates culling options structure.
Int2 BLAST_FillHitSavingOptions(BlastHitSavingOptions *options, double evalue, Int4 hitlist_size, Boolean is_gapped, Int4 culling_limit, Int4 min_diag_separation)
Fill non-default values in the BlastHitSavingOptions structure.
SSegOptions * SSegOptionsFree(SSegOptions *seg_options)
Frees SSegOptions.
Int2 BlastHSPCullingOptionsValidate(const BlastHSPFilteringOptions *opts)
Validate culling options.
Int2 BlastHSPSubjectBestHitOptionsValidate(const BlastHSPFilteringOptions *opts)
Validate subject besthit options.
static Boolean s_DiscWordOptionsValidate(Int4 word_size, Uint1 template_length, Uint1 template_type, Blast_Message **blast_msg)
Validate options for the discontiguous word megablast Word size must be 11 or 12; template length 16,...
BlastHSPFilteringOptions * BlastHSPFilteringOptionsNew()
Allocate and initialize a BlastHSPFilteringOptions structure.
Int2 LookupTableOptionsNew(EBlastProgramType program_number, LookupTableOptions **options)
Allocate memory for lookup table options and fill with default values.
BlastDatabaseOptions * BlastDatabaseOptionsFree(BlastDatabaseOptions *db_options)
Deallocate database options.
const int kDustLevel
Declared in blast_def.h as extern const.
BlastHSPSubjectBestHitOptions * BlastHSPSubjectBestHitOptionsNew(Boolean isProtein)
Allocate a new object for subject besthit options.
const int kDustWindow
Window parameter used by dust.
Int2 BLAST_FillLookupTableOptions(LookupTableOptions *options, EBlastProgramType program_number, Boolean is_megablast, double threshold, Int4 word_size)
Fill non-default values in the LookupTableOptions structure.
Int2 SReadQualityOptionsNew(SReadQualityOptions **read_quality_options)
Allocates memory for SReadQualityOptions, fills in defaults.
Int2 BlastHSPFilteringOptions_AddSubjectBestHit(BlastHSPFilteringOptions *filt_opts, BlastHSPSubjectBestHitOptions **subject_besthit)
static SWindowMaskerOptions * s_MergeWindowMaskerOptions(const SWindowMaskerOptions *opt1, const SWindowMaskerOptions *opt2)
Merges together two sets of window masker options, choosing the most non-default one.
BlastHSPBestHitOptions * BlastHSPBestHitOptionsNew(double overhang, double score_edge)
Allocate and initialize a BlastHSPBestHitOptions structure.
Int2 SBlastFilterOptionsNew(SBlastFilterOptions **filter_options, EFilterOptions type)
Allocates memory for SBlastFilterOptions.
static Int2 s_BlastExtensionScoringOptionsValidate(EBlastProgramType program_number, const BlastExtensionOptions *ext_options, const BlastScoringOptions *score_options, Blast_Message **blast_msg)
Checks that the extension and scoring options are consistent with each other.
Boolean BlastEffectiveLengthsOptions_IsSearchSpaceSet(const BlastEffectiveLengthsOptions *options)
Return true if the search space is set for any of the queries in the search.
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
static SSegOptions * s_MergeSegOptions(const SSegOptions *opt1, const SSegOptions *opt2)
Merges together two sets of SEG options, choosing the most non-default one.
Int2 BlastHSPFilteringOptions_AddBestHit(BlastHSPFilteringOptions *filt_opts, BlastHSPBestHitOptions **best_hit, EBlastStage stage)
Add the best hit options.
static SRepeatFilterOptions * s_MergeRepeatOptions(const SRepeatFilterOptions *opt1, const SRepeatFilterOptions *opt2)
Merges together two sets of repeat filter options, choosing the most non-default one.
Int2 BlastInitialWordOptionsNew(EBlastProgramType program, BlastInitialWordOptions **options)
Allocate memory for BlastInitialWordOptions and fill with default values.
BlastScoringOptions * BlastScoringOptionsFree(BlastScoringOptions *options)
Deallocate memory for BlastScoringOptions.
Int2 BlastExtensionOptionsValidate(EBlastProgramType program_number, const BlastExtensionOptions *options, Blast_Message **blast_msg)
Validate contents of BlastExtensionOptions.
BlastHSPBestHitOptions * BlastHSPBestHitOptionsFree(BlastHSPBestHitOptions *opt)
Deallocate a BlastHSPBestHitOptions structure.
LookupTableOptions * LookupTableOptionsFree(LookupTableOptions *options)
Deallocates memory for LookupTableOptions*.
Int2 BlastScoringOptionsDup(BlastScoringOptions **new_opt, const BlastScoringOptions *old_opt)
Produces copy of "old" options, with new memory allocated.
QuerySetUpOptions * BlastQuerySetUpOptionsFree(QuerySetUpOptions *options)
Deallocate memory for QuerySetUpOptions.
Int2 BlastHSPFilteringOptionsValidate(const BlastHSPFilteringOptions *opts)
Validates the BlastHSPFilteringOptions structure.
Int2 BlastExtensionOptionsNew(EBlastProgramType program, BlastExtensionOptions **options, Boolean gapped)
Allocate memory for BlastExtensionOptions and fill with default values.
The structures and functions in blast_options.
#define PSI_INCLUSION_ETHRESH
Defaults for PSI-BLAST and DELTA-BLAST options.
#define BLAST_GAP_X_DROPOFF_NUCL
default dropoff for non-greedy nucleotide gapped extensions
#define BLAST_HITLIST_SIZE
Number of database sequences to save hits for.
#define BLAST_GAP_OPEN_MEGABLAST
default gap open penalty (megablast with greedy gapped alignment)
#define BLAST_UNGAPPED_X_DROPOFF_NUCL
ungapped dropoff score for blastn (and megablast)
#define BLAST_WORD_THRESHOLD_BLASTX
default threshold (blastx)
#define BLAST_UNGAPPED_X_DROPOFF_PROT
default dropoff for ungapped extension; ungapped extensions will stop when the score for the extensio...
#define BLAST_WORDSIZE_MAPPER
default word size for mapping rna-seq to a genome
#define BLAST_GAP_EXTN_MEGABLAST
default gap extension penalty (megablast with greedy gapped alignment)
#define DEFAULT_SUBJECT_BESTHIT_PROT_MAX_RANGE_DIFF
@ eJumperWithTraceback
Jumper extension (mapping)
@ eDynProgScoreOnly
standard affine gapping
@ eGreedyScoreOnly
Greedy extension (megaBlast)
@ eSmithWatermanScoreOnly
Score-only smith-waterman.
#define BLAST_GAP_OPEN_PROT
Protein gap costs are the defaults for the BLOSUM62 scoring matrix.
#define BLAST_WORDSIZE_MEGABLAST
default word size (contiguous megablast; for discontig megablast the word size is explicitly overridd...
#define DEFAULT_SUBJECT_BESTHIT_NUCL_MAX_RANGE_DIFF
#define BLAST_GAP_OPEN_NUCL
default gap open penalty (blastn)
#define BLAST_GAP_TRIGGER_NUCL
default bit score that will trigger a gapped extension for blastn
#define BLAST_GAP_EXTN_PROT
cost to extend a gap.
#define BLAST_GAP_X_DROPOFF_FINAL_NUCL
default dropoff for final nucleotide gapped extensions
#define MAX_DB_WORD_COUNT_MAPPER
Default max frequency for a database word.
#define BLAST_DEFAULT_MATRIX
Default matrix name: BLOSUM62.
#define BLAST_EXPECT_VALUE
Default parameters for saving hits.
#define BLAST_SCAN_RANGE_NUCL
default scan range (blastn)
#define BLAST_WINDOW_SIZE_NUCL
default window size (blastn)
#define BLAST_GAP_X_DROPOFF_GREEDY
default dropoff for greedy nucleotide gapped extensions
#define BLAST_WORD_THRESHOLD_TBLASTX
default threshold (tblastx)
#define BLAST_WORD_THRESHOLD_BLASTP
neighboring word score thresholds; a threshold of zero means that only query and subject words that m...
#define BLAST_GAP_EXTN_NUCL
default gap extension penalty (blastn)
#define BLAST_GAP_TRIGGER_PROT
default bit score that will trigger gapped extension
#define BLAST_GAP_X_DROPOFF_PROT
default dropoff for preliminary gapped extensions
#define PSI_PSEUDO_COUNT_CONST
Pseudo-count constant for PSI-BLAST.
#define BLAST_REWARD
default nucleotide match score
@ eGreedyTbck
Greedy extension (megaBlast)
@ eDynProgTbck
standard affine gapping
@ eSmithWatermanTbckFull
Smith-waterman to find all alignments.
#define BLAST_PENALTY
default reward and penalty (only applies to blastn/megablast)
#define BLAST_WINDOW_SIZE_PROT
Some default values (used when creating blast options block and for command-line program defaults).
EFilterOptions
Types of filtering options.
@ eDustRepeats
Repeat and dust filtering for nucleotides.
@ eRepeats
Repeat filtering for nucleotides.
@ eDust
low-complexity for nucleotides.
@ eEmpty
no filtering at all.
@ eSeg
low-complexity for proteins.
#define BLAST_GAP_X_DROPOFF_FINAL_PROT
default dropoff for the final gapped extension with traceback
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
#define BLAST_WORD_THRESHOLD_TBLASTN
default neighboring threshold (tblastn/rpstblastn)
#define BLAST_WORDSIZE_PROT
length of word to trigger an extension.
#define BLAST_WORDSIZE_NUCL
default word size (blastn)
#define kDefaultRepeatFilterDb
Default value for repeats database filtering.
@ eNaLookupTable
blastn lookup table
@ eMBLookupTable
megablast lookup table (includes both contiguous and discontiguous megablast)
@ ePhiNaLookupTable
nucleotide lookup table for phi-blast
@ eAaLookupTable
standard protein (blastp) lookup table
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
@ ePhiLookupTable
protein lookup table specialized for phi-blast
@ eRPSLookupTable
RPS lookup table (rpsblast and rpstblastn)
@ eNaHashLookupTable
used for 16-base words
Boolean Blast_ProgramIsMapping(EBlastProgramType p)
Boolean Blast_QueryIsPssm(EBlastProgramType p)
Returns true if the query is PSSM.
Boolean Blast_ProgramIsPhiBlast(EBlastProgramType p)
Returns true if program is PHI-BLAST (i.e.
Boolean Blast_QueryIsTranslated(EBlastProgramType p)
Returns true if the query is translated.
Boolean Blast_ProgramIsNucleotide(EBlastProgramType p)
Boolean Blast_QueryIsPattern(EBlastProgramType p)
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST (i.e.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Boolean Blast_SubjectIsTranslated(EBlastProgramType p)
Returns true if the subject is translated.
Definitions and prototypes used by blast_stat.c to calculate BLAST statistics.
char * BLAST_PrintAllowedValues(const char *matrix, Int4 gap_open, Int4 gap_extend)
Prints a message about the allowed open etc. values for the given matrix, BlastKarlinBlkGappedFill sh...
Int2 Blast_KarlinBlkGappedLoadFromTables(Blast_KarlinBlk *kbp, Int4 gap_open, Int4 gap_extend, const char *matrix_name, Boolean standard_only)
Attempts to fill KarlinBlk for given gap opening, extensions etc.
char * BLAST_PrintMatrixMessage(const char *matrix, Boolean standard_only)
Prints a message about the allowed matrices, BlastKarlinBlkGappedFill should return 1 before this is...
Boolean BLAST_CheckRewardPenaltyScores(Int4 reward, Int4 penalty)
Check the validity of the reward and penalty scores.
Various auxiliary BLAST utility functions.
char * BLAST_StrToUpper(const char *string)
Returns a copy of the input string with all its characters turned to uppercase.
Constants used in compositional score matrix adjustment.
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eNoCompositionBasedStats
Don't use composition based statistics.
uint8_t Uint1
1-byte (8-bit) unsigned integer
int16_t Int2
2-byte (16-bit) signed integer
int32_t Int4
4-byte (32-bit) signed integer
uint32_t Uint4
4-byte (32-bit) unsigned integer
int64_t Int8
8-byte (64-bit) signed integer
Implementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
#define kBestHit_OverhangMax
Maximum value for overhang.
#define kBestHit_OverhangMin
Minimum value for overhang.
#define kBestHit_ScoreEdgeMin
Minimum value for score_edge.
#define kBestHit_ScoreEdgeMax
Maximum value for score_edge.
Implementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
if(yy_accept[yy_current_state])
int strcmp(const char *str1, const char *str2)
#define DIM(A)
dimension of an array.
#define INT4_MAX
largest number represented by signed int
void * BlastMemDup(const void *orig, size_t size)
Copies memory using memcpy and malloc.
Uint1 Boolean
bool replacement for C
#define TRUE
bool replacement for C indicating true.
#define FALSE
bool replacement for C indicating false.
#define INT2_MAX
largest number represented by signed (two byte) short
#define ASSERT
macro for assert.
#define MAX(a, b)
returns larger of a and b.
Options used to create the ReadDBFILE structure Include database name and various information for res...
Int4 genetic_code
Genetic code to use for translation, tblast[nx] only.
Options for setting up effective lengths and search spaces.
Int8 * searchsp_eff
Search space to be used for statistical calculations (one such per query context)
Int8 db_length
Database length to be used for statistical calculations.
Int4 dbseq_num
Number of database sequences to be used for statistical calculations.
Int4 num_searchspaces
Number of elements in searchsp_eff, this must be equal to the number of contexts in the search.
Options used for gapped extension These include: a.
EBlastTbackExt eTbackExt
type of traceback extension.
EBlastPrelimGapExt ePrelimGapExt
type of preliminary gapped extension (normally) for calculating score.
double gap_x_dropoff_final
X-dropoff value for the final gapped extension (in bits)
double gap_x_dropoff
X-dropoff value for gapped extension (in bits)
Int4 compositionBasedStats
mode of compositional adjustment to use; if zero then compositional adjustment is not used
Options for the Best Hit HSP collection algorithm.
Options for the HSP culling algorithm.
int max_hits
Maximum number of hits per area of query.
Structure containing the HSP filtering/writing options.
EBlastStage culling_stage
BlastHSPBestHitOptions * best_hit
Best Hit algorithm.
BlastHSPSubjectBestHitOptions * subject_besthit_opts
Subject Culling.
EBlastStage best_hit_stage
BlastHSPCullingOptions * culling_opts
culling algorithm
unsigned int max_range_diff
Options used when evaluating and saving hits These include: a.
Int4 culling_limit
If the query range of an HSP is contained in at least this many higher-scoring HSPs,...
Int4 longest_intron
The longest distance between HSPs allowed for combining via sum statistics with uneven gaps.
double expect_value
The expect value cut-off threshold for an HSP, or a combined hit if sum statistics is used.
Int4 cutoff_score
The (raw) score cut-off threshold.
Int4 hitlist_size
Maximal number of database sequences to return results for.
Int4 min_diag_separation
How many diagonals separate a hit from a substantial alignment before it's not blocked out.
Int4 max_edit_distance
Maximum number of mismatches and gaps.
BlastHSPFilteringOptions * hsp_filt_opt
Contains options to configure the HSP filtering/writing structures. If not set, the default HSP filt...
Options needed for initial word finding and processing.
double x_dropoff
X-dropoff value (in bits) for the ungapped extension.
Int4 window_size
Maximal allowed distance between 2 hits in case 2 hits are required to trigger the extension.
Int4 scan_range
Maximal number of gaps allowed between 2 hits.
Scoring options block Used to produce the BlastScoreBlk structure This structure may be needed for lo...
Int2 penalty
Penalty for a mismatch.
EBlastProgramType program_number
indicates blastn, blastp, etc.
Int4 gap_open
Extra penalty for starting a gap.
Int4 gap_extend
Penalty for each gap residue.
Int2 reward
Reward for a match.
Boolean gapped_calculation
gap-free search if FALSE
char * matrix_path
Directory path to where matrices are stored.
char * matrix
Name of the matrix containing all scores: needed for finding neighboring words.
Boolean is_ooframe
Should out-of-frame gapping be used in a translated search?
Structure to hold the a message from the core of the BLAST engine.
Options needed to construct a lookup table Also needed: query sequence and query length.
Int4 word_size
Determines the size of the lookup table.
char * phi_pattern
PHI-BLAST pattern.
Uint1 max_db_word_count
words with larger frequency in the database will be masked in the lookup table, if the db_filter opto...
Boolean db_filter
scan the database and include only words that appear in the database between 1 and 9 times (currently...
double threshold
Score threshold for putting words in a lookup table (fractional values are allowed,...
Int4 mb_template_type
Type of a discontiguous word template.
ELookupTableType lut_type
What kind of lookup table to construct?
Int4 mb_template_length
Length of the discontiguous words.
Options used in protein BLAST only (PSI, PHI, RPS and translated BLAST) Some of these possibly should...
Boolean nsg_compatibility_mode
Compatibility option for the NCBI's structure group (note nsg_ prefix, stands for NCBI's structure gr...
double impala_scaling_factor
Scaling factor as used in IMPALA to do the matrix rescaling.
double inclusion_ethresh
Minimum evalue for inclusion in PSSM calculation.
Boolean ignore_unaligned_positions
This turns off a validation for the multiple sequence alignment in the PSSM engine for unaligned posi...
Int4 pseudo_count
Pseudocount constant.
Boolean use_best_alignment
If set to TRUE, use the best alignment when multiple HSPs are found in a query-subject alignment (i....
Options required for setting up the query sequence.
Uint1 strand_option
In blastn: which strand to search: 1 = forward; 2 = reverse; 3 = both.
char * filter_string
DEPRECATED, filtering options above.
SBlastFilterOptions * filtering_options
structured options for all filtering offered from algo/blast/core for BLAST.
SRepeatFilterOptions * repeatFilterOptions
for organism specific repeat filtering.
SSegOptions * segOptions
low-complexity filtering for proteins sequences (includes translated nucleotides).
SReadQualityOptions * readQualityOptions
quality filtering for mapping next-generation sequences
Boolean mask_at_hash
mask query only for lookup table creation
SWindowMaskerOptions * windowMaskerOptions
organism specific filtering with window masker.
SDustOptions * dustOptions
low-complexity filtering for nucleotides.
Options for dust algorithm, applies only to nucl.
int linker
min distance to link segments.
Filtering options for mapping next-generation sequences.
Filtering options for organism-specific repeat filtering.
char * database
Nucleotide database for mini BLAST search.
Options for SEG algorithm, applies only to protein-protein comparisons.
int window
initial window to trigger further work.
Filtering options for organism-specific filtering with Window Masker.
const char * database
Use winmasker database at this location.
int taxid
Select masking database for this TaxID.
voidp calloc(uInt items, uInt size)