72 const string& program_desc)
73 : m_ProgName(program_name), m_ProgDesc(program_desc)
85 const string& default_task)
86 : m_SupportedTasks(supported_tasks), m_DefaultTask(default_task)
102 arg_desc.
AddKey(
kTask,
"task_name",
"Task to execute",
142 string des =
"Expectation value (E) threshold for saving hits. Default = 10";
144 des +=
" (1000 for blastn-short)";
149 "Expectation value (E) threshold for saving hits ",
155 "Expectation value (E) threshold for saving hits ",
164 ?
"Word size for wordfinder algorithm"
165 :
"Word size for wordfinder algorithm (length of best perfect match)";
176 "Cost to open a gap",
181 "Cost to extend a gap",
198 "Percent query coverage per hsp",
204 "Set maximum number of HSPs per subject sequence to save for each query",
213 "X-dropoff value (in bits) for ungapped extensions",
221 "X-dropoff value (in bits) for preliminary gapped extensions",
227 "X-dropoff value (in bits) for final gapped alignment",
235 "Effective length of the search space",
242 "Use sum statistics",
257 int gap_open=0, gap_extend=0;
260 (args[
kArgMatrixName].AsString().c_str(), &gap_open, &gap_extend);
305 env.Set(
"OLD_FSC",
"true");
333 "Filter query sequence with SEG "
340 "Apply filtering locations as soft masks",
345 "Filter query sequence with DUST "
351 "BLAST database containing filtering elements (i.e.: repeats)",
355 "Enable WindowMasker filtering using a Taxonomic ID",
359 "Enable WindowMasker filtering using this repeats database.",
363 "Apply filtering locations as soft masks",
373 vector<string>&
output)
const
379 "Invalid number of arguments to filtering option");
390 vector<string> tokens;
423 "Invalid input for filtering parameters");
450 string msg(
"Only optimized binary windowmasker stat files are supported");
458 if (filter_dbs > 1) {
473 "Multiple hits window size, use 0 to specify "
503 "Number of off-diagonals to search for the 2nd hit, "
529 "Scoring matrix name",
534 "Use complexity adjusted scoring",
539 "Masklevel - percentage overlap allowed per "
540 "query domain [0-101]",
576 "Minimum word score such that the word is added to the "
577 "BLAST lookup table",
587 int word_threshold =
static_cast<int>(threshold);
592 }
else if (program ==
eBlastx &&
610 double threshold = -1;
614 if (threshold != -1) {
625 "Scoring matrix name (normally BLOSUM62)",
646 "Penalty for a nucleotide mismatch",
653 "Reward for a nucleotide match",
661 "Use non-greedy dynamic programming extension",
699 "Minimum raw gapped score to keep an alignment "
700 "in the preliminary gapped and traceback stages",
706 "Discontiguous MegaBLAST template type",
717 "Discontiguous MegaBLAST template length",
720 allowed_values.
insert(16);
721 allowed_values.
insert(18);
722 allowed_values.
insert(21);
774 " 0 or F or f: No composition-based statistics\n";
779 " 2 or T or t : Composition-based score adjustment as in "
780 "Bioinformatics 21:902-911,\n"
781 " 2005, conditioned on sequence properties\n"
782 " 3: Composition-based score adjustment as in "
783 "Bioinformatics 21:902-911,\n"
784 " 2005, unconditionally\n" :
"";
786 string legend = (
string)
"Use composition-based statistics:\n"
787 " D or d: default (equivalent to " +
m_DefaultOpt +
" )\n"
789 +
" 1" + one_opt_insrt +
": Composition-based statistics "
790 "as in NAR 29:2994-3005, 2001\n"
802 "Compute locally optimal Smith-Waterman alignments?",
823 const string& comp_stat_string,
824 bool smith_waterman_value,
835 switch (comp_stat_string[0]) {
836 case '0':
case 'F':
case 'f':
871 "Composition-adjusted searched are not supported with "
872 "an ungapped search, please add -comp_based_stats F or "
873 "do a gapped search");
879 tolower(comp_stat_string[1]) ==
'u') {
907 "Perform gapped alignment (default T, but "
908 "not available for tblastx)",
911 arg_desc.
AddAlias(
"-gapped", ARG_GAPPED);
935 "Length of the largest intron allowed in a translated "
936 "nucleotide sequence when linking multiple distinct "
964 "Frame shift penalty (for use with out-of-frame "
965 "gapped alignment in blastx or tblastn, default "
982 if ((cbs[0] !=
'0' )&& (cbs[0] !=
'F') && (cbs[0] !=
'f')) {
984 "Composition-adjusted searches are not supported with "
985 "Out-Of-Frame option, please add -comp_based_stats F ");
1000 static int gcs[] = {1,2,3,4,5,6,9,10,11,12,13,14,15,16,21,22,23,24,25,26,27,28,29,30,31,33};
1001 static const set<int> genetic_codes(gcs, gcs+
sizeof(gcs)/
sizeof(*gcs));
1003 return (genetic_codes.
find(
val) != genetic_codes.
end());
1008 return "values between: 1-6, 9-16, 21-31, 33";
1019 "Genetic code to use to translate query (see https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=cgencodes for details)\n",
1028 "Genetic code to use to translate "
1029 "database/subjects (see user manual for details)\n",
1063 "Number of bits to trigger gapping",
1085 "Pseudo-count value used when constructing PSSM",
1091 "E-value inclusion threshold for alignments "
1092 "with conserved domains",
1099 "E-value inclusion threshold for pairwise alignments",
1135 "PSI-TBLASTN checkpoint file",
1145 "Number of iterations to perform (0 means run "
1156 "File name to store checkpoint file",
1160 "File name to store ASCII version of PSSM",
1166 "(file name is given in -save_pssm or "
1167 "-save_ascii_pssm options)");
1170 vector<string> msa_exclusions;
1182 "File name of multiple sequence alignment to "
1183 "restart PSI-BLAST",
1185 ITERATE(vector<string>, exclusion, msa_exclusions) {
1192 "Ordinal number (1-based index) of the sequence"
1193 " to use as a master in the multiple sequence "
1194 "alignment. If not provided, the first sequence"
1195 " in the multiple sequence alignment will be "
1199 ITERATE(vector<string>, exclusion, msa_exclusions) {
1212 "Ignore the master sequence when creating PSSM",
true);
1214 vector<string> ignore_pssm_master_exclusions;
1217 ignore_pssm_master_exclusions.push_back(
kArgQuery);
1219 ITERATE(vector<string>, exclusion, msa_exclusions) {
1230 "PSI-BLAST checkpoint file",
1252 unsigned int msa_master_idx,
1253 bool ignore_pssm_tmplt_seq)
1267 return pssm_engine.
Run();
1302 const bool kSaveAllPssms
1311 if (kSaveAsciiPssm) {
1318 unsigned int msa_master_idx = 0;
1352 "Unsupported format for PSSM");
1355 string msg(
"Unrecognized format for PSSM in ");
1357 msg +=
"PssmWithParameters)";
1370 "File name containing pattern to search",
1393 string ltype = line.substr(0, 2);
1395 name = line.substr(4);
1396 else if (ltype ==
"PA")
1397 pattern = line.substr(4);
1399 if (!pattern.empty())
1405 "PHI pattern not read");
1468 "alignments. Can be expressed as a number or a "
1469 "function of read length: "
1470 "L,b,a for a * length + b.\n"
1471 "Zero means that the cutoff score will be equal to:\n"
1472 "read length, if read length <= 20,\n"
1473 "20, if read length <= 30,\n"
1474 "read length - 10, if read length <= 50,\n"
1478 "accepting an alignment\nDefault = unlimited",
1483 "genome or transcriptome",
1490 "high frequency in the searched database",
1493 "than this number of times in the database will be"
1494 " masked in the lookup table",
1500 "after collecting one while creating a lookup table",
1517 list<string> tokens;
1519 vector<double> coeffs;
1520 if (tokens.size() < 3) {
1522 (
string)
"Incorrectly formatted score function: " +
1523 s +
". It should be of the form 'L,b,a' for ax + b,"
1524 "a, b must be numbers");
1526 auto it = tokens.begin();
1529 for (; it != tokens.end(); ++it) {
1535 (
string)
"Incorrectly formatted score function: " +
1536 s +
". It should be of the form 'L,b,a' for ax + b,"
1537 " a, b must be real numbers");
1548 (
string)
"Incorrectly formatted score threshold: " +
1549 s +
". It must be either an integer or a linear "
1550 "function in the form: L,b,a for ax + b, a and b "
1551 "must be real numbers");
1564 string ref_type =
"genome";
1590 const static char suffix[] =
"VDJ";
1591 const static int df_num_align[3] = {3,3,3};
1595 for (
int gene=0; gene<num_genes; ++gene) {
1606 arg_db.push_back(suffix[gene]);
1608 "Germline database name",
1613 arg_na.push_back(suffix[gene]);
1615 "Number of Germline sequences to show alignments for",
1622 "Restrict search of germline database to list of SeqIds's",
1628 "Number of Germline sequences to show alignments for",
1632 "C region database name",
1636 "custom internal data file for V region annotation",
1640 "D gene frame definition file",
1644 "File containing the coding frame start positions for sequences in germline J database",
1648 "Required minimal consecutive nucleotide base matches for D genes ",
1654 "Penalty for a nucleotide mismatch in V gene",
1661 "Penalty for a nucleotide mismatch in D gene",
1668 "Penalty for a nucleotide mismatch in J gene",
1675 "Number of top clonotypes to show ",
1681 "Output file name for clonotype info",
1684 arg_desc.
AddFlag(
kArgDetectOverlap,
"Allow V(D)J genes to overlap. This option is active only when D_penalty and J_penalty are set to -4 and -3, respectively",
true);
1690 "The organism for your query sequence. Supported organisms include human, mouse, rat, rabbit and rhesus_monkey for Ig and human and mouse for TCR. Custom organism is also supported but you need to supply your own germline annotations (see IgBLAST web site for details)",
1694 "Domain system to be used for segment annotation",
1699 "Specify Ig or T cell receptor sequence",
1704 arg_desc.
AddFlag(
kArgGLFocusV,
"Should the search only be for V segment (effective only for non-germline database search using -db option)?",
true);
1711 "Minimal required V gene length",
1719 "Minimal required J gene length",
1742 _TRACE(
"Registering " << retval <<
" at priority " <<
1760 #if defined(NCBI_OS_DARWIN)
1761 paths[2] =
"/usr/local/ncbi/igblast/data";
1763 paths[2] = paths[0];
1795 for (
int i=0;
i<3;
i++) {
1818 for (
int i=0;
i<3;
i++) {
1842 "Germline annotation database " + df_db_name +
" could not be found in [internal_data] directory");
1870 const static char suffix[] =
"VDJ";
1872 for (
int gene=0; gene< num_genes; ++gene) {
1877 arg_sub.push_back(suffix[gene]);
1878 arg_db.push_back(suffix[gene]);
1879 arg_na.push_back(suffix[gene]);
1883 if (args.
Exist(arg_sub) && args[arg_sub]) {
1884 CNcbiIstream& subj_input_stream = args[arg_sub].AsInputFile();
1896 subj_range, parse_deflines,
1897 use_lcase_masks, subjects);
1901 new blast::CObjMgr_QueryFactory(*subjects));
1903 sub_seqs, opts_hndl));
1906 gl_db_name.push_back(suffix[gene]);
1907 string db_name = (args.
Exist(arg_db) && args[arg_db])
1908 ? args[arg_db].AsString() : gl_db_name;
1911 if (args.
Exist(arg_db +
"_seqidlist") && args[arg_db +
"_seqidlist"]) {
1939 "Use lower case filtering in query and subject sequence(s)?",
true);
1944 "Location on the query sequence in 1-based offsets "
1945 "(Format: start-stop)",
1951 "Query strand(s) to search against database/subject",
1959 "Should the query and subject defline(s) be parsed?",
true);
1978 }
else if (
kStrand ==
"plus") {
1980 }
else if (
kStrand ==
"minus") {
1995 "Invalid specification of query location");
2011 "Use lower case filtering in subject sequence(s)?",
true);
2019 "fasta",
"fastc",
"fastq",
2023 "mates for query sequences (if given in "
2029 "Comma-separated SRA accessions",
2037 "File with a list of SRA accessions, one per line",
2048 "subject defline(s) be parsed?",
2088 "Unexpected input format: " +
2135 if (!line.empty()) {
2143 "No SRA accessions provided");
2165 : m_RequestMoleculeType(request_mol_type),
2166 m_IsRpsBlast(is_rpsblast),
2167 m_IsIgBlast(is_igblast),
2169 m_IsMapper(is_mapper),
2170 m_IsKBlast(is_kblast),
2171 m_SupportsDatabaseMasking(
false),
2172 m_SupportIPGFiltering(
false)
2202 "BLAST database molecule type",
2208 vector<string> database_args;
2209 database_args.push_back(
kArgDb);
2232 "Effective length of the database ",
2240 "Restrict search of database to list of GIs",
2244 "Restrict search of database to list of SeqIDs",
2248 "Restrict search of database to everything"
2249 " except the specified GIs",
2254 "Restrict search of database to everything"
2255 " except the specified SeqIDs",
2260 "Restrict search of database to include only "
2261 "the specified taxonomy IDs and their descendants "
2262 "(multiple IDs delimited by ',')",
2265 "Restrict search of database to everything "
2266 "except the specified taxonomy IDs and their descendants "
2267 "(multiple IDs delimited by ',')",
2271 "Restrict search of database to include only "
2272 "the specified taxonomy IDs and their descendants ",
2275 "Restrict search of database to everything "
2276 "except the specified taxonomy IDs and their descendants ",
2290 "Restrict search of database to list of IPGs",
2295 "Restrict search of database to everything"
2296 " except the specified IPGs",
2300 const vector<string> kBlastDBFilteringOptions = {
2311 for (
size_t i = 0;
i < kBlastDBFilteringOptions.size();
i++) {
2312 for (
size_t j =
i+1; j < kBlastDBFilteringOptions.size(); j++) {
2314 kBlastDBFilteringOptions[j]);
2320 for (
const string& s: kBlastDBFilteringOptions) {
2328 "Restrict search with the given Entrez query",
2337 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
2338 (!defined(NCBI_COMPILER_MIPSPRO)) )
2342 "filtering_algorithm",
2343 "Filtering algorithm ID to apply to the BLAST database as soft "
2350 "filtering_algorithm",
2351 "Filtering algorithm ID to apply to the BLAST database as hard "
2362 "Subject sequence(s) to search",
2364 ITERATE(vector<string>, dbarg, database_args) {
2371 "Location on the subject sequence in 1-based offsets "
2372 "(Format: start-stop)",
2374 ITERATE(vector<string>, dbarg, database_args) {
2411 while (!reader.
AtEOF()) {
2418 unique_ptr<ITaxonomy4Blast> tb;
2419 if( !isTargetOnly ) {
2424 LOG_POST(
Warning <<
"The -taxids command line option requires additional data files. Please see the section 'Taxonomic filtering for BLAST databases' in https://www.ncbi.nlm.nih.gov/books/NBK569839/ for details.");
2428 for (
auto id : ids) {
2434 if( isTargetOnly ) {
2439 tb->GetLeafNodeTaxids(taxid, desc);
2450 if(isNegativeList) {
2514 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
2515 (!defined(NCBI_COMPILER_MIPSPRO)) )
2525 unique_ptr<CDecompressIStream> decompress_stream;
2528 decompress_stream.reset(
2531 subj_input_stream = decompress_stream.get();
2534 subj_input_stream = &args[
kArgSubject].AsInputFile();
2541 "Invalid specification of subject location");
2552 subj_range, parse_deflines,
2559 "Either a BLAST database or subject sequence(s) must be specified");
2578 string kOutputFormatDescription =
string(
2579 "alignment view options:\n"
2581 " 1 = Query-anchored showing identities,\n"
2582 " 2 = Query-anchored no identities,\n"
2583 " 3 = Flat query-anchored showing identities,\n"
2584 " 4 = Flat query-anchored no identities,\n"
2587 " 7 = Tabular with comment lines,\n"
2588 " 8 = Seqalign (Text ASN.1),\n"
2589 " 9 = Seqalign (Binary ASN.1),\n"
2590 " 10 = Comma-separated values,\n"
2591 " 11 = BLAST archive (ASN.1),\n"
2592 " 12 = Seqalign (JSON),\n"
2593 " 13 = Multiple-file BLAST JSON,\n"
2594 " 14 = Multiple-file BLAST XML2,\n"
2595 " 15 = Single-file BLAST JSON,\n"
2596 " 16 = Single-file BLAST XML2");
2599 kOutputFormatDescription +=
",\n 17 = Sequence Alignment/Map (SAM)";
2601 kOutputFormatDescription +=
",\n 18 = Organism Report\n\n";
2603 kOutputFormatDescription +=
2604 "Options 6, 7, 10 and 17 "
2605 "can be additionally configured to produce\n"
2606 "a custom format specified by space delimited format specifiers,\n"
2607 "or in the case of options 6, 7, and 10, by a token specified\n"
2608 "by the delim keyword. E.g.: \"17 delim=@ qacc sacc score\".\n"
2609 "The delim keyword must appear after the numeric output format\n"
2611 "The supported format specifiers for options 6, 7 and 10 are:\n";
2614 kOutputFormatDescription +=
2615 "Options 6, 7 and 10 "
2616 "can be additionally configured to produce\n"
2617 "a custom format specified by space delimited format specifiers,\n"
2618 "or by a token specified by the delim keyword.\n"
2619 " E.g.: \"10 delim=@ qacc sacc score\".\n"
2620 "The delim keyword must appear after the numeric output format\n"
2622 "The supported format specifiers are:\n";
2628 kOutputFormatDescription +=
2629 "The supported format specifier for option 17 is:\n" +
2638 kOutputFormatDescription =
string(
2639 "alignment view options:\n"
2640 " 3 = Flat query-anchored, show identities,\n"
2641 " 4 = Flat query-anchored, no identities,\n"
2642 " 7 = Tabular with comment lines\n"
2643 " 19 = Rearrangement summary report (AIRR format)\n\n"
2644 "Options 7 can be additionally configured to produce\n"
2645 "a custom format specified by space delimited format specifiers.\n"
2646 "The supported format specifiers are:\n") +
2654 kOutputFormatDescription,
2663 "Number of database sequences to show one-line "
2664 "descriptions for\n"
2665 "Not applicable for outfmt > 4\n"
2673 "Number of database sequences to show alignments for\n"
2680 "Line length for formatting alignments\n"
2681 "Not applicable for outfmt > 4\n"
2693 "Sorting option for hits:\n"
2694 "alignment view options:\n"
2695 " 0 = Sort by evalue,\n"
2696 " 1 = Sort by bit score,\n"
2697 " 2 = Sort by total score,\n"
2698 " 3 = Sort by percent identity,\n"
2699 " 4 = Sort by query coverage\n"
2700 "Not applicable for outfmt > 4\n",
2708 "Sorting option for hps:\n"
2709 " 0 = Sort by hsp evalue,\n"
2710 " 1 = Sort by hsp score,\n"
2711 " 2 = Sort by hsp query start,\n"
2712 " 3 = Sort by hsp percent identity,\n"
2713 " 4 = Sort by hsp subject start\n"
2714 "Not applicable for outfmt != 0\n",
2723 "Maximum number of aligned sequences to keep \n"
2724 "(value of 5 or more is recommended)\n"
2743 string ignore1, ignore2;
2754 if(custom_fmt_spec.empty())
return;
2757 const string kFieldsWithSemicolSeparator =
"sallseqid staxids sscinames scomnames sblastnames sskingdoms";
2758 const string kFramesField =
"frames";
2759 const string kAllTitlesField =
"salltitles";
2761 if(customDelim ==
";") {
2762 vector <string> tokens;
2763 NStr::Split(kFieldsWithSemicolSeparator,
" ", tokens);
2764 for(
size_t i = 0;
i < tokens.size();
i++) {
2766 checkfield = tokens[
i];
2773 if(customDelim ==
"/") {
2774 checkfield = kFramesField;
2776 else if(customDelim ==
"<>") {
2777 checkfield = kAllTitlesField;
2779 if(!checkfield.empty() &&
NStr::Find(custom_fmt_spec,checkfield) !=
NPOS) {
2785 string msg(
"Your custom record separator (" + customDelim +
") is also used by the format specifier (" + checkfield +
2786 ") to separate multiple entries. Please use a different record separator (delim keyword).");
2794 string& custom_fmt_spec,
2795 string& custom_delim)
const
2797 custom_fmt_spec.clear();
2801 string::size_type pos;
2802 if ( (pos = fmt_choice.find_first_of(
' ')) != string::npos) {
2803 custom_fmt_spec.assign(fmt_choice, pos+1,
2804 fmt_choice.size()-(pos+1));
2805 fmt_choice.erase(pos);
2807 if(!custom_fmt_spec.empty()) {
2809 vector <string> tokens;
2811 if(tokens.size() > 0) {
2815 string msg(
"Delimiter format is invalid. Valid format is delim=<delimiter value>");
2819 custom_fmt_spec =
NStr::Replace(custom_fmt_spec,tokens[0],
"");
2828 os <<
"'" << fmt_choice <<
"' is not a valid output format";
2832 if (val < 0 || val >=
static_cast<int>(
eEndValue)) {
2833 string msg(
"Formatting choice is out of range");
2834 throw std::out_of_range(
msg);
2837 string msg(
"Formatting choice is not valid");
2838 throw std::out_of_range(
msg);
2844 fmt_type ==
eSAM) ) {
2845 custom_fmt_spec.clear();
2858 "SAM format is only applicable to blastn" );
2862 "AIRR rearrangement format is only applicable to igblastn" );
2866 "FASTA output format is only applicable to magicblast");
2932 "output formats > 4 . Use -max_target_seqs "
2933 "to control output");
2937 ERR_POST(
Warning <<
"The parameter -line_length is not applicable for "
2938 "output formats > 4 .");
2952 ERR_POST(
Warning <<
"The parameter -sorthits is ignored for output formats > 4.");
2956 if(hitlist_size < 5){
2968 ERR_POST(
Warning <<
"The parameter -sorthsps is ignored for output formats != 0.");
2979 string kOutputFormatDescription =
string(
2980 "alignment view options:\n"
2981 "sam = SAM format,\n"
2982 "tabular = Tabular format,\n"
2983 "asn = text ASN.1\n");
2985 string kUnalignedOutputFormatDescription =
string(
2986 "format for reporting unaligned reads:\n"
2987 "sam = SAM format,\n"
2988 "tabular = Tabular format,\n"
2989 "fasta = sequences in FASTA format\n"
2990 "Default = same as ") +
2994 kOutputFormatDescription,
2998 set<string> allowed_formats = {
"sam",
"tabular",
"asn"};
3003 kUnalignedOutputFormatDescription,
3006 set<string> allowed_unaligned_formats = {
"sam",
"tabular",
"fasta"};
3016 "or '/2' at the end of read ids for SAM format and" \
3022 "Suppress discordant alignments for paired reads");
3025 "A user tag to add to each alignment",
3036 if (fmt_choice ==
"sam") {
3039 else if (fmt_choice ==
"tabular") {
3042 else if (fmt_choice ==
"asn") {
3047 os <<
"'" << fmt_choice <<
"' is not a valid output format";
3057 if (fmt_choice ==
"sam") {
3060 else if (fmt_choice ==
"tabular") {
3063 else if (fmt_choice ==
"fasta") {
3068 os <<
"'" << fmt_choice
3069 <<
"' is not a valid output format for unaligned reads";
3137 ? std::min<int>(
static_cast<int>(
m_NumThreads), kMaxValue) : kMinValue;
3140 "Number of threads (CPUs) to use in the BLAST search",
3151 "Multi-thread mode to use in BLAST search:\n "
3152 "0 auto split by database or queries \n "
3153 "1 split by queries\n "
3154 "2 split by database",
3194 if (num_threads > kMaxValue) {
3199 " to match the number of available CPUs");
3216 <<
"ignored when '" <<
kArgSubject <<
"' is specified.");
3248 arg_desc.
AddFlag(
"verbose",
"Produce verbose output (show BLAST options)",
3250 arg_desc.
AddFlag(
"remote_verbose",
3251 "Produce verbose output for remote searches",
true);
3252 arg_desc.
AddFlag(
"use_test_remote_service",
3253 "Send remote requests to test servers",
true);
3264 if (args[
"use_test_remote_service"]) {
3278 "If the query range of a hit is enveloped by that of at "
3279 "least this many higher-scoring hits, delete the hit",
3286 "Best Hit algorithm overhang value "
3287 "(recommended value: " +
3299 "Best Hit algorithm score edge value "
3300 "(recommended value: " +
3339 "Use MegaBLAST database index",
3343 "MegaBLAST database index name (deprecated; use only for old style indices)",
3366 bool use_index =
true;
3367 bool force_index =
false;
3368 bool old_style_index =
false;
3371 if( args[
kArgUseIndex].AsBoolean() ) force_index =
true;
3372 else use_index =
false;
3376 args[
kTask].AsString() !=
"megablast" ) {
3385 old_style_index =
true;
3388 index_name = args[
kArgDb].AsString();
3392 "Can not deduce database index name" );
3395 opts.
SetUseIndex(
true, index_name, force_index, old_style_index );
3412 "Comma-separated SRA accessions",
3511 "Search strategy to use",
3515 "File name to record the search strategy used",
3581 (*arg)->ExtractAlgorithmOptions(args, opts);
3586 (*arg)->ExtractAlgorithmOptions(args, opts);
3613 (*arg)->ExtractAlgorithmOptions(args, opts);
3647 retval->SetCurrentGroup(
"Input query options");
3648 retval->SetCurrentGroup(
"General search options");
3649 retval->SetCurrentGroup(
"BLAST database options");
3650 retval->SetCurrentGroup(
"BLAST-2-Sequences options");
3651 retval->SetCurrentGroup(
"Formatting options");
3652 retval->SetCurrentGroup(
"Query filtering options");
3653 retval->SetCurrentGroup(
"Restrict search or results");
3654 retval->SetCurrentGroup(
"Discontiguous MegaBLAST options");
3655 retval->SetCurrentGroup(
"Statistical options");
3656 retval->SetCurrentGroup(
"Search strategy options");
3657 retval->SetCurrentGroup(
"Extension options");
3658 retval->SetCurrentGroup(
"");
3662 (*arg)->SetArgumentDescriptions(*retval);
3664 return retval.release();
3706 can_override.
insert(
"remote_verbose");
3707 can_override.
insert(
"verbose");
3754 typedef vector< CRef<CArgValue> > TArgs;
3755 TArgs arguments = args.
GetAll();
3757 const string& arg_name = (*a)->GetName();
3758 const string& arg_value = (*a)->AsString();
3761 if (has_defaults.
find(arg_name) != has_defaults.
end()) {
3762 if (has_defaults[arg_name] == arg_value) {
3765 if (arg_name ==
kTask && arg_value ==
"megablast") {
3770 "using a search strategy");
3774 if (can_override.
find(arg_name) == can_override.
end()) {
3776 "using a search strategy");
3807 (*arg)->ExtractAlgorithmOptions(args, opts);
User-defined methods of the data storage class.
Declares singleton objects to store the version and reference for the BLAST engine.
static void s_GetTaxIDList(const string &in, bool isFile, bool isNegativeList, CRef< CSearchDatabase > &sdb, bool isTargetOnly)
static bool s_IsDefaultWordThreshold(EProgram program, double threshold)
static void s_ValidateCustomDelim(string custom_fmt_spec, string customDelim)
static void s_SetCompositionBasedStats(CBlastOptions &opt, const string &comp_stat_string, bool smith_waterman_value, bool *ungapped)
Auxiliary function to set the composition based statistics and smith waterman options.
const char * kTemplType_Coding
Value to specify coding template type.
const char * kTemplType_Optimal
Value to specify optimal template type.
const char * kTemplType_CodingAndOptimal
Value to specify coding+optimal template type.
CArgDescriptions * SetUpCommandLineArguments(TBlastCmdLineArgs &args)
Create a CArgDescriptions object and invoke SetArgumentDescriptions for each of the TBlastCmdLineArgs...
static string s_RegisterOMDataLoader(CRef< CSeqDB > db_handle)
Interface for converting blast-related command line arguments into blast options.
vector< CRef< IBlastCmdLineArgs > > TBlastCmdLineArgs
Type definition of a container of IBlastCmdLineArgs.
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
Declares the BLAST exception class.
Routines for creating nucleotide BLAST lookup tables.
EDiscWordType
General types of discontiguous word templates.
#define PSI_INCLUSION_ETHRESH
Defaults for PSI-BLAST and DELTA-BLAST options.
#define BLAST_HITLIST_SIZE
Number of database sequences to save hits for.
#define BLAST_WORD_THRESHOLD_BLASTX
default threshold (blastx)
Int2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char *matrixName, double *threshold)
Get thresholds for word-finding suggested by Stephen Altschul.
@ eDynProgScoreOnly
standard affine gapping
Int2 BLAST_GetSuggestedWindowSize(EBlastProgramType program_number, const char *matrixName, Int4 *window_size)
Get window sizes for two hit algorithm suggested by Stephen Altschul.
#define BLAST_GAP_TRIGGER_NUCL
default bit score that will trigger a gapped extension for blastn
#define MAX_DB_WORD_COUNT_MAPPER
Default max frequency for a database word.
#define BLAST_EXPECT_VALUE
Default parameters for saving hits.
#define DELTA_INCLUSION_ETHRESH
Inclusion threshold for DELTA-BLAST.
#define BLAST_WORD_THRESHOLD_BLASTP
neighboring word score thresholds; a threshold of zero means that only query and subject words that m...
#define BLAST_GAP_TRIGGER_PROT
default bit score that will trigger gapped extension
#define PSI_PSEUDO_COUNT_CONST
Pseudo-count constant for PSI-BLAST.
@ eDynProgTbck
standard affine gapping
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
#define BLAST_WORD_THRESHOLD_TBLASTN
default neighboring threshold (tblastn/rpstblastn)
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
Boolean Blast_QueryIsNucleotide(EBlastProgramType p)
Returns true if the query is nucleotide.
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
PSIDiagnosticsRequest * PSIDiagnosticsRequestNewEx(Boolean save_ascii_pssm)
Allocates a PSIDiagnosticsRequest structure, setting fields to their default values for their use in ...
Int2 BLAST_GetProteinGapExistenceExtendParams(const char *matrixName, Int4 *gap_existence, Int4 *gap_extension)
Extract the recommended gap existence and extension values.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ eTblastx
Translated nucl-Translated nucl.
@ eBlastn
Nucl-Nucl (traditional blastn)
@ eRPSBlast
protein-pssm (reverse-position-specific BLAST)
@ eBlastp
Protein-Protein.
@ eTblastn
Protein-Translated nucl.
@ eDeltaBlast
Delta Blast.
@ ePSITblastn
PSI Tblastn.
@ eRPSTblastn
nucleotide-pssm (RPS blast with translated query)
@ eBlastx
Translated nucl-Protein.
Auxiliary class to validate the genetic code input.
virtual string GetUsage(void) const
Overloaded method from CArgAllow.
virtual bool Verify(const string &value) const
Overloaded method from CArgAllow.
Class to constrain the length of the file name passed to a given CArgDescriptions key.
Class to constrain the values of an argument to those in between the values specified in the construc...
Class to constrain the values of an argument to those greater than or equal to the value specified in...
Class to constrain the values of an argument to those less than or equal to the value specified in th...
Auxiliary class to store the name of an output file, which is reset every time its GetStream method i...
CRef< CRemoteArgs > m_RemoteArgs
remote vs. local execution options
CRef< CBlastOptionsHandle > SetOptionsForSavedStrategy(const CArgs &args)
Combine the command line arguments into a CBlastOptions object recovered from saved search strategy.
string GetTask() const
Get the task for this object.
virtual CNcbiIstream & GetInputStream()
Get the input stream.
CRef< CBlastOptionsHandle > m_OptsHandle
The BLAST options handle; only non-NULL if assigned via SetOptionsHandle.
CRef< CQueryOptionsArgs > m_QueryOptsArgs
query options object
CRef< CBlastDatabaseArgs > m_BlastDbArgs
database/subject object
virtual CRef< CBlastOptionsHandle > x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, const CArgs &args)=0
Create the options handle based on the command line arguments.
CRef< CBlastOptionsHandle > SetOptions(const CArgs &args)
Extract the command line arguments into a CBlastOptionsHandle object.
CRef< CSearchStrategyArgs > m_SearchStrategyArgs
arguments for dealing with search strategies
string m_Task
Task specified in the command line.
CRef< CDebugArgs > m_DebugArgs
Debugging arguments.
CRef< CBlastOptionsHandle > x_CreateOptionsHandleWithTask(CBlastOptions::EAPILocality locality, const string &task)
Creates the BLAST options handle based on the task argument.
CBlastAppArgs()
Default constructor.
CRef< CMTArgs > m_MTArgs
multi-threaded options
CArgDescriptions * SetCommandLine()
Set the command line arguments.
CRef< CFormattingArgs > m_FormattingArgs
formatting options
void x_IssueWarningsForIgnoredOptions(const CArgs &args)
Issue warnings when recovering from a search strategy (command line applications only)
bool m_IsUngapped
Is this application being run ungapped.
TBlastCmdLineArgs m_Args
Set of command line argument objects.
CNcbiOstream * GetExportSearchStrategyStream(const CArgs &args)
Get the output stream for the search strategy.
void SetTask(const string &task)
Set the task for this object.
virtual CNcbiOstream & GetOutputStream()
Get the output stream.
CRef< CStdCmdLineArgs > m_StdCmdLineArgs
standard command line arguments class
Argument class to collect database/subject arguments.
CBlastDatabaseArgs(bool request_mol_type=false, bool is_rpsblast=false, bool is_igblast=false, bool is_mapper=false, bool is_kblast=false)
Constructor.
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opts)
Interface method.
static bool HasBeenSet(const CArgs &args)
Auxiliary function to determine if the database/subject sequence has been set.
CRef< objects::CScope > m_Scope
CScope object in which all subject sequences read are kept.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_IsMapper
true for short read mapper
bool IsProtein() const
Is the database/subject protein?
bool m_SupportsDatabaseMasking
true if it's supported
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
bool m_IsProtein
Is the database/subject(s) protein?
bool m_RequestMoleculeType
Determines whether the database's molecule type should be requested in the command line,...
bool m_IsIgBlast
true if the search is Ig-BLAST
CRef< IQueryFactory > m_Subjects
The subject sequences.
bool m_IsRpsBlast
true if the search is RPS-BLAST
CRef< CSearchDatabase > m_SearchDb
Description of the BLAST database.
bool m_SupportIPGFiltering
true if IPG filtering is supported
bool m_IsKBlast
true for Kblastp
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
static string GetLoaderNameFromArgs(CConstRef< CSeqDB > db_handle)
Defines BLAST error codes (user errors included)
Encapsulates ALL the BLAST algorithm's options.
EAPILocality
Enumerates the possible contexts in which objects of this type can be used.
@ eLocal
To be used for running BLAST locally.
@ eRemote
To be used when running BLAST remotely.
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
bool m_Is2and3Supported
Are options 2 and 3 supported.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
string m_ZeroOptDescr
Non standard description for option zero.
string m_DefaultOpt
Default option.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_DebugOutput
Should debugging (verbose) output be printed.
bool m_RmtDebugOutput
Should debugging (verbose) output be printed for remote BLAST.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CRef< CSearchDatabase > m_DomainDb
Conserved Domain Database.
bool m_ShowDomainHits
Is printing CDD hits requested.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_QueryIsProtein
true if the query is protein
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
void x_TokenizeFilteringArgs(const string &filtering_args, vector< string > &output) const
Auxiliary method to tokenize the filtering string.
bool m_FilterByDefault
Should filtering be applied by default?
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_QueryIsProtein
true if the query is protein
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_QueryIsProtein
true if the query is protein
bool m_IsRpsBlast
true if the search is RPS-BLAST
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_IsIgBlast
true if the search is igblast
bool m_IsTblastx
true if the search is tblastx
bool m_ShowPercentIdentity
true if the percent identity option should be shown
CGenericSearchArgs(bool query_is_protein=true, bool is_rpsblast=false, bool show_perc_identity=false, bool is_tblastx=false, bool is_igblast=false, bool suppress_sum_stats=false)
Constructor.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_SuppressSumStats
true if search is blastn or blastp
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
ETarget m_Target
Genetic code target.
@ eQuery
Query genetic code.
@ eDatabase
Database genetic code.
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opts)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CRef< CIgBlastOptions > m_IgOptions
Igblast options to fill.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_IsProtein
Is this a protein search?
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CRef< objects::CScope > m_Scope
scope to get sequences
double m_JDistance
Jaccard distance.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
int m_CandidateSeqs
Number of candidate sequences to try BLAST on.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
int m_MinHits
Minimum number of hits in LSH phase.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
size_t m_NumThreads
Number of threads to spawn.
void x_ExtractAlgorithmOptions(const CArgs &args)
CMTArgs(size_t default_num_threads=CThreadable::kMinNumThreads, EMTMode mt_mode=eNotSupported)
Default Constructor.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CNcbiIstream * m_MateInputStream
EInputFormat m_InputFormat
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opt)
Interface method,.
vector< string > m_SraAccessions
unique_ptr< CDecompressIStream > m_DecompressIStream
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Argument class to retrieve megablast database indexing options.
static bool HasBeenSet(const CArgs &args)
Auxiliary function to determine if the megablast database indexing options have been set.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opts)
Interface method,.
static CNcbiApplication * Instance(void)
Singleton method.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Wrapper class for PSIBlastOptions .
Wrapper class for PSIDiagnosticsRequest .
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
string m_ProgDesc
Application's description.
string m_ProgName
Application's name.
CProgramDescriptionArgs(const string &program_name, const string &program_description)
Constructor.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_SaveLastPssm
Save PSSM after the last database search.
CRef< CAutoOutputFileReset > m_AsciiMatrixOutput
ASCII matrix output file.
bool m_IsDeltaBlast
Are the arguments set up for Delta Blast.
@ eProteinDb
Traditional, iterated PSI-BLAST.
@ eNucleotideDb
PSI-Tblastn, non-iterated.
CRef< CAutoOutputFileReset > m_CheckPointOutput
checkpoint output file
ETargetDatabase m_DbTarget
Molecule of the database.
CRef< objects::CPssmWithParameters > x_CreatePssmFromMsa(CNcbiIstream &input_stream, CBlastOptions &opt, bool save_ascii_pssm, unsigned int msa_master_idx, bool ignore_pssm_tmpl_seq)
Auxiliary function to create a PSSM from a multiple sequence alignment file.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CRef< objects::CPssmWithParameters > m_Pssm
PSSM.
size_t m_NumIterations
number of iterations to perform
bool m_IsDeltaBlast
Are these arguments for Delta Blast.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Computes a PSSM as specified in PSI-BLAST.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_UseLCaseMask
use lowercase masking in FASTA input
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
objects::ENa_strand m_Strand
Strand(s) to search.
TSeqRange m_Range
range to restrict the query sequence(s)
bool m_ParseDeflines
Should the deflines be parsed?
bool m_QueryCannotBeNucl
only false for blast[xn] and tblastx; true in case of PSI-BLAST
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_IsRemote
Should the search be executed remotely?
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Argument class to import/export the search strategy.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CNcbiIstream * GetImportStream(const CArgs &args) const
Get the input stream for the search strategy.
CNcbiOstream * GetExportStream(const CArgs &args) const
Get the output stream for the search strategy.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
void AddTaxIds(const set< TTaxId > &tax_ids)
EStatType
Counts statistics formats.
static EStatType DiscoverStatType(string const &name)
Return the format of the counts statistics file.
Root class for all serialization exceptions.
bool m_GzipEnabled
If true input file will be decompressed with gzip if filename ends with ".gz".
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CNcbiIstream & GetInputStream() const
Get the input stream for a command line application.
unique_ptr< CDecompressIStream > m_DecompressIStream
CNcbiOstream & GetOutputStream() const
Get the output stream for a command line application.
CRef< CTmpFile > m_QueryTmpInputFile
ASN.1 specification of query sequences when read from a saved search strategy.
unique_ptr< CCompressOStream > m_CompressOStream
CNcbiOstream * m_OutputStream
Application's output stream.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CNcbiIstream * m_InputStream
Application's input stream.
bool m_SRAaccessionEnabled
If true, option to specify SRA runs will be presented as possible query input.
void SetInputStream(CRef< CTmpFile > input_file)
Set the input stream if read from a saved search strategy.
CNcbiOstream * m_UnalignedOutputStream
Output stream to report unaligned sequences/reads.
unique_ptr< CCompressOStream > m_UnalignedCompressOStream
Simple implementation of ILineReader for i(o)streams.
static unsigned int GetCpuCount(void)
Return number of active CPUs/cores (never less than 1).
const set< string > m_SupportedTasks
Set of tasks supported by this command line argument.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CTaskCmdLineArgs(const set< string > &supported_tasks, const string &default_task)
Constructor.
string m_DefaultTask
Default task for this command line argument.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Class to retrieve taxonomic information for filtering BLASTDBs.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
BLAST Command line arguments design The idea is to have several small objects (subclasses of IBlastCm...
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
const string kArgMatrixName
Argument for scoring matrix.
const string kArgWindowMaskerDatabase
Argument to specify a path to a Window Masker database.
const string kArgGLChainType
Argument to specify the germline database chaintype name for igblast.
const string kArgAsciiPssmOutputFile
Argument to specify the file name for saving the ASCII representation of the PSSM.
const string kArgMaxDbWordCount
Argument to specify a maximum number of times a word can be repeated in a database.
const string kArgGLOrigin
Argument to specify the germline origin for igblast.
const string kDfltArgJDistance
Jaccard default value.
const string kArgPSIPseudocount
Argument to specify the pseudo-count value used when constructing PSSM.
const string kArgNoGreedyExtension
Argument to specify non-greedy dynamic programming extension.
const string kDfltArgApplyFiltering
Default argument to specify filtering.
const string kArgPSIOutputChkPntFile
Argument to specify a 'checkpoint' file to write the PSSM.
const string kArgSplice
Argument to specify whether to search for spliced alignments.
const string kArgMinRawGappedScore
Argument for minimum raw gapped score for preliminary gapped and traceback stages.
const string kArgGLNumAlign
Argument to specify the number of alignments for germline database.
const string kArgLookupStride
Argument to specify the stride when creating a lookup table.
const string kArgDbSize
Effective length of BLAST database.
const string kArgDbGeneticCode
Database genetic code.
const string kArgPSIInclusionEThreshold
Argument to specify the evalue inclusion threshold for considering aligned sequences for PSSM constru...
const string kArgRevFwd
Argument to specify reverse/forward strand specificity.
const string kArgPSIInputChkPntFile
Argument to specify a 'checkpoint' file to recover the PSSM from.
const string kArgScore
Argument to specify cutoff score for accepting a spliced alignment.
const string kArgMaxIntronLength
Argument to specify the maximum length of an intron when linking multiple distinct alignments (applic...
const string kArgTranslate
Argument to specify if Igblast alignment should be translated to protein.
const bool kDfltArgParseDeflines
Default argument to specify whether sequences deflines should be parsed.
const string kArgDMBTemplateLength
Argument to specify the discontinuous megablast template length.
const string kArgOutput
Output file name.
const string kArgClonotypeFile
Argument to specify the clonotype output file.
const int kDfltArgCullingLimit
Default argument to specify the culling limit.
const string kArgPercentIdentity
Argument to specify the target percent identity.
const string kArgStrand
Argument to select the query strand(s) to search.
const string kDfltArgCompBasedStatsDelta
const string kArgDMBTemplateType
Argument to specify the discontinuous megablast template type.
const string kArgCandidateSeqs
Number of sequences to attempt BLAST on.
const string kArgOutputSearchStrategy
Argument to specify the file name to save the search strategy used for a BLAST search.
const string kArgDPenalty
Argument to specify mismatch penalty for D gene search.
const string kArgGapExtend
Argument to select the gap extending penalty.
const string kArgRemote
Argument to determine whether searches should be run locally or remotely.
const string kArgQueryLocation
Argument to specify a location to restrict the query sequence(s)
const string kArgDbHardMask
const string kArgDbSoftMask
List of filtering algorithms to apply to subjects as soft masking.
const string kArgOnlyStrandSpecific
Argument to specify only strand specific results.
const int kDfltArgMaxIntronLength
Default value for maximum intron length.
const double kDfltArgBestHitOverhang
Default argument for the overhang parameter to the best hit algorithm.
const string kArgJPenalty
Argument to specify mismatch penalty for J gene search.
const string kArgFilteringDb
Argument to specify a filtering database (i.e.
const string kArgSegFiltering
Argument to specify SEG filtering on query sequence(s)
const string kArgDbType
BLAST database molecule type.
const string kArgTaxIdListFile
Argument to specify file with taxonomy ids for filtering.
const string kArgUnalignedOutput
Argument to output unaligned reads in a separate file.
const string kArgNoTaxIdExpansion
Argument to not resolve TaxId to descendants.
const string kArgMinJLength
Argument to specify minimal required J gene length.
const string kArgPrintMdTag
Argument to specify printing SAM MD tag.
const string kArgGappedXDropoff
Argument to select the gapped X dropoff value.
const string kArgUseSWTraceback
Argument to specify that Smith-Waterman algorithm should be used to compute locally optimal alignment...
const string kArgIndexName
Megablast database index name.
const string kArgGapOpen
Argument to select the gap opening penalty.
const string kArgDustFiltering
Argument to specify DUST filtering on query sequence(s)
const string kArgSubjectBestHit
Argument to turn on the best hit per subject sequence.
const string kArgQueryMate
Mates for the query sequences if given in a separate file.
const string kArgFinalGappedXDropoff
Argument to select the final gapped X dropoff value.
const string kArgBestHitOverhang
Argument to specify the overhang parameter to the best hit algorithm.
const string kArgNegativeSeqidList
Argument for seqid list to exclude from a BLAST database search.
const string kArgEntrezQuery
Entrez query.
const string kArgJDistance
KBLASTP argument: specifies the Jaccard distance (threshold)
const string kArgGLDatabase
Argument to specify the germline database name for igblast.
const string kArgGLFocusV
Argument to specify if Igblast alignment should be restricted to the V segment.
const string kTask
Task to perform.
const string kArgSraAccessionBatch
Argument to specify a file with a list of SRA accessions.
const string kArgLineLength
Argument to specify line length for displaying alignments.
const string kArgMaxTargetSequences
Argument to specify the maximum number of target sequences to keep (a.k.a.
const string kArgFrameShiftPenalty
Argument to specify the frame shift penalty.
const string kArgUseIndex
Flag to force using or not using megablast database index.
const bool kDfltArgUseIndex
Default value for megablast database index flag.
const string kArgMinDMatch
Argument to specify the Igblast minimum D gene match.
const string kDfltArgQuery
Default value for query sequence input.
const string kArgRpsDb
Argument to specify domain database name for DELTA-BLAST.
const string kArgQualityFilter
Argument to specify whether quality filtering is to be done.
const string kArgNegativeGiList
Argument for gi list to exclude from a BLAST database search.
const string kArgInputFormat
Argument to specify input format.
const string kArgLookupTableMaskingOnly
Argument to specify to mask query during lookup table creation.
const string kArgMismatch
Argument to select the nucleotide mismatch penalty.
const string kArgParseDeflines
Argument to specify if the query and subject sequences defline should be parsed.
const string kArgSaveAllPssms
Argument to specify whether to save PSSM after each psiblast iteration.
const string kDfltArgCandidateSeqs
const string kArgIgnoreMsaMaster
Argument to specify whether the template sequence (usually the query) should be ignored for the purpo...
const string kArgEvalue
Argument for expectation value cutoff.
const string kArgFwdRev
Argument to specify forward/reverse strand specificity.
const string kArgOldStyleIndex
Use old style megablast index.
const string kArgMaskLevel
const string kArgIgSeqType
Argument to specify IgBlast sequence type.
const string kArgGLDomainSystem
Argument to specify the Ig domain system.
const string kArgIpgList
IPG list file name to restrict BLAST database.
const string kArgMaxEditDist
Argument to specify a cutoff edit distance for an alignment.
const string kArgEnableSraCache
Argument to enable SRA caching in local files.
const bool kDfltArgUseLCaseMasking
Default argument to specify whether lowercase masking should be used.
const string kArgCullingLimit
Argument to specify the culling limit.
const string kArgGapTrigger
Argument to specify number of bits to initiate gapping.
const string kArgEffSearchSpace
Argument to specify the effective length of the search space.
const string kArgSubjectLocation
Argument to specify a location to restrict the subject sequence(s)
const string kArgOffDiagonalRange
Argument to select the off-diagonal scan range in the 2-hit wordfinder algorithm.
const string kDfltArgStrand
Default value for strand selection.
const string kArgPaired
Argument to specify whether mapped reads are paired.
const string kArgQueryCovHspPerc
Argument to specify min query coverage percentage for each hsp.
const string kDfltArgSegFiltering
Default arguments to apply SEG filtering on query sequence(s)
const string kArgMTMode
Argument to specify mt mode (split by db or split by queries)
const string kArgPSINumIterations
Argument to select the number of iterations to perform in PSI-BLAST.
const string kArgQuery
Query sequence(s)
const string kArgNumClonotype
Argument to specify number of clonotype to show.
const string kArgMinVLength
Argument to specify minimal required V length.
const string kArgNegativeIpgList
argument for IPG list to exclude from a BLAST database search
const string kArgNoUnaligned
Argument to turn off printing of unaligned reads.
const string kArgComplexityAdj
const string kArgMSAInputFile
Argument to specify a multiple sequence alignment file to create a PSSM from.
const string kArgUnalignedFormat
Argument to specify format for reporting unaligned reads.
const string kArgNegativeTaxIdList
Argument to specify negative taxonomy ids filtering.
const string kDfltArgOldStyleIndex
Default value for use old style megablast index.
const string kArgVPenalty
Argument to specify mismatch penalty for V gene search.
const string kDfltArgDustFiltering
Default arguments to apply DUST filtering on query sequence(s)
const string kArgSeqIdList
seqid list file name to restrict BLAST database
const string kDfltArgLookupTableMaskingOnlyProt
Default argument to mask a protein query during lookup table construction.
const unsigned int kDfltArgPSINumIterations
const string kArgRevOnly
Argument to specify reverse-only strand specificity.
const string kArgDb
BLAST database name.
const string kArgOutputGzip
Argument to specify that the output will be compressed with gzip.
const string kArgCustomInternalData
Argument to specify custom internal data file.
const string kArgWindowMaskerTaxId
Argument to specify a taxid for Window Masker.
const string kArgCRegionNumAlign
Argument to specify the number of alignments for c gene db.
const string kArgWindowSize
Argument to select the window size in the 2-hit wordfinder algorithm.
const string kArgRefType
Reference type: genome or transcriptome.
const string kArgWordSize
Argument to select the wordfinder's word size.
<