72 const string& program_desc)
73 : m_ProgName(program_name), m_ProgDesc(program_desc)
85 const string& default_task)
86 : m_SupportedTasks(supported_tasks), m_DefaultTask(default_task)
102 arg_desc.
AddKey(
kTask,
"task_name",
"Task to execute",
142 string des =
"Expectation value (E) threshold for saving hits. Default = 10";
144 des +=
" (1000 for blastn-short)";
149 "Expectation value (E) threshold for saving hits ",
155 "Expectation value (E) threshold for saving hits ",
164 ?
"Word size for wordfinder algorithm"
165 :
"Word size for wordfinder algorithm (length of best perfect match)";
176 "Cost to open a gap",
181 "Cost to extend a gap",
198 "Percent query coverage per hsp",
204 "Set maximum number of HSPs per subject sequence to save for each query",
213 "X-dropoff value (in bits) for ungapped extensions",
221 "X-dropoff value (in bits) for preliminary gapped extensions",
227 "X-dropoff value (in bits) for final gapped alignment",
235 "Effective length of the search space",
242 "Use sum statistics",
257 int gap_open=0, gap_extend=0;
260 (args[
kArgMatrixName].AsString().c_str(), &gap_open, &gap_extend);
305 env.Set(
"OLD_FSC",
"true");
333 "Filter query sequence with SEG "
340 "Apply filtering locations as soft masks",
345 "Filter query sequence with DUST "
351 "BLAST database containing filtering elements (i.e.: repeats)",
355 "Enable WindowMasker filtering using a Taxonomic ID",
359 "Enable WindowMasker filtering using this repeats database.",
363 "Apply filtering locations as soft masks",
373 vector<string>&
output)
const
379 "Invalid number of arguments to filtering option");
390 vector<string> tokens;
423 "Invalid input for filtering parameters");
450 string msg(
"Only optimized binary windowmasker stat files are supported");
458 if (filter_dbs > 1) {
473 "Multiple hits window size, use 0 to specify "
503 "Number of off-diagonals to search for the 2nd hit, "
529 "Scoring matrix name",
534 "Use complexity adjusted scoring",
539 "Masklevel - percentage overlap allowed per "
540 "query domain [0-101]",
576 "Minimum word score such that the word is added to the "
577 "BLAST lookup table",
587 int word_threshold =
static_cast<int>(threshold);
592 }
else if (program ==
eBlastx &&
610 double threshold = -1;
614 if (threshold != -1) {
625 "Scoring matrix name (normally BLOSUM62)",
646 "Penalty for a nucleotide mismatch",
653 "Reward for a nucleotide match",
661 "Use non-greedy dynamic programming extension",
699 "Minimum raw gapped score to keep an alignment "
700 "in the preliminary gapped and traceback stages",
706 "Discontiguous MegaBLAST template type",
717 "Discontiguous MegaBLAST template length",
720 allowed_values.
insert(16);
721 allowed_values.
insert(18);
722 allowed_values.
insert(21);
774 " 0 or F or f: No composition-based statistics\n";
779 " 2 or T or t : Composition-based score adjustment as in "
780 "Bioinformatics 21:902-911,\n"
781 " 2005, conditioned on sequence properties\n"
782 " 3: Composition-based score adjustment as in "
783 "Bioinformatics 21:902-911,\n"
784 " 2005, unconditionally\n" :
"";
786 string legend = (
string)
"Use composition-based statistics:\n"
787 " D or d: default (equivalent to " +
m_DefaultOpt +
" )\n"
789 +
" 1" + one_opt_insrt +
": Composition-based statistics "
790 "as in NAR 29:2994-3005, 2001\n"
802 "Compute locally optimal Smith-Waterman alignments?",
823 const string& comp_stat_string,
824 bool smith_waterman_value,
835 switch (comp_stat_string[0]) {
836 case '0':
case 'F':
case 'f':
871 "Composition-adjusted searched are not supported with "
872 "an ungapped search, please add -comp_based_stats F or "
873 "do a gapped search");
879 tolower(comp_stat_string[1]) ==
'u') {
907 "Perform gapped alignment (default T, but "
908 "not available for tblastx)",
911 arg_desc.
AddAlias(
"-gapped", ARG_GAPPED);
935 "Length of the largest intron allowed in a translated "
936 "nucleotide sequence when linking multiple distinct "
964 "Frame shift penalty (for use with out-of-frame "
965 "gapped alignment in blastx or tblastn, default "
982 if ((cbs[0] !=
'0' )&& (cbs[0] !=
'F') && (cbs[0] !=
'f')) {
984 "Composition-adjusted searches are not supported with "
985 "Out-Of-Frame option, please add -comp_based_stats F ");
1000 static int gcs[] = {1,2,3,4,5,6,9,10,11,12,13,14,15,16,21,22,23,24,25,26,27,28,29,30,31,33};
1001 static const set<int> genetic_codes(gcs, gcs+
sizeof(gcs)/
sizeof(*gcs));
1003 return (genetic_codes.
find(
val) != genetic_codes.
end());
1008 return "values between: 1-6, 9-16, 21-31, 33";
1019 "Genetic code to use to translate query (see https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=cgencodes for details)\n",
1028 "Genetic code to use to translate "
1029 "database/subjects (see user manual for details)\n",
1063 "Number of bits to trigger gapping",
1085 "Pseudo-count value used when constructing PSSM",
1091 "E-value inclusion threshold for alignments "
1092 "with conserved domains",
1099 "E-value inclusion threshold for pairwise alignments",
1135 "PSI-TBLASTN checkpoint file",
1145 "Number of iterations to perform (0 means run "
1156 "File name to store checkpoint file",
1160 "File name to store ASCII version of PSSM",
1166 "(file name is given in -save_pssm or "
1167 "-save_ascii_pssm options)");
1170 vector<string> msa_exclusions;
1182 "File name of multiple sequence alignment to "
1183 "restart PSI-BLAST",
1185 ITERATE(vector<string>, exclusion, msa_exclusions) {
1192 "Ordinal number (1-based index) of the sequence"
1193 " to use as a master in the multiple sequence "
1194 "alignment. If not provided, the first sequence"
1195 " in the multiple sequence alignment will be "
1199 ITERATE(vector<string>, exclusion, msa_exclusions) {
1212 "Ignore the master sequence when creating PSSM",
true);
1214 vector<string> ignore_pssm_master_exclusions;
1217 ignore_pssm_master_exclusions.push_back(
kArgQuery);
1219 ITERATE(vector<string>, exclusion, msa_exclusions) {
1230 "PSI-BLAST checkpoint file",
1252 unsigned int msa_master_idx,
1253 bool ignore_pssm_tmplt_seq)
1267 return pssm_engine.
Run();
1302 const bool kSaveAllPssms
1311 if (kSaveAsciiPssm) {
1318 unsigned int msa_master_idx = 0;
1352 "Unsupported format for PSSM");
1355 string msg(
"Unrecognized format for PSSM in ");
1357 msg +=
"PssmWithParameters)";
1370 "File name containing pattern to search",
1393 string ltype = line.substr(0, 2);
1395 name = line.substr(4);
1396 else if (ltype ==
"PA")
1397 pattern = line.substr(4);
1399 if (!pattern.empty())
1405 "PHI pattern not read");
1468 "alignments. Can be expressed as a number or a "
1469 "function of read length: "
1470 "L,b,a for a * length + b.\n"
1471 "Zero means that the cutoff score will be equal to:\n"
1472 "read length, if read length <= 20,\n"
1473 "20, if read length <= 30,\n"
1474 "read length - 10, if read length <= 50,\n"
1478 "accepting an alignment\nDefault = unlimited",
1483 "genome or transcriptome",
1490 "high frequency in the searched database",
1493 "than this number of times in the database will be"
1494 " masked in the lookup table",
1500 "after collecting one while creating a lookup table",
1517 list<string> tokens;
1519 vector<double> coeffs;
1520 if (tokens.size() < 3) {
1522 (
string)
"Incorrectly formatted score function: " +
1523 s +
". It should be of the form 'L,b,a' for ax + b,"
1524 "a, b must be numbers");
1526 auto it = tokens.begin();
1529 for (; it != tokens.end(); ++it) {
1535 (
string)
"Incorrectly formatted score function: " +
1536 s +
". It should be of the form 'L,b,a' for ax + b,"
1537 " a, b must be real numbers");
1548 (
string)
"Incorrectly formatted score threshold: " +
1549 s +
". It must be either an integer or a linear "
1550 "function in the form: L,b,a for ax + b, a and b "
1551 "must be real numbers");
1564 string ref_type =
"genome";
1590 const static char suffix[] =
"VDJ";
1591 const static int df_num_align[3] = {3,3,3};
1595 for (
int gene=0; gene<num_genes; ++gene) {
1606 arg_db.push_back(suffix[gene]);
1608 "Germline database name",
1613 arg_na.push_back(suffix[gene]);
1615 "Number of Germline sequences to show alignments for",
1622 "Restrict search of germline database to list of SeqIds's",
1628 "Number of Germline sequences to show alignments for",
1632 "C region database name",
1636 "custom internal data file for V region annotation",
1640 "D gene frame definition file",
1644 "File containing the coding frame start positions for sequences in germline J database",
1648 "Required minimal consecutive nucleotide base matches for D genes ",
1654 "Penalty for a nucleotide mismatch in V gene",
1661 "Penalty for a nucleotide mismatch in D gene",
1668 "Penalty for a nucleotide mismatch in J gene",
1675 "Number of top clonotypes to show ",
1681 "Output file name for clonotype info",
1684 arg_desc.
AddFlag(
kArgDetectOverlap,
"Allow V(D)J genes to overlap. This option is active only when D_penalty and J_penalty are set to -4 and -3, respectively",
true);
1690 "The organism for your query sequence. Supported organisms include human, mouse, rat, rabbit and rhesus_monkey for Ig and human and mouse for TCR. Custom organism is also supported but you need to supply your own germline annotations (see IgBLAST web site for details)",
1694 "Domain system to be used for segment annotation",
1699 "Specify Ig or T cell receptor sequence",
1704 arg_desc.
AddFlag(
kArgGLFocusV,
"Should the search only be for V segment (effective only for non-germline database search using -db option)?",
true);
1711 "Minimal required V gene length",
1719 "Minimal required J gene length",
1742 _TRACE(
"Registering " << retval <<
" at priority " <<
1760 #if defined(NCBI_OS_DARWIN)
1761 paths[2] =
"/usr/local/ncbi/igblast/data";
1763 paths[2] = paths[0];
1795 for (
int i=0;
i<3;
i++) {
1818 for (
int i=0;
i<3;
i++) {
1842 "Germline annotation database " + df_db_name +
" could not be found in [internal_data] directory");
1870 const static char suffix[] =
"VDJ";
1872 for (
int gene=0; gene< num_genes; ++gene) {
1877 arg_sub.push_back(suffix[gene]);
1878 arg_db.push_back(suffix[gene]);
1879 arg_na.push_back(suffix[gene]);
1883 if (args.
Exist(arg_sub) && args[arg_sub]) {
1884 CNcbiIstream& subj_input_stream = args[arg_sub].AsInputFile();
1896 subj_range, parse_deflines,
1897 use_lcase_masks, subjects);
1901 new blast::CObjMgr_QueryFactory(*subjects));
1903 sub_seqs, opts_hndl));
1906 gl_db_name.push_back(suffix[gene]);
1907 string db_name = (args.
Exist(arg_db) && args[arg_db])
1908 ? args[arg_db].AsString() : gl_db_name;
1911 if (args.
Exist(arg_db +
"_seqidlist") && args[arg_db +
"_seqidlist"]) {
1939 "Use lower case filtering in query and subject sequence(s)?",
true);
1944 "Location on the query sequence in 1-based offsets "
1945 "(Format: start-stop)",
1951 "Query strand(s) to search against database/subject",
1959 "Should the query and subject defline(s) be parsed?",
true);
1978 }
else if (
kStrand ==
"plus") {
1980 }
else if (
kStrand ==
"minus") {
1995 "Invalid specification of query location");
2011 "Use lower case filtering in subject sequence(s)?",
true);
2019 "fasta",
"fastc",
"fastq",
2023 "mates for query sequences (if given in "
2029 "Comma-separated SRA accessions",
2037 "File with a list of SRA accessions, one per line",
2048 "subject defline(s) be parsed?",
2088 "Unexpected input format: " +
2135 if (!line.empty()) {
2143 "No SRA accessions provided");
2165 : m_RequestMoleculeType(request_mol_type),
2166 m_IsRpsBlast(is_rpsblast),
2167 m_IsIgBlast(is_igblast),
2169 m_IsMapper(is_mapper),
2170 m_IsKBlast(is_kblast),
2171 m_SupportsDatabaseMasking(
false),
2172 m_SupportIPGFiltering(
false)
2202 "BLAST database molecule type",
2208 vector<string> database_args;
2209 database_args.push_back(
kArgDb);
2232 "Effective length of the database ",
2240 "Restrict search of database to list of GIs",
2244 "Restrict search of database to list of SeqIDs",
2248 "Restrict search of database to everything"
2249 " except the specified GIs",
2254 "Restrict search of database to everything"
2255 " except the specified SeqIDs",
2260 "Restrict search of database to include only "
2261 "the specified taxonomy IDs and their descendants "
2262 "(multiple IDs delimited by ',')",
2265 "Restrict search of database to everything "
2266 "except the specified taxonomy IDs and their descendants "
2267 "(multiple IDs delimited by ',')",
2271 "Restrict search of database to include only "
2272 "the specified taxonomy IDs and their descendants ",
2275 "Restrict search of database to everything "
2276 "except the specified taxonomy IDs and their descendants ",
2290 "Restrict search of database to list of IPGs",
2295 "Restrict search of database to everything"
2296 " except the specified IPGs",
2300 const vector<string> kBlastDBFilteringOptions = {
2311 for (
size_t i = 0;
i < kBlastDBFilteringOptions.size();
i++) {
2312 for (
size_t j =
i+1; j < kBlastDBFilteringOptions.size(); j++) {
2314 kBlastDBFilteringOptions[j]);
2320 for (
const string& s: kBlastDBFilteringOptions) {
2328 "Restrict search with the given Entrez query",
2337 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
2338 (!defined(NCBI_COMPILER_MIPSPRO)) )
2342 "filtering_algorithm",
2343 "Filtering algorithm ID to apply to the BLAST database as soft "
2350 "filtering_algorithm",
2351 "Filtering algorithm ID to apply to the BLAST database as hard "
2362 "Subject sequence(s) to search",
2364 ITERATE(vector<string>, dbarg, database_args) {
2371 "Location on the subject sequence in 1-based offsets "
2372 "(Format: start-stop)",
2374 ITERATE(vector<string>, dbarg, database_args) {
2411 while (!reader.
AtEOF()) {
2418 unique_ptr<ITaxonomy4Blast> tb;
2419 if( !isTargetOnly ) {
2424 LOG_POST(
Warning <<
"The -taxids command line option requires additional data files. Please see the section 'Taxonomic filtering for BLAST databases' in https://www.ncbi.nlm.nih.gov/books/NBK569839/ for details.");
2428 for (
auto id : ids) {
2434 if( isTargetOnly ) {
2439 tb->GetLeafNodeTaxids(taxid, desc);
2450 if(isNegativeList) {
2514 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
2515 (!defined(NCBI_COMPILER_MIPSPRO)) )
2525 unique_ptr<CDecompressIStream> decompress_stream;
2528 decompress_stream.reset(
2531 subj_input_stream = decompress_stream.get();
2534 subj_input_stream = &args[
kArgSubject].AsInputFile();
2541 "Invalid specification of subject location");
2552 subj_range, parse_deflines,
2559 "Either a BLAST database or subject sequence(s) must be specified");
2578 string kOutputFormatDescription =
string(
2579 "alignment view options:\n"
2581 " 1 = Query-anchored showing identities,\n"
2582 " 2 = Query-anchored no identities,\n"
2583 " 3 = Flat query-anchored showing identities,\n"
2584 " 4 = Flat query-anchored no identities,\n"
2587 " 7 = Tabular with comment lines,\n"
2588 " 8 = Seqalign (Text ASN.1),\n"
2589 " 9 = Seqalign (Binary ASN.1),\n"
2590 " 10 = Comma-separated values,\n"
2591 " 11 = BLAST archive (ASN.1),\n"
2592 " 12 = Seqalign (JSON),\n"
2593 " 13 = Multiple-file BLAST JSON,\n"
2594 " 14 = Multiple-file BLAST XML2,\n"
2595 " 15 = Single-file BLAST JSON,\n"
2596 " 16 = Single-file BLAST XML2");
2599 kOutputFormatDescription +=
",\n 17 = Sequence Alignment/Map (SAM)";
2601 kOutputFormatDescription +=
",\n 18 = Organism Report\n\n";
2603 kOutputFormatDescription +=
2604 "Options 6, 7, 10 and 17 "
2605 "can be additionally configured to produce\n"
2606 "a custom format specified by space delimited format specifiers,\n"
2607 "or in the case of options 6, 7, and 10, by a token specified\n"
2608 "by the delim keyword. E.g.: \"17 delim=@ qacc sacc score\".\n"
2609 "The delim keyword must appear after the numeric output format\n"
2611 "The supported format specifiers for options 6, 7 and 10 are:\n";
2614 kOutputFormatDescription +=
2615 "Options 6, 7 and 10 "
2616 "can be additionally configured to produce\n"
2617 "a custom format specified by space delimited format specifiers,\n"
2618 "or by a token specified by the delim keyword.\n"
2619 " E.g.: \"10 delim=@ qacc sacc score\".\n"
2620 "The delim keyword must appear after the numeric output format\n"
2622 "The supported format specifiers are:\n";
2628 kOutputFormatDescription +=
2629 "The supported format specifier for option 17 is:\n" +
2638 kOutputFormatDescription =
string(
2639 "alignment view options:\n"
2640 " 3 = Flat query-anchored, show identities,\n"
2641 " 4 = Flat query-anchored, no identities,\n"
2642 " 7 = Tabular with comment lines\n"
2643 " 19 = Rearrangement summary report (AIRR format)\n\n"
2644 "Options 7 can be additionally configured to produce\n"
2645 "a custom format specified by space delimited format specifiers.\n"
2646 "The supported format specifiers are:\n") +
2654 kOutputFormatDescription,
2663 "Number of database sequences to show one-line "
2664 "descriptions for\n"
2665 "Not applicable for outfmt > 4\n"
2673 "Number of database sequences to show alignments for\n"
2680 "Line length for formatting alignments\n"
2681 "Not applicable for outfmt > 4\n"
2693 "Sorting option for hits:\n"
2694 "alignment view options:\n"
2695 " 0 = Sort by evalue,\n"
2696 " 1 = Sort by bit score,\n"
2697 " 2 = Sort by total score,\n"
2698 " 3 = Sort by percent identity,\n"
2699 " 4 = Sort by query coverage\n"
2700 "Not applicable for outfmt > 4\n",
2708 "Sorting option for hps:\n"
2709 " 0 = Sort by hsp evalue,\n"
2710 " 1 = Sort by hsp score,\n"
2711 " 2 = Sort by hsp query start,\n"
2712 " 3 = Sort by hsp percent identity,\n"
2713 " 4 = Sort by hsp subject start\n"
2714 "Not applicable for outfmt != 0\n",
2723 "Maximum number of aligned sequences to keep \n"
2724 "(value of 5 or more is recommended)\n"
2743 string ignore1, ignore2;
2754 if(custom_fmt_spec.empty())
return;
2757 const string kFieldsWithSemicolSeparator =
"sallseqid staxids sscinames scomnames sblastnames sskingdoms";
2758 const string kFramesField =
"frames";
2759 const string kAllTitlesField =
"salltitles";
2761 if(customDelim ==
";") {
2762 vector <string> tokens;
2763 NStr::Split(kFieldsWithSemicolSeparator,
" ", tokens);
2764 for(
size_t i = 0;
i < tokens.size();
i++) {
2766 checkfield = tokens[
i];
2773 if(customDelim ==
"/") {
2774 checkfield = kFramesField;
2776 else if(customDelim ==
"<>") {
2777 checkfield = kAllTitlesField;
2779 if(!checkfield.empty() &&
NStr::Find(custom_fmt_spec,checkfield) !=
NPOS) {
2785 string msg(
"Your custom record separator (" + customDelim +
") is also used by the format specifier (" + checkfield +
2786 ") to separate multiple entries. Please use a different record separator (delim keyword).");
2794 string& custom_fmt_spec,
2795 string& custom_delim)
const
2797 custom_fmt_spec.clear();
2801 string::size_type pos;
2802 if ( (pos = fmt_choice.find_first_of(
' ')) != string::npos) {
2803 custom_fmt_spec.assign(fmt_choice, pos+1,
2804 fmt_choice.size()-(pos+1));
2805 fmt_choice.erase(pos);
2807 if(!custom_fmt_spec.empty()) {
2809 vector <string> tokens;
2811 if(tokens.size() > 0) {
2815 string msg(
"Delimiter format is invalid. Valid format is delim=<delimiter value>");
2819 custom_fmt_spec =
NStr::Replace(custom_fmt_spec,tokens[0],
"");
2828 os <<
"'" << fmt_choice <<
"' is not a valid output format";
2832 if (val < 0 || val >=
static_cast<int>(
eEndValue)) {
2833 string msg(
"Formatting choice is out of range");
2834 throw std::out_of_range(
msg);
2837 string msg(
"Formatting choice is not valid");
2838 throw std::out_of_range(
msg);
2844 fmt_type ==
eSAM) ) {
2845 custom_fmt_spec.clear();
2858 "SAM format is only applicable to blastn" );
2862 "AIRR rearrangement format is only applicable to igblastn" );
2866 "FASTA output format is only applicable to magicblast");
2932 "output formats > 4 . Use -max_target_seqs "
2933 "to control output");
2937 ERR_POST(
Warning <<
"The parameter -line_length is not applicable for "
2938 "output formats > 4 .");
2952 ERR_POST(
Warning <<
"The parameter -sorthits is ignored for output formats > 4.");
2956 if(hitlist_size < 5){
2968 ERR_POST(
Warning <<
"The parameter -sorthsps is ignored for output formats != 0.");
2979 string kOutputFormatDescription =
string(
2980 "alignment view options:\n"
2981 "sam = SAM format,\n"
2982 "tabular = Tabular format,\n"
2983 "asn = text ASN.1\n");
2985 string kUnalignedOutputFormatDescription =
string(
2986 "format for reporting unaligned reads:\n"
2987 "sam = SAM format,\n"
2988 "tabular = Tabular format,\n"
2989 "fasta = sequences in FASTA format\n"
2990 "Default = same as ") +
2994 kOutputFormatDescription,
2998 set<string> allowed_formats = {
"sam",
"tabular",
"asn"};
3003 kUnalignedOutputFormatDescription,
3006 set<string> allowed_unaligned_formats = {
"sam",
"tabular",
"fasta"};
3016 "or '/2' at the end of read ids for SAM format and" \
3022 "Suppress discordant alignments for paired reads");
3025 "A user tag to add to each alignment",
3036 if (fmt_choice ==
"sam") {
3039 else if (fmt_choice ==
"tabular") {
3042 else if (fmt_choice ==
"asn") {
3047 os <<
"'" << fmt_choice <<
"' is not a valid output format";
3057 if (fmt_choice ==
"sam") {
3060 else if (fmt_choice ==
"tabular") {
3063 else if (fmt_choice ==
"fasta") {
3068 os <<
"'" << fmt_choice
3069 <<
"' is not a valid output format for unaligned reads";
3140 "Number of threads (CPUs) to use in the BLAST search",
3151 "Multi-thread mode to use in BLAST search:\n "
3152 "0 auto split by database or queries \n "
3153 "1 split by queries\n "
3154 "2 split by database",
3199 " to match the number of available CPUs");
3216 <<
"ignored when '" <<
kArgSubject <<
"' is specified.");
3248 arg_desc.
AddFlag(
"verbose",
"Produce verbose output (show BLAST options)",
3250 arg_desc.
AddFlag(
"remote_verbose",
3251 "Produce verbose output for remote searches",
true);
3252 arg_desc.
AddFlag(
"use_test_remote_service",
3253 "Send remote requests to test servers",
true);
3264 if (args[
"use_test_remote_service"]) {
3278 "If the query range of a hit is enveloped by that of at "
3279 "least this many higher-scoring hits, delete the hit",
3286 "Best Hit algorithm overhang value "
3287 "(recommended value: " +
3299 "Best Hit algorithm score edge value "
3300 "(recommended value: " +
3339 "Use MegaBLAST database index",
3343 "MegaBLAST database index name (deprecated; use only for old style indices)",
3366 bool use_index =
true;
3367 bool force_index =
false;
3368 bool old_style_index =
false;
3371 if( args[
kArgUseIndex].AsBoolean() ) force_index =
true;
3372 else use_index =
false;
3376 args[
kTask].AsString() !=
"megablast" ) {
3385 old_style_index =
true;
3388 index_name = args[
kArgDb].AsString();
3392 "Can not deduce database index name" );
3395 opts.
SetUseIndex(
true, index_name, force_index, old_style_index );
3412 "Comma-separated SRA accessions",
3511 "Search strategy to use",
3515 "File name to record the search strategy used",
3581 (*arg)->ExtractAlgorithmOptions(args, opts);
3586 (*arg)->ExtractAlgorithmOptions(args, opts);
3613 (*arg)->ExtractAlgorithmOptions(args, opts);
3647 retval->SetCurrentGroup(
"Input query options");
3648 retval->SetCurrentGroup(
"General search options");
3649 retval->SetCurrentGroup(
"BLAST database options");
3650 retval->SetCurrentGroup(
"BLAST-2-Sequences options");
3651 retval->SetCurrentGroup(
"Formatting options");
3652 retval->SetCurrentGroup(
"Query filtering options");
3653 retval->SetCurrentGroup(
"Restrict search or results");
3654 retval->SetCurrentGroup(
"Discontiguous MegaBLAST options");
3655 retval->SetCurrentGroup(
"Statistical options");
3656 retval->SetCurrentGroup(
"Search strategy options");
3657 retval->SetCurrentGroup(
"Extension options");
3658 retval->SetCurrentGroup(
"");
3662 (*arg)->SetArgumentDescriptions(*retval);
3664 return retval.release();
3706 can_override.
insert(
"remote_verbose");
3707 can_override.
insert(
"verbose");
3754 typedef vector< CRef<CArgValue> > TArgs;
3755 TArgs arguments = args.
GetAll();
3757 const string& arg_name = (*a)->GetName();
3758 const string& arg_value = (*a)->AsString();
3761 if (has_defaults.
find(arg_name) != has_defaults.
end()) {
3762 if (has_defaults[arg_name] == arg_value) {
3765 if (arg_name ==
kTask && arg_value ==
"megablast") {
3770 "using a search strategy");
3774 if (can_override.
find(arg_name) == can_override.
end()) {
3776 "using a search strategy");
3807 (*arg)->ExtractAlgorithmOptions(args, opts);
User-defined methods of the data storage class.
Declares singleton objects to store the version and reference for the BLAST engine.
static void s_GetTaxIDList(const string &in, bool isFile, bool isNegativeList, CRef< CSearchDatabase > &sdb, bool isTargetOnly)
static bool s_IsDefaultWordThreshold(EProgram program, double threshold)
static void s_ValidateCustomDelim(string custom_fmt_spec, string customDelim)
static void s_SetCompositionBasedStats(CBlastOptions &opt, const string &comp_stat_string, bool smith_waterman_value, bool *ungapped)
Auxiliary function to set the composition based statistics and smith waterman options.
const char * kTemplType_Coding
Value to specify coding template type.
const char * kTemplType_Optimal
Value to specify optimal template type.
const char * kTemplType_CodingAndOptimal
Value to specify coding+optimal template type.
CArgDescriptions * SetUpCommandLineArguments(TBlastCmdLineArgs &args)
Create a CArgDescriptions object and invoke SetArgumentDescriptions for each of the TBlastCmdLineArgs...
static string s_RegisterOMDataLoader(CRef< CSeqDB > db_handle)
Interface for converting blast-related command line arguments into blast options.
vector< CRef< IBlastCmdLineArgs > > TBlastCmdLineArgs
Type definition of a container of IBlastCmdLineArgs.
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
Declares the BLAST exception class.
Routines for creating nucleotide BLAST lookup tables.
EDiscWordType
General types of discontiguous word templates.
#define PSI_INCLUSION_ETHRESH
Defaults for PSI-BLAST and DELTA-BLAST options.
#define BLAST_HITLIST_SIZE
Number of database sequences to save hits for.
#define BLAST_WORD_THRESHOLD_BLASTX
default threshold (blastx)
Int2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char *matrixName, double *threshold)
Get thresholds for word-finding suggested by Stephen Altschul.
@ eDynProgScoreOnly
standard affine gapping
Int2 BLAST_GetSuggestedWindowSize(EBlastProgramType program_number, const char *matrixName, Int4 *window_size)
Get window sizes for two hit algorithm suggested by Stephen Altschul.
#define BLAST_GAP_TRIGGER_NUCL
default bit score that will trigger a gapped extension for blastn
#define MAX_DB_WORD_COUNT_MAPPER
Default max frequency for a database word.
#define BLAST_EXPECT_VALUE
Default parameters for saving hits.
#define DELTA_INCLUSION_ETHRESH
Inclusion threshold for DELTA-BLAST.
#define BLAST_WORD_THRESHOLD_BLASTP
neighboring word score thresholds; a threshold of zero means that only query and subject words that m...
#define BLAST_GAP_TRIGGER_PROT
default bit score that will trigger gapped extension
#define PSI_PSEUDO_COUNT_CONST
Pseudo-count constant for PSI-BLAST.
@ eDynProgTbck
standard affine gapping
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
#define BLAST_WORD_THRESHOLD_TBLASTN
default neighboring threshold (tblastn/rpstblastn)
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
Boolean Blast_QueryIsNucleotide(EBlastProgramType p)
Returns true if the query is nucleotide.
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST (i.e., rpsblast or rpstblastn).
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
PSIDiagnosticsRequest * PSIDiagnosticsRequestNewEx(Boolean save_ascii_pssm)
Allocates a PSIDiagnosticsRequest structure, setting fields to their default values for their use in ...
Int2 BLAST_GetProteinGapExistenceExtendParams(const char *matrixName, Int4 *gap_existence, Int4 *gap_extension)
Extract the recommended gap existence and extension values.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
@ eTblastx
Translated nucl-Translated nucl.
@ eBlastn
Nucl-Nucl (traditional blastn)
@ eRPSBlast
protein-pssm (reverse-position-specific BLAST)
@ eBlastp
Protein-Protein.
@ eTblastn
Protein-Translated nucl.
@ eDeltaBlast
Delta Blast.
@ ePSITblastn
PSI Tblastn.
@ eRPSTblastn
nucleotide-pssm (RPS blast with translated query)
@ eBlastx
Translated nucl-Protein.
Auxiliary class to validate the genetic code input.
virtual string GetUsage(void) const
Overloaded method from CArgAllow.
virtual bool Verify(const string &value) const
Overloaded method from CArgAllow.
Class to constrain the length of the file name passed to a given CArgDescriptions key.
Class to constrain the values of an argument to those in between the values specified in the construc...
Class to constrain the values of an argument to those greater than or equal to the value specified in...
Class to constrain the values of an argument to those less than or equal to the value specified in th...
Auxiliary class to store the name of an output file, which is reset every time its GetStream method i...
CRef< CRemoteArgs > m_RemoteArgs
remote vs. local execution options
CRef< CBlastOptionsHandle > SetOptionsForSavedStrategy(const CArgs &args)
Combine the command line arguments into a CBlastOptions object recovered from saved search strategy.
string GetTask() const
Get the task for this object.
virtual CNcbiIstream & GetInputStream()
Get the input stream.
CRef< CBlastOptionsHandle > m_OptsHandle
The BLAST options handle; only non-NULL if assigned via SetOptionsHandle.
CRef< CQueryOptionsArgs > m_QueryOptsArgs
query options object
CRef< CBlastDatabaseArgs > m_BlastDbArgs
database/subject object
virtual CRef< CBlastOptionsHandle > x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, const CArgs &args)=0
Create the options handle based on the command line arguments.
CRef< CBlastOptionsHandle > SetOptions(const CArgs &args)
Extract the command line arguments into a CBlastOptionsHandle object.
CRef< CSearchStrategyArgs > m_SearchStrategyArgs
arguments for dealing with search strategies
string m_Task
Task specified in the command line.
CRef< CDebugArgs > m_DebugArgs
Debugging arguments.
CRef< CBlastOptionsHandle > x_CreateOptionsHandleWithTask(CBlastOptions::EAPILocality locality, const string &task)
Creates the BLAST options handle based on the task argument.
CBlastAppArgs()
Default constructor.
CRef< CMTArgs > m_MTArgs
multi-threaded options
CArgDescriptions * SetCommandLine()
Set the command line arguments.
CRef< CFormattingArgs > m_FormattingArgs
formatting options
void x_IssueWarningsForIgnoredOptions(const CArgs &args)
Issue warnings when recovering from a search strategy (command line applications only)
bool m_IsUngapped
Is this application being run ungapped.
TBlastCmdLineArgs m_Args
Set of command line argument objects.
CNcbiOstream * GetExportSearchStrategyStream(const CArgs &args)
Get the output stream for the search strategy.
void SetTask(const string &task)
Set the task for this object.
virtual CNcbiOstream & GetOutputStream()
Get the output stream.
CRef< CStdCmdLineArgs > m_StdCmdLineArgs
standard command line arguments class
Argument class to collect database/subject arguments.
CBlastDatabaseArgs(bool request_mol_type=false, bool is_rpsblast=false, bool is_igblast=false, bool is_mapper=false, bool is_kblast=false)
Constructor.
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opts)
Interface method.
static bool HasBeenSet(const CArgs &args)
Auxiliary function to determine if the database/subject sequence has been set.
CRef< objects::CScope > m_Scope
CScope object in which all subject sequences read are kept.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_IsMapper
true for short read mapper
bool IsProtein() const
Is the database/subject protein?
bool m_SupportsDatabaseMasking
true if it's supported
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
bool m_IsProtein
Is the database/subject(s) protein?
bool m_RequestMoleculeType
Determines whether the database's molecule type should be requested in the command line,...
bool m_IsIgBlast
true if the search is Ig-BLAST
CRef< IQueryFactory > m_Subjects
The subject sequences.
bool m_IsRpsBlast
true if the search is RPS-BLAST
CRef< CSearchDatabase > m_SearchDb
Description of the BLAST database.
bool m_SupportIPGFiltering
true if IPG filtering is supported
bool m_IsKBlast
true for Kblastp
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
static string GetLoaderNameFromArgs(CConstRef< CSeqDB > db_handle)
Defines BLAST error codes (user errors included)
Encapsulates ALL the BLAST algorithm's options.
EAPILocality
Enumerates the possible contexts in which objects of this type can be used.
@ eLocal
To be used for running BLAST locally.
@ eRemote
To be used when running BLAST remotely.
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
bool m_Is2and3Supported
Are options 2 and 3 supported.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
string m_ZeroOptDescr
Non standard description for option zero.
string m_DefaultOpt
Default option.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_DebugOutput
Should debugging (verbose) output be printed.
bool m_RmtDebugOutput
Should debugging (verbose) output be printed for remote BLAST.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CRef< CSearchDatabase > m_DomainDb
Conserved Domain Database.
bool m_ShowDomainHits
Is printing CDD hits requested.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_QueryIsProtein
true if the query is protein
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
void x_TokenizeFilteringArgs(const string &filtering_args, vector< string > &output) const
Auxiliary method to tokenize the filtering string.
bool m_FilterByDefault
Should filtering be applied by default?
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_QueryIsProtein
true if the query is protein
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_QueryIsProtein
true if the query is protein
bool m_IsRpsBlast
true if the search is RPS-BLAST
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_IsIgBlast
true if the search is igblast
bool m_IsTblastx
true if the search is tblastx
bool m_ShowPercentIdentity
true if the percent identity option should be shown
CGenericSearchArgs(bool query_is_protein=true, bool is_rpsblast=false, bool show_perc_identity=false, bool is_tblastx=false, bool is_igblast=false, bool suppress_sum_stats=false)
Constructor.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_SuppressSumStats
true if search is blastn or blastp
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
ETarget m_Target
Genetic code target.
@ eQuery
Query genetic code.
@ eDatabase
Database genetic code.
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opts)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CRef< CIgBlastOptions > m_IgOptions
Igblast options to fill.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_IsProtein
Is this a protein search?
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CRef< objects::CScope > m_Scope
scope to get sequences
double m_JDistance
Jaccard distance.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
int m_CandidateSeqs
Number of candidate sequences to try BLAST on.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
int m_MinHits
Minimum number of hits in LSH phase.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
size_t m_NumThreads
Number of threads to spawn.
void x_ExtractAlgorithmOptions(const CArgs &args)
CMTArgs(size_t default_num_threads=CThreadable::kMinNumThreads, EMTMode mt_mode=eNotSupported)
Default Constructor.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CNcbiIstream * m_MateInputStream
EInputFormat m_InputFormat
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opt)
Interface method,.
vector< string > m_SraAccessions
unique_ptr< CDecompressIStream > m_DecompressIStream
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Argument class to retrieve megablast database indexing options.
static bool HasBeenSet(const CArgs &args)
Auxiliary function to determine if the megablast database indexing options have been set.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opts)
Interface method,.
static CNcbiApplication * Instance(void)
Singleton method.
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Wrapper class for PSIBlastOptions .
Wrapper class for PSIDiagnosticsRequest .
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
string m_ProgDesc
Application's description.
string m_ProgName
Application's name.
CProgramDescriptionArgs(const string &program_name, const string &program_description)
Constructor.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_SaveLastPssm
Save PSSM after the last database search.
CRef< CAutoOutputFileReset > m_AsciiMatrixOutput
ASCII matrix output file.
bool m_IsDeltaBlast
Are the arguments set up for Delta Blast.
@ eProteinDb
Traditional, iterated PSI-BLAST.
@ eNucleotideDb
PSI-Tblastn, non-iterated.
CRef< CAutoOutputFileReset > m_CheckPointOutput
checkpoint output file
ETargetDatabase m_DbTarget
Molecule of the database.
CRef< objects::CPssmWithParameters > x_CreatePssmFromMsa(CNcbiIstream &input_stream, CBlastOptions &opt, bool save_ascii_pssm, unsigned int msa_master_idx, bool ignore_pssm_tmpl_seq)
Auxiliary function to create a PSSM from a multiple sequence alignment file.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CRef< objects::CPssmWithParameters > m_Pssm
PSSM.
size_t m_NumIterations
number of iterations to perform
bool m_IsDeltaBlast
Are these arguments for Delta Blast.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Computes a PSSM as specified in PSI-BLAST.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_UseLCaseMask
use lowercase masking in FASTA input
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
objects::ENa_strand m_Strand
Strand(s) to search.
TSeqRange m_Range
range to restrict the query sequence(s)
bool m_ParseDeflines
Should the deflines be parsed?
bool m_QueryCannotBeNucl
only false for blast[xn], and tblastx true in case of PSI-BLAST
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_IsRemote
Should the search be executed remotely?
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Argument class to import/export the search strategy.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CNcbiIstream * GetImportStream(const CArgs &args) const
Get the input stream for the search strategy.
CNcbiOstream * GetExportStream(const CArgs &args) const
Get the output stream for the search strategy.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
void AddTaxIds(const set< TTaxId > &tax_ids)
EStatType
Counts statistics formats.
static EStatType DiscoverStatType(string const &name)
Return the format of the counts statistics file.
Root class for all serialization exceptions.
bool m_GzipEnabled
If true input file will be decompressed with gzip if filename ends with ".gz".
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CNcbiIstream & GetInputStream() const
Get the input stream for a command line application.
unique_ptr< CDecompressIStream > m_DecompressIStream
CNcbiOstream & GetOutputStream() const
Get the output stream for a command line application.
CRef< CTmpFile > m_QueryTmpInputFile
ASN.1 specification of query sequences when read from a saved search strategy.
unique_ptr< CCompressOStream > m_CompressOStream
CNcbiOstream * m_OutputStream
Application's output stream.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CNcbiIstream * m_InputStream
Application's input stream.
bool m_SRAaccessionEnabled
If true, option to specify SRA runs will be presented as possible query input.
void SetInputStream(CRef< CTmpFile > input_file)
Set the input stream if read from a saved search strategy.
CNcbiOstream * m_UnalignedOutputStream
Output stream to report unaligned sequences/reads.
unique_ptr< CCompressOStream > m_UnalignedCompressOStream
Simple implementation of ILineReader for i(o)streams.
static unsigned int GetCpuCount(void)
Return number of active CPUs/cores (never less than 1).
const set< string > m_SupportedTasks
Set of supported tasks by this command line argument.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CTaskCmdLineArgs(const set< string > &supported_tasks, const string &default_task)
Constructor.
string m_DefaultTask
Default task for this command line argument.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Class to retrieve taxonomic information for filtering BLASTDBs.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
BLAST Command line arguments design The idea is to have several small objects (subclasses of IBlastCm...
const_iterator end() const
const_iterator find(const key_type &key) const
iterator_bool insert(const value_type &val)
const_iterator find(const key_type &key) const
const_iterator end() const
const string kArgMatrixName
Argument for scoring matrix.
const string kArgWindowMaskerDatabase
Argument to specify a path to a Window Masker database.
const string kArgGLChainType
Argument to specify the germline database chaintype name for igblast.
const string kArgAsciiPssmOutputFile
Argument to specify the file name for saving the ASCII representation of the PSSM.
const string kArgMaxDbWordCount
Argument to specify a maximum number of times a word can be repeated in a database.
const string kArgGLOrigin
Argument to specify the germline origin for igblast.
const string kDfltArgJDistance
Jaccard default value.
const string kArgPSIPseudocount
Argument to specify the pseudo-count value used when constructing PSSM.
const string kArgNoGreedyExtension
Argument to specify non-greedy dynamic programming extension.
const string kDfltArgApplyFiltering
Default argument to specify filtering.
const string kArgPSIOutputChkPntFile
Argument to specify a 'checkpoint' file to write the PSSM.
const string kArgSplice
Argument to specify whether to search for spliced alignments.
const string kArgMinRawGappedScore
Argument for minimum raw gapped score for preliminary gapped and traceback stages.
const string kArgGLNumAlign
Argument to specify the number of alignments for germline database.
const string kArgLookupStride
Argument to specify the stride when creating a lookup table.
const string kArgDbSize
Effective length of BLAST database.
const string kArgDbGeneticCode
Database genetic code.
const string kArgPSIInclusionEThreshold
Argument to specify the evalue inclusion threshold for considering aligned sequences for PSSM constru...
const string kArgRevFwd
Argument to specify reverse/forward strand specificity.
const string kArgPSIInputChkPntFile
Argument to specify a 'checkpoint' file to recover the PSSM from.
const string kArgScore
Argument to specify cutoff score for accepting a spliced alignment.
const string kArgMaxIntronLength
Argument to specify the maximum length of an intron when linking multiple distinct alignments (applic...
const string kArgTranslate
Argument to specify if Igblast alignment should be translated to protein.
const bool kDfltArgParseDeflines
Default argument to specify whether sequences deflines should be parsed.
const string kArgDMBTemplateLength
Argument to specify the discontinuous megablast template length.
const string kArgOutput
Output file name.
const string kArgClonotypeFile
Argument to specify number of clonotype file.
const int kDfltArgCullingLimit
Default argument to specify the culling limit.
const string kArgPercentIdentity
Argument to specify the target percent identity.
const string kArgStrand
Argument to select the query strand(s) to search.
const string kDfltArgCompBasedStatsDelta
const string kArgDMBTemplateType
Argument to specify the discontinuous megablast template type.
const string kArgCandidateSeqs
Number of sequences to attempt BLAST on.
const string kArgOutputSearchStrategy
Argument to specify the file name to save the search strategy used for a BLAST search.
const string kArgDPenalty
Argument to specify mismatch penalty for D gene search.
const string kArgGapExtend
Argument to select the gap extending penalty.
const string kArgRemote
Argument to determine whether searches should be run locally or remotely.
const string kArgQueryLocation
Argument to specify a location to restrict the query sequence(s)
const string kArgDbHardMask
const string kArgDbSoftMask
List of filtering algorithms to apply to subjects as soft masking.
const string kArgOnlyStrandSpecific
Argument to specify only strand specific results.
const int kDfltArgMaxIntronLength
Default value for maximum intron length.
const double kDfltArgBestHitOverhang
Default argument for the overhang parameter to the best hit algorithm.
const string kArgJPenalty
Argument to specify mismatch penalty for J gene search.
const string kArgFilteringDb
Argument to specify a filtering database (i.e.
const string kArgSegFiltering
Argument to specify SEG filtering on query sequence(s)
const string kArgDbType
BLAST database molecule type.
const string kArgTaxIdListFile
Argument to specify file with taxonomy ids for filtering.
const string kArgUnalignedOutput
Argument to output unaligned reads in a separate file.
const string kArgNoTaxIdExpansion
Argument to not to resolve TaxId to descendant.
const string kArgMinJLength
Argument to specify minimal required J gene length.
const string kArgPrintMdTag
Argument to specify printing SAM MD tag.
const string kArgGappedXDropoff
Argument to select the gapped X dropoff value.
const string kArgUseSWTraceback
Argument to specify that Smith-Waterman algorithm should be used to compute locally optimal alignment...
const string kArgIndexName
Megablast database index name.
const string kArgGapOpen
Argument to select the gap opening penalty.
const string kArgDustFiltering
Argument to specify DUST filtering on query sequence(s)
const string kArgSubjectBestHit
Argument to specify the culling limit.
const string kArgQueryMate
Mates for the query sequences if given in a separate file.
const string kArgFinalGappedXDropoff
Argument to select the final gapped X dropoff value.
const string kArgBestHitOverhang
Argument to specify the overhang parameter to the best hit algorithm.
const string kArgNegativeSeqidList
argument for seqid list to exclude from a BLAST database search
const string kArgEntrezQuery
Entrez query.
const string kArgJDistance
KBLASTP arguments Specifies Jaccard distance (threshold)
const string kArgGLDatabase
Argument to specify the germline database name for igblast.
const string kArgGLFocusV
Argument to specify if Igblast alignment should restrict to V seg.
const string kTask
Task to perform.
const string kArgSraAccessionBatch
Argument to specify a file with a list of SRA accessions.
const string kArgLineLength
Argument to specify line length for displaying alignments.
const string kArgMaxTargetSequences
Argument to specify the maximum number of target sequences to keep (a.k.a.
const string kArgFrameShiftPenalty
Argument to specify the frame shift penalty.
const string kArgUseIndex
Flag to force using or not using megablast database index.
const bool kDfltArgUseIndex
Default value for megablast database index flag.
const string kArgMinDMatch
Argument to specify if Igblast min D gene match.
const string kDfltArgQuery
Default value for query sequence input.
const string kArgRpsDb
Argument to specify domain database name for DELTA-BLAST.
const string kArgQualityFilter
Argument to specify whether quality filtering is to be done.
const string kArgNegativeGiList
argument for gi list to exclude from a BLAST database search
const string kArgInputFormat
Argument to specify input format.
const string kArgLookupTableMaskingOnly
Argument to specify to mask query during lookup table creation.
const string kArgMismatch
Argument to select the nucleotide mismatch penalty.
const string kArgParseDeflines
Argument to specify if the query and subject sequences defline should be parsed.
const string kArgSaveAllPssms
Argument to specify whether to save PSSM after each psiblast iteration.
const string kDfltArgCandidateSeqs
const string kArgIgnoreMsaMaster
Argument to specify whether the template sequence (usually the query) should be ignored for the purpo...
const string kArgEvalue
Argument for expectation value cutoff.
const string kArgFwdRev
Argument to specify forward/reverse strand specificity.
const string kArgOldStyleIndex
Use old style megablast index.
const string kArgMaskLevel
const string kArgIgSeqType
Argument to specify IgBlast sequence type.
const string kArgGLDomainSystem
Argument to specify the Ig domain system.
const string kArgIpgList
IPG list file name to restrict BLAST database.
const string kArgMaxEditDist
Argument to specify a cutoff edit distance for an alignment.
const string kArgEnableSraCache
Argument to enable SRA caching in local files.
const bool kDfltArgUseLCaseMasking
Default argument to specify whether lowercase masking should be used.
const string kArgCullingLimit
Argument to specify the culling limit.
const string kArgGapTrigger
Argument to specify number of bits to initiate gapping.
const string kArgEffSearchSpace
Argument to specify the effective length of the search space.
const string kArgSubjectLocation
Argument to specify a location to restrict the subject sequence(s)
const string kArgOffDiagonalRange
Argument to select the off-diagonal scan range in the 2-hit wordfinder algorithm.
const string kDfltArgStrand
Default value for strand selection.
const string kArgPaired
Argument to specify whether mapped reads are paired.
const string kArgQueryCovHspPerc
Argument to specify min query coverage percentage for each hsp.
const string kDfltArgSegFiltering
Default arguments to apply SEG filtering on query sequence(s)
const string kArgMTMode
Argument to specify mt mode (split by db or split by queries)
const string kArgPSINumIterations
Argument to select the number of iterations to perform in PSI-BLAST.
const string kArgQuery
Query sequence(s)
const string kArgNumClonotype
Argument to specify number of clonotype to show.
const string kArgMinVLength
Argument to specify minimal required V length.
const string kArgNegativeIpgList
argument for IPG list to exclude from a BLAST database search
const string kArgNoUnaligned
Argument to turn off printing of unaligned reads.
const string kArgComplexityAdj
const string kArgMSAInputFile
Argument to specify a multiple sequence alignment file to create a PSSM from.
const string kArgUnalignedFormat
Argument to specify format for reporting unaligned reads.
const string kArgNegativeTaxIdList
Argument to specify negative taxonomy ids filtering.
const string kDfltArgOldStyleIndex
Default value for use old style megablast index.
const string kArgVPenalty
Argument to specify mismatch penalty for V gene search.
const string kDfltArgDustFiltering
Default arguments to apply DUST filtering on query sequence(s)
const string kArgSeqIdList
seqid list file name to restrict BLAST database
const string kDfltArgLookupTableMaskingOnlyProt
Default argument mask a protein query during lookup table construction.
const unsigned int kDfltArgPSINumIterations
const string kArgRevOnly
Argument to specify reverse-only strand specificity.
const string kArgDb
BLAST database name.
const string kArgOutputGzip
Argument to specify that the output will be compressed with gzip.
const string kArgCustomInternalData
Argument to specify custom internal data file.
const string kArgWindowMaskerTaxId
Argument to specify a taxid for Window Masker.
const string kArgCRegionNumAlign
Argument to specify the number of alignments for c gene db.
const string kArgWindowSize
Argument to select the window size in the 2-hit wordfinder algorithm.
const string kArgRefType
Reference type: genome or transcriptome.
const string kArgWordSize
Argument to select the wordfinder's word size.
const string kArgUseLCaseMasking
Argument to specify whether lowercase masking in the query sequence(s) should be interpreted as maski...
const string kArgNumThreads
Argument to determine the number of threads to use when running BLAST.
const string kDfltArgLookupTableMaskingOnlyNucl
Default argument mask a nucleotide query during lookup table construction.
const string kArgMatch
Argument to select the nucleotide match reward.
const string kDfltArgMaskLevel
const string kDfltArgNoFiltering
Default argument to specify no filtering.
const string kArgPHIPatternFile
Argument to specify a PHI-BLAST pattern file.
const string kArgTaxIdList
Argument to specify taxonomy ids for filtering.
const string kArgDetectOverlap
Argument to detect overlap at vdj junction.
const string kArgCRegionDatabase
Argument to specify the C region gene database.
const string kArgMaxHSPsPerSubject
Argument to specify the maximum number of HSPs to save per subject for each query.
const string kArgUngapped
Argument to specify whether the search should be ungapped only.
const string kArgQueryGeneticCode
Query genetic code.
const string kArgSraAccession
Argument to specify SRA accessions.
const string kArgShowDomainHits
Argument to specify whether show domain hits in DELTA-BLAST.
const string kArgGLSubject
Argument to specify the germline subject file for igblast.
const string kArgNoDiscordant
Argument to specify if non-concordant pairs should be displayed.
const double kDfltArgBestHitScoreEdge
Default argument for the score edge parameter to the best hit algorithm.
const string kArgDomainInclusionEThreshold
Argument to specify inclusion e-value threshold for conserved domains.
const string kArgSumStats
Argument to turn on sum statistics.
const string kArgUserTag
Argument to specify user tag for alignments (magicblast)
const string kArgNoReadIdTrim
Argument to specify not trimming of '.1' and '.2' at the end of read ids in SAM format for paired rea...
const string kArgInputSearchStrategy
Argument to specify the search strategy file to read and use for a BLAST search.
const string kArgMinHits
Specifies minimal number of LSH matches.
const string kArgBestHitScoreEdge
Argument to specify the score edge parameter to the best hit algorithm.
const string kArgNegativeTaxIdListFile
Argument to specify file with taxonomy ids for Negative filtering.
const string kArgExtendAlign3end
Argument to specify if Igblast alignment should be extended at 3' end.
const string kDfltArgMinHits
LSH matches default value.
const int kDfltOffDiagonalRange
const string kArgSubject
Subject input file to search.
const string kDfltArgCompBasedStats
Default argument for composition based statistics.
const string kArgWordScoreThreshold
Argument to specify the minimum word score such that the word is added to the lookup table.
const string kArgMSAMasterIndex
Argument to specify the index (1-based) of the sequence in the multiple sequence alignment to use as ...
const string kDfltArgRpsDb
Default value for domain database name.
const string kArgSaveLastPssm
Argument to specify whether the PSSM after the last psiblast database search should be saved.
const string kArgDFrameDefinitionFile
Argument to specify d gene frame definition data file.
const string kArgExtendAlign5end
Argument to specify if Igblast alignment should be extended at 5' end.
const string kArgUngappedXDropoff
Argument to select the ungapped X dropoff value.
const string kArgCompBasedStats
Argument to specify the composition based statistics mode to use.
const string kArgFwdOnly
Argument to specify forward-only strand specificity.
const string kArgGiList
gi list file name to restrict BLAST database
const string kArgLimitLookup
Argument to specify filtering lookup tables words by frequency in the searched database.
void Print(const CCompactSAMApplication::AlignInfo &ai)
ECompoAdjustModes
A collection of constants that specify all permissible modes of composition adjustment.
@ eCompositionBasedStats
Composition-based statistics as in NAR 29:2994-3005, 2001.
@ eCompoForceFullMatrixAdjust
Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, unconditionally.
@ eNoCompositionBasedStats
Don't use composition based statistics.
@ eCompositionMatrixAdjust
Composition-based score adjustment as in Bioinformatics 21:902-911, 2005, conditioned on sequence pro...
static SQLCHAR output[256]
static void des(const char *src, const char *out)
void SetEntrezQueryLimitation(const string &entrez_query)
Mutator for the entrez query.
void SetPHIPattern(const char *pattern, bool is_dna)
void SetInclusionThreshold(double u)
void SetCompositionBasedStats(ECompoAdjustModes mode)
CRef< objects::CPssmWithParameters > Run()
Runs the PSSM engine to compute the PSSM.
void SetLookupDbFilter(bool val)
void SetPseudoCount(int u)
void SetWindowSize(int w)
void SetNegativeGiList(CSeqDBGiList *gilist)
Mutator for the negative gi list.
void SetGapExtnAlgorithm(EBlastPrelimGapExt a)
void SetEvalueThreshold(double eval)
void SetWindowMaskerTaxId(int taxid)
Sets the tax id to select an appropriate windowmasker database Conversion algorithm from tax id to da...
void SetQueryCovHspPerc(double p)
void SetOutOfFrameMode(bool m=true)
void SetDomainInclusionThreshold(double th)
void SetDustFilteringLinker(int m)
CRef< CSeqDB > GetSeqDb() const
Obtain a reference to the database.
void SetSegFilteringHicut(double m)
void SetGapOpeningCost(int g)
void SetHitlistSize(int s)
void SetQueryGeneticCode(int gc)
CRef< CLocalDbAdapter > m_Db[5]
void SetEffectiveSearchSpace(Int8 eff)
void SetComplexityAdjMode(bool m=true)
void SetFrameShiftPenalty(int p)
void SetReadQualityFiltering(bool val=true)
Turn on/off next-generation read quality filtering with default parameters.
void SetRepeatFilteringDB(const char *db)
Sets the repeat filtering database to use.
void SetGapTracebackAlgorithm(EBlastTbackExt a)
void SetWindowMaskerDatabase(const char *db)
Sets the windowmasker database to use.
void SetIgnoreMsaMaster(bool val)
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
int GetHitlistSize() const
void SetWordThreshold(double w)
Sets WordThreshold.
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
void SetGapExtensionCost(int e)
void SetSumStatisticsMode(bool m=true)
void SetUnifiedP(int u=0)
void SetUseIndex(bool use_index=true, const string &index_name="", bool force_index=false, bool old_style_index=false)
void SetDustFilteringWindow(int m)
void SetCutoffScoreCoeffs(const vector< double > &c)
int GetGapExtensionCost() const
void SetMismatchPenalty(int p)
void SetOffDiagonalRange(int r)
void SetGapXDropoffFinal(double x)
void SetMatrixName(const char *matrix)
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
void SetBestHitOverhang(double overhang)
void SetSpliceAlignments(bool s)
void SetGapXDropoff(double x)
EProgram GetProgram() const
Accessors/Mutators for individual options.
void SetPercentIdentity(double p)
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
void SetSegFilteringLocut(double m)
double GetWordThreshold() const
Returns WordThreshold.
bool Validate() const
Validate the options contained in this object.
void SetMBTemplateType(unsigned char type)
void SetGapTrigger(double g)
void SetDbGeneticCode(int gc)
void SetMBTemplateLength(unsigned char len)
void SetXDropoff(double x)
void SetMaskAtHash(bool val=true)
void SetBestHitScoreEdge(double score_edge)
void SetDustFilteringLevel(int m)
int GetGapOpeningCost() const
void SetDustFiltering(bool val=true)
void SetSegFiltering(bool val=true)
void SetSegFilteringWindow(int m)
void ThrowIfInvalidTask(const string &task)
Validates that the task provided is indeed a valid task, otherwise throws a CBlastException.
void SetCutoffScore(int s)
void SetFilteringAlgorithm(int filt_algorithm_id)
Temporary fix for backwards compatibility with other 6.0 SCs.
void SetSmithWatermanMode(bool m=true)
void SetMaxDbWordCount(Uint1 num)
Set maximum word count for lookup table word masking by database frequency.
void SetGappedMode(bool m=true)
Int8 GetEffectiveSearchSpace() const
string m_CustomInternalData
const char * GetMatrixName() const
void SetLongestIntronLength(int l)
for linking HSPs with uneven gaps
void SetMaxEditDistance(int e)
void SetLookupTableStride(Uint4 val)
void SetMaxHspsPerSubject(int m)
EMoleculeType
Molecule of the BLAST database.
void SetCullingLimit(int s)
void SetLookupTableType(ELookupTableType type)
static CBlastOptionsHandle * CreateTask(string task, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested task,...
bool GetGappedMode() const
Returns true if gapped BLAST is set, false otherwise.
void SetMatchReward(int r)
void SetGiList(CSeqDBGiList *gilist)
Mutator for the gi list.
@ eBlastDbIsNucleotide
nucleotide
@ eBlastDbIsProtein
protein
const CNcbiRegistry & GetConfig(void) const
Get the application's cached configuration parameters (read-only).
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
#define NON_CONST_ITERATE(Type, Var, Cont)
Non constant version of ITERATE macro.
SStrictId_Tax::TId TTaxId
Taxon id type.
@ eFollowLinks
Follow symbolic links.
void AddFlag(const string &name, const string &comment, CBoolEnum< EFlagValue > set_value=eFlagHasValueIfSet, TFlags flags=0)
Add description for flag argument.
void SetConstraint(const string &name, const CArgAllow *constraint, EConstraintNegate negate=eConstraint)
Set additional user defined constraint on argument value.
void SetDependency(const string &arg1, EDependency dep, const string &arg2)
Define a dependency.
bool Exist(const string &name) const
Check existence of argument description.
void AddKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for mandatory key.
void SetUsageContext(const string &usage_name, const string &usage_description, bool usage_sort_args=false, SIZE_TYPE usage_width=78)
Set extra info to be used by PrintUsage().
void AddAlias(const string &alias, const string &arg_name)
Add argument alias.
void AddOptionalKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for optional key without default value.
vector< CRef< CArgValue > > GetAll(void) const
Get all available arguments.
void SetCurrentGroup(const string &group)
Set current arguments group name.
void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)
Add description for optional key with default value.
@ fOptionalSeparator
Allow to ignore separator between the argument's name and value.
@ eRequires
One argument requires another.
@ eExcludes
One argument excludes another.
@ eInt8
Convertible into an integer number (Int8 only)
@ eInputFile
Name of file (must exist and be readable)
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
@ eDouble
Convertible into a floating point number (double)
@ eString
An arbitrary string.
@ eOutputFile
Name of file (must be writable)
@ eInteger
Convertible into an integer number (int or Int8)
@ eGZipFile
.gz file (including concatenated files)
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
void Set(const string &name, const string &value)
Set an environment variable by name.
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
const string & GetMsg(void) const
Get message string.
void Warning(CExceptionArgs_Base &args)
static string NormalizePath(const string &path, EFollowLinks follow_links=eIgnoreLinks)
Normalize a path.
virtual bool Exists(void) const
Check the entry existence.
bool IsDir(EFollowLinks follow=eFollowLinks) const
Check whether a directory entry is a directory.
bool IsFile(EFollowLinks follow=eFollowLinks) const
Check whether a directory entry is a file.
static string ConcatPath(const string &first, const string &second)
Concatenate two parts of the path for the current OS.
static string GetCwd(void)
Get the current working directory.
@ eIfExists_Throw
You can make call of AsInputFile/AsOutputFile only once, on each following call throws CFileException...
#define MSerial_AsnBinary
#define MSerial_AsnText
I/O stream manipulators.
CTempString GetCurrentLine(void) const
bool AtEOF(void) const
Indicates (negatively) whether there is any more input.
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
void Reset(void)
Reset reference object.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
virtual const string & Get(const string &section, const string &name, TFlags flags=0) const
Get the parameter value.
bool Set(const string &section, const string &name, const string &value, TFlags flags=0, const string &comment=kEmptyStr)
Set the configuration parameter value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
IO_PREFIX::ifstream CNcbiIfstream
Portable alias for ifstream.
static string DoubleToString(double value, int precision=-1, TNumToStringFlags flags=0)
Convert double to string.
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
static list< string > & Split(const CTempString str, const CTempString delim, list< string > &arr, TSplitFlags flags=0, vector< SIZE_TYPE > *token_pos=NULL)
Split a string using specified delimiters.
static bool EndsWith(const CTempString str, const CTempString end, ECase use_case=eCase)
Check if a string ends with a specified suffix value.
static bool IsBlank(const CTempString str, SIZE_TYPE pos=0)
Check if a string is blank (has no text).
static double StringToDouble(const CTempStringEx str, TStringToNumFlags flags=0)
Convert string to double.
static const string BoolToString(bool value)
Convert bool to string.
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
static SIZE_TYPE Find(const CTempString str, const CTempString pattern, ECase use_case=eCase, EDirection direction=eForwardSearch, SIZE_TYPE occurrence=0)
Find the pattern in the string.
static string & Replace(const string &src, const string &search, const string &replace, string &dst, SIZE_TYPE start_pos=0, SIZE_TYPE max_replace=0, SIZE_TYPE *num_replace=0)
Replace occurrences of a substring within a string.
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
static bool SplitInTwo(const CTempString str, const CTempString delim, string &str1, string &str2, TSplitFlags flags=0)
Split a string into two pieces using the specified delimiters.
TErrCode GetErrCode(void) const
Get error code.
static enable_if< is_arithmetic< TNumeric >::value||is_convertible< TNumeric, Int8 >::value, string >::type NumericToString(TNumeric value, TNumToStringFlags flags=0, int base=10)
Convert numeric value to string.
static string TruncateSpaces(const string &str, ETrunc where=eTrunc_Both)
Truncate whitespace in a string.
@ fAllowTrailingSpaces
Ignore trailing whitespace characters.
@ fAllowLeadingSpaces
Ignore leading whitespace characters in converted string.
@ fSplit_Tokenize
All delimiters are merged and trimmed, to get non-empty tokens only.
@ eConvert
Failure to convert string.
@ eNocase
Case insensitive compare.
#define DEF_CONN_REG_SECTION
#define REG_CONN_SERVICE_NAME
@ eNa_strand_both
in forward orientation
Implementation of a number of BlastHSPWriters to save hits from a BLAST search, and subsequently retu...
#define kBestHit_OverhangMax
Maximum value for overhang.
#define kBestHit_OverhangMin
Minimum value for overhang.
#define kBestHit_ScoreEdgeMin
Minimum value for score_edge.
#define kBestHit_ScoreEdgeMax
Maximum value for score_edge.
Lightweight interface for getting lines of data with minimal memory copying.
const GenericPointer< typename T::ValueType > T2 value
std::istream & in(std::istream &in_, double &x_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
C++ API for the PSI-BLAST PSSM engine.
Defines BLAST database access classes.
Defines exception class and several constants for SeqDB.
string SeqDB_ResolveDbPath(const string &filename)
Resolve a file path using SeqDB's path algorithms.
static SLJIT_INLINE sljit_ins msg(sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b)
CRef< objects::CObjectManager > om
Boolean nsg_compatibility_mode
Compatibility option for the NCBI's structure group (note nsg_ prefix, stands for NCBI's structure gr...
static string kMaxValue("MaxValue")