NCBI C++ ToolKit
blast_args.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_args.cpp 101043 2023-10-19 14:38:29Z camacho $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================*/
25 
26 /*****************************************************************************
27 
28 File name: blast_args.cpp
29 
30 Author: Jason Papadopoulos
31 
32 ******************************************************************************/
33 
34 /** @file blast_args.cpp
35  * convert blast-related command line
36  * arguments into blast options
37 */
38 #include <ncbi_pch.hpp>
39 #include <corelib/ncbi_system.hpp>
44 #include <algo/blast/api/objmgr_query_data.hpp> /* for CObjMgrQueryFactory */
48 #include <util/format_guess.hpp>
49 #include <util/line_reader.hpp>
51 #include <algo/blast/blastinput/blast_input.hpp> // for CInputException
52 #include <algo/winmask/seq_masker_istat_factory.hpp> // for CSeqMaskerIstatFactory::DiscoverStatType
53 #include <connect/ncbi_connutil.h>
55 
56 #include <algo/blast/api/msa_pssm_input.hpp> // for CPsiBlastInputClustalW
57 #include <algo/blast/api/pssm_engine.hpp> // for CPssmEngine
58 
60 #include <objtools/blast/seqdb_reader/tax4blastsqlite.hpp> // for taxid to their descendant taxids lookup
62 BEGIN_SCOPE(blast)
64 USING_SCOPE(align_format);
65 
66 void
67 IBlastCmdLineArgs::ExtractAlgorithmOptions(const CArgs& /* cmd_line_args */,
68  CBlastOptions& /* options */)
69 {}
70 
72  const string& program_desc)
73  : m_ProgName(program_name), m_ProgDesc(program_desc)
74 {}
75 
76 void
78 {
79  // program description
80  arg_desc.SetUsageContext(m_ProgName, m_ProgDesc + " " +
81  CBlastVersion().Print());
82 }
83 
85  const string& default_task)
86 : m_SupportedTasks(supported_tasks), m_DefaultTask(default_task)
87 {
89  if ( !m_DefaultTask.empty() ) {
91  }
92 }
93 
94 void
96 {
97  arg_desc.SetCurrentGroup("General search options");
98  if ( !m_DefaultTask.empty() ) {
99  arg_desc.AddDefaultKey(kTask, "task_name", "Task to execute",
101  } else {
102  arg_desc.AddKey(kTask, "task_name", "Task to execute",
104  }
106  arg_desc.SetCurrentGroup("");
107 
108 }
109 
110 void
112  CBlastOptions& /* options */)
113 {
114  // N.B.: handling of tasks occurs at the application level to ensure that
115  // only relevant tasks are added (@sa CBlastnAppArgs)
116 }
117 
119 {
120  // Only support blastn for now
121  if (program == eBlastTypeBlastn) {
122  m_QueryIsProtein = false;
123  m_IsRpsBlast = false;
124  m_ShowPercentIdentity= true;
125  m_IsTblastx= false;
126  m_IsIgBlast = false;
127  m_SuppressSumStats = true;
128  m_IsBlastn = true;
129  }
130  else {
131  NCBI_THROW(CInputException, eInvalidInput, "Invalid program");
132  }
133 }
134 
135 void
137 {
138  arg_desc.SetCurrentGroup("General search options");
139 
140  // evalue cutoff
141  if (!m_IsIgBlast) {
142  string des = "Expectation value (E) threshold for saving hits. Default = 10";
143  if(m_IsBlastn) {
144  des += " (1000 for blastn-short)";
145  }
147  } else if (m_QueryIsProtein) {
148  arg_desc.AddDefaultKey(kArgEvalue, "evalue",
149  "Expectation value (E) threshold for saving hits ",
151  NStr::DoubleToString(1.0));
152  } else {
153  //igblastn
154  arg_desc.AddDefaultKey(kArgEvalue, "evalue",
155  "Expectation value (E) threshold for saving hits ",
157  NStr::DoubleToString(20.0));
158  }
159 
160  // word size
161  // Default values: blastn=11, megablast=28, others=3
162  if(!m_IsRpsBlast) {
163  const string description = m_QueryIsProtein
164  ? "Word size for wordfinder algorithm"
165  : "Word size for wordfinder algorithm (length of best perfect match)";
166  arg_desc.AddOptionalKey(kArgWordSize, "int_value", description,
171  }
172 
173  if ( !m_IsRpsBlast && !m_IsTblastx) {
174  // gap open penalty
175  arg_desc.AddOptionalKey(kArgGapOpen, "open_penalty",
176  "Cost to open a gap",
178 
179  // gap extend penalty
180  arg_desc.AddOptionalKey(kArgGapExtend, "extend_penalty",
181  "Cost to extend a gap",
183  }
184 
185 
187  arg_desc.SetCurrentGroup("Restrict search or results");
188  arg_desc.AddOptionalKey(kArgPercentIdentity, "float_value",
189  "Percent identity",
192  new CArgAllow_Doubles(0.0, 100.0));
193  }
194 
195  if (!m_IsIgBlast) {
196  arg_desc.SetCurrentGroup("Restrict search or results");
197  arg_desc.AddOptionalKey(kArgQueryCovHspPerc, "float_value",
198  "Percent query coverage per hsp",
201  new CArgAllow_Doubles(0.0, 100.0));
202 
203  arg_desc.AddOptionalKey(kArgMaxHSPsPerSubject, "int_value",
204  "Set maximum number of HSPs per subject sequence to save for each query",
208 
209  arg_desc.SetCurrentGroup("Extension options");
210  // ungapped X-drop
211  // Default values: blastn=20, megablast=10, others=7
212  arg_desc.AddOptionalKey(kArgUngappedXDropoff, "float_value",
213  "X-dropoff value (in bits) for ungapped extensions",
215 
216  // Tblastx is ungapped only.
217  if (!m_IsTblastx) {
218  // initial gapped X-drop
219  // Default values: blastn=30, megablast=20, tblastx=0, others=15
220  arg_desc.AddOptionalKey(kArgGappedXDropoff, "float_value",
221  "X-dropoff value (in bits) for preliminary gapped extensions",
223 
224  // final gapped X-drop
225  // Default values: blastn/megablast=50, tblastx=0, others=25
226  arg_desc.AddOptionalKey(kArgFinalGappedXDropoff, "float_value",
227  "X-dropoff value (in bits) for final gapped alignment",
229  }
230  }
231  arg_desc.SetCurrentGroup("Statistical options");
232  // effective search space
233  // Default value is the real size
234  arg_desc.AddOptionalKey(kArgEffSearchSpace, "int_value",
235  "Effective length of the search space",
239 
240  if (!m_SuppressSumStats) {
241  arg_desc.AddOptionalKey(kArgSumStats, "bool_value",
242  "Use sum statistics",
244  }
245 
246  arg_desc.SetCurrentGroup("");
247 }
248 
249 void
251  CBlastOptions& opt)
252 {
253  if (args.Exist(kArgEvalue) && args[kArgEvalue]) {
254  opt.SetEvalueThreshold(args[kArgEvalue].AsDouble());
255  }
256 
257  int gap_open=0, gap_extend=0;
258  if (args.Exist(kArgMatrixName) && args[kArgMatrixName])
260  (args[kArgMatrixName].AsString().c_str(), &gap_open, &gap_extend);
261 
262  if (args.Exist(kArgGapOpen) && args[kArgGapOpen]) {
263  opt.SetGapOpeningCost(args[kArgGapOpen].AsInteger());
264  }
265  else if (args.Exist(kArgMatrixName) && args[kArgMatrixName]) {
266  opt.SetGapOpeningCost(gap_open);
267  }
268 
269  if (args.Exist(kArgGapExtend) && args[kArgGapExtend]) {
270  opt.SetGapExtensionCost(args[kArgGapExtend].AsInteger());
271  }
272  else if (args.Exist(kArgMatrixName) && args[kArgMatrixName]) {
273  opt.SetGapExtensionCost(gap_extend);
274  }
275 
276  if (args.Exist(kArgUngappedXDropoff) && args[kArgUngappedXDropoff]) {
277  opt.SetXDropoff(args[kArgUngappedXDropoff].AsDouble());
278  }
279 
280  if (args.Exist(kArgGappedXDropoff) && args[kArgGappedXDropoff]) {
281  opt.SetGapXDropoff(args[kArgGappedXDropoff].AsDouble());
282  }
283 
285  opt.SetGapXDropoffFinal(args[kArgFinalGappedXDropoff].AsDouble());
286  }
287 
288  if ( args.Exist(kArgWordSize) && args[kArgWordSize]) {
289  if (m_QueryIsProtein && args[kArgWordSize].AsInteger() > 4){
291  opt.SetWordThreshold(19.3);
292  if (args[kArgWordSize].AsInteger() > 5) {
293  opt.SetWordThreshold(21.0);
294  }
295  if (args[kArgWordSize].AsInteger() > 6) {
296  opt.SetWordThreshold(20.25);
297  }
298  }
299  opt.SetWordSize(args[kArgWordSize].AsInteger());
300 
301  }
302 
303  if (args.Exist(kArgEffSearchSpace) && args[kArgEffSearchSpace]) {
305  env.Set("OLD_FSC", "true");
306  opt.SetEffectiveSearchSpace(args[kArgEffSearchSpace].AsInt8());
307  }
308 
309  if (args.Exist(kArgPercentIdentity) && args[kArgPercentIdentity]) {
310  opt.SetPercentIdentity(args[kArgPercentIdentity].AsDouble());
311  }
312 
313  if (args.Exist(kArgQueryCovHspPerc) && args[kArgQueryCovHspPerc]) {
314  opt.SetQueryCovHspPerc(args[kArgQueryCovHspPerc].AsDouble());
315  }
316 
318  opt.SetMaxHspsPerSubject(args[kArgMaxHSPsPerSubject].AsInteger());
319  }
320 
321  if (args.Exist(kArgSumStats) && args[kArgSumStats]) {
322  opt.SetSumStatisticsMode(args[kArgSumStats].AsBoolean());
323  }
324 }
325 
326 void
328 {
329  arg_desc.SetCurrentGroup("Query filtering options");
330 
331  if (m_QueryIsProtein) {
332  arg_desc.AddDefaultKey(kArgSegFiltering, "SEG_options",
333  "Filter query sequence with SEG "
334  "(Format: '" + kDfltArgApplyFiltering + "', " +
335  "'window locut hicut', or '" + kDfltArgNoFiltering +
336  "' to disable)",
339  arg_desc.AddDefaultKey(kArgLookupTableMaskingOnly, "soft_masking",
340  "Apply filtering locations as soft masks",
343  } else {
344  arg_desc.AddOptionalKey(kArgDustFiltering, "DUST_options",
345  "Filter query sequence with DUST "
346  "(Format: '" + kDfltArgApplyFiltering + "', " +
347  "'level window linker', or '" + kDfltArgNoFiltering +
348  "' to disable) Default = '20 64 1' ('" + kDfltArgNoFiltering + "' for blastn-short)",
350  arg_desc.AddOptionalKey(kArgFilteringDb, "filtering_database",
351  "BLAST database containing filtering elements (i.e.: repeats)",
353 
354  arg_desc.AddOptionalKey(kArgWindowMaskerTaxId, "window_masker_taxid",
355  "Enable WindowMasker filtering using a Taxonomic ID",
357 
358  arg_desc.AddOptionalKey(kArgWindowMaskerDatabase, "window_masker_db",
359  "Enable WindowMasker filtering using this repeats database.",
361 
362  arg_desc.AddDefaultKey(kArgLookupTableMaskingOnly, "soft_masking",
363  "Apply filtering locations as soft masks",
366  }
367 
368  arg_desc.SetCurrentGroup("");
369 }
370 
371 void
372 CFilteringArgs::x_TokenizeFilteringArgs(const string& filtering_args,
373  vector<string>& output) const
374 {
375  output.clear();
376  NStr::Split(filtering_args, " ", output);
377  if (output.size() != 3) {
378  NCBI_THROW(CInputException, eInvalidInput,
379  "Invalid number of arguments to filtering option");
380  }
381 }
382 
383 void
385 {
386  if (args[kArgLookupTableMaskingOnly]) {
387  opt.SetMaskAtHash(args[kArgLookupTableMaskingOnly].AsBoolean());
388  }
389 
390  vector<string> tokens;
391 
392  try {
393  if (m_QueryIsProtein && args[kArgSegFiltering]) {
394  const string& seg_opts = args[kArgSegFiltering].AsString();
395  if (seg_opts == kDfltArgNoFiltering) {
396  opt.SetSegFiltering(false);
397  } else if (seg_opts == kDfltArgApplyFiltering) {
398  opt.SetSegFiltering(true);
399  } else {
400  x_TokenizeFilteringArgs(seg_opts, tokens);
404  }
405  }
406 
407  if ( !m_QueryIsProtein && args[kArgDustFiltering]) {
408  const string& dust_opts = args[kArgDustFiltering].AsString();
409  if (dust_opts == kDfltArgNoFiltering) {
410  opt.SetDustFiltering(false);
411  } else if (dust_opts == kDfltArgApplyFiltering) {
412  opt.SetDustFiltering(true);
413  } else {
414  x_TokenizeFilteringArgs(dust_opts, tokens);
418  }
419  }
420  } catch (const CStringException& e) {
422  NCBI_THROW(CInputException, eInvalidInput,
423  "Invalid input for filtering parameters");
424  }
425  }
426 
427  int filter_dbs = 0;
428 
429  if (args.Exist(kArgFilteringDb) && args[kArgFilteringDb]) {
430  opt.SetRepeatFilteringDB(args[kArgFilteringDb].AsString().c_str());
431  filter_dbs++;
432  }
433 
434  if (args.Exist(kArgWindowMaskerTaxId) &&
435  args[kArgWindowMaskerTaxId]) {
436 
438  (args[kArgWindowMaskerTaxId].AsInteger());
439 
440  filter_dbs++;
441  }
442 
443  if (args.Exist(kArgWindowMaskerDatabase) &&
444  args[kArgWindowMaskerDatabase]) {
445  const string& stat_file = args[kArgWindowMaskerDatabase].AsString();
450  string msg("Only optimized binary windowmasker stat files are supported");
451  NCBI_THROW(CInputException, eInvalidInput, msg);
452  }
453 
454  opt.SetWindowMaskerDatabase(stat_file.c_str());
455  filter_dbs++;
456  }
457 
458  if (filter_dbs > 1) {
459  string msg =
460  string("Please specify at most one of ") + kArgFilteringDb + ", " +
462 
463  NCBI_THROW(CInputException, eInvalidInput, msg);
464  }
465 }
466 
467 void
469 {
470  arg_desc.SetCurrentGroup("Extension options");
471  // 2-hit wordfinder window size
472  arg_desc.AddOptionalKey(kArgWindowSize, "int_value",
473  "Multiple hits window size, use 0 to specify "
474  "1-hit algorithm",
476  arg_desc.SetConstraint(kArgWindowSize,
478  arg_desc.SetCurrentGroup("");
479 }
480 
481 void
483 {
484  if (args[kArgWindowSize]) {
485  opt.SetWindowSize(args[kArgWindowSize].AsInteger());
486  } else {
487  int window = -1;
489  opt.GetMatrixName(),
490  &window);
491  if (window != -1) {
492  opt.SetWindowSize(window);
493  }
494  }
495 }
496 
497 void
499 {
500  arg_desc.SetCurrentGroup("Extension options");
501  // 2-hit wordfinder off diagonal range
502  arg_desc.AddDefaultKey(kArgOffDiagonalRange, "int_value",
503  "Number of off-diagonals to search for the 2nd hit, "
504  "use 0 to turn off",
509  arg_desc.SetCurrentGroup("");
510 }
511 
512 void
514 {
515  if (args[kArgOffDiagonalRange]) {
516  opt.SetOffDiagonalRange(args[kArgOffDiagonalRange].AsInteger());
517  } else {
518  opt.SetOffDiagonalRange(0);
519  }
520 }
521 
522 // Options specific to rmblastn -RMH-
523 void
525 {
526  arg_desc.SetCurrentGroup("General search options");
527 
528  arg_desc.AddDefaultKey(kArgMatrixName, "matrix_name",
529  "Scoring matrix name",
531  string(""));
532 
533  arg_desc.AddFlag(kArgComplexityAdj,
534  "Use complexity adjusted scoring",
535  true);
536 
537 
538  arg_desc.AddDefaultKey(kArgMaskLevel, "int_value",
539  "Masklevel - percentage overlap allowed per "
540  "query domain [0-101]",
543  arg_desc.SetConstraint(kArgMaskLevel,
545 
546  arg_desc.SetCurrentGroup("");
547 }
548 
549 // Options specific to rmblastn -RMH-
550 void
552 {
553  if (args[kArgMatrixName]) {
554  opt.SetMatrixName(args[kArgMatrixName].AsString().c_str());
555  }
556 
558 
559  if (args[kArgMaskLevel]) {
560  opt.SetMaskLevel(args[kArgMaskLevel].AsInteger());
561  }
562 
563  if (args[kArgMinRawGappedScore]) {
564  opt.SetCutoffScore(args[kArgMinRawGappedScore].AsInteger());
565  }else if (args[kArgUngappedXDropoff]) {
566  opt.SetCutoffScore(args[kArgUngappedXDropoff].AsInteger());
567  }
568 }
569 
570 void
572 {
573  arg_desc.SetCurrentGroup("General search options");
574  // lookup table word score threshold
575  arg_desc.AddOptionalKey(kArgWordScoreThreshold, "float_value",
576  "Minimum word score such that the word is added to the "
577  "BLAST lookup table",
581  arg_desc.SetCurrentGroup("");
582 }
583 
584 static bool
585 s_IsDefaultWordThreshold(EProgram program, double threshold)
586 {
587  int word_threshold = static_cast<int>(threshold);
588  bool retval = true;
589  if (program == eBlastp &&
590  word_threshold != BLAST_WORD_THRESHOLD_BLASTP) {
591  retval = false;
592  } else if (program == eBlastx &&
593  word_threshold != BLAST_WORD_THRESHOLD_BLASTX) {
594  retval = false;
595  } else if (program == eTblastn &&
596  word_threshold != BLAST_WORD_THRESHOLD_TBLASTN) {
597  retval = false;
598  }
599  return retval;
600 }
601 
602 void
604  CBlastOptions& opt)
605 {
606  if (args[kArgWordScoreThreshold]) {
607  opt.SetWordThreshold(args[kArgWordScoreThreshold].AsDouble());
608  } else if (s_IsDefaultWordThreshold(opt.GetProgram(),
609  opt.GetWordThreshold())) {
610  double threshold = -1;
612  opt.GetMatrixName(),
613  &threshold);
614  if (threshold != -1) {
615  opt.SetWordThreshold(threshold);
616  }
617  }
618 }
619 
620 void
622 {
623  arg_desc.SetCurrentGroup("General search options");
624  arg_desc.AddOptionalKey(kArgMatrixName, "matrix_name",
625  "Scoring matrix name (normally BLOSUM62)",
627  arg_desc.SetCurrentGroup("");
628 }
629 
630 void
632 {
633  if (args[kArgMatrixName]) {
634  opt.SetMatrixName(args[kArgMatrixName].AsString().c_str());
635  }
636 }
637 
638 void
640 {
641  // TLM arg_desc.SetCurrentGroup("Nucleotide scoring options");
642 
643  arg_desc.SetCurrentGroup("General search options");
644  // blastn mismatch penalty
645  arg_desc.AddOptionalKey(kArgMismatch, "penalty",
646  "Penalty for a nucleotide mismatch",
648  arg_desc.SetConstraint(kArgMismatch,
650 
651  // blastn match reward
652  arg_desc.AddOptionalKey(kArgMatch, "reward",
653  "Reward for a nucleotide match",
655  arg_desc.SetConstraint(kArgMatch,
657 
658 
659  arg_desc.SetCurrentGroup("Extension options");
661  "Use non-greedy dynamic programming extension",
662  true);
663 
664  arg_desc.SetCurrentGroup("");
665 }
666 
667 void
669  CBlastOptions& options)
670 {
671  if (cmd_line_args.Exist(kArgMismatch) && cmd_line_args[kArgMismatch]) {
672  options.SetMismatchPenalty(cmd_line_args[kArgMismatch].AsInteger());
673  }
674  if (cmd_line_args.Exist(kArgMatch) && cmd_line_args[kArgMatch]) {
675  options.SetMatchReward(cmd_line_args[kArgMatch].AsInteger());
676  }
677 
678  if (cmd_line_args.Exist(kArgNoGreedyExtension) &&
679  cmd_line_args[kArgNoGreedyExtension]) {
682  }
683 }
684 
687 const string
689 
690 void
692 {
693  arg_desc.SetCurrentGroup("Extension options");
694  // FIXME: this can be applied to any program, but since it was only offered
695  // in megablast, we're putting it here
696  arg_desc.AddOptionalKey(kArgMinRawGappedScore, "int_value",
697  "Minimum raw gapped score to keep an alignment "
698  "in the preliminary gapped and traceback stages",
700 
701  arg_desc.SetCurrentGroup("Discontiguous MegaBLAST options");
702 
703  arg_desc.AddOptionalKey(kArgDMBTemplateType, "type",
704  "Discontiguous MegaBLAST template type",
713 
714  arg_desc.AddOptionalKey(kArgDMBTemplateLength, "int_value",
715  "Discontiguous MegaBLAST template length",
717  set<int> allowed_values;
718  allowed_values.insert(16);
719  allowed_values.insert(18);
720  allowed_values.insert(21);
722  new CArgAllowIntegerSet(allowed_values));
726 
727  arg_desc.SetCurrentGroup("");
728 }
729 
730 void
732  CBlastOptions& options)
733 {
734  if (args[kArgMinRawGappedScore]) {
735  options.SetCutoffScore(args[kArgMinRawGappedScore].AsInteger());
736  }
737 
738  if (args[kArgDMBTemplateType]) {
739  const string& type = args[kArgDMBTemplateType].AsString();
740  EDiscWordType temp_type = eMBWordCoding;
741 
742  if (type == kTemplType_Coding) {
743  temp_type = eMBWordCoding;
744  } else if (type == kTemplType_Optimal) {
745  temp_type = eMBWordOptimal;
746  } else if (type == kTemplType_CodingAndOptimal) {
747  temp_type = eMBWordTwoTemplates;
748  } else {
749  abort();
750  }
751  options.SetMBTemplateType(static_cast<unsigned char>(temp_type));
752  }
753 
754  if (args[kArgDMBTemplateLength]) {
755  unsigned char tlen =
756  static_cast<unsigned char>(args[kArgDMBTemplateLength].AsInteger());
757  options.SetMBTemplateLength(tlen);
758  }
759 
760  // FIXME: should the window size be adjusted if this is set?
761 }
762 
763 void
765 {
766  arg_desc.SetCurrentGroup("General search options");
767  // composition based statistics, keep in sync with ECompoAdjustModes
768  // documentation in composition_constants.h
769 
770  string zero_opt = !m_ZeroOptDescr.empty() ?
771  (string)" 0 or F or f: " + m_ZeroOptDescr + "\n" :
772  " 0 or F or f: No composition-based statistics\n";
773 
774  string one_opt_insrt = m_Is2and3Supported ? "" : " or T or t";
775 
776  string more_opts = m_Is2and3Supported ?
777  " 2 or T or t : Composition-based score adjustment as in "
778  "Bioinformatics 21:902-911,\n"
779  " 2005, conditioned on sequence properties\n"
780  " 3: Composition-based score adjustment as in "
781  "Bioinformatics 21:902-911,\n"
782  " 2005, unconditionally\n" : "";
783 
784  string legend = (string)"Use composition-based statistics:\n"
785  " D or d: default (equivalent to " + m_DefaultOpt + " )\n"
786  + zero_opt
787  + " 1" + one_opt_insrt + ": Composition-based statistics "
788  "as in NAR 29:2994-3005, 2001\n"
789  + more_opts;
790 
791  arg_desc.AddDefaultKey(kArgCompBasedStats, "compo", legend,
793 
794 
795  arg_desc.SetCurrentGroup("Miscellaneous options");
796  // Use Smith-Waterman algorithm in traceback stage
797  // FIXME: available only for gapped blastp/tblastn, and with
798  // composition-based statistics
799  arg_desc.AddFlag(kArgUseSWTraceback,
800  "Compute locally optimal Smith-Waterman alignments?",
801  true);
802  arg_desc.SetCurrentGroup("");
803 }
804 
805 /**
806  * @brief Auxiliary function to set the composition based statistics and smith
807  * waterman options
808  *
809  * @param opt BLAST options object [in|out]
810  * @param comp_stat_string command line value for composition based statistics
811  * [in]
812  * @param smith_waterman_value command line value for determining the use of
813  * the smith-waterman algorithm [in]
814  * @param ungapped pointer to the value which determines whether the search
815  * should be ungapped or not. It is NULL if ungapped searches are not
816  * applicable
817  * @param is_deltablast is program deltablast [in]
818  */
819 static void
821  const string& comp_stat_string,
822  bool smith_waterman_value,
823  bool* ungapped)
824 {
825  const EProgram program = opt.GetProgram();
826  if (program == eBlastp || program == eTblastn ||
827  program == ePSIBlast || program == ePSITblastn ||
828  program == eRPSBlast || program == eRPSTblastn ||
829  program == eBlastx || program == eDeltaBlast) {
830 
832 
833  switch (comp_stat_string[0]) {
834  case '0': case 'F': case 'f':
835  compo_mode = eNoCompositionBasedStats;
836  break;
837  case '1':
838  compo_mode = eCompositionBasedStats;
839  break;
840  case 'D': case 'd':
841  if ((program == eRPSBlast) || (program == eRPSTblastn)) {
842  compo_mode = eNoCompositionBasedStats;
843  }
844  else if (program == eDeltaBlast) {
845  compo_mode = eCompositionBasedStats;
846  }
847  else {
848  compo_mode = eCompositionMatrixAdjust;
849  }
850  break;
851  case '2':
852  compo_mode = eCompositionMatrixAdjust;
853  break;
854  case '3':
855  compo_mode = eCompoForceFullMatrixAdjust;
856  break;
857  case 'T': case 't':
858  compo_mode = (program == eRPSBlast || program == eRPSTblastn || program == eDeltaBlast) ?
860  break;
861  }
862 
863  if(program == ePSITblastn) {
864  compo_mode = eNoCompositionBasedStats;
865  }
866 
867  if (ungapped && *ungapped && compo_mode != eNoCompositionBasedStats) {
868  NCBI_THROW(CInputException, eInvalidInput,
869  "Composition-adjusted searched are not supported with "
870  "an ungapped search, please add -comp_based_stats F or "
871  "do a gapped search");
872  }
873 
874  opt.SetCompositionBasedStats(compo_mode);
875  if (program == eBlastp &&
876  compo_mode != eNoCompositionBasedStats &&
877  tolower(comp_stat_string[1]) == 'u') {
878  opt.SetUnifiedP(1);
879  }
880  opt.SetSmithWatermanMode(smith_waterman_value);
881  }
882 }
883 
884 void
886  CBlastOptions& opt)
887 {
888  if (args[kArgCompBasedStats]) {
889  unique_ptr<bool> ungapped(args.Exist(kArgUngapped)
890  ? new bool(args[kArgUngapped]) : 0);
892  args[kArgCompBasedStats].AsString(),
893  args[kArgUseSWTraceback],
894  ungapped.get());
895  }
896 
897 }
898 
899 void
901 {
902  // perform gapped search
903 #if 0
904  arg_desc.AddOptionalKey(ARG_GAPPED, "gapped",
905  "Perform gapped alignment (default T, but "
906  "not available for tblastx)",
909  arg_desc.AddAlias("-gapped", ARG_GAPPED);
910 #endif
911  arg_desc.SetCurrentGroup("Extension options");
912  arg_desc.AddFlag(kArgUngapped, "Perform ungapped alignment only?", true);
913  arg_desc.SetCurrentGroup("");
914 }
915 
916 void
918 {
919 #if 0
920  if (args[ARG_GAPPED] && options.GetProgram() != eTblastx) {
921  options.SetGappedMode(args[ARG_GAPPED].AsBoolean());
922  }
923 #endif
924  options.SetGappedMode( !args[kArgUngapped] );
925 }
926 
927 void
929 {
930  arg_desc.SetCurrentGroup("General search options");
931  // largest intron length
932  arg_desc.AddDefaultKey(kArgMaxIntronLength, "length",
933  "Length of the largest intron allowed in a translated "
934  "nucleotide sequence when linking multiple distinct "
935  "alignments",
940  arg_desc.SetCurrentGroup("");
941 }
942 
943 void
945  CBlastOptions& opt)
946 {
947  if ( !args[kArgMaxIntronLength] ) {
948  return;
949  }
950 
951  // sum statistics are defaulted to be on unless a cmdline option is set
952  opt.SetLongestIntronLength(args[kArgMaxIntronLength].AsInteger());
953 
954 }
955 
956 void
958 {
959  arg_desc.SetCurrentGroup("General search options");
960  // applicable in blastx/tblastn, off by default
961  arg_desc.AddOptionalKey(kArgFrameShiftPenalty, "frameshift",
962  "Frame shift penalty (for use with out-of-frame "
963  "gapped alignment in blastx or tblastn, default "
964  "ignored)",
969  arg_desc.SetCurrentGroup("");
970 }
971 
972 void
974  CBlastOptions& opt)
975 {
976  if (args[kArgFrameShiftPenalty]) {
977  if (args[kArgCompBasedStats]) {
978  string cbs = args[kArgCompBasedStats].AsString();
979 
980  if ((cbs[0] != '0' )&& (cbs[0] != 'F') && (cbs[0] != 'f')) {
981  NCBI_THROW(CInputException, eInvalidInput,
982  "Composition-adjusted searches are not supported with "
983  "Out-Of-Frame option, please add -comp_based_stats F ");
984  }
985  }
986 
987  opt.SetOutOfFrameMode();
988  opt.SetFrameShiftPenalty(args[kArgFrameShiftPenalty].AsInteger());
989  }
990 }
991 
992 /// Auxiliary class to validate the genetic code input
994 {
995 protected:
996  /// Overloaded method from CArgAllow
997  virtual bool Verify(const string& value) const {
998  static int gcs[] = {1,2,3,4,5,6,9,10,11,12,13,14,15,16,21,22,23,24,25,26,27,28,29,30,31,33};
999  static const set<int> genetic_codes(gcs, gcs+sizeof(gcs)/sizeof(*gcs));
1000  const int val = NStr::StringToInt(value);
1001  return (genetic_codes.find(val) != genetic_codes.end());
1002  }
1003 
1004  /// Overloaded method from CArgAllow
1005  virtual string GetUsage(void) const {
1006  return "values between: 1-6, 9-16, 21-31, 33";
1007  }
1008 };
1009 
1010 void
1012 {
1013  if (m_Target == eQuery) {
1014  arg_desc.SetCurrentGroup("Input query options");
1015  // query genetic code
1016  arg_desc.AddDefaultKey(kArgQueryGeneticCode, "int_value",
1017  "Genetic code to use to translate query (see https://www.ncbi.nlm.nih.gov/Taxonomy/taxonomyhome.html/index.cgi?chapter=cgencodes for details)\n",
1022  } else {
1023  arg_desc.SetCurrentGroup("General search options");
1024  // DB genetic code
1025  arg_desc.AddDefaultKey(kArgDbGeneticCode, "int_value",
1026  "Genetic code to use to translate "
1027  "database/subjects (see user manual for details)\n",
1032 
1033  }
1034  arg_desc.SetCurrentGroup("");
1035 }
1036 
1037 void
1039  CBlastOptions& opt)
1040 {
1041  const EProgram program = opt.GetProgram();
1042 
1043  if (m_Target == eQuery && args[kArgQueryGeneticCode]) {
1044  opt.SetQueryGeneticCode(args[kArgQueryGeneticCode].AsInteger());
1045  }
1046 
1047  if (m_Target == eDatabase && args[kArgDbGeneticCode] &&
1048  (program == eTblastn || program == eTblastx) ) {
1049  opt.SetDbGeneticCode(args[kArgDbGeneticCode].AsInteger());
1050  }
1051 }
1052 
1053 void
1055 {
1056  arg_desc.SetCurrentGroup("Extension options");
1057 
1058  const double default_value = m_QueryIsProtein
1060  arg_desc.AddDefaultKey(kArgGapTrigger, "float_value",
1061  "Number of bits to trigger gapping",
1063  NStr::DoubleToString(default_value));
1064  arg_desc.SetCurrentGroup("");
1065 }
1066 
1067 void
1069  CBlastOptions& opt)
1070 {
1071  if (args[kArgGapTrigger]) {
1072  opt.SetGapTrigger(args[kArgGapTrigger].AsDouble());
1073  }
1074 }
1075 
1076 void
1078 {
1079  arg_desc.SetCurrentGroup("PSSM engine options");
1080 
1081  // Pseudo count
1082  arg_desc.AddDefaultKey(kArgPSIPseudocount, "pseudocount",
1083  "Pseudo-count value used when constructing PSSM",
1086 
1087  if (m_IsDeltaBlast) {
1088  arg_desc.AddDefaultKey(kArgDomainInclusionEThreshold, "ethresh",
1089  "E-value inclusion threshold for alignments "
1090  "with conserved domains",
1093  }
1094 
1095  // Evalue inclusion threshold
1096  arg_desc.AddDefaultKey(kArgPSIInclusionEThreshold, "ethresh",
1097  "E-value inclusion threshold for pairwise alignments",
1100 
1101  arg_desc.SetCurrentGroup("");
1102 }
1103 
1104 void
1106  CBlastOptions& opt)
1107 {
1108  if (args[kArgPSIPseudocount]) {
1109  opt.SetPseudoCount(args[kArgPSIPseudocount].AsInteger());
1110  }
1111 
1112  if (args[kArgPSIInclusionEThreshold]) {
1113  opt.SetInclusionThreshold(args[kArgPSIInclusionEThreshold].AsDouble());
1114  }
1115 
1117  && args[kArgDomainInclusionEThreshold]) {
1118 
1120  args[kArgDomainInclusionEThreshold].AsDouble());
1121  }
1122 }
1123 
1124 void
1126 {
1127 
1128  if (m_DbTarget == eNucleotideDb) {
1129  arg_desc.SetCurrentGroup("PSI-TBLASTN options");
1130 
1131  // PSI-tblastn checkpoint
1132  arg_desc.AddOptionalKey(kArgPSIInputChkPntFile, "psi_chkpt_file",
1133  "PSI-TBLASTN checkpoint file",
1137  kArgRemote);
1138  } else {
1139  arg_desc.SetCurrentGroup("PSI-BLAST options");
1140 
1141  // Number of iterations
1142  arg_desc.AddDefaultKey(kArgPSINumIterations, "int_value",
1143  "Number of iterations to perform (0 means run "
1144  "until convergence)", CArgDescriptions::eInteger,
1150  kArgRemote);
1151  // checkpoint file
1152  arg_desc.AddOptionalKey(kArgPSIOutputChkPntFile, "checkpoint_file",
1153 
1154  "File name to store checkpoint file",
1156  // ASCII matrix file
1157  arg_desc.AddOptionalKey(kArgAsciiPssmOutputFile, "ascii_mtx_file",
1158  "File name to store ASCII version of PSSM",
1160 
1161  arg_desc.AddFlag(kArgSaveLastPssm, "Save PSSM after the last database "
1162  "search");
1163  arg_desc.AddFlag(kArgSaveAllPssms, "Save PSSM after each iteration "
1164  "(file name is given in -save_pssm or "
1165  "-save_ascii_pssm options)");
1166 
1167  if (!m_IsDeltaBlast) {
1168  vector<string> msa_exclusions;
1169  msa_exclusions.push_back(kArgPSIInputChkPntFile);
1170  msa_exclusions.push_back(kArgQuery);
1171  msa_exclusions.push_back(kArgQueryLocation);
1172  // pattern and MSA is not supported
1173  msa_exclusions.push_back(kArgPHIPatternFile);
1174  arg_desc.SetCurrentGroup("");
1175  arg_desc.SetCurrentGroup("");
1176 
1177  // MSA restart file
1178  arg_desc.SetCurrentGroup("PSSM engine options");
1179  arg_desc.AddOptionalKey(kArgMSAInputFile, "align_restart",
1180  "File name of multiple sequence alignment to "
1181  "restart PSI-BLAST",
1183  ITERATE(vector<string>, exclusion, msa_exclusions) {
1186  *exclusion);
1187  }
1188 
1189  arg_desc.AddOptionalKey(kArgMSAMasterIndex, "index",
1190  "Ordinal number (1-based index) of the sequence"
1191  " to use as a master in the multiple sequence "
1192  "alignment. If not provided, the first sequence"
1193  " in the multiple sequence alignment will be "
1194  "used", CArgDescriptions::eInteger);
1197  ITERATE(vector<string>, exclusion, msa_exclusions) {
1200  *exclusion);
1201  }
1208 
1209  arg_desc.AddFlag(kArgIgnoreMsaMaster,
1210  "Ignore the master sequence when creating PSSM", true);
1211 
1212  vector<string> ignore_pssm_master_exclusions;
1213  ignore_pssm_master_exclusions.push_back(kArgMSAMasterIndex);
1214  ignore_pssm_master_exclusions.push_back(kArgPSIInputChkPntFile);
1215  ignore_pssm_master_exclusions.push_back(kArgQuery);
1216  ignore_pssm_master_exclusions.push_back(kArgQueryLocation);
1217  ITERATE(vector<string>, exclusion, msa_exclusions) {
1220  *exclusion);
1221  }
1225 
1226  // PSI-BLAST checkpoint
1227  arg_desc.AddOptionalKey(kArgPSIInputChkPntFile, "psi_chkpt_file",
1228  "PSI-BLAST checkpoint file",
1232  kArgRemote);
1233  }
1234  }
1235 
1236  if (!m_IsDeltaBlast) {
1239  kArgQuery);
1243  }
1244  arg_desc.SetCurrentGroup("");
1245 }
1246 
/// Remainder of a function whose opening signature lines are missing from
/// this excerpt. The visible body builds a PSSM from a multiple sequence
/// alignment.
///
/// @param opt                   Supplies the matrix name and the gap
///                              opening/extension costs.
/// @param save_ascii_pssm       Forwarded to PSIDiagnosticsRequestNewEx().
/// @param msa_master_idx        Passed as the last constructor argument of
///                              CPsiBlastInputClustalW.
/// @param ignore_pssm_tmplt_seq Copied into nsg_compatibility_mode.
/// @return The value returned by CPssmEngine::Run().
1249  CBlastOptions& opt, bool save_ascii_pssm,
1250  unsigned int msa_master_idx,
1251  bool ignore_pssm_tmplt_seq)
1252 {
1253  // FIXME get these from CBlastOptions
1254  CPSIBlastOptions psiblast_opts;
1255  PSIBlastOptionsNew(&psiblast_opts);
1256  psiblast_opts->nsg_compatibility_mode = ignore_pssm_tmplt_seq;
1257 
      // input_stream is presumably declared on the missing signature line.
1258  CPSIDiagnosticsRequest diags(PSIDiagnosticsRequestNewEx(save_ascii_pssm));
1259  CPsiBlastInputClustalW pssm_input(input_stream, *psiblast_opts,
1260  opt.GetMatrixName(), diags, NULL, 0,
1261  opt.GetGapOpeningCost(),
1262  opt.GetGapExtensionCost(),
1263  msa_master_idx);
1264  CPssmEngine pssm_engine(&pssm_input);
1265  return pssm_engine.Run();
1266 }
1267 
/// Reads the PSI-BLAST related command-line arguments into this object's
/// members (the function name and first parameter sit on a signature line
/// missing from this excerpt).
///
/// @param opt Options object; in the visible lines it is only forwarded to
///            x_CreatePssmFromMsa().
/// @throws CInputException if a save-PSSM flag is given without an output
///         file argument, or if the input PSSM cannot be read.
1268 void
1270  CBlastOptions& opt)
1271 {
1272  if (m_DbTarget == eProteinDb) {
      // The command-line iteration count is only taken while
      // m_NumIterations still equals 1.
1273  if (args[kArgPSINumIterations]) {
1274  if(m_NumIterations == 1)
1275  m_NumIterations = args[kArgPSINumIterations].AsInteger();
1276  }
1277 
      // The save-last-PSSM flag needs at least one of the two PSSM output
      // file arguments to be present.
1278  if (args.Exist(kArgSaveLastPssm) && args[kArgSaveLastPssm] &&
1279  (!args.Exist(kArgPSIOutputChkPntFile) ||
1280  !args[kArgPSIOutputChkPntFile]) &&
1281  (!args.Exist(kArgAsciiPssmOutputFile) ||
1282  !args[kArgAsciiPssmOutputFile])) {
1283 
1284  NCBI_THROW(CInputException, eInvalidInput, kArgSaveLastPssm +
1285  " option requires " + kArgPSIOutputChkPntFile + " or " +
1287  }
1288 
      // Same requirement for the save-all-PSSMs flag.
1289  if (args.Exist(kArgSaveAllPssms) && args[kArgSaveAllPssms] &&
1290  (!args.Exist(kArgPSIOutputChkPntFile) ||
1291  !args[kArgPSIOutputChkPntFile]) &&
1292  (!args.Exist(kArgAsciiPssmOutputFile) ||
1293  !args[kArgAsciiPssmOutputFile])) {
1294 
1295  NCBI_THROW(CInputException, eInvalidInput, kArgSaveAllPssms +
1296  " option requires " + kArgPSIOutputChkPntFile + " or " +
1298  }
1299 
1300  const bool kSaveAllPssms
1301  = args.Exist(kArgSaveAllPssms) && args[kArgSaveAllPssms];
      // The object built from the checkpoint file name and kSaveAllPssms is
      // created on lines missing from this excerpt.
1302  if (args.Exist(kArgPSIOutputChkPntFile) &&
1303  args[kArgPSIOutputChkPntFile]) {
1306  (args[kArgPSIOutputChkPntFile].AsString(), kSaveAllPssms));
1307  }
      // NOTE(review): indexed without an Exist() guard, unlike the
      // surrounding lookups - confirm this argument is always registered
      // when m_DbTarget == eProteinDb.
1308  const bool kSaveAsciiPssm = args[kArgAsciiPssmOutputFile];
1309  if (kSaveAsciiPssm) {
1312  (args[kArgAsciiPssmOutputFile].AsString(), kSaveAllPssms));
1313  }
      // Build the PSSM from a multiple sequence alignment file if one was
      // supplied.
1314  if (args.Exist(kArgMSAInputFile) && args[kArgMSAInputFile]) {
1315  CNcbiIstream& in = args[kArgMSAInputFile].AsInputFile();
1316  unsigned int msa_master_idx = 0;
1317  if (args[kArgMSAMasterIndex]) {
      // The supplied value is decremented by one before use.
      // NOTE(review): a supplied value of 0 would wrap around in the
      // unsigned variable; presumably an argument constraint prevents
      // that - confirm.
1318  msa_master_idx = args[kArgMSAMasterIndex].AsInteger() - 1;
1319  }
1320  m_Pssm = x_CreatePssmFromMsa(in, opt, kSaveAsciiPssm,
1321  msa_master_idx,
1322  args[kArgIgnoreMsaMaster]);
1323  }
1324  if (!m_IsDeltaBlast) {
1326  }
1327 
1328  if (args.Exist(kArgSaveLastPssm) && args[kArgSaveLastPssm]) {
1329  m_SaveLastPssm = true;
1330  }
1331  }
1332 
      // Read a PSSM from the input checkpoint file. The enclosing condition
      // and the allocation of m_Pssm are on lines missing from this excerpt.
1334  CNcbiIstream& in = args[kArgPSIInputChkPntFile].AsInputFile();
1335  _ASSERT(m_Pssm.Empty());
1337  try {
      // The serialization format is guessed from the stream contents; the
      // first two case labels are on lines missing from this excerpt.
1338  switch (CFormatGuess().Format(in)) {
1340  in >> MSerial_AsnBinary >> *m_Pssm;
1341  break;
1343  in >> MSerial_AsnText >> *m_Pssm;
1344  break;
1345  case CFormatGuess::eXml:
1346  in >> MSerial_Xml >> *m_Pssm;
1347  break;
1348  default:
1349  NCBI_THROW(CInputException, eInvalidInput,
1350  "Unsupported format for PSSM");
1351  }
1352  } catch (const CSerialException&) {
      // Deserialization failures are reported as invalid input, naming the
      // offending file.
1353  string msg("Unrecognized format for PSSM in ");
1354  msg += args[kArgPSIInputChkPntFile].AsString() + " (must be ";
1355  msg += "PssmWithParameters)";
1356  NCBI_THROW(CInputException, eInvalidInput, msg);
1357  }
1358  _ASSERT(m_Pssm.NotEmpty());
1359  }
1360 }
1361 
/// Registers the PHI-BLAST pattern-file argument in the "PHI-BLAST options"
/// group and then resets the current group to the empty string. The function
/// name and the trailing arguments of the AddOptionalKey() call are on lines
/// missing from this excerpt.
1362 void
1364 {
1365  arg_desc.SetCurrentGroup("PHI-BLAST options");
1366 
1367  arg_desc.AddOptionalKey(kArgPHIPatternFile, "file",
1368  "File name containing pattern to search",
1373 
1374  arg_desc.SetCurrentGroup("");
1375 }
1376 
/// Reads the PHI-BLAST pattern file, if one was given, and stores the pattern
/// in the options object (the function name is on a signature line missing
/// from this excerpt).
///
/// The file is scanned line by line: a line starting with "ID" sets `name`
/// and a line starting with "PA" sets `pattern`, each taking the text from
/// the fifth character onwards. Later lines overwrite earlier ones.
///
/// @param opt Receives the pattern through SetPHIPattern().
/// @throws CInputException if no non-empty pattern was read.
1377 void
1379  CBlastOptions& opt)
1380 {
1381  if (args.Exist(kArgPHIPatternFile) && args[kArgPHIPatternFile]) {
1382  CNcbiIstream& in = args[kArgPHIPatternFile].AsInputFile();
      // Clear any error state and rewind to the start of the stream.
1383  in.clear();
1384  in.seekg(0);
1385  char buffer[4096];
1386  string line;
1387  string pattern;
1388  string name;
      // NOTE(review): istream::getline() sets failbit on a line that does
      // not fit the buffer, which ends this loop early.
1389  while (in.getline(buffer, 4096)) {
1390  line = buffer;
1391  string ltype = line.substr(0, 2);
      // NOTE(review): substr(4) throws std::out_of_range when an "ID" or
      // "PA" line is shorter than 4 characters.
1392  if (ltype == "ID")
1393  name = line.substr(4);
1394  else if (ltype == "PA")
1395  pattern = line.substr(4);
1396  }
      // The start of the second SetPHIPattern() argument is on a line
      // missing from this excerpt. `name` is not used in the visible lines.
1397  if (!pattern.empty())
1398  opt.SetPHIPattern(pattern.c_str(),
1400  ? true : false));
1401  else
1402  NCBI_THROW(CInputException, eInvalidInput,
1403  "PHI pattern not read");
1404  }
1405 }
1406 
/// Registers three default-valued arguments in the "KBLASTP options" group:
/// the Jaccard distance threshold, the minimal number of LSH matches and the
/// number of candidate sequences. The value types and defaults are on lines
/// missing from this excerpt, as is the function name.
1407 void
1409 {
1410  arg_desc.SetCurrentGroup("KBLASTP options");
1411  arg_desc.AddDefaultKey(kArgJDistance, "threshold", "Jaccard Distance",
1413  arg_desc.AddDefaultKey(kArgMinHits, "minhits", "minimal number of LSH matches",
1415  arg_desc.AddDefaultKey(kArgCandidateSeqs, "candidates", "Number of candidate sequences to process with BLAST",
1417 }
1418 
/// Copies the KBLASTP arguments into m_JDistance, m_MinHits and
/// m_CandidateSeqs. Each value is read whenever its argument is registered
/// (Exist()); there is no separate check that the user supplied it.
///
/// @param opt Not used in the visible body.
1419 void
1421  CBlastOptions& opt)
1422 {
1423  if (args.Exist(kArgJDistance))
1424  m_JDistance = args[kArgJDistance].AsDouble();
1425  if (args.Exist(kArgMinHits))
1426  m_MinHits = args[kArgMinHits].AsInteger();
1427  if (args.Exist(kArgCandidateSeqs))
1428  m_CandidateSeqs = args[kArgCandidateSeqs].AsInteger();
1429 }
1430 
1431 
/// Registers the DELTA-BLAST arguments: the domain database name (string,
/// default kDfltArgRpsDb) and the "show domain hits" flag. The two
/// statements ending in kArgRemote and kArgSubject begin on lines missing
/// from this excerpt; they are presumably SetDependency() calls - confirm
/// against the full source.
1432 void
1434 {
1435  arg_desc.SetCurrentGroup("DELTA-BLAST options");
1436 
1437  arg_desc.AddDefaultKey(kArgRpsDb, "database_name", "BLAST domain "
1438  "database name", CArgDescriptions::eString,
1439  kDfltArgRpsDb);
1440 
1441  arg_desc.AddFlag(kArgShowDomainHits, "Show domain hits");
1443  kArgRemote);
1445  kArgSubject);
1446 }
1447 
/// Creates the domain search database from the kArgRpsDb argument and stores
/// it in m_DomainDb. The second constructor argument and the body of the
/// kArgShowDomainHits branch are on lines missing from this excerpt.
///
/// @param opt Not used in the visible lines.
1448 void
1450  CBlastOptions& opt)
1451 {
1452  m_DomainDb.Reset(new CSearchDatabase(args[kArgRpsDb].AsString(),
1454 
1455  if (args.Exist(kArgShowDomainHits)) {
1457  }
1458 }
1459 
/// Registers the read-mapping arguments. "Mapping options" holds the score
/// cutoff, maximum edit distance, splice switch and reference type; "Query
/// filtering options" holds the lookup-table filtering arguments. Several
/// value types, defaults and one SetConstraint() call head are on lines
/// missing from this excerpt.
1460 void
1462 {
1463 
1464  arg_desc.SetCurrentGroup("Mapping options");
1465  arg_desc.AddDefaultKey(kArgScore, "num", "Cutoff score for accepting "
1466  "alignments. Can be expressed as a number or a "
1467  "function of read length: "
1468  "L,b,a for a * length + b.\n"
1469  "Zero means that the cutoff score will be equal to:\n"
1470  "read length, if read length <= 20,\n"
1471  "20, if read length <= 30,\n"
1472  "read length - 10, if read length <= 50,\n"
1473  "40, otherwise.",
1475  arg_desc.AddOptionalKey(kArgMaxEditDist, "num", "Cutoff edit distance for "
1476  "accepting an alignment\nDefault = unlimited",
1478  arg_desc.AddDefaultKey(kArgSplice, "TF", "Search for spliced alignments",
1479  CArgDescriptions::eBoolean, "true");
1480  arg_desc.AddDefaultKey(kArgRefType, "type", "Type of the reference: "
1481  "genome or transcriptome",
1482  CArgDescriptions::eString, "genome");
      // Only these two reference types are accepted.
1483  arg_desc.SetConstraint(kArgRefType,
1484  &(*new CArgAllow_Strings, "genome", "transcriptome"));
1485 
1486  arg_desc.SetCurrentGroup("Query filtering options");
1487  arg_desc.AddDefaultKey(kArgLimitLookup, "TF", "Remove word seeds with "
1488  "high frequency in the searched database",
1489  CArgDescriptions::eBoolean, "true");
1490  arg_desc.AddDefaultKey(kArgMaxDbWordCount, "num", "Words that appear more "
1491  "than this number of times in the database will be"
1492  " masked in the lookup table",
      // The call that receives this 2..255 constraint begins on a line
      // missing from this excerpt.
1496  new CArgAllowValuesBetween(2, 255, true));
1497  arg_desc.AddDefaultKey(kArgLookupStride, "num", "Number of words to skip "
1498  "after collecting one while creating a lookup table",
1500 
1501  arg_desc.SetCurrentGroup("");
1502 }
1503 
1504 
/// Transfers the read-mapping arguments into the options object (the function
/// name is on a signature line missing from this excerpt).
///
/// @param opt Receives the cutoff score coefficients, maximum edit distance,
///            splice switch, lookup filter and lookup-table settings.
/// @throws CInputException if the score argument is malformed.
1505 void
1507  CBlastOptions& opt)
1508 {
1509  if (args.Exist(kArgScore) && args[kArgScore]) {
1510 
1511  string s = args[kArgScore].AsString();
1512  // score cutoff may be defined as a linear function of query length:
1513  // L,0.0,0.6 ...
1514  if (s[0] == 'L') {
1515  list<string> tokens;
1516  NStr::Split(s, ",", tokens);
1517  vector<double> coeffs;
      // The first token is the leading "L...", so at least two coefficient
      // tokens are required; extra tokens are not rejected here.
      // NOTE(review): the concatenated literals below read "ax + b,a, b"
      // with no space after the comma.
1518  if (tokens.size() < 3) {
1519  NCBI_THROW(CInputException, eInvalidInput,
1520  (string)"Incorrectly formatted score function: " +
1521  s + ". It should be of the form 'L,b,a' for ax + b,"
1522  "a, b must be numbers");
1523  }
      // Skip the first token and parse every remaining one as a double.
1524  auto it = tokens.begin();
1525  ++it;
1526  try {
1527  for (; it != tokens.end(); ++it) {
1528  coeffs.push_back(NStr::StringToDouble(*it));
1529  }
1530  }
1531  catch (CException& e) {
1532  NCBI_THROW(CInputException, eInvalidInput,
1533  (string)"Incorrectly formatted score function: " +
1534  s + ". It should be of the form 'L,b,a' for ax + b,"
1535  " a, b must be real numbers");
1536  }
1537  opt.SetCutoffScoreCoeffs(coeffs);
1538  }
1539  else {
1540  // ... or a numerical constant
      // The statement inside this try block is on a line missing from this
      // excerpt.
1541  try {
1543  }
1544  catch (CException&) {
1545  NCBI_THROW(CInputException, eInvalidInput,
1546  (string)"Incorrectly formatted score threshold: " +
1547  s + ". It must be either an integer or a linear "
1548  "function in the form: L,b,a for ax + b, a and b "
1549  "must be real numbers");
1550  }
1551  }
1552  }
1553 
1554  if (args.Exist(kArgMaxEditDist) && args[kArgMaxEditDist]) {
1555  opt.SetMaxEditDistance(args[kArgMaxEditDist].AsInteger());
1556  }
1557 
1558  if (args.Exist(kArgSplice) && args[kArgSplice]) {
1559  opt.SetSpliceAlignments(args[kArgSplice].AsBoolean());
1560  }
1561 
1562  string ref_type = "genome";
1563  if (args.Exist(kArgRefType) && args[kArgRefType]) {
1564  ref_type = args[kArgRefType].AsString();
1565  }
1566 
      // Without an explicit lookup-filter argument, filtering is enabled
      // only when the reference type is "genome".
1567  if (args.Exist(kArgLimitLookup) && args[kArgLimitLookup]) {
1568  opt.SetLookupDbFilter(args[kArgLimitLookup].AsBoolean());
1569  }
1570  else {
1571  opt.SetLookupDbFilter(ref_type == "genome");
1572  }
1573 
1574  if (args.Exist(kArgMaxDbWordCount) && args[kArgMaxDbWordCount]) {
1575  opt.SetMaxDbWordCount(args[kArgMaxDbWordCount].AsInteger());
1576  }
1577 
1578  if (args.Exist(kArgLookupStride) && args[kArgLookupStride]) {
1579  opt.SetLookupTableStride(args[kArgLookupStride].AsInteger());
1580  }
1581 }
1582 
1583 
/// Registers the Ig-BLAST arguments in the "Ig-BLAST options" group (the
/// function name is on a signature line missing from this excerpt).
///
/// Per-gene arguments are created by appending one character of "VDJ" to a
/// base argument name. One gene is registered when m_IsProtein is true and
/// three otherwise. Several arguments are registered only when m_IsProtein is
/// false. Many value types, defaults and constraint objects are on lines
/// missing from this excerpt.
1584 void
1586 {
1587  arg_desc.SetCurrentGroup("Ig-BLAST options");
1588  const static char suffix[] = "VDJ";
1589  const static int df_num_align[3] = {3,3,3};
1590  int num_genes = (m_IsProtein) ? 1 : 3;
1591 
1592 
1593  for (int gene=0; gene<num_genes; ++gene) {
1594  // Subject sequence input
1595  /* TODO disabled for now
1596  string arg_sub = kArgGLSubject;
1597  arg_sub.push_back(suffix[gene]);
1598  arg_desc.AddOptionalKey(arg_sub , "filename",
1599  "Germline subject sequence to align",
1600  CArgDescriptions::eInputFile);
1601  */
1602  // Germline database file name
1603  string arg_db = kArgGLDatabase;
1604  arg_db.push_back(suffix[gene]);
1605  arg_desc.AddOptionalKey(arg_db, "germline_database_name",
1606  "Germline database name",
1608  //arg_desc.SetDependency(arg_db, CArgDescriptions::eExcludes, arg_sub);
1609  // Number of alignments to show
1610  string arg_na = kArgGLNumAlign;
1611  arg_na.push_back(suffix[gene]);
      // The default comes from df_num_align for this gene.
1612  arg_desc.AddDefaultKey(arg_na, "int_value",
1613  "Number of Germline sequences to show alignments for",
1615  NStr::IntToString(df_num_align[gene]));
1616  //arg_desc.SetConstraint(arg_na,
1617  // new CArgAllowValuesBetween(0, 4));
1618  // Seqidlist
1619  arg_desc.AddOptionalKey(arg_db + "_seqidlist", "filename",
1620  "Restrict search of germline database to list of SeqIds's",
1622  }
1623 
      // Arguments registered only when m_IsProtein is false.
1624  if (!m_IsProtein) {
1625  arg_desc.AddDefaultKey(kArgCRegionNumAlign, "int_value",
1626  "Number of Germline sequences to show alignments for",
1628 
1629  arg_desc.AddOptionalKey(kArgCRegionDatabase, "constant_region_database_name",
1630  "C region database name",
1632 
1633  arg_desc.AddOptionalKey(kArgCustomInternalData, "filename",
1634  "custom internal data file for V region annotation",
1636 
1637  arg_desc.AddOptionalKey(kArgDFrameDefinitionFile, "filename",
1638  "D gene frame definition file",
1640 
1641  arg_desc.AddOptionalKey(kArgGLChainType, "filename",
1642  "File containing the coding frame start positions for sequences in germline J database",
1644 
1645  arg_desc.AddOptionalKey(kArgMinDMatch, "min_D_match",
1646  "Required minimal consecutive nucleotide base matches for D genes ",
1648  arg_desc.SetConstraint(kArgMinDMatch,
1650 
1651  arg_desc.AddDefaultKey(kArgVPenalty, "V_penalty",
1652  "Penalty for a nucleotide mismatch in V gene",
1654  arg_desc.SetConstraint(kArgVPenalty,
1655  new CArgAllowValuesBetween(-4, 0));
1656 
1657 
1658  arg_desc.AddDefaultKey(kArgDPenalty, "D_penalty",
1659  "Penalty for a nucleotide mismatch in D gene",
1661 
1662  arg_desc.SetConstraint(kArgDPenalty,
1663  new CArgAllowValuesBetween(-5, 0));
1664 
1665  arg_desc.AddDefaultKey(kArgJPenalty, "J_penalty",
1666  "Penalty for a nucleotide mismatch in J gene",
1668 
1669  arg_desc.SetConstraint(kArgJPenalty,
1670  new CArgAllowValuesBetween(-4, 0));
1671 
1672  arg_desc.AddDefaultKey(kArgNumClonotype, "num_clonotype",
1673  "Number of top clonotypes to show ",
1677 
1678  arg_desc.AddOptionalKey(kArgClonotypeFile, "clonotype_out",
1679  "Output file name for clonotype info",
1681 
1682  arg_desc.AddFlag(kArgDetectOverlap, "Allow V(D)J genes to overlap. This option is active only when D_penalty and J_penalty are set to -4 and -3, respectively", true);
1683 
1684 
1685  }
1686 
      // Arguments registered regardless of m_IsProtein.
1687  arg_desc.AddDefaultKey(kArgGLOrigin, "germline_origin",
1688  "The organism for your query sequence. Supported organisms include human, mouse, rat, rabbit and rhesus_monkey for Ig and human and mouse for TCR. Custom organism is also supported but you need to supply your own germline annotations (see IgBLAST web site for details)",
1689  CArgDescriptions::eString, "human");
1690 
1691  arg_desc.AddDefaultKey(kArgGLDomainSystem, "domain_system",
1692  "Domain system to be used for segment annotation",
1693  CArgDescriptions::eString, "imgt");
1694  arg_desc.SetConstraint(kArgGLDomainSystem, &(*new CArgAllow_Strings, "kabat", "imgt"));
1695 
1696  arg_desc.AddDefaultKey(kArgIgSeqType, "sequence_type",
1697  "Specify Ig or T cell receptor sequence",
1699  arg_desc.SetConstraint(kArgIgSeqType, &(*new CArgAllow_Strings, "Ig", "TCR"));
1700 
1701 
1702  arg_desc.AddFlag(kArgGLFocusV, "Should the search only be for V segment (effective only for non-germline database search using -db option)?", true);
1703 
1704  arg_desc.AddFlag(kArgExtendAlign5end, "Extend V gene alignment at 5' end", true);
1705 
1706  arg_desc.AddFlag(kArgExtendAlign3end, "Extend J gene alignment at 3' end", true);
1707 
1708  arg_desc.AddDefaultKey(kArgMinVLength, "Min_V_Length",
1709  "Minimal required V gene length",
1711 
1712  arg_desc.SetConstraint(kArgMinVLength,
1714 
1715  if (! m_IsProtein) {
1716  arg_desc.AddDefaultKey(kArgMinJLength, "Min_J_Length",
1717  "Minimal required J gene length",
1719 
1720  arg_desc.SetConstraint(kArgMinJLength,
1722  }
1723 
1724  if (! m_IsProtein) {
1725  arg_desc.AddFlag(kArgTranslate, "Show translated alignments", true);
1726  }
1727 
1728  arg_desc.SetCurrentGroup("");
1729 }
1730 
/// Returns the BLAST database data loader name derived from a
/// SBlastDbParam built around `db_handle`. The registration itself, and the
/// priority value that completes the trace message, are on lines missing
/// from this excerpt.
///
/// @param db_handle Database handle wrapped in the loader parameter.
/// @return The loader name from
///         CBlastDbDataLoader::GetLoaderNameFromArgs().
1731 static string s_RegisterOMDataLoader(CRef<CSeqDB> db_handle)
1732 { // the blast formatter requires that the database coexist in
1733  // the same scope with the query sequences
1738  CBlastDbDataLoader::SBlastDbParam param(db_handle);
1739  string retval(CBlastDbDataLoader::GetLoaderNameFromArgs(param));
1740  _TRACE("Registering " << retval << " at priority " <<
1742  return retval;
1743 }
1744 
/// Populates m_IgOptions and m_Scope from the Ig-BLAST command-line arguments
/// (the function name is on a signature line missing from this excerpt).
///
/// Many statements are missing from this excerpt, including the creation of
/// m_IgOptions and the declarations of `env`, `app`, `mol_type` and
/// `subjects`. The comments below describe only the visible lines.
///
/// @param opts Not referenced in the visible lines.
/// @throws CInputException if the default germline annotation database
///         cannot be opened.
1745 void
1747  CBlastOptions& opts)
1748 {
      // Candidate data directories. paths[0] is assigned on a line missing
      // from this excerpt. paths[1] comes from the IGDATA environment
      // variable, and paths[2] from the [BLAST] IGDATA registry entry when
      // `app` is non-null.
1749  string paths[3];
1752  paths[1] = CDirEntry::NormalizePath(env.Get("IGDATA"), eFollowLinks);
1754  if (app) {
1755  const CNcbiRegistry& registry = app->GetConfig();
1756  paths[2] = CDirEntry::NormalizePath(registry.Get("BLAST","IGDATA"), eFollowLinks);
1757  } else {
1758 #if defined(NCBI_OS_DARWIN)
1759  paths[2] = "/usr/local/ncbi/igblast/data";
1760 #else
1761  paths[2] = paths[0];
1762 #endif
1763  }
1764 
1766 
1770 
1772  m_IgOptions->m_Origin = args[kArgGLOrigin].AsString();
1773  m_IgOptions->m_DomainSystem = args[kArgGLDomainSystem].AsString();
1774  m_IgOptions->m_FocusV = args.Exist(kArgGLFocusV) ? args[kArgGLFocusV] : false;
1778  m_IgOptions->m_MinVLength = args[kArgMinVLength].AsInteger();
1779  if (args.Exist(kArgMinJLength) && args[kArgMinJLength]) {
1780  m_IgOptions->m_MinJLength = args[kArgMinJLength].AsInteger();
1781  } else {
1783  }
1784  m_IgOptions->m_Translate = args.Exist(kArgTranslate) ? args[kArgTranslate] : false;
1787 
1788  if (!m_IsProtein) {
      // Auxiliary file: the user-supplied name, or "<origin>_gl.aux". The
      // first candidate directory containing it as a regular file wins;
      // otherwise the bare name is kept.
1789  string aux_file = (args.Exist(kArgGLChainType) && args[kArgGLChainType])
1790  ? args[kArgGLChainType].AsString()
1791  : m_IgOptions->m_Origin + "_gl.aux";
1792  m_IgOptions->m_AuxFilename = aux_file;
1793  for (int i=0; i<3; i++) {
1794  string aux_path = CDirEntry::ConcatPath(paths[i], aux_file);
1795  CDirEntry entry(aux_path);
1796  if (entry.Exists() && entry.IsFile()) {
1797  m_IgOptions->m_AuxFilename = aux_path;
1798  break;
1799  }
1800  }
1801 
1804  }
1805 
1808  }
1809  }
1810 
1812 
1814 
1815  // default germline database name for annotation
      // The first candidate directory containing an "internal_data"
      // sub-directory becomes m_IgDataPath.
1816  for (int i=0; i<3; i++) {
1817  string int_data = CDirEntry::ConcatPath(paths[i], "internal_data");
1818  CDirEntry entry(int_data);
1819  if (entry.Exists() && entry.IsDir()) {
1820  m_IgOptions->m_IgDataPath = int_data;
1821  break;
1822  }
1823  }
1824 
      // The sequence type defaults to "Ig" unless the argument was supplied.
1825  m_IgOptions->m_SequenceType = "Ig";
1826  if (args.Exist(kArgIgSeqType) && args[kArgIgSeqType]) {
1827  m_IgOptions->m_SequenceType = args[kArgIgSeqType].AsString();
1828  }
1829 
      // Default annotation database, stored in slot 3 of m_Db. Its path is
      // partly built on lines missing from this excerpt; the visible tail
      // appends "_TR" for TCR sequences, then "_V".
1830  string df_db_name = CDirEntry::ConcatPath(
1833  ((m_IgOptions->m_SequenceType == "TCR")?"_TR":"") + "_V");
1834  CRef<CSearchDatabase> db(new CSearchDatabase(df_db_name, mol_type));
1835  m_IgOptions->m_Db[3].Reset(new CLocalDbAdapter(*db));
      // Any exception from opening the database is reported as invalid
      // input.
1836  try {
1837  db->GetSeqDb();
1838  } catch(...) {
1839  NCBI_THROW(CInputException, eInvalidInput,
1840  "Germline annotation database " + df_db_name + " could not be found in [internal_data] directory");
1841  }
1842 
1844  if (args.Exist(kArgMinDMatch) && args[kArgMinDMatch]) {
1845  m_IgOptions->m_Min_D_match = args[kArgMinDMatch].AsInteger();
1846  }
1847 
1848  if (args.Exist(kArgVPenalty) && args[kArgVPenalty]) {
1849  m_IgOptions->m_V_penalty = args[kArgVPenalty].AsInteger();
1850  }
1851 
1852  if (args.Exist(kArgDPenalty) && args[kArgDPenalty]) {
1853  m_IgOptions->m_D_penalty = args[kArgDPenalty].AsInteger();
1854  }
1855 
1856  if (args.Exist(kArgJPenalty) && args[kArgJPenalty]) {
1857  m_IgOptions->m_J_penalty = args[kArgJPenalty].AsInteger();
1858  }
1859 
      // opts_hndl is assigned in both branches on lines missing from this
      // excerpt.
1860  CRef<CBlastOptionsHandle> opts_hndl;
1861  if (m_IgOptions->m_IsProtein) {
1863  } else {
1865  }
1866 
1867 
      // Per-gene setup: argument names are the base names plus one
      // character of "VDJ".
1868  const static char suffix[] = "VDJ";
1869  int num_genes = (m_IsProtein) ? 1: 3;
1870  for (int gene=0; gene< num_genes; ++gene) {
1871  string arg_sub = kArgGLSubject;
1872  string arg_db = kArgGLDatabase;
1873  string arg_na = kArgGLNumAlign;
1874 
1875  arg_sub.push_back(suffix[gene]);
1876  arg_db.push_back(suffix[gene]);
1877  arg_na.push_back(suffix[gene]);
1878 
1879  m_IgOptions->m_NumAlign[gene] = args[arg_na].AsInteger();
1880 
      // A subject sequence file takes precedence over a database for this
      // gene.
1881  if (args.Exist(arg_sub) && args[arg_sub]) {
1882  CNcbiIstream& subj_input_stream = args[arg_sub].AsInputFile();
1883  TSeqRange subj_range;
1884 
1885  const bool parse_deflines = args.Exist(kArgParseDeflines)
1886  ? bool(args[kArgParseDeflines])
1888  const bool use_lcase_masks = args.Exist(kArgUseLCaseMasking)
1889  ? bool(args[kArgUseLCaseMasking])
1892  CRef<CScope> scope = ReadSequencesToBlast(subj_input_stream,
1894  subj_range, parse_deflines,
1895  use_lcase_masks, subjects);
1896  m_Scope->AddScope(*scope,
1898  CRef<IQueryFactory> sub_seqs(
1899  new blast::CObjMgr_QueryFactory(*subjects));
1901  sub_seqs, opts_hndl));
1902  } else {
      // Database name: the user-supplied value, or "<origin>_gl_<suffix>".
1903  string gl_db_name = m_IgOptions->m_Origin + "_gl_";
1904  gl_db_name.push_back(suffix[gene]);
1905  string db_name = (args.Exist(arg_db) && args[arg_db])
1906  ? args[arg_db].AsString() : gl_db_name;
1907  db.Reset(new CSearchDatabase(db_name, mol_type));
1908 
      // The use of the resolved seqidlist path `fn` is on lines missing
      // from this excerpt.
1909  if (args.Exist(arg_db + "_seqidlist") && args[arg_db + "_seqidlist"]) {
1910  string fn(SeqDB_ResolveDbPath(args[arg_db + "_seqidlist"].AsString()));
1913  }
1914 
1915  m_IgOptions->m_Db[gene].Reset(new CLocalDbAdapter(*db));
1916  m_Scope->AddDataLoader(s_RegisterOMDataLoader(db->GetSeqDb()));
1917  }
1918  }
1919 
      // The optional C region database goes in slot 4 of m_Db; the slot is
      // reset to null when it is not supplied.
1920  if (args.Exist(kArgCRegionDatabase) && args[kArgCRegionDatabase]) {
1921  m_IgOptions->m_NumAlign[3] = args[kArgCRegionNumAlign].AsInteger();
1922  db.Reset(new CSearchDatabase(args[kArgCRegionDatabase].AsString(), mol_type));
1923  m_IgOptions->m_Db[4].Reset(new CLocalDbAdapter(*db));
1924  m_Scope->AddDataLoader(s_RegisterOMDataLoader(db->GetSeqDb()));
1925  } else {
1926  m_IgOptions->m_Db[4].Reset(0);
1927  }
1928 }
1929 
/// Registers the query-related arguments: the lower-case masking flag, the
/// query location, the search strand (only when m_QueryCannotBeNucl is
/// false) and the parse-deflines flag. The function name and some value
/// types/defaults are on lines missing from this excerpt.
1930 void
1932 {
1933 
1934  arg_desc.SetCurrentGroup("Query filtering options");
1935  // lowercase masking
1936  arg_desc.AddFlag(kArgUseLCaseMasking,
1937  "Use lower case filtering in query and subject sequence(s)?", true);
1938 
1939  arg_desc.SetCurrentGroup("Input query options");
1940  // query location
1941  arg_desc.AddOptionalKey(kArgQueryLocation, "range",
1942  "Location on the query sequence in 1-based offsets "
1943  "(Format: start-stop)",
1945 
1946  if ( !m_QueryCannotBeNucl) {
1947  // search strands
1948  arg_desc.AddDefaultKey(kArgStrand, "strand",
1949  "Query strand(s) to search against database/subject",
      // Accepted strand values: kDfltArgStrand, "plus" and "minus".
1951  arg_desc.SetConstraint(kArgStrand, &(*new CArgAllow_Strings,
1952  kDfltArgStrand, "plus", "minus"));
1953  }
1954 
1955  arg_desc.SetCurrentGroup("Miscellaneous options");
1956  arg_desc.AddFlag(kArgParseDeflines,
1957  "Should the query and subject defline(s) be parsed?", true);
1958 
1959  arg_desc.SetCurrentGroup("");
1960 }
1961 
/// Extracts the query strand, query range, lower-case masking and
/// parse-deflines settings from the parsed arguments. The function name and
/// the left-hand sides of the strand and flag assignments are on lines
/// missing from this excerpt.
///
/// @param opt Only its program type is consulted in the visible lines, to
///            decide whether a strand applies.
1962 void
1964  CBlastOptions& opt)
1965 {
1966  // Get the strand
1967  {
1969 
      // The strand argument is only examined for non-protein queries.
1970  if (!Blast_QueryIsProtein(opt.GetProgramType())) {
1971 
1972  if (args.Exist(kArgStrand) && args[kArgStrand]) {
1973  const string& kStrand = args[kArgStrand].AsString();
1974  if (kStrand == "both") {
1976  } else if (kStrand == "plus") {
1978  } else if (kStrand == "minus") {
1980  } else {
      // Any other strand value terminates the process.
      // NOTE(review): presumably unreachable because of an argument
      // constraint - confirm.
1981  abort();
1982  }
1983  }
1984  else {
1986  }
1987  }
1988  }
1989 
1990  // set the sequence range
1991  if (args.Exist(kArgQueryLocation) && args[kArgQueryLocation]) {
1992  m_Range = ParseSequenceRange(args[kArgQueryLocation].AsString(),
1993  "Invalid specification of query location");
1994  }
1995 
      // The assignment targets of these two flag values are on lines
      // missing from this excerpt.
1997  static_cast<bool>(args[kArgUseLCaseMasking]);
1999  static_cast<bool>(args[kArgParseDeflines]);
2000 }
2001 
/// Registers the query-input arguments for read mapping: lower-case masking,
/// quality filtering, input format, paired-read flag, mate file, SRA
/// accession sources, parse-deflines and SRA caching. The function name, the
/// heads of the dependency statements ending in kArgQuery / kArgInputFormat
/// and the head of the input-format constraint are on lines missing from
/// this excerpt.
2002 void
2004 {
2005 
2006  arg_desc.SetCurrentGroup("Query filtering options");
2007  // lowercase masking
2008  arg_desc.AddFlag(kArgUseLCaseMasking,
2009  "Use lower case filtering in subject sequence(s)?", true);
2010  arg_desc.AddDefaultKey(kArgQualityFilter, "TF", "Reject low quality "
2011  "sequences ", CArgDescriptions::eBoolean, "true");
2012 
2013  arg_desc.SetCurrentGroup("Input query options");
2014  arg_desc.AddDefaultKey(kArgInputFormat, "format", "Input format for "
2015  "sequences", CArgDescriptions::eString, "fasta");
      // Allowed format strings (the call head is on a missing line).
2017  "fasta", "fastc", "fastq",
2018  "asn1", "asn1b"));
2019  arg_desc.AddFlag(kArgPaired, "Input query sequences are paired", true);
2020  arg_desc.AddOptionalKey(kArgQueryMate, "infile", "FASTA file with "
2021  "mates for query sequences (if given in "
2022  "another file)", CArgDescriptions::eInputFile);
2024  kArgQuery);
2025 
2026  arg_desc.AddOptionalKey(kArgSraAccession, "accession",
2027  "Comma-separated SRA accessions",
2030  kArgQuery);
2032  kArgInputFormat);
2033 
2034  arg_desc.AddOptionalKey(kArgSraAccessionBatch, "file",
2035  "File with a list of SRA accessions, one per line",
2040  kArgQuery);
2042  kArgInputFormat);
2043 
2044  arg_desc.SetCurrentGroup("Miscellaneous options");
2045  arg_desc.AddDefaultKey(kArgParseDeflines, "TF", "Should the query and "
2046  "subject defline(s) be parsed?",
2047  CArgDescriptions::eBoolean, "true");
2048 
2049  arg_desc.AddFlag(kArgEnableSraCache, "Enable SRA caching in local files");
2052 
2053 
2054  arg_desc.SetCurrentGroup("");
2055 }
2056 
/// Extracts the read-mapping query settings: pairing, input format, quality
/// filtering, mate stream, SRA accessions and SRA caching. The function name,
/// the first statement of the body and the bodies of the input-format
/// branches are on lines missing from this excerpt.
///
/// @param opt Receives the paired flag and the read quality filtering
///            setting.
/// @throws CInputException for an unexpected input format string or when an
///         SRA source was given but yielded no accessions.
2057 void
2059  CBlastOptions& opt)
2060 {
2062 
2063  if (args.Exist(kArgPaired) && args[kArgPaired]) {
2064  opt.SetPaired(true);
2065  m_IsPaired = true;
2066  }
2067 
      // Map the format string to an input format; each branch body is on a
      // line missing from this excerpt.
2068  if (args.Exist(kArgInputFormat) && args[kArgInputFormat]) {
2069  if (args[kArgInputFormat].AsString() == "fasta") {
2071  }
2072  else if (args[kArgInputFormat].AsString() == "fastc") {
2074  }
2075  else if (args[kArgInputFormat].AsString() == "fastq") {
2077  }
2078  else if (args[kArgInputFormat].AsString() == "asn1") {
2080  }
2081  else if (args[kArgInputFormat].AsString() == "asn1b") {
2083  }
2084  else {
2085  NCBI_THROW(CInputException, eInvalidInput,
2086  "Unexpected input format: " +
2087  args[kArgInputFormat].AsString());
2088  }
2089  }
2090 
2091  if (m_InputFormat == eFastc) {
2092  // FASTC format always has pairs in a single file
2093  opt.SetPaired(true);
2094  m_IsPaired = true;
2095  }
2096 
2097  if (args.Exist(kArgQualityFilter) && args[kArgQualityFilter]) {
2098  opt.SetReadQualityFiltering(args[kArgQualityFilter].AsBoolean());
2099  }
2100 
2101  if (args.Exist(kArgQueryMate) && args[kArgQueryMate]) {
2102  // create a decompression stream if the file is compressed
2103  // (the primary query file is handled by the CStdCmdLineArgs object)
      // Compression is detected by a case-insensitive ".gz" file name
      // suffix.
2104  if (NStr::EndsWith(args[kArgQueryMate].AsString(), ".gz",
2105  NStr::eNocase)) {
2107  args[kArgQueryMate].AsInputFile(),
2110  }
2111  else {
2112  m_MateInputStream = &args[kArgQueryMate].AsInputFile();
2113  }
2114 
2115  // queries have pairs in the mate stream
2116  opt.SetPaired(true);
2117  m_IsPaired = true;
2118  }
2119 
      // The second half of this condition is on a line missing from this
      // excerpt.
2120  if ((args.Exist(kArgSraAccession) && args[kArgSraAccession]) ||
2122 
2123  if (args[kArgSraAccession]) {
2124  // accessions given in the command-line
2125  NStr::Split((CTempString)args[kArgSraAccession].AsString(), ",",
2126  m_SraAccessions);
2127  }
2128  else {
2129  // accessions given in a file
      // Reads whitespace-delimited tokens until end of file; an empty
      // extraction is skipped.
      // NOTE(review): the loop tests only eof(), so a stream that fails
      // without reaching end of file would not terminate it - confirm.
2130  while (!args[kArgSraAccessionBatch].AsInputFile().eof()) {
2131  string line;
2132  args[kArgSraAccessionBatch].AsInputFile() >> line;
2133  if (!line.empty()) {
2134  m_SraAccessions.push_back(line);
2135  }
2136  }
2137  }
2138 
2139  if (m_SraAccessions.empty()) {
2140  NCBI_THROW(CInputException, eInvalidInput,
2141  "No SRA accessions provided");
2142  }
2143 
2144  m_InputFormat = eSra;
2145  // assume SRA input is paired, that information for each read is in
2146  // SRA database, this option will trigger checking for pairs
2147  opt.SetPaired(true);
2148  m_IsPaired = true;
2149  }
2150 
2151  if (args.Exist(kArgEnableSraCache) && args[kArgEnableSraCache]) {
2152  m_EnableSraCache = true;
2153  }
2154 }
2155 
2156 
2157 
/// Constructs the database-argument handler.
///
/// Each parameter is copied into the like-named member. m_IsProtein starts
/// as true, and m_SupportsDatabaseMasking and m_SupportIPGFiltering start as
/// false. The inline comments give the default argument values, which are
/// presumably declared in the header.
///
/// @param request_mol_type Stored in m_RequestMoleculeType.
/// @param is_rpsblast      Stored in m_IsRpsBlast.
/// @param is_igblast       Stored in m_IsIgBlast.
/// @param is_mapper        Stored in m_IsMapper.
/// @param is_kblast        Stored in m_IsKBlast.
2158 CBlastDatabaseArgs::CBlastDatabaseArgs(bool request_mol_type /* = false */,
2159  bool is_rpsblast /* = false */,
2160  bool is_igblast /* = false */,
2161  bool is_mapper /* = false */,
2162  bool is_kblast /* = false */)
2163  : m_RequestMoleculeType(request_mol_type),
2164  m_IsRpsBlast(is_rpsblast),
2165  m_IsIgBlast(is_igblast),
2166  m_IsProtein(true),
2167  m_IsMapper(is_mapper),
2168  m_IsKBlast(is_kblast),
2169  m_SupportsDatabaseMasking(false),
2170  m_SupportIPGFiltering(false)
2171 {}
2172 
/// Reports whether a search target was supplied (the function name and
/// parameter list are on a signature line missing from this excerpt).
///
/// @return true if either the database argument or the subject argument is
///         registered and has a value; false otherwise.
2173 bool
2175 {
2176  if ( (args.Exist(kArgDb) && args[kArgDb].HasValue()) ||
2177  (args.Exist(kArgSubject) && args[kArgSubject].HasValue()) ) {
2178  return true;
2179  }
2180  return false;
2181 }
2182 
/// Registers the database-selection and database-filtering arguments (the
/// function name is on a signature line missing from this excerpt).
///
/// Which arguments are registered depends on m_IsIgBlast, m_IsRpsBlast,
/// m_IsMapper, m_IsKBlast, m_RequestMoleculeType and m_SupportIPGFiltering.
/// Many value types, several SetDependency() call heads and a few `if`
/// headers are on lines missing from this excerpt.
2183 void
2185 {
2186  arg_desc.SetCurrentGroup("General search options");
2187  // database filename
      // Only the description text differs between the two branches in the
      // visible lines.
2188  if (m_IsIgBlast){
2189  arg_desc.AddOptionalKey(kArgDb, "database_name", "Optional additional database name",
2191  } else {
2192  arg_desc.AddOptionalKey(kArgDb, "database_name", "BLAST database name",
2194  }
2195 
2196  arg_desc.SetCurrentGroup("");
2197 
      // AddKey() is used here rather than the AddOptionalKey() used for the
      // other arguments.
2198  if (m_RequestMoleculeType) {
2199  arg_desc.AddKey(kArgDbType, "database_type",
2200  "BLAST database molecule type",
2202  arg_desc.SetConstraint(kArgDbType,
2203  &(*new CArgAllow_Strings, "prot", "nucl"));
2204  }
2205 
      // Database-related argument names, iterated further down when
      // registering the subject arguments.
2206  vector<string> database_args;
2207  database_args.push_back(kArgDb);
2208  database_args.push_back(kArgGiList);
2209  database_args.push_back(kArgSeqIdList);
2210  database_args.push_back(kArgNegativeGiList);
2211  database_args.push_back(kArgNegativeSeqidList);
2212  database_args.push_back(kArgTaxIdList);
2213  database_args.push_back(kArgTaxIdListFile);
2214  database_args.push_back(kArgNegativeTaxIdList);
2215  database_args.push_back(kArgNegativeTaxIdListFile);
2216  database_args.push_back(kArgNoTaxIdExpansion);
2217  if (m_SupportIPGFiltering) {
2218  database_args.push_back(kArgIpgList);
2219  database_args.push_back(kArgNegativeIpgList);
2220  }
      // The `if` header guarding the next two push_back calls is on a line
      // missing from this excerpt.
2222  database_args.push_back(kArgDbSoftMask);
2223  database_args.push_back(kArgDbHardMask);
2224  }
2225 
2226  // DB size
2227  if (!m_IsMapper) {
2228  arg_desc.SetCurrentGroup("Statistical options");
2229  arg_desc.AddOptionalKey(kArgDbSize, "num_letters",
2230  "Effective length of the database ",
2232  }
2233 
2234  arg_desc.SetCurrentGroup("Restrict search or results");
2235  // GI list
2236  if (!m_IsRpsBlast && !m_IsIgBlast) {
2237  arg_desc.AddOptionalKey(kArgGiList, "filename",
2238  "Restrict search of database to list of GIs",
2240  // SeqId list
2241  arg_desc.AddOptionalKey(kArgSeqIdList, "filename",
2242  "Restrict search of database to list of SeqIDs",
2244  // Negative GI list
2245  arg_desc.AddOptionalKey(kArgNegativeGiList, "filename",
2246  "Restrict search of database to everything"
2247  " except the specified GIs",
2249 
2250  // Negative SeqId list
2251  arg_desc.AddOptionalKey(kArgNegativeSeqidList, "filename",
2252  "Restrict search of database to everything"
2253  " except the specified SeqIDs",
2255 
2256  // Tax ID list
2257  arg_desc.AddOptionalKey(kArgTaxIdList, "taxids",
2258  "Restrict search of database to include only "
2259  "the specified taxonomy IDs and their descendants "
2260  "(multiple IDs delimited by ',')",
2262  arg_desc.AddOptionalKey(kArgNegativeTaxIdList, "taxids",
2263  "Restrict search of database to everything "
2264  "except the specified taxonomy IDs and their descendants "
2265  "(multiple IDs delimited by ',')",
2267  // Tax ID list file
2268  arg_desc.AddOptionalKey(kArgTaxIdListFile, "filename",
2269  "Restrict search of database to include only "
2270  "the specified taxonomy IDs and their descendants ",
2272  arg_desc.AddOptionalKey(kArgNegativeTaxIdListFile, "filename",
2273  "Restrict search of database to everything "
2274  "except the specified taxonomy IDs and their descendants ",
2276  // Disable Tax ID resolution to the descendants
2277  arg_desc.AddFlag(kArgNoTaxIdExpansion, "Do not expand the taxonomy IDs provided to their descendant taxonomy IDs ", true);
2285 
2286  if (m_SupportIPGFiltering) {
2287  arg_desc.AddOptionalKey(kArgIpgList, "filename",
2288  "Restrict search of database to list of IPGs",
2290 
2291  // Negative IPG list
2292  arg_desc.AddOptionalKey(kArgNegativeIpgList, "filename",
2293  "Restrict search of database to everything"
2294  " except the specified IPGs",
2296  }
2297  // N.B.: all restricting options are mutually exclusive
      // Some entries of this list are on lines missing from this excerpt.
2298  const vector<string> kBlastDBFilteringOptions = {
2299  kArgGiList,
2300  kArgSeqIdList,
2301  kArgTaxIdList,
2303 
2308  };
      // Register an eExcludes dependency for every unordered pair (i < j)
      // of filtering options.
2309  for (size_t i = 0; i < kBlastDBFilteringOptions.size(); i++) {
2310  for (size_t j = i+1; j < kBlastDBFilteringOptions.size(); j++) {
2311  arg_desc.SetDependency(kBlastDBFilteringOptions[i], CArgDescriptions::eExcludes,
2312  kBlastDBFilteringOptions[j]);
2313  }
2314  }
2315 
2316  // For now, disable pairing -remote with either -gilist or
2317  // -negative_gilist as this is not implemented in the BLAST server
      // The loop body is on a line missing from this excerpt; the loop
      // visits every entry of kBlastDBFilteringOptions.
2318  for (const string& s: kBlastDBFilteringOptions) {
2320  }
2321  }
2322 
2323  // Entrez Query
2324  if (!m_IsMapper) {
2325  arg_desc.AddOptionalKey(kArgEntrezQuery, "entrez_query",
2326  "Restrict search with the given Entrez query",
2328 
2329  // Entrez query currently requires the -remote option
2331  kArgRemote);
2332  }
2333 
2334 
2335 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
2336  (!defined(NCBI_COMPILER_MIPSPRO)) )
2337  // Masking of database
      // The `if` header opening this section is on a line missing from this
      // excerpt.
2339  arg_desc.AddOptionalKey(kArgDbSoftMask,
2340  "filtering_algorithm",
2341  "Filtering algorithm ID to apply to the BLAST database as soft "
2342  "masking",
2345  kArgDbHardMask);
2346 
2347  arg_desc.AddOptionalKey(kArgDbHardMask,
2348  "filtering_algorithm",
2349  "Filtering algorithm ID to apply to the BLAST database as hard "
2350  "masking",
2352  }
2353 #endif
2354 
2355  // There is no RPS-BLAST 2 sequences
2356  if ( !m_IsRpsBlast && !m_IsKBlast && !m_IsIgBlast) {
2357  arg_desc.SetCurrentGroup("BLAST-2-Sequences options");
2358  // subject sequence input (for bl2seq)
2359  arg_desc.AddOptionalKey(kArgSubject, "subject_input_file",
2360  "Subject sequence(s) to search",
      // One statement per database argument; the call head is on a line
      // missing from this excerpt.
2362  ITERATE(vector<string>, dbarg, database_args) {
2364  *dbarg);
2365  }
2366 
2367  // subject location
2368  arg_desc.AddOptionalKey(kArgSubjectLocation, "range",
2369  "Location on the subject sequence in 1-based offsets "
2370  "(Format: start-stop)",
2372  ITERATE(vector<string>, dbarg, database_args) {
2375  *dbarg);
2376  }
2377  // Because Blast4-subject does not support Seq-locs, specifying a
2378  // subject range does not work for remote searches
2381  }
2382 
2383  arg_desc.SetCurrentGroup("");
2384 }
2385 
2386 
2387 
2388 //
2389 // Get taid(s) from user provided string or file, optionally resolve taxid to it's descendant if isTargetOnly == false
2390 // logic to add/resolve is next:
2391 // --------------------------------------------------------------------------------------
2392 // isTargetOnly | decsendant(s) found |
2393 // --------------------------------------------------------------------------------------
2394 // TRUE | N/A | add user's taxids, no lookup for decsendant
2395 // FALSE | TRUE | add user's taxid AND add only found descendant(s)
2396 // --------------------------------------------------------------------------------------
2397 //
2398 static void s_GetTaxIDList(const string & in, bool isFile, bool isNegativeList, CRef<CSearchDatabase> & sdb, bool isTargetOnly )
2399 {
2400  vector<string> ids;
2401  if (isFile) {
2402  string filename(SeqDB_ResolveDbPath(in));
2403  if(filename == kEmptyStr) {
2404  NCBI_THROW(CInputException, eInvalidInput, "File is not acessible: "+ in );
2405  }
2406  CNcbiIfstream instream(filename.c_str());
2407  CStreamLineReader reader(instream);
2408 
2409  while (!reader.AtEOF()) {
2410  reader.ReadLine();
2411  ids.push_back(reader.GetCurrentLine());
2412  }
2413  } else {
2414  NStr::Split(in, ",", ids, NStr::fSplit_Tokenize);
2415  }
2416  unique_ptr<ITaxonomy4Blast> tb;
2417  if( !isTargetOnly ) {
2418  try{
2419  tb.reset(new CTaxonomy4BlastSQLite());
2420  }
2421  catch(CException &){
2422  LOG_POST(Warning << "The -taxids command line option requires additional data files. Please see the section 'Taxonomic filtering for BLAST databases' in https://www.ncbi.nlm.nih.gov/books/NBK569839/ for details.");
2423  }
2424  }
2425  set<TTaxId> tax_ids;
2426  for (auto id : ids) {
2427  try {
2428  if (NStr::IsBlank(id)) {
2429  continue;
2430  }
2431  auto taxid = NStr::StringToNumeric<TTaxId>(id, NStr::fAllowLeadingSpaces | NStr::fAllowTrailingSpaces);
2432  if( isTargetOnly ) {
2433  tax_ids.insert(taxid);
2434  } else if (tb) {
2435  tax_ids.insert(taxid);
2436  vector<int> desc;
2437  tb->GetLeafNodeTaxids(taxid, desc);
2438  for (auto i: desc)
2439  tax_ids.insert( static_cast<TTaxId>(i) );
2440  }
2441  } catch(CException &){
2442  NCBI_THROW(CInputException, eInvalidInput, "Invalid taxidlist file ");
2443  }
2444  }
2445 
2446  CRef<CSeqDBGiList> taxid_list(new CSeqDBGiList());
2447  taxid_list->AddTaxIds(tax_ids);
2448  if(isNegativeList) {
2449  sdb->SetNegativeGiList(taxid_list.GetPointer());
2450  }
2451  else {
2452  sdb->SetGiList(taxid_list.GetPointer());
2453  }
2454 
2455 }
2456 
2457 
// Translates the database / subject command line arguments into the search
// target held by this object (m_SearchDb, or m_Scope + m_Subjects) and, when
// applicable, the database length option in `opts`.
//
// NOTE(review): this listing is missing several lines of the function: the
// line of the signature that carries the function name and first parameter,
// the statements between the opening brace and the first `if` (where
// `mol_type` is presumably declared), and the statements that consume each
// resolved list path `fn`. Confirm against the full file.
2458 void
2460  CBlastOptions& opts)
2461 {
2466 
 // Database search: the database list is the -db value with surrounding
 // spaces trimmed.
2467  if (args.Exist(kArgDb) && args[kArgDb]) {
2468  std::string local_dblist = NStr::TruncateSpaces( args[kArgDb].AsString() );
2469 
2470  m_SearchDb.Reset(new CSearchDatabase( local_dblist,
2471  mol_type));
2472 
 // At most one restriction is applied: the branches form a single
 // if / else-if chain, so the first argument found wins.
2473  if (args.Exist(kArgGiList) && args[kArgGiList]) {
2474  string fn(SeqDB_ResolveDbPath(args[kArgGiList].AsString()));
2476 
2477  } else if (args.Exist(kArgNegativeGiList) && args[kArgNegativeGiList]) {
2478  string fn(SeqDB_ResolveDbPath(args[kArgNegativeGiList].AsString()));
2480 
2481  } else if (args.Exist(kArgSeqIdList) && args[kArgSeqIdList]) {
2482  string fn(SeqDB_ResolveDbPath(args[kArgSeqIdList].AsString()));
2485  } else if (args.Exist(kArgNegativeSeqidList) && args[kArgNegativeSeqidList]) {
2486  string fn(SeqDB_ResolveDbPath(args[kArgNegativeSeqidList].AsString()));
 // Taxonomy ID restrictions: the s_GetTaxIDList arguments are
 // (input, isFile, isNegativeList, database, isTargetOnly); the value of
 // the no-expansion flag is passed as isTargetOnly.
2488  } else if (args.Exist(kArgTaxIdList) && args[kArgTaxIdList]) {
2489  s_GetTaxIDList(args[kArgTaxIdList].AsString(), false, false, m_SearchDb,args[kArgNoTaxIdExpansion].AsBoolean());
2490 
2491  } else if (args.Exist(kArgTaxIdListFile) && args[kArgTaxIdListFile]) {
2492  s_GetTaxIDList(args[kArgTaxIdListFile].AsString(), true, false, m_SearchDb, args[kArgNoTaxIdExpansion].AsBoolean());
2493 
2494  } else if (args.Exist(kArgNegativeTaxIdList) && args[kArgNegativeTaxIdList]) {
2495  s_GetTaxIDList(args[kArgNegativeTaxIdList].AsString(), false, true, m_SearchDb, args[kArgNoTaxIdExpansion].AsBoolean());
2496 
2497  } else if (args.Exist(kArgNegativeTaxIdListFile) && args[kArgNegativeTaxIdListFile]) {
2498  s_GetTaxIDList(args[kArgNegativeTaxIdListFile].AsString(), true, true, m_SearchDb,args[kArgNoTaxIdExpansion].AsBoolean());
2499 
2500  } else if (args.Exist(kArgIpgList) && args[kArgIpgList]) {
2501  string fn(SeqDB_ResolveDbPath(args[kArgIpgList].AsString()));
2503  } else if (args.Exist(kArgNegativeIpgList) && args[kArgNegativeIpgList]) {
2504  string fn(SeqDB_ResolveDbPath(args[kArgNegativeIpgList].AsString()));
2506 
2507  }
2508 
 // NOTE(review): the statement controlled by this `if` is missing from
 // the listing.
2509  if (args.Exist(kArgEntrezQuery) && args[kArgEntrezQuery])
2511 
 // Database masking is compiled only for the compilers accepted by this
 // preprocessor condition; the bodies of both branches are missing from
 // the listing.
2512 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
2513  (!defined(NCBI_COMPILER_MIPSPRO)) )
2514  if (args.Exist(kArgDbSoftMask) && args[kArgDbSoftMask]) {
2516  } else if (args.Exist(kArgDbHardMask) && args[kArgDbHardMask]) {
2518  }
2519 #endif
 // No database given: search against subject sequences read from a file.
2520  } else if (args.Exist(kArgSubject) && args[kArgSubject]) {
2521 
2522  CNcbiIstream* subj_input_stream = NULL;
2523  unique_ptr<CDecompressIStream> decompress_stream;
 // In mapper mode a subject file whose name ends in ".gz" (case
 // insensitive) is read through a decompressing stream.
2524  if (m_IsMapper &&
2525  NStr::EndsWith(args[kArgSubject].AsString(), ".gz", NStr::eNocase)) {
2526  decompress_stream.reset(
2527  new CDecompressIStream(args[kArgSubject].AsInputFile(),
2529  subj_input_stream = decompress_stream.get();
2530  }
2531  else {
2532  subj_input_stream = &args[kArgSubject].AsInputFile();
2533  }
2534 
 // Optional subject range; left default-constructed when not supplied.
2535  TSeqRange subj_range;
2536  if (args.Exist(kArgSubjectLocation) && args[kArgSubjectLocation]) {
2537  subj_range =
2538  ParseSequenceRange(args[kArgSubjectLocation].AsString(),
2539  "Invalid specification of subject location");
2540  }
2541 
 // NOTE(review): the fallback values of both conditionals and the
 // declaration of `subjects` are missing from the listing.
2542  const bool parse_deflines = args.Exist(kArgParseDeflines)
2543  ? args[kArgParseDeflines].AsBoolean()
2545  const bool use_lcase_masks = args.Exist(kArgUseLCaseMasking)
2546  ? bool(args[kArgUseLCaseMasking])
2549  m_Scope = ReadSequencesToBlast(*subj_input_stream, IsProtein(),
2550  subj_range, parse_deflines,
2551  use_lcase_masks, subjects, m_IsMapper);
2552  m_Subjects.Reset(new blast::CObjMgr_QueryFactory(*subjects));
2553 
2554  } else if (!m_IsIgBlast){
2555  // IgBlast permits use of germline database
2556  NCBI_THROW(CInputException, eInvalidInput,
2557  "Either a BLAST database or subject sequence(s) must be specified");
2558  }
2559 
2560  if (opts.GetEffectiveSearchSpace() != 0) {
2561  // no need to set any other options, as this trumps them
2562  return;
2563  }
2564 
 // Database length override, applied only when no effective search space
 // is set.
2565  if (args.Exist(kArgDbSize) && args[kArgDbSize]) {
2566  opts.SetDbLength(args[kArgDbSize].AsInt8());
2567  }
2568 
2569 }
2570 
// Registers the formatting command line arguments (output format, GI display,
// number of descriptions/alignments, line length and, except for IgBlast,
// HTML output, hit/HSP sorting and the maximum number of target sequences).
//
// NOTE(review): the signature line and several argument lines (value types,
// constraints, defaults) are missing from this listing.
2571 void
2573 {
2574  arg_desc.SetCurrentGroup("Formatting options");
2575 
 // Help text for the output format argument, assembled in pieces because
 // parts of it depend on m_FormatFlags.
2576  string kOutputFormatDescription = string(
2577  "alignment view options:\n"
2578  " 0 = Pairwise,\n"
2579  " 1 = Query-anchored showing identities,\n"
2580  " 2 = Query-anchored no identities,\n"
2581  " 3 = Flat query-anchored showing identities,\n"
2582  " 4 = Flat query-anchored no identities,\n"
2583  " 5 = BLAST XML,\n"
2584  " 6 = Tabular,\n"
2585  " 7 = Tabular with comment lines,\n"
2586  " 8 = Seqalign (Text ASN.1),\n"
2587  " 9 = Seqalign (Binary ASN.1),\n"
2588  " 10 = Comma-separated values,\n"
2589  " 11 = BLAST archive (ASN.1),\n"
2590  " 12 = Seqalign (JSON),\n"
2591  " 13 = Multiple-file BLAST JSON,\n"
2592  " 14 = Multiple-file BLAST XML2,\n"
2593  " 15 = Single-file BLAST JSON,\n"
2594  " 16 = Single-file BLAST XML2");
2595 
 // Format 17 (SAM) is advertised only when the eIsSAM flag is set.
2596  if(m_FormatFlags & eIsSAM) {
2597  kOutputFormatDescription += ",\n 17 = Sequence Alignment/Map (SAM)";
2598  }
2599  kOutputFormatDescription += ",\n 18 = Organism Report\n\n";
 // Explanation of custom format specifiers; the wording differs depending
 // on whether SAM is available.
2600  if(m_FormatFlags & eIsSAM) {
2601  kOutputFormatDescription +=
2602  "Options 6, 7, 10 and 17 "
2603  "can be additionally configured to produce\n"
2604  "a custom format specified by space delimited format specifiers,\n"
2605  "or in the case of options 6, 7, and 10, by a token specified\n"
2606  "by the delim keyword. E.g.: \"17 delim=@ qacc sacc score\".\n"
2607  "The delim keyword must appear after the numeric output format\n"
2608  "specification.\n"
2609  "The supported format specifiers for options 6, 7 and 10 are:\n";
2610  }
2611  else {
2612  kOutputFormatDescription +=
2613  "Options 6, 7 and 10 "
2614  "can be additionally configured to produce\n"
2615  "a custom format specified by space delimited format specifiers,\n"
2616  "or by a token specified by the delim keyword.\n"
2617  " E.g.: \"10 delim=@ qacc sacc score\".\n"
2618  "The delim keyword must appear after the numeric output format\n"
2619  "specification.\n"
2620  "The supported format specifiers are:\n";
2621  }
2622 
2623  kOutputFormatDescription += DescribeTabularOutputFormatSpecifiers() + string("\n");
2624 
 // NOTE(review): the expression appended after this literal is missing
 // from the listing.
2625  if(m_FormatFlags & eIsSAM) {
2626  kOutputFormatDescription +=
2627  "The supported format specifier for option 17 is:\n" +
2629  }
2630 
2631 
2632  int dft_outfmt = kDfltArgOutputFormat;
2633 
2634  // Igblast shows extra column of gaps
 // For IgBlast the help text built above is replaced entirely and the
 // default output format becomes 3.
2635  if (m_IsIgBlast) {
2636  kOutputFormatDescription = string(
2637  "alignment view options:\n"
2638  " 3 = Flat query-anchored, show identities,\n"
2639  " 4 = Flat query-anchored, no identities,\n"
2640  " 7 = Tabular with comment lines\n"
2641  " 19 = Rearrangement summary report (AIRR format)\n\n"
2642  "Options 7 can be additionally configured to produce\n"
2643  "a custom format specified by space delimited format specifiers.\n"
2644  "The supported format specifiers are:\n") +
2646  string("\n");
2647  dft_outfmt = 3;
2648  }
2649 
2650  // alignment view
2651  arg_desc.AddDefaultKey(kArgOutputFormat, "format",
2652  kOutputFormatDescription,
2654  NStr::IntToString(dft_outfmt));
2655 
2656  // show GIs in deflines
2657  arg_desc.AddFlag(kArgShowGIs, "Show NCBI GIs in deflines?", true);
2658 
2659  // number of one-line descriptions to display
 // The default is shown in the help text only; the argument itself is an
 // optional key.
2660  arg_desc.AddOptionalKey(kArgNumDescriptions, "int_value",
2661  "Number of database sequences to show one-line "
2662  "descriptions for\n"
2663  "Not applicable for outfmt > 4\n"
2664  "Default = `"+ NStr::IntToString(m_DfltNumDescriptions)+ "'",
2668 
2669  // number of alignments per DB sequence
2670  arg_desc.AddOptionalKey(kArgNumAlignments, "int_value",
2671  "Number of database sequences to show alignments for\n"
2672  "Default = `" + NStr::IntToString(m_DfltNumAlignments) + "'",
2676 
 // line length used when formatting alignments
2677  arg_desc.AddOptionalKey(kArgLineLength, "line_length",
2678  "Line length for formatting alignments\n"
2679  "Not applicable for outfmt > 4\n"
2682  arg_desc.SetConstraint(kArgLineLength,
2684 
 // The remaining arguments are not registered for IgBlast.
2685  if(!m_IsIgBlast){
2686  // Produce HTML?
2687  arg_desc.AddFlag(kArgProduceHtml, "Produce HTML output?", true);
2688 
2689 
 // sorting of hits
2690  arg_desc.AddOptionalKey(kArgSortHits, "sort_hits",
2691  "Sorting option for hits:\n"
2692  "alignment view options:\n"
2693  " 0 = Sort by evalue,\n"
2694  " 1 = Sort by bit score,\n"
2695  " 2 = Sort by total score,\n"
2696  " 3 = Sort by percent identity,\n"
2697  " 4 = Sort by query coverage\n"
2698  "Not applicable for outfmt > 4\n",
2700  arg_desc.SetConstraint(kArgSortHits,
2703  true));
2704 
 // sorting of HSPs within a hit
2705  arg_desc.AddOptionalKey(kArgSortHSPs, "sort_hsps",
2706  "Sorting option for hps:\n"
2707  " 0 = Sort by hsp evalue,\n"
2708  " 1 = Sort by hsp score,\n"
2709  " 2 = Sort by hsp query start,\n"
2710  " 3 = Sort by hsp percent identity,\n"
2711  " 4 = Sort by hsp subject start\n"
2712  "Not applicable for outfmt != 0\n",
2714  arg_desc.SetConstraint(kArgSortHSPs,
2717  true));
2718  /// Hit list size, listed here for convenience only
2719  arg_desc.SetCurrentGroup("Restrict search or results");
2720  arg_desc.AddOptionalKey(kArgMaxTargetSequences, "num_sequences",
2721  "Maximum number of aligned sequences to keep \n"
2722  "(value of 5 or more is recommended)\n"
2723  "Default = `" + NStr::IntToString(BLAST_HITLIST_SIZE) + "'",
2733  }
2734  arg_desc.SetCurrentGroup("");
2735 }
2736 
// Returns true when the output format parsed from `args` is eArchiveFormat.
// The custom format specifier and delimiter produced by
// ParseFormattingString are discarded.
// NOTE(review): the signature line is missing from this listing.
2737 bool
2739 {
2740  EOutputFormat output_fmt;
2741  string ignore1, ignore2;
2742  ParseFormattingString(args, output_fmt, ignore1, ignore2);
2743  return (output_fmt == eArchiveFormat ? true : false);
2744 }
2745 
2746 
2747 static void s_ValidateCustomDelim(string custom_fmt_spec,string customDelim)
2748 {
2749  bool error = false;
2750  string checkfield;
2751  custom_fmt_spec = NStr::TruncateSpaces(custom_fmt_spec);
2752  if(custom_fmt_spec.empty()) return;
2753 
2754  //Check if delim is already used
2755  const string kFieldsWithSemicolSeparator = "sallseqid staxids sscinames scomnames sblastnames sskingdoms";//sep = ";"
2756  const string kFramesField = "frames"; //sep = "/"
2757  const string kAllTitlesField ="salltitles"; //sep = "<>""
2758 
2759  if(customDelim == ";") {
2760  vector <string> tokens;
2761  NStr::Split(kFieldsWithSemicolSeparator," ", tokens);
2762  for(size_t i = 0; i < tokens.size(); i++) {
2763  if(NStr::Find(custom_fmt_spec,tokens[i]) != NPOS) {
2764  checkfield = tokens[i];
2765  error = true;
2766  break;
2767  }
2768  }
2769  }
2770  else {
2771  if(customDelim == "/") {
2772  checkfield = kFramesField;
2773  }
2774  else if(customDelim == "<>") {
2775  checkfield = kAllTitlesField;
2776  }
2777  if(!checkfield.empty() && NStr::Find(custom_fmt_spec,checkfield) != NPOS) {
2778  error = true;
2779  }
2780  }
2781 
2782  if(error) {
2783  string msg("Your custom record separator (" + customDelim + ") is also used by the format specifier (" + checkfield +
2784  ") to separate multiple entries. Please use a different record separator (delim keyword).");
2785  NCBI_THROW(CInputException, eInvalidInput, msg);
2786  }
2787 }
2788 
// Splits the output format argument into its numeric format, the optional
// custom format specifier that follows the first space, and the optional
// "delim=<value>" delimiter.
//
// Outputs:
//   fmt_type         numeric format converted to EOutputFormat
//   custom_fmt_spec  specifier text; cleared unless the format is tabular,
//                    tabular with comments, CSV or SAM
//   custom_delim     value that follows "delim=" when present
// Throws CInputException (eInvalidInput) for a malformed delimiter or a
// non-numeric format, and std::out_of_range for a numeric value that is out
// of range or not permitted for IgBlast.
// Nothing except clearing custom_fmt_spec happens when the argument is unset.
//
// NOTE(review): the signature line carrying the function name and the `args`
// parameter is missing from this listing.
2789 void
2791  EOutputFormat& fmt_type,
2792  string& custom_fmt_spec,
2793  string& custom_delim) const
2794 {
2795  custom_fmt_spec.clear();
2796  if (args[kArgOutputFormat]) {
2797  string fmt_choice =
2798  NStr::TruncateSpaces(args[kArgOutputFormat].AsString());
2799  string::size_type pos;
 // Everything after the first space is the custom specifier; what
 // precedes it is the numeric format.
2800  if ( (pos = fmt_choice.find_first_of(' ')) != string::npos) {
2801  custom_fmt_spec.assign(fmt_choice, pos+1,
2802  fmt_choice.size()-(pos+1));
2803  fmt_choice.erase(pos);
2804  }
2805  if(!custom_fmt_spec.empty()) {
 // A leading "delim..." token carries the custom delimiter.
2806  if(NStr::StartsWith(custom_fmt_spec, "delim")) {
2807  vector <string> tokens;
2808  NStr::Split(custom_fmt_spec," ",tokens);
2809  if(tokens.size() > 0) {
2810  string tag;
2811  bool isValid = NStr::SplitInTwo(tokens[0],"=",tag,custom_delim);
2812  if(!isValid) {
2813  string msg("Delimiter format is invalid. Valid format is delim=<delimiter value>");
2814  NCBI_THROW(CInputException, eInvalidInput, msg);
2815  }
2816  else {
 // Strip the delimiter token from the specifier text.
 // NOTE(review): presumably this removes every occurrence
 // of the token text, not only the leading one - confirm.
2817  custom_fmt_spec = NStr::Replace(custom_fmt_spec,tokens[0],"");
2818  }
2819  }
2820  }
2821  }
2822  int val = 0;
2823  try { val = NStr::StringToInt(fmt_choice); }
2824  catch (const CStringException&) { // probably a conversion error
2825  CNcbiOstrstream os;
2826  os << "'" << fmt_choice << "' is not a valid output format";
2827  string msg = CNcbiOstrstreamToString(os);
2828  NCBI_THROW(CInputException, eInvalidInput, msg);
2829  }
 // Range errors are reported with std::out_of_range rather than
 // CInputException.
2830  if (val < 0 || val >= static_cast<int>(eEndValue)) {
2831  string msg("Formatting choice is out of range");
2832  throw std::out_of_range(msg);
2833  }
 // IgBlast accepts only formats 3, 4, 7 and eAirrRearrangement.
2834  if (m_IsIgBlast && (val != 3 && val != 4 && val != 7 && val != eAirrRearrangement)) {
2835  string msg("Formatting choice is not valid");
2836  throw std::out_of_range(msg);
2837  }
2838  fmt_type = static_cast<EOutputFormat>(val);
 // Custom specifiers are kept only for the formats that use them.
2839  if ( !(fmt_type == eTabular ||
2840  fmt_type == eTabularWithComments ||
2841  fmt_type == eCommaSeparatedValues ||
2842  fmt_type == eSAM) ) {
2843  custom_fmt_spec.clear();
2844  }
2845  }
2846 }
2847 
2848 
// Reads the formatting arguments into this object's members (GI display,
// HTML, number of descriptions/alignments, line length, sort options) and
// sets the hit list size on `opt`.
//
// NOTE(review): this listing is missing the signature line, the statement
// that sets m_OutputFormat, and several condition lines (including the
// opening of the block closed just before the `else` below). Comments
// describe only what is visible.
2849 void
2851  CBlastOptions& opt)
2852 {
 // Reject output formats that belong to other programs.
2854  if((m_OutputFormat == eSAM) && !(m_FormatFlags & eIsSAM) ){
2855  NCBI_THROW(CInputException, eInvalidInput,
2856  "SAM format is only applicable to blastn" );
2857  }
2859  NCBI_THROW(CInputException, eInvalidInput,
2860  "AIRR rearrangement format is only applicable to igblastn" );
2861  }
2862  if (m_OutputFormat == eFasta) {
2863  NCBI_THROW(CInputException, eInvalidInput,
2864  "FASTA output format is only applicable to magicblast");
2865  }
2867  m_ShowGis = static_cast<bool>(args[kArgShowGIs]);
 // HTML output is never produced for IgBlast.
2868  if(m_IsIgBlast){
2869  m_Html = false;
2870  } else {
2871  m_Html = static_cast<bool>(args[kArgProduceHtml]);
2872  }
2873  // Default hitlist size 500, value can be changed if import search strategy is used
2874  int hitlist_size = opt.GetHitlistSize();
2875 
2876  // To preserve hitlist size in import search strategy > 500,
2877  // we need to increase the num_descriptions and num_alignments
2878  if(hitlist_size > BLAST_HITLIST_SIZE )
2879  {
 // Applies only when neither -num_descriptions nor -num_alignments was
 // given (a third condition line is missing from the listing); the
 // function returns early in that case.
2880  if((!args.Exist(kArgNumDescriptions) || !args[kArgNumDescriptions]) &&
2881  (!args.Exist(kArgNumAlignments) || !args[kArgNumAlignments]) &&
2883  m_NumDescriptions = hitlist_size;
2884  m_NumAlignments = hitlist_size/ 2;
2885  return;
2886  }
2887  }
2888 
2890 
2891 
 // NOTE(review): the condition opening the block below is missing; judging
 // by the warnings in the `else` branch it presumably selects output
 // formats <= 4 - confirm.
2894 
2895  if (args.Exist(kArgNumDescriptions) && args[kArgNumDescriptions]) {
2896  m_NumDescriptions = args[kArgNumDescriptions].AsInteger();
2897  }
2898 
2899  if (args.Exist(kArgNumAlignments) && args[kArgNumAlignments]) {
2900  m_NumAlignments = args[kArgNumAlignments].AsInteger();
2901  }
2902 
 // NOTE(review): the condition guarding this block is missing; it uses
 // the max-target-sequences value for both counts and the hit list size.
2904  m_NumDescriptions = args[kArgMaxTargetSequences].AsInteger();
2905  m_NumAlignments = args[kArgMaxTargetSequences].AsInteger();
2906  hitlist_size = m_NumAlignments;
2907  }
2908 
2909  // The If clause is for handling import_search_strategy hitlist size < 500
2910  // We want to preserve the hitlist size in iss if no formatting input is entered in cmdline
2911  // If formatting option(s) is entered then the iss hitlist size is overridden.
2912  // FIXME: does this work with import search strategies?
2913  if ((args.Exist(kArgNumDescriptions) && args[kArgNumDescriptions]) ||
2914  (args.Exist(kArgNumAlignments) && args[kArgNumAlignments])) {
2915  hitlist_size = max(m_NumDescriptions, m_NumAlignments);
2916  }
2917 
2918  if (args[kArgLineLength]) {
2919  m_LineLength = args[kArgLineLength].AsInteger();
2920  }
2921  if(args.Exist(kArgSortHits) && args[kArgSortHits])
2922  {
2923  m_HitsSortOption = args[kArgSortHits].AsInteger();
2924  }
2925  }
2926  else
2927  {
 // In this branch -num_descriptions, -line_length and -sorthits only
 // produce warnings; the hit list size comes from the max-target-sequences
 // value or, failing that, from -num_alignments.
2928  if (args.Exist(kArgNumDescriptions) && args[kArgNumDescriptions]) {
2929  ERR_POST(Warning << "The parameter -num_descriptions is ignored for "
2930  "output formats > 4 . Use -max_target_seqs "
2931  "to control output");
2932  }
2933 
2934  if (args[kArgLineLength]) {
2935  ERR_POST(Warning << "The parameter -line_length is not applicable for "
2936  "output formats > 4 .");
2937  }
2938 
2940  hitlist_size = args[kArgMaxTargetSequences].AsInteger();
2941  }
2942  else if (args.Exist(kArgNumAlignments) && args[kArgNumAlignments]) {
2943  hitlist_size = args[kArgNumAlignments].AsInteger();
2944  }
2945 
2946  m_NumDescriptions = hitlist_size;
2947  m_NumAlignments = hitlist_size;
2948 
2949  if(args.Exist(kArgSortHits) && args[kArgSortHits]) {
2950  ERR_POST(Warning << "The parameter -sorthits is ignored for output formats > 4.");
2951  }
2952  }
2953 
 // A small hit list only triggers a warning; the value is still applied.
2954  if(hitlist_size < 5){
2955  ERR_POST(Warning << "Examining 5 or more matches is recommended");
2956  }
2957  opt.SetHitlistSize(hitlist_size);
2958 
 // HSP sorting is honoured only for the pairwise output format.
2959  if(args.Exist(kArgSortHSPs) && args[kArgSortHSPs])
2960  {
2961  int hspsSortOption = args[kArgSortHSPs].AsInteger();
2962  if(m_OutputFormat == ePairwise) {
2963  m_HspsSortOption = hspsSortOption;
2964  }
2965  else {
2966  ERR_POST(Warning << "The parameter -sorthsps is ignored for output formats != 0.");
2967  }
2968  }
2969  return;
2970 }
2971 
2972 
// Registers the formatting arguments that use named output formats
// ("sam", "tabular", "asn" for alignments; "sam", "tabular", "fasta" for
// unaligned reads) together with the related reporting flags.
//
// NOTE(review): the signature line, the value-type arguments and the
// SetConstraint call heads are missing from this listing.
2973 void
2975 {
2976  arg_desc.SetCurrentGroup("Formatting options");
2977  string kOutputFormatDescription = string(
2978  "alignment view options:\n"
2979  "sam = SAM format,\n"
2980  "tabular = Tabular format,\n"
2981  "asn = text ASN.1\n");
2982 
 // NOTE(review): the expression appended after "Default = same as " is
 // missing from the listing.
2983  string kUnalignedOutputFormatDescription = string(
2984  "format for reporting unaligned reads:\n"
2985  "sam = SAM format,\n"
2986  "tabular = Tabular format,\n"
2987  "fasta = sequences in FASTA format\n"
2988  "Default = same as ") +
2990 
 // Alignment output format; defaults to "sam".
2991  arg_desc.AddDefaultKey(align_format::kArgOutputFormat, "format",
2992  kOutputFormatDescription,
2994  "sam");
2995 
2996  set<string> allowed_formats = {"sam", "tabular", "asn"};
2998  new CArgAllowStringSet(allowed_formats));
2999 
 // Output format for unaligned reads (optional).
3000  arg_desc.AddOptionalKey(kArgUnalignedFormat, "format",
3001  kUnalignedOutputFormatDescription,
3003 
3004  set<string> allowed_unaligned_formats = {"sam", "tabular", "fasta"};
3006  new CArgAllowStringSet(allowed_unaligned_formats));
3007 
3010 
3011 
 // Reporting flags.
3012  arg_desc.AddFlag(kArgPrintMdTag, "Include MD tag in SAM report");
3013  arg_desc.AddFlag(kArgNoReadIdTrim, "Do not trim '.1', '/1', '.2', " \
3014  "or '/2' at the end of read ids for SAM format and" \
3015  "paired runs");
3016 
3017  arg_desc.AddFlag(kArgNoUnaligned, "Do not report unaligned reads");
3018 
3019  arg_desc.AddFlag(kArgNoDiscordant,
3020  "Suppress discordant alignments for paired reads");
3021 
3022  arg_desc.AddOptionalKey(kArgUserTag, "tag",
3023  "A user tag to add to each alignment",
3025 
3026  arg_desc.SetCurrentGroup("");
3027 }
3028 
// Reads the named-format formatting arguments into this object's members:
// output formats, read-id trimming, unaligned/discordant reporting, strand
// options, MD tag printing and the user tag.
//
// NOTE(review): the start of the signature, the condition opening the first
// block, and the assignments inside several format branches are missing from
// this listing.
3030  CBlastOptions& opt)
3031 {
 // Map the alignment format name to an output format; any other name is
 // rejected.
3033  string fmt_choice = args[align_format::kArgOutputFormat].AsString();
3034  if (fmt_choice == "sam") {
3035  m_OutputFormat = eSAM;
3036  }
3037  else if (fmt_choice == "tabular") {
3039  }
3040  else if (fmt_choice == "asn") {
3042  }
3043  else {
3044  CNcbiOstrstream os;
3045  os << "'" << fmt_choice << "' is not a valid output format";
3046  string msg = CNcbiOstrstreamToString(os);
3047  NCBI_THROW(CInputException, eInvalidInput, msg);
3048  }
3049 
3051  }
3052 
 // Same mapping for the unaligned-reads format; the assignments inside the
 // three accepted branches are missing from the listing.
3053  if (args.Exist(kArgUnalignedFormat) && args[kArgUnalignedFormat]) {
3054  string fmt_choice = args[kArgUnalignedFormat].AsString();
3055  if (fmt_choice == "sam") {
3057  }
3058  else if (fmt_choice == "tabular") {
3060  }
3061  else if (fmt_choice == "fasta") {
3063  }
3064  else {
3065  CNcbiOstrstream os;
3066  os << "'" << fmt_choice
3067  << "' is not a valid output format for unaligned reads";
3068  string msg = CNcbiOstrstreamToString(os);
3069  NCBI_THROW(CInputException, eInvalidInput, msg);
3070  }
3071  }
3072 
 // Fixed settings: GIs are always shown and HTML is never produced.
3073  m_ShowGis = true;
3074  m_Html = false;
3075 
 // Each flag below changes its member only when the flag is present;
 // otherwise the member keeps its previous value.
3076  if (args.Exist(kArgNoReadIdTrim) && args[kArgNoReadIdTrim]) {
3077  m_TrimReadIds = false;
3078  }
3079 
3080  if (args.Exist(kArgNoUnaligned) && args[kArgNoUnaligned]) {
3081  m_PrintUnaligned = false;
3082  }
3083 
3084  if (args.Exist(kArgNoDiscordant) && args[kArgNoDiscordant]) {
3085  m_NoDiscordant = true;
3086  }
3087 
3088  if (args.Exist(kArgFwdRev) && args[kArgFwdRev]) {
3089  m_FwdRev = true;
3090  }
3091 
3092  if (args.Exist(kArgRevFwd) && args[kArgRevFwd]) {
3093  m_RevFwd = true;
3094  }
3095 
3096  if (args.Exist(kArgFwdOnly) && args[kArgFwdOnly]) {
3097  m_FwdOnly = true;
3098  }
3099 
3100  if (args.Exist(kArgRevOnly) && args[kArgRevOnly]) {
3101  m_RevOnly = true;
3102  }
3103 
 // NOTE(review): the condition guarding this assignment is missing.
3105  m_OnlyStrandSpecific = true;
3106  }
3107 
3108  if (args.Exist(kArgPrintMdTag) && args[kArgPrintMdTag]) {
3109  m_PrintMdTag = true;
3110  }
3111 
3112  // only the fast tabular format is able to show merged HSPs with
3113  // common query bases
3114  if (m_OutputFormat != eTabular) {
3115  // FIXME: This is a hack. Merging should be done by the formatter,
3116  // but is currently done by HSP stream writer. This is an easy
3117  // switch until merging is implemented properly.
3118  CNcbiEnvironment().Set("MAPPER_NO_OVERLAPPED_HSP_MERGE", "1");
3119  }
3120 
 // The user tag is stored with each two-character "\t" sequence replaced
 // by a tab character.
3121  if (args.Exist(kArgUserTag) && args[kArgUserTag]) {
3122  NStr::Replace(args[kArgUserTag].AsString(), "\\t", "\t", m_UserTag);
3123  }
3124 }
3125 
// Registers the threading arguments (number of threads and, when m_MTMode is
// non-negative, the multi-thread mode). Nothing is registered unless
// NCBI_THREADS is defined.
//
// NOTE(review): the signature line, the value-type arguments and the
// dependency kinds of the SetDependency calls are missing from this listing.
3126 void
3128 {
3129  // number of threads
3130  arg_desc.SetCurrentGroup("Miscellaneous options");
3131 #ifdef NCBI_THREADS
 // The default is the minimum thread count unless m_NumThreads differs from
 // it, in which case the default is m_NumThreads capped at the CPU count.
3132  const int kMinValue = static_cast<int>(CThreadable::kMinNumThreads);
3133  const int kMaxValue = static_cast<int>(CSystemInfo::GetCpuCount());
3134  const int kDfltValue = m_NumThreads != CThreadable::kMinNumThreads
3135  ? std::min<int>(static_cast<int>(m_NumThreads), kMaxValue) : kMinValue;
3136 
3137  arg_desc.AddDefaultKey(kArgNumThreads, "int_value",
3138  "Number of threads (CPUs) to use in the BLAST search",
3140  NStr::IntToString(kDfltValue));
 // Only a lower bound is enforced by the constraint.
3141  arg_desc.SetConstraint(kArgNumThreads,
3142  new CArgAllowValuesGreaterThanOrEqual(kMinValue));
3143  arg_desc.SetDependency(kArgNumThreads,
3145  kArgRemote);
3146 
3147  if (m_MTMode >= 0) {
3148  arg_desc.AddDefaultKey(kArgMTMode, "int_value",
3149  "Multi-thread mode to use in BLAST search:\n "
3150  "0 auto split by database or queries \n "
3151  "1 split by queries\n "
3152  "2 split by database",
3154  NStr::IntToString(0));
3155  arg_desc.SetConstraint(kArgMTMode,
3156  new CArgAllowValuesBetween(0, 2, true));
3157  arg_desc.SetDependency(kArgMTMode,
3159  kArgNumThreads);
3160  }
3161  /*
3162  arg_desc.SetDependency(kArgNumThreads,
3163  CArgDescriptions::eExcludes,
3164  kArgUseIndex);
3165  */
3166 #endif
3167  arg_desc.SetCurrentGroup("");
3168 }
3169 
// NOTE(review): both the signature and the single body statement of this
// definition are missing from the listing; restore them from the full file
// before editing.
3171 {
3173 }
3174 
3175 
// NOTE(review): only the return type and braces of this definition are
// visible; the signature and the single body statement are missing from the
// listing.
3176 void
3178 {
3180 }
// Reads the threading arguments into m_NumThreads and m_MTMode.
// The requested thread count is capped at the number of CPUs reported by
// CSystemInfo::GetCpuCount(), with a warning when it is reduced.
//
// NOTE(review): the signature line and part of the condition on the subject
// argument are missing from this listing.
3181 void
3183 {
3184  const int kMaxValue = static_cast<int>(CSystemInfo::GetCpuCount());
3185 
3186  if (args.Exist(kArgNumThreads) &&
3187  args[kArgNumThreads].HasValue()) { // could be cancelled by the exclusion in CRemoteArgs
3188 
3189  // use the minimum of the two: user requested number of threads and
3190  // number of available CPUs for number of threads
3191  int num_threads = args[kArgNumThreads].AsInteger();
3192  if (num_threads > kMaxValue) {
3193  m_NumThreads = kMaxValue;
3194 
3195  ERR_POST(Warning << (string)"Number of threads was reduced to " +
3196  NStr::IntToString((unsigned int)m_NumThreads) +
3197  " to match the number of available CPUs");
3198  }
3199  else {
3200  m_NumThreads = num_threads;
3201  }
3202 
3203  // This is temporarily ignored (per SB-635)
 // When the (partially visible) subject condition holds, a warning is
 // posted and the function returns before m_MTMode is assigned below.
3204  if (args.Exist(kArgSubject) && args[kArgSubject].HasValue() &&
3207  string opt = kArgNumThreads;
3208  if (args.Exist(kArgMTMode) &&
3209  (args[kArgMTMode].AsInteger() == CMTArgs::eSplitByQueries)) {
3211  opt += " and " + kArgMTMode;
3212  }
3213  ERR_POST(Warning << "'" << opt << "' is currently "
3214  << "ignored when '" << kArgSubject << "' is specified.");
3215  return;
3216  }
3217  }
 // The multi-thread mode is stored as given, converted to EMTMode.
3218  if (args.Exist(kArgMTMode) && args[kArgMTMode].HasValue()) {
3219  m_MTMode = (EMTMode) args[kArgMTMode].AsInteger();
3220  }
3221 
3222 }
3223 
// Registers the remote-execution flag under "Miscellaneous options".
// NOTE(review): the signature line is missing from this listing.
3224 void
3226 {
3227  arg_desc.SetCurrentGroup("Miscellaneous options");
3228  arg_desc.AddFlag(kArgRemote, "Execute search remotely?", true);
3229 
3230  arg_desc.SetCurrentGroup("");
3231 }
3232 
// Stores the state of the remote-execution flag in m_IsRemote. m_IsRemote is
// left untouched when the flag is not part of the argument set.
// NOTE(review): the signature line is missing from this listing.
3233 void
3235 {
3236  if (args.Exist(kArgRemote)) {
3237  m_IsRemote = static_cast<bool>(args[kArgRemote]);
3238  }
3239 }
3240 
// Registers the debugging flags ("verbose", "remote_verbose",
// "use_test_remote_service"). The body is compiled only when _BLAST_DEBUG
// evaluates to true; otherwise the function does nothing.
// NOTE(review): the signature line is missing from this listing.
3241 void
3243 {
3244 #if _BLAST_DEBUG
3245  arg_desc.SetCurrentGroup("Miscellaneous options");
3246  arg_desc.AddFlag("verbose", "Produce verbose output (show BLAST options)",
3247  true);
3248  arg_desc.AddFlag("remote_verbose",
3249  "Produce verbose output for remote searches", true);
3250  arg_desc.AddFlag("use_test_remote_service",
3251  "Send remote requests to test servers", true);
3252  arg_desc.SetCurrentGroup("");
3253 #endif /* _BLAST_DEBUG */
3254 }
3255 
// Reads the debugging flags into m_DebugOutput and m_RmtDebugOutput. The body
// is compiled only when _BLAST_DEBUG evaluates to true.
// NOTE(review): the signature line and the start of the statement executed
// for "use_test_remote_service" (which ends with the "blast4_test" argument)
// are missing from this listing.
3256 void
3258 {
3259 #if _BLAST_DEBUG
3260  m_DebugOutput = static_cast<bool>(args["verbose"]);
3261  m_RmtDebugOutput = static_cast<bool>(args["remote_verbose"]);
3262  if (args["use_test_remote_service"]) {
3265  "blast4_test");
3266  }
3267 #endif /* _BLAST_DEBUG */
3268 }
3269 
// Registers the HSP filtering arguments: culling limit, best-hit overhang,
// best-hit score edge and the subject-best-hit flag.
//
// NOTE(review): the signature line, the value types, the recommended values
// inserted into the help text, and any constraints/dependencies are missing
// from this listing.
3270 void
3272 {
3273  // culling limit
3274  arg_desc.SetCurrentGroup("Restrict search or results");
3275  arg_desc.AddOptionalKey(kArgCullingLimit, "int_value",
3276  "If the query range of a hit is enveloped by that of at "
3277  "least this many higher-scoring hits, delete the hit",
3280  // best hit algorithm arguments
3282 
3283  arg_desc.AddOptionalKey(kArgBestHitOverhang, "float_value",
3284  "Best Hit algorithm overhang value "
3285  "(recommended value: " +
3287  ")",
3295 
3296  arg_desc.AddOptionalKey(kArgBestHitScoreEdge, "float_value",
3297  "Best Hit algorithm score edge value "
3298  "(recommended value: " +
3300  ")",
 // flag enabling best hit per subject sequence
3308  arg_desc.AddFlag(kArgSubjectBestHit, "Turn on best hit per subject sequence", true);
3309 
3310  arg_desc.SetCurrentGroup("");
3311 }
3312 
// Copies each HSP filtering argument that was supplied into `opts`; options
// whose argument is absent are not modified.
// NOTE(review): the signature line carrying the function name and the `args`
// parameter is missing from this listing.
3313 void
3315  CBlastOptions& opts)
3316 {
3317  if (args[kArgCullingLimit]) {
3318  opts.SetCullingLimit(args[kArgCullingLimit].AsInteger());
3319  }
3320  if (args[kArgBestHitOverhang]) {
3321  opts.SetBestHitOverhang(args[kArgBestHitOverhang].AsDouble());
3322  }
3323  if (args[kArgBestHitScoreEdge]) {
3324  opts.SetBestHitScoreEdge(args[kArgBestHitScoreEdge].AsDouble());
3325  }
 // Flag argument: SetSubjectBestHit() is called without a value.
3326  if (args[kArgSubjectBestHit]) {
3327  opts.SetSubjectBestHit();
3328  }
3329 }
3330 
// Registers the MegaBLAST database index arguments (use-index switch and the
// deprecated index name) under "General search options".
// NOTE(review): the signature line and the trailing arguments of both calls
// (value type and, for the first, the default) are missing from this listing.
3331 void
3333 {
3334  arg_desc.SetCurrentGroup("General search options");
3335  arg_desc.AddDefaultKey(
3336  kArgUseIndex, "boolean",
3337  "Use MegaBLAST database index",
3339  arg_desc.AddOptionalKey(
3340  kArgIndexName, "string",
3341  "MegaBLAST database index name (deprecated; use only for old style indices)",
3343  arg_desc.SetCurrentGroup( "" );
3344 }
3345 
// Returns true when either the use-index argument or the index-name argument
// exists in `args` and has a value; false otherwise.
// NOTE(review): the signature line is missing from this listing.
3346 bool
3348 {
3349  if ( (args.Exist(kArgUseIndex) && args[kArgUseIndex].HasValue()) ||
3350  (args.Exist(kArgIndexName) && args[kArgIndexName].HasValue()) ) {
3351  return true;
3352  }
3353  return false;
3354 }
3355 
// Decides whether a MegaBLAST database index is used and, if so, configures
// it on `opts` through SetUseIndex. Nothing is done when the use-index
// argument is not defined or when subject sequences were supplied.
//
// Throws CInputException (eInvalidInput) when indexing is requested but
// neither an index name nor a database name is available.
//
// NOTE(review): the signature line carrying the function name and the `args`
// parameter is missing from this listing.
3356 void
3358  CBlastOptions& opts)
3359 {
3360  // MB Index does not apply to Blast2Sequences
3361  if( args.Exist( kArgUseIndex ) &&
3362  !(args.Exist( kArgSubject ) && args[kArgSubject])) {
3363 
3364  bool use_index = true;
3365  bool force_index = false;
3366  bool old_style_index = false;
3367 
 // An explicit "true" forces the index; an explicit "false" disables
 // it. With no value, use_index keeps its initial value of true.
3368  if( args[kArgUseIndex] ) {
3369  if( args[kArgUseIndex].AsBoolean() ) force_index = true;
3370  else use_index = false;
3371  }
3372 
 // A task other than "megablast" disables the index even if it was
 // forced above.
3373  if( args.Exist( kTask ) && args[kTask] &&
3374  args[kTask].AsString() != "megablast" ) {
3375  use_index = false;
3376  }
3377 
3378  if( use_index ) {
3379  string index_name;
3380 
 // An explicit index name takes precedence over the database name
 // and marks the index as old style.
3381  if( args.Exist( kArgIndexName ) && args[kArgIndexName] ) {
3382  index_name = args[kArgIndexName].AsString();
3383  old_style_index = true;
3384  }
3385  else if( args.Exist( kArgDb ) && args[kArgDb] ) {
3386  index_name = args[kArgDb].AsString();
3387  }
3388  else {
3389  NCBI_THROW(CInputException, eInvalidInput,
3390  "Can not deduce database index name" );
3391  }
3392 
3393  opts.SetUseIndex( true, index_name, force_index, old_style_index );
3394  }
3395  }
3396 }
3397 
3398 void
3400 {
3401  arg_desc.SetCurrentGroup("Input query options");
3402 
3403  // query filename
3404  arg_desc.AddDefaultKey(kArgQuery, "input_file",
3405  "Input file name",
3407  // for now it's either -query or -sra
3408  if( m_SRAaccessionEnabled ) {
3409  arg_desc.AddOptionalKey(kArgSraAccession, "accession",
3410  "Comma-separated SRA accessions",
3414  kArgQuery);
3415  }
3416 
3417  arg_desc.SetCurrentGroup("General search options");
3418 
3419  // report output file
3420  arg_desc.AddDefaultKey(kArgOutput, "output_file",
3421  "Output file name",
3424 
3425  if (m_GzipEnabled) {
3426  arg_desc.AddFlag(kArgOutputGzip, "Output will be compressed");
3427  }
3428 
3429  arg_desc.SetCurrentGroup("");
3430 }
3431 
3432 void
3434  CBlastOptions& /* opt */)
3435 {
3436  if (args.Exist(kArgQuery) && args[kArgQuery].HasValue() &&
3437  m_InputStream == NULL) {
3438 
3439  if (m_GzipEnabled &&
3440  NStr::EndsWith(args[kArgQuery].AsString(), ".gz", NStr::eNocase)) {
3442  args[kArgQuery].AsInputFile(),
3445  }
3446  else {
3447  m_InputStream = &args[kArgQuery].AsInputFile();
3448  }
3449  }
3450 
3451  if (args.Exist(kArgOutputGzip) && args[kArgOutputGzip]) {
3453  args[kArgOutput].AsOutputFile(),
3456  }
3457  else {
3458  m_OutputStream = &args[kArgOutput].AsOutputFile();
3459  }
3460 
3461  // stream for unaligned reads in magicblast
3462  if (args.Exist(kArgUnalignedOutput) && args[kArgUnalignedOutput]) {
3463  if (args.Exist(kArgOutputGzip) && args[kArgOutputGzip]) {
3465  args[kArgUnalignedOutput].AsOutputFile(),
3468  }
3469  else {
3470  m_UnalignedOutputStream = &args[kArgUnalignedOutput].AsOutputFile();
3471  }
3472  }
3473 }
3474 
/// Returns the input stream referenced by m_InputStream.
/// NOTE(review): the signature line (listing line 3476) is missing from this
/// rendering; confirm the qualified name against the original source file.
/// @return
///   Reference to the stream m_InputStream points to. The process is
///   terminated with abort() if m_InputStream is null.
3475 CNcbiIstream&
3477 {
3478  // programmer must ensure the ExtractAlgorithmOptions method is called
3479  // before this method is invoked
3480  if ( !m_InputStream ) {
3481  abort();
3482  }
3483  return *m_InputStream;
3484 }
3485 
3486 CNcbiOstream&
3488 {
3489  // programmer must ensure the ExtractAlgorithmOptions method is called
3490  // before this method is invoked
3492  return *m_OutputStream;
3493 }
3494 
3495 void
3497 {
3500 }
3501 
3502 void
3504 {
3505  arg_desc.SetCurrentGroup("Search strategy options");
3506 
3508  "filename",
3509  "Search strategy to use",
3512  "filename",
3513  "File name to record the search strategy used",
3518 
3519  arg_desc.SetCurrentGroup("");
3520 }
3521 
3522 void
3524  CBlastOptions& /* options */)
3525 {
3526 }
3527 
/// Returns the input file stream of the kArgInputSearchStrategy argument.
/// NOTE(review): the signature line (listing line 3529) is missing from this
/// rendering, so the method name used for this block is a guess; confirm
/// against the original source file.
/// @return
///   Pointer to the argument's input stream, or NULL when the argument is
///   not defined for the application or carries no value.
3528 CNcbiIstream*
3530 {
3531  CNcbiIstream* retval = NULL;
3532  if (args.Exist(kArgInputSearchStrategy) &&
3533  args[kArgInputSearchStrategy].HasValue()) {
3534  retval = &args[kArgInputSearchStrategy].AsInputFile();
3535  }
3536  return retval;
3537 }
3538 
/// Returns the output file stream of the kArgOutputSearchStrategy argument.
/// NOTE(review): the signature line (listing line 3540) is missing from this
/// rendering, so the method name used for this block is a guess; confirm
/// against the original source file.
/// @return
///   Pointer to the argument's output stream, or NULL when the argument is
///   not defined for the application or carries no value.
3539 CNcbiOstream*
3541 {
3542  CNcbiOstream* retval = NULL;
3543  if (args.Exist(kArgOutputSearchStrategy) &&
3544  args[kArgOutputSearchStrategy].HasValue()) {
3545  retval = &args[kArgOutputSearchStrategy].AsOutputFile();
3546  }
3547  return retval;
3548 }
3549 
3551 {
3554  m_IsUngapped = false;
3555 }
3556 
3559 {
3561 }
3562 
3565 {
3566  // We're recovering from a saved strategy or combining
3567  // CBlastOptions/CBlastOptionsHandle with command line options (in GBench,
3568  // see GB-1116), so we need to still extract
3569  // certain options from the command line, including overriding query
3570  // and/or database
3571  if (m_OptsHandle.NotEmpty()) {
3573  //opts.DebugDumpText(cerr, "OptionsBeforeLoop", 1);
3574  const bool mbidxargs_set = CMbIndexArgs::HasBeenSet(args);
3575  const bool dbargs_set = CBlastDatabaseArgs::HasBeenSet(args);
3577  if (dynamic_cast<CMbIndexArgs*>(&**arg)) {
3578  if (mbidxargs_set)
3579  (*arg)->ExtractAlgorithmOptions(args, opts);
3580  } else if (dynamic_cast<CBlastDatabaseArgs*>(&**arg)) {
3581  if (dbargs_set)
3583  } else {
3584  (*arg)->ExtractAlgorithmOptions(args, opts);
3585  }
3586  }
3587  m_IsUngapped = !opts.GetGappedMode();
3588  try { m_OptsHandle->Validate(); }
3589  catch (const CBlastException& e) {
3590  NCBI_THROW(CInputException, eInvalidInput, e.GetMsg());
3591  }
3592  //opts.DebugDumpText(cerr, "OptionsAfterLoop", 1);
3593  return m_OptsHandle;
3594  }
3595 
3596  CBlastOptions::EAPILocality locality =
3597  (args.Exist(kArgRemote) && args[kArgRemote])
3600 
3601  // This is needed as a CRemoteBlast object and its options are instantiated
3602  // to create the search strategy
3603  if (GetExportSearchStrategyStream(args) ||
3605  locality = CBlastOptions::eBoth;
3606  }
3607 
3608  CRef<CBlastOptionsHandle> retval(x_CreateOptionsHandle(locality, args));
3609  CBlastOptions& opts = retval->SetOptions();
3611  (*arg)->ExtractAlgorithmOptions(args, opts);
3612  }
3613 
3614  m_IsUngapped = !opts.GetGappedMode();
3615  try { retval->Validate(); }
3616  catch (const CBlastException& e) {
3617  NCBI_THROW(CInputException, eInvalidInput, e.GetMsg());
3618  }
3619  return retval;
3620 }
3621 
/// Stores the task name for this object in m_Task.
/// @param task
///   Task name to record. In builds where _BLAST_DEBUG is non-zero it is
///   first passed to ThrowIfInvalidTask() (presumably throwing on an
///   unknown task - confirm against that function's definition); in other
///   builds it is stored without any check.
3622 void CBlastAppArgs::SetTask(const string& task)
3623 {
3624 #if _BLAST_DEBUG
3625  ThrowIfInvalidTask(task);
3626 #endif
3627  m_Task.assign(task);
3628 }
3629 
3630 /// Get the input stream
3632  return m_StdCmdLineArgs->GetInputStream();
3633 }
3634 /// Get the output stream
3637 }
3638 
3641 {
     // Builds a CArgDescriptions object and lets every element of `args`
     // register its own argument descriptions on it.
     // NOTE(review): the signature lines (listing lines 3639-3640) are
     // missing from this rendering; confirm against the original source.
     // The object is held by a unique_ptr while it is being filled in and is
     // handed to the caller as a raw pointer by release() at the end.
3642  unique_ptr<CArgDescriptions> retval(new CArgDescriptions);
3643 
3644  // Create the groups so that the ordering is established
3645  retval->SetCurrentGroup("Input query options");
3646  retval->SetCurrentGroup("General search options");
3647  retval->SetCurrentGroup("BLAST database options");
3648  retval->SetCurrentGroup("BLAST-2-Sequences options");
3649  retval->SetCurrentGroup("Formatting options");
3650  retval->SetCurrentGroup("Query filtering options");
3651  retval->SetCurrentGroup("Restrict search or results");
3652  retval->SetCurrentGroup("Discontiguous MegaBLAST options");
3653  retval->SetCurrentGroup("Statistical options");
3654  retval->SetCurrentGroup("Search strategy options");
3655  retval->SetCurrentGroup("Extension options");
     // Return to the unnamed group before the argument objects add their
     // descriptions.
3656  retval->SetCurrentGroup("");
3657 
3658 
3659  NON_CONST_ITERATE(TBlastCmdLineArgs, arg, args) {
3660  (*arg)->SetArgumentDescriptions(*retval);
3661  }
3662  return retval.release();
3663 }
3664 
3667  (CBlastOptions::EAPILocality locality, const string& task)
3668 {
3669  _ASSERT(!task.empty());
3671  SetTask(task);
3672  retval.Reset(CBlastOptionsFactory::CreateTask(GetTask(), locality));
3673  _ASSERT(retval.NotEmpty());
3674  return retval;
3675 }
3676 
3677 void
3679 {
3680  set<string> can_override;
3681  can_override.insert(kArgQuery);
3682  can_override.insert(kArgQueryLocation);
3683  can_override.insert(kArgSubject);
3684  can_override.insert(kArgSubjectLocation);
3685  can_override.insert(kArgUseLCaseMasking);
3686  can_override.insert(kArgDb);
3687  can_override.insert(kArgDbSize);
3688  can_override.insert(kArgEntrezQuery);
3689  can_override.insert(kArgDbSoftMask);
3690  can_override.insert(kArgDbHardMask);
3691  can_override.insert(kArgUseIndex);
3692  can_override.insert(kArgIndexName);
3693  can_override.insert(kArgStrand);
3694  can_override.insert(kArgParseDeflines);
3695  can_override.insert(kArgOutput);
3696  can_override.insert(kArgOutputFormat);
3697  can_override.insert(kArgNumDescriptions);
3698  can_override.insert(kArgNumAlignments);
3699  can_override.insert(kArgMaxTargetSequences);
3700  can_override.insert(kArgRemote);
3701  can_override.insert(kArgNumThreads);
3702  can_override.insert(kArgInputSearchStrategy);
3703  can_override.insert(kArgRemote);
3704  can_override.insert("remote_verbose");
3705  can_override.insert("verbose");
3706 
3707  // this stores the arguments (and their defaults) that cannot be overridden
3708  map<string, string> has_defaults;
3710  has_defaults[kArgCompBasedStats] =
3712  // FIX the line below for igblast, and add igblast options
3714  has_defaults[kTask] = m_Task;
3715  has_defaults[kArgOldStyleIndex] = kDfltArgOldStyleIndex;
3716 
3717  if (Blast_QueryIsProtein(prog)) {
3718  if (NStr::Find(m_Task, "blastp") != NPOS ||
3719  NStr::Find(m_Task, "psiblast") != NPOS) {
3720  has_defaults[kArgSegFiltering] = kDfltArgNoFiltering;
3721  } else {
3722  has_defaults[kArgSegFiltering] = kDfltArgSegFiltering;
3723  }
3724  has_defaults[kArgLookupTableMaskingOnly] =
3726  has_defaults[kArgGapTrigger] =
3728  } else {
3729  has_defaults[kArgDustFiltering] = kDfltArgDustFiltering;
3730  has_defaults[kArgLookupTableMaskingOnly] =
3732  has_defaults[kArgGapTrigger] =
3734  }
3735  has_defaults[kArgOffDiagonalRange] =
3737  has_defaults[kArgMaskLevel] = kDfltArgMaskLevel;
3738  has_defaults[kArgMaxIntronLength] =
3742  // pssm engine/psiblast default options
3743  has_defaults[kArgPSIPseudocount] =
3745  has_defaults[kArgPSIInclusionEThreshold] =
3747  has_defaults[kArgPSINumIterations] =
3749 
3750  // get arguments, remove the supported ones and warn about those that
3751  // cannot be overridden.
3752  typedef vector< CRef<CArgValue> > TArgs;
3753  TArgs arguments = args.GetAll();
3754  ITERATE(TArgs, a, arguments) {
3755  const string& arg_name = (*a)->GetName();
3756  const string& arg_value = (*a)->AsString();
3757  // if it has a default value, ignore it if it's not different from the
3758  // default, otherwise, issue a warning
3759  if (has_defaults.find(arg_name) != has_defaults.end()) {
3760  if (has_defaults[arg_name] == arg_value) {
3761  continue;
3762  } else {
3763  if (arg_name == kTask && arg_value == "megablast") {
3764  // No need to issue warning here, as it's OK to change this
3765  continue;
3766  }
3767  ERR_POST(Warning << arg_name << " cannot be overridden when "
3768  "using a search strategy");
3769  }
3770  }
3771  // if the argument cannot be overridden, issue a warning
3772  if (can_override.find(arg_name) == can_override.end()) {
3773  ERR_POST(Warning << arg_name << " cannot be overridden when "
3774  "using a search strategy");
3775  }
3776  }
3777 }
3778 
3781 {
3782  if(m_OptsHandle.Empty())
3783  {
3784  NCBI_THROW(CInputException, eInvalidInput, "Empty Blast Options Handle");
3785  }
3786 
3787  // We're recovering from a saved strategy, so we need to still extract
3788  // certain options from the command line, including overriding query
3789  // and/or database
3791  // invoke ExtractAlgorithmOptions on certain argument classes, i.e.: those
3792  // that should have their arguments overridden
3796  m_DebugArgs->ExtractAlgorithmOptions(args, opts);
3798  m_MTArgs->ExtractAlgorithmOptions(args, opts);
3799  if (CBlastDatabaseArgs::HasBeenSet(args)) {
3801  }
3802  if (CMbIndexArgs::HasBeenSet(args)) {
3804  if (dynamic_cast<CMbIndexArgs*>(arg->GetPointer()) != NULL) {
3805  (*arg)->ExtractAlgorithmOptions(args, opts);
3806  }
3807  }
3808  }
3809  m_IsUngapped = !opts.GetGappedMode();
3811  try { m_OptsHandle->Validate(); }
3812  catch (const CBlastException& e) {
3813  NCBI_THROW(CInputException, eInvalidInput, e.GetMsg());
3814  }
3815  return m_OptsHandle;
3816 }
3817 
3818 END_SCOPE(blast)
User-defined methods of the data storage class.
Declares singleton objects to store the version and reference for the BLAST engine.
USING_SCOPE(objects)
static void s_GetTaxIDList(const string &in, bool isFile, bool isNegativeList, CRef< CSearchDatabase > &sdb, bool isTargetOnly)
static bool s_IsDefaultWordThreshold(EProgram program, double threshold)
Definition: blast_args.cpp:585
static void s_ValidateCustomDelim(string custom_fmt_spec, string customDelim)
static void s_SetCompositionBasedStats(CBlastOptions &opt, const string &comp_stat_string, bool smith_waterman_value, bool *ungapped)
Auxiliary function to set the composition based statistics and smith waterman options.
Definition: blast_args.cpp:820
CArgDescriptions * SetUpCommandLineArguments(TBlastCmdLineArgs &args)
Create a CArgDescriptions object and invoke SetArgumentDescriptions for each of the TBlastCmdLineArgs...
static string s_RegisterOMDataLoader(CRef< CSeqDB > db_handle)
Interface for converting blast-related command line arguments into blast options.
vector< CRef< IBlastCmdLineArgs > > TBlastCmdLineArgs
Type definition of a container of IBlastCmdLineArgs.
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
@ eHardSubjMasking
Definition: blast_def.h:238
@ eSoftSubjMasking
Definition: blast_def.h:237
Declares the BLAST exception class.
Interface for converting sources of sequence data into blast sequence input.
TSeqRange ParseSequenceRange(const string &range_str, const char *error_prefix=NULL)
Parse and extract a sequence range from argument provided to this function.
CRef< objects::CScope > ReadSequencesToBlast(CNcbiIstream &in, bool read_proteins, const TSeqRange &range, bool parse_deflines, bool use_lcase_masking, CRef< CBlastQueryVector > &sequences, bool gaps_to_Ns=false)
Read sequence input for BLAST.
Routines for creating nucleotide BLAST lookup tables.
EDiscWordType
General types of discontiguous word templates.
@ eMBWordOptimal
@ eMBWordCoding
@ eMBWordTwoTemplates
#define PSI_INCLUSION_ETHRESH
Defaults for PSI-BLAST and DELTA-BLAST options.
#define BLAST_HITLIST_SIZE
Number of database sequences to save hits for.
#define BLAST_WORD_THRESHOLD_BLASTX
default threshold (blastx)
Int2 BLAST_GetSuggestedThreshold(EBlastProgramType program_number, const char *matrixName, double *threshold)
Get thresholds for word-finding suggested by Stephen Altschul.
@ eDynProgScoreOnly
standard affine gapping
Int2 BLAST_GetSuggestedWindowSize(EBlastProgramType program_number, const char *matrixName, Int4 *window_size)
Get window sizes for two hit algorithm suggested by Stephen Altschul.
#define BLAST_GAP_TRIGGER_NUCL
default bit score that will trigger a gapped extension for blastn
#define MAX_DB_WORD_COUNT_MAPPER
Default max frequency for a database word.
#define BLAST_EXPECT_VALUE
Default parameters for saving hits.
#define DELTA_INCLUSION_ETHRESH
Inclusion threshold for DELTA-BLAST.
#define BLAST_WORD_THRESHOLD_BLASTP
neighboring word score thresholds; a threshold of zero means that only query and subject words that m...
#define BLAST_GAP_TRIGGER_PROT
default bit score that will trigger gapped extension
#define PSI_PSEUDO_COUNT_CONST
Pseudo-count constant for PSI-BLAST.
@ eDynProgTbck
standard affine gapping
Int2 PSIBlastOptionsNew(PSIBlastOptions **psi_options)
Initialize default options for PSI BLAST.
#define BLAST_GENETIC_CODE
Default genetic code for query and/or database.
#define BLAST_WORD_THRESHOLD_TBLASTN
default neighboring threshold (tblastn/rpstblastn)
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
Boolean Blast_SubjectIsNucleotide(EBlastProgramType p)
Returns true if the subject is nucleotide.
Definition: blast_program.c:53
Boolean Blast_QueryIsNucleotide(EBlastProgramType p)
Returns true if the query is nucleotide.
Definition: blast_program.c:43
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
Definition: blast_program.c:40
Boolean Blast_ProgramIsRpsBlast(EBlastProgramType p)
Returns true if program is RPS-BLAST (i.e.
Definition: blast_program.c:73
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastn
Definition: blast_program.h:74
PSIDiagnosticsRequest * PSIDiagnosticsRequestNewEx(Boolean save_ascii_pssm)
Allocates a PSIDiagnosticsRequest structure, setting fields to their default values for their use in ...
Definition: blast_psi.c:591
Int2 BLAST_GetProteinGapExistenceExtendParams(const char *matrixName, Int4 *gap_existence, Int4 *gap_extension)
Extract the recommended gap existence and extension values.
Definition: blast_stat.c:3374
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
@ eTblastx
Translated nucl-Translated nucl.
Definition: blast_types.hpp:62
@ eBlastn
Nucl-Nucl (traditional blastn)
Definition: blast_types.hpp:58
@ eRPSBlast
protein-pssm (reverse-position-specific BLAST)
Definition: blast_types.hpp:63
@ eBlastp
Protein-Protein.
Definition: blast_types.hpp:59
@ ePSIBlast
PSI Blast.
Definition: blast_types.hpp:67
@ eTblastn
Protein-Translated nucl.
Definition: blast_types.hpp:61
@ eDeltaBlast
Delta Blast.
Definition: blast_types.hpp:71
@ ePSITblastn
PSI Tblastn.
Definition: blast_types.hpp:68
@ eRPSTblastn
nucleotide-pssm (RPS blast with translated query)
Definition: blast_types.hpp:64
@ eBlastx
Translated nucl-Protein.
Definition: blast_types.hpp:60
Auxiliary class to validate the genetic code input.
Definition: blast_args.cpp:994
virtual string GetUsage(void) const
Overloaded method from CArgAllow.
virtual bool Verify(const string &value) const
Overloaded method from CArgAllow.
Definition: blast_args.cpp:997
Class to constrain the length of the file name passed to a given CArgDescriptions key.
Class to constrain the values of an argument to those in between the values specified in the construc...
Class to constrain the values of an argument to those greater than or equal to the value specified in...
Class to constrain the values of an argument to those less than or equal to the value specified in th...
CArgAllow_Doubles –.
Definition: ncbiargs.hpp:1781
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgAllow –.
Definition: ncbiargs.hpp:1488
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
Auxiliary class to store the name of an output file, which is reset every time its GetStream method i...
CRef< CRemoteArgs > m_RemoteArgs
remote vs. local execution options
CRef< CBlastOptionsHandle > SetOptionsForSavedStrategy(const CArgs &args)
Combine the command line arguments into a CBlastOptions object recovered from saved search strategy.
string GetTask() const
Get the task for this object.
virtual CNcbiIstream & GetInputStream()
Get the input stream.
CRef< CBlastOptionsHandle > m_OptsHandle
The BLAST options handle, only non-NULL if assigned via SetOptionsHandle, i.e.
CRef< CQueryOptionsArgs > m_QueryOptsArgs
query options object
CRef< CBlastDatabaseArgs > m_BlastDbArgs
database/subject object
virtual CRef< CBlastOptionsHandle > x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, const CArgs &args)=0
Create the options handle based on the command line arguments.
CRef< CBlastOptionsHandle > SetOptions(const CArgs &args)
Extract the command line arguments into a CBlastOptionsHandle object.
CRef< CSearchStrategyArgs > m_SearchStrategyArgs
arguments for dealing with search strategies
string m_Task
Task specified in the command line.
CRef< CDebugArgs > m_DebugArgs
Debugging arguments.
CRef< CBlastOptionsHandle > x_CreateOptionsHandleWithTask(CBlastOptions::EAPILocality locality, const string &task)
Creates the BLAST options handle based on the task argument.
CBlastAppArgs()
Default constructor.
CRef< CMTArgs > m_MTArgs
multi-threaded options
CArgDescriptions * SetCommandLine()
Set the command line arguments.
CRef< CFormattingArgs > m_FormattingArgs
formatting options
void x_IssueWarningsForIgnoredOptions(const CArgs &args)
Issue warnings when recovering from a search strategy (command line applications only)
bool m_IsUngapped
Is this application being run ungapped.
TBlastCmdLineArgs m_Args
Set of command line argument objects.
CNcbiOstream * GetExportSearchStrategyStream(const CArgs &args)
Get the output stream for the search strategy.
void SetTask(const string &task)
Set the task for this object.
virtual CNcbiOstream & GetOutputStream()
Get the output stream.
CRef< CStdCmdLineArgs > m_StdCmdLineArgs
standard command line arguments class
Argument class to collect database/subject arguments.
Definition: blast_args.hpp:889
CBlastDatabaseArgs(bool request_mol_type=false, bool is_rpsblast=false, bool is_igblast=false, bool is_mapper=false, bool is_kblast=false)
Constructor.
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opts)
Interface method,.
static bool HasBeenSet(const CArgs &args)
Auxiliary function to determine if the database/subject sequence has been set.
CRef< objects::CScope > m_Scope
CScope object in which all subject sequences read are kept.
Definition: blast_args.hpp:985
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_IsMapper
true for short read mapper
Definition: blast_args.hpp:982
bool IsProtein() const
Is the database/subject protein?
Definition: blast_args.hpp:926
bool m_SupportsDatabaseMasking
true if it's supported
Definition: blast_args.hpp:987
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
Definition: blast_args.hpp:893
bool m_IsProtein
Is the database/subject(s) protein?
Definition: blast_args.hpp:981
bool m_RequestMoleculeType
Determines whether the database's molecule type should be requested in the command line,...
Definition: blast_args.hpp:974
bool m_IsIgBlast
true if the search is Ig-BLAST
Definition: blast_args.hpp:979
CRef< IQueryFactory > m_Subjects
The subject sequences.
Definition: blast_args.hpp:984
bool m_IsRpsBlast
true if the search is RPS-BLAST
Definition: blast_args.hpp:978
CRef< CSearchDatabase > m_SearchDb
Description of the BLAST database.
Definition: blast_args.hpp:973
bool m_SupportIPGFiltering
true if IPG filtering is supported
Definition: blast_args.hpp:988
bool m_IsKBlast
true for Kblastp
Definition: blast_args.hpp:983
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: bdbloader.cpp:52
static string GetLoaderNameFromArgs(CConstRef< CSeqDB > db_handle)
Definition: bdbloader.cpp:164
Defines BLAST error codes (user errors included)
Encapsulates ALL the BLAST algorithm's options.
EAPILocality
Enumerates the possible contexts in which objects of this type can be used.
@ eLocal
To be used for running BLAST locally.
@ eRemote
To be used when running BLAST remotely.
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
Definition: version.hpp:53
bool m_Is2and3Supported
Are options 2 and 3 supported.
Definition: blast_args.hpp:421
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:885
string m_ZeroOptDescr
Non standard description for option zero.
Definition: blast_args.hpp:425
string m_DefaultOpt
Default option.
Definition: blast_args.hpp:423
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:764
CCompressOStream –.
bool m_DebugOutput
Should debugging (verbose) output be printed.
bool m_RmtDebugOutput
Should debugging (verbose) output be printed for remote BLAST.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CDecompressIStream –.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CRef< CSearchDatabase > m_DomainDb
Conserved Domain Database.
Definition: blast_args.hpp:750
bool m_ShowDomainHits
Is printing CDD hits requested.
Definition: blast_args.hpp:753
CDirEntry –.
Definition: ncbifile.hpp:262
static const string kTemplType_CodingAndOptimal
Value to specify coding+optimal template type.
Definition: blast_args.hpp:392
static const string kTemplType_Optimal
Value to specify optimal template type.
Definition: blast_args.hpp:390
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:731
static const string kTemplType_Coding
Value to specify coding template type.
Definition: blast_args.hpp:388
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:691
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:327
bool m_QueryIsProtein
true if the query is protein
Definition: blast_args.hpp:352
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:384
void x_TokenizeFilteringArgs(const string &filtering_args, vector< string > &output) const
Auxiliary method to tokenize the filtering string.
Definition: blast_args.cpp:372
bool m_FilterByDefault
Should filtering be applied by default?
Definition: blast_args.hpp:353
Class implements different ad-hoc unreliable file format identifications.
@ eBinaryASN
Binary ASN.1.
@ eTextASN
Text ASN.1.
TSeqPos m_NumDescriptions
Number of 1-line descr. to show.
TSeqPos m_DfltNumDescriptions
Default value for num descriptions.
TSeqPos m_NumAlignments
Number of alignments to show.
virtual bool ArchiveFormatRequested(const CArgs &args) const
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
EFormatFlags m_FormatFlags
virtual void ParseFormattingString(const CArgs &args, EOutputFormat &fmt_type, string &custom_fmt_spec, string &custom_delim) const
Parses the output format command line option value, returns the requested output format type and any ...
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opts)
Interface method,.
EOutputFormat
Defines the output formats supported by our command line formatter.
@ eEndValue
Sentinel value for error checking.
@ ePairwise
Standard pairwise alignments.
@ eTabular
Tabular output.
@ eSAM
SAM format.
@ eCommaSeparatedValues
Comma-separated values.
@ eAsnText
ASN.1 text output.
@ eArchiveFormat
BLAST archive format.
@ eAirrRearrangement
igblast AIRR rearrangement, 19
@ eFasta
unaligned reads in magicblast
@ eFlatQueryAnchoredNoIdentities
@ eTabularWithComments
Tabular output with comments.
bool m_IsIgBlast
IgBlast has a different default num_alignments.
string m_CustomOutputFormatSpec
The format specification for custom output, e.g.
EOutputFormat m_OutputFormat
Choice of formatting output.
TSeqPos m_DfltNumAlignments
Default value for num alignments.
bool m_ShowGis
Display NCBI GIs?
bool m_Html
Display HTML output?
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:973
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:957
bool m_QueryIsProtein
true if the query is protein
Definition: blast_args.hpp:506
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:900
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:917
bool m_QueryIsProtein
true if the query is protein
Definition: blast_args.hpp:320
bool m_IsRpsBlast
true if the search is RPS-BLAST
Definition: blast_args.hpp:321
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:250
bool m_IsIgBlast
true if the search is igblast
Definition: blast_args.hpp:325
bool m_IsTblastx
true if the search is tblastx
Definition: blast_args.hpp:324
bool m_ShowPercentIdentity
true if the percent identity option should be shown
Definition: blast_args.hpp:322
CGenericSearchArgs(bool query_is_protein=true, bool is_rpsblast=false, bool show_perc_identity=false, bool is_tblastx=false, bool is_igblast=false, bool suppress_sum_stats=false)
Constructor.
Definition: blast_args.hpp:303
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:136
bool m_SuppressSumStats
true if search is blastn or blastp
Definition: blast_args.hpp:326
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
ETarget m_Target
Genetic code target.
Definition: blast_args.hpp:486
@ eQuery
Query genetic code.
Definition: blast_args.hpp:467
@ eDatabase
Database genetic code.
Definition: blast_args.hpp:468
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opts)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CRef< CIgBlastOptions > m_IgOptions
Igblast options to fill.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_IsProtein
Is this a protein search?
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CRef< objects::CScope > m_Scope
scope to get sequences
Defines user input exceptions.
double m_JDistance
Jaccard distance.
Definition: blast_args.hpp:705
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
int m_CandidateSeqs
Number of candidate sequences to try BLAST on.
Definition: blast_args.hpp:714
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
int m_MinHits
Minimum number of hits in LSH phase.
Definition: blast_args.hpp:708
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:944
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:928
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
size_t m_NumThreads
Number of threads to spawn.
void x_ExtractAlgorithmOptions(const CArgs &args)
CMTArgs(size_t default_num_threads=CThreadable::kMinNumThreads, EMTMode mt_mode=eNotSupported)
Default Constructor.
EMTMode m_MTMode
@ eSplitByQueries
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
EOutputFormat m_UnalignedOutputFormat
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opt)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CNcbiIstream * m_MateInputStream
Definition: blast_args.hpp:880
EInputFormat m_InputFormat
Definition: blast_args.hpp:877
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opt)
Interface method,.
vector< string > m_SraAccessions
Definition: blast_args.hpp:878
unique_ptr< CDecompressIStream > m_DecompressIStream
Definition: blast_args.hpp:881
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:621
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:631
Argument class to retrieve megablast database indexing options.
static bool HasBeenSet(const CArgs &args)
Auxiliary function to determine if the megablast database indexing options have been set.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opts)
Interface method,.
static CNcbiApplication * Instance(void)
Singleton method.
Definition: ncbiapp.cpp:264
CNcbiEnvironment –.
Definition: ncbienv.hpp:110
CNcbiOstrstreamToString class helps convert CNcbiOstrstream to a string Sample usage:
Definition: ncbistre.hpp:802
CNcbiRegistry –.
Definition: ncbireg.hpp:913
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:668
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:639
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:513
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:498
Wrapper class for PSIBlastOptions .
Definition: blast_aux.hpp:330
Wrapper class for PSIDiagnosticsRequest .
Definition: blast_aux.hpp:347
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:77
string m_ProgDesc
Application's description.
Definition: blast_args.hpp:193
string m_ProgName
Application's name.
Definition: blast_args.hpp:192
CProgramDescriptionArgs(const string &program_name, const string &program_description)
Constructor.
Definition: blast_args.cpp:71
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
bool m_SaveLastPssm
Save PSSM after the last database search.
Definition: blast_args.hpp:642
CRef< CAutoOutputFileReset > m_AsciiMatrixOutput
ASCII matrix output file.
Definition: blast_args.hpp:634
bool m_IsDeltaBlast
Are the arguments set up for Delta Blast.
Definition: blast_args.hpp:639
@ eProteinDb
Traditional, iterated PSI-BLAST.
Definition: blast_args.hpp:551
@ eNucleotideDb
PSI-Tblastn, non-iterated.
Definition: blast_args.hpp:552
CRef< CAutoOutputFileReset > m_CheckPointOutput
checkpoint output file
Definition: blast_args.hpp:632
ETargetDatabase m_DbTarget
Molecule of the database.
Definition: blast_args.hpp:628
CRef< objects::CPssmWithParameters > x_CreatePssmFromMsa(CNcbiIstream &input_stream, CBlastOptions &opt, bool save_ascii_pssm, unsigned int msa_master_idx, bool ignore_pssm_tmpl_seq)
Auxiliary function to create a PSSM from a multiple sequence alignment file.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CRef< objects::CPssmWithParameters > m_Pssm
PSSM.
Definition: blast_args.hpp:636
size_t m_NumIterations
number of iterations to perform
Definition: blast_args.hpp:630
This class is a concrete strategy for IPssmInputData which converts the CLUSTALW-style output contain...
bool m_IsDeltaBlast
Are these arguments for Delta Blast.
Definition: blast_args.hpp:526
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Computes a PSSM as specified in PSI-BLAST.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
bool m_UseLCaseMask
use lowercase masking in FASTA input
Definition: blast_args.hpp:815
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
objects::ENa_strand m_Strand
Strand(s) to search.
Definition: blast_args.hpp:811
TSeqRange m_Range
range to restrict the query sequence(s)
Definition: blast_args.hpp:813
bool m_ParseDeflines
Should the deflines be parsed?
Definition: blast_args.hpp:817
bool m_QueryCannotBeNucl
only false for blast[xn] and tblastx; true in case of PSI-BLAST
Definition: blast_args.hpp:821
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:524
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:551
bool m_IsRemote
Should the search be executed remotely?
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CScope –.
Definition: scope.hpp:92
Blast Search Subject.
Argument class to import/export the search strategy.
Definition: blast_args.hpp:531
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CNcbiIstream * GetImportStream(const CArgs &args) const
Get the input stream for the search strategy.
CNcbiOstream * GetExportStream(const CArgs &args) const
Get the output stream for the search strategy.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CSeqDBFileGiList.
CSeqDBGiList.
void AddTaxIds(const set< TTaxId > &tax_ids)
EStatType
Counts statistics formats.
static EStatType DiscoverStatType(string const &name)
Return the format of the counts statistics file.
Root class for all serialization exceptions.
Definition: exception.hpp:50
bool m_GzipEnabled
If true input file will be decompressed with gzip if filename ends with ".gz".
Definition: blast_args.hpp:165
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
CNcbiIstream & GetInputStream() const
Get the input stream for a command line application.
unique_ptr< CDecompressIStream > m_DecompressIStream
Definition: blast_args.hpp:156
CNcbiOstream & GetOutputStream() const
Get the output stream for a command line application.
CRef< CTmpFile > m_QueryTmpInputFile
ASN.1 specification of query sequences when read from a saved search strategy.
Definition: blast_args.hpp:161
unique_ptr< CCompressOStream > m_CompressOStream
Definition: blast_args.hpp:157
CNcbiOstream * m_OutputStream
Application's output stream.
Definition: blast_args.hpp:155
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
CNcbiIstream * m_InputStream
Application's input stream.
Definition: blast_args.hpp:154
bool m_SRAaccessionEnabled
If true, option to specify SRA runs will be presented as possible query input.
Definition: blast_args.hpp:169
void SetInputStream(CRef< CTmpFile > input_file)
Set the input stream if read from a saved search strategy.
CNcbiOstream * m_UnalignedOutputStream
Output stream to report unaligned sequences/reads.
Definition: blast_args.hpp:172
unique_ptr< CCompressOStream > m_UnalignedCompressOStream
Definition: blast_args.hpp:173
Simple implementation of ILineReader for i(o)streams.
CStringException –.
Definition: ncbistr.hpp:4506
static unsigned int GetCpuCount(void)
Return number of active CPUs/cores (never less than 1).
const set< string > m_SupportedTasks
Set of supported tasks by this command line argument.
Definition: blast_args.hpp:215
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:111
CTaskCmdLineArgs(const set< string > &supported_tasks, const string &default_task)
Constructor.
Definition: blast_args.cpp:84
string m_DefaultTask
Default task for this command line argument.
Definition: blast_args.hpp:217
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:95
Class to retrieve taxonomic information for filtering BLASTDBs.
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:482
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:468
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method,.
Definition: blast_args.cpp:603
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Definition: blast_args.cpp:571
BLAST Command line arguments design The idea is to have several small objects (subclasses of IBlastCm...
Definition: blast_args.hpp:84
IRWRegistry –.
Definition: ncbireg.hpp:407
const_iterator end() const
Definition: map.hpp:152
const_iterator find(const key_type &key) const
Definition: map.hpp:153
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
bool empty() const
Definition: set.hpp:133
const_iterator find(const key_type &key) const
Definition: set.hpp:137
const_iterator end() const
Definition: set.hpp:136
const string kArgMatrixName
Argument for scoring matrix.
const string kArgWindowMaskerDatabase
Argument to specify a path to a Window Masker database.
const string kArgGLChainType
Argument to specify the germline database chaintype name for igblast.
const string kArgAsciiPssmOutputFile
Argument to specify the file name for saving the ASCII representation of the PSSM.
const string kArgMaxDbWordCount
Argument to specify a maximum number of times a word can be repeated in a database.
const string kArgGLOrigin
Argument to specify the germline origin for igblast.
const string kDfltArgJDistance
Jaccard default value.
const string kArgPSIPseudocount
Argument to specify the pseudo-count value used when constructing PSSM.
const string kArgNoGreedyExtension
Argument to specify non-greedy dynamic programming extension.
const string kDfltArgApplyFiltering
Default argument to specify filtering.
const string kArgPSIOutputChkPntFile
Argument to specify a 'checkpoint' file to write the PSSM.
const string kArgSplice
Argument to specify whether to search for spliced alignments.
const string kArgMinRawGappedScore
Argument for minimum raw gapped score for preliminary gapped and traceback stages.
const string kArgGLNumAlign
Argument to specify the number of alignments for germline database.
const string kArgLookupStride
Argument to specify the stride when creating a lookup table.
const string kArgDbSize
Effective length of BLAST database.
const string kArgDbGeneticCode
Database genetic code.
const string kArgPSIInclusionEThreshold
Argument to specify the evalue inclusion threshold for considering aligned sequences for PSSM constru...
const string kArgRevFwd
Argument to specify reverse/forward strand specificity.
const string kArgPSIInputChkPntFile
Argument to specify a 'checkpoint' file to recover the PSSM from.
const string kArgScore
Argument to specify cutoff score for accepting a spliced alignment.
const string kArgMaxIntronLength
Argument to specify the maximum length of an intron when linking multiple distinct alignments (applic...
const string kArgTranslate
Argument to specify if Igblast alignment should be translated to protein.
const bool kDfltArgParseDeflines
Default argument to specify whether sequences deflines should be parsed.
const string kArgDMBTemplateLength
Argument to specify the discontinuous megablast template length.
const string kArgOutput
Output file name.
const string kArgClonotypeFile
Argument to specify the clonotype file.
const int kDfltArgCullingLimit
Default argument to specify the culling limit.
const string kArgPercentIdentity
Argument to specify the target percent identity.
const string kArgStrand
Argument to select the query strand(s) to search.
const string kDfltArgCompBasedStatsDelta
const string kArgDMBTemplateType
Argument to specify the discontinuous megablast template type.
const string kArgCandidateSeqs
Number of sequences to attempt BLAST on.
const string kArgOutputSearchStrategy
Argument to specify the file name to save the search strategy used for a BLAST search.
const string kArgDPenalty
Argument to specify mismatch penalty for D gene search.
const string kArgGapExtend
Argument to select the gap extending penalty.
const string kArgRemote
Argument to determine whether searches should be run locally or remotely.
const string kArgQueryLocation
Argument to specify a location to restrict the query sequence(s)
const string kArgDbHardMask
const string kArgDbSoftMask
List of filtering algorithms to apply to subjects as soft masking.
const string kArgOnlyStrandSpecific
Argument to specify only strand specific results.
const int kDfltArgMaxIntronLength
Default value for maximum intron length.
const double kDfltArgBestHitOverhang
Default argument for the overhang parameter to the best hit algorithm.
const string kArgJPenalty
Argument to specify mismatch penalty for J gene search.
const string kArgFilteringDb
Argument to specify a filtering database (i.e.
const string kArgSegFiltering
Argument to specify SEG filtering on query sequence(s)
const string kArgDbType
BLAST database molecule type.
const string kArgTaxIdListFile
Argument to specify file with taxonomy ids for filtering.
const string kArgUnalignedOutput
Argument to output unaligned reads in a separate file.
const string kArgNoTaxIdExpansion
Argument to not to resolve TaxId to descendant.
const string kArgMinJLength
Argument to specify minimal required J gene length.
const string kArgPrintMdTag
Argument to specify printing SAM MD tag.
const string kArgGappedXDropoff
Argument to select the gapped X dropoff value.
const string kArgUseSWTraceback
Argument to specify that Smith-Waterman algorithm should be used to compute locally optimal alignment...
const string kArgIndexName
Megablast database index name.
const string kArgGapOpen
Argument to select the gap opening penalty.
const string kArgDustFiltering
Argument to specify DUST filtering on query sequence(s)
const string kArgSubjectBestHit
Argument to select the best hit per subject sequence.
const string kArgQueryMate
Mates for the query sequences if given in a separate file.
const string kArgFinalGappedXDropoff
Argument to select the final gapped X dropoff value.
const string kArgBestHitOverhang
Argument to specify the overhang parameter to the best hit algorithm.
const string kArgNegativeSeqidList
argument for seqid list to exclude from a BLAST database search
const string kArgEntrezQuery
Entrez query.
const string kArgJDistance
KBLASTP arguments: specifies Jaccard distance (threshold)
const string kArgGLDatabase
Argument to specify the germline database name for igblast.
const string kArgGLFocusV
Argument to specify if Igblast alignment should restrict to V seg.
const string kTask
Task to perform.
const string kArgSraAccessionBatch
Argument to specify a file with a list of SRA accessions.
const string kArgLineLength
Argument to specify line length for displaying alignments.
const string kArgMaxTargetSequences
Argument to specify the maximum number of target sequences to keep (a.k.a.
const string kArgFrameShiftPenalty
Argument to specify the frame shift penalty.
const string kArgUseIndex
Flag to force using or not using megablast database index.
const bool kDfltArgUseIndex
Default value for megablast database index flag.
const string kArgMinDMatch
Argument to specify the Igblast min D gene match.
const string kDfltArgQuery
Default value for query sequence input.
const string kArgRpsDb
Argument to specify domain database name for DELTA-BLAST.
const string kArgQualityFilter
Argument to specify whether quality filtering is to be done.
const string kArgNegativeGiList
argument for gi list to exclude from a BLAST database search
const string kArgInputFormat
Argument to specify input format.
const string kArgLookupTableMaskingOnly
Argument to specify to mask query during lookup table creation.
const string kArgMismatch
Argument to select the nucleotide mismatch penalty.
const string kArgParseDeflines
Argument to specify if the query and subject sequences defline should be parsed.
const string kArgSaveAllPssms
Argument to specify whether to save PSSM after each psiblast iteration.
const string kDfltArgCandidateSeqs
const string kArgIgnoreMsaMaster
Argument to specify whether the template sequence (usually the query) should be ignored for the purpo...
const string kArgEvalue
Argument for expectation value cutoff.
const string kArgFwdRev
Argument to specify forward/reverse strand specificity.
const string kArgOldStyleIndex
Use old style megablast index.
const string kArgMaskLevel
const string kArgIgSeqType
Argument to specify IgBlast sequence type.
const string kArgGLDomainSystem
Argument to specify the Ig domain system.
const string kArgIpgList
IPG list file name to restrict BLAST database.
const string kArgMaxEditDist
Argument to specify a cutoff edit distance for an alignment.
const string kArgEnableSraCache
Argument to enable SRA caching in local files.
const bool kDfltArgUseLCaseMasking
Default argument to specify whether lowercase masking should be used.
const string kArgCullingLimit
Argument to specify the culling limit.
const string kArgGapTrigger
Argument to specify number of bits to initiate gapping.
const string kArgEffSearchSpace
Argument to specify the effective length of the search space.
const string kArgSubjectLocation
Argument to specify a location to restrict the subject sequence(s)
const string kArgOffDiagonalRange
Argument to select the off-diagonal scan range in the 2-hit wordfinder algorithm.
const string kDfltArgStrand
Default value for strand selection.
const string kArgPaired
Argument to specify whether mapped reads are paired.
const string kArgQueryCovHspPerc
Argument to specify min query coverage percentage for each hsp.
const string kDfltArgSegFiltering
Default arguments to apply SEG filtering on query sequence(s)
const string kArgMTMode
Argument to specify mt mode (split by db or split by queries)
const string kArgPSINumIterations
Argument to select the number of iterations to perform in PSI-BLAST.
const string kArgQuery
Query sequence(s)
const string kArgNumClonotype
Argument to specify number of clonotype to show.
const string kArgMinVLength
Argument to specify minimal required V length.
const string kArgNegativeIpgList
argument for IPG list to exclude from a BLAST database search
const string kArgNoUnaligned
Argument to turn off printing of unaligned reads.
const string kArgComplexityAdj
const string kArgMSAInputFile
Argument to specify a multiple sequence alignment file to create a PSSM from.
const string kArgUnalignedFormat
Argument to specify format for reporting unaligned reads.
const string kArgNegativeTaxIdList
Argument to specify negative taxonomy ids filtering.
const string kDfltArgOldStyleIndex
Default value for use old style megablast index.
const string kArgVPenalty
Argument to specify mismatch penalty for V gene search.
const string kDfltArgDustFiltering
Default arguments to apply DUST filtering on query sequence(s)
const string kArgSeqIdList
seqid list file name to restrict BLAST database
const string kDfltArgLookupTableMaskingOnlyProt
Default argument mask a protein query during lookup table construction.
const unsigned int kDfltArgPSINumIterations
const string kArgRevOnly
Argument to specify reverse-only strand specificity.
const string kArgDb
BLAST database name.
const string kArgOutputGzip
Argument to specify that the output will be compressed with gzip.
const string kArgCustomInternalData
Argument to specify custom internal data file.
const string kArgWindowMaskerTaxId
Argument to specify a taxid for Window Masker.
const string kArgCRegionNumAlign
Argument to specify the number of alignments for c gene db.
const string kArgWindowSize
Argument to select the window size in the 2-hit wordfinder algorithm.
const string kArgRefType
Reference type: genome or transcriptome.
const string kArgWordSize
Argument to select the wordfinder's word size.
const string kArgUseLCaseMasking
Argument to specify whether lowercase masking in the query sequence(s) should be interpreted as maski...
const string kArgNumThreads
Argument to determine the number of threads to use when running BLAST.
const string kDfltArgLookupTableMaskingOnlyNucl
Default argument mask a nucleotide query during lookup table construction.
const string kArgMatch
Argument to select the nucleotide match reward.
const string kDfltArgMaskLevel