NCBI C++ ToolKit
blast_args.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_args.hpp 100886 2023-09-25 13:53:51Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jason Papadopoulos
27  *
28  */
29 
30 /** @file blast_args.hpp
31  * Interface for converting blast-related command line
32  * arguments into blast options
33  */
34 
35 #ifndef ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP
36 #define ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP
37 
38 #include <corelib/ncbistd.hpp>
39 #include <corelib/ncbiargs.hpp>
44 #include <algo/blast/api/setup_factory.hpp> // for CThreadable
47 
48 #include <objmgr/scope.hpp> // for CScope
51 
53 
55 BEGIN_SCOPE(blast)
56 
57 /**
58  * BLAST Command line arguments design
59  * The idea is to have several small objects (subclasses of IBlastCmdLineArgs)
60  * which can do two things:
61  * 1) On creation, add flags/options/etc to a CArgs object
62  * 2) When passed in a CBlastOptions object, call the appropriate methods based
63  * on the CArgs options set when the NCBI application framework parsed the
64  * command line. If data collected by the small object (from the command line)
65  * cannot be applied to the CBlastOptions object, then it's provided to the
66  * application via some other interface methods.
67  *
68  * Each command line application will have its own argument class (e.g.:
69  * CPsiBlastAppArgs), which will contain several of the aforementioned small
70  * objects. It will create and hold a reference to a CArgs class as well as
71  * a CBlastOptionsHandle object, which it will pass to each of its small objects
72  * aggregated as data members and then return it to the caller (application)
73  *
74  * Categories of data to extract from command line options
75  * 1) BLAST algorithm options
76  * 2) Input/Output files, and their modifiers (e.g.: believe query defline)
77  * 3) BLAST database information (names, limitations, num db seqs)
78  * 4) Formatting options (html, display formats, etc)
79 */
80 
81 /** Interface definition for a generic command line option for BLAST
82  */
84 {
85 public:
86  /** Our virtual destructor */
87  virtual ~IBlastCmdLineArgs() {}
88 
89  /** Sets the command line descriptions in the CArgDescriptions object
90  * relevant to the subclass
91  * @param arg_desc the argument descriptions object [in|out]
92  */
93  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc) = 0;
94 
95  /** Extracts BLAST algorithmic options from the command line arguments into
96  * the CBlastOptions object. Default implementation does nothing.
97  * @param cmd_line_args Command line arguments parsed by the NCBI
98  * application framework [in]
99  * @param options object to which the appropriate options will be set
100  * [in|out]
101  */
102  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
103  CBlastOptions& options);
104 };
105 
106 /** Argument class to retrieve input and output streams for a command line
107  * program.
108  */
110 {
111 public:
112  /** Default constructor */
113  CStdCmdLineArgs() : m_InputStream(0), m_OutputStream(0),
114  m_GzipEnabled(false),
115  m_SRAaccessionEnabled(false),
116  m_UnalignedOutputStream(0) {};
117  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
118  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
119  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
120  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
121  CBlastOptions& options);
122  /** Get the input stream for a command line application */
123  CNcbiIstream& GetInputStream() const;
124  /** Get the output stream for a command line application */
125  CNcbiOstream& GetOutputStream() const;
126  /** Set the input stream if read from a saved search strategy */
127  void SetInputStream(CRef<CTmpFile> input_file);
128 
129  /** Set automatic decompression of the input file if the file name is
130  * recognized
131  * @param g If true input file will be unzipped if the file name ends with
132  * ".gz" [in]
133  */
134  void SetGzipEnabled(bool g) {m_GzipEnabled = g;}
135 
136  /** enables sra accession flag
137  * @param g If true "-sra" will be added (not compatible with "-query")
138  */
139  void SetSRAaccessionEnabled(bool g) {m_SRAaccessionEnabled = g;}
140 
141  /** Is there a separate output stream for unaligned sequences/reads
142  * (for magicblast)
143  * @return True if separate output stream has been set up, otherwise false
144  */
145  bool HasUnalignedOutputStream(void) const {return m_UnalignedOutputStream;}
146 
147  /** Get output stream for unaligned sequences/reads (for magicblast)
148  * @return Output stream for unaligned reads or NULL
149  */
151  {return m_UnalignedOutputStream;}
152 
153 private:
154  CNcbiIstream* m_InputStream; ///< Application's input stream
155  CNcbiOstream* m_OutputStream; ///< Application's output stream
156  unique_ptr<CDecompressIStream> m_DecompressIStream;
157  unique_ptr<CCompressOStream> m_CompressOStream;
158 
159  /// ASN.1 specification of query sequences when read from a saved search
160  /// strategy
162 
163  /// If true input file will be decompressed with gzip if filename ends
164  /// with ".gz"
166 
167  /// If true, option to specify SRA runs will be presented as possible
168  /// query input
170 
171  /// Output stream to report unaligned sequences/reads
173  unique_ptr<CCompressOStream> m_UnalignedCompressOStream;
174 };
175 
176 /** Argument class to populate an application's name and description */
178 {
179 public:
180  /**
181  * @brief Constructor
182  *
183  * @param program_name application's name [in]
184  * @param program_description application's description [in]
185  */
186  CProgramDescriptionArgs(const string& program_name,
187  const string& program_description);
188  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
189  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
190 
191 protected:
192  string m_ProgName; ///< Application's name
193  string m_ProgDesc; ///< Application's description
194 };
195 
196 /// Argument class to specify the tasks supported by a given program
198 {
199 public:
200  /** Constructor
201  * @param supported_tasks list of supported tasks [in]
202  * @param default_task One of the tasks above, to be displayed as
203  * default in the command line arguments (cannot be empty or absent from
204  * the set above) [in]
205  */
206  CTaskCmdLineArgs(const set<string>& supported_tasks,
207  const string& default_task);
208  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
209  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
210  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
211  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
212  CBlastOptions& options);
213 private:
214  /// Set of supported tasks by this command line argument
216  /// Default task for this command line argument
218 };
219 
220 /** Argument class to retrieve and set the window size BLAST algorithm
221  * option */
223 {
224 public:
225  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
226  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
227  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions
228  * @note this depends on the matrix already being set...
229  */
230  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
231  CBlastOptions& options);
232 };
233 
234 /** Argument class to retrieve and set the off-diagonal range used in 2-hit
235  algorithm */
237 {
238 public:
239  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
240  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
241  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions
242  * @note this depends on the matrix already being set...
243  */
244  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
245  CBlastOptions& options);
246 };
247 
248 /** Argument class to retrieve and set the word threshold BLAST algorithm
249  * option */
251 {
252 public:
253  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
254  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
255  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions
256  * @note this depends on the matrix already being set...
257  */
258  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
259  CBlastOptions& options);
260 };
261 
262 /** RMH: Argument class to retrieve and set the options specific to
263  * the RMBlastN algorithm
264  */
266 {
267 public:
268  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
269  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
270  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
271  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
272  CBlastOptions& options);
273 };
274 
275 /** Argument class to retrieve and set the scoring matrix name BLAST algorithm
276  * option */
278 {
279 public:
280  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
281  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
282  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
283  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
284  CBlastOptions& options);
285 };
286 
287 /** Argument class for general search BLAST algorithm options: evalue, gap
288  * penalties, query filter string, ungapped x-drop, initial and final gapped
289  * x-drop, word size, percent identity, and effective search space.
290  */
292 {
293 public:
294  /**
295  * @brief Constructor
296  *
297  * @param query_is_protein is the query sequence(s) protein? [in]
298  * @param is_rpsblast is it RPS-BLAST? [in]
299  * @param show_perc_identity should the percent identity be shown?
300  * @param is_igblast is it IG-BLAST? [in]
301  * Currently only supported for blastn [in]
302  */
303  CGenericSearchArgs(bool query_is_protein = true, bool is_rpsblast = false,
304  bool show_perc_identity = false, bool is_tblastx = false,
305  bool is_igblast = false, bool suppress_sum_stats = false)
306  : m_QueryIsProtein(query_is_protein), m_IsRpsBlast(is_rpsblast),
307  m_ShowPercentIdentity(show_perc_identity), m_IsTblastx(is_tblastx),
308  m_IsIgBlast(is_igblast), m_SuppressSumStats(suppress_sum_stats),
309  m_IsBlastn(false){}
310 
311  // Only supported and used by blastn for now
313 
314  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
315  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
316  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
317  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
318  CBlastOptions& options);
319 private:
320  bool m_QueryIsProtein; /**< true if the query is protein */
321  bool m_IsRpsBlast; /**< true if the search is RPS-BLAST */
322  bool m_ShowPercentIdentity; /**< true if the percent identity option should
323  be shown */
324  bool m_IsTblastx; /**< true if the search is tblastx */
325  bool m_IsIgBlast; /**< true if the search is igblast */
326  bool m_SuppressSumStats; /**< true if search is blastn or blastp */
328 
329 };
330 
331 /** Argument class for collecting filtering options */
333 {
334 public:
335  /**
336  * @brief Constructor
337  *
338  * @param query_is_protein is the query sequence(s) protein? [in]
339  * @param filter_by_default should filtering be applied by default? [in]
340  */
341  CFilteringArgs(bool query_is_protein = true,
342  bool filter_by_default = true)
343  : m_QueryIsProtein(query_is_protein),
344  m_FilterByDefault(filter_by_default) {}
345 
346  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
347  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
348  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
349  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
350  CBlastOptions& options);
351 private:
352  bool m_QueryIsProtein; /**< true if the query is protein */
353  bool m_FilterByDefault; /**< Should filtering be applied by default? */
354 
355  /**
356  * @brief Auxiliary method to tokenize the filtering string.
357  *
358  * @param filtering_args string to tokenize [in]
359  * @param output vector with tokens [in|out]
360  */
361  void x_TokenizeFilteringArgs(const string& filtering_args,
362  vector<string>& output) const;
363 };
364 
365 /// Defines values for match and mismatch in nucleotide comparisons as well as
366 /// non-greedy extension
368 {
369 public:
370  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
371  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
372  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
373  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
374  CBlastOptions& options);
375 };
376 
377 /// Argument class to retrieve discontiguous megablast arguments
379 {
380 public:
381  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
382  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
383  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
384  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
385  CBlastOptions& options);
386 
387  /// Value to specify coding template type
388  static const string kTemplType_Coding;
389  /// Value to specify optimal template type
390  static const string kTemplType_Optimal;
391  /// Value to specify coding+optimal template type
392  static const string kTemplType_CodingAndOptimal;
393 };
394 
395 /** Argument class for collecting composition based statistics options */
397 {
398 public:
399  /// Constructor
400  ///@param is_2and3supported Are composition based statistics options 2 and
401  /// 3 supported [in]
402  ///@param default_option Default composition based statistics option [in]
403  ///@param zero_option_descr Non-standard description for composition
404  /// based statistics option zero [in]
405  CCompositionBasedStatsArgs(bool is_2and3supported = true,
406  const string& default_option
408  const string& zero_option_descr = "")
409  : m_Is2and3Supported(is_2and3supported),
410  m_DefaultOpt(default_option),
411  m_ZeroOptDescr(zero_option_descr) {}
412 
413  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
414  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
415  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
416  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
417  CBlastOptions& options);
418 
419 protected:
420  /// Are options 2 and 3 supported
422  /// Default option
423  string m_DefaultOpt;
424  /// Non standard description for option zero
426 };
427 
428 /** Argument class for collecting gapped options */
430 {
431 public:
432  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
433  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
434  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
435  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
436  CBlastOptions& options);
437 };
438 
439 /** Argument class for collecting the largest intron size */
441 {
442 public:
443  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
444  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
445  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
446  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
447  CBlastOptions& options);
448 };
449 
450 /// Argument class to collect the frame shift penalty for out-of-frame searches
452 {
453 public:
454  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
455  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
456  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
457  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
458  CBlastOptions& options);
459 };
460 
461 /// Argument class to collect the genetic code for all queries/subjects
463 {
464 public:
465  /// Enumeration defining which sequences the genetic code applies to
466  enum ETarget {
467  eQuery, ///< Query genetic code
468  eDatabase ///< Database genetic code
469  };
470 
471 
472  /**
473  * @brief Constructor
474  *
475  * @param t genetic code target (query or database)
476  */
477  CGeneticCodeArgs(ETarget t) : m_Target(t) {};
478 
479  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
480  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
481  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
482  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
483  CBlastOptions& options);
484 
485 private:
486  ETarget m_Target; ///< Genetic code target
487 };
488 
489 /// Argument class to retrieve the gap trigger option
491 {
492 public:
493  /**
494  * @brief Constructor
495  *
496  * @param query_is_protein is the query sequence(s) protein?
497  */
498  CGapTriggerArgs(bool query_is_protein)
499  : m_QueryIsProtein(query_is_protein) {}
500  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
501  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
502  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
503  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
504  CBlastOptions& options);
505 private:
506  bool m_QueryIsProtein; /**< true if the query is protein */
507 };
508 
509 /// Argument class to collect PSSM engine options
511 {
512 public:
513  /// Constructor
514  /// @param is_deltablast Are the arguments set up for Delta Blast [in]
515  CPssmEngineArgs(bool is_deltablast = false) : m_IsDeltaBlast(is_deltablast)
516  {}
517 
518  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
519  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
520  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
521  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
522  CBlastOptions& options);
523 
524 private:
525  /// Are these arguments for Delta Blast
527 };
528 
529 /// Argument class to import/export the search strategy
531 {
532 public:
533  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
534  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
535  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
536  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
537  CBlastOptions& options);
538 
539  /// Get the input stream for the search strategy
540  CNcbiIstream* GetImportStream(const CArgs& args) const;
541  /// Get the output stream for the search strategy
542  CNcbiOstream* GetExportStream(const CArgs& args) const;
543 };
544 
545 /// Argument class to collect options specific to PSI-BLAST
547 {
548 public:
549  /// Enumeration to determine the molecule type of the database
551  eProteinDb, ///< Traditional, iterated PSI-BLAST
552  eNucleotideDb ///< PSI-Tblastn, non-iterated
553  };
554 
555  /**
556  * @brief Constructor
557  *
558  * @param db_target Molecule type of the database
559  * @param is_deltablast Are the arguments set up for Delta Blast
560  */
561  CPsiBlastArgs(ETargetDatabase db_target = eProteinDb,
562  bool is_deltablast = false)
563  : m_DbTarget(db_target), m_NumIterations(1),
564  m_CheckPointOutput(0), m_AsciiMatrixOutput(0),
565  m_IsDeltaBlast(is_deltablast),
566  m_SaveLastPssm(false)
567  {};
568 
569  /// Our virtual destructor
570  virtual ~CPsiBlastArgs() {}
571 
572  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
573  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
574  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
575  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
576  CBlastOptions& options);
577 
578  /// Retrieve the number of iterations to perform
579  size_t GetNumberOfIterations() const {
580  return m_NumIterations;
581  }
582 
583  /// Set the number of iterations to perform
584  void SetNumberOfIterations(unsigned int num_iters) {
585  m_NumIterations = num_iters;
586  }
587  /// Returns true if checkpoint PSSM is required to be printed
589  return m_CheckPointOutput != NULL;
590  }
591  /// Get the checkpoint file output stream
592  /// @return pointer to output stream, not to be free'd by the caller
594  return m_CheckPointOutput ? m_CheckPointOutput->GetStream() : NULL;
595  }
596  /// Returns true if ASCII PSSM is required to be printed
597  bool RequiresAsciiPssmOutput() const {
598  return m_AsciiMatrixOutput != NULL;
599  }
600  /// Get the ASCII matrix output stream
601  /// @return pointer to output stream, not to be free'd by the caller
603  return m_AsciiMatrixOutput ? m_AsciiMatrixOutput->GetStream() : NULL;
604  }
605 
606  /// Get the PSSM read from checkpoint file
608  return m_Pssm;
609  }
610 
611  /// Set the PSSM read from saved search strategy
613  m_Pssm = pssm;
614  }
615 
616  /// Should the PSSM after the last database search be saved
617  bool GetSaveLastPssm(void) const {
618  return m_SaveLastPssm;
619  }
620 
621  /// Set the on/off switch for saving PSSM after the last database search
622  void SetSaveLastPssm(bool b) {
623  m_SaveLastPssm = b;
624  }
625 
626 private:
627  /// Molecule of the database
629  /// number of iterations to perform
631  /// checkpoint output file
633  /// ASCII matrix output file
635  /// PSSM
637 
638  /// Are the arguments set up for Delta Blast
640 
641  /// Save PSSM after the last database search
643 
644  /// Prohibit copy constructor
646  /// Prohibit assignment operator
648 
649  /// Auxiliary function to create a PSSM from a multiple sequence alignment
650  /// file
652  x_CreatePssmFromMsa(CNcbiIstream& input_stream, CBlastOptions& opt,
653  bool save_ascii_pssm, unsigned int msa_master_idx,
654  bool ignore_pssm_tmpl_seq);
655 };
656 
657 /// Argument class to collect options specific to PHI-BLAST
659 {
660 public:
661  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
662  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
663  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
664  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
665  CBlastOptions& options);
666 };
667 
668 /// Argument class to collect options specific to KBLASTP
670 {
671 public:
672 
673  /// Constructor
674  CKBlastpArgs(void) : m_JDistance(0.10), m_MinHits(0), m_CandidateSeqs(1000) {}
675 
676  /// Our virtual destructor
677  virtual ~CKBlastpArgs() {}
678 
679  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
680  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
681  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
682  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
683  CBlastOptions& options);
684 
685 
686  /// Get the Jaccard distance
687  double GetJaccardDistance(void) { return m_JDistance;}
688 
689  /// Get the minimum number of LSH matches.
690  int GetMinHits(void) {return m_MinHits;}
691 
692  /// The database
693  string GetDatabase(void) {return m_DbIndex;}
694 
695  /// Number of candidate sequences to attempt with BLASTP
696  int GetCandidateSeqs(void) {return m_CandidateSeqs;}
697 
698 private:
699  /// Prohibit copy constructor
701  /// Prohibit assignment operator
703 
704  /// Jaccard distance
705  double m_JDistance;
706 
707  /// Minimum number of hits in LSH phase
709 
710  /// Database/index
711  string m_DbIndex;
712 
713  /// Number of candidate sequences to try BLAST on.
715 };
716 
717 /// Argument class to collect options specific to DELTA-BLAST
719 {
720 public:
721 
722  /// Constructor
723  CDeltaBlastArgs(void) : m_ShowDomainHits(false) {}
724 
725  /// Our virtual destructor
726  virtual ~CDeltaBlastArgs() {}
727 
728  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
729  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
730  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
731  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
732  CBlastOptions& options);
733 
734  /// Get domain database
736  {return m_DomainDb;}
737 
738  /// Get show domain hits option value
739  bool GetShowDomainHits(void) const {return m_ShowDomainHits;}
740 
741 private:
742  /// Prohibit copy constructor
744  /// Prohibit assignment operator
746 
747 private:
748 
749  /// Conserved Domain Database
751 
752  /// Is printing CDD hits requested
754 };
755 
756 
758 {
759 public:
760  CMappingArgs(void) {}
761 
762  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
763  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
764  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
765  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
766  CBlastOptions& options);
767 
768 };
769 
770 /*****************************************************************************/
771 // Input options
772 
773 /// Argument class to collect query options
775 {
776 public:
777  /**
778  * @brief Constructor
779  *
780  * @param query_cannot_be_nucl can the query not be nucleotide?
781  */
782  CQueryOptionsArgs(bool query_cannot_be_nucl = false)
783  : m_Strand(objects::eNa_strand_unknown), m_Range(),
784  m_UseLCaseMask(kDfltArgUseLCaseMasking),
785  m_ParseDeflines(kDfltArgParseDeflines),
786  m_QueryCannotBeNucl(query_cannot_be_nucl)
787  {};
788 
789  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
790  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
791  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
792  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
793  CBlastOptions& options);
794 
795  /// Get query sequence range restriction
796  TSeqRange GetRange() const { return m_Range; }
797  /// Set query sequence range restriction
798  void SetRange(const TSeqRange& range) { m_Range = range; }
799  /// Get strand to search in query sequence(s)
800  objects::ENa_strand GetStrand() const { return m_Strand; }
801  /// Use lowercase masking in FASTA input?
802  bool UseLowercaseMasks() const { return m_UseLCaseMask; }
803  /// Should the defline be parsed?
804  bool GetParseDeflines() const { return m_ParseDeflines; }
805 
806  /// Is the query sequence protein? Reports the query_cannot_be_nucl constructor flag
807  bool QueryIsProtein() const { return m_QueryCannotBeNucl; }
808 
809 private:
810  /// Strand(s) to search
812  /// range to restrict the query sequence(s)
814  /// use lowercase masking in FASTA input
816  /// Should the deflines be parsed?
818 
819  /// only false for blast[xn], and tblastx
820  /// true in case of PSI-BLAST
822 };
823 
824 /// Argument class to collect query options for BLAST Mapper
826 {
827 public:
828 
829  /// Input formats
831  eFasta = 0,
836  eSra
837  };
838 
839 
842  m_IsPaired(false),
843  m_InputFormat(eFasta),
844  m_MateInputStream(NULL),
845  m_EnableSraCache(false)
846  {}
847 
848  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
849  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
850  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
851  virtual void ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt);
852 
853  /// Are query sequences paired
854  bool IsPaired(void) const {return m_IsPaired;}
855 
856  /// Get the input format of the queries
858  {return m_InputFormat;}
859 
860  /// Does the mate input stream exist
861  bool HasMateInputStream(void) const {return m_MateInputStream;}
862 
863  /// Get input stream for query mates
864  CNcbiIstream* GetMateInputStream(void) const {return m_MateInputStream;}
865 
866  /// Get a list of SRA accessions
867  const vector<string>& GetSraAccessions(void) const
868  {return m_SraAccessions;}
869 
870  /// Is SRA caching in local files enabled
871  /// (see File Caching at
872  /// https://github.com/ncbi/sra-tools/wiki/Toolkit-Configuration)
873  bool IsSraCacheEnabled(void) const {return m_EnableSraCache;}
874 
875 private:
878  vector<string> m_SraAccessions;
879 
881  unique_ptr<CDecompressIStream> m_DecompressIStream;
882 
884 };
885 
886 
887 /// Argument class to collect database/subject arguments
889 {
890 public:
891  /// The default priority for subjects, should be used for
892  /// subjects/databases
893  static const int kSubjectsDataLoaderPriority = 10;
894 
895  /// alias for the database molecule type
897 
898  /// Auxiliary function to determine if the database/subject sequence has
899  /// been set
900  static bool HasBeenSet(const CArgs& args);
901 
902  /// Constructor
903  /// @param request_mol_type If true, the command line arguments will
904  /// include a mandatory option to disambiguate whether a protein or a
905  /// nucleotide database is searched
906  /// @param is_rpsblast is it RPS-BLAST?
907  /// @param is_igblast is it IG-BLAST?
908  /// @param is_deltablast is it DELTA-BLAST?
909  CBlastDatabaseArgs(bool request_mol_type = false,
910  bool is_rpsblast = false,
911  bool is_igblast = false,
912  bool is_mapper = false,
913  bool is_kblast = false);
914  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
915  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
916  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
917  virtual void ExtractAlgorithmOptions(const CArgs& args,
918  CBlastOptions& opts);
919 
920  /// Turns on/off database masking support
922  m_SupportsDatabaseMasking = val;
923  }
924 
925  /// Is the database/subject protein?
926  bool IsProtein() const { return m_IsProtein; }
927 
928  /// Get the BLAST database name
929  /// @return empty string in the case of BLAST2Sequences, otherwise the
930  /// BLAST database name
931  string GetDatabaseName() const {
932  return m_SearchDb.Empty() ? kEmptyStr : m_SearchDb->GetDatabaseName();
933  }
934 
935  /// Retrieve the search database information
936  CRef<CSearchDatabase> GetSearchDatabase() const { return m_SearchDb; }
937  /// Set the search database information.
938  /// use case: recovering from search strategy
940  m_SearchDb = search_db;
941  m_IsProtein = search_db->IsProtein();
942  }
943 
944  /// Sets the subject sequences.
945  /// use case: recovering from search strategy
947  bool is_protein) {
948  m_Subjects = subjects;
949  m_Scope = scope;
950  m_IsProtein = is_protein;
951  }
952 
953  /// Retrieve subject sequences, if provided
954  /// @param scope scope to which the sequences read will be added (if
955  /// non-NULL) [in]
956  /// @return empty CRef<> if no subjects were provided, otherwise a properly
957  /// initialized IQueryFactory object
958  CRef<IQueryFactory> GetSubjects(objects::CScope* scope = NULL) {
959  if (m_Subjects && scope) {
960  // m_Scope contains the subject(s) read
962  // Add the scope with a lower priority to avoid conflicts
963  scope->AddScope(*m_Scope, kSubjectsDataLoaderPriority);
964  }
965  return m_Subjects;
966  }
967 
969  m_SupportIPGFiltering = val;
970  }
971 
972 protected:
973  CRef<CSearchDatabase> m_SearchDb;/**< Description of the BLAST database */
974  bool m_RequestMoleculeType; /**< Determines whether the database's
975  molecule type should be requested in the
976  command line, true in case of PSI-BLAST
977  */
978  bool m_IsRpsBlast; /**< true if the search is RPS-BLAST */
979  bool m_IsIgBlast; /**< true if the search is Ig-BLAST */
980 
981  bool m_IsProtein; /**< Is the database/subject(s) protein? */
982  bool m_IsMapper; /**< true for short read mapper */
983  bool m_IsKBlast; /**< true for Kblastp */
984  CRef<IQueryFactory> m_Subjects; /**< The subject sequences */
985  CRef<objects::CScope> m_Scope; /**< CScope object in which all subject
986  sequences read are kept */
987  bool m_SupportsDatabaseMasking; /**< true if it's supported */
988  bool m_SupportIPGFiltering; /**< true if IPG filtering is supported */
989 };
990 
991 /// Argument class to collect options specific to igBLAST
993 {
994 public:
995  CIgBlastArgs(bool is_protein) : m_IsProtein(is_protein) {};
996 
997  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
998  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
999  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1000  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
1001  CBlastOptions& options);
1002 
1003  CRef<CIgBlastOptions> GetIgBlastOptions() { return m_IgOptions; }
1004 
1006 
1007  if (m_Scope.NotEmpty()) {
1008  // Add the scope with a lower priority to avoid conflicts
1009  scope->AddScope(*m_Scope,
1011  }
1012  }
1013 
1014 private:
1015  /// Is this a protein search?
1017  /// Igblast options to fill
1019  /// scope to get sequences
1021 };
1022 
1023 /// Argument class to collect formatting options, use this to create a
1024 /// CBlastFormat object.
1025 /// @note This object is also needed to set the maximum number of target
1026 /// sequences to save (hitlist size)
1028 {
1029 public:
1030  /// Defines the output formats supported by our command line formatter
1032  /// Standard pairwise alignments
1033  ePairwise = 0,
1034  /// Query anchored showing identities
1035  eQueryAnchoredIdentities,
1036  /// Query anchored no identities
1037  eQueryAnchoredNoIdentities,
1038  /// Flat query anchored showing identities
1039  eFlatQueryAnchoredIdentities,
1040  /// Flat query anchored no identities
1042  /// XML output
1044  /// Tabular output
1046  /// Tabular output with comments
1048  /// ASN.1 text output
1050  /// ASN.1 binary output
1052  /// Comma-separated values
1054  /// BLAST archive format
1056  /// JSON seq-align
1058  /// JSON XInclude
1060  /// XML2 XInclude
1062  /// JSON2 single file
1064  /// XML2 single file
1066  /// SAM format
1068 
1070 
1071  ///igblast AIRR rearrangement, 19
1073 
1074  /// unaligned reads in magicblast
1076  /// Sentinel value for error checking
1077  eEndValue
1078 
1079  };
1080 
1082  eDefaultFlag = 0,
1083  // Set if VDB
1084  eIsVDB = 0x01,
1085  // Set if SAM format is supported
1086  eIsSAM = 0x02,
1087  // Set if both VDB and SAM is true
1088  eIsVDB_SAM = eIsVDB | eIsSAM,
1089  //Is eAirrRearrangement format supported?
1090  eIsAirrRearrangement = 0x04
1091  };
1092  /// Default constructor
      /// @param isIgblast true if these are IgBLAST formatting arguments; in
      /// that case the default number of descriptions and alignments is 10,
      /// otherwise the align_format defaults are used [in]
      /// @param flag format flags, stored in m_FormatFlags [in]
1093  CFormattingArgs(bool isIgblast = false, EFormatFlags flag = eDefaultFlag)
1094  : m_OutputFormat(ePairwise), m_ShowGis(false),
1095  m_NumDescriptions(0), m_NumAlignments(0),
1096  m_DfltNumDescriptions(0), m_DfltNumAlignments(0),
1097  m_Html(false),
1098  m_IsIgBlast(isIgblast),
1099  m_LineLength(align_format::kDfltLineLength),
1100  m_FormatFlags(flag),
1101  m_HitsSortOption(-1),
1102  m_HspsSortOption(-1)
1103  {
1104  if (m_IsIgBlast) {
1105  m_DfltNumAlignments = m_DfltNumDescriptions = 10;
1106  } else {
1107  m_DfltNumAlignments = static_cast<ncbi::TSeqPos>(align_format::kDfltArgNumAlignments) ;
1108  m_DfltNumDescriptions = static_cast<TSeqPos>(align_format::kDfltArgNumDescriptions);
1109  }
1110  };
1111 
1112  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1113  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1114  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1115  virtual void ExtractAlgorithmOptions(const CArgs& args,
1116  CBlastOptions& opts);
1117 
1118  /// Parses the output format command line option value, returns the
1119  /// requested output format type and any custom output formats (if
1120  /// any and applicable)
1121  /// @param args Command line arguments object [in]
1122  /// @param fmt_type Output format type requested in command line options
1123  /// [out]
1124  /// @param custom_fmt_spec Custom output format specification in command
1125  /// line options [out]
1126  virtual void
1127  ParseFormattingString(const CArgs& args,
1128  EOutputFormat& fmt_type,
1129  string& custom_fmt_spec,
1130  string& custom_delim) const;
1131 
1132  /// Get the choice of formatted output
1134  return m_OutputFormat;
1135  }
1136 
1137  /// Returns true if the desired output format is structured (needed to
1138  /// determine whether to print or not that a PSI-BLAST search has
1139  /// converged - this is not supported in structured formats)
1141  return m_OutputFormat == eXml ||
1142  m_OutputFormat == eAsnText ||
1143  m_OutputFormat == eAsnBinary ||
1144  m_OutputFormat == eXml2 ||
1145  m_OutputFormat == eJson ||
1146  m_OutputFormat == eXml2_S ||
1147  m_OutputFormat == eJson_S ||
1148  m_OutputFormat == eJsonSeqalign ||
1149  m_OutputFormat == eSAM;
1150  }
1151 
1152  /// Display the NCBI GIs in formatted output?
1153  bool ShowGis() const {
1154  return m_ShowGis;
1155  }
1156  /// Number of one-line descriptions to show in traditional BLAST output
1158  return m_NumDescriptions;
1159  }
1160  /// Number of alignments to show in traditional BLAST output
1162  return m_NumAlignments;
1163  }
1164  /// Display HTML output?
1165  bool DisplayHtmlOutput() const {
1166  return m_Html;
1167  }
1168 
1169  /// Retrieve the string that specifies the custom output format for tabular
1170  /// and comma-separated value
1171  string GetCustomOutputFormatSpec() const {
1172  return m_CustomOutputFormatSpec;
1173  }
1174 
1175  virtual bool ArchiveFormatRequested(const CArgs& args) const;
1176 
1177  size_t GetLineLength() const {
1178  return m_LineLength;
1179  }
1180  int GetHitsSortOption() const {
1181  return m_HitsSortOption;
1182  }
1183  int GetHspsSortOption() const {
1184  return m_HspsSortOption;
1185  }
1186  string GetCustomDelimiter(){return m_CustomDelim;}
1187 
1188 protected:
1189  EOutputFormat m_OutputFormat; ///< Choice of formatting output
1190  bool m_ShowGis; ///< Display NCBI GIs?
1191  TSeqPos m_NumDescriptions; ///< Number of 1-line descr. to show
1192  TSeqPos m_NumAlignments; ///< Number of alignments to show
1193  TSeqPos m_DfltNumDescriptions; ///< Default value for num descriptions
1194  TSeqPos m_DfltNumAlignments; ///< Default value for num alignments
1195  bool m_Html; ///< Display HTML output?
1196  bool m_IsIgBlast; ///< IgBlast has a different default num_alignments
1197  /// The format specification for custom output, e.g.: tabular or
1198  /// comma-separated value (populated if applicable)
1205 };
1206 
1207 /// Formatting args for magicblast advertising only SAM and fast tabular
1208 /// formats
1210 {
1211 public:
1212 
1214  CFormattingArgs(),
1215  m_TrimReadIds(true),
1216  m_PrintUnaligned(true),
1217  m_NoDiscordant(false),
1218  m_FwdRev(false),
1219  m_RevFwd(false),
1220  m_FwdOnly(false),
1221  m_RevOnly(false),
1222  m_OnlyStrandSpecific(false),
1223  m_PrintMdTag(false),
1224  m_UnalignedOutputFormat(eSAM)
1225  {}
1226 
1227  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1228 
1229  virtual void ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt);
1230 
1231  virtual bool ArchiveFormatRequested(const CArgs& /*args*/) const {
1232  return false;
1233  }
1234 
1235  /// Should read ids in SAM format be trimmed of .1 and .2 endings
1236  /// for paired mapping
1237  bool TrimReadIds(void) const {return m_TrimReadIds;}
1238 
1239  /// Should unaligned reads be reported
1240  bool PrintUnaligned(void) const {return m_PrintUnaligned;}
1241 
1242  /// Should non-concordant pairs be filtered out of report
1243  bool NoDiscordant(void) const {return m_NoDiscordant;}
1244 
1245  /// Specify fwd/rev strands
1246  bool SelectFwdRev(void) const {return m_FwdRev;}
1247 
1248  /// Specify rev/fwd strands
1249  bool SelectRevFwd(void) const {return m_RevFwd;}
1250 
1251  /// Specify fwd-only strands
1252  bool SelectFwdOnly(void) const {return m_FwdOnly;}
1253 
1254  /// Specify rev-only strands
1255  bool SelectRevOnly(void) const {return m_RevOnly;}
1256 
1257  /// Specify only-strand-specific
1258  bool SelectOnlyStrandSpecific(void) const {return m_OnlyStrandSpecific;}
1259 
1260  /// Should MD tag be included in SAM report
1261  bool PrintMdTag(void) const {return m_PrintMdTag;}
1262 
1263  /// Get format choice for unaligned reads
1265  {return m_UnalignedOutputFormat;}
1266 
1267  /// Get a user tag added to each alignment
1268  const string& GetUserTag(void) const {return m_UserTag;}
1269 
1270 private:
1274  bool m_FwdRev;
1275  bool m_RevFwd;
1281  string m_UserTag;
1282 };
1283 
1284 /// Argument class to collect multi-threaded arguments
1286 {
1287 public:
1288  enum EMTMode {
1289  eNotSupported = -1,
1292  eSplitByDB
1293  };
1294  /// Default Constructor
      /// @param default_num_threads initial value for the number of threads to
      /// spawn [in]
      /// @param mt_mode initial multi-threading mode [in]
      /// @note when NCBI_NO_THREADS is defined both arguments are overridden
      /// with CThreadable::kMinNumThreads and eNotSupported
1295  CMTArgs(size_t default_num_threads = CThreadable::kMinNumThreads, EMTMode mt_mode = eNotSupported) :
1296  m_NumThreads(default_num_threads), m_MTMode(mt_mode)
1297  {
1298 #ifdef NCBI_NO_THREADS
1299  // No threads can be set in NON-MT mode
1300  m_NumThreads = CThreadable::kMinNumThreads;
1301  m_MTMode = eNotSupported;
1302 #endif
1303  }
1304  CMTArgs(const CArgs& cmd_line_args);
1305  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1306  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1307  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1308  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
1309  CBlastOptions& options);
1310 
1311  /// Get the number of threads to spawn
1312  size_t GetNumThreads() const { return m_NumThreads; }
1313 
1314  int GetMTMode() const { return m_MTMode; }
1315 
1316 protected:
1317  void x_ExtractAlgorithmOptions(const CArgs& args);
1318  size_t m_NumThreads; ///< Number of threads to spawn
1320 };
1321 
1322 /// Argument class to collect remote vs. local execution
1324 {
1325 public:
1326  /// Default constructor
1327  CRemoteArgs() : m_IsRemote(false) {}
1328  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1329  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1330  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1331  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
1332  CBlastOptions& options);
1333 
1334  /// Return whether the search should be executed remotely or not
1335  bool ExecuteRemotely() const { return m_IsRemote; }
1336 
1337 private:
1338  /// Should the search be executed remotely?
1340 };
1341 
1342 /// Argument class to collect debugging options.
1343 /// Only show in command line if compiled with _BLAST_DEBUG
1345 {
1346 public:
1347  /// Default constructor
1348  CDebugArgs() : m_DebugOutput(false), m_RmtDebugOutput(false) {}
1349  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1350  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1351  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1352  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
1353  CBlastOptions& options);
1354 
1355  /// Return whether debug (verbose) output should be produced on remote
1356  /// searches (only available when compiled with _DEBUG)
1357  bool ProduceDebugRemoteOutput() const { return m_RmtDebugOutput; }
1358  /// Return whether debug (verbose) output should be produced
1359  /// (only available when compiled with _DEBUG)
1360  bool ProduceDebugOutput() const { return m_DebugOutput; }
1361 private:
1362 
1363  /// Should debugging (verbose) output be printed
1365  /// Should debugging (verbose) output be printed for remote BLAST
1367 };
1368 
1369 /// Argument class to retrieve options for filtering HSPs (e.g.: culling
1370 /// options, best hit algorithm options)
1372 {
1373 public:
1374  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1375  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1376  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1377  virtual void ExtractAlgorithmOptions(const CArgs& args,
1378  CBlastOptions& opts);
1379 };
1380 
1381 /// Argument class to retrieve megablast database indexing options
1383 {
1384 public:
1385  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1386  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1387  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1388  virtual void ExtractAlgorithmOptions(const CArgs& args,
1389  CBlastOptions& opts);
1390 
1391  /// Auxiliary function to determine if the megablast database indexing
1392  /// options have been set
1393  static bool HasBeenSet(const CArgs& args);
1394 };
1395 
1396 /// Type definition of a container of IBlastCmdLineArgs
1397 typedef vector< CRef<IBlastCmdLineArgs> > TBlastCmdLineArgs;
1398 
1399 
1400 /// Base command line argument class for a generic BLAST command line binary
1402 {
1403 public:
1404  /// Default constructor
1405  CBlastAppArgs();
1406  /// Our virtual destructor
1407  virtual ~CBlastAppArgs() {}
1408 
1409  /// Set the command line arguments
1410  CArgDescriptions* SetCommandLine();
1411 
1412  /// Get the task for this object
1413  string GetTask() const {
1414  return m_Task;
1415  }
1416 
1417  /// Set the task for this object
1418  /// @param task task name to set [in]
1419  void SetTask(const string& task);
1420 
1421  /// Extract the command line arguments into a CBlastOptionsHandle object
1422  /// @param args Command line arguments [in]
1423  CRef<CBlastOptionsHandle> SetOptions(const CArgs& args);
1424 
1425  /// Combine the command line arguments into a CBlastOptions object
1426  /// recovered from saved search strategy
1427  /// @param args Command line arguments [in]
1428  CRef<CBlastOptionsHandle> SetOptionsForSavedStrategy(const CArgs& args);
1429 
1430  /// Setter for the BLAST options handle, this is used if the options are
1431  /// recovered from a saved BLAST search strategy
1433  m_OptsHandle = opts_hndl;
1434  }
1435 
1436  /// Get the BLAST database arguments
1438  return m_BlastDbArgs;
1439  }
1440  /// Set the BLAST database arguments
1442  m_BlastDbArgs = args;
1443  }
1444 
1445  /// Get the options for the query sequence(s)
1447  return m_QueryOptsArgs;
1448  }
1449 
1450  /// Get the formatting options
1452  return m_FormattingArgs;
1453  }
1454 
1455  /// Get the number of threads to spawn
1456  size_t GetNumThreads() const {
1457  return m_MTArgs->GetNumThreads();
1458  }
1459 
1460  int GetMTMode() const {
1461  return m_MTArgs->GetMTMode();
1462  }
1463 
1464  /// Get the input stream
1465  virtual CNcbiIstream& GetInputStream();
1466 
1467  /// Get the output stream
1468  virtual CNcbiOstream& GetOutputStream();
1469 
1470  /// Set the input stream to a temporary input file (needed when importing
1471  /// a search strategy)
1472  /// @param input_file temporary input file to read [in]
1474  m_StdCmdLineArgs->SetInputStream(input_file);
1475  }
1476 
1477  /// Get the input stream for the search strategy
1479  return m_SearchStrategyArgs->GetImportStream(args);
1480  }
1481  /// Get the output stream for the search strategy
1483  return m_SearchStrategyArgs->GetExportStream(args);
1484  }
1485 
1486  /// Determine whether the search should be executed remotely or not
1487  bool ExecuteRemotely() const {
1488  return m_RemoteArgs->ExecuteRemotely();
1489  }
1490 
1491  /// Return whether debug (verbose) output should be produced on remote
1492  /// searches (only available when compiled with _DEBUG)
1494  return m_DebugArgs->ProduceDebugRemoteOutput();
1495  }
1496 
1497  /// Return whether debug (verbose) output should be produced
1498  /// (only available when compiled with _DEBUG)
1499  bool ProduceDebugOutput() const {
1500  return m_DebugArgs->ProduceDebugOutput();
1501  }
1502 
1503  /// Get the query batch size
1504  virtual int GetQueryBatchSize() const = 0;
1505 
1506  /// Retrieve the client ID for remote requests
      /// @note asserts (via _ASSERT) that the client ID is not empty
1507  string GetClientId() const {
1508  _ASSERT( !m_ClientId.empty() );
1509  return m_ClientId;
1510  }
1511 
1512 protected:
1513  /// Set of command line argument objects
1515  /// query options object
1517  /// database/subject object
1519  /// formatting options
1521  /// multi-threaded options
1523  /// remote vs. local execution options
1525  /// standard command line arguments class
1527  /// arguments for dealing with search strategies
1529  /// Debugging arguments
1531  /// HSP filtering arguments
1533  /// The BLAST options handle, only non-NULL if assigned via
1534  /// SetOptionsHandle, i.e.: from a saved search strategy
1536  /// Task specified in the command line
1537  string m_Task;
1538  /// Client ID used for remote BLAST submissions, must be populated by
1539  /// subclasses
1540  string m_ClientId;
1541  /// Is this application being run ungapped
1543 
1544  /// Create the options handle based on the command line arguments
1545  /// @param locality whether the search will be executed locally or remotely
1546  /// [in]
1547  /// @param args command line arguments [in]
1550  const CArgs& args) = 0;
1551 
1552  /** Creates the BLAST options handle based on the task argument
1553  * @param locality whether the search will be executed locally or remotely [in]
1554  * @param task program-specific BLAST named parameter set [in]
1555  */
1557  x_CreateOptionsHandleWithTask(CBlastOptions::EAPILocality locality,
1558  const string& task);
1559 
1560  /// Issue warnings when recovering from a search strategy (command line
1561  /// applications only)
1562  void x_IssueWarningsForIgnoredOptions(const CArgs& args);
1563 };
1564 
1565 /**
1566  * @brief Create a CArgDescriptions object and invoke SetArgumentDescriptions
1567  * for each of the TBlastCmdLineArgs in its argument list
1568  *
1569  * @param args arguments to configure the return value [in]
1570  *
1571  * @return a CArgDescriptions object with the command line options set
1572  */
1576 
1577 END_SCOPE(blast)
1579 
1580 #endif /* ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP */
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
CArgDescriptions * SetUpCommandLineArguments(TBlastCmdLineArgs &args)
Create a CArgDescriptions object and invoke SetArgumentDescriptions for each of the TBlastCmdLineArgs...
vector< CRef< IBlastCmdLineArgs > > TBlastCmdLineArgs
Type definition of a container of IBlastCmdLineArgs.
Auxiliary classes/functions for BLAST input library.
Declares class to encapsulate all BLAST options.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
Base command line argument class for a generic BLAST command line binary.
CRef< CRemoteArgs > m_RemoteArgs
remote vs. local execution options
string GetTask() const
Get the task for this object.
void SetOptionsHandle(CRef< CBlastOptionsHandle > opts_hndl)
Setter for the BLAST options handle, this is used if the options are recovered from a saved BLAST sea...
CRef< CBlastOptionsHandle > m_OptsHandle
The BLAST options handle, only non-NULL if assigned via SetOptionsHandle, i.e.
CRef< CQueryOptionsArgs > m_QueryOptsArgs
query options object
size_t GetNumThreads() const
Get the number of threads to spawn.
virtual int GetQueryBatchSize() const =0
Get the query batch size.
CRef< CBlastDatabaseArgs > m_BlastDbArgs
database/subject object
virtual CRef< CBlastOptionsHandle > x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, const CArgs &args)=0
Create the options handle based on the command line arguments.
CRef< CSearchStrategyArgs > m_SearchStrategyArgs
arguments for dealing with search strategies
string m_Task
Task specified in the command line.
CRef< CDebugArgs > m_DebugArgs
Debugging arguments.
int GetMTMode() const
CRef< CBlastDatabaseArgs > GetBlastDatabaseArgs() const
Get the BLAST database arguments.
CNcbiIstream * GetImportSearchStrategyStream(const CArgs &args)
Get the input stream for the search strategy.
virtual ~CBlastAppArgs()
Our virtual destructor.
void SetInputStream(CRef< CTmpFile > input_file)
Set the input stream to a temporary input file (needed when importing a search strategy)
CRef< CMTArgs > m_MTArgs
multi-threaded options
CRef< CFormattingArgs > m_FormattingArgs
formatting options
bool ExecuteRemotely() const
Determine whether the search should be executed remotely or not.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
bool m_IsUngapped
Is this application being run ungapped.
CRef< CQueryOptionsArgs > GetQueryOptionsArgs() const
Get the options for the query sequence(s)
string GetClientId() const
Retrieve the client ID for remote requests.
TBlastCmdLineArgs m_Args
Set of command line argument objects.
void SetBlastDatabaseArgs(CRef< CBlastDatabaseArgs > args)
Set the BLAST database arguments.
CRef< CFormattingArgs > GetFormattingArgs() const
Get the formatting options.
CNcbiOstream * GetExportSearchStrategyStream(const CArgs &args)
Get the output stream for the search strategy.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced (only available when compiled with _DEBUG)
CRef< CStdCmdLineArgs > m_StdCmdLineArgs
standard command line arguments class
CRef< CHspFilteringArgs > m_HspFilteringArgs
HSP filtering arguments.
string m_ClientId
Client ID used for remote BLAST submissions, must be populated by subclasses.
Argument class to collect database/subject arguments.
Definition: blast_args.hpp:889
CRef< objects::CScope > m_Scope
CScope object in which all subject sequences read are kept.
Definition: blast_args.hpp:985
bool m_IsMapper
true for short read mapper
Definition: blast_args.hpp:982
CRef< CSearchDatabase > GetSearchDatabase() const
Retrieve the search database information.
Definition: blast_args.hpp:936
bool IsProtein() const
Is the database/subject protein?
Definition: blast_args.hpp:926
bool m_SupportsDatabaseMasking
true if it's supported
Definition: blast_args.hpp:987
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
Definition: blast_args.hpp:893
void SetIPGFilteringSupport(bool val)
Definition: blast_args.hpp:968
bool m_IsProtein
Is the database/subject(s) protein?
Definition: blast_args.hpp:981
bool m_RequestMoleculeType
Determines whether the database's molecule type should be requested in the command line,...
Definition: blast_args.hpp:974
bool m_IsIgBlast
true if the search is Ig-BLAST
Definition: blast_args.hpp:979
CRef< IQueryFactory > m_Subjects
The subject sequences.
Definition: blast_args.hpp:984
void SetSubjects(CRef< IQueryFactory > subjects, CRef< CScope > scope, bool is_protein)
Sets the subject sequences.
Definition: blast_args.hpp:946
bool m_IsRpsBlast
true if the search is RPS-BLAST
Definition: blast_args.hpp:978
CRef< IQueryFactory > GetSubjects(objects::CScope *scope=NULL)
Retrieve subject sequences, if provided.
Definition: blast_args.hpp:958
CRef< CSearchDatabase > m_SearchDb
Description of the BLAST database.
Definition: blast_args.hpp:973
CSearchDatabase::EMoleculeType EMoleculeType
alias for the database molecule type
Definition: blast_args.hpp:896
bool m_SupportIPGFiltering
true if IPG filtering is supported
Definition: blast_args.hpp:988
void SetSearchDatabase(CRef< CSearchDatabase > search_db)
Set the search database information.
Definition: blast_args.hpp:939
void SetDatabaseMaskingSupport(bool val)
Turns on/off database masking support.
Definition: blast_args.hpp:921
bool m_IsKBlast
true for Kblastp
Definition: blast_args.hpp:983
string GetDatabaseName() const
Get the BLAST database name.
Definition: blast_args.hpp:931
Encapsulates ALL the BLAST algorithm's options.
EAPILocality
Enumerates the possible contexts in which objects of this type can be used.
Argument class for collecting composition based statistics options.
Definition: blast_args.hpp:397
bool m_Is2and3Supported
Are options 2 and 3 supported.
Definition: blast_args.hpp:421
CCompositionBasedStatsArgs(bool is_2and3supported=true, const string &default_option=kDfltArgCompBasedStats, const string &zero_option_descr="")
Constructor.
Definition: blast_args.hpp:405
string m_ZeroOptDescr
Non standard description for option zero.
Definition: blast_args.hpp:425
string m_DefaultOpt
Default option.
Definition: blast_args.hpp:423
Argument class to collect debugging options.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
CDebugArgs()
Default constructor.
bool m_DebugOutput
Should debugging (verbose) output be printed.
bool m_RmtDebugOutput
Should debugging (verbose) output be printed for remote BLAST.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced (only available when compiled with _DEBUG)
Argument class to collect options specific to DELTA-BLAST.
Definition: blast_args.hpp:719
CDeltaBlastArgs & operator=(const CDeltaBlastArgs &rhs)
Prohibit assignment operator.
CDeltaBlastArgs(void)
Constructor.
Definition: blast_args.hpp:723
CRef< CSearchDatabase > m_DomainDb
Conserved Domain Database.
Definition: blast_args.hpp:750
virtual ~CDeltaBlastArgs()
Our virtual destructor.
Definition: blast_args.hpp:726
CRef< CSearchDatabase > GetDomainDatabase(void)
Get domain database.
Definition: blast_args.hpp:735
CDeltaBlastArgs(const CDeltaBlastArgs &rhs)
Prohibit copy constructor.
bool m_ShowDomainHits
Is printing CDD hits requested.
Definition: blast_args.hpp:753
bool GetShowDomainHits(void) const
Get show domain hits option value.
Definition: blast_args.hpp:739
Argument class to retrieve discontiguous megablast arguments.
Definition: blast_args.hpp:379
static const string kTemplType_CodingAndOptimal
Value to specify coding+optimal template type.
Definition: blast_args.hpp:392
static const string kTemplType_Optimal
Value to specify optimal template type.
Definition: blast_args.hpp:390
static const string kTemplType_Coding
Value to specify coding template type.
Definition: blast_args.hpp:388
Argument class for collecting filtering options.
Definition: blast_args.hpp:333
CFilteringArgs(bool query_is_protein=true, bool filter_by_default=true)
Constructor.
Definition: blast_args.hpp:341
bool m_QueryIsProtein
true if the query is protein
Definition: blast_args.hpp:352
bool m_FilterByDefault
Should filtering be applied by default?
Definition: blast_args.hpp:353
Argument class to collect formatting options, use this to create a CBlastFormat object.
TSeqPos m_NumDescriptions
Number of 1-line descr. to show.
TSeqPos m_DfltNumDescriptions
Default value for num descriptions.
int GetHitsSortOption() const
bool HasStructuredOutputFormat() const
Returns true if the desired output format is structured (needed to determine whether to print or not ...
CFormattingArgs(bool isIgblast=false, EFormatFlags flag=eDefaultFlag)
Default constructor.
TSeqPos m_NumAlignments
Number of alignments to show.
string GetCustomOutputFormatSpec() const
Retrieve for string that specifies the custom output format for tabular and comma-separated value.
EOutputFormat GetFormattedOutputChoice() const
Get the choice of formatted output.
EFormatFlags m_FormatFlags
EOutputFormat
Defines the output formats supported by our command line formatter.
@ eJsonSeqalign
JSON seq-align.
@ eJson
JSON XInclude.
@ eTabular
Tabular output.
@ eXml2
XML2 XInclude.
@ eSAM
SAM format.
@ eCommaSeparatedValues
Comma-separated values.
@ eAsnText
ASN.1 text output.
@ eArchiveFormat
BLAST archive format.
@ eAirrRearrangement
igblast AIRR rearrangement, 19
@ eXml2_S
XML2 single file.
@ eJson_S
JSON2 single file.
@ eXml
XML output.
@ eFasta
unaligned reads in magicblast
@ eAsnBinary
ASN.1 binary output.
@ eFlatQueryAnchoredNoIdentities
@ eTabularWithComments
Tabular output with comments.
bool m_IsIgBlast
IgBlast has a different default num_alignments.
int GetHspsSortOption() const
string m_CustomOutputFormatSpec
The format specification for custom output, e.g.
EOutputFormat m_OutputFormat
Choice of formatting output.
TSeqPos GetNumAlignments() const
Number of alignments to show in traditional BLAST output.
bool ShowGis() const
Display the NCBI GIs in formatted output?
TSeqPos GetNumDescriptions() const
Number of one-line descriptions to show in traditional BLAST output.
size_t GetLineLength() const
bool DisplayHtmlOutput() const
Display HTML output?
TSeqPos m_DfltNumAlignments
Default value for num alignments.
string GetCustomDelimiter()
bool m_ShowGis
Display NCBI GIs?
bool m_Html
Display HTML output?
Argument class to collect the frame shift penalty for out-of-frame searches.
Definition: blast_args.hpp:452
Argument class to retrieve the gap trigger option.
Definition: blast_args.hpp:491
bool m_QueryIsProtein
true if the query is protein
Definition: blast_args.hpp:506
CGapTriggerArgs(bool query_is_protein)
Constructor.
Definition: blast_args.hpp:498
Argument class for collecting gapped options.
Definition: blast_args.hpp:430
Argument class for general search BLAST algorithm options: evalue, gap penalties, query filter string...
Definition: blast_args.hpp:292
bool m_QueryIsProtein
true if the query is protein
Definition: blast_args.hpp:320
bool m_IsRpsBlast
true if the search is RPS-BLAST
Definition: blast_args.hpp:321
bool m_IsIgBlast
true if the search is igblast
Definition: blast_args.hpp:325
bool m_IsTblastx
true if the search is tblastx
Definition: blast_args.hpp:324
bool m_ShowPercentIdentity
true if the percent identity option should be shown
Definition: blast_args.hpp:322
CGenericSearchArgs(bool query_is_protein=true, bool is_rpsblast=false, bool show_perc_identity=false, bool is_tblastx=false, bool is_igblast=false, bool suppress_sum_stats=false)
Constructor.
Definition: blast_args.hpp:303
bool m_SuppressSumStats
true if search is blastn or blastp
Definition: blast_args.hpp:326
Argument class to collect the genetic code for all queries/subjects.
Definition: blast_args.hpp:463
CGeneticCodeArgs(ETarget t)
Constructor.
Definition: blast_args.hpp:477
ETarget m_Target
Genetic code target.
Definition: blast_args.hpp:486
ETarget
Enumeration defining which sequences the genetic code applies to.
Definition: blast_args.hpp:466
@ eQuery
Query genetic code.
Definition: blast_args.hpp:467
Argument class to retrieve options for filtering HSPs (e.g., culling options).
Argument class to collect options specific to igBLAST.
Definition: blast_args.hpp:993
CRef< CIgBlastOptions > m_IgOptions
Igblast options to fill.
bool m_IsProtein
Is this a protein search?
void AddIgSequenceScope(CRef< objects::CScope > scope)
CRef< CIgBlastOptions > GetIgBlastOptions()
CIgBlastArgs(bool is_protein)
Definition: blast_args.hpp:995
CRef< objects::CScope > m_Scope
scope to get sequences
Argument class to collect options specific to KBLASTP.
Definition: blast_args.hpp:670
virtual ~CKBlastpArgs()
Our virtual destructor.
Definition: blast_args.hpp:677
double m_JDistance
Jaccard distance.
Definition: blast_args.hpp:705
int m_CandidateSeqs
Number of candidate sequences to try BLAST on.
Definition: blast_args.hpp:714
CKBlastpArgs(const CKBlastpArgs &rhs)
Prohibit copy constructor.
CKBlastpArgs & operator=(const CKBlastpArgs &rhs)
Prohibit assignment operator.
CKBlastpArgs(void)
Constructor.
Definition: blast_args.hpp:674
int GetMinHits(void)
Get the minimum number of LSH matches.
Definition: blast_args.hpp:690
int m_MinHits
Minimum number of hits in LSH phase.
Definition: blast_args.hpp:708
string m_DbIndex
Database/index.
Definition: blast_args.hpp:711
int GetCandidateSeqs(void)
Number of candidate sequences to attempt with BLASTP.
Definition: blast_args.hpp:696
string GetDatabase(void)
The database.
Definition: blast_args.hpp:693
double GetJaccardDistance(void)
Get the Jaccard distance.
Definition: blast_args.hpp:687
Argument class for collecting the largest intron size.
Definition: blast_args.hpp:441
Argument class to collect multi-threaded arguments.
size_t GetNumThreads() const
Get the number of threads to spawn.
size_t m_NumThreads
Number of threads to spawn.
int GetMTMode() const
CMTArgs(size_t default_num_threads=CThreadable::kMinNumThreads, EMTMode mt_mode=eNotSupported)
Default Constructor.
EMTMode m_MTMode
@ eSplitByQueries
Formatting args for magicblast advertising only SAM and fast tabular formats.
bool TrimReadIds(void) const
Should read ids in SAM format be trimmed of .1 and .2 endings for paired mapping.
const string & GetUserTag(void) const
Get a user tag added to each alignment.
bool SelectRevOnly(void) const
Specify rev-only strands.
bool SelectFwdRev(void) const
Specify fwd/rev strands.
EOutputFormat m_UnalignedOutputFormat
bool SelectFwdOnly(void) const
Specify fwd-only strands.
EOutputFormat GetUnalignedOutputFormat(void) const
Get format choice for unaligned reads.
bool SelectRevFwd(void) const
Specify rev/fwd strands.
bool SelectOnlyStrandSpecific(void) const
Specify only-strand-specific.
virtual bool ArchiveFormatRequested(const CArgs &) const
bool PrintMdTag(void) const
Should MD tag be included in SAM report.
bool NoDiscordant(void) const
Should non-concordant pairs be filtered out of report.
bool PrintUnaligned(void) const
Should unaligned reads be reported.
Argument class to collect query options for BLAST Mapper.
Definition: blast_args.hpp:826
bool IsSraCacheEnabled(void) const
Is SRA caching in local files enabled (see File Caching at https://github.com/ncbi/sra-tools/wiki/Too...
Definition: blast_args.hpp:873
const vector< string > & GetSraAccessions(void) const
Get a list of SRA accessions.
Definition: blast_args.hpp:867
bool HasMateInputStream(void) const
Does the mate input stream exist.
Definition: blast_args.hpp:861
EInputFormat GetInputFormat(void) const
Get the input format of the queries (e.g. Fastc).
Definition: blast_args.hpp:857
CNcbiIstream * m_MateInputStream
Definition: blast_args.hpp:880
EInputFormat m_InputFormat
Definition: blast_args.hpp:877
bool IsPaired(void) const
Are query sequences paired.
Definition: blast_args.hpp:854
EInputFormat
Input formats.
Definition: blast_args.hpp:830
vector< string > m_SraAccessions
Definition: blast_args.hpp:878
unique_ptr< CDecompressIStream > m_DecompressIStream
Definition: blast_args.hpp:881
CNcbiIstream * GetMateInputStream(void) const
Get input stream for query mates.
Definition: blast_args.hpp:864
CMappingArgs(void)
Definition: blast_args.hpp:760
Argument class to retrieve and set the scoring matrix name BLAST algorithm option.
Definition: blast_args.hpp:278
Argument class to retrieve megablast database indexing options.
Defines values for match and mismatch in nucleotide comparisons as well as non-greedy extension.
Definition: blast_args.hpp:368
CObject –.
Definition: ncbiobj.hpp:180
Argument class to retrieve and set the off-diagonal range used in 2-hit algorithm.
Definition: blast_args.hpp:237
Argument class to collect options specific to PHI-BLAST.
Definition: blast_args.hpp:659
Argument class to populate an application's name and description.
Definition: blast_args.hpp:178
string m_ProgDesc
Application's description.
Definition: blast_args.hpp:193
string m_ProgName
Application's name.
Definition: blast_args.hpp:192
Argument class to collect options specific to PSI-BLAST.
Definition: blast_args.hpp:547
CPsiBlastArgs(ETargetDatabase db_target=eProteinDb, bool is_deltablast=false)
Constructor.
Definition: blast_args.hpp:561
CPsiBlastArgs & operator=(const CPsiBlastArgs &rhs)
Prohibit assignment operator.
size_t GetNumberOfIterations() const
Retrieve the number of iterations to perform.
Definition: blast_args.hpp:579
virtual ~CPsiBlastArgs()
Our virtual destructor.
Definition: blast_args.hpp:570
bool RequiresAsciiPssmOutput() const
Returns true if ASCII PSSM is required to be printed.
Definition: blast_args.hpp:597
bool m_SaveLastPssm
Save PSSM after the last database search.
Definition: blast_args.hpp:642
CNcbiOstream * GetCheckPointOutputStream()
Get the checkpoint file output stream.
Definition: blast_args.hpp:593
CRef< CAutoOutputFileReset > m_AsciiMatrixOutput
ASCII matrix output file.
Definition: blast_args.hpp:634
bool m_IsDeltaBlast
Are the arguments set up for Delta Blast.
Definition: blast_args.hpp:639
ETargetDatabase
Enumeration to determine the molecule type of the database.
Definition: blast_args.hpp:550
@ eProteinDb
Traditional, iterated PSI-BLAST.
Definition: blast_args.hpp:551
void SetNumberOfIterations(unsigned int num_iters)
Set the number of iterations to perform.
Definition: blast_args.hpp:584
CNcbiOstream * GetAsciiMatrixOutputStream()
Get the ASCII matrix output stream.
Definition: blast_args.hpp:602
bool RequiresCheckPointOutput() const
Returns true if checkpoint PSSM is required to be printed.
Definition: blast_args.hpp:588
CRef< CAutoOutputFileReset > m_CheckPointOutput
checkpoint output file
Definition: blast_args.hpp:632
CPsiBlastArgs(const CPsiBlastArgs &rhs)
Prohibit copy constructor.
bool GetSaveLastPssm(void) const
Should the PSSM after the last database search be saved.
Definition: blast_args.hpp:617
ETargetDatabase m_DbTarget
Molecule of the database.
Definition: blast_args.hpp:628
void SetSaveLastPssm(bool b)
Set the on/off switch for saving PSSM after the last database search.
Definition: blast_args.hpp:622
CRef< objects::CPssmWithParameters > m_Pssm
PSSM.
Definition: blast_args.hpp:636
size_t m_NumIterations
number of iterations to perform
Definition: blast_args.hpp:630
CRef< objects::CPssmWithParameters > GetInputPssm() const
Get the PSSM read from checkpoint file.
Definition: blast_args.hpp:607
void SetInputPssm(CRef< objects::CPssmWithParameters > pssm)
Set the PSSM read from saved search strategy.
Definition: blast_args.hpp:612
Argument class to collect PSSM engine options.
Definition: blast_args.hpp:511
bool m_IsDeltaBlast
Are these arguments for Delta Blast.
Definition: blast_args.hpp:526
CPssmEngineArgs(bool is_deltablast=false)
Constructor.
Definition: blast_args.hpp:515
Argument class to collect query options.
Definition: blast_args.hpp:775
bool m_UseLCaseMask
use lowercase masking in FASTA input
Definition: blast_args.hpp:815
objects::ENa_strand GetStrand() const
Get strand to search in query sequence(s)
Definition: blast_args.hpp:800
void SetRange(const TSeqRange &range)
Set query sequence range restriction.
Definition: blast_args.hpp:798
bool GetParseDeflines() const
Should the defline be parsed?
Definition: blast_args.hpp:804
bool QueryIsProtein() const
Is the query sequence protein?
Definition: blast_args.hpp:807
CQueryOptionsArgs(bool query_cannot_be_nucl=false)
Constructor.
Definition: blast_args.hpp:782
objects::ENa_strand m_Strand
Strand(s) to search.
Definition: blast_args.hpp:811
TSeqRange m_Range
range to restrict the query sequence(s)
Definition: blast_args.hpp:813
bool m_ParseDeflines
Should the deflines be parsed?
Definition: blast_args.hpp:817
bool m_QueryCannotBeNucl
only false for blast[xn] and tblastx; true in case of PSI-BLAST
Definition: blast_args.hpp:821
TSeqRange GetRange() const
Get query sequence range restriction.
Definition: blast_args.hpp:796
bool UseLowercaseMasks() const
Use lowercase masking in FASTA input?
Definition: blast_args.hpp:802
RMH: Argument class to retrieve and set the options specific to the RMBlastN algorithm.
Definition: blast_args.hpp:266
Argument class to collect remote vs. local execution.
bool m_IsRemote
Should the search be executed remotely?
CRemoteArgs()
Default constructor.
bool ExecuteRemotely() const
Return whether the search should be executed remotely or not.
Argument class to import/export the search strategy.
Definition: blast_args.hpp:531
Argument class to retrieve input and output streams for a command line program.
Definition: blast_args.hpp:110
bool m_GzipEnabled
If true input file will be decompressed with gzip if filename ends with ".gz".
Definition: blast_args.hpp:165
CNcbiOstream * GetUnalignedOutputStream() const
Get output stream for unaligned sequences/reads (for magicblast)
Definition: blast_args.hpp:150
unique_ptr< CDecompressIStream > m_DecompressIStream
Definition: blast_args.hpp:156
CRef< CTmpFile > m_QueryTmpInputFile
ASN.1 specification of query sequences when read from a saved search strategy.
Definition: blast_args.hpp:161
void SetGzipEnabled(bool g)
Set automatic decompression of the input file if the file name is recognized.
Definition: blast_args.hpp:134
unique_ptr< CCompressOStream > m_CompressOStream
Definition: blast_args.hpp:157
CNcbiOstream * m_OutputStream
Application's output stream.
Definition: blast_args.hpp:155
CNcbiIstream * m_InputStream
Application's input stream.
Definition: blast_args.hpp:154
bool m_SRAaccessionEnabled
If true, option to specify SRA runs will be presented as possible query input.
Definition: blast_args.hpp:169
bool HasUnalignedOutputStream(void) const
Is there a separate output stream for unaligned sequences/reads (for magicblast)
Definition: blast_args.hpp:145
CNcbiOstream * m_UnalignedOutputStream
Output stream to report unaligned sequences/reads.
Definition: blast_args.hpp:172
void SetSRAaccessionEnabled(bool g)
enables sra accession flag
Definition: blast_args.hpp:139
unique_ptr< CCompressOStream > m_UnalignedCompressOStream
Definition: blast_args.hpp:173
CStdCmdLineArgs()
Default constructor.
Definition: blast_args.hpp:113
Argument class to specify the supported tasks of a given program.
Definition: blast_args.hpp:198
const set< string > m_SupportedTasks
Set of supported tasks by this command line argument.
Definition: blast_args.hpp:215
string m_DefaultTask
Default task for this command line argument.
Definition: blast_args.hpp:217
Argument class to retrieve and set the window size BLAST algorithm option.
Definition: blast_args.hpp:223
Argument class to retrieve and set the word threshold BLAST algorithm option.
Definition: blast_args.hpp:251
BLAST Command line arguments design The idea is to have several small objects (subclasses of IBlastCm...
Definition: blast_args.hpp:84
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Extracts BLAST algorithmic options from the command line arguments into the CBlastOptions object.
Definition: blast_args.cpp:67
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)=0
Sets the command line descriptions in the CArgDescriptions object relevant to the subclass.
virtual ~IBlastCmdLineArgs()
Our virtual destructor.
Definition: blast_args.hpp:87
Constant declarations for command line arguments for BLAST programs.
const bool kDfltArgParseDeflines
Default argument to specify whether sequences deflines should be parsed.
const bool kDfltArgUseLCaseMasking
Default argument to specify whether lowercase masking should be used.
const string kDfltArgCompBasedStats
Default argument for composition based statistics.
Include a standard set of the NCBI C++ Toolkit most basic headers.
const size_t kDfltArgNumDescriptions
Default number of one-line descriptions to display in the traditional BLAST report.
const size_t kDfltArgNumAlignments
Default number of alignments to display in the traditional BLAST report.
const size_t kDfltLineLength
static FILE * input_file
Definition: common.c:35
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
static SQLCHAR output[256]
Definition: print.c:5
EOutputFormat
Definition: grid_cli.hpp:276
bool IsProtein() const
Determine whether this database contains protein sequences or not.
EMoleculeType
Molecule of the BLAST database.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NULL
Definition: ncbistd.hpp:225
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NCBI_BLASTINPUT_EXPORT
Definition: ncbi_export.h:336
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
Declares CIgBlast, the C++ API for the IG-BLAST engine.
range(_Ty, _Ty) -> range< _Ty >
EIPRangeType t
Definition: ncbi_localip.c:101
Defines command line argument related classes.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
@ ePairwise
Definition: splign_app.cpp:551
C++ I/O stream wrappers to compress/decompress data on-the-fly.
#define _ASSERT
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
Uniform BLAST Search Interface.
Modified on Sun Apr 21 03:43:39 2024 by modify_doxy.py rev. 669887