NCBI C++ ToolKit
blast_args.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_args.hpp 102828 2024-07-25 12:37:59Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Jason Papadopoulos
27  *
28  */
29 
30 /** @file blast_args.hpp
31  * Interface for converting blast-related command line
32  * arguments into blast options
33  */
34 
35 #ifndef ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP
36 #define ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP
37 
38 #include <corelib/ncbistd.hpp>
39 #include <corelib/ncbiargs.hpp>
44 #include <algo/blast/api/setup_factory.hpp> // for CThreadable
47 
48 #include <objmgr/scope.hpp> // for CScope
51 
53 
55 BEGIN_SCOPE(blast)
56 
57 /**
58  * BLAST Command line arguments design
59  * The idea is to have several small objects (subclasses of IBlastCmdLineArgs)
60  * which can do two things:
61  * 1) On creation, add flags/options/etc to a CArgs object
62  * 2) When passed in a CBlastOptions object, call the appropriate methods based
63  * on the CArgs options set when the NCBI application framework parsed the
64  * command line. If data collected by the small object (from the command line)
65  * cannot be applied to the CBlastOptions object, then it's provided to the
66  * application via some other interface methods.
67  *
68  * Each command line application will have its own argument class (e.g.:
69  * CPsiBlastAppArgs), which will contain several of the aforementioned small
70  * objects. It will create and hold a reference to a CArgs class as well as
71  * a CBlastOptionsHandle object, which will pass to each of its small objects
72  * aggregated as data members and then return it to the caller (application)
73  *
74  * Categories of data to extract from command line options
75  * 1) BLAST algorithm options
76  * 2) Input/Output files, and their modifiers (e.g.: believe query defline)
77  * 3) BLAST database information (names, limitations, num db seqs)
78  * 4) Formatting options (html, display formats, etc)
79 */
80 
81 /** Interface definition for a generic command line option for BLAST
82  */
84 {
85 public:
86  /** Our virtual destructor */
87  virtual ~IBlastCmdLineArgs() {}
88 
89  /** Sets the command line descriptions in the CArgDescriptions object
90  * relevant to the subclass
91  * @param arg_desc the argument descriptions object [in|out]
92  */
93  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc) = 0;
94 
95  /** Extracts BLAST algorithmic options from the command line arguments into
96  * the CBlastOptions object. Default implementation does nothing.
97  * @param cmd_line_args Command line arguments parsed by the NCBI
98  * application framework [in]
99  * @param options object to which the appropriate options will be set
100  * [in|out]
101  */
102  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
103  CBlastOptions& options);
104 };
105 
106 /** Argument class to retrieve input and output streams for a command line
107  * program.
108  */
110 {
111 public:
112  /** Default constructor */
113  CStdCmdLineArgs() : m_InputStream(0), m_OutputStream(0),
114  m_GzipEnabled(false),
115  m_SRAaccessionEnabled(false),
116  m_UnalignedOutputStream(0) {};
117  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
118  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
119  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
120  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
121  CBlastOptions& options);
122  /** Get the input stream for a command line application */
123  CNcbiIstream& GetInputStream() const;
124  /** Get the output stream for a command line application */
125  CNcbiOstream& GetOutputStream() const;
126  /** Set the input stream if read from a saved search strategy */
127  void SetInputStream(CRef<CTmpFile> input_file);
128 
129  /** Set automatic decompression of the input file if the file name is
130  * recognized
131  * @param g If true input file will be gunzipped if the file name ends with
132  * ".gz" [in]
133  */
134  void SetGzipEnabled(bool g) {m_GzipEnabled = g;}
135 
136  /** enables sra accession flag
137  * @param g If true "-sra" will be added (not compatible with "-query")
138  */
139  void SetSRAaccessionEnabled(bool g) {m_SRAaccessionEnabled = g;}
140 
141  /** Is there a separate output stream for unaligned sequences/reads
142  * (for magicblast)
143  * @return True if separate output stream has been set up, otherwise false
144  */
145  bool HasUnalignedOutputStream(void) const {return m_UnalignedOutputStream;}
146 
147  /** Get output stream for unaligned sequences/reads (for magicblast)
148  * @return Output stream for unaligned reads or NULL
149  */
151  {return m_UnalignedOutputStream;}
152 
153 private:
154  CNcbiIstream* m_InputStream; ///< Application's input stream
155  CNcbiOstream* m_OutputStream; ///< Application's output stream
156  unique_ptr<CDecompressIStream> m_DecompressIStream;
157  unique_ptr<CCompressOStream> m_CompressOStream;
158 
159  /// ASN.1 specification of query sequences when read from a saved search
160  /// strategy
162 
163  /// If true input file will be decompressed with gzip if filename ends
164  /// with ".gz"
166 
167  /// If true, option to specify SRA runs will be presented as possible
168  /// query input
170 
171  /// Output stream to report unaligned sequences/reads
173  unique_ptr<CCompressOStream> m_UnalignedCompressOStream;
174 };
175 
176 /** Argument class to populate an application's name and description */
178 {
179 public:
180  /**
181  * @brief Constructor
182  *
183  * @param program_name application's name [in]
184  * @param program_description application's description [in]
185  */
186  CProgramDescriptionArgs(const string& program_name,
187  const string& program_description);
188  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
189  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
190 
191 protected:
192  string m_ProgName; ///< Application's name
193  string m_ProgDesc; ///< Application's description
194 };
195 
196 /// Argument class to specify the supported tasks of a given program
198 {
199 public:
200  /** Constructor
201  * @param supported_tasks list of supported tasks [in]
202  * @param default_task One of the tasks above, to be displayed as
203  * default in the command line arguments (cannot be empty or absent from
204  * the set above) [in]
205  */
206  CTaskCmdLineArgs(const set<string>& supported_tasks,
207  const string& default_task);
208  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
209  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
210  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
211  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
212  CBlastOptions& options);
213 private:
214  /// Set of supported tasks by this command line argument
216  /// Default task for this command line argument
218 };
219 
220 /** Argument class to retrieve and set the window size BLAST algorithm
221  * option */
223 {
224 public:
225  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
226  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
227  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions
228  * @note this depends on the matrix already being set...
229  */
230  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
231  CBlastOptions& options);
232 };
233 
234 /** Argument class to retrieve and set the off-diagonal range used in 2-hit
235  algorithm */
237 {
238 public:
239  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
240  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
241  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions
242  * @note this depends on the matrix already being set...
243  */
244  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
245  CBlastOptions& options);
246 };
247 
248 /** Argument class to retrieve and set the word threshold BLAST algorithm
249  * option */
251 {
252 public:
253  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
254  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
255  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions
256  * @note this depends on the matrix already being set...
257  */
258  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
259  CBlastOptions& options);
260 };
261 
262 /** RMH: Argument class to retrieve and set the options specific to
263  * the RMBlastN algorithm
264  */
266 {
267 public:
268  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
269  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
270  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
271  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
272  CBlastOptions& options);
273 };
274 
275 /** Argument class to retrieve and set the scoring matrix name BLAST algorithm
276  * option */
278 {
279 public:
280  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
281  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
282  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
283  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
284  CBlastOptions& options);
285 };
286 
287 /** Argument class for general search BLAST algorithm options: evalue, gap
288  * penalties, query filter string, ungapped x-drop, initial and final gapped
289  * x-drop, word size, percent identity, and effective search space.
290  */
292 {
293 public:
294  /**
295  * @brief Constructor
296  *
297  * @param query_is_protein is the query sequence(s) protein? [in]
298  * @param is_rpsblast is it RPS-BLAST? [in]
299  * @param show_perc_identity should the percent identity be shown?
300  * @param is_igblast is it IG-BLAST? [in]
301  * Currently only supported for blastn [in]
302  */
303  CGenericSearchArgs(bool query_is_protein = true, bool is_rpsblast = false,
304  bool show_perc_identity = false, bool is_tblastx = false,
305  bool is_igblast = false, bool suppress_sum_stats = false)
306  : m_QueryIsProtein(query_is_protein), m_IsRpsBlast(is_rpsblast),
307  m_ShowPercentIdentity(show_perc_identity), m_IsTblastx(is_tblastx),
308  m_IsIgBlast(is_igblast), m_SuppressSumStats(suppress_sum_stats),
309  m_IsBlastn(false){}
310 
311  // Only supported and used by blastn for now
313 
314  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
315  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
316  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
317  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
318  CBlastOptions& options);
319 private:
320  bool m_QueryIsProtein; /**< true if the query is protein */
321  bool m_IsRpsBlast; /**< true if the search is RPS-BLAST */
322  bool m_ShowPercentIdentity; /**< true if the percent identity option should
323  be shown */
324  bool m_IsTblastx; /**< true if the search is tblastx */
325  bool m_IsIgBlast; /**< true if the search is igblast */
326  bool m_SuppressSumStats; /**< true if search is blastn or blastp */
328 
329 };
330 
331 /** Argument class for collecting filtering options */
333 {
334 public:
335  /**
336  * @brief Constructor
337  *
338  * @param query_is_protein is the query sequence(s) protein? [in]
339  * @param filter_by_default should filtering be applied by default? [in]
340  */
341  CFilteringArgs(bool query_is_protein = true,
342  bool filter_by_default = true)
343  : m_QueryIsProtein(query_is_protein),
344  m_FilterByDefault(filter_by_default) {}
345 
346  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
347  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
348  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
349  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
350  CBlastOptions& options);
351 private:
352  bool m_QueryIsProtein; /**< true if the query is protein */
353  bool m_FilterByDefault; /**< Should filtering be applied by default? */
354 
355  /**
356  * @brief Auxiliary method to tokenize the filtering string.
357  *
358  * @param filtering_args string to tokenize [in]
359  * @param output vector with tokens [in|out]
360  */
361  void x_TokenizeFilteringArgs(const string& filtering_args,
362  vector<string>& output) const;
363 };
364 
365 /// Defines values for match and mismatch in nucleotide comparisons as well as
366 /// non-greedy extension
368 {
369 public:
370  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
371  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
372  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
373  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
374  CBlastOptions& options);
375 };
376 
377 /// Argument class to retrieve discontiguous megablast arguments
379 {
380 public:
381  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
382  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
383  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
384  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
385  CBlastOptions& options);
386 };
387 
388 /** Argument class for collecting composition based statistics options */
390 {
391 public:
392  /// Constructor
393  ///@param is_2and3supported Are composition based statistics options 2 and
394  /// 3 supported [in]
395  ///@param default_option Default composition based statistics option [in]
396  ///@param zero_option_descr Non-standard description for composition
397  /// based statistics option zero [in]
398  CCompositionBasedStatsArgs(bool is_2and3supported = true,
399  const string& default_option
401  const string& zero_option_descr = "")
402  : m_Is2and3Supported(is_2and3supported),
403  m_DefaultOpt(default_option),
404  m_ZeroOptDescr(zero_option_descr) {}
405 
406  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
407  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
408  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
409  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
410  CBlastOptions& options);
411 
412 protected:
413  /// Are options 2 and 3 supported
415  /// Default option
416  string m_DefaultOpt;
417  /// Non standard description for option zero
419 };
420 
421 /** Argument class for collecting gapped options */
423 {
424 public:
425  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
426  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
427  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
428  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
429  CBlastOptions& options);
430 };
431 
432 /** Argument class for collecting the largest intron size */
434 {
435 public:
436  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
437  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
438  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
439  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
440  CBlastOptions& options);
441 };
442 
443 /// Argument class to collect the frame shift penalty for out-of-frame searches
445 {
446 public:
447  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
448  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
449  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
450  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
451  CBlastOptions& options);
452 };
453 
454 /// Argument class to collect the genetic code for all queries/subjects
456 {
457 public:
458  /// Enumeration defining which sequences the genetic code applies to
459  enum ETarget {
460  eQuery, ///< Query genetic code
461  eDatabase ///< Database genetic code
462  };
463 
464 
465  /**
466  * @brief Constructor
467  *
468  * @param t genetic code target (query or database)
469  */
470  CGeneticCodeArgs(ETarget t) : m_Target(t) {};
471 
472  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
473  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
474  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
475  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
476  CBlastOptions& options);
477 
478 private:
479  ETarget m_Target; ///< Genetic code target
480 };
481 
482 /// Argument class to retrieve the gap trigger option
484 {
485 public:
486  /**
487  * @brief Constructor
488  *
489  * @param query_is_protein is the query sequence(s) protein?
490  */
491  CGapTriggerArgs(bool query_is_protein)
492  : m_QueryIsProtein(query_is_protein) {}
493  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
494  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
495  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
496  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
497  CBlastOptions& options);
498 private:
499  bool m_QueryIsProtein; /**< true if the query is protein */
500 };
501 
502 /// Argument class to collect PSSM engine options
504 {
505 public:
506  /// Constructor
507  /// @param is_deltablast Are the arguments set up for Delta Blast [in]
508  CPssmEngineArgs(bool is_deltablast = false) : m_IsDeltaBlast(is_deltablast)
509  {}
510 
511  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
512  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
513  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
514  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
515  CBlastOptions& options);
516 
517 private:
518  /// Are these arguments for Delta Blast
520 };
521 
522 /// Argument class to import/export the search strategy
524 {
525 public:
526  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
527  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
528  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
529  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
530  CBlastOptions& options);
531 
532  /// Get the input stream for the search strategy
533  CNcbiIstream* GetImportStream(const CArgs& args) const;
534  /// Get the output stream for the search strategy
535  CNcbiOstream* GetExportStream(const CArgs& args) const;
536 };
537 
538 /// Argument class to collect options specific to PSI-BLAST
540 {
541 public:
542  /// Enumeration to determine the molecule type of the database
544  eProteinDb, ///< Traditional, iterated PSI-BLAST
545  eNucleotideDb ///< PSI-Tblastn, non-iterated
546  };
547 
548  /**
549  * @brief Constructor
550  *
551  * @param db_target Molecule type of the database
552  * @param is_deltablast Are the arguments set up for Delta Blast
553  */
554  CPsiBlastArgs(ETargetDatabase db_target = eProteinDb,
555  bool is_deltablast = false)
556  : m_DbTarget(db_target), m_NumIterations(1),
557  m_CheckPointOutput(0), m_AsciiMatrixOutput(0),
558  m_IsDeltaBlast(is_deltablast),
559  m_SaveLastPssm(false)
560  {};
561 
562  /// Our virtual destructor
563  virtual ~CPsiBlastArgs() {}
564 
565  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
566  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
567  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
568  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
569  CBlastOptions& options);
570 
571  /// Retrieve the number of iterations to perform
572  size_t GetNumberOfIterations() const {
573  return m_NumIterations;
574  }
575 
576  /// Set the number of iterations to perform
577  void SetNumberOfIterations(unsigned int num_iters) {
578  m_NumIterations = num_iters;
579  }
580  /// Returns true if checkpoint PSSM is required to be printed
582  return m_CheckPointOutput != NULL;
583  }
584  /// Get the checkpoint file output stream
585  /// @return pointer to output stream, not to be free'd by the caller
587  return m_CheckPointOutput ? m_CheckPointOutput->GetStream() : NULL;
588  }
589  /// Returns true if ASCII PSSM is required to be printed
590  bool RequiresAsciiPssmOutput() const {
591  return m_AsciiMatrixOutput != NULL;
592  }
593  /// Get the ASCII matrix output stream
594  /// @return pointer to output stream, not to be free'd by the caller
596  return m_AsciiMatrixOutput ? m_AsciiMatrixOutput->GetStream() : NULL;
597  }
598 
599  /// Get the PSSM read from checkpoint file
601  return m_Pssm;
602  }
603 
604  /// Set the PSSM read from saved search strategy
606  m_Pssm = pssm;
607  }
608 
609  /// Should the PSSM after the last database search be saved
610  bool GetSaveLastPssm(void) const {
611  return m_SaveLastPssm;
612  }
613 
614  /// Set the on/off switch for saving PSSM after the last database search
615  void SetSaveLastPssm(bool b) {
616  m_SaveLastPssm = b;
617  }
618 
619 private:
620  /// Molecule of the database
622  /// number of iterations to perform
624  /// checkpoint output file
626  /// ASCII matrix output file
628  /// PSSM
630 
631  /// Are the arguments set up for Delta Blast
633 
634  /// Save PSSM after the last database search
636 
637  /// Prohibit copy constructor
639  /// Prohibit assignment operator
641 
642  /// Auxiliary function to create a PSSM from a multiple sequence alignment
643  /// file
645  x_CreatePssmFromMsa(CNcbiIstream& input_stream, CBlastOptions& opt,
646  bool save_ascii_pssm, unsigned int msa_master_idx,
647  bool ignore_pssm_tmpl_seq);
648 };
649 
650 /// Argument class to collect options specific to PHI-BLAST
652 {
653 public:
654  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
655  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
656  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
657  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
658  CBlastOptions& options);
659 };
660 
661 /// Argument class to collect options specific to KBLASTP
663 {
664 public:
665 
666  /// Constructor
667  CKBlastpArgs(void) : m_JDistance(0.10), m_MinHits(0), m_CandidateSeqs(1000) {}
668 
669  /// Our virtual destructor
670  virtual ~CKBlastpArgs() {}
671 
672  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
673  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
674  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
675  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
676  CBlastOptions& options);
677 
678 
679  /// Get the Jaccard distance
680  double GetJaccardDistance(void) { return m_JDistance;}
681 
682  /// Get the minimum number of LSH matches.
683  int GetMinHits(void) {return m_MinHits;}
684 
685  /// The database
686  string GetDatabase(void) {return m_DbIndex;}
687 
688  /// Number of candidate sequences to attempt with BLASTP
689  int GetCandidateSeqs(void) {return m_CandidateSeqs;}
690 
691 private:
692  /// Prohibit copy constructor
694  /// Prohibit assignment operator
696 
697  /// Jaccard distance
698  double m_JDistance;
699 
700  /// Minimum number of hits in LSH phase
702 
703  /// Database/index
704  string m_DbIndex;
705 
706  /// Number of candidate sequences to try BLAST on.
708 };
709 
710 /// Argument class to collect options specific to DELTA-BLAST
712 {
713 public:
714 
715  /// Constructor
716  CDeltaBlastArgs(void) : m_ShowDomainHits(false) {}
717 
718  /// Our virtual destructor
719  virtual ~CDeltaBlastArgs() {}
720 
721  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
722  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
723  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
724  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
725  CBlastOptions& options);
726 
727  /// Get domain database
729  {return m_DomainDb;}
730 
731  /// Get show domain hits option value
732  bool GetShowDomainHits(void) const {return m_ShowDomainHits;}
733 
734 private:
735  /// Prohibit copy constructor
737  /// Prohibit assignment operator
739 
740 private:
741 
742  /// Conserved Domain Database
744 
745  /// Is printing CDD hits requested
747 };
748 
749 
751 {
752 public:
753  CMappingArgs(void) {}
754 
755  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
756  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
757  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
758  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
759  CBlastOptions& options);
760 
761 };
762 
763 /*****************************************************************************/
764 // Input options
765 
766 /// Argument class to collect query options
768 {
769 public:
770  /**
771  * @brief Constructor
772  *
773  * @param query_cannot_be_nucl can the query not be nucleotide?
774  */
775  CQueryOptionsArgs(bool query_cannot_be_nucl = false)
776  : m_Strand(objects::eNa_strand_unknown), m_Range(),
777  m_UseLCaseMask(kDfltArgUseLCaseMasking),
778  m_ParseDeflines(kDfltArgParseDeflines),
779  m_QueryCannotBeNucl(query_cannot_be_nucl)
780  {};
781 
782  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
783  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
784  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
785  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
786  CBlastOptions& options);
787 
788  /// Get query sequence range restriction
789  TSeqRange GetRange() const { return m_Range; }
790  /// Set query sequence range restriction
791  void SetRange(const TSeqRange& range) { m_Range = range; }
792  /// Get strand to search in query sequence(s)
793  objects::ENa_strand GetStrand() const { return m_Strand; }
794  /// Use lowercase masking in FASTA input?
795  bool UseLowercaseMasks() const { return m_UseLCaseMask; }
796  /// Should the defline be parsed?
797  bool GetParseDeflines() const { return m_ParseDeflines; }
798 
799  /// Is the query sequence protein?
800  bool QueryIsProtein() const { return m_QueryCannotBeNucl; }
801 
802 private:
803  /// Strand(s) to search
805  /// range to restrict the query sequence(s)
807  /// use lowercase masking in FASTA input
809  /// Should the deflines be parsed?
811 
812  /// only false for blast[xn], and tblastx
813  /// true in case of PSI-BLAST
815 };
816 
817 /// Argument class to collect query options for BLAST Mapper
819 {
820 public:
821 
822  /// Input formats
824  eFasta = 0,
829  eSra
830  };
831 
832 
835  m_IsPaired(false),
836  m_InputFormat(eFasta),
837  m_MateInputStream(NULL),
838  m_EnableSraCache(false)
839  {}
840 
841  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
842  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
843  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
844  virtual void ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt);
845 
846  /// Are query sequences paired
847  bool IsPaired(void) const {return m_IsPaired;}
848 
849  /// Get the format of the query input
851  {return m_InputFormat;}
852 
853  /// Does the mate input stream exist
854  bool HasMateInputStream(void) const {return m_MateInputStream;}
855 
856  /// Get input stream for query mates
857  CNcbiIstream* GetMateInputStream(void) const {return m_MateInputStream;}
858 
859  /// Get a list of SRA accessions
860  const vector<string>& GetSraAccessions(void) const
861  {return m_SraAccessions;}
862 
863  /// Is SRA caching in local files enabled
864  /// (see File Caching at
865  /// https://github.com/ncbi/sra-tools/wiki/Toolkit-Configuration)
866  bool IsSraCacheEnabled(void) const {return m_EnableSraCache;}
867 
868 private:
871  vector<string> m_SraAccessions;
872 
874  unique_ptr<CDecompressIStream> m_DecompressIStream;
875 
877 };
878 
879 
880 /// Argument class to collect database/subject arguments
882 {
883 public:
884  /// The default priority for subjects, should be used for
885  /// subjects/databases
886  static const int kSubjectsDataLoaderPriority = 10;
887 
888  /// alias for the database molecule type
890 
891  /// Auxiliary function to determine if the database/subject sequence has
892  /// been set
893  static bool HasBeenSet(const CArgs& args);
894 
895  /// Constructor
896  /// @param request_mol_type If true, the command line arguments will
897  /// include a mandatory option to disambiguate whether a protein or a
898  /// nucleotide database is searched
899  /// @param is_rpsblast is it RPS-BLAST?
900  /// @param is_igblast is it IG-BLAST?
901  /// @param is_mapper is it the short read mapper?
902  CBlastDatabaseArgs(bool request_mol_type = false,
903  bool is_rpsblast = false,
904  bool is_igblast = false,
905  bool is_mapper = false,
906  bool is_kblast = false);
907  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
908  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
909  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
910  virtual void ExtractAlgorithmOptions(const CArgs& args,
911  CBlastOptions& opts);
912 
913  /// Turns on/off database masking support
915  m_SupportsDatabaseMasking = val;
916  }
917 
918  /// Is the database/subject protein?
919  bool IsProtein() const { return m_IsProtein; }
920 
921  /// Get the BLAST database name
922  /// @return empty string in the case of BLAST2Sequences, otherwise the
923  /// BLAST database name
924  string GetDatabaseName() const {
925  return m_SearchDb.Empty() ? kEmptyStr : m_SearchDb->GetDatabaseName();
926  }
927 
928  /// Retrieve the search database information
929  CRef<CSearchDatabase> GetSearchDatabase() const { return m_SearchDb; }
930  /// Set the search database information.
931  /// use case: recovering from search strategy
933  m_SearchDb = search_db;
934  m_IsProtein = search_db->IsProtein();
935  }
936 
937  /// Sets the subject sequences.
938  /// use case: recovering from search strategy
940  bool is_protein) {
941  m_Subjects = subjects;
942  m_Scope = scope;
943  m_IsProtein = is_protein;
944  }
945 
946  /// Retrieve subject sequences, if provided
947  /// @param scope scope to which the sequences read will be added (if
948  /// non-NULL) [in]
949  /// @return empty CRef<> if no subjects were provided, otherwise a properly
950  /// initialized IQueryFactory object
951  CRef<IQueryFactory> GetSubjects(objects::CScope* scope = NULL) {
952  if (m_Subjects && scope) {
953  // m_Scope contains the subject(s) read
955  // Add the scope with a lower priority to avoid conflicts
956  scope->AddScope(*m_Scope, kSubjectsDataLoaderPriority);
957  }
958  return m_Subjects;
959  }
960 
962  m_SupportIPGFiltering = val;
963  }
964 
965 protected:
966  CRef<CSearchDatabase> m_SearchDb;/**< Description of the BLAST database */
967  bool m_RequestMoleculeType; /**< Determines whether the database's
968  molecule type should be requested in the
969  command line, true in case of PSI-BLAST
970  */
971  bool m_IsRpsBlast; /**< true if the search is RPS-BLAST */
972  bool m_IsIgBlast; /**< true if the search is Ig-BLAST */
973 
974  bool m_IsProtein; /**< Is the database/subject(s) protein? */
975  bool m_IsMapper; /**< true for short read mapper */
976  bool m_IsKBlast; /**< true for Kblastp */
977  CRef<IQueryFactory> m_Subjects; /**< The subject sequences */
978  CRef<objects::CScope> m_Scope; /**< CScope object in which all subject
979  sequences read are kept */
980  bool m_SupportsDatabaseMasking; /**< true if it's supported */
981  bool m_SupportIPGFiltering; /**< true if IPG filtering is supported */
982 };
983 
984 /// Argument class to collect options specific to igBLAST
986 {
987 public:
988  CIgBlastArgs(bool is_protein) : m_IsProtein(is_protein) {};
989 
990  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
991  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
992  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
993  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
994  CBlastOptions& options);
995 
996  CRef<CIgBlastOptions> GetIgBlastOptions() { return m_IgOptions; } ///< Returns the m_IgOptions reference (the Igblast options this object fills)
997 
999 
1000  if (m_Scope.NotEmpty()) {
1001  // Add the scope with a lower priority to avoid conflicts
1002  scope->AddScope(*m_Scope,
1004  }
1005  }
1006 
1007 private:
1008  /// Is this a protein search?
1010  /// Igblast options to fill
1012  /// scope to get sequences
1014 };
1015 
1016 /// Argument class to collect formatting options, use this to create a
1017 /// CBlastFormat object.
1018 /// @note This object is also needed to set the maximum number of target
1019 /// sequences to save (hitlist size)
1021 {
1022 public:
1023  /// Defines the output formats supported by our command line formatter
1025  /// Standard pairwise alignments
1026  ePairwise = 0,
1027  /// Query anchored showing identities
1028  eQueryAnchoredIdentities,
1029  /// Query anchored no identities
1030  eQueryAnchoredNoIdentities,
1031  /// Flat query anchored showing identities
1032  eFlatQueryAnchoredIdentities,
1033  /// Flat query anchored no identities
1035  /// XML output
1037  /// Tabular output
1039  /// Tabular output with comments
1041  /// ASN.1 text output
1043  /// ASN.1 binary output
1045  /// Comma-separated values
1047  /// BLAST archive format
1049  /// JSON seq-align
1051  /// JSON XInclude
1053  /// XML2 XInclude
1055  /// JSON2 single file
1057  /// XML2 single file
1059  /// SAM format
1061 
1063 
1064  ///igblast AIRR rearrangement, 19
1066 
1067  /// unaligned reads in magicblast
1069  /// Sentinel value for error checking
1070  eEndValue
1071 
1072  };
1073 
1075  eDefaultFlag = 0,
1076  // Set if VDB
1077  eIsVDB = 0x01,
1078  // Set if SAM format is supported
1079  eIsSAM = 0x02,
1080  // Set if both VDB and SAM is true
1081  eIsVDB_SAM = eIsVDB | eIsSAM,
1082  //Is eAirrRearrangement format supported?
1083  eIsAirrRearrangement = 0x04
1084  };
1085  /// Default constructor
1086  CFormattingArgs(bool isIgblast = false, EFormatFlags flag = eDefaultFlag)
1087  : m_OutputFormat(ePairwise), m_ShowGis(false),
1088  m_NumDescriptions(0), m_NumAlignments(0),
1089  m_DfltNumDescriptions(0), m_DfltNumAlignments(0),
1090  m_Html(false),
1091  m_IsIgBlast(isIgblast),
1092  m_LineLength(align_format::kDfltLineLength),
1093  m_FormatFlags(flag),
1094  m_HitsSortOption(-1),
1095  m_HspsSortOption(-1)
1096  {
1097  if (m_IsIgBlast) {
1098  m_DfltNumAlignments = m_DfltNumDescriptions = 10;
1099  } else {
1100  m_DfltNumAlignments = static_cast<ncbi::TSeqPos>(align_format::kDfltArgNumAlignments) ;
1101  m_DfltNumDescriptions = static_cast<TSeqPos>(align_format::kDfltArgNumDescriptions);
1102  }
1103  };
1104 
1105  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1106  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1107  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1108  virtual void ExtractAlgorithmOptions(const CArgs& args,
1109  CBlastOptions& opts);
1110 
1111  /// Parses the output format command line option value, returns the
1112  /// requested output format type and any custom output formats (if
1113  /// any and applicable)
1114  /// @param args Command line arguments object [in]
1115  /// @param fmt_type Output format type requested in command line options
1116  /// [out]
1117  /// @param custom_fmt_spec Custom output format specification in command
1118  /// line options [out]
1119  virtual void
1120  ParseFormattingString(const CArgs& args,
1121  EOutputFormat& fmt_type,
1122  string& custom_fmt_spec,
1123  string& custom_delim) const;
1124 
1125  /// Get the choice of formatted output
1127  return m_OutputFormat;
1128  }
1129 
1130  /// Returns true if the desired output format is structured (needed to
1131  /// determine whether to print or not that a PSI-BLAST search has
1132  /// converged - this is not supported in structured formats)
1134  return m_OutputFormat == eXml ||
1135  m_OutputFormat == eAsnText ||
1136  m_OutputFormat == eAsnBinary ||
1137  m_OutputFormat == eXml2 ||
1138  m_OutputFormat == eJson ||
1139  m_OutputFormat == eXml2_S ||
1140  m_OutputFormat == eJson_S ||
1141  m_OutputFormat == eJsonSeqalign ||
1142  m_OutputFormat == eSAM;
1143  }
1144 
1145  /// Display the NCBI GIs in formatted output?
1146  bool ShowGis() const {
1147  return m_ShowGis;
1148  }
1149  /// Number of one-line descriptions to show in traditional BLAST output
1151  return m_NumDescriptions;
1152  }
1153  /// Number of alignments to show in traditional BLAST output
1155  return m_NumAlignments;
1156  }
1157  /// Display HTML output?
1158  bool DisplayHtmlOutput() const {
1159  return m_Html;
1160  }
1161 
1162  /// Retrieve the string that specifies the custom output format for tabular
1163  /// and comma-separated values
1164  string GetCustomOutputFormatSpec() const {
1165  return m_CustomOutputFormatSpec;
1166  }
1167 
1168  virtual bool ArchiveFormatRequested(const CArgs& args) const;
1169 
1170  size_t GetLineLength() const {
1171  return m_LineLength;
1172  }
1173  int GetHitsSortOption() const {
1174  return m_HitsSortOption;
1175  }
1176  int GetHspsSortOption() const {
1177  return m_HspsSortOption;
1178  }
1179  string GetCustomDelimiter() const { return m_CustomDelim; } ///< Returns a copy of the custom delimiter stored in m_CustomDelim; const like the other getters of this class
1180 
1181 protected:
1182  EOutputFormat m_OutputFormat; ///< Choice of formatting output
1183  bool m_ShowGis; ///< Display NCBI GIs?
1184  TSeqPos m_NumDescriptions; ///< Number of 1-line descr. to show
1185  TSeqPos m_NumAlignments; ///< Number of alignments to show
1186  TSeqPos m_DfltNumDescriptions; ///< Default value for num descriptions
1187  TSeqPos m_DfltNumAlignments; ///< Default value for num alignments
1188  bool m_Html; ///< Display HTML output?
1189  bool m_IsIgBlast; ///< IgBlast has a different default num_alignments
1190  /// The format specification for custom output, e.g.: tabular or
1191  /// comma-separated value (populated if applicable)
1198 };
1199 
1200 /// Formatting args for magicblast advertising only SAM and fast tabular
1201 /// formats
1203 {
1204 public:
1205 
1207  CFormattingArgs(),
1208  m_TrimReadIds(true),
1209  m_PrintUnaligned(true),
1210  m_NoDiscordant(false),
1211  m_FwdRev(false),
1212  m_RevFwd(false),
1213  m_FwdOnly(false),
1214  m_RevOnly(false),
1215  m_OnlyStrandSpecific(false),
1216  m_PrintMdTag(false),
1217  m_UnalignedOutputFormat(eSAM)
1218  {}
1219 
1220  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1221 
1222  virtual void ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& opt);
1223 
1224  virtual bool ArchiveFormatRequested(const CArgs& /*args*/) const {
1225  return false;
1226  }
1227 
1228  /// Should read ids in SAM format be trimmed of .1 and .2 endings
1229  /// for paired mapping
1230  bool TrimReadIds(void) const {return m_TrimReadIds;}
1231 
1232  /// Should unaligned reads be reported
1233  bool PrintUnaligned(void) const {return m_PrintUnaligned;}
1234 
1235  /// Should non-concordant pairs be filtered out of report
1236  bool NoDiscordant(void) const {return m_NoDiscordant;}
1237 
1238  /// Specify fwd/rev strands
1239  bool SelectFwdRev(void) const {return m_FwdRev;}
1240 
1241  /// Specify rev/fwd strands
1242  bool SelectRevFwd(void) const {return m_RevFwd;}
1243 
1244  /// Specify fwd-only strands
1245  bool SelectFwdOnly(void) const {return m_FwdOnly;}
1246 
1247  /// Specify rev-only strands
1248  bool SelectRevOnly(void) const {return m_RevOnly;}
1249 
1250  /// Specify only-strand-specific
1251  bool SelectOnlyStrandSpecific(void) const {return m_OnlyStrandSpecific;}
1252 
1253  /// Should MD tag be included in SAM report
1254  bool PrintMdTag(void) const {return m_PrintMdTag;}
1255 
1256  /// Get format choice for unaligned reads
1258  {return m_UnalignedOutputFormat;}
1259 
1260  /// Get a user tag added to each alignment
1261  const string& GetUserTag(void) const {return m_UserTag;}
1262 
1263 private:
1267  bool m_FwdRev;
1268  bool m_RevFwd;
1274  string m_UserTag;
1275 };
1276 
1277 /// Argument class to collect multi-threaded arguments
1279 {
1280 public:
1281  enum EMTMode {
1282  eNotSupported = -1,
1285  eSplitByDB
1286  };
1287  /// Default Constructor
1288  CMTArgs(size_t default_num_threads = CThreadable::kMinNumThreads, EMTMode mt_mode = eNotSupported) :
1289  m_NumThreads(default_num_threads), m_MTMode(mt_mode)
1290  {
1291 #ifdef NCBI_NO_THREADS
1292  // No threads can be set in NON-MT mode
1293  m_NumThreads = CThreadable::kMinNumThreads;
1294  m_MTMode = eNotSupported;
1295 #endif
1296  }
1297  CMTArgs(const CArgs& cmd_line_args);
1298  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1299  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1300  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1301  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
1302  CBlastOptions& options);
1303 
1304  /// Get the number of threads to spawn
1305  size_t GetNumThreads() const { return m_NumThreads; }
1306 
1307  int GetMTMode() const { return m_MTMode; } ///< Returns the stored multi-threading mode (m_MTMode, an EMTMode value) as an int
1308 
1309 protected:
1310  void x_ExtractAlgorithmOptions(const CArgs& args);
1311  size_t m_NumThreads; ///< Number of threads to spawn
1313 };
1314 
1315 /// Argument class to collect remote vs. local execution
1317 {
1318 public:
1319  /// Default constructor
1320  CRemoteArgs() : m_IsRemote(false) {}
1321  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1322  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1323  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1324  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
1325  CBlastOptions& options);
1326 
1327  /// Return whether the search should be executed remotely or not
1328  bool ExecuteRemotely() const { return m_IsRemote; }
1329 
1330 private:
1331  /// Should the search be executed remotely?
1333 };
1334 
1335 /// Argument class to collect debugging options.
1336 /// Only shown in the command line if compiled with _BLAST_DEBUG
1338 {
1339 public:
1340  /// Default constructor
1341  CDebugArgs() : m_DebugOutput(false), m_RmtDebugOutput(false) {}
1342  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1343  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1344  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1345  virtual void ExtractAlgorithmOptions(const CArgs& cmd_line_args,
1346  CBlastOptions& options);
1347 
1348  /// Return whether debug (verbose) output should be produced on remote
1349  /// searches (only available when compiled with _DEBUG)
1350  bool ProduceDebugRemoteOutput() const { return m_RmtDebugOutput; }
1351  /// Return whether debug (verbose) output should be produced
1352  /// (only available when compiled with _DEBUG)
1353  bool ProduceDebugOutput() const { return m_DebugOutput; }
1354 private:
1355 
1356  /// Should debugging (verbose) output be printed
1358  /// Should debugging (verbose) output be printed for remote BLAST
1360 };
1361 
1362 /// Argument class to retrieve options for filtering HSPs (e.g.: culling
1363 /// options, best hit algorithm options)
1365 {
1366 public:
1367  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1368  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1369  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1370  virtual void ExtractAlgorithmOptions(const CArgs& args,
1371  CBlastOptions& opts);
1372 };
1373 
1374 /// Argument class to retrieve megablast database indexing options
1376 {
1377 public:
1378  /** Interface method, \sa IBlastCmdLineArgs::SetArgumentDescriptions */
1379  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc);
1380  /** Interface method, \sa IBlastCmdLineArgs::ExtractAlgorithmOptions */
1381  virtual void ExtractAlgorithmOptions(const CArgs& args,
1382  CBlastOptions& opts);
1383 
1384  /// Auxiliary function to determine if the megablast database indexing
1385  /// options have been set
1386  static bool HasBeenSet(const CArgs& args);
1387 };
1388 
1389 /// Type definition of a container of IBlastCmdLineArgs
1390 typedef vector< CRef<IBlastCmdLineArgs> > TBlastCmdLineArgs;
1391 
1392 
1393 /// Base command line argument class for a generic BLAST command line binary
1395 {
1396 public:
1397  /// Default constructor
1398  CBlastAppArgs();
1399  /// Our virtual destructor
1400  virtual ~CBlastAppArgs() {}
1401 
1402  /// Set the command line arguments
1403  CArgDescriptions* SetCommandLine();
1404 
1405  /// Get the task for this object
1406  string GetTask() const {
1407  return m_Task;
1408  }
1409 
1410  /// Set the task for this object
1411  /// @param task task name to set [in]
1412  void SetTask(const string& task);
1413 
1414  /// Extract the command line arguments into a CBlastOptionsHandle object
1415  /// @param args Command line arguments [in]
1416  CRef<CBlastOptionsHandle> SetOptions(const CArgs& args);
1417 
1418  /// Combine the command line arguments into a CBlastOptions object
1419  /// recovered from saved search strategy
1420  /// @param args Command line arguments [in]
1421  CRef<CBlastOptionsHandle> SetOptionsForSavedStrategy(const CArgs& args);
1422 
1423  /// Setter for the BLAST options handle, this is used if the options are
1424  /// recovered from a saved BLAST search strategy
1426  m_OptsHandle = opts_hndl;
1427  }
1428 
1429  /// Get the BLAST database arguments
1431  return m_BlastDbArgs;
1432  }
1433  /// Set the BLAST database arguments
1435  m_BlastDbArgs = args;
1436  }
1437 
1438  /// Get the options for the query sequence(s)
1440  return m_QueryOptsArgs;
1441  }
1442 
1443  /// Get the formatting options
1445  return m_FormattingArgs;
1446  }
1447 
1448  /// Get the number of threads to spawn
1449  size_t GetNumThreads() const {
1450  return m_MTArgs->GetNumThreads();
1451  }
1452 
1453  int GetMTMode() const {
1454  return m_MTArgs->GetMTMode();
1455  }
1456 
1457  /// Get the input stream
1458  virtual CNcbiIstream& GetInputStream();
1459 
1460  /// Get the output stream
1461  virtual CNcbiOstream& GetOutputStream();
1462 
1463  /// Set the input stream to a temporary input file (needed when importing
1464  /// a search strategy)
1465  /// @param input_file temporary input file to read [in]
1467  m_StdCmdLineArgs->SetInputStream(input_file);
1468  }
1469 
1470  /// Get the input stream for the search strategy
1472  return m_SearchStrategyArgs->GetImportStream(args);
1473  }
1474  /// Get the output stream for the search strategy
1476  return m_SearchStrategyArgs->GetExportStream(args);
1477  }
1478 
1479  /// Determine whether the search should be executed remotely or not
1480  bool ExecuteRemotely() const {
1481  return m_RemoteArgs->ExecuteRemotely();
1482  }
1483 
1484  /// Return whether debug (verbose) output should be produced on remote
1485  /// searches (only available when compiled with _DEBUG)
1487  return m_DebugArgs->ProduceDebugRemoteOutput();
1488  }
1489 
1490  /// Return whether debug (verbose) output should be produced
1491  /// (only available when compiled with _DEBUG)
1492  bool ProduceDebugOutput() const {
1493  return m_DebugArgs->ProduceDebugOutput();
1494  }
1495 
1496  /// Get the query batch size
1497  virtual int GetQueryBatchSize() const = 0;
1498 
1499  /// Retrieve the client ID for remote requests
1500  string GetClientId() const {
1501  _ASSERT( !m_ClientId.empty() );
1502  return m_ClientId;
1503  }
1504 
1505 protected:
1506  /// Set of command line argument objects
1508  /// query options object
1510  /// database/subject object
1512  /// formatting options
1514  /// multi-threaded options
1516  /// remote vs. local execution options
1518  /// standard command line arguments class
1520  /// arguments for dealing with search strategies
1522  /// Debugging arguments
1524  /// HSP filtering arguments
1526  /// The BLAST options handle, only non-NULL if assigned via
1527  /// SetOptionsHandle, i.e.: from a saved search strategy
1529  /// Task specified in the command line
1530  string m_Task;
1531  /// Client ID used for remote BLAST submissions, must be populated by
1532  /// subclasses
1533  string m_ClientId;
1534  /// Is this application being run ungapped
1536 
1537  /// Create the options handle based on the command line arguments
1538  /// @param locality whether the search will be executed locally or remotely
1539  /// [in]
1540  /// @param args command line arguments [in]
1543  const CArgs& args) = 0;
1544 
1545  /** Creates the BLAST options handle based on the task argument
1546  * @param locality whether the search will be executed locally or remotely [in]
1547  * @param task program-specific BLAST named parameter set [in]
1548  */
1550  x_CreateOptionsHandleWithTask(CBlastOptions::EAPILocality locality,
1551  const string& task);
1552 
1553  /// Issue warnings when recovering from a search strategy (command line
1554  /// applications only)
1555  void x_IssueWarningsForIgnoredOptions(const CArgs& args);
1556 };
1557 
1558 /**
1559  * @brief Create a CArgDescriptions object and invoke SetArgumentDescriptions
1560  * for each of the TBlastCmdLineArgs in its argument list
1561  *
1562  * @param args arguments to configure the return value [in]
1563  *
1564  * @return a CArgDescriptions object with the command line options set
1565  */
1569 
1570 END_SCOPE(blast)
1572 
1573 #endif /* ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP */
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
CArgDescriptions * SetUpCommandLineArguments(TBlastCmdLineArgs &args)
Create a CArgDescriptions object and invoke SetArgumentDescriptions for each of the TBlastCmdLineArgs...
vector< CRef< IBlastCmdLineArgs > > TBlastCmdLineArgs
Type definition of a container of IBlastCmdLineArgs.
Auxiliary classes/functions for BLAST input library.
Declares class to encapsulate all BLAST options.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
Base command line argument class for a generic BLAST command line binary.
CRef< CRemoteArgs > m_RemoteArgs
remote vs. local execution options
string GetTask() const
Get the task for this object.
void SetOptionsHandle(CRef< CBlastOptionsHandle > opts_hndl)
Setter for the BLAST options handle, this is used if the options are recovered from a saved BLAST sea...
CRef< CBlastOptionsHandle > m_OptsHandle
The BLAST options handle, only non-NULL if assigned via SetOptionsHandle, i.e.
CRef< CQueryOptionsArgs > m_QueryOptsArgs
query options object
size_t GetNumThreads() const
Get the number of threads to spawn.
virtual int GetQueryBatchSize() const =0
Get the query batch size.
CRef< CBlastDatabaseArgs > m_BlastDbArgs
database/subject object
virtual CRef< CBlastOptionsHandle > x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, const CArgs &args)=0
Create the options handle based on the command line arguments.
CRef< CSearchStrategyArgs > m_SearchStrategyArgs
arguments for dealing with search strategies
string m_Task
Task specified in the command line.
CRef< CDebugArgs > m_DebugArgs
Debugging arguments.
int GetMTMode() const
CRef< CBlastDatabaseArgs > GetBlastDatabaseArgs() const
Get the BLAST database arguments.
CNcbiIstream * GetImportSearchStrategyStream(const CArgs &args)
Get the input stream for the search strategy.
virtual ~CBlastAppArgs()
Our virtual destructor.
void SetInputStream(CRef< CTmpFile > input_file)
Set the input stream to a temporary input file (needed when importing a search strategy)
CRef< CMTArgs > m_MTArgs
multi-threaded options
CRef< CFormattingArgs > m_FormattingArgs
formatting options
bool ExecuteRemotely() const
Determine whether the search should be executed remotely or not.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
bool m_IsUngapped
Is this application being run ungapped.
CRef< CQueryOptionsArgs > GetQueryOptionsArgs() const
Get the options for the query sequence(s)
string GetClientId() const
Retrieve the client ID for remote requests.
TBlastCmdLineArgs m_Args
Set of command line argument objects.
void SetBlastDatabaseArgs(CRef< CBlastDatabaseArgs > args)
Set the BLAST database arguments.
CRef< CFormattingArgs > GetFormattingArgs() const
Get the formatting options.
CNcbiOstream * GetExportSearchStrategyStream(const CArgs &args)
Get the output stream for the search strategy.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
CRef< CStdCmdLineArgs > m_StdCmdLineArgs
standard command line arguments class
CRef< CHspFilteringArgs > m_HspFilteringArgs
HSP filtering arguments.
string m_ClientId
Client ID used for remote BLAST submissions, must be populated by subclasses.
Argument class to collect database/subject arguments.
Definition: blast_args.hpp:882
CRef< objects::CScope > m_Scope
CScope object in which all subject sequences read are kept.
Definition: blast_args.hpp:978
bool m_IsMapper
true for short read mapper
Definition: blast_args.hpp:975
CRef< CSearchDatabase > GetSearchDatabase() const
Retrieve the search database information.
Definition: blast_args.hpp:929
bool IsProtein() const
Is the database/subject protein?
Definition: blast_args.hpp:919
bool m_SupportsDatabaseMasking
true if it's supported
Definition: blast_args.hpp:980
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
Definition: blast_args.hpp:886
void SetIPGFilteringSupport(bool val)
Definition: blast_args.hpp:961
bool m_IsProtein
Is the database/subject(s) protein?
Definition: blast_args.hpp:974
bool m_RequestMoleculeType
Determines whether the database's molecule type should be requested in the command line,...
Definition: blast_args.hpp:967
bool m_IsIgBlast
true if the search is Ig-BLAST
Definition: blast_args.hpp:972
CRef< IQueryFactory > m_Subjects
The subject sequences.
Definition: blast_args.hpp:977
void SetSubjects(CRef< IQueryFactory > subjects, CRef< CScope > scope, bool is_protein)
Sets the subject sequences.
Definition: blast_args.hpp:939
bool m_IsRpsBlast
true if the search is RPS-BLAST
Definition: blast_args.hpp:971
CRef< IQueryFactory > GetSubjects(objects::CScope *scope=NULL)
Retrieve subject sequences, if provided.
Definition: blast_args.hpp:951
CRef< CSearchDatabase > m_SearchDb
Description of the BLAST database.
Definition: blast_args.hpp:966
CSearchDatabase::EMoleculeType EMoleculeType
alias for the database molecule type
Definition: blast_args.hpp:889
bool m_SupportIPGFiltering
true if IPG filtering is supported
Definition: blast_args.hpp:981
void SetSearchDatabase(CRef< CSearchDatabase > search_db)
Set the search database information.
Definition: blast_args.hpp:932
void SetDatabaseMaskingSupport(bool val)
Turns on/off database masking support.
Definition: blast_args.hpp:914
bool m_IsKBlast
true for Kblastp
Definition: blast_args.hpp:976
string GetDatabaseName() const
Get the BLAST database name.
Definition: blast_args.hpp:924
Encapsulates ALL the BLAST algorithm's options.
EAPILocality
Enumerates the possible contexts in which objects of this type can be used.
Argument class for collecting composition based statistics options.
Definition: blast_args.hpp:390
bool m_Is2and3Supported
Are options 2 and 3 supported.
Definition: blast_args.hpp:414
CCompositionBasedStatsArgs(bool is_2and3supported=true, const string &default_option=kDfltArgCompBasedStats, const string &zero_option_descr="")
Constructor.
Definition: blast_args.hpp:398
string m_ZeroOptDescr
Non standard description for option zero.
Definition: blast_args.hpp:418
string m_DefaultOpt
Default option.
Definition: blast_args.hpp:416
Argument class to collect debugging options.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
CDebugArgs()
Default constructor.
bool m_DebugOutput
Should debugging (verbose) output be printed.
bool m_RmtDebugOutput
Should debugging (verbose) output be printed for remote BLAST.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced (only available when compiled with _DEBUG)
Argument class to collect options specific to DELTA-BLAST.
Definition: blast_args.hpp:712
CDeltaBlastArgs & operator=(const CDeltaBlastArgs &rhs)
Prohibit assignment operator.
CDeltaBlastArgs(void)
Constructor.
Definition: blast_args.hpp:716
CRef< CSearchDatabase > m_DomainDb
Conserved Domain Database.
Definition: blast_args.hpp:743
virtual ~CDeltaBlastArgs()
Our virtual destructor.
Definition: blast_args.hpp:719
CRef< CSearchDatabase > GetDomainDatabase(void)
Get domain database.
Definition: blast_args.hpp:728
CDeltaBlastArgs(const CDeltaBlastArgs &rhs)
Prohibit copy constructor.
bool m_ShowDomainHits
Is printing CDD hits requested.
Definition: blast_args.hpp:746
bool GetShowDomainHits(void) const
Get show domain hits option value.
Definition: blast_args.hpp:732
Argument class to retrieve discontiguous megablast arguments.
Definition: blast_args.hpp:379
Argument class for collecting filtering options.
Definition: blast_args.hpp:333
CFilteringArgs(bool query_is_protein=true, bool filter_by_default=true)
Constructor.
Definition: blast_args.hpp:341
bool m_QueryIsProtein
true if the query is protein
Definition: blast_args.hpp:352
bool m_FilterByDefault
Should filtering be applied by default?
Definition: blast_args.hpp:353
Argument class to collect formatting options, use this to create a CBlastFormat object.
TSeqPos m_NumDescriptions
Number of 1-line descr. to show.
TSeqPos m_DfltNumDescriptions
Default value for num descriptions.
int GetHitsSortOption() const
bool HasStructuredOutputFormat() const
Returns true if the desired output format is structured (needed to determine whether to print or not ...
CFormattingArgs(bool isIgblast=false, EFormatFlags flag=eDefaultFlag)
Default constructor.
TSeqPos m_NumAlignments
Number of alignments to show.
string GetCustomOutputFormatSpec() const
Retrieve for string that specifies the custom output format for tabular and comma-separated value.
EOutputFormat GetFormattedOutputChoice() const
Get the choice of formatted output.
EFormatFlags m_FormatFlags
EOutputFormat
Defines the output formats supported by our command line formatter.
@ eJsonSeqalign
JSON seq-align.
@ eJson
JSON XInclude.
@ eTabular
Tabular output.
@ eXml2
XML2 XInclude.
@ eSAM
SAM format.
@ eCommaSeparatedValues
Comma-separated values.
@ eAsnText
ASN.1 text output.
@ eArchiveFormat
BLAST archive format.
@ eAirrRearrangement
igblast AIRR rearrangement, 19
@ eXml2_S
XML2 single file.
@ eJson_S
JSON2 single file.
@ eXml
XML output.
@ eFasta
unaligned reads in magicblast
@ eAsnBinary
ASN.1 binary output.
@ eFlatQueryAnchoredNoIdentities
@ eTabularWithComments
Tabular output with comments.
bool m_IsIgBlast
IgBlast has a different default num_alignments.
int GetHspsSortOption() const
string m_CustomOutputFormatSpec
The format specification for custom output, e.g.
EOutputFormat m_OutputFormat
Choice of formatting output.
TSeqPos GetNumAlignments() const
Number of alignments to show in traditional BLAST output.
bool ShowGis() const
Display the NCBI GIs in formatted output?
TSeqPos GetNumDescriptions() const
Number of one-line descriptions to show in traditional BLAST output.
size_t GetLineLength() const
bool DisplayHtmlOutput() const
Display HTML output?
TSeqPos m_DfltNumAlignments
Default value for num alignments.
string GetCustomDelimiter()
bool m_ShowGis
Display NCBI GIs?
bool m_Html
Display HTML output?
Argument class to collect the frame shift penalty for out-of-frame searches.
Definition: blast_args.hpp:445
Argument class to retrieve the gap trigger option.
Definition: blast_args.hpp:484
bool m_QueryIsProtein
true if the query is protein
Definition: blast_args.hpp:499
CGapTriggerArgs(bool query_is_protein)
Constructor.
Definition: blast_args.hpp:491
Argument class for collecting gapped options.
Definition: blast_args.hpp:423
Argument class for general search BLAST algorithm options: evalue, gap penalties, query filter string...
Definition: blast_args.hpp:292
bool m_QueryIsProtein
true if the query is protein
Definition: blast_args.hpp:320
bool m_IsRpsBlast
true if the search is RPS-BLAST
Definition: blast_args.hpp:321
bool m_IsIgBlast
true if the search is igblast
Definition: blast_args.hpp:325
bool m_IsTblastx
true if the search is tblastx
Definition: blast_args.hpp:324
bool m_ShowPercentIdentity
true if the percent identity option should be shown
Definition: blast_args.hpp:322
CGenericSearchArgs(bool query_is_protein=true, bool is_rpsblast=false, bool show_perc_identity=false, bool is_tblastx=false, bool is_igblast=false, bool suppress_sum_stats=false)
Constructor.
Definition: blast_args.hpp:303
bool m_SuppressSumStats
true if search is blastn or blastp
Definition: blast_args.hpp:326
Argument class to collect the genetic code for all queries/subjects.
Definition: blast_args.hpp:456
CGeneticCodeArgs(ETarget t)
Constructor.
Definition: blast_args.hpp:470
ETarget m_Target
Genetic code target.
Definition: blast_args.hpp:479
ETarget
Enumeration defining which sequences the genetic code applies to.
Definition: blast_args.hpp:459
@ eQuery
Query genetic code.
Definition: blast_args.hpp:460
Argument class to retrieve options for filtering HSPs (e.g.
Argument class to collect options specific to igBLAST.
Definition: blast_args.hpp:986
CRef< CIgBlastOptions > m_IgOptions
Igblast options to fill.
bool m_IsProtein
Is this a protein search?
void AddIgSequenceScope(CRef< objects::CScope > scope)
Definition: blast_args.hpp:998
CRef< CIgBlastOptions > GetIgBlastOptions()
Definition: blast_args.hpp:996
CIgBlastArgs(bool is_protein)
Definition: blast_args.hpp:988
CRef< objects::CScope > m_Scope
scope to get sequences
Argument class to collect options specific to KBLASTP.
Definition: blast_args.hpp:663
virtual ~CKBlastpArgs()
Our virtual destructor.
Definition: blast_args.hpp:670
double m_JDistance
Jaccard distance.
Definition: blast_args.hpp:698
int m_CandidateSeqs
Number of candidate sequences to try BLAST on.
Definition: blast_args.hpp:707
CKBlastpArgs(const CKBlastpArgs &rhs)
Prohibit copy constructor.
CKBlastpArgs & operator=(const CKBlastpArgs &rhs)
Prohibit assignment operator.
CKBlastpArgs(void)
Constructor.
Definition: blast_args.hpp:667
int GetMinHits(void)
Get the minimum number of LSH matches.
Definition: blast_args.hpp:683
int m_MinHits
Minimum number of hits in LSH phase.
Definition: blast_args.hpp:701
string m_DbIndex
Database/index.
Definition: blast_args.hpp:704
int GetCandidateSeqs(void)
Number of candidate sequences to attempt with BLASTP.
Definition: blast_args.hpp:689
string GetDatabase(void)
The database.
Definition: blast_args.hpp:686
double GetJaccardDistance(void)
Get the Jaccard distance.
Definition: blast_args.hpp:680
Argument class for collecting the largest intron size.
Definition: blast_args.hpp:434
Argument class to collect multi-threaded arguments.
size_t GetNumThreads() const
Get the number of threads to spawn.
size_t m_NumThreads
Number of threads to spawn.
int GetMTMode() const
CMTArgs(size_t default_num_threads=CThreadable::kMinNumThreads, EMTMode mt_mode=eNotSupported)
Default Constructor.
EMTMode m_MTMode
@ eSplitByQueries
Formatting args for magicblast advertising only SAM and fast tabular formats.
bool TrimReadIds(void) const
Should read ids in SAM format be trimmed of .1 and .2 endings for paired mapping.
const string & GetUserTag(void) const
Get a user tag added to each alignment.
bool SelectRevOnly(void) const
Specify rev-only strands.
bool SelectFwdRev(void) const
Specify fwd/rev strands.
EOutputFormat m_UnalignedOutputFormat
bool SelectFwdOnly(void) const
Specify fwd-only strands.
EOutputFormat GetUnalignedOutputFormat(void) const
Get format choice for unaligned reads.
bool SelectRevFwd(void) const
Specify rev/fwd strands.
bool SelectOnlyStrandSpecific(void) const
Specify only-strand-specific.
virtual bool ArchiveFormatRequested(const CArgs &) const
bool PrintMdTag(void) const
Should MD tag be included in SAM report.
bool NoDiscordant(void) const
Should non-concordant pairs be filtered out of report.
bool PrintUnaligned(void) const
Should unaligned reads be reported.
Argument class to collect query options for BLAST Mapper.
Definition: blast_args.hpp:819
bool IsSraCacheEnabled(void) const
Is SRA caching in local files enabled (see File Caching at https://github.com/ncbi/sra-tools/wiki/Too...
Definition: blast_args.hpp:866
const vector< string > & GetSraAccessions(void) const
Get a list of SRA accessions.
Definition: blast_args.hpp:860
bool HasMateInputStream(void) const
Does the mate input stream exist?
Definition: blast_args.hpp:854
EInputFormat GetInputFormat(void) const
Get the format in which the queries are provided.
Definition: blast_args.hpp:850
CNcbiIstream * m_MateInputStream
Definition: blast_args.hpp:873
EInputFormat m_InputFormat
Definition: blast_args.hpp:870
bool IsPaired(void) const
Are query sequences paired.
Definition: blast_args.hpp:847
EInputFormat
Input formats.
Definition: blast_args.hpp:823
vector< string > m_SraAccessions
Definition: blast_args.hpp:871
unique_ptr< CDecompressIStream > m_DecompressIStream
Definition: blast_args.hpp:874
CNcbiIstream * GetMateInputStream(void) const
Get input stream for query mates.
Definition: blast_args.hpp:857
CMappingArgs(void)
Definition: blast_args.hpp:753
Argument class to retrieve and set the scoring matrix name BLAST algorithm option.
Definition: blast_args.hpp:278
Argument class to retrieve megablast database indexing options.
Defines values for match and mismatch in nucleotide comparisons as well as non-greedy extension.
Definition: blast_args.hpp:368
CObject –.
Definition: ncbiobj.hpp:180
Argument class to retrieve and set the off-diagonal range used in 2-hit algorithm.
Definition: blast_args.hpp:237
Argument class to collect options specific to PHI-BLAST.
Definition: blast_args.hpp:652
Argument class to populate an application's name and description.
Definition: blast_args.hpp:178
string m_ProgDesc
Application's description.
Definition: blast_args.hpp:193
string m_ProgName
Application's name.
Definition: blast_args.hpp:192
Argument class to collect options specific to PSI-BLAST.
Definition: blast_args.hpp:540
CPsiBlastArgs(ETargetDatabase db_target=eProteinDb, bool is_deltablast=false)
Constructor.
Definition: blast_args.hpp:554
CPsiBlastArgs & operator=(const CPsiBlastArgs &rhs)
Prohibit assignment operator.
size_t GetNumberOfIterations() const
Retrieve the number of iterations to perform.
Definition: blast_args.hpp:572
virtual ~CPsiBlastArgs()
Our virtual destructor.
Definition: blast_args.hpp:563
bool RequiresAsciiPssmOutput() const
Returns true if ASCII PSSM is required to be printed.
Definition: blast_args.hpp:590
bool m_SaveLastPssm
Save PSSM after the last database search.
Definition: blast_args.hpp:635
CNcbiOstream * GetCheckPointOutputStream()
Get the checkpoint file output stream.
Definition: blast_args.hpp:586
CRef< CAutoOutputFileReset > m_AsciiMatrixOutput
ASCII matrix output file.
Definition: blast_args.hpp:627
bool m_IsDeltaBlast
Are the arguments set up for Delta Blast.
Definition: blast_args.hpp:632
ETargetDatabase
Enumeration to determine the molecule type of the database.
Definition: blast_args.hpp:543
@ eProteinDb
Traditional, iterated PSI-BLAST.
Definition: blast_args.hpp:544
void SetNumberOfIterations(unsigned int num_iters)
Set the number of iterations to perform.
Definition: blast_args.hpp:577
CNcbiOstream * GetAsciiMatrixOutputStream()
Get the ASCII matrix output stream.
Definition: blast_args.hpp:595
bool RequiresCheckPointOutput() const
Returns true if checkpoint PSSM is required to be printed.
Definition: blast_args.hpp:581
CRef< CAutoOutputFileReset > m_CheckPointOutput
checkpoint output file
Definition: blast_args.hpp:625
CPsiBlastArgs(const CPsiBlastArgs &rhs)
Prohibit copy constructor.
bool GetSaveLastPssm(void) const
Should the PSSM after the last database search be saved.
Definition: blast_args.hpp:610
ETargetDatabase m_DbTarget
Molecule of the database.
Definition: blast_args.hpp:621
void SetSaveLastPssm(bool b)
Set the on/off switch for saving PSSM after the last database search.
Definition: blast_args.hpp:615
CRef< objects::CPssmWithParameters > m_Pssm
PSSM.
Definition: blast_args.hpp:629
size_t m_NumIterations
number of iterations to perform
Definition: blast_args.hpp:623
CRef< objects::CPssmWithParameters > GetInputPssm() const
Get the PSSM read from checkpoint file.
Definition: blast_args.hpp:600
void SetInputPssm(CRef< objects::CPssmWithParameters > pssm)
Set the PSSM read from saved search strategy.
Definition: blast_args.hpp:605
Argument class to collect PSSM engine options.
Definition: blast_args.hpp:504
bool m_IsDeltaBlast
Are these arguments for Delta Blast.
Definition: blast_args.hpp:519
CPssmEngineArgs(bool is_deltablast=false)
Constructor.
Definition: blast_args.hpp:508
Argument class to collect query options.
Definition: blast_args.hpp:768
bool m_UseLCaseMask
use lowercase masking in FASTA input
Definition: blast_args.hpp:808
objects::ENa_strand GetStrand() const
Get strand to search in query sequence(s)
Definition: blast_args.hpp:793
void SetRange(const TSeqRange &range)
Set query sequence range restriction.
Definition: blast_args.hpp:791
bool GetParseDeflines() const
Should the defline be parsed?
Definition: blast_args.hpp:797
bool QueryIsProtein() const
Is the query sequence protein?
Definition: blast_args.hpp:800
CQueryOptionsArgs(bool query_cannot_be_nucl=false)
Constructor.
Definition: blast_args.hpp:775
objects::ENa_strand m_Strand
Strand(s) to search.
Definition: blast_args.hpp:804
TSeqRange m_Range
range to restrict the query sequence(s)
Definition: blast_args.hpp:806
bool m_ParseDeflines
Should the deflines be parsed?
Definition: blast_args.hpp:810
bool m_QueryCannotBeNucl
only false for blast[xn] and tblastx; true in case of PSI-BLAST
Definition: blast_args.hpp:814
TSeqRange GetRange() const
Get query sequence range restriction.
Definition: blast_args.hpp:789
bool UseLowercaseMasks() const
Use lowercase masking in FASTA input?
Definition: blast_args.hpp:795
RMH: Argument class to retrieve and set the options specific to the RMBlastN algorithm.
Definition: blast_args.hpp:266
Argument class to collect remote vs. local execution.
bool m_IsRemote
Should the search be executed remotely?
CRemoteArgs()
Default constructor.
bool ExecuteRemotely() const
Return whether the search should be executed remotely or not.
Argument class to import/export the search strategy.
Definition: blast_args.hpp:524
Argument class to retrieve input and output streams for a command line program.
Definition: blast_args.hpp:110
bool m_GzipEnabled
If true input file will be decompressed with gzip if filename ends with ".gz".
Definition: blast_args.hpp:165
CNcbiOstream * GetUnalignedOutputStream() const
Get output stream for unaligned sequences/reads (for magicblast)
Definition: blast_args.hpp:150
unique_ptr< CDecompressIStream > m_DecompressIStream
Definition: blast_args.hpp:156
CRef< CTmpFile > m_QueryTmpInputFile
ASN.1 specification of query sequences when read from a saved search strategy.
Definition: blast_args.hpp:161
void SetGzipEnabled(bool g)
Set automatic decompression of the input file if the file name is recognized.
Definition: blast_args.hpp:134
unique_ptr< CCompressOStream > m_CompressOStream
Definition: blast_args.hpp:157
CNcbiOstream * m_OutputStream
Application's output stream.
Definition: blast_args.hpp:155
CNcbiIstream * m_InputStream
Application's input stream.
Definition: blast_args.hpp:154
bool m_SRAaccessionEnabled
If true, option to specify SRA runs will be presented as possible query input.
Definition: blast_args.hpp:169
bool HasUnalignedOutputStream(void) const
Is there a separate output stream for unaligned sequences/reads (for magicblast)
Definition: blast_args.hpp:145
CNcbiOstream * m_UnalignedOutputStream
Output stream to report unaligned sequences/reads.
Definition: blast_args.hpp:172
void SetSRAaccessionEnabled(bool g)
enables sra accession flag
Definition: blast_args.hpp:139
unique_ptr< CCompressOStream > m_UnalignedCompressOStream
Definition: blast_args.hpp:173
CStdCmdLineArgs()
Default constructor.
Definition: blast_args.hpp:113
Argument class to specify the supported tasks for a given program.
Definition: blast_args.hpp:198
const set< string > m_SupportedTasks
Set of supported tasks by this command line argument.
Definition: blast_args.hpp:215
string m_DefaultTask
Default task for this command line argument.
Definition: blast_args.hpp:217
Argument class to retrieve and set the window size BLAST algorithm option.
Definition: blast_args.hpp:223
Argument class to retrieve and set the word threshold BLAST algorithm option.
Definition: blast_args.hpp:251
BLAST Command line arguments design The idea is to have several small objects (subclasses of IBlastCm...
Definition: blast_args.hpp:84
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Extracts BLAST algorithmic options from the command line arguments into the CBlastOptions object.
Definition: blast_args.cpp:67
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)=0
Sets the command line descriptions in the CArgDescriptions object relevant to the subclass.
virtual ~IBlastCmdLineArgs()
Our virtual destructor.
Definition: blast_args.hpp:87
Constant declarations for command line arguments for BLAST programs.
const bool kDfltArgParseDeflines
Default argument to specify whether sequences deflines should be parsed.
const bool kDfltArgUseLCaseMasking
Default argument to specify whether lowercase masking should be used.
const string kDfltArgCompBasedStats
Default argument for composition based statistics.
Include a standard set of the NCBI C++ Toolkit most basic headers.
const size_t kDfltArgNumDescriptions
Default number of one-line descriptions to display in the traditional BLAST report.
const size_t kDfltArgNumAlignments
Default number of alignments to display in the traditional BLAST report.
const size_t kDfltLineLength
static FILE * input_file
Definition: common.c:35
#define true
Definition: bool.h:35
#define false
Definition: bool.h:36
static SQLCHAR output[256]
Definition: print.c:5
EOutputFormat
Definition: grid_cli.hpp:276
bool IsProtein() const
Determine whether this database contains protein sequences or not.
EMoleculeType
Molecule of the BLAST database.
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
#define NULL
Definition: ncbistd.hpp:225
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NCBI_BLASTINPUT_EXPORT
Definition: ncbi_export.h:336
ENa_strand
strand of nucleic acid
Definition: Na_strand_.hpp:64
@ eNa_strand_unknown
Definition: Na_strand_.hpp:65
Declares CIgBlast, the C++ API for the IG-BLAST engine.
range(_Ty, _Ty) -> range< _Ty >
EIPRangeType t
Definition: ncbi_localip.c:101
Defines command line argument related classes.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
@ ePairwise
Definition: splign_app.cpp:551
C++ I/O stream wrappers to compress/decompress data on-the-fly.
#define _ASSERT
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)
Definition: thrddgri.c:44
Uniform BLAST Search Interface.
Modified on Fri Sep 20 14:57:01 2024 by modify_doxy.py rev. 669887