NCBI C++ ToolKit
magicblast_args.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: magicblast_args.cpp 88808 2020-01-21 10:48:40Z boratyng $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Greg Boratyn
27  *
28  */
29 
30 /** @file magicblast_args.cpp
31  * Implementation of the BLASTMAPPER command line arguments
32  */
33 
34 #include <ncbi_pch.hpp>
39 //#include <algo/blast/api/version.hpp>
41 
43 BEGIN_SCOPE(blast)
45 
46 
47 /// StdCmdLineArgs with output stream for unaligned reads
49 {
50 public:
52 
53  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc) {
54 
56 
57  arg_desc.SetCurrentGroup("General search options");
58 
59  arg_desc.AddOptionalKey(kArgUnalignedOutput, "output_file",
60  "Report unaligned reads to this file",
62 
66 
67  arg_desc.SetCurrentGroup("");
68  }
69 };
70 
71 
72 /// Special generic search arguments for blastmapper
74 {
75 public:
77 
78  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc) {
79 
80  arg_desc.SetCurrentGroup("General search options");
81 
82  arg_desc.AddDefaultKey(kArgWordSize, "int_value", "Minimum number of "
83  "consecutive bases matching exactly",
86 
87  arg_desc.SetConstraint(kArgWordSize,
89 
90 
91  // gap open penalty
92  arg_desc.AddDefaultKey(kArgGapOpen, "open_penalty",
93  "Cost to open a gap",
95 
96  // gap extend penalty
97  arg_desc.AddDefaultKey(kArgGapExtend, "extend_penalty",
98  "Cost to extend a gap",
100 
101  // FIXME: not sure if this one is needed
102  arg_desc.SetCurrentGroup("Restrict search or results");
103  arg_desc.AddDefaultKey(kArgPercentIdentity, "float_value",
104  "Percent identity cutoff for alignments",
107  new CArgAllow_Doubles(0.0, 100.0));
108 
109  // strand-specific flags (mutually exclusive)
110  arg_desc.AddFlag(kArgFwdRev,
111  "Strand specific reads forward/reverse"
112 // ", requires -only_strand_specific for non-SAM "
113 // "output format"
114  );
115  arg_desc.AddFlag(kArgRevFwd,
116  "Strand specific reads reverse/forward"
117 // ", requires -only_strand_specific for non-SAM "
118 // "output format"
119  );
120  // FIXME: save for later feature enhancement
121 // arg_desc.AddFlag(kArgFwdOnly,
122 // "Strand specific reads forward only", true);
123 // arg_desc.AddFlag(kArgRevOnly,
124 // "Strand specific reads reverse only", true);
125 // arg_desc.AddFlag(kArgOnlyStrandSpecific,
126 // "Only show strand specific reads, "
127 // "requires either -fr or -rf",
128 // true);
129 
130  arg_desc.SetDependency(kArgFwdRev,
132  kArgRevFwd);
133  // FIXME: save for later feature enhancement
134 // arg_desc.SetDependency(kArgFwdOnly,
135 // CArgDescriptions::eRequires,
136 // kArgOnlyStrandSpecific);
137 // arg_desc.SetDependency(kArgRevOnly,
138 // CArgDescriptions::eRequires,
139 // kArgOnlyStrandSpecific);
140  }
141 };
142 
143 /// Nucleotide args with no reward score
145 {
146 public:
148 
149  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc) {
150  arg_desc.SetCurrentGroup("General search options");
151  // blastn mismatch penalty
152  arg_desc.AddDefaultKey(kArgMismatch, "penalty",
153  "Penalty for a nucleotide mismatch",
155  arg_desc.SetConstraint(kArgMismatch,
157  arg_desc.SetCurrentGroup("");
158  }
159 };
160 
161 /// Longest intron size with non-zero default value
163 {
164 public:
165  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc) {
166  arg_desc.SetCurrentGroup("General search options");
167  // largest intron length
168  arg_desc.AddDefaultKey(kArgMaxIntronLength, "length",
169  "Maximum allowed intron length",
171  NStr::IntToString(500000));
174  arg_desc.SetCurrentGroup("");
175  }
176 };
177 
178 /// RemoteArgs with no option for remote
180 {
181 public:
182  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc) {}
183 };
184 
185 
186 /// Program description without BLAST+ version
188 {
189 public:
190  CMapperProgramDescriptionArgs(const string& program_name,
191  const string& program_desc)
192  : CProgramDescriptionArgs(program_name, program_desc) {}
193 
194  virtual void SetArgumentDescriptions(CArgDescriptions& arg_desc) {
196  }
197 };
198 
199 
200 /// MT args that allow multiple threads with a FASTA subject
201 class CMapperMTArgs : public CMTArgs
202 {
203 public:
204  virtual void ExtractAlgorithmOptions(const CArgs& args, CBlastOptions& /* opts */) {
205  const int kMaxValue = static_cast<int>(GetCpuCount());
206 
207  if (args.Exist(kArgNumThreads) &&
208  args[kArgNumThreads].HasValue()) {
209 
210  // use the minimum of the two: user requested number of threads and
211  // number of available CPUs for number of threads
212  int num_threads = args[kArgNumThreads].AsInteger();
213  if (num_threads > kMaxValue) {
214  m_NumThreads = kMaxValue;
215 
216  ERR_POST(Warning << (string)"Number of threads was reduced to "
217  + NStr::IntToString((unsigned int)m_NumThreads) +
218  " to match the number of available CPUs");
219  }
220  else {
221  m_NumThreads = num_threads;
222  }
223  }
224  }
225 };
226 
228 {
229  // remove search strategy args added in parent class constructor
230  m_Args.clear();
231 
233  static const char kProgram[] = "magicblast";
234  arg.Reset(new CMapperProgramDescriptionArgs(kProgram, "Short read mapper"));
235  m_Args.push_back(arg);
236 
237  m_BlastDbArgs.Reset(new CBlastDatabaseArgs(false, false, false, true));
239  arg.Reset(m_BlastDbArgs);
240  m_Args.push_back(arg);
241 
244  arg.Reset(m_StdCmdLineArgs);
245  m_Args.push_back(arg);
246 
248  m_Args.push_back(arg);
249 
250  arg.Reset(new CMapperNuclArgs);
251  m_Args.push_back(arg);
252 
254  arg.Reset(m_QueryOptsArgs);
255  m_Args.push_back(arg);
256 
257  arg.Reset(new CMapperFormattingArgs);
259  dynamic_cast<CFormattingArgs*>(arg.GetNonNullPointer())
260  );
261  m_Args.push_back(arg);
262 
264  arg.Reset(m_MTArgs);
265  m_Args.push_back(arg);
266 
268  arg.Reset(m_RemoteArgs);
269  m_Args.push_back(arg);
270 
272  arg.Reset(m_DebugArgs);
273  m_Args.push_back(arg);
274 
276  m_Args.push_back(arg);
277 
278  arg.Reset(new CMappingArgs);
279  m_Args.push_back(arg);
280 }
281 
285  const CArgs& args)
286 {
288 }
289 
290 int
292 {
293  bool is_remote = (m_RemoteArgs.NotEmpty() && m_RemoteArgs->ExecuteRemotely());
294  return blast::GetQueryBatchSize(eMapper, false, is_remote, true);
295 }
296 
297 END_SCOPE(blast)
299 
Declares the BLAST exception class.
Interface for reading SRA sequences into blast input.
Auxiliary classes/functions for BLAST input library.
int GetQueryBatchSize(EProgram program, bool is_ungapped=false, bool remote=false, bool use_default=true, string task="", bool mt_mode=false)
Retrieve the appropriate batch size for the specified task.
#define BLAST_WORDSIZE_MAPPER
default word size for mapping rna-seq to a genome
Definition: blast_options.h:73
@ eMapper
Jumper alignment for mapping.
Definition: blast_types.hpp:73
Class to constrain the values of an argument to those greater than or equal to the value specified in...
Class to constrain the values of an argument to those less than or equal to the value specified in th...
CArgAllow_Doubles –.
Definition: ncbiargs.hpp:1781
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
CRef< CRemoteArgs > m_RemoteArgs
remote vs. local execution options
CRef< CQueryOptionsArgs > m_QueryOptsArgs
query options object
CRef< CBlastDatabaseArgs > m_BlastDbArgs
database/subject object
CRef< CDebugArgs > m_DebugArgs
Debugging arguments.
CRef< CMTArgs > m_MTArgs
multi-threaded options
CRef< CFormattingArgs > m_FormattingArgs
formatting options
TBlastCmdLineArgs m_Args
Set of command line argument objects.
CRef< CStdCmdLineArgs > m_StdCmdLineArgs
standard command line arguments class
Argument class to collect database/subject arguments.
Definition: blast_args.hpp:889
void SetDatabaseMaskingSupport(bool val)
Turns on/off database masking support.
Definition: blast_args.hpp:921
Encapsulates ALL the BLAST algorithm's options.
EAPILocality
Enumerates the possible contexts in which objects of this type can be used.
Argument class to collect debugging options.
Argument class to collect formatting options, use this to create a CBlastFormat object.
Argument class for general search BLAST algorithm options: evalue, gap penalties, query filter string...
Definition: blast_args.hpp:292
Argument class for collecting the largest intron size.
Definition: blast_args.hpp:441
Argument class to collect multi-threaded arguments.
size_t m_NumThreads
Number of threads to spawn.
CMagicBlastAppArgs()
Constructor.
virtual CRef< CBlastOptionsHandle > x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, const CArgs &args)
@inheritDoc
virtual int GetQueryBatchSize() const
@inheritDoc
Handle to the nucleotide mapping options to the BLAST algorithm.
Formatting args for magicblast advertising only SAM and fast tabular formats.
Special generic search arguments for blastmapper.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Longest intron size with non-zero default value.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
MT args that allow multiple threads with a FASTA subject.
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &)
Interface method,.
Nucleotide args with no reward score.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Program description without BLAST+ version.
CMapperProgramDescriptionArgs(const string &program_name, const string &program_desc)
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Argument class to collect query options for BLAST Mapper.
Definition: blast_args.hpp:826
RemoteArgs with no option for remote.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
StdCmdLineArgs with output stream for unaligned reads.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
Defines values for match and mismatch in nucleotide comparisons as well as non-greedy extension.
Definition: blast_args.hpp:368
Argument class to populate an application's name and description.
Definition: blast_args.hpp:178
string m_ProgDesc
Application's description.
Definition: blast_args.hpp:193
string m_ProgName
Application's name.
Definition: blast_args.hpp:192
CRef –.
Definition: ncbiobj.hpp:618
Argument class to collect remote vs. local execution.
bool ExecuteRemotely() const
Return whether the search should be executed remotely or not.
Argument class to retrieve input and output streams for a command line program.
Definition: blast_args.hpp:110
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method,.
void SetGzipEnabled(bool g)
Set automatic decompression of the input file if the file name is recognized.
Definition: blast_args.hpp:134
const string kArgRevFwd
Argument to specify reverse/forward strand specificity.
const string kArgMaxIntronLength
Argument to specify the maximum length of an intron when linking multiple distinct alignments (applic...
const string kArgPercentIdentity
Argument to specify the target percent identity.
const string kArgGapExtend
Argument to select the gap extending penalty.
const string kArgUnalignedOutput
Argument to output unaligned reads in a separate file.
const string kArgGapOpen
Argument to select the gap opening penalty.
const string kArgMismatch
Argument to select the nucleotide mismatch penalty.
const string kArgFwdRev
Argument to specify forward/reverse strand specificity.
const string kArgNoUnaligned
Argument to turn off printing of unaligned reads.
const string kArgWordSize
Argument to select the wordfinder's word size.
const string kArgNumThreads
Argument to determine the number of threads to use when running BLAST.
#define false
Definition: bool.h:36
void AddFlag(const string &name, const string &comment, CBoolEnum< EFlagValue > set_value=eFlagHasValueIfSet, TFlags flags=0)
Add description for flag argument.
Definition: ncbiargs.cpp:2459
void SetConstraint(const string &name, const CArgAllow *constraint, EConstraintNegate negate=eConstraint)
Set additional user defined constraint on argument value.
Definition: ncbiargs.cpp:2591
void SetDependency(const string &arg1, EDependency dep, const string &arg2)
Define a dependency.
Definition: ncbiargs.cpp:2618
bool Exist(const string &name) const
Check existence of argument description.
Definition: ncbiargs.cpp:1813
void SetUsageContext(const string &usage_name, const string &usage_description, bool usage_sort_args=false, SIZE_TYPE usage_width=78)
Set extra info to be used by PrintUsage().
Definition: ncbiargs.cpp:3304
void AddOptionalKey(const string &name, const string &synopsis, const string &comment, EType type, TFlags flags=0)
Add description for optional key without default value.
Definition: ncbiargs.cpp:2427
void SetCurrentGroup(const string &group)
Set current arguments group name.
Definition: ncbiargs.cpp:2632
void AddDefaultKey(const string &name, const string &synopsis, const string &comment, EType type, const string &default_value, TFlags flags=0, const string &env_var=kEmptyStr, const char *display_value=nullptr)
Add description for optional key with default value.
Definition: ncbiargs.cpp:2442
@ eExcludes
One argument excludes another.
Definition: ncbiargs.hpp:957
@ eDouble
Convertible into a floating point number (double)
Definition: ncbiargs.hpp:594
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
Definition: ncbiobj.hpp:968
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
USING_SCOPE(objects)
unsigned int GetCpuCount(void)
Return number of active CPUs (never less than 1).
Modified on Wed May 08 12:05:34 2024 by modify_doxy.py rev. 669887