35 #ifndef ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP
36 #define ALGO_BLAST_BLASTINPUT___BLAST_ARGS__HPP
102 virtual void ExtractAlgorithmOptions(
const CArgs& cmd_line_args,
114 m_GzipEnabled(
false),
115 m_SRAaccessionEnabled(
false),
116 m_UnalignedOutputStream(0) {};
151 {
return m_UnalignedOutputStream;}
187 const string& program_description);
207 const string& default_task);
304 bool show_perc_identity =
false,
bool is_tblastx =
false,
305 bool is_igblast =
false,
bool suppress_sum_stats =
false)
306 : m_QueryIsProtein(query_is_protein), m_IsRpsBlast(is_rpsblast),
307 m_ShowPercentIdentity(show_perc_identity), m_IsTblastx(is_tblastx),
308 m_IsIgBlast(is_igblast), m_SuppressSumStats(suppress_sum_stats),
342 bool filter_by_default =
true)
343 : m_QueryIsProtein(query_is_protein),
344 m_FilterByDefault(filter_by_default) {}
361 void x_TokenizeFilteringArgs(
const string& filtering_args,
362 vector<string>&
output)
const;
399 const string& default_option
401 const string& zero_option_descr =
"")
402 : m_Is2and3Supported(is_2and3supported),
403 m_DefaultOpt(default_option),
404 m_ZeroOptDescr(zero_option_descr) {}
492 : m_QueryIsProtein(query_is_protein) {}
555 bool is_deltablast =
false)
556 : m_DbTarget(db_target), m_NumIterations(1),
557 m_CheckPointOutput(0), m_AsciiMatrixOutput(0),
558 m_IsDeltaBlast(is_deltablast),
559 m_SaveLastPssm(
false)
573 return m_NumIterations;
578 m_NumIterations = num_iters;
582 return m_CheckPointOutput !=
NULL;
587 return m_CheckPointOutput ? m_CheckPointOutput->GetStream() :
NULL;
591 return m_AsciiMatrixOutput !=
NULL;
596 return m_AsciiMatrixOutput ? m_AsciiMatrixOutput->GetStream() :
NULL;
611 return m_SaveLastPssm;
646 bool save_ascii_pssm,
unsigned int msa_master_idx,
647 bool ignore_pssm_tmpl_seq);
667 CKBlastpArgs(
void) : m_JDistance(0.10), m_MinHits(0), m_CandidateSeqs(1000) {}
779 m_QueryCannotBeNucl(query_cannot_be_nucl)
836 m_InputFormat(eFasta),
837 m_MateInputStream(
NULL),
838 m_EnableSraCache(
false)
851 {
return m_InputFormat;}
861 {
return m_SraAccessions;}
886 static const int kSubjectsDataLoaderPriority = 10;
893 static bool HasBeenSet(
const CArgs& args);
903 bool is_rpsblast =
false,
904 bool is_igblast =
false,
905 bool is_mapper =
false,
906 bool is_kblast =
false);
915 m_SupportsDatabaseMasking =
val;
925 return m_SearchDb.Empty() ?
kEmptyStr : m_SearchDb->GetDatabaseName();
933 m_SearchDb = search_db;
941 m_Subjects = subjects;
943 m_IsProtein = is_protein;
952 if (m_Subjects && scope) {
956 scope->AddScope(*
m_Scope, kSubjectsDataLoaderPriority);
962 m_SupportIPGFiltering =
val;
1028 eQueryAnchoredIdentities,
1030 eQueryAnchoredNoIdentities,
1032 eFlatQueryAnchoredIdentities,
1081 eIsVDB_SAM = eIsVDB | eIsSAM,
1083 eIsAirrRearrangement = 0x04
1088 m_NumDescriptions(0), m_NumAlignments(0),
1089 m_DfltNumDescriptions(0), m_DfltNumAlignments(0),
1091 m_IsIgBlast(isIgblast),
1093 m_FormatFlags(flag),
1094 m_HitsSortOption(-1),
1095 m_HspsSortOption(-1)
1098 m_DfltNumAlignments = m_DfltNumDescriptions = 10;
1120 ParseFormattingString(
const CArgs& args,
1122 string& custom_fmt_spec,
1123 string& custom_delim)
const;
1127 return m_OutputFormat;
1134 return m_OutputFormat == eXml ||
1135 m_OutputFormat == eAsnText ||
1136 m_OutputFormat == eAsnBinary ||
1137 m_OutputFormat == eXml2 ||
1138 m_OutputFormat == eJson ||
1139 m_OutputFormat == eXml2_S ||
1140 m_OutputFormat == eJson_S ||
1141 m_OutputFormat == eJsonSeqalign ||
1142 m_OutputFormat == eSAM;
1151 return m_NumDescriptions;
1155 return m_NumAlignments;
1165 return m_CustomOutputFormatSpec;
1168 virtual bool ArchiveFormatRequested(
const CArgs& args)
const;
1171 return m_LineLength;
1174 return m_HitsSortOption;
1177 return m_HspsSortOption;
1208 m_TrimReadIds(
true),
1209 m_PrintUnaligned(
true),
1210 m_NoDiscordant(
false),
1215 m_OnlyStrandSpecific(
false),
1216 m_PrintMdTag(
false),
1217 m_UnalignedOutputFormat(eSAM)
1258 {
return m_UnalignedOutputFormat;}
1289 m_NumThreads(default_num_threads), m_MTMode(mt_mode)
1291 #ifdef NCBI_NO_THREADS
1294 m_MTMode = eNotSupported;
1310 void x_ExtractAlgorithmOptions(
const CArgs& args);
1386 static bool HasBeenSet(
const CArgs& args);
1412 void SetTask(
const string& task);
1426 m_OptsHandle = opts_hndl;
1431 return m_BlastDbArgs;
1435 m_BlastDbArgs = args;
1440 return m_QueryOptsArgs;
1445 return m_FormattingArgs;
1450 return m_MTArgs->GetNumThreads();
1454 return m_MTArgs->GetMTMode();
1467 m_StdCmdLineArgs->SetInputStream(
input_file);
1472 return m_SearchStrategyArgs->GetImportStream(args);
1476 return m_SearchStrategyArgs->GetExportStream(args);
1481 return m_RemoteArgs->ExecuteRemotely();
1487 return m_DebugArgs->ProduceDebugRemoteOutput();
1493 return m_DebugArgs->ProduceDebugOutput();
1501 _ASSERT( !m_ClientId.empty() );
1543 const CArgs& args) = 0;
1551 const string& task);
1555 void x_IssueWarningsForIgnoredOptions(
const CArgs& args);
static CRef< CScope > m_Scope
User-defined methods of the data storage class.
CArgDescriptions * SetUpCommandLineArguments(TBlastCmdLineArgs &args)
Create a CArgDescriptions object and invoke SetArgumentDescriptions for each of the TBlastCmdLineArgs...
vector< CRef< IBlastCmdLineArgs > > TBlastCmdLineArgs
Type definition of a container of IBlastCmdLineArgs.
Declares class to encapsulate all BLAST options.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Base command line argument class for a generic BLAST command line binary.
CRef< CRemoteArgs > m_RemoteArgs
remote vs. local execution options
string GetTask() const
Get the task for this object.
void SetOptionsHandle(CRef< CBlastOptionsHandle > opts_hndl)
Setter for the BLAST options handle, this is used if the options are recovered from a saved BLAST sea...
CRef< CBlastOptionsHandle > m_OptsHandle
The BLAST options handle, only non-NULL if assigned via SetOptionsHandle, i.e.
CRef< CQueryOptionsArgs > m_QueryOptsArgs
query options object
size_t GetNumThreads() const
Get the number of threads to spawn.
virtual int GetQueryBatchSize() const =0
Get the query batch size.
CRef< CBlastDatabaseArgs > m_BlastDbArgs
database/subject object
virtual CRef< CBlastOptionsHandle > x_CreateOptionsHandle(CBlastOptions::EAPILocality locality, const CArgs &args)=0
Create the options handle based on the command line arguments.
CRef< CSearchStrategyArgs > m_SearchStrategyArgs
arguments for dealing with search strategies
string m_Task
Task specified in the command line.
CRef< CDebugArgs > m_DebugArgs
Debugging arguments.
CRef< CBlastDatabaseArgs > GetBlastDatabaseArgs() const
Get the BLAST database arguments.
CNcbiIstream * GetImportSearchStrategyStream(const CArgs &args)
Get the input stream for the search strategy.
virtual ~CBlastAppArgs()
Our virtual destructor.
void SetInputStream(CRef< CTmpFile > input_file)
Set the input stream to a temporary input file (needed when importing a search strategy)
CRef< CMTArgs > m_MTArgs
multi-threaded options
CRef< CFormattingArgs > m_FormattingArgs
formatting options
bool ExecuteRemotely() const
Determine whether the search should be executed remotely or not.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
bool m_IsUngapped
Is this application being run ungapped.
CRef< CQueryOptionsArgs > GetQueryOptionsArgs() const
Get the options for the query sequence(s)
string GetClientId() const
Retrieve the client ID for remote requests.
TBlastCmdLineArgs m_Args
Set of command line argument objects.
void SetBlastDatabaseArgs(CRef< CBlastDatabaseArgs > args)
Set the BLAST database arguments.
CRef< CFormattingArgs > GetFormattingArgs() const
Get the formatting options.
CNcbiOstream * GetExportSearchStrategyStream(const CArgs &args)
Get the output stream for the search strategy.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced (only available when compiled with _DEBUG)
CRef< CStdCmdLineArgs > m_StdCmdLineArgs
standard command line arguments class
CRef< CHspFilteringArgs > m_HspFilteringArgs
HSP filtering arguments.
string m_ClientId
Client ID used for remote BLAST submissions, must be populated by subclasses.
Argument class to collect database/subject arguments.
CRef< objects::CScope > m_Scope
CScope object in which all subject sequences read are kept.
bool m_IsMapper
true for short read mapper
CRef< CSearchDatabase > GetSearchDatabase() const
Retrieve the search database information.
bool IsProtein() const
Is the database/subject protein?
bool m_SupportsDatabaseMasking
true if database masking is supported
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
void SetIPGFilteringSupport(bool val)
bool m_IsProtein
Is the database/subject(s) protein?
bool m_RequestMoleculeType
Determines whether the database's molecule type should be requested in the command line,...
bool m_IsIgBlast
true if the search is Ig-BLAST
CRef< IQueryFactory > m_Subjects
The subject sequences.
void SetSubjects(CRef< IQueryFactory > subjects, CRef< CScope > scope, bool is_protein)
Sets the subject sequences.
bool m_IsRpsBlast
true if the search is RPS-BLAST
CRef< IQueryFactory > GetSubjects(objects::CScope *scope=NULL)
Retrieve subject sequences, if provided.
CRef< CSearchDatabase > m_SearchDb
Description of the BLAST database.
CSearchDatabase::EMoleculeType EMoleculeType
alias for the database molecule type
bool m_SupportIPGFiltering
true if IPG filtering is supported
void SetSearchDatabase(CRef< CSearchDatabase > search_db)
Set the search database information.
void SetDatabaseMaskingSupport(bool val)
Turns on/off database masking support.
bool m_IsKBlast
true for Kblastp
string GetDatabaseName() const
Get the BLAST database name.
Encapsulates ALL the BLAST algorithm's options.
EAPILocality
Enumerates the possible contexts in which objects of this type can be used.
Argument class for collecting composition based statistics options.
bool m_Is2and3Supported
Are options 2 and 3 supported.
CCompositionBasedStatsArgs(bool is_2and3supported=true, const string &default_option=kDfltArgCompBasedStats, const string &zero_option_descr="")
Constructor.
string m_ZeroOptDescr
Non standard description for option zero.
string m_DefaultOpt
Default option.
Argument class to collect debugging options.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
CDebugArgs()
Default constructor.
bool m_DebugOutput
Should debugging (verbose) output be printed.
bool m_RmtDebugOutput
Should debugging (verbose) output be printed for remote BLAST.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced (only available when compiled with _DEBUG)
Argument class to collect options specific to DELTA-BLAST.
CDeltaBlastArgs & operator=(const CDeltaBlastArgs &rhs)
Prohibit assignment operator.
CDeltaBlastArgs(void)
Constructor.
CRef< CSearchDatabase > m_DomainDb
Conserved Domain Database.
virtual ~CDeltaBlastArgs()
Our virtual destructor.
CRef< CSearchDatabase > GetDomainDatabase(void)
Get domain database.
CDeltaBlastArgs(const CDeltaBlastArgs &rhs)
Prohibit copy constructor.
bool m_ShowDomainHits
Is printing CDD hits requested.
bool GetShowDomainHits(void) const
Get show domain hits option value.
Argument class to retrieve discontiguous megablast arguments.
Argument class for collecting filtering options.
CFilteringArgs(bool query_is_protein=true, bool filter_by_default=true)
Constructor.
bool m_QueryIsProtein
true if the query is protein
bool m_FilterByDefault
Should filtering be applied by default?
Argument class to collect the frame shift penalty for out-of-frame searches.
Argument class to retrieve the gap trigger option.
bool m_QueryIsProtein
true if the query is protein
CGapTriggerArgs(bool query_is_protein)
Constructor.
Argument class for collecting gapped options.
Argument class for general search BLAST algorithm options: evalue, gap penalties, query filter string...
bool m_QueryIsProtein
true if the query is protein
bool m_IsRpsBlast
true if the search is RPS-BLAST
bool m_IsIgBlast
true if the search is igblast
bool m_IsTblastx
true if the search is tblastx
bool m_ShowPercentIdentity
true if the percent identity option should be shown
CGenericSearchArgs(bool query_is_protein=true, bool is_rpsblast=false, bool show_perc_identity=false, bool is_tblastx=false, bool is_igblast=false, bool suppress_sum_stats=false)
Constructor.
bool m_SuppressSumStats
true if search is blastn or blastp
Argument class to collect the genetic code for all queries/subjects.
CGeneticCodeArgs(ETarget t)
Constructor.
ETarget m_Target
Genetic code target.
ETarget
Enumeration defining which sequences the genetic code applies to.
@ eQuery
Query genetic code.
Argument class to retrieve options for filtering HSPs (e.g.
Argument class to collect options specific to igBLAST.
CRef< CIgBlastOptions > m_IgOptions
Igblast options to fill.
bool m_IsProtein
Is this a protein search?
void AddIgSequenceScope(CRef< objects::CScope > scope)
CRef< CIgBlastOptions > GetIgBlastOptions()
CIgBlastArgs(bool is_protein)
CRef< objects::CScope > m_Scope
scope to get sequences
Argument class to collect options specific to KBLASTP.
virtual ~CKBlastpArgs()
Our virtual destructor.
double m_JDistance
Jaccard distance.
int m_CandidateSeqs
Number of candidate sequences to try BLAST on.
CKBlastpArgs(const CKBlastpArgs &rhs)
Prohibit copy constructor.
CKBlastpArgs & operator=(const CKBlastpArgs &rhs)
Prohibit assignment operator.
CKBlastpArgs(void)
Constructor.
int GetMinHits(void)
Get the minimum number of LSH matches.
int m_MinHits
Minimum number of hits in LSH phase.
string m_DbIndex
Database/index.
int GetCandidateSeqs(void)
Number of candidate sequences to attempt with BLASTP.
string GetDatabase(void)
The database.
double GetJaccardDistance(void)
Get the Jaccard distance.
Argument class for collecting the largest intron size.
Argument class to collect multi-threaded arguments.
size_t GetNumThreads() const
Get the number of threads to spawn.
size_t m_NumThreads
Number of threads to spawn.
CMTArgs(size_t default_num_threads=CThreadable::kMinNumThreads, EMTMode mt_mode=eNotSupported)
Default Constructor.
Argument class to collect query options for BLAST Mapper.
bool IsSraCacheEnabled(void) const
Is SRA caching in local files enabled (see File Caching at https://github.com/ncbi/sra-tools/wiki/Too...
const vector< string > & GetSraAccessions(void) const
Get a list of SRA accessions.
bool HasMateInputStream(void) const
Does the mate input stream exist?
CMapperQueryOptionsArgs(void)
EInputFormat GetInputFormat(void) const
Get the query input format (e.g. whether queries are provided in Fastc format).
CNcbiIstream * m_MateInputStream
EInputFormat m_InputFormat
bool IsPaired(void) const
Are query sequences paired.
EInputFormat
Input formats.
vector< string > m_SraAccessions
unique_ptr< CDecompressIStream > m_DecompressIStream
CNcbiIstream * GetMateInputStream(void) const
Get input stream for query mates.
Argument class to retrieve and set the scoring matrix name BLAST algorithm option.
Argument class to retrieve megablast database indexing options.
Defines values for match and mismatch in nucleotide comparisons as well as non-greedy extension.
Argument class to retrieve and set the off-diagonal range used in 2-hit algorithm.
Argument class to collect options specific to PHI-BLAST.
Argument class to populate an application's name and description.
string m_ProgDesc
Application's description.
string m_ProgName
Application's name.
Argument class to collect options specific to PSI-BLAST.
CPsiBlastArgs(ETargetDatabase db_target=eProteinDb, bool is_deltablast=false)
Constructor.
CPsiBlastArgs & operator=(const CPsiBlastArgs &rhs)
Prohibit assignment operator.
size_t GetNumberOfIterations() const
Retrieve the number of iterations to perform.
virtual ~CPsiBlastArgs()
Our virtual destructor.
bool RequiresAsciiPssmOutput() const
Returns true if ASCII PSSM is required to be printed.
bool m_SaveLastPssm
Save PSSM after the last database search.
CNcbiOstream * GetCheckPointOutputStream()
Get the checkpoint file output stream.
CRef< CAutoOutputFileReset > m_AsciiMatrixOutput
ASCII matrix output file.
bool m_IsDeltaBlast
Are the arguments set up for Delta Blast?
ETargetDatabase
Enumeration to determine the molecule type of the database.
@ eProteinDb
Traditional, iterated PSI-BLAST.
void SetNumberOfIterations(unsigned int num_iters)
Set the number of iterations to perform.
CNcbiOstream * GetAsciiMatrixOutputStream()
Get the ASCII matrix output stream.
bool RequiresCheckPointOutput() const
Returns true if checkpoint PSSM is required to be printed.
CRef< CAutoOutputFileReset > m_CheckPointOutput
checkpoint output file
CPsiBlastArgs(const CPsiBlastArgs &rhs)
Prohibit copy constructor.
bool GetSaveLastPssm(void) const
Should the PSSM after the last database search be saved.
ETargetDatabase m_DbTarget
Molecule of the database.
void SetSaveLastPssm(bool b)
Set the on/off switch for saving PSSM after the last database search.
CRef< objects::CPssmWithParameters > m_Pssm
PSSM.
size_t m_NumIterations
number of iterations to perform
CRef< objects::CPssmWithParameters > GetInputPssm() const
Get the PSSM read from checkpoint file.
void SetInputPssm(CRef< objects::CPssmWithParameters > pssm)
Set the PSSM read from saved search strategy.
Argument class to collect PSSM engine options.
bool m_IsDeltaBlast
Are these arguments for Delta Blast?
CPssmEngineArgs(bool is_deltablast=false)
Constructor.
Argument class to collect query options.
bool m_UseLCaseMask
use lowercase masking in FASTA input
objects::ENa_strand GetStrand() const
Get strand to search in query sequence(s)
void SetRange(const TSeqRange &range)
Set query sequence range restriction.
bool GetParseDeflines() const
Should the defline be parsed?
bool QueryIsProtein() const
Is the query sequence protein?
CQueryOptionsArgs(bool query_cannot_be_nucl=false)
Constructor.
objects::ENa_strand m_Strand
Strand(s) to search.
TSeqRange m_Range
range to restrict the query sequence(s)
bool m_ParseDeflines
Should the deflines be parsed?
bool m_QueryCannotBeNucl
only false for blast[xn] and tblastx; true in case of PSI-BLAST
TSeqRange GetRange() const
Get query sequence range restriction.
bool UseLowercaseMasks() const
Use lowercase masking in FASTA input?
RMH: Argument class to retrieve and set the options specific to the RMBlastN algorithm.
Argument class to collect remote vs. local execution.
bool m_IsRemote
Should the search be executed remotely?
CRemoteArgs()
Default constructor.
bool ExecuteRemotely() const
Return whether the search should be executed remotely or not.
Argument class to import/export the search strategy.
Argument class to retrieve input and output streams for a command line program.
bool m_GzipEnabled
If true input file will be decompressed with gzip if filename ends with ".gz".
CNcbiOstream * GetUnalignedOutputStream() const
Get output stream for unaligned sequences/reads (for magicblast)
unique_ptr< CDecompressIStream > m_DecompressIStream
CRef< CTmpFile > m_QueryTmpInputFile
ASN.1 specification of query sequences when read from a saved search strategy.
void SetGzipEnabled(bool g)
Set automatic decompression of the input file if the file name is recognized.
unique_ptr< CCompressOStream > m_CompressOStream
CNcbiOstream * m_OutputStream
Application's output stream.
CNcbiIstream * m_InputStream
Application's input stream.
bool m_SRAaccessionEnabled
If true, option to specify SRA runs will be presented as possible query input.
bool HasUnalignedOutputStream(void) const
Is there a separate output stream for unaligned sequences/reads (for magicblast)
CNcbiOstream * m_UnalignedOutputStream
Output stream to report unaligned sequences/reads.
void SetSRAaccessionEnabled(bool g)
enables sra accession flag
unique_ptr< CCompressOStream > m_UnalignedCompressOStream
CStdCmdLineArgs()
Default constructor.
Argument class to specify the tasks supported by a given program.
const set< string > m_SupportedTasks
Set of supported tasks by this command line argument.
string m_DefaultTask
Default task for this command line argument.
Argument class to retrieve and set the window size BLAST algorithm option.
Argument class to retrieve and set the word threshold BLAST algorithm option.
BLAST Command line arguments design The idea is to have several small objects (subclasses of IBlastCm...
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Extracts BLAST algorithmic options from the command line arguments into the CBlastOptions object.
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)=0
Sets the command line descriptions in the CArgDescriptions object relevant to the subclass.
virtual ~IBlastCmdLineArgs()
Our virtual destructor.
Constant declarations for command line arguments for BLAST programs.
const bool kDfltArgParseDeflines
Default argument to specify whether sequences deflines should be parsed.
const bool kDfltArgUseLCaseMasking
Default argument to specify whether lowercase masking should be used.
const string kDfltArgCompBasedStats
Default argument for composition based statistics.
Include a standard set of the NCBI C++ Toolkit most basic headers.
static SQLCHAR output[256]
bool IsProtein() const
Determine whether this database contains protein sequences or not.
EMoleculeType
Molecule of the BLAST database.
unsigned int TSeqPos
Type for sequence locations and lengths.
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
#define END_SCOPE(ns)
End the previously defined scope.
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
#define BEGIN_SCOPE(ns)
Define a new scope.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
#define NCBI_BLASTINPUT_EXPORT
ENa_strand
strand of nucleic acid
Declares CIgBlast, the C++ API for the IG-BLAST engine.
range(_Ty, _Ty) -> range< _Ty >
Defines command line argument related classes.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
C++ I/O stream wrappers to compress/decompress data on-the-fly.
int g(Seg_Gsm *spe, Seq_Mtf *psm, Thd_Gsm *tdg)