NCBI C++ ToolKit
rpsblast_app.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: rpsblast_app.cpp 94281 2021-07-16 13:42:53Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Jason Papadopoulos
27  *
28  */
29 
30 /** @file rpsblast_app.cpp
31  * RPSBLAST command line application
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
42 #include "blast_app_util.hpp"
43 #include "rpsblast_node.hpp"
46 
47 #ifndef SKIP_DOXYGEN_PROCESSING
49 USING_SCOPE(blast);
51 #endif
52 
54 {
55 public:
56  /** @inheritDoc */
59  version->SetVersionInfo(new CBlastVersion());
62  if (m_UsageReport.IsEnabled()) {
64  }
65  }
68  }
69 private:
70  /** @inheritDoc */
71  virtual void Init();
72  /** @inheritDoc */
73  virtual int Run();
74 
75  int x_RunMTBySplitDB();
76  int x_RunMTBySplitQuery();
77 
78  /// This application's command line args
82 };
83 
85 {
86  // formulate command line arguments
87 
89 
90  // read the command line
91 
94 }
95 
97 {
98  const CArgs& args = GetArgs();
99  CMTArgs mt_args(args);
100  if ((mt_args.GetMTMode() == CMTArgs::eSplitByQueries) &&
101  (mt_args.GetNumThreads() > 1)){
103  return x_RunMTBySplitQuery();
104  }
105  else {
106  return x_RunMTBySplitDB();
107  }
108 }
109 
111 {
112  int status = BLAST_EXIT_SUCCESS;
114 
115  try {
116 
117  // Allow the fasta reader to complain on invalid sequence input
119  SetDiagPostPrefix("rpsblast");
120  SetDiagHandler(&bah, false);
121 
122  /*** Get the BLAST options ***/
123  const CArgs& args = GetArgs();
124  CRef<CBlastOptionsHandle> opts_hndl;
126  opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
127  }
128  else {
129  opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
130  }
131 
133  opts_hndl, true);
134  const CBlastOptions& opt = opts_hndl->GetOptions();
135 
136  /*** Initialize the database ***/
138  CRef<CLocalDbAdapter> db_adapter;
139  CRef<CScope> scope;
140  InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
141  db_adapter, scope);
142  _ASSERT(db_adapter && scope);
143 
144  /*** Get the query sequence(s) ***/
145  CRef<CQueryOptionsArgs> query_opts =
147  SDataLoaderConfig dlconfig =
149  db_adapter);
150  CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
151  query_opts->UseLowercaseMasks(),
152  query_opts->GetParseDeflines(),
153  query_opts->GetRange());
155  ERR_POST(Warning << "Query is Empty!");
156  return BLAST_EXIT_SUCCESS;
157  }
160 
161  /*** Get the formatting options ***/
163  bool isArchiveFormat = fmt_args->ArchiveFormatRequested(args);
164  if(!isArchiveFormat) {
165  bah.DoNotSaveMessages();
166  }
167  CBlastFormat formatter(opt, *db_adapter,
168  fmt_args->GetFormattedOutputChoice(),
169  query_opts->GetParseDeflines(),
171  fmt_args->GetNumDescriptions(),
172  fmt_args->GetNumAlignments(),
173  *scope,
174  opt.GetMatrixName(),
175  fmt_args->ShowGis(),
176  fmt_args->DisplayHtmlOutput(),
177  opt.GetQueryGeneticCode(),
178  opt.GetDbGeneticCode(),
179  opt.GetSumStatisticsMode(),
181  db_adapter->GetFilteringAlgorithm(),
182  fmt_args->GetCustomOutputFormatSpec(),
183  false, false, NULL, NULL,
185 
186  formatter.SetQueryRange(query_opts->GetRange());
187  formatter.SetLineLength(fmt_args->GetLineLength());
188  if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) {
189  formatter.SetBaseFile(args[kArgOutput].AsString());
190  }
191  formatter.PrintProlog();
192 
193  /*** Process the input ***/
194  for (; !input.End(); formatter.ResetScopeHistory(), QueryBatchCleanup()) {
195 
196  CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope));
197  CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*query_batch));
198 
199  SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
200 
201  CRef<CSearchResultSet> results;
202 
204  {
205  CRef<CRemoteBlast> rmt_blast =
206  InitializeRemoteBlast(queries, db_args, opts_hndl,
209  results = rmt_blast->GetResultSet();
210  }
211  else
212  {
213  CLocalRPSBlast local_search (query_batch, db_args->GetDatabaseName(), opts_hndl, args[kArgNumThreads].AsInteger() );
214  results = local_search.Run();
215  }
216 
217  if (fmt_args->ArchiveFormatRequested(args)) {
218  formatter.WriteArchive(*queries, *opts_hndl, *results, 0, bah.GetMessages());
219  bah.ResetMessages();
220  } else {
221  BlastFormatter_PreFetchSequenceData(*results, scope,
222  fmt_args->GetFormattedOutputChoice());
223  ITERATE(CSearchResultSet, result, *results) {
224  formatter.PrintOneResultSet(**result, query_batch);
225  }
226  }
227  }
228 
229  formatter.PrintEpilog(opt);
230 
232  opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
233  }
234 
237  } CATCH_ALL(status)
238  if(!bah.GetMessages().empty()) {
239  const CArgs & a = GetArgs();
241  }
244  return status;
245 }
246 
247 
249 {
250  int status = BLAST_EXIT_SUCCESS;
252  int batch_size = 3600;
253 
254  char * mt_query_batch_env = getenv("BLAST_MT_QUERY_BATCH_SIZE");
255  if (mt_query_batch_env) {
256  batch_size = NStr::StringToInt(mt_query_batch_env);
257  }
258  INFO_POST("Batch Size: " << batch_size);
259  // Allow the fasta reader to complain on invalid sequence input
261  SetDiagPostPrefix("rpsblast");
262  SetDiagHandler(&bah, false);
263 
264  try {
265  const CArgs& args = GetArgs();
266  CRef<CBlastOptionsHandle> opts_hndl;
268  opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
269  }
270  else {
271  opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
272  }
274  ERR_POST(Warning << "Query is Empty!");
275  return BLAST_EXIT_SUCCESS;
276  }
277  CNcbiOstream & out_stream = m_CmdLineArgs->GetOutputStream();
278  const int kMaxNumOfThreads = m_CmdLineArgs->GetNumThreads();
279  CBlastMasterNode master_node(out_stream, kMaxNumOfThreads);
280  int chunk_num = 0;
281 
285  while (master_node.Processing()) {
286  if (!input.AtEOF()) {
287  if (!master_node.IsFull()) {
288  string qb;
289  int q_index = 0;
290  int num_q = input.GetQueryBatch(qb, q_index);
291  if (num_q > 0) {
292  CBlastNodeMailbox * mb(new CBlastNodeMailbox(chunk_num, master_node.GetBuzzer()));
293  CRPSBlastNode * t(new CRPSBlastNode(chunk_num, GetArguments(), args, bah, qb, q_index, num_q, mb));
294  master_node.RegisterNode(t, mb);
295  chunk_num ++;
296  }
297  }
298  }
299  else {
300  master_node.Shutdown();
305  }
306  }
307 
308  if(chunk_num < kMaxNumOfThreads){
309  CheckMTByQueries_QuerySize(opts_hndl->GetOptions().GetProgram(), batch_size);
310  }
311  } CATCH_ALL (status)
312 
313  if(!bah.GetMessages().empty()) {
314  const CArgs & a = GetArgs();
316  }
319  return status;
320 
321 }
322 
323 #ifndef SKIP_DOXYGEN_PROCESSING
// Program entry point: constructs a temporary CRPSBlastApp, runs it through
// AppMain() with the process arguments, and returns AppMain()'s result as the
// exit status. The envp parameter is commented out in the signature and is
// not passed on.
324 int main(int argc, const char* argv[] /*, const char* envp[]*/)
325 {
326  int status = CRPSBlastApp().AppMain(argc, argv);
327 
328  return status;
329 }
330 #endif /* SKIP_DOXYGEN_PROCESSING */
Produce formatted blast output for command line applications.
void LogCmdOptions(blast::CBlastUsageReport &report, const CBlastAppArgs &args)
void CheckForFreqRatioFile(const string &rps_dbname, CRef< CBlastOptionsHandle > &opt_handle, bool isRpsblast)
CRef< blast::CRemoteBlast > InitializeRemoteBlast(CRef< blast::IQueryFactory > queries, CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool verbose_output, const string &client_id, CRef< objects::CPssmWithParameters > pssm)
Initializes a CRemoteBlast instance for usage by command line BLAST binaries.
blast::SDataLoaderConfig InitializeQueryDataLoaderConfiguration(bool query_is_protein, CRef< blast::CLocalDbAdapter > db_adapter)
Initialize the data loader configuration for the query.
void SaveSearchStrategy(const CArgs &args, blast::CBlastAppArgs *cmdline_args, CRef< blast::IQueryFactory > queries, CRef< blast::CBlastOptionsHandle > opts_hndl, CRef< objects::CPssmWithParameters > pssm, unsigned int num_iters)
Save the search strategy corresponding to the current command line search.
void QueryBatchCleanup()
Clean up formatter scope and release.
bool RecoverSearchStrategy(const CArgs &args, blast::CBlastAppArgs *cmdline_args)
Recover search strategy from input file.
void CheckMTByQueries_QuerySize(EProgram prog, int batch_size)
void PrintErrorArchive(const CArgs &a, const list< CRef< CBlast4_error > > &msg)
Function to print blast archive with only error messages (search failed) to output stream.
void InitializeSubject(CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool is_remote_search, CRef< blast::CLocalDbAdapter > &db_adapter, CRef< objects::CScope > &scope)
Initializes the subject/database as well as its scope.
string GetCmdlineArgs(const CNcbiArguments &a)
void BlastFormatter_PreFetchSequenceData(const blast::CSearchResultSet &results, CRef< CScope > scope, blast::CFormattingArgs::EOutputFormat format_type)
This method optimizes the retrieval of sequence data to scope.
void LogBlastOptions(blast::CBlastUsageReport &report, const CBlastOptions &opt)
bool UseXInclude(const CFormattingArgs &f, const string &s)
bool IsIStreamEmpty(CNcbiIstream &in)
void LogQueryInfo(CBlastUsageReport &report, const CBlastInput &q_info)
Utility functions for BLAST command line applications.
#define CATCH_ALL(exit_code)
Standard catch statement for all BLAST command line programs.
#define BLAST_EXIT_SUCCESS
Command line binary exit code: success.
Interface for reading SRA sequences into blast input.
CArgs –.
Definition: ncbiargs.hpp:379
CRef< CBlastOptionsHandle > SetOptionsForSavedStrategy(const CArgs &args)
Combine the command line arguments into a CBlastOptions object recovered from saved search strategy.
size_t GetNumThreads() const
Get the number of threads to spawn.
CRef< CBlastOptionsHandle > SetOptions(const CArgs &args)
Extract the command line arguments into a CBlastOptionsHandle object.
CRef< CBlastDatabaseArgs > GetBlastDatabaseArgs() const
Get the BLAST database arguments.
CArgDescriptions * SetCommandLine()
Set the command line arguments.
bool ExecuteRemotely() const
Determine whether the search should be executed remotely or not.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
CRef< CQueryOptionsArgs > GetQueryOptionsArgs() const
Get the options for the query sequence(s)
string GetClientId() const
Retrieve the client ID for remote requests.
CRef< CFormattingArgs > GetFormattingArgs() const
Get the formatting options.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
Class to capture message from diag handler.
Definition: blast_aux.hpp:249
string GetDatabaseName() const
Get the BLAST database name.
Definition: blast_args.hpp:931
Class representing a text file containing sequences in fasta format.
This class formats the BLAST results for command line applications.
void LogBlastSearchInfo(blast::CBlastUsageReport &report)
void PrintOneResultSet(const blast::CSearchResults &results, CConstRef< blast::CBlastQueryVector > queries, unsigned int itr_num=numeric_limits< unsigned int >::max(), blast::CPsiBlastIterationState::TSeqIds prev_seqids=blast::CPsiBlastIterationState::TSeqIds(), bool is_deltablast_domain_result=false)
Print all alignment information for a single query sequence along with any errors or warnings (errors...
void PrintEpilog(const blast::CBlastOptions &options)
Print the footer of the blast report.
void SetBaseFile(string base)
For use by XML2 only.
void ResetScopeHistory()
Resets the scope history for some output formats.
void SetLineLength(size_t len)
Set the alignment line length.
void WriteArchive(blast::IQueryFactory &queries, blast::CBlastOptionsHandle &options_handle, const blast::CSearchResultSet &results, unsigned int num_iters=0, const list< CRef< objects::CBlast4_error > > &msg=list< CRef< objects::CBlast4_error > >())
Writes out the query and results as an "archive" format.
void PrintProlog()
Print the header of the blast report.
void SetQueryRange(const TSeqRange &query_range)
Set query range.
Class that centralizes the configuration data for sequences to be converted.
Definition: blast_input.hpp:48
Generalized converter from an abstract source of biological sequence data to collections of blast inp...
Int8 GetQueriesLength()
Definition: blast_node.hpp:156
CConditionVariable & GetBuzzer()
Definition: blast_node.hpp:153
void RegisterNode(CBlastNode *node, CBlastNodeMailbox *mailbox)
Definition: blast_node.cpp:136
Encapsulates ALL the BLAST algorithm's options.
void AddParam(EUsageParams p, int val)
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
Definition: version.hpp:53
void DebugDumpText(ostream &out, const string &bundle, unsigned int depth) const
Definition: ddumpable.cpp:56
virtual bool ArchiveFormatRequested(const CArgs &args) const
string GetCustomOutputFormatSpec() const
Retrieve the string that specifies the custom output format for tabular and comma-separated value output.
EOutputFormat GetFormattedOutputChoice() const
Get the choice of formatted output.
TSeqPos GetNumAlignments() const
Number of alignments to show in traditional BLAST output.
bool ShowGis() const
Display the NCBI GIs in formatted output?
TSeqPos GetNumDescriptions() const
Number of one-line descriptions to show in traditional BLAST output.
size_t GetLineLength() const
bool DisplayHtmlOutput() const
Display HTML output?
CRef< CSearchResultSet > Run()
Argument class to collect multi-threaded arguments.
size_t GetNumThreads() const
Get the number of threads to spawn.
int GetMTMode() const
@ eSplitByQueries
NCBI C++ Object Manager dependent implementation of IQueryFactory.
objects::ENa_strand GetStrand() const
Get strand to search in query sequence(s)
Definition: blast_args.hpp:800
bool GetParseDeflines() const
Should the defline be parsed?
Definition: blast_args.hpp:804
bool QueryIsProtein() const
Is the query sequence protein?
Definition: blast_args.hpp:807
TSeqRange GetRange() const
Get query sequence range restriction.
Definition: blast_args.hpp:796
bool UseLowercaseMasks() const
Use lowercase masking in FASTA input?
Definition: blast_args.hpp:802
Handles command line arguments for blastp binary.
virtual CNcbiIstream & GetInputStream()
Get the input stream.
virtual int GetQueryBatchSize() const
@inheritDoc
virtual CNcbiOstream & GetOutputStream()
Get the output stream.
virtual void Init()
@inheritDoc
virtual int Run()
@inheritDoc
CRef< CRPSBlastAppArgs > m_CmdLineArgs
This application's command line args.
int x_RunMTBySplitDB()
CBlastUsageReport m_UsageReport
CRPSBlastApp()
@inheritDoc
CStopWatch m_StopWatch
int x_RunMTBySplitQuery()
CRef –.
Definition: ncbiobj.hpp:618
Search Results for All Queries.
CStopWatch –.
Definition: ncbitime.hpp:1938
const string kArgOutput
Output file name.
const string kArgMTMode
Argument to specify mt mode (split by db or split by queries)
const string kArgNumThreads
Argument to determine the number of threads to use when running BLAST.
void Print(const CCompactSAMApplication::AlignInfo &ai)
int GetDbGeneticCode() const
void ResetMessages(void)
Reset message buffer, erase all saved messages.
Definition: blast_aux.cpp:1174
int GetFilteringAlgorithm()
Retrieve the database filtering algorithm.
EProgram GetProgram() const
Accessors/Mutators for individual options.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
int GetQueryGeneticCode() const
bool GetSumStatisticsMode() const
Sum statistics options.
CRef< CSearchResultSet > GetResultSet()
Submit the search (if necessary) and return the results.
void DoNotSaveMessages(void)
Call to turn off saving diag messages, discard all saved messages.
Definition: blast_aux.cpp:1189
const char * GetMatrixName() const
list< CRef< objects::CBlast4_error > > & GetMessages(void)
Return list of saved diag messages.
Definition: blast_aux.hpp:262
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
Definition: ncbiapp.cpp:1174
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
Definition: ncbiapp.cpp:1312
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:819
CVersionInfo GetVersion(void) const
Get the program version information.
Definition: ncbiapp.cpp:1184
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1195
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideLogfile
Hide log file description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
#define NULL
Definition: ncbistd.hpp:225
void SetDiagPostPrefix(const char *prefix)
Specify a string to prefix all subsequent error postings with.
Definition: ncbidiag.cpp:6097
#define INFO_POST(message)
Definition: ncbidiag.hpp:201
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void SetDiagHandler(CDiagHandler *handler, bool can_delete=true)
Set the diagnostic handler using the specified diagnostic handler class.
Definition: ncbidiag.cpp:6288
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NcbiCerr
Definition: ncbistre.hpp:544
static int StringToInt(const CTempString str, TStringToNumFlags flags=0, int base=10)
Convert string to int.
Definition: ncbistr.cpp:630
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2776
void Start(void)
Start the timer.
Definition: ncbitime.hpp:2765
#define CVersion
static int input()
Main class to perform a BLAST search on the local machine.
static int version
Definition: mdb_load.c:29
unsigned int a
Definition: ncbi_localip.c:102
EIPRangeType t
Definition: ncbi_localip.c:101
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares the CRemoteBlast class.
Declares auxiliary classes to manage RPS-BLAST related C-structures.
USING_SCOPE(blast)
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
Main argument class for RPSBLAST application.
Declares the CLocalRPSBlast class.
RPSBLAST node api.
Configuration structure for the CBlastScopeSource.
#define _ASSERT
else result
Definition: token2.c:20
Modified on Wed Apr 24 14:12:20 2024 by modify_doxy.py rev. 669887