NCBI C++ ToolKit
igblastp_app.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: igblastp_app.cpp 93299 2021-03-29 18:29:47Z jianye $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Ning Ma
27  *
28  */
29 
30 /** @file igblastp_app.cpp
31  * IGBLASTP command line application
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
42 #include "../blast/blast_app_util.hpp"
43 
44 #ifndef SKIP_DOXYGEN_PROCESSING
46 USING_SCOPE(blast);
48 #endif
49 
51 {
52 public:
53  /** @inheritDoc */
56  version->SetVersionInfo(new CIgBlastVersion());
58  }
59 private:
60  /** @inheritDoc */
61  virtual void Init();
62  /** @inheritDoc */
63  virtual int Run();
64 
65  /// This application's command line args
68 };
69 
71 {
72 
73  if (m_UsageReport.IsEnabled()) {
76  }
77  // formulate command line arguments
78 
80 
81  // read the command line
82 
85 }
86 
88 {
89  int status = BLAST_EXIT_SUCCESS;
90  string organism = NcbiEmptyString;
91  try {
92 
93  // Allow the fasta reader to complain on invalid sequence input
95 
96  /*** Get the BLAST options ***/
97  const CArgs& args = GetArgs();
98  CRef<CBlastOptionsHandle> opts_hndl;
99 
100  opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
101 
102  const CBlastOptions& opt = opts_hndl->GetOptions();
103 
104  /*** Get the query sequence(s) ***/
105  CRef<CQueryOptionsArgs> query_opts =
107  SDataLoaderConfig dlconfig(query_opts->QueryIsProtein());
109  CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
110  query_opts->UseLowercaseMasks(),
111  query_opts->GetParseDeflines(),
112  query_opts->GetRange());
113  iconfig.SetQueryLocalIdMode();
116 
117  /*** Initialize igblast database/subject and options ***/
119  CRef<CIgBlastOptions> ig_opts(ig_args->GetIgBlastOptions());
120 
121  if (m_UsageReport.IsEnabled()) {
122  //logging
123  organism = ig_opts->m_Origin;
124  }
125  /*** Initialize the database/subject ***/
126  bool db_is_remote = true;
127  CRef<CScope> scope;
128  CRef<CLocalDbAdapter> blastdb;
129  CRef<CLocalDbAdapter> blastdb_full;
131  if (db_args->GetDatabaseName() == kEmptyStr &&
132  db_args->GetSubjects().Empty()) {
133  blastdb.Reset(&(*(ig_opts->m_Db[0])));
135  db_is_remote = false;
136  blastdb_full.Reset(&(*blastdb));
137  } else {
138  InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
139  blastdb, scope);
141  blastdb_full.Reset(&(*blastdb));
142  } else {
143  CSearchDatabase sdb(ig_opts->m_Db[0]->GetDatabaseName() + " " +
144  blastdb->GetDatabaseName(),
146  blastdb_full.Reset(new CLocalDbAdapter(sdb));
147  }
148  }
149  _ASSERT(blastdb && scope);
150 
151  // TODO: whose priority is higher?
152  ig_args->AddIgSequenceScope(scope);
153 
154  /*** Get the formatting options ***/
156  Int4 num_alignments = (db_args->GetDatabaseName() == kEmptyStr) ?
157  0 : fmt_args->GetNumAlignments();
158  CBlastFormat formatter(opt, *blastdb_full,
159  fmt_args->GetFormattedOutputChoice(),
160  query_opts->GetParseDeflines(),
162  fmt_args->GetNumDescriptions(),
163  num_alignments,
164  *scope,
165  opt.GetMatrixName(),
166  fmt_args->ShowGis(),
167  fmt_args->DisplayHtmlOutput(),
168  opt.GetQueryGeneticCode(),
169  opt.GetDbGeneticCode(),
170  opt.GetSumStatisticsMode(),
171  false,
172  blastdb->GetFilteringAlgorithm(),
173  fmt_args->GetCustomOutputFormatSpec(),
174  false,
175  false,
176  &*ig_opts);
177 
178 
179  //formatter.PrintProlog();
180  if(fmt_args->GetFormattedOutputChoice() ==
182  fmt_args->GetFormattedOutputChoice() ==
184  if(blastdb_full->GetDatabaseName() != NcbiEmptyString){
185  vector<CBlastFormatUtil::SDbInfo> db_info;
186  CBlastFormatUtil::GetBlastDbInfo(db_info, blastdb_full->GetDatabaseName(),
187  ig_opts->m_IsProtein, -1, false);
188  CBlastFormatUtil::PrintDbReport(db_info, 68, m_CmdLineArgs->GetOutputStream(), true);
189  }
190  }
191  /*** Process the input ***/
192  for (; !input.End(); formatter.ResetScopeHistory()) {
193 
194  CRef<CBlastQueryVector> query(input.GetNextSeqBatch(*scope));
195 
196  //SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
197  CRef<CSearchResultSet> results;
198 
199  if (m_CmdLineArgs->ExecuteRemotely() && db_is_remote) {
200  CIgBlast rmt_blast(query,
201  db_args->GetSearchDatabase(),
202  db_args->GetSubjects(),
203  opts_hndl, ig_opts,
204  NcbiEmptyString, scope);
205  //TODO: m_CmdLineArgs->ProduceDebugRemoteOutput(),
206  //TODO: m_CmdLineArgs->GetClientId());
207  results = rmt_blast.Run();
208  } else {
209  CIgBlast lcl_blast(query, blastdb, opts_hndl, ig_opts, scope);
211  results = lcl_blast.Run();
212  }
213 
214  /* TODO should we support archive format?
215  if (fmt_args->ArchiveFormatRequested(args)) {
216  CRef<IQueryFactory> qf(new CObjMgr_QueryFactory(*query));
217  formatter.WriteArchive(*qf, *opts_hndl, *results);
218  } else {
219  */
220  BlastFormatter_PreFetchSequenceData(*results, scope,
221  fmt_args->GetFormattedOutputChoice());
222  ITERATE(CSearchResultSet, result, *results) {
223  CBlastFormat::SClone clone_info;
224  CIgBlastResults &ig_result = *const_cast<CIgBlastResults *>
225  (dynamic_cast<const CIgBlastResults *>(&(**result)));
226  formatter.PrintOneResultSet(ig_result, query, clone_info, false, false);
227  }
228  }
229 
230  formatter.PrintEpilog(opt);
231 
233  opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
234  }
235 
236  } CATCH_ALL(status)
237  if (m_UsageReport.IsEnabled()) {
240  }
241  return status;
242 }
243 
244 #ifndef SKIP_DOXYGEN_PROCESSING
245 int main(int argc, const char* argv[] /*, const char* envp[]*/)
246 {
247  return CIgBlastpApp().AppMain(argc, argv, 0, eDS_Default, "");
248 }
249 #endif /* SKIP_DOXYGEN_PROCESSING */
Produce formatted blast output for command line applications.
void InitializeSubject(CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool is_remote_search, CRef< blast::CLocalDbAdapter > &db_adapter, CRef< objects::CScope > &scope)
Initializes the subject/database as well as its scope.
void BlastFormatter_PreFetchSequenceData(const blast::CSearchResultSet &results, CRef< CScope > scope, blast::CFormattingArgs::EOutputFormat format_type)
This method optimizes the retrieval of sequence data to scope.
#define CATCH_ALL(exit_code)
Standard catch statement for all BLAST command line programs.
#define BLAST_EXIT_SUCCESS
Command line binary exit code: success.
Interface for reading SRA sequences into blast input.
CArgs –.
Definition: ncbiargs.hpp:379
virtual CNcbiIstream & GetInputStream()
Get the input stream.
size_t GetNumThreads() const
Get the number of threads to spawn.
CRef< CBlastOptionsHandle > SetOptions(const CArgs &args)
Extract the command line arguments into a CBlastOptionsHandle object.
CRef< CBlastDatabaseArgs > GetBlastDatabaseArgs() const
Get the BLAST database arguments.
CArgDescriptions * SetCommandLine()
Set the command line arguments.
bool ExecuteRemotely() const
Determine whether the search should be executed remotely or not.
CRef< CQueryOptionsArgs > GetQueryOptionsArgs() const
Get the options for the query sequence(s)
CRef< CFormattingArgs > GetFormattingArgs() const
Get the formatting options.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
virtual CNcbiOstream & GetOutputStream()
Get the output stream.
CRef< CSearchDatabase > GetSearchDatabase() const
Retrieve the search database information.
Definition: blast_args.hpp:936
CRef< IQueryFactory > GetSubjects(objects::CScope *scope=NULL)
Retrieve subject sequences, if provided.
Definition: blast_args.hpp:958
string GetDatabaseName() const
Get the BLAST database name.
Definition: blast_args.hpp:931
Class representing a text file containing sequences in fasta format.
This class formats the BLAST results for command line applications.
void PrintOneResultSet(const blast::CSearchResults &results, CConstRef< blast::CBlastQueryVector > queries, unsigned int itr_num=numeric_limits< unsigned int >::max(), blast::CPsiBlastIterationState::TSeqIds prev_seqids=blast::CPsiBlastIterationState::TSeqIds(), bool is_deltablast_domain_result=false)
Print all alignment information for a single query sequence along with any errors or warnings (errors...
void PrintEpilog(const blast::CBlastOptions &options)
Print the footer of the blast report.
void ResetScopeHistory()
Resets the scope history for some output formats.
Class that centralizes the configuration data for sequences to be converted.
Definition: blast_input.hpp:48
void SetQueryLocalIdMode()
Append query-specific prefix codes to all generated local ids.
Generalized converter from an abstract source of biological sequence data to collections of blast inp...
Encapsulates ALL the BLAST algorithm's options.
void AddParam(EUsageParams p, int val)
void DebugDumpText(ostream &out, const string &bundle, unsigned int depth) const
Definition: ddumpable.cpp:56
string GetCustomOutputFormatSpec() const
Retrieve the string that specifies the custom output format for tabular and comma-separated values.
EOutputFormat GetFormattedOutputChoice() const
Get the choice of formatted output.
@ eFlatQueryAnchoredNoIdentities
@ eFlatQueryAnchoredIdentities
Flat query anchored no identities.
TSeqPos GetNumAlignments() const
Number of alignments to show in traditional BLAST output.
bool ShowGis() const
Display the NCBI GIs in formatted output?
TSeqPos GetNumDescriptions() const
Number of one-line descriptions to show in traditional BLAST output.
bool DisplayHtmlOutput() const
Display HTML output?
void AddIgSequenceScope(CRef< objects::CScope > scope)
CRef< CIgBlastOptions > GetIgBlastOptions()
Keeps track of the version of IgBLAST in the NCBI C++ toolkit.
Definition: igblast.hpp:54
Handles command line arguments for igblastp binary.
CRef< CIgBlastArgs > GetIgBlastArgs() const
virtual int GetQueryBatchSize() const
@inheritDoc
virtual int Run()
@inheritDoc
CRef< CIgBlastpAppArgs > m_CmdLineArgs
This application's command line args.
virtual void Init()
@inheritDoc
CBlastUsageReport m_UsageReport
CIgBlastpApp()
@inheritDoc
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
objects::ENa_strand GetStrand() const
Get strand to search in query sequence(s)
Definition: blast_args.hpp:800
bool GetParseDeflines() const
Should the defline be parsed?
Definition: blast_args.hpp:804
bool QueryIsProtein() const
Is the query sequence protein?
Definition: blast_args.hpp:807
TSeqRange GetRange() const
Get query sequence range restriction.
Definition: blast_args.hpp:796
bool UseLowercaseMasks() const
Use lowercase masking in FASTA input?
Definition: blast_args.hpp:802
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
Blast Search Subject.
Search Results for All Queries.
void Print(const CCompactSAMApplication::AlignInfo &ai)
int GetDbGeneticCode() const
bool m_IsProtein
Definition: igblast.hpp:66
string GetDatabaseName() const
Returns the database name if appropriate, else kEmptyStr for subject sequences.
CRef< CSearchResultSet > Run()
Run the Ig-BLAST engine.
Definition: igblast.cpp:356
CRef< CLocalDbAdapter > m_Db[5]
Definition: igblast.hpp:78
void SetNumberOfThreads(size_t nthreads)
Set MT mode.
Definition: igblast.hpp:289
int GetFilteringAlgorithm()
Retrieve the database filtering algorithm.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
int GetQueryGeneticCode() const
string m_Origin
Definition: igblast.hpp:67
bool GetSumStatisticsMode() const
Sum statistics options.
const char * GetMatrixName() const
@ eBlastDbIsProtein
protein
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
Definition: ncbiapp.cpp:1154
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
Definition: ncbiapp.cpp:1292
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:285
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:799
CVersionInfo GetVersion(void) const
Get the program version information.
Definition: ncbiapp.cpp:1164
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1175
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
@ fHideXmlHelp
Hide XML help description.
@ fHideLogfile
Hide log file description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
@ eDS_Default
Try standard log file (app.name + ".log") in /log/, use stderr on failure.
Definition: ncbidiag.hpp:1790
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
#define NcbiCerr
Definition: ncbistre.hpp:544
#define kEmptyStr
Definition: ncbistr.hpp:123
#define NcbiEmptyString
Definition: ncbistr.hpp:122
#define CVersion
USING_SCOPE(blast)
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
Main argument class for IGBLASTP application.
static int input()
Main class to perform a BLAST search on the local machine.
static int version
Definition: mdb_load.c:29
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares the CRemoteBlast class.
igblast clone info
Configuration structure for the CBlastScopeSource.
void OptimizeForWholeLargeSequenceRetrieval(bool value=true)
Configures the BLAST database data loader to optimize the retrieval of *entire* large sequences.
static string query
#define _ASSERT
else result
Definition: token2.c:20
Modified on Tue Dec 05 02:21:26 2023 by modify_doxy.py rev. 669887