NCBI C++ ToolKit
remote_blast_demo.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: remote_blast_demo.cpp 90001 2020-05-04 12:53:02Z ivanov $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Tom Madden
27  *
28  * File Description:
29  * Sample application for running a remote BLAST search.
30  *
31  */
32 
33 #include <ncbi_pch.hpp>
34 #include <corelib/ncbiapp.hpp>
35 #include <corelib/ncbienv.hpp>
36 #include <corelib/ncbiargs.hpp>
37 
39 
41 
53 
56 
58 USING_SCOPE(blast);
59 
60 
61 /////////////////////////////////////////////////////////////////////////////
62 // CBlastDemoApplication::
63 
65 {
66 private:
67  virtual void Init(void);
68  virtual int Run(void);
69  virtual void Exit(void);
70 
72 };
73 
74 
75 /////////////////////////////////////////////////////////////////////////////
76 // Init: describe the command-line arguments accepted by this application
77 
79 {
80  // Create command-line argument descriptions class
81  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
82 
83  // Specify USAGE context
84  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(), "BLAST demo program");
85 
86  arg_desc->AddKey
87  ("program", "ProgramName",
88  "One of blastn, megablast, disc_megablast, blastp, blastx, tblastn, tblastx, rpsblast",
90 
91  arg_desc->SetConstraint
92  ("program", &(*new CArgAllow_Strings,
93  "blastn", "megablast", "disc_megablast", "blastp", "blastx", "tblastn", "tblastx", "rpsblast"));
94 
95  arg_desc->AddDefaultKey("db", "DataBase",
96  "This is the name of the database", CArgDescriptions::eString, "nr");
97 
98  arg_desc->AddDefaultKey("in", "Queryfile",
99  "A FASTA file with the query", CArgDescriptions::eInputFile, "stdin");
100 
101  arg_desc->AddDefaultKey("parse", "parse", "Parse FASTA defline",
103 
104  arg_desc->AddDefaultKey("out", "Outputfile",
105  "The output file", CArgDescriptions::eOutputFile, "stdout");
106 
107  arg_desc->AddDefaultKey("evalue", "evalue",
108  "E-value threshold for saving hits", CArgDescriptions::eDouble, "0");
109 
110  arg_desc->AddDefaultKey("penalty", "penalty", "Penalty score for a mismatch",
112 
113  arg_desc->AddDefaultKey("reward", "reward", "Reward score for a match",
115 
116  arg_desc->AddDefaultKey("matrix", "matrix", "Scoring matrix name",
117  CArgDescriptions::eString, "BLOSUM62");
118 
119  arg_desc->AddDefaultKey("hitsize", "hitsize", "Hitlist size",
121 
122  // Setup arg.descriptions for this application
123  SetupArgDescriptions(arg_desc.release());
124 }
125 
126 
127 /// Modify BLAST options from defaults based upon command-line args.
128 ///
129 /// @param opts_handle already created CBlastOptionsHandle to modify [in]
130 
132 {
133  const CArgs& args = GetArgs();
134 
135  // The expect value (E-value) threshold is a supported option for all flavors of BLAST.
136  if(args["evalue"].AsDouble())
137  opts_handle->SetEvalueThreshold(args["evalue"].AsDouble());
138 
139  if(args["hitsize"].AsInteger() && args["hitsize"].AsInteger() > 0)
140  opts_handle->SetHitlistSize(args["hitsize"].AsInteger());
141 
142  // The first branch is used if the program is blastn or a flavor of megablast,
143  // as reward and penalty are valid options for those programs.
144  //
145  // The second branch is used for all other programs except rpsblast as matrix
146  // is a valid option for blastp and other programs that perform protein-protein
147  // comparisons.
148  //
149  if (CBlastNucleotideOptionsHandle* nucl_handle =
150  dynamic_cast<CBlastNucleotideOptionsHandle*>(&*opts_handle)) {
151 
152  if (args["reward"].AsInteger())
153  nucl_handle->SetMatchReward(args["reward"].AsInteger());
154 
155  if (args["penalty"].AsInteger())
156  nucl_handle->SetMismatchPenalty(args["penalty"].AsInteger());
157  }
158  else if (CBlastProteinOptionsHandle* prot_handle =
159  dynamic_cast<CBlastProteinOptionsHandle*>(&*opts_handle)) {
160 
161  if (args["matrix"])
162  prot_handle->SetMatrixName(args["matrix"].AsString().c_str());
163  }
164 
165  return;
166 }
167 
168 
169 /////////////////////////////////////////////////////////////////////////////
170 // Run: submit the remote BLAST search and write the resulting alignments
171 
173 {
174  // Get arguments
175  const CArgs& args = GetArgs();
176 
177  EProgram program = ProgramNameToEnum(args["program"].AsString());
178 
179  bool db_is_aa = (program == eBlastp ||
180  program == eBlastx ||
181  program == eRPSBlast ||
182  program == eRPSTblastn);
183 
185 
187 
188  opts->Validate(); // Can throw CBlastException::eInvalidOptions for invalid option.
189 
190  // This will dump the options to stderr.
191  // opts->GetOptions().DebugDumpText(cerr, "opts", 1);
192 
194  if (!objmgr) {
195  throw std::runtime_error("Could not initialize object manager");
196  }
197 
198  const bool is_protein = !!Blast_QueryIsProtein(opts->GetOptions().GetProgramType());
199  SDataLoaderConfig dlconfig(is_protein);
200  CBlastInputSourceConfig iconfig(dlconfig, objects::eNa_strand_other, false, args["parse"].AsBoolean());
201  CBlastFastaInputSource fasta_input(args["in"].AsInputFile(), iconfig);
202  CScope scope(*objmgr);
203 
204  CBlastInput blast_input(&fasta_input);
205 
206  TSeqLocVector query_loc = blast_input.GetAllSeqLocs(scope);
207 
208  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(query_loc));
209 
210  const CSearchDatabase target_db(args["db"].AsString(),
212 
213  CRemoteBlast blaster(query_factory, opts, target_db);
214 
215  // This will dump a lot of stuff to stderr.
216  // blaster.SetVerbose();
217 
218  bool status = blaster.SubmitSync();
219 
220  if (status == false)
221  throw std::runtime_error("No results returned by SubmitSync");
222 
223  cerr << "RID: " << blaster.GetRID() << '\n';
224 
225  CSearchResultSet results = *blaster.GetResultSet();
226  CNcbiOstream& out = args["out"].AsOutputFile();
227 
228  for (unsigned int i = 0; i < results.GetNumResults(); i++) {
229  CConstRef<CSeq_align_set> sas = results[i].GetSeqAlign();
230  out << MSerial_AsnText << *sas;
231  }
232 
233  return 0;
234 }
235 
236 
237 /////////////////////////////////////////////////////////////////////////////
238 // Cleanup
239 
241 {
242  // Do your after-Run() cleanup here
243 }
244 
245 
246 /////////////////////////////////////////////////////////////////////////////
247 // MAIN
248 
249 #ifndef SKIP_DOXYGEN_PROCESSING
250 int NcbiSys_main(int argc, ncbi::TXChar* argv[])
251 {
252  // Execute main application function
253  return CBlastDemoApplication().AppMain(argc, argv);
254 }
255 #endif /* SKIP_DOXYGEN_PROCESSING */
Contains C++ wrapper classes to structures in algo/blast/core as well as some auxiliary functions to ...
Interface for reading SRA sequences into blast input.
Interface for converting sources of sequence data into blast sequence input.
Declares the CBlastNucleotideOptionsHandle class.
Declares class to encapsulate all BLAST options.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
Definition: blast_program.c:40
Declares the CBlastProteinOptionsHandle class.
Definitions of special type used in BLAST.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
@ eRPSBlast
protein-pssm (reverse-position-specific BLAST)
Definition: blast_types.hpp:63
@ eBlastp
Protein-Protein.
Definition: blast_types.hpp:59
@ eRPSTblastn
nucleotide-pssm (RPS blast with translated query)
Definition: blast_types.hpp:64
@ eBlastx
Translated nucl-Protein.
Definition: blast_types.hpp:60
CArgAllow_Strings –.
Definition: ncbiargs.hpp:1641
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
void ProcessCommandLineArgs(CRef< CBlastOptionsHandle > opts_handle)
virtual int Run(void)
Run the application.
virtual int Run(void)
Run the application.
Definition: blast_demo.cpp:161
virtual void Exit(void)
Cleanup on application exit.
Definition: blast_demo.cpp:226
virtual void Init(void)
Initialize the application.
virtual void Init(void)
Initialize the application.
Definition: blast_demo.cpp:76
virtual void Exit(void)
Cleanup on application exit.
Class representing a text file containing sequences in fasta format.
Class that centralizes the configuration data for sequences to be converted.
Definition: blast_input.hpp:48
Generalized converter from an abstract source of biological sequence data to collections of blast inp...
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
@ eRemote
To be used when running BLAST remotely.
Handle to the protein-protein options to the BLAST algorithm.
NCBI C++ Object Manager dependant implementation of IQueryFactory.
API for Remote Blast Requests.
CScope –.
Definition: scope.hpp:92
Blast Search Subject.
Search Results for All Queries.
std::ofstream out("events_result.xml")
main entry point for tests
void SetEvalueThreshold(double eval)
Sets EvalueThreshold.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
void SetHitlistSize(int s)
Sets HitlistSize.
EProgram ProgramNameToEnum(const std::string &program_name)
Map a string into an element of the ncbi::blast::EProgram enumeration (except eBlastProgramMax).
Definition: blast_aux.cpp:757
@ eBlastDbIsNucleotide
nucleotide
@ eBlastDbIsProtein
protein
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:305
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:832
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1208
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eBoolean
{'true', 't', 'false', 'f'}, case-insensitive
Definition: ncbiargs.hpp:590
@ eDouble
Convertible into a floating point number (double)
Definition: ncbiargs.hpp:594
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
@ eInteger
Convertible into an integer number (int or Int8)
Definition: ncbiargs.hpp:592
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
char TXChar
Definition: ncbistr.hpp:172
@ eNa_strand_other
Definition: Na_strand_.hpp:70
int i
Main class to perform a BLAST search on the local machine.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines command line argument related classes.
Defines unified interface to application:
The Object manager core.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
static int * results[]
Declares the CRemoteBlast class.
USING_SCOPE(blast)
int NcbiSys_main(int argc, ncbi::TXChar *argv[])
USING_NCBI_SCOPE
Definition of SSeqLoc structure.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Configuration structure for the CBlastScopeSource.
Uniform BLAST Search Interface.
Modified on Wed Sep 04 15:01:33 2024 by modify_doxy.py rev. 669887