NCBI C++ ToolKit
blastn_node.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id:
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Amelia Fong
27  *
28  */
29 
30 /** @file blastn_node.cpp
31  * blastn node api
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
41 #include "blast_app_util.hpp"
42 #include "blastn_node.hpp"
43 
44 #ifndef SKIP_DOXYGEN_PROCESSING
46 USING_SCOPE(blast);
48 #endif
49 
/// Construct a blastn node.
///
/// node_num, ncbi_args, args, bah, query_index, num_queries and mailbox are
/// forwarded unchanged to the CBlastNode base class.
///
/// @param input  Query text for this node. Its contents are taken over by
///               swapping with m_Input, which was just initialized to
///               kEmptyStr, so the caller's string is left empty.
///
/// The last visible statement sends an eRunRequest message with this node as
/// the payload.
/// NOTE(review): original source lines 56-57 are not present in this listing,
/// so any setup done between the swap and SendMsg is not shown here.
50 CBlastnNode::CBlastnNode (int node_num, const CNcbiArguments & ncbi_args, const CArgs& args,
51  CBlastAppDiagHandler & bah, string & input,
52  int query_index, int num_queries, CBlastNodeMailbox * mailbox):
53  CBlastNode(node_num, ncbi_args, args, bah, query_index, num_queries, mailbox), m_Input(kEmptyStr)
54 {
55  m_Input.swap(input);
58  SendMsg(CBlastNodeMsg::eRunRequest, (void*) this);
59 }
60 
62 {
// Body of GetBlastResults(CNcbiOstream &os); the signature line (original
// line 61) is missing from this listing and is taken from the
// cross-reference section.
//
// When the node state is eDone, any data still buffered in the command-line
// args' output string stream is written to `os` and the node's status is
// returned. In every other state nothing is written and -1 is returned.
63  if(GetState() == eDone) {
// Only stream the buffer when it actually has characters available.
64  if (m_CmdLineArgs->GetOutputStrStream().rdbuf()->in_avail() > 0) {
65  os << m_CmdLineArgs->GetOutputStrStream().rdbuf();
66  }
67  return GetStatus();
68  }
69  return -1;
70 }
71 
73 {
// Body of the destructor ~CBlastnNode(void), per the cross-reference section
// (definition at original line 72).
// NOTE(review): the single body line (original line 74) is missing from this
// listing, so what the destructor does cannot be determined here.
75 }
76 
// Thread function of the node (the cross-reference section lists it as
// "virtual void * Main(void)", defined at original line 78).
//
// Visible flow: obtain the BLAST options, initialize the database/subject,
// configure the query input, build a CBlastFormat, then search and format the
// queries batch by batch. After the try/CATCH_ALL block the status is stored
// with SetStatus(), the state is set to eDone or eError, and the matching
// ePostResult / eErrorExit message is sent with this node as payload.
//
// Returns NULL at the end of the function.
//
// NOTE(review): this listing omits a number of original lines (for example
// 78, 81-82, 84-85, 92-93, 101, 109, 111, 117, 121, 128, 131, 169, 190), so
// some statements below appear without their opening line and some names
// (db_args, fasta, fmt_args, bah, mixer) are declared on lines not shown.
77 void *
79 {
80  int status = BLAST_EXIT_SUCCESS;
83 
86  try {
87 
88  /*** Get the BLAST options ***/
89  const CArgs& args = GetArgs();
90 
91  CRef<CBlastOptionsHandle> opts_hndl;
94  }
95  else {
96  opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
97  }
98  const CBlastOptions& opt = opts_hndl->GetOptions();
99 
100  /*** Initialize the database/subject ***/
102  CRef<CLocalDbAdapter> db_adapter;
103  CRef<CScope> scope;
104  InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(), db_adapter, scope);
105  _ASSERT(db_adapter && scope);
106 
107  /*** Get the query sequence(s) ***/
108  CRef<CQueryOptionsArgs> query_opts =
110  SDataLoaderConfig dlconfig =
112  db_adapter);
113  CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
114  query_opts->UseLowercaseMasks(),
115  query_opts->GetParseDeflines(),
116  query_opts->GetRange());
// NOTE(review): the condition guarding this block (original line 117) is not
// shown in this listing. The return below leaves the function before the
// SetStatus / SetState / SendMsg calls at the end are reached, so neither
// ePostResult nor eErrorExit is sent on this path and the state is not
// updated here -- confirm that whatever waits on this node copes with that.
// It also returns the integer exit-code macro from a function declared to
// return void *.
118  ERR_POST(Warning << "Query is Empty!");
119  return BLAST_EXIT_SUCCESS;
120  }
122  CBlastInput input(&fasta);
123 
124  // Initialize the megablast database index now so we can know whether an indexed search will be run.
125  // This is only important for the reference in the report, but would be done anyway.
126  if (opt.GetUseIndex() && !m_CmdLineArgs->ExecuteRemotely()) {
127  CRef<CBlastOptions> my_options(&(opts_hndl->SetOptions()));
129  }
130  /*** Get the formatting options ***/
132  bool isArchiveFormat = fmt_args->ArchiveFormatRequested(args);
// Saved diag messages are only consumed by WriteArchive() further down, so
// saving is switched off when archive format was not requested.
133  if(!isArchiveFormat) {
134  bah.DoNotSaveMessages();
135  }
136  CBlastFormat formatter(opt, *db_adapter,
137  fmt_args->GetFormattedOutputChoice(),
138  query_opts->GetParseDeflines(),
140  fmt_args->GetNumDescriptions(),
141  fmt_args->GetNumAlignments(),
142  *scope,
143  opt.GetMatrixName(),
144  fmt_args->ShowGis(),
145  fmt_args->DisplayHtmlOutput(),
146  opt.GetQueryGeneticCode(),
147  opt.GetDbGeneticCode(),
148  opt.GetSumStatisticsMode(),
150  db_adapter->GetFilteringAlgorithm(),
151  fmt_args->GetCustomOutputFormatSpec(),
152  m_CmdLineArgs->GetTask() == "megablast",
153  opt.GetMBIndexLoaded(),
154  NULL, NULL,
156  GetSubjectFile(args));
157 
158  formatter.SetQueryRange(query_opts->GetRange());
159  formatter.SetLineLength(fmt_args->GetLineLength());
160  formatter.SetHitsSortOption(fmt_args->GetHitsSortOption());
161  formatter.SetHspsSortOption(fmt_args->GetHspsSortOption());
162  formatter.SetCustomDelimiter(fmt_args->GetCustomDelimiter());
// When UseXInclude() says so, the formatter's base file is set to the output
// name suffixed with "_" and this node's number.
163  if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) {
164  formatter.SetBaseFile(args[kArgOutput].AsString() + "_"+ NStr::IntToString(GetNodeNum()));
165  }
166  formatter.PrintProlog();
167 
168  /*** Process the input ***/
// A nonzero batch size from the command-line args is used as-is. Otherwise
// the size comes from `mixer`, after its target hit count has been set to
// total_len / 3000 when the formatter reports a positive database length.
170  int batch_size = m_CmdLineArgs->GetQueryBatchSize();
171  if (batch_size) {
172  input.SetBatchSize(batch_size);
173  } else {
174  Int8 total_len = formatter.GetDbTotalLength();
175  if (total_len > 0) {
176  /* the optimal hits per batch scales with total db size */
177  mixer.SetTargetHits(total_len / 3000);
178  }
179  input.SetBatchSize(mixer.GetBatchSize());
180  }
// One iteration per query batch. The loop's increment step resets the
// formatter's scope history and calls QueryBatchCleanup() after each batch.
181  for (; !input.End(); formatter.ResetScopeHistory(), QueryBatchCleanup()) {
182 
183  CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope));
184  CRef<IQueryFactory> queries(new CObjMgr_QueryFactory(*query_batch));
185 
186  SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
187 
188  CRef<CSearchResultSet> results;
189 
191  CRef<CRemoteBlast> rmt_blast =
192  InitializeRemoteBlast(queries, db_args, opts_hndl,
195  results = rmt_blast->GetResultSet();
196  } else {
// Local search: run with the thread count set to 1, then, if no explicit
// batch size was given, pick the next batch size from the number of
// extensions this search performed.
197  CLocalBlast lcl_blast(queries, opts_hndl, db_adapter);
198  lcl_blast.SetNumberOfThreads(1);
199  results = lcl_blast.Run();
200  if (!batch_size)
201  input.SetBatchSize(mixer.GetBatchSize(lcl_blast.GetNumExtensions()));
202  }
203 
// Archive format writes the batch together with the saved diag messages and
// then clears them. Any other format pre-fetches sequence data and prints
// each result set in turn.
204  if (isArchiveFormat) {
205  formatter.WriteArchive(*queries, *opts_hndl, *results, 0, bah.GetMessages());
206  bah.ResetMessages();
207  } else {
208  BlastFormatter_PreFetchSequenceData(*results, scope,
209  fmt_args->GetFormattedOutputChoice());
210  ITERATE(CSearchResultSet, result, *results) {
211  formatter.PrintOneResultSet(**result, query_batch);
212  }
213  }
214  }
215 
216  formatter.PrintEpilog(opt);
217 
219  opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
220  }
221 
222  SetQueriesLength(input.GetTotalLengthProcessed());
223 
// CATCH_ALL is described in the cross-reference section as the standard catch
// statement for BLAST command line programs. It receives `status`.
// Presumably it stores a non-success exit code there when an exception is
// caught -- confirm in blast_app_util.hpp.
224  } CATCH_ALL(status)
225 
// Publish the outcome: record the status, then set the state and send the
// message that matches success or failure.
226  SetStatus(status);
227  if (status == BLAST_EXIT_SUCCESS) {
228  SetState(eDone);
229  SendMsg(CBlastNodeMsg::ePostResult, (void *) this);
230 
231  }
232  else {
233  SetState(eError);
234  SendMsg(CBlastNodeMsg::eErrorExit, (void *) this);
235  }
236 
237  return NULL;
238 }
239 
Produce formatted blast output for command line applications.
CRef< blast::CRemoteBlast > InitializeRemoteBlast(CRef< blast::IQueryFactory > queries, CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool verbose_output, const string &client_id, CRef< objects::CPssmWithParameters > pssm)
Initializes a CRemoteBlast instance for usage by command line BLAST binaries.
blast::SDataLoaderConfig InitializeQueryDataLoaderConfiguration(bool query_is_protein, CRef< blast::CLocalDbAdapter > db_adapter)
Initialize the data loader configuration for the query.
void SaveSearchStrategy(const CArgs &args, blast::CBlastAppArgs *cmdline_args, CRef< blast::IQueryFactory > queries, CRef< blast::CBlastOptionsHandle > opts_hndl, CRef< objects::CPssmWithParameters > pssm, unsigned int num_iters)
Save the search strategy corresponding to the current command line search.
void QueryBatchCleanup()
Clean up formatter scope and release.
string GetSubjectFile(const CArgs &args)
Get the name of the subject file. @param args Arguments class [in].
bool RecoverSearchStrategy(const CArgs &args, blast::CBlastAppArgs *cmdline_args)
Recover search strategy from input file.
void InitializeSubject(CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool is_remote_search, CRef< blast::CLocalDbAdapter > &db_adapter, CRef< objects::CScope > &scope)
Initializes the subject/database as well as its scope.
string GetCmdlineArgs(const CNcbiArguments &a)
void BlastFormatter_PreFetchSequenceData(const blast::CSearchResultSet &results, CRef< CScope > scope, blast::CFormattingArgs::EOutputFormat format_type)
This method optimizes the retrieval of sequence data to scope.
bool UseXInclude(const CFormattingArgs &f, const string &s)
bool IsIStreamEmpty(CNcbiIstream &in)
Utility functions for BLAST command line applications.
#define CATCH_ALL(exit_code)
Standard catch statement for all BLAST command line programs.
#define BLAST_EXIT_SUCCESS
Command line binary exit code: success.
Interface for reading SRA sequences into blast input.
USING_SCOPE(blast)
USING_NCBI_SCOPE
Definition: blastn_node.cpp:45
blastn node api
CArgs –.
Definition: ncbiargs.hpp:379
Class to mix batch size for BLAST runs.
void SetTargetHits(Int4 target)
Int4 GetBatchSize(Int4 hits=-1)
CRef< CBlastOptionsHandle > SetOptionsForSavedStrategy(const CArgs &args)
Combine the command line arguments into a CBlastOptions object recovered from saved search strategy.
string GetTask() const
Get the task for this object.
CRef< CBlastOptionsHandle > SetOptions(const CArgs &args)
Extract the command line arguments into a CBlastOptionsHandle object.
CRef< CBlastDatabaseArgs > GetBlastDatabaseArgs() const
Get the BLAST database arguments.
bool ExecuteRemotely() const
Determine whether the search should be executed remotely or not.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
CRef< CQueryOptionsArgs > GetQueryOptionsArgs() const
Get the options for the query sequence(s)
string GetClientId() const
Retrieve the client ID for remote requests.
CRef< CFormattingArgs > GetFormattingArgs() const
Get the formatting options.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
Class to capture message from diag handler.
Definition: blast_aux.hpp:249
Class representing a text file containing sequences in fasta format.
This class formats the BLAST results for command line applications.
void SetHitsSortOption(int hitsSortOption)
void SetHspsSortOption(int hspsSortOption)
void PrintOneResultSet(const blast::CSearchResults &results, CConstRef< blast::CBlastQueryVector > queries, unsigned int itr_num=numeric_limits< unsigned int >::max(), blast::CPsiBlastIterationState::TSeqIds prev_seqids=blast::CPsiBlastIterationState::TSeqIds(), bool is_deltablast_domain_result=false)
Print all alignment information for a single query sequence along with any errors or warnings (errors...
void SetCustomDelimiter(string customDelim)
void PrintEpilog(const blast::CBlastOptions &options)
Print the footer of the blast report.
void SetBaseFile(string base)
For use by XML2 only.
void ResetScopeHistory()
Resets the scope history for some output formats.
void SetLineLength(size_t len)
Set Alignment Length.
void WriteArchive(blast::IQueryFactory &queries, blast::CBlastOptionsHandle &options_handle, const blast::CSearchResultSet &results, unsigned int num_iters=0, const list< CRef< objects::CBlast4_error > > &msg=list< CRef< objects::CBlast4_error > >())
Writes out the query and results as an "archive" format.
Int8 GetDbTotalLength()
Get total length of the database.
void PrintProlog()
Print the header of the blast report.
void SetQueryRange(const TSeqRange &query_range)
Set query range.
Class that centralizes the configuration data for sequences to be converted.
Definition: blast_input.hpp:48
Generalized converter from an abstract source of biological sequence data to collections of blast inp...
EState GetState()
Definition: blast_node.hpp:100
void SendMsg(CBlastNodeMsg::EMsgType msg_type, void *ptr=NULL)
Definition: blast_node.cpp:103
const CArgs & GetArgs()
Definition: blast_node.hpp:102
CBlastAppDiagHandler & GetDiagHandler()
Definition: blast_node.hpp:103
void SetDataLoaderPrefix()
Definition: blast_node.cpp:112
int GetNodeNum()
Definition: blast_node.hpp:99
int GetStatus()
Definition: blast_node.hpp:101
const CNcbiArguments & GetArguments()
Definition: blast_node.hpp:104
void SetState(EState state)
Definition: blast_node.hpp:112
void SetQueriesLength(int l)
Definition: blast_node.hpp:114
void SetStatus(int status)
Definition: blast_node.hpp:113
string & GetNodeIdStr()
Definition: blast_node.hpp:106
Encapsulates ALL the BLAST algorithm's options.
virtual CNcbiIstream & GetInputStream()
Get the input stream.
virtual int GetQueryBatchSize() const
@inheritDoc
CNcbiStrstream & GetOutputStrStream()
Definition: blastn_args.hpp:76
virtual CNcbiOstream & GetOutputStream()
Get the output stream.
string m_Input
Definition: blastn_node.hpp:55
virtual ~CBlastnNode(void)
Definition: blastn_node.cpp:72
CRef< CBlastnNodeArgs > m_CmdLineArgs
Definition: blastn_node.hpp:56
CBlastnNode(int check_num, const CNcbiArguments &ncbi_args, const CArgs &args, CBlastAppDiagHandler &bah, string &input, int query_index, int num_queries, CBlastNodeMailbox *mailbox=NULL)
Definition: blastn_node.cpp:50
virtual void * Main(void)
Derived (user-created) class must provide a real thread function.
Definition: blastn_node.cpp:78
virtual int GetBlastResults(CNcbiOstream &os)
Definition: blastn_node.cpp:61
void DebugDumpText(ostream &out, const string &bundle, unsigned int depth) const
Definition: ddumpable.cpp:56
int GetHitsSortOption() const
virtual bool ArchiveFormatRequested(const CArgs &args) const
string GetCustomOutputFormatSpec() const
Retrieve for string that specifies the custom output format for tabular and comma-separated value.
EOutputFormat GetFormattedOutputChoice() const
Get the choice of formatted output.
int GetHspsSortOption() const
TSeqPos GetNumAlignments() const
Number of alignments to show in traditional BLAST output.
bool ShowGis() const
Display the NCBI GIs in formatted output?
TSeqPos GetNumDescriptions() const
Number of one-line descriptions to show in traditional BLAST output.
size_t GetLineLength() const
bool DisplayHtmlOutput() const
Display HTML output?
string GetCustomDelimiter()
Class to perform a BLAST search on local BLAST databases. Note that PHI-BLAST can be run using this cl...
Definition: local_blast.hpp:62
CNcbiArguments –.
Definition: ncbienv.hpp:236
NCBI C++ Object Manager dependent implementation of IQueryFactory.
objects::ENa_strand GetStrand() const
Get strand to search in query sequence(s)
Definition: blast_args.hpp:800
bool GetParseDeflines() const
Should the defline be parsed?
Definition: blast_args.hpp:804
bool QueryIsProtein() const
Is the query sequence protein?
Definition: blast_args.hpp:807
TSeqRange GetRange() const
Get query sequence range restriction.
Definition: blast_args.hpp:796
bool UseLowercaseMasks() const
Use lowercase masking in FASTA input?
Definition: blast_args.hpp:802
Search Results for All Queries.
const string kArgOutput
Output file name.
int GetDbGeneticCode() const
virtual void SetNumberOfThreads(size_t nthreads)
Mutator for the number of threads.
void ResetMessages(void)
Reset message buffer, erase all saved messages.
Definition: blast_aux.cpp:1174
CRef< CSearchResultSet > Run()
Executes the search.
static void InitializeMegablastDbIndex(CRef< CBlastOptions > options)
Initialize a megablast BLAST database index.
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
size_t SplitQuery_GetChunkSize(EProgram program)
Returns the optimal chunk size for a given task.
Definition: local_blast.cpp:54
int GetFilteringAlgorithm()
Retrieve the database filtering algorithm.
EProgram GetProgram() const
Accessors/Mutators for individual options.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
int GetQueryGeneticCode() const
bool GetSumStatisticsMode() const
Sum statistics options.
CRef< CSearchResultSet > GetResultSet()
Submit the search (if necessary) and return the results.
bool GetMBIndexLoaded() const
void DoNotSaveMessages(void)
Call to turn off saving diag messages and discard all saved messages.
Definition: blast_aux.cpp:1189
Int4 GetNumExtensions()
Retrieve the number of extensions performed during the search.
bool GetUseIndex() const
const char * GetMatrixName() const
list< CRef< objects::CBlast4_error > > & GetMessages(void)
Return list of saved diag messages.
Definition: blast_aux.hpp:262
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
#define NULL
Definition: ncbistd.hpp:225
void SetDiagPostPrefix(const char *prefix)
Specify a string to prefix all subsequent error postings with.
Definition: ncbidiag.cpp:6097
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NcbiCerr
Definition: ncbistre.hpp:544
#define kEmptyStr
Definition: ncbistr.hpp:123
static string IntToString(int value, TNumToStringFlags flags=0, int base=10)
Convert int to string.
Definition: ncbistr.hpp:5084
static int input()
Main class to perform a BLAST search on the local machine.
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares the CRemoteBlast class.
Configuration structure for the CBlastScopeSource.
#define _ASSERT
else result
Definition: token2.c:20
Modified on Sun Apr 14 05:29:42 2024 by modify_doxy.py rev. 669887