NCBI C++ ToolKit
deltablast_app.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: deltablast_app.cpp 90594 2020-07-02 13:16:00Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Greg Boratyn
27  *
28  */
29 
30 /** @file deltablast_app.cpp
31  * DELTA-BLAST command line application
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbistl.hpp>
36 #include <corelib/ncbiapp.hpp>
38 #include <algo/blast/api/psiblast.hpp> // needed for psiblast iterations
46 #include <objects/scoremat/Pssm.hpp> // needed for printing Pssm
47 #include <objects/scoremat/PssmIntermediateData.hpp> // needed for clearing
48  // information content in ascii Pssm
49 #include <objects/seq/Seq_descr.hpp> // needed for adding query title to Pssm
50 #include "blast_app_util.hpp"
51 
52 #ifndef SKIP_DOXYGEN_PROCESSING
55 USING_SCOPE(blast);
56 #endif
57 
59 {
60 public:
61  /** @inheritDoc */
64  version->SetVersionInfo(new CBlastVersion());
67  if (m_UsageReport.IsEnabled()) {
69  }
70  }
73  }
74 private:
75  /** @inheritDoc */
76  virtual void Init();
77  /** @inheritDoc */
78  virtual int Run();
79 
80 
81  /// Save Pssm to file
83 
84  /// Do PSI-BLAST iterations following DELTA-BLAST
87  CConstRef<CBioseq> query_bioseq,
90  const CArgs& args,
91  CRef<CLocalDbAdapter> db_adapter,
92  CRef<CScope> scope,
93  CBlastFormat& formatter);
94 
95  /// Compute PSSM for next PSI-BLAST iteration
97  const CBioseq& bioseq,
100  CRef<CScope> scope,
101  CRef<CBlastAncillaryData> ancillary_data);
102 
103 
104  /// This application's command line args
106 
108 
112 };
113 
115 {
116  // formulate command line arguments
117 
119 
120  // read the command line
121 
123  | fHideDryRun);
125 }
126 
127 void
129 {
130  if (pssm.Empty()) {
131  return;
132  }
133 
134  if (m_CmdLineArgs->SaveCheckpoint()) {
136  }
137 
138  if (m_CmdLineArgs->SaveAsciiPssm()) {
139  if (m_AncillaryData.Empty() && pssm.NotEmpty()) {
141  }
142 
146  }
147 }
148 
149 
150 // Add query sequence title from scope to computed Pssm
152  CRef<CBlastQueryVector> query_batch,
153  CRef<CScope> scope)
154 {
155  CConstRef<CSeq_id> query_id =
156  query_batch->GetBlastSearchQuery(0)->GetQueryId();
157 
158  CBioseq_Handle bhandle = scope->GetBioseqHandle(*query_id);
159  CConstRef<CBioseq> scope_bioseq = bhandle.GetCompleteBioseq();
160 
161  if (scope_bioseq->IsSetDescr()) {
162 
163  CBioseq& pssm_bioseq = pssm->SetQuery().SetSeq();
164  ITERATE (CSeq_descr::Tdata, it, scope_bioseq->GetDescr().Get()) {
165  pssm_bioseq.SetDescr().Set().push_back(*it);
166  }
167  }
168 }
169 
170 // Add sequence data to pssm query
172  CRef<CBlastQueryVector> query_batch,
173  CRef<CScope> scope)
174 {
175  CConstRef<CSeq_id> query_id =
176  query_batch->GetBlastSearchQuery(0)->GetQueryId();
177 
178  // first make sure that query id and pssm query id are the same
179  if (!pssm->GetPssm().GetQuery().GetSeq().GetFirstId()->Match(*query_id)) {
180  NCBI_THROW(CException, eInvalid, "Query and PSSM sequence ids do not "
181  "match");
182  }
183 
184  CBioseq_Handle bhandle = scope->GetBioseqHandle(*query_id);
185  CConstRef<CBioseq> scope_bioseq = bhandle.GetCompleteBioseq();
186 
187  // set sequence data only if query bioseq has them and pssm does not
188  if (scope_bioseq->GetInst().IsSetSeq_data()
189  && !pssm->GetPssm().GetQuery().GetSeq().GetInst().IsSetSeq_data()) {
190  const CSeq_data& seq_data = scope_bioseq->GetInst().GetSeq_data();
191  pssm->SetQuery().SetSeq().SetInst().SetSeq_data(
192  const_cast<CSeq_data&>(seq_data));
193  }
194 }
195 
197 {
198  int status = BLAST_EXIT_SUCCESS;
199 
200  try {
201 
202  // Allow the fasta reader to complain on invalid sequence input
204  SetDiagPostPrefix("deltablast");
205  SetDiagHandler(&m_bah, false);
206 
207  /*** Get the BLAST options ***/
208  const CArgs& args = GetArgs();
211  const CBlastOptions& opt = opts_hndl->GetOptions();
212 
213  /*** Initialize the database/subject ***/
215  CRef<CLocalDbAdapter> db_adapter;
216  CRef<CScope> scope;
217  InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
218  db_adapter, scope);
219  _ASSERT(db_adapter && scope);
220 
221  /*** Get the query sequence(s) ***/
222  CRef<CQueryOptionsArgs> query_opts =
224  SDataLoaderConfig dlconfig =
226  db_adapter);
227  CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
228  query_opts->UseLowercaseMasks(),
229  query_opts->GetParseDeflines(),
230  query_opts->GetRange());
232  ERR_POST(Warning << "Query is Empty!");
233  return BLAST_EXIT_SUCCESS;
234  }
236  size_t query_batch_size = m_CmdLineArgs->GetQueryBatchSize();
239 
240  query_batch_size = 1;
241  }
242  CBlastInput input(&fasta, query_batch_size);
243 
244  /*** Initialize the domain database ***/
245  CRef<CLocalDbAdapter> domain_db_adapter(new CLocalDbAdapter(
247  _ASSERT(domain_db_adapter);
248  CLocalDbAdapter* domain_db_ptr = NULL;
249 
250  // domain database does not need to be loaded into scope unless
251  // domain search results are requested
253  CRef<CSeqDB> seqdb(new CSeqDB(domain_db_adapter->GetDatabaseName(),
255  scope->AddDataLoader(RegisterOMDataLoader(seqdb),
257 
258  domain_db_ptr = domain_db_adapter.GetNonNullPointer();
259  }
260 
261  /*** Get the formatting options ***/
263  CBlastFormat formatter(opt, *db_adapter,
264  fmt_args->GetFormattedOutputChoice(),
265  query_opts->GetParseDeflines(),
267  fmt_args->GetNumDescriptions(),
268  fmt_args->GetNumAlignments(),
269  *scope,
270  opt.GetMatrixName(),
271  fmt_args->ShowGis(),
272  fmt_args->DisplayHtmlOutput(),
273  opt.GetQueryGeneticCode(),
274  opt.GetDbGeneticCode(),
275  opt.GetSumStatisticsMode(),
277  db_adapter->GetFilteringAlgorithm(),
278  fmt_args->GetCustomOutputFormatSpec(),
279  false, false, NULL,
280  domain_db_ptr,
282 
283  formatter.SetQueryRange(query_opts->GetRange());
284  formatter.SetLineLength(fmt_args->GetLineLength());
285  if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) {
286  formatter.SetBaseFile(args[kArgOutput].AsString());
287  }
288  formatter.PrintProlog();
289 
290  /*** Process the input ***/
291  for (; !input.End(); formatter.ResetScopeHistory(), QueryBatchCleanup()) {
292 
293  CRef<CBlastQueryVector> query_batch(input.GetNextSeqBatch(*scope));
295  new CObjMgr_QueryFactory(*query_batch));
296 
297  SaveSearchStrategy(args, m_CmdLineArgs, queries, opts_hndl);
298 
300  CRef<blast::CSearchResultSet> domain_results;
301 
302  CRef<CDeltaBlast> deltablast;
304 
306 
307  // Remote BLAST
308 
309  CRef<CRemoteBlast> rmt_blast =
310  InitializeRemoteBlast(queries, db_args, opts_hndl,
313  results = rmt_blast->GetResultSet();
314  pssm = rmt_blast->GetPSSM();
315  } else {
316 
317  // Run locally
318 
320  dynamic_cast<CDeltaBlastOptionsHandle*>(&*opts_hndl));
321 
322  deltablast.Reset(new CDeltaBlast(queries, db_adapter,
323  domain_db_adapter,
324  delta_opts));
325  deltablast->SetNumberOfThreads(m_CmdLineArgs->GetNumThreads());
326  results = deltablast->Run();
327  domain_results = deltablast->GetDomainResults();
328  pssm = deltablast->GetPssm();
329  }
330 
331  // deltablast computed pssm does not have query title, so
332  // it must be added if pssm is requested
334  || fmt_args->GetFormattedOutputChoice()
336 
337  s_AddSeqTitleToPssm(pssm, query_batch, scope);
338  }
339 
340  // remote blast removes sequence data from pssm for known ids
341  // the data must be added if pssm is requested after remote search
344  || m_CmdLineArgs->SaveAsciiPssm())) {
345 
346  s_AddSeqDataToPssm(pssm, query_batch, scope);
347  }
348 
349  // only one PSI-BLAST iteration requested, then print results
350  // (the first PSI-BLAST iteration is done by DELTA-BLAST)
352 
353  SavePssmToFile(pssm);
354 
355  blast::CSearchResultSet::const_iterator domain_it;
357  domain_it = domain_results->begin();
358  }
359 
360  if (fmt_args->ArchiveFormatRequested(args)) {
361  formatter.WriteArchive(*queries, *opts_hndl, *results, 0, m_bah.GetMessages());
363  } else {
364  BlastFormatter_PreFetchSequenceData(*results, scope,
365  fmt_args->GetFormattedOutputChoice());
367  BlastFormatter_PreFetchSequenceData(*domain_results, scope,
368  fmt_args->GetFormattedOutputChoice());
369  }
370  ITERATE(blast::CSearchResultSet, result, *results) {
372  _ASSERT(domain_it != domain_results->end());
373  formatter.PrintOneResultSet(**domain_it,
374  query_batch,
376  blast::CPsiBlastIterationState::TSeqIds(),
377  true);
378  ++domain_it;
379  }
380  formatter.PrintOneResultSet(**result, query_batch);
381  }
382  }
383 
385  CConstRef<CBioseq> query_bioseq(&pssm->GetQuery().GetSeq());
386  blast::CSearchResults& res = (*results)[0];
387 
389  dynamic_cast<CPSIBlastOptionsHandle*>(&*opts_hndl));
390 
391  pssm = ComputePssmForNextPsiBlastIteration(*query_bioseq,
392  res.GetSeqAlign(),
393  psi_opts,
394  scope,
395  res.GetAncillaryData());
396 
397  SavePssmToFile(pssm);
398  }
399  }
400  else {
401 
402  // if more than 1 iteration is requested, then
403  // do PSI-BLAST iterations, this is not allowed for remote blast
404 
405  SavePssmToFile(pssm);
406 
407  // print domain search results if requested
408  // query_batch_size == 1 if number of iterations > 1
410  ITERATE (blast::CSearchResultSet, result,
411  *deltablast->GetDomainResults()) {
412 
413  formatter.PrintOneResultSet(**result, query_batch,
415  blast::CPsiBlastIterationState::TSeqIds(),
416  true);
417  }
418  }
419 
420  // use pssm variable here, because it will contain the query title
421  CConstRef<CBioseq> query_bioseq(&pssm->GetQuery().GetSeq());
422 
423  bool retval = DoPsiBlastIterations(opts_hndl,
424  query_batch,
425  query_bioseq,
426  results,
427  db_args,
428  args,
429  db_adapter,
430  scope,
431  formatter);
432 
433  if (retval && !fmt_args->HasStructuredOutputFormat()
434  && fmt_args->GetFormattedOutputChoice()
436 
438  << "Search has CONVERGED!"
439  << NcbiEndl;
440  }
441  // Reset for next query sequence.
443  }
444  }
445 
446  formatter.PrintEpilog(opt);
447 
449  opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
450  }
451 
454  } CATCH_ALL(status)
455  if(!m_bah.GetMessages().empty()) {
456  const CArgs & a = GetArgs();
458  }
461  return status;
462 }
463 
464 // This is a simplified version of CPsiBlastApp::DoIterations()
465 bool
468  CConstRef<CBioseq> query_bioseq,
470  CRef<CBlastDatabaseArgs> db_args,
471  const CArgs& args,
472  CRef<CLocalDbAdapter> db_adapter,
473  CRef<CScope> scope,
474  CBlastFormat& formatter)
475 {
476  bool converged = false;
477 
478  const size_t kNumIterations = m_CmdLineArgs->GetNumberOfPsiBlastIterations();
479 
480 
481  CPsiBlastIterationState itr(kNumIterations);
483 
484 
485  psi_opts.Reset(dynamic_cast<CPSIBlastOptionsHandle*>(&*opts_hndl));
486  CRef<CPsiBlast> psiblast;
487 
488  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(*query));
489 
490  BlastFormatter_PreFetchSequenceData(*results, scope,
494  formatter.WriteArchive(*query_factory, *opts_hndl, *results,
497  }
498  else {
499  ITERATE(blast::CSearchResultSet, result, *results) {
500  formatter.PrintOneResultSet(**result, query,
501  itr.GetIterationNumber(),
503  }
504  }
505  // FIXME: what if there are no results!?!
506 
507  blast::CSearchResults& results_1st_query = (*results)[0];
508  if ( !results_1st_query.HasAlignments() ) {
509  return false;
510  }
511 
512  CConstRef<CSeq_align_set> aln(results_1st_query.GetSeqAlign());
514  CPsiBlastIterationState::GetSeqIds(aln, psi_opts, ids);
515 
516  itr.Advance(ids);
517 
518  while (itr) {
519 
521  pssm = ComputePssmForNextPsiBlastIteration(*query_bioseq, aln,
522  psi_opts,
523  scope,
524  (*results)[0].GetAncillaryData());
525 
526  if (psiblast.Empty()) {
527  psiblast.Reset(new CPsiBlast(pssm, db_adapter, psi_opts));
528  }
529  else {
530  psiblast->SetPssm(pssm);
531  }
532 
533  SavePssmToFile(pssm);
534 
536  results = psiblast->Run();
537 
538  BlastFormatter_PreFetchSequenceData(*results, scope,
542  formatter.WriteArchive(*pssm, *opts_hndl, *results,
543  itr.GetIterationNumber());
544  }
545  else {
546  ITERATE(blast::CSearchResultSet, result, *results) {
547  formatter.PrintOneResultSet(**result, query,
548  itr.GetIterationNumber(),
550  }
551  }
552  // FIXME: what if there are no results!?!
553 
554  blast::CSearchResults& results_1st_query = (*results)[0];
555  if ( !results_1st_query.HasAlignments() ) {
556  break;
557  }
558 
559  aln.Reset(results_1st_query.GetSeqAlign());
561  CPsiBlastIterationState::GetSeqIds(aln, psi_opts, new_ids);
562 
563  itr.Advance(new_ids);
564  }
565  if (itr.HasConverged()) {
566  converged = true;
567  }
568 
571  pssm = ComputePssmForNextPsiBlastIteration(*query_bioseq, aln,
572  psi_opts,
573  scope,
574  results_1st_query.GetAncillaryData());
575 
576  SavePssmToFile(pssm);
577  }
578 
579  return converged;
580 }
581 
586  CRef<CScope> scope,
587  CRef<CBlastAncillaryData> ancillary_data)
588 {
591 
592  m_AncillaryData = ancillary_data;
593  return PsiBlastComputePssmFromAlignment(bioseq, sset, scope, *opts_handle,
594  m_AncillaryData, diags);
595 }
596 
597 
598 #ifndef SKIP_DOXYGEN_PROCESSING
599 int main(int argc, const char* argv[] /*, const char* envp[]*/) // program entry point of the DELTA-BLAST command line application
600 {
601  return CDeltaBlastApp().AppMain(argc, argv); // create a temporary CDeltaBlastApp and return the value produced by its AppMain()
602 }
603 #endif /* SKIP_DOXYGEN_PROCESSING */
604 
605 
User-defined methods of the data storage class.
User-defined methods of the data storage class.
Produce formatted blast output for command line applications.
CRef< blast::CRemoteBlast > InitializeRemoteBlast(CRef< blast::IQueryFactory > queries, CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool verbose_output, const string &client_id, CRef< objects::CPssmWithParameters > pssm)
Initializes a CRemoteBlast instance for usage by command line BLAST binaries.
blast::SDataLoaderConfig InitializeQueryDataLoaderConfiguration(bool query_is_protein, CRef< blast::CLocalDbAdapter > db_adapter)
Initialize the data loader configuration for the query.
void SaveSearchStrategy(const CArgs &args, blast::CBlastAppArgs *cmdline_args, CRef< blast::IQueryFactory > queries, CRef< blast::CBlastOptionsHandle > opts_hndl, CRef< objects::CPssmWithParameters > pssm, unsigned int num_iters)
Save the search strategy corresponding to the current command line search.
void QueryBatchCleanup()
Clean up formatter scope and release.
string RegisterOMDataLoader(CRef< CSeqDB > db_handle)
Register the BLAST database data loader using the already initialized CSeqDB object.
CRef< CBlastAncillaryData > ExtractPssmAncillaryData(const CPssmWithParameters &pssm)
Auxiliary function to extract the ancillary data from the PSSM.
bool RecoverSearchStrategy(const CArgs &args, blast::CBlastAppArgs *cmdline_args)
Recover search strategy from input file.
void PrintErrorArchive(const CArgs &a, const list< CRef< CBlast4_error > > &msg)
Function to print blast archive with only error messages (search failed) to output stream.
void InitializeSubject(CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool is_remote_search, CRef< blast::CLocalDbAdapter > &db_adapter, CRef< objects::CScope > &scope)
Initializes the subject/database as well as its scope.
string GetCmdlineArgs(const CNcbiArguments &a)
void BlastFormatter_PreFetchSequenceData(const blast::CSearchResultSet &results, CRef< CScope > scope, blast::CFormattingArgs::EOutputFormat format_type)
This method optimizes the retrieval of sequence data to scope.
bool UseXInclude(const CFormattingArgs &f, const string &s)
bool IsIStreamEmpty(CNcbiIstream &in)
void LogQueryInfo(CBlastUsageReport &report, const CBlastInput &q_info)
Utility functions for BLAST command line applications.
#define CATCH_ALL(exit_code)
Standard catch statement for all BLAST command line programs.
#define BLAST_EXIT_SUCCESS
Command line binary exit code: success.
Interface for reading SRA sequences into blast input.
PSIDiagnosticsRequest * PSIDiagnosticsRequestNewEx(Boolean save_ascii_pssm)
Allocates a PSIDiagnosticsRequest structure, setting fields to their default values for their use in ...
Definition: blast_psi.c:591
Definition of classes which constitute the results of running a BLAST search.
CArgs –.
Definition: ncbiargs.hpp:379
CBioseq_Handle –.
const CSeq_id * GetFirstId() const
Definition: Bioseq.cpp:271
virtual CNcbiIstream & GetInputStream()
Get the input stream.
size_t GetNumThreads() const
Get the number of threads to spawn.
CRef< CBlastOptionsHandle > SetOptions(const CArgs &args)
Extract the command line arguments into a CBlastOptionsHandle object.
CRef< CBlastDatabaseArgs > GetBlastDatabaseArgs() const
Get the BLAST database arguments.
CArgDescriptions * SetCommandLine()
Set the command line arguments.
bool ExecuteRemotely() const
Determine whether the search should be executed remotely or not.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
CRef< CQueryOptionsArgs > GetQueryOptionsArgs() const
Get the options for the query sequence(s)
string GetClientId() const
Retrieve the client ID for remote requests.
CRef< CFormattingArgs > GetFormattingArgs() const
Get the formatting options.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
virtual CNcbiOstream & GetOutputStream()
Get the output stream.
Class to capture message from diag handler.
Definition: blast_aux.hpp:249
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
Definition: blast_args.hpp:893
Class representing a text file containing sequences in fasta format.
This class formats the BLAST results for command line applications.
void LogBlastSearchInfo(blast::CBlastUsageReport &report)
void PrintOneResultSet(const blast::CSearchResults &results, CConstRef< blast::CBlastQueryVector > queries, unsigned int itr_num=numeric_limits< unsigned int >::max(), blast::CPsiBlastIterationState::TSeqIds prev_seqids=blast::CPsiBlastIterationState::TSeqIds(), bool is_deltablast_domain_result=false)
Print all alignment information for a single query sequence along with any errors or warnings (errors...
void PrintEpilog(const blast::CBlastOptions &options)
Print the footer of the blast report.
void SetBaseFile(string base)
For use by XML2 only.
void ResetScopeHistory()
Resets the scope history for some output formats.
void SetLineLength(size_t len)
Set Alignment Length.
void WriteArchive(blast::IQueryFactory &queries, blast::CBlastOptionsHandle &options_handle, const blast::CSearchResultSet &results, unsigned int num_iters=0, const list< CRef< objects::CBlast4_error > > &msg=list< CRef< objects::CBlast4_error > >())
Writes out the query and results as an "archive" format.
void PrintProlog()
Print the header of the blast report.
void SetQueryRange(const TSeqRange &query_range)
Set query range.
Class that centralizes the configuration data for sequences to be converted.
Definition: blast_input.hpp:48
Generalized converter from an abstract source of biological sequence data to collections of blast inp...
Encapsulates ALL the BLAST algorithm's options.
CRef< CBlastSearchQuery > GetBlastSearchQuery(size_type i) const
Get the CBlastSearchQuery object at index i.
Definition: sseqloc.hpp:367
void AddParam(EUsageParams p, int val)
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
Definition: version.hpp:53
CConstRef –.
Definition: ncbiobj.hpp:1266
void DebugDumpText(ostream &out, const string &bundle, unsigned int depth) const
Definition: ddumpable.cpp:56
Handle command line arguments for deltablast binary.
size_t GetNumberOfPsiBlastIterations(void) const
Get number of PSI-BLAST iterations.
CNcbiOstream * GetCheckpointStream(void)
Get stream for saving Pssm.
bool GetSaveLastPssm(void) const
Should PSSM after last database search be saved.
virtual int GetQueryBatchSize(void) const
Get query batch size.
bool GetShowDomainHits(void) const
Was printing domain hits requested.
CNcbiOstream * GetAsciiPssmStream(void)
Get stream for saving ascii Pssm.
bool SaveAsciiPssm(void) const
Was saving ascii Pssm requested.
bool SaveCheckpoint(void) const
Was saving Pssm requested.
CRef< CSearchDatabase > GetDomainDatabase(void)
Get conserved domain database.
CRef< CPssmWithParameters > ComputePssmForNextPsiBlastIteration(const CBioseq &bioseq, CConstRef< CSeq_align_set > sset, CConstRef< CPSIBlastOptionsHandle > opts_handle, CRef< CScope > scope, CRef< CBlastAncillaryData > ancillary_data)
Compute PSSM for next PSI-BLAST iteration.
void SavePssmToFile(CConstRef< CPssmWithParameters > pssm)
Save Pssm to file.
CBlastUsageReport m_UsageReport
virtual void Init()
@inheritDoc
virtual int Run()
@inheritDoc
CDeltaBlastApp()
@inheritDoc
bool DoPsiBlastIterations(CRef< CBlastOptionsHandle > opts_hndl, CRef< CBlastQueryVector > query, CConstRef< CBioseq > query_bioseq, CRef< blast::CSearchResultSet > results, CRef< CBlastDatabaseArgs > db_args, const CArgs &args, CRef< CLocalDbAdapter > db_adapter, CRef< CScope > scope, CBlastFormat &formatter)
Do PSI-BLAST iterations following DELTA-BLAST.
CRef< CDeltaBlastAppArgs > m_CmdLineArgs
This application's command line args.
CBlastAppDiagHandler m_bah
CStopWatch m_StopWatch
CRef< CBlastAncillaryData > m_AncillaryData
Handle to the protein-protein options to the BLAST algorithm.
A simple realization of the DELTA-BLAST algorithm: search domain database, compute PSSM,...
Definition: deltablast.hpp:60
bool HasStructuredOutputFormat() const
Returns true if the desired output format is structured (needed to determine whether to print or not ...
virtual bool ArchiveFormatRequested(const CArgs &args) const
string GetCustomOutputFormatSpec() const
Retrieve the string that specifies the custom output format for tabular and comma-separated value output.
EOutputFormat GetFormattedOutputChoice() const
Get the choice of formatted output.
@ eArchiveFormat
BLAST archive format.
TSeqPos GetNumAlignments() const
Number of alignments to show in traditional BLAST output.
bool ShowGis() const
Display the NCBI GIs in formatted output?
TSeqPos GetNumDescriptions() const
Number of one-line descriptions to show in traditional BLAST output.
size_t GetLineLength() const
bool DisplayHtmlOutput() const
Display HTML output?
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
NCBI C++ Object Manager dependent implementation of IQueryFactory.
Handle to the protein-protein options to the BLAST algorithm.
Wrapper class for PSIDiagnosticsRequest .
Definition: blast_aux.hpp:347
Represents the iteration state in PSI-BLAST.
Runs a single iteration of the PSI-BLAST algorithm on a BLAST database.
Definition: psiblast.hpp:79
const CSeq_entry & GetQuery() const
Retrieve the query sequence.
CSeq_entry & SetQuery()
Retrieve the query sequence.
objects::ENa_strand GetStrand() const
Get strand to search in query sequence(s)
Definition: blast_args.hpp:800
bool GetParseDeflines() const
Should the defline be parsed?
Definition: blast_args.hpp:804
bool QueryIsProtein() const
Is the query sequence protein?
Definition: blast_args.hpp:807
TSeqRange GetRange() const
Get query sequence range restriction.
Definition: blast_args.hpp:796
bool UseLowercaseMasks() const
Use lowercase masking in FASTA input?
Definition: blast_args.hpp:802
CRef –.
Definition: ncbiobj.hpp:618
CSeqDB.
Definition: seqdb.hpp:161
@ eProtein
Definition: seqdb.hpp:174
CStopWatch –.
Definition: ncbitime.hpp:1938
const string kArgOutput
Output file name.
void Print(const CCompactSAMApplication::AlignInfo &ai)
Declares CPsiBlast, the C++ API for the PSI-BLAST engine.
USING_SCOPE(objects)
static void s_AddSeqDataToPssm(CRef< CPssmWithParameters > pssm, CRef< CBlastQueryVector > query_batch, CRef< CScope > scope)
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
static void s_AddSeqTitleToPssm(CRef< CPssmWithParameters > pssm, CRef< CBlastQueryVector > query_batch, CRef< CScope > scope)
Main argument class for DELTA-BLAST application.
CRef< CSearchResultSet > Run()
Run the PSI-BLAST engine for one iteration.
Definition: psiblast.cpp:95
CRef< objects::CPssmWithParameters > GetPSSM(void)
Get the PSSM produced by the search.
int GetDbGeneticCode() const
string GetDatabaseName() const
Returns the database name if appropriate, else kEmptyStr for subject sequences.
virtual void SetNumberOfThreads(size_t nthreads)
Mutator for the number of threads.
void ResetMessages(void)
Reset message buffer, erase all saved messages.
Definition: blast_aux.cpp:1174
unsigned int GetIterationNumber() const
Return the number of the current iteration.
TSeqIds GetPreviouslyFoundSeqIds() const
Retrieve the set of Seq-id's found in the previous iteration.
int GetFilteringAlgorithm()
Retrieve the database filtering algorithm.
void SetPssm(CConstRef< objects::CPssmWithParameters > pssm)
This method allows the same object to be reused when performing multiple iterations.
Definition: psiblast.cpp:83
bool HasConverged()
Determines if the PSI-BLAST search has converged (i.e.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
int GetQueryGeneticCode() const
CRef< objects::CPssmWithParameters > PsiBlastComputePssmFromAlignment(const objects::CBioseq &query, CConstRef< objects::CSeq_align_set > alignment, CRef< objects::CScope > database_scope, const CPSIBlastOptionsHandle &opts_handle, CConstRef< CBlastAncillaryData > ancillary_data, PSIDiagnosticsRequest *diagnostics_req=0)
Computes a PSSM from the result of a PSI-BLAST iteration.
Definition: psiblast.cpp:102
bool GetSumStatisticsMode() const
Sum statistics options.
CRef< CSearchResultSet > GetResultSet()
Submit the search (if necessary) and return the results.
void Advance(const TSeqIds &list)
Advance the iterator by passing it the list of Seq-ids which passed the inclusion criteria for the cu...
static void GetSeqIds(CConstRef< objects::CSeq_align_set > seqalign, CConstRef< CPSIBlastOptionsHandle > opts, TSeqIds &retval)
Extract the sequence ids from the sequence alignment which identify those sequences that will be used...
const char * GetMatrixName() const
list< CRef< objects::CBlast4_error > > & GetMessages(void)
Return list of saved diag messages.
Definition: blast_aux.hpp:262
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
Definition: ncbiapp.cpp:1154
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
Definition: ncbiapp.cpp:1292
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:285
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:799
CVersionInfo GetVersion(void) const
Get the program version information.
Definition: ncbiapp.cpp:1164
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1175
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideLogfile
Hide log file description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
static void PrintAsciiPssm(const objects::CPssmWithParameters &pssm, CConstRef< blast::CBlastAncillaryData > ancillary_data, CNcbiOstream &out)
Prints the PSSM in ASCII format (as in blastpgp's -Q option)
#define NULL
Definition: ncbistd.hpp:225
void SetDiagPostPrefix(const char *prefix)
Specify a string to prefix all subsequent error postings with.
Definition: ncbidiag.cpp:6097
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void SetDiagHandler(CDiagHandler *handler, bool can_delete=true)
Set the diagnostic handler using the specified diagnostic handler class.
Definition: ncbidiag.cpp:6288
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
bool Match(const CSeq_id &sid2) const
Match() - TRUE if SeqIds are equivalent.
Definition: Seq_id.hpp:1033
void AddDataLoader(const string &loader_name, TPriority pri=kPriority_Default)
Add data loader by name.
Definition: scope.cpp:510
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
CConstRef< CBioseq > GetCompleteBioseq(void) const
Get the complete bioseq.
TObjectType * GetNonNullPointer(void)
Get pointer value and throw a null pointer exception if pointer is null.
Definition: ncbiobj.hpp:968
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CConstRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:1392
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
#define NcbiEndl
Definition: ncbistre.hpp:548
#define NcbiCerr
Definition: ncbistre.hpp:544
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2776
void Start(void)
Start the timer.
Definition: ncbitime.hpp:2765
#define CVersion
const TQuery & GetQuery(void) const
Get the Query member data.
Definition: Pssm_.hpp:772
const TPssm & GetPssm(void) const
Get the Pssm member data.
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
list< CRef< CSeqdesc > > Tdata
Definition: Seq_descr_.hpp:91
bool IsSetSeq_data(void) const
the sequence Check if a value has been assigned to Seq_data data member.
Definition: Seq_inst_.hpp:805
const TInst & GetInst(void) const
Get the Inst member data.
Definition: Bioseq_.hpp:336
const Tdata & Get(void) const
Get the member data.
Definition: Seq_descr_.hpp:166
void SetInst(TInst &value)
Assign a value to Inst data member.
Definition: Bioseq_.cpp:86
bool IsSetDescr(void) const
descriptors Check if a value has been assigned to Descr data member.
Definition: Bioseq_.hpp:303
void SetDescr(TDescr &value)
Assign a value to Descr data member.
Definition: Bioseq_.cpp:65
const TSeq_data & GetSeq_data(void) const
Get the Seq_data member data.
Definition: Seq_inst_.hpp:817
const TDescr & GetDescr(void) const
Get the Descr member data.
Definition: Bioseq_.hpp:315
static int input()
static int version
Definition: mdb_load.c:29
unsigned int a
Definition: ncbi_localip.c:102
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
The NCBI C++/STL use hints.
T max(T x_, T y_)
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares CPsiBlast, the C++ API for the PSI-BLAST engine.
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares the CRemoteBlast class.
Configuration structure for the CBlastScopeSource.
static string query
#define _ASSERT
else result
Definition: token2.c:20
Modified on Sat Dec 02 09:20:37 2023 by modify_doxy.py rev. 669887