NCBI C++ ToolKit
psiblast_app.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: psiblast_app.cpp 90946 2020-08-13 13:55:15Z zaretska $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Christiam Camacho
27  *
28  */
29 
30 /** @file psiblast_app.cpp
31  * PSI-BLAST command line application
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <corelib/ncbifile.hpp>
46 #include "blast_app_util.hpp"
50 
51 #ifndef SKIP_DOXYGEN_PROCESSING
53 USING_SCOPE(blast);
55 #endif
56 
58 {
59 public:
60  /** @inheritDoc */
     // Install a CBlastVersion instance as the version information.
63  version->SetVersionInfo(new CBlastVersion());
66  if (m_UsageReport.IsEnabled()) {
68  }
69  }
72  }
73 private:
74  /** @inheritDoc */
75  virtual void Init();
76  /** @inheritDoc */
77  virtual int Run();
78 
79  /// Save the PSSM to a check point file and/or as an ASCII PSSM
80  /// @param pssm PSSM to save [in]
81  /// @param itr Iteration object, NULL in case of remote search [in]
83  const CPsiBlastIterationState* itr = NULL);
84 
85  /// Performs iterations for either multiple queries or input PSSM
86  ///@param opts_hndl BLAST options to use [in]
87  ///@param query query sequence(s) [in]
88  ///@param pssm PSSM saved from another psi-blast run [in]
89  ///@param db_args database [in]
90  ///@param db_adapter more db information [in]
91  ///@param scope fetches sequences [in]
92  ///@param formatter formats results [in]
93  /// @return true if the PSI-BLAST search converged, otherwise false
98  CRef<CLocalDbAdapter> db_adapter,
99  CRef<CScope> scope,
100  CBlastFormat& formatter);
     /// Runs the search iterations locally (PHI-BLAST for the first
     /// iteration when a PHI pattern is set, PSI-BLAST otherwise)
     /// @param query query sequence(s) [in]
     /// @param pssm input PSSM, may be empty [in]
     /// @param scope fetches sequences [in]
     /// @param db_adapter database to search [in]
     /// @param opts_hndl BLAST options to use [in]
     /// @param formatter formats results [in]
     /// @param kNumIterations number of iterations handed to the iteration
     /// state object [in]
     /// @return true if the iteration state reports convergence
101  bool
104  CRef<CScope> scope,
105  CRef<CLocalDbAdapter> db_adapter,
106  CRef<CBlastOptionsHandle> opts_hndl,
107  CBlastFormat& formatter,
108  const size_t kNumIterations);
109 
     /// Auxiliary function to create the PSSM for the next iteration; also
     /// stores ancillary_data in m_AncillaryData
     /// @param bioseq query sequence [in]
     /// @param sset alignment from the iteration that just finished [in]
     /// @param opts_handle PSI-BLAST options [in]
     /// @param scope passed through to the PSSM computation [in]
     /// @param ancillary_data ancillary results of the finished iteration [in]
111  ComputePssmForNextIteration(const CBioseq& bioseq,
114  CRef<CScope> scope,
115  CRef<CBlastAncillaryData> ancillary_data);
116 
117  /// This application's command line args
119  /// Ancillary results for the previously executed PSI-BLAST iteration
121 
125 };
126 
128 {
     // NOTE(review): presumably the body of CPsiBlastApp::Init() (declared
     // as `virtual void Init()` in the class) - the signature line is not
     // visible here, confirm against the full file.

129  // Formulate the command line arguments.
130 
132 
133  // Read the command line.
134 
137 }
138 
139 /**
140  * @brief Extract the bioseq which represents the query from either a PSSM or a
141  * CBlastQueryVector
142  *
143  * @param query container for query sequence(s) [in]
144  * @param scope Scope from which to retrieve the query if not in the PSSM [in]
145  * @param pssm if NON-NULL, the query will be extracted from this object [in]
     *
     * @return the query bioseq; the reference is left empty when no PSSM is
     * given and the first query location has no Seq-id
146  */
147 static CConstRef<CBioseq>
150 {
151  CConstRef<CBioseq> retval;
152 
153  if (pssm) {
     // The PSSM carries its own query: take the Seq variant of that entry.
154  retval.Reset(&pssm->SetQuery().SetSeq());
155  } else {
156  _ASSERT(query.NotEmpty());
     // Only the first query location (index 0) is consulted; its Seq-id is
     // looked up in the scope.
157  if (query->GetQuerySeqLoc(0)->GetId()) {
158  CBioseq_Handle bh =
159  scope->GetBioseqHandle(*query->GetQuerySeqLoc(0)->GetId());
160  _ASSERT(bh);
161  retval.Reset(bh.GetBioseqCore());
162  }
163  }
164  return retval;
165 }
166 
167 /// Auxiliary function to create the PSSM for the next iteration
    /// Stores ancillary_data in m_AncillaryData and returns the result of
    /// PsiBlastComputePssmFromAlignment() for the given query and alignment.
172  CRef<CScope> scope,
173  CRef<CBlastAncillaryData> ancillary_data)
174 {
     // Keep the ancillary data in the member so SavePssmToFile() can use it
     // when printing the ASCII PSSM.
177  m_AncillaryData = ancillary_data;
     // NOTE(review): `diags` is set up on lines not visible in this listing;
     // presumably a PSIDiagnosticsRequest wrapper - confirm.
178  return PsiBlastComputePssmFromAlignment(bioseq, sset, scope, *opts_handle,
179  m_AncillaryData, diags);
180 }
181 
182 /*** Convenience function to make a query factory object.
     * A CObjMgr_QueryFactory is built from the query vector only when no PSSM
     * is supplied and the query is non-empty; in every other case the returned
     * reference is empty. */
183 static CRef<IQueryFactory>
185 {
186  CRef<IQueryFactory> retval;
187  if (pssm.Empty() && !query.Empty()) {
188  retval.Reset(new CObjMgr_QueryFactory(*query));
189  }
190  return retval;
191 }
192 
/// Runs the search iterations on the local machine.
/// When a PHI pattern is set, iteration 1 is run through CLocalBlast with the
/// PHI options; every other iteration is run through CPsiBlast. After each
/// iteration the results are formatted and, when another round will follow (or
/// the last PSSM is to be saved), a new PSSM is computed from the first
/// query's alignments.
/// @return true if the iteration state reports convergence, otherwise false
193 bool
196  CRef<CScope> scope,
197  CRef<CLocalDbAdapter> db_adapter,
198  CRef<CBlastOptionsHandle> opts_hndl,
199  CBlastFormat& formatter,
200  const size_t kNumIterations)
201 {
202  bool converged = false;
203  CArgs& args = const_cast<CArgs&>(GetArgs());
204 
205  const CBlastOptions& opt = opts_hndl->GetOptions();
206  CPsiBlastIterationState itr(kNumIterations);
207 
     // run_token is a bit mask: bit value 1 is set when a PHI pattern is
     // present; bit value 2 is set unless exactly one iteration was requested
     // together with a PHI pattern.
210  int run_token = 0; // 1 means phi-blast, 2 means psi-blast, 3 means both
211  if (opt.GetPHIPattern() != NULL)
212  run_token += 1;
213  if (kNumIterations != 1 || opt.GetPHIPattern() == NULL)
214  run_token += 2;
215  _TRACE("Run_token=" << run_token);
216  if (run_token & 1)
217  {
     // NOTE(review): the result of this dynamic_cast is not checked before
     // phi_opts is handed to CLocalBlast below - confirm it cannot be null.
218  phi_opts.Reset(dynamic_cast<CPHIBlastProtOptionsHandle*>(&*opts_hndl));
219  if (run_token & 2)
220  {
221  // m_CmdLineArgs->ForcePSIBlast();
     // PHI followed by PSI: the PSI iterations use a second options handle
     // (opts_hndl_2, created on a line not visible here) whose PHI pattern is
     // cleared.
224  CBlastOptions& options = opts_hndl_2->SetOptions();
225  options.SetPHIPattern(NULL, false);
227  psi_opts.Reset(dynamic_cast<CPSIBlastOptionsHandle*>(&*opts_hndl_2));
228  }
229  }
230  else if (run_token == 2) // This branch means only psi-blast, NO phi-blast.
231  psi_opts.Reset(dynamic_cast<CPSIBlastOptionsHandle*>(&*opts_hndl));
232 
233  CRef<IQueryFactory> query_factory(s_MakeQueryFactory(pssm, query));
234  CRef<CPsiBlast> psiblast;
235  if (run_token & 2)
236  {
237  _ASSERT(psi_opts.NotEmpty());
     // Start from the input PSSM when there is one, otherwise from the query.
238  if (pssm.NotEmpty()) {
239  psiblast.Reset(new CPsiBlast(pssm, db_adapter, psi_opts));
240  } else {
241  psiblast.Reset(new CPsiBlast(query_factory, db_adapter, psi_opts));
242  }
244  }
245 
246  while (itr) {
247 
     // SavePssmToFile() returns immediately when pssm is empty.
248  SavePssmToFile(pssm, &itr);
249 
250  CRef<CSearchResultSet> results;
     // Only the first iteration of a run with a PHI pattern goes through
     // CLocalBlast; all other iterations use the CPsiBlast object.
251  if (run_token & 1 && itr.GetIterationNumber() == 1)
252  {
253  CLocalBlast lcl_blast(query_factory, CRef<CBlastOptionsHandle>(phi_opts), db_adapter);
255  results = lcl_blast.Run();
256  BlastFormatter_PreFetchSequenceData(*results, scope,
258  formatter.PrintPhiResult(*results, query, itr.GetIterationNumber(), itr.GetPreviouslyFoundSeqIds());
259  }
260  else
261  {
262  results = psiblast->Run();
263  BlastFormatter_PreFetchSequenceData(*results, scope,
267  {
     // Archive output is written from the query factory when searching with
     // a query, or from the PSSM when one is in use.
268  if (pssm.Empty() && !query.Empty())
269  {
270  formatter.WriteArchive(*query_factory, *opts_hndl, *results, itr.GetIterationNumber(), m_bah.GetMessages());
272  }
273  else if (!pssm.Empty())
274  {
275  formatter.WriteArchive(*pssm, *opts_hndl, *results, itr.GetIterationNumber(), m_bah.GetMessages());
277  }
278  }
279  else
280  {
281  //BlastFormatter_PreFetchSequenceData(*results, scope);
282  ITERATE(CSearchResultSet, result, *results) {
283  formatter.PrintOneResultSet(**result, query,
284  itr.GetIterationNumber(),
286  }
287  }
288  }
289  // FIXME: what if there are no results!?!
290 
     // Only the first query's results decide whether to continue iterating.
291  CSearchResults& results_1st_query = (*results)[0];
292  if ( !results_1st_query.HasAlignments() ) {
293  break;
294  }
295 
296  if (run_token & 2)
297  {
298  CConstRef<CSeq_align_set> aln(results_1st_query.GetSeqAlign());
300  CPsiBlastIterationState::GetSeqIds(aln, psi_opts, ids);
301 
302  itr.Advance(ids);
303 
     // Build the next PSSM only if another iteration will run or the last
     // PSSM was requested to be saved.
304  if (itr || m_CmdLineArgs->GetSaveLastPssm()) {
     // NOTE(review): s_GetQueryBioseq() can return an empty reference (first
     // query location without a Seq-id); `*seq` below is dereferenced without
     // a check - confirm that case cannot occur here.
305  CConstRef<CBioseq> seq =
306  s_GetQueryBioseq(query, scope, pssm);
307  pssm =
308  ComputePssmForNextIteration(*seq, aln, psi_opts, scope,
309  results_1st_query.GetAncillaryData());
310  psiblast->SetPssm(pssm);
311  }
312  }
313  else
314  break;
315  }
316  if (itr.HasConverged())
317  converged = true;
318 
320  SavePssmToFile(pssm, &itr);
321  }
322 
323  return converged;
324 }
325 
/// Performs iterations for either multiple queries or input PSSM.
/// Saves the search strategy, then either runs the search through a
/// CRemoteBlast object and formats its single result set, or delegates to
/// x_RunLocalPsiBlastIterations().
/// @return the value returned by x_RunLocalPsiBlastIterations() for local
/// searches; in the lines visible here the remote branch never changes the
/// initial `false`
326 bool
330  CRef<CBlastDatabaseArgs> db_args,
331  CRef<CLocalDbAdapter> db_adapter,
332  CRef<CScope> scope,
333  CBlastFormat& formatter)
334 {
335  const CArgs& args = GetArgs();
336  const size_t kNumIterations = m_CmdLineArgs->GetNumberOfIterations();
337  _TRACE("PSI-BLAST running " << kNumIterations << " iterations");
338  CRef<IQueryFactory> query_factory(s_MakeQueryFactory(pssm, query));
339 
340  SaveSearchStrategy(args, m_CmdLineArgs, query_factory, opts_hndl, pssm, kNumIterations);
341 
342  bool retval = false;
     // NOTE(review): the condition opening the remote branch is on a line not
     // visible in this listing - presumably m_CmdLineArgs->ExecuteRemotely().
344 
345  CRef<CRemoteBlast> rmt_psiblast =
346  InitializeRemoteBlast(query_factory, db_args, opts_hndl,
348  m_CmdLineArgs->GetClientId(), pssm);
349  // FIXME: determine if errors occurred, if so, return appropriate
350  // exit code
351 
352  CRef<CSearchResultSet> results = rmt_psiblast->GetResultSet();
355  {
     // Archive output: written from the query factory when searching with a
     // query, or from the PSSM when one is in use; the iteration number is
     // passed as 0.
356  if (pssm.Empty() && !query.Empty())
357  {
358  formatter.WriteArchive(*query_factory, *opts_hndl, *results, 0, m_bah.GetMessages());
360  }
361  else if (!pssm.Empty())
362  {
363  formatter.WriteArchive(*pssm, *opts_hndl, *results, 0, m_bah.GetMessages());
365  }
366  }
367  else
368  {
369  BlastFormatter_PreFetchSequenceData(*results, scope,
371  ITERATE(CSearchResultSet, result, *results) {
372  formatter.PrintOneResultSet(**result, query);
373  }
374  }
     // No iteration object is passed, so SavePssmToFile() receives itr == NULL.
375  SavePssmToFile(rmt_psiblast->GetPSSM());
376  } else {
377  retval = x_RunLocalPsiBlastIterations(query, pssm, scope, db_adapter,
378  opts_hndl, formatter, kNumIterations);
379  }
381  return retval;
382 }
383 
/// Save the PSSM to a check point file and/or as an ASCII PSSM.
/// Does nothing when pssm is empty. Each output is produced only when the
/// corresponding command line option is set and either no iteration object
/// is given or its iteration number is at least 1.
384 void
386  const CPsiBlastIterationState* itr)
387 {
388  if (pssm.Empty()) {
389  return;
390  }
391 
392  if (m_CmdLineArgs->SaveCheckpoint() &&
393  (itr == NULL || // this is true in the case of remote PSI-BLAST
394  itr->GetIterationNumber() >= 1)) {
396  }
397 
398  if (m_CmdLineArgs->SaveAsciiPssm() &&
399  (itr == NULL || // this is true in the case of remote PSI-BLAST
400  itr->GetIterationNumber() >= 1)) {
401  if (m_AncillaryData.Empty() && pssm.NotEmpty()) {
     // NOTE(review): the enclosing condition admits itr == NULL (remote
     // search), so this assertion dereferences a null pointer whenever
     // _ASSERT evaluates its argument - confirm and guard with `itr == NULL ||`.
402  _ASSERT(itr->GetIterationNumber() == 1);
404  }
408  }
409 }
410 
411 
413 {
     // Body of the application's Run() override (signature line not visible
     // in this listing). Builds the options, database, query/PSSM input and
     // formatter, runs DoIterations() per query batch (or once for an input
     // PSSM) and returns the exit status.
414  int status = BLAST_EXIT_SUCCESS;
415 
416  try {
417 
418  // Allow the fasta reader to complain on invalid sequence input
420  SetDiagPostPrefix("psiblast");
     // Route diagnostics through m_bah; `false` tells the diag framework not
     // to delete the handler.
421  SetDiagHandler(&m_bah, false);
422 
423  const CArgs& args = GetArgs();
424  const bool recovered_from_search_strategy =
426 
427  CRef<CQueryOptionsArgs> query_opts =
429 
     // Options come either from a saved search strategy or straight from the
     // command line.
430  CRef<CBlastOptionsHandle> opts_hndl;
431  if(recovered_from_search_strategy) {
432  opts_hndl.Reset(&*m_CmdLineArgs->SetOptionsForSavedStrategy(args));
433  }
434  else {
435  opts_hndl.Reset(&*m_CmdLineArgs->SetOptions(args));
436  }
437  const CBlastOptions& opt = opts_hndl->GetOptions();
438  _TRACE("PSI-BLAST program = " << EProgramToTaskName(opt.GetProgram()));
439 
440  /*** Initialize the database ***/
442  CRef<CLocalDbAdapter> db_adapter;
     // InitializeSubject() fills in both db_adapter and scope.
444  InitializeSubject(db_args, opts_hndl, m_CmdLineArgs->ExecuteRemotely(),
445  db_adapter, scope);
446  _ASSERT(db_adapter && scope);
447 
448  /*** Get the query sequence(s) or PSSM (these two options are mutually
449  * exclusive) ***/
452 
453  if (pssm.Empty()) { // FASTA input
454  SDataLoaderConfig dlconfig =
456  db_adapter);
457  CBlastInputSourceConfig iconfig(dlconfig, query_opts->GetStrand(),
458  query_opts->UseLowercaseMasks(),
459  query_opts->GetParseDeflines(),
460  query_opts->GetRange());
     // An empty query is reported as a warning and still exits with success.
462  ERR_POST(Warning << "Query is Empty!");
463  return BLAST_EXIT_SUCCESS;
464  }
467  iconfig));
     // The second constructor argument (1) is presumably the batch size,
     // i.e. one query per batch - confirm against CBlastInput.
468  input.Reset(new CBlastInput(&*fasta, 1));
469  _TRACE("PSI-BLAST running with FASTA input");
470  } else {
471  _TRACE("PSI-BLAST running with PSSM input");
473  }
474 
475  /*** Get the formatting options ***/
477  bool isArchiveFormat = fmt_args->ArchiveFormatRequested(args);
478  if(!isArchiveFormat) {
480  }
481  CNcbiOstream& out_stream = m_CmdLineArgs->GetOutputStream();
482  CBlastFormat formatter(opt, *db_adapter,
483  fmt_args->GetFormattedOutputChoice(),
484  query_opts->GetParseDeflines(),
485  out_stream,
486  fmt_args->GetNumDescriptions(),
487  fmt_args->GetNumAlignments(),
488  *scope,
489  opt.GetMatrixName(),
490  fmt_args->ShowGis(),
491  fmt_args->DisplayHtmlOutput(),
492  opt.GetQueryGeneticCode(),
493  opt.GetDbGeneticCode(),
494  opt.GetSumStatisticsMode(),
496  db_adapter->GetFilteringAlgorithm(),
497  fmt_args->GetCustomOutputFormatSpec(),
498  false, false, NULL, NULL,
500 
501  formatter.SetQueryRange(query_opts->GetRange());
502  formatter.SetLineLength(fmt_args->GetLineLength());
503  if(UseXInclude(*fmt_args, args[kArgOutput].AsString())) {
504  formatter.SetBaseFile(args[kArgOutput].AsString());
505  }
506  formatter.PrintProlog();
507 
508 
509  if (pssm.Empty())
510  { // Value may be modified for 2nd (PSSM) iteration, so save to reset for next query.
511  ECompoAdjustModes comp_stats_original = opts_hndl->GetOptions().GetCompositionBasedStats();
512 
     // One DoIterations() call per query batch; the formatter scope history
     // is reset and QueryBatchCleanup() is called between batches.
513  for (; !input->End(); formatter.ResetScopeHistory(), QueryBatchCleanup()) {
514 
515  CRef<CBlastQueryVector> query_batch(input->GetNextSeqBatch(*scope));
516 
517  const bool converged = DoIterations(opts_hndl,
518  query_batch,
519  pssm,
520  db_args,
521  db_adapter,
522  scope,
523  formatter);
524 
525  if (converged && !fmt_args->HasStructuredOutputFormat() &&
527  out_stream << NcbiEndl << "Search has CONVERGED!" << NcbiEndl;
528  }
529  // Reset for next query sequence.
531  pssm.Reset();
532  opts_hndl->SetOptions().SetCompositionBasedStats(comp_stats_original);
533  }
534  } else {
535  _ASSERT(pssm->HasQuery());
536  _ASSERT(pssm->GetQuery().IsSeq()); // single query only!
     // Make the PSSM's query sequence resolvable through the scope.
537  scope->AddTopLevelSeqEntry(pssm->SetQuery());
538 
     // Build a whole-sequence location on the query's first Seq-id and wrap
     // it as the single search query.
539  CRef<CSeq_loc> sl(new CSeq_loc());
540  sl->SetWhole().Assign(*pssm->GetQuery().GetSeq().GetFirstId());
541 
542  CRef<CBlastSearchQuery> q(new CBlastSearchQuery(*sl, *scope));
544  query->AddQuery(q);
545 
546 
547  // Searches with PSSM, only one query though
548  const bool converged = DoIterations(opts_hndl,
549  query,
550  pssm,
551  db_args,
552  db_adapter,
553  scope,
554  formatter);
555 
556  if (converged && !fmt_args->HasStructuredOutputFormat() &&
558  out_stream << NcbiEndl << "Search has CONVERGED!" << NcbiEndl;
559  }
560 
561  // finish up
562  formatter.PrintEpilog(opt);
563 
565  opts_hndl->GetOptions().DebugDumpText(NcbiCerr, "BLAST options", 1);
566  if(input) {
568  }
569 
571 
     // CATCH_ALL is the standard catch statement for the BLAST command line
     // programs; it receives `status` as its exit-code argument.
572  } CATCH_ALL(status)
573  if(!m_bah.GetMessages().empty()) {
574  const CArgs & a = GetArgs();
576  }
579  return status;
580 }
581 
582 #ifndef SKIP_DOXYGEN_PROCESSING
/// Program entry point: constructs a CPsiBlastApp and returns the value of
/// its AppMain() call, which receives the command line unchanged.
583 int main(int argc, const char* argv[] /*, const char* envp[]*/)
584 {
585  return CPsiBlastApp().AppMain(argc, argv);
586 }
587 #endif /* SKIP_DOXYGEN_PROCESSING */
User-defined methods of the data storage class.
Produce formatted blast output for command line applications.
CRef< blast::CRemoteBlast > InitializeRemoteBlast(CRef< blast::IQueryFactory > queries, CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool verbose_output, const string &client_id, CRef< objects::CPssmWithParameters > pssm)
Initializes a CRemoteBlast instance for usage by command line BLAST binaries.
blast::SDataLoaderConfig InitializeQueryDataLoaderConfiguration(bool query_is_protein, CRef< blast::CLocalDbAdapter > db_adapter)
Initialize the data loader configuration for the query.
void SaveSearchStrategy(const CArgs &args, blast::CBlastAppArgs *cmdline_args, CRef< blast::IQueryFactory > queries, CRef< blast::CBlastOptionsHandle > opts_hndl, CRef< objects::CPssmWithParameters > pssm, unsigned int num_iters)
Save the search strategy corresponding to the current command line search.
void QueryBatchCleanup()
Clean up formatter scope and release.
CRef< CBlastAncillaryData > ExtractPssmAncillaryData(const CPssmWithParameters &pssm)
Auxiliary function to extract the ancillary data from the PSSM.
bool RecoverSearchStrategy(const CArgs &args, blast::CBlastAppArgs *cmdline_args)
Recover search strategy from input file.
void PrintErrorArchive(const CArgs &a, const list< CRef< CBlast4_error > > &msg)
Function to print blast archive with only error messages (search failed) to output stream.
void InitializeSubject(CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, bool is_remote_search, CRef< blast::CLocalDbAdapter > &db_adapter, CRef< objects::CScope > &scope)
Initializes the subject/database as well as its scope.
string GetCmdlineArgs(const CNcbiArguments &a)
void BlastFormatter_PreFetchSequenceData(const blast::CSearchResultSet &results, CRef< CScope > scope, blast::CFormattingArgs::EOutputFormat format_type)
This method optimizes the retrieval of sequence data into the scope.
bool UseXInclude(const CFormattingArgs &f, const string &s)
bool IsIStreamEmpty(CNcbiIstream &in)
void LogQueryInfo(CBlastUsageReport &report, const CBlastInput &q_info)
Utility functions for BLAST command line applications.
#define CATCH_ALL(exit_code)
Standard catch statement for all BLAST command line programs.
#define BLAST_EXIT_SUCCESS
Command line binary exit code: success.
Interface for reading SRA sequences into blast input.
@ eAaLookupTable
standard protein (blastp) lookup table
Declares class to encapsulate all BLAST options.
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
PSIDiagnosticsRequest * PSIDiagnosticsRequestNewEx(Boolean save_ascii_pssm)
Allocates a PSIDiagnosticsRequest structure, setting fields to their default values for their use in ...
Definition: blast_psi.c:591
CArgs –.
Definition: ncbiargs.hpp:379
CBioseq_Handle –.
const CSeq_id * GetFirstId() const
Definition: Bioseq.cpp:271
CRef< CBlastOptionsHandle > SetOptionsForSavedStrategy(const CArgs &args)
Combine the command line arguments into a CBlastOptions object recovered from saved search strategy.
virtual CNcbiIstream & GetInputStream()
Get the input stream.
size_t GetNumThreads() const
Get the number of threads to spawn.
CRef< CBlastOptionsHandle > SetOptions(const CArgs &args)
Extract the command line arguments into a CBlastOptionsHandle object.
CRef< CBlastDatabaseArgs > GetBlastDatabaseArgs() const
Get the BLAST database arguments.
CArgDescriptions * SetCommandLine()
Set the command line arguments.
bool ExecuteRemotely() const
Determine whether the search should be executed remotely or not.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
CRef< CQueryOptionsArgs > GetQueryOptionsArgs() const
Get the options for the query sequence(s)
string GetClientId() const
Retrieve the client ID for remote requests.
CRef< CFormattingArgs > GetFormattingArgs() const
Get the formatting options.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
virtual CNcbiOstream & GetOutputStream()
Get the output stream.
Class to capture message from diag handler.
Definition: blast_aux.hpp:249
Class representing a text file containing sequences in fasta format.
This class formats the BLAST results for command line applications.
void LogBlastSearchInfo(blast::CBlastUsageReport &report)
void PrintOneResultSet(const blast::CSearchResults &results, CConstRef< blast::CBlastQueryVector > queries, unsigned int itr_num=numeric_limits< unsigned int >::max(), blast::CPsiBlastIterationState::TSeqIds prev_seqids=blast::CPsiBlastIterationState::TSeqIds(), bool is_deltablast_domain_result=false)
Print all alignment information for a single query sequence along with any errors or warnings (errors...
void PrintEpilog(const blast::CBlastOptions &options)
Print the footer of the blast report.
void SetBaseFile(string base)
For use by XML2 only.
void ResetScopeHistory()
Resets the scope history for some output formats.
void SetLineLength(size_t len)
Set Alignment Length.
void WriteArchive(blast::IQueryFactory &queries, blast::CBlastOptionsHandle &options_handle, const blast::CSearchResultSet &results, unsigned int num_iters=0, const list< CRef< objects::CBlast4_error > > &msg=list< CRef< objects::CBlast4_error > >())
Writes out the query and results as an "archive" format.
void PrintProlog()
Print the header of the blast report.
void SetQueryRange(const TSeqRange &query_range)
Set query range.
void PrintPhiResult(const blast::CSearchResultSet &result_set, CConstRef< blast::CBlastQueryVector > queries, unsigned int itr_num=numeric_limits< unsigned int >::max(), blast::CPsiBlastIterationState::TSeqIds prev_seqids=blast::CPsiBlastIterationState::TSeqIds())
Print all alignment information for aa PHI-BLAST run.
Class that centralizes the configuration data for sequences to be converted.
Definition: blast_input.hpp:48
Generalized converter from an abstract source of biological sequence data to collections of blast inp...
Encapsulates ALL the BLAST algorithm's options.
Query Vector.
Definition: sseqloc.hpp:276
Search Query.
Definition: sseqloc.hpp:147
void AddParam(EUsageParams p, int val)
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
Definition: version.hpp:53
void DebugDumpText(ostream &out, const string &bundle, unsigned int depth) const
Definition: ddumpable.cpp:56
bool HasStructuredOutputFormat() const
Returns true if the desired output format is structured (needed to determine whether to print or not ...
virtual bool ArchiveFormatRequested(const CArgs &args) const
string GetCustomOutputFormatSpec() const
Retrieve the string that specifies the custom output format for tabular and comma-separated value output.
EOutputFormat GetFormattedOutputChoice() const
Get the choice of formatted output.
@ eArchiveFormat
BLAST archive format.
TSeqPos GetNumAlignments() const
Number of alignments to show in traditional BLAST output.
bool ShowGis() const
Display the NCBI GIs in formatted output?
TSeqPos GetNumDescriptions() const
Number of one-line descriptions to show in traditional BLAST output.
size_t GetLineLength() const
bool DisplayHtmlOutput() const
Display HTML output?
Class to perform a BLAST search on local BLAST databases Note that PHI-BLAST can be run using this cl...
Definition: local_blast.hpp:62
NCBI C++ Object Manager dependent implementation of IQueryFactory.
Handle to the protein PHI BLAST options.
Handle to the protein-protein options to the BLAST algorithm.
Wrapper class for PSIDiagnosticsRequest .
Definition: blast_aux.hpp:347
Handle command line arguments for psiblast binary Programs supported: psiblast, phi-blastn,...
CNcbiOstream * GetCheckpointStream()
Retrieve the stream to write the checkpoint file.
bool GetSaveLastPssm() const
Should the PSSM after the last database search be saved.
size_t GetNumberOfIterations() const
Get the number of iterations to perform.
CRef< objects::CPssmWithParameters > GetInputPssm() const
Get the PSSM specified as input from the command line.
bool SaveAsciiPssm() const
Should a PSSM be saved as ASCII in a file?
CNcbiOstream * GetAsciiPssmStream()
Retrieve the stream to write the ASCII PSSM.
bool SaveCheckpoint() const
Should a PSSM be saved in a checkpoint file?
CRef< CPsiBlastAppArgs > m_CmdLineArgs
This application's command line args.
void SavePssmToFile(CRef< CPssmWithParameters > pssm, const CPsiBlastIterationState *itr=NULL)
Save the PSSM to a check point file and/or as an ASCII PSSM.
CStopWatch m_StopWatch
CBlastAppDiagHandler m_bah
virtual int Run()
@inheritDoc
CConstRef< CBlastAncillaryData > m_AncillaryData
Ancillary results for the previously executed PSI-BLAST iteration.
virtual void Init()
@inheritDoc
CPsiBlastApp()
@inheritDoc
bool DoIterations(CRef< CBlastOptionsHandle > opts_hndl, CRef< CBlastQueryVector > query, CRef< CPssmWithParameters > pssm, CRef< CBlastDatabaseArgs > db_args, CRef< CLocalDbAdapter > db_adapter, CRef< CScope > scope, CBlastFormat &formatter)
Performs iterations for either multiple queries or input PSSM.
CRef< CPssmWithParameters > ComputePssmForNextIteration(const CBioseq &bioseq, CConstRef< CSeq_align_set > sset, CConstRef< CPSIBlastOptionsHandle > opts_handle, CRef< CScope > scope, CRef< CBlastAncillaryData > ancillary_data)
Auxiliary function to create the PSSM for the next iteration.
CBlastUsageReport m_UsageReport
bool x_RunLocalPsiBlastIterations(CRef< CBlastQueryVector > query, CRef< CPssmWithParameters > pssm, CRef< CScope > scope, CRef< CLocalDbAdapter > db_adapter, CRef< CBlastOptionsHandle > opts_hndl, CBlastFormat &formatter, const size_t kNumIterations)
Represents the iteration state in PSI-BLAST.
Runs a single iteration of the PSI-BLAST algorithm on a BLAST database.
Definition: psiblast.hpp:79
const CSeq_entry & GetQuery() const
Retrieve the query sequence.
CSeq_entry & SetQuery()
Retrieve the query sequence.
bool HasQuery() const
Has this PSSM a query in it?
objects::ENa_strand GetStrand() const
Get strand to search in query sequence(s)
Definition: blast_args.hpp:800
bool GetParseDeflines() const
Should the defline be parsed?
Definition: blast_args.hpp:804
bool QueryIsProtein() const
Is the query sequence protein?
Definition: blast_args.hpp:807
TSeqRange GetRange() const
Get query sequence range restriction.
Definition: blast_args.hpp:796
bool UseLowercaseMasks() const
Use lowercase masking in FASTA input?
Definition: blast_args.hpp:802
CRef –.
Definition: ncbiobj.hpp:618
CScope –.
Definition: scope.hpp:92
Search Results for All Queries.
Search Results for One Query.
CStopWatch –.
Definition: ncbitime.hpp:1938
const string kArgOutput
Output file name.
const string kArgPHIPatternFile
Argument to specify a PHI-BLAST pattern file.
void Print(const CCompactSAMApplication::AlignInfo &ai)
ECompoAdjustModes
A collection of constants that specify all permissible modes of composition adjustment.
void SetPHIPattern(const char *pattern, bool is_dna)
CRef< CSearchResultSet > Run()
Run the PSI-BLAST engine for one iteration.
Definition: psiblast.cpp:95
CRef< objects::CPssmWithParameters > GetPSSM(void)
Get the PSSM produced by the search.
int GetDbGeneticCode() const
void SetCompositionBasedStats(ECompoAdjustModes mode)
virtual void SetNumberOfThreads(size_t nthreads)
Mutator for the number of threads.
void ResetMessages(void)
Reset message buffer, erase all saved messages.
Definition: blast_aux.cpp:1174
const char * GetPHIPattern() const
unsigned int GetIterationNumber() const
Return the number of the current iteration.
CConstRef< objects::CSeq_align_set > GetSeqAlign() const
Accessor for the Seq-align results.
TSeqIds GetPreviouslyFoundSeqIds() const
Retrieve the set of Seq-id's found in the previous iteration.
CRef< CSearchResultSet > Run()
Executes the search.
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
CRef< CBlastAncillaryData > GetAncillaryData() const
Accessor for the query's search ancillary.
int GetFilteringAlgorithm()
Retrieve the database filtering algorithm.
void SetPssm(CConstRef< objects::CPssmWithParameters > pssm)
This method allows the same object to be reused when performing multiple iterations.
Definition: psiblast.cpp:83
bool HasConverged()
Determines if the PSI-BLAST search has converged (i.e.
EProgram GetProgram() const
Accessors/Mutators for individual options.
const CBlastOptions & GetOptions() const
Return the object which this object is a handle for.
int GetQueryGeneticCode() const
CRef< objects::CPssmWithParameters > PsiBlastComputePssmFromAlignment(const objects::CBioseq &query, CConstRef< objects::CSeq_align_set > alignment, CRef< objects::CScope > database_scope, const CPSIBlastOptionsHandle &opts_handle, CConstRef< CBlastAncillaryData > ancillary_data, PSIDiagnosticsRequest *diagnostics_req=0)
Computes a PSSM from the result of a PSI-BLAST iteration.
Definition: psiblast.cpp:102
string EProgramToTaskName(EProgram p)
Convert a EProgram enumeration value to a task name (as those used in the BLAST command line binaries...
Definition: blast_aux.cpp:676
bool GetSumStatisticsMode() const
Sum statistics options.
ECompoAdjustModes GetCompositionBasedStats() const
CRef< CSearchResultSet > GetResultSet()
Submit the search (if necessary) and return the results.
void DoNotSaveMessages(void)
Call to turn off saving diag messages and discard all saved messages.
Definition: blast_aux.cpp:1189
void Advance(const TSeqIds &list)
Advance the iterator by passing it the list of Seq-ids which passed the inclusion criteria for the cu...
static void GetSeqIds(CConstRef< objects::CSeq_align_set > seqalign, CConstRef< CPSIBlastOptionsHandle > opts, TSeqIds &retval)
Extract the sequence ids from the sequence alignment which identify those sequences that will be used...
const char * GetMatrixName() const
list< CRef< objects::CBlast4_error > > & GetMessages(void)
Return list of saved diag messages.
Definition: blast_aux.hpp:262
void SetLookupTableType(ELookupTableType type)
bool HasAlignments() const
Return true if there are any alignments for this query.
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
Definition: ncbiapp.cpp:1154
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
Definition: ncbiapp.cpp:1292
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:285
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:799
CVersionInfo GetVersion(void) const
Get the program version information.
Definition: ncbiapp.cpp:1164
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1175
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideLogfile
Hide log file description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
void Remove(const string &name)
Remove argument of name "name".
Definition: ncbiargs.cpp:1903
static void PrintAsciiPssm(const objects::CPssmWithParameters &pssm, CConstRef< blast::CBlastAncillaryData > ancillary_data, CNcbiOstream &out)
Prints the PSSM in ASCII format (as in blastpgp's -Q option)
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
void SetDiagPostPrefix(const char *prefix)
Specify a string to prefix all subsequent error postings with.
Definition: ncbidiag.cpp:6097
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
#define ERR_POST(message)
Error posting with file, line number information but without error codes.
Definition: ncbidiag.hpp:186
void SetDiagHandler(CDiagHandler *handler, bool can_delete=true)
Set the diagnostic handler using the specified diagnostic handler class.
Definition: ncbidiag.cpp:6288
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
void Warning(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1191
#define MSerial_AsnText
I/O stream manipulators –.
Definition: serialbase.hpp:696
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
CSeq_entry_Handle AddTopLevelSeqEntry(CSeq_entry &top_entry, TPriority pri=kPriority_Default, EExist action=eExist_Default)
Add seq_entry; default priority is higher than for defaults or loaders. Add object to the scope with p...
Definition: scope.cpp:522
CBioseq_Handle GetBioseqHandle(const CSeq_id &id)
Get bioseq handle by seq-id.
Definition: scope.cpp:95
TBioseqCore GetBioseqCore(void) const
Get bioseq core structure.
bool Empty(void) const THROWS_NONE
Check if CConstRef is empty – not pointing to any object which means having a null value.
Definition: ncbiobj.hpp:1385
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:1439
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
#define NcbiEndl
Definition: ncbistre.hpp:548
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
#define NcbiCerr
Definition: ncbistre.hpp:544
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2776
void Start(void)
Start the timer.
Definition: ncbitime.hpp:2765
#define CVersion
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
TSeq & SetSeq(void)
Select the variant.
Definition: Seq_entry_.cpp:108
static int input()
Main class to perform a BLAST search on the local machine.
static int version
Definition: mdb_load.c:29
unsigned int a
Definition: ncbi_localip.c:102
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares CPsiBlast, the C++ API for the PSI-BLAST engine.
USING_SCOPE(blast)
static CConstRef< CBioseq > s_GetQueryBioseq(CConstRef< CBlastQueryVector > query, CRef< CScope > scope, CRef< CPssmWithParameters > pssm)
Extract the bioseq which represents the query from either a PSSM or a CBlastQueryVector.
int main(int argc, const char *argv[])
static CRef< IQueryFactory > s_MakeQueryFactory(CRef< CPssmWithParameters > pssm, CRef< CBlastQueryVector > query)
USING_NCBI_SCOPE
Main argument class for PSI-BLAST application.
Defines class which represents the iteration state in PSI-BLAST.
Declares the CRemoteBlast class.
Implementation of the BlastSeqSrc interface using the C++ BLAST databases API.
Configuration structure for the CBlastScopeSource.
static string query
#define _ASSERT
else result
Definition: token2.c:20
Modified on Thu Dec 07 10:11:02 2023 by modify_doxy.py rev. 669887