NCBI C++ ToolKit
blast_formatter.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_formatter.cpp 94345 2021-07-26 13:00:45Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file blast_formatter.cpp
31  * Stand-alone command line formatter for BLAST.
32  */
33 
34 #include <ncbi_pch.hpp>
35 #include <corelib/ncbiapp.hpp>
36 #include <corelib/ncbistre.hpp>
37 #include <serial/iterator.hpp>
46 #include "blast_app_util.hpp"
47 
48 
49 #ifndef SKIP_DOXYGEN_PROCESSING
51 USING_SCOPE(blast);
52 #endif
53 
54 /// The application class
56 {
57 public:
58  /** @inheritDoc */
61  version->SetVersionInfo(new CBlastVersion());
63  m_LoadFromArchive = false;
65  if (m_UsageReport.IsEnabled()) {
67  m_UsageReport.AddParam(CBlastUsageReport::eProgram, (string) "blast_formatter");
68  }
69  }
70 
73  }
74 
75 private:
76  /** @inheritDoc */
77  virtual void Init();
78  /** @inheritDoc */
79  virtual int Run();
80 
81  /// Prints the BLAST formatted output
82  int PrintFormattedOutput(void);
83 
84  /// Extracts the queries to be formatted
85  /// @param query_is_protein Are the queries protein sequences? [in]
86  CRef<CBlastQueryVector> x_ExtractQueries(bool query_is_protein);
87 
88  /// Build the query from a PSSM
89  /// @param pssm PSSM to inspect [in]
92 
93  /// Package a scope and Seq-loc into a SSeqLoc from a Bioseq
94  /// @param bioseq Bioseq to inspect [in]
95  /// @param scope Scope object to add the sequence data to [in|out]
96  SSeqLoc x_QueryBioseqToSSeqLoc(const CBioseq& bioseq, CRef<CScope> scope);
97 
98  void x_AddCmdOptions();
99 
100  /// Our link to the NCBI BLAST service
102 
103  /// The source of CScope objects for queries
105 
106  /// Tracks whether results come from an archive file.
110 };
111 
113 {
115 
116  unique_ptr<CArgDescriptions> arg_desc(new CArgDescriptions);
117 
118  arg_desc->SetUsageContext(GetArguments().GetProgramBasename(),
119  "Stand-alone BLAST formatter client, version "
120  + CBlastVersion().Print());
121 
122  arg_desc->SetCurrentGroup("Input options");
123  arg_desc->AddOptionalKey(kArgRid, "BLAST_RID", "BLAST Request ID (RID)",
125 
126  // add input file for seq-align here?
127  arg_desc->AddOptionalKey(kArgArchive, "ArchiveFile", "File containing BLAST Archive format in ASN.1 (i.e.: output format 11)",
129  arg_desc->SetDependency(kArgRid, CArgDescriptions::eExcludes, kArgArchive);
130 
131  CFormattingArgs fmt_args(false, CFormattingArgs::eIsSAM);
132  fmt_args.SetArgumentDescriptions(*arg_desc);
133 
134  arg_desc->SetCurrentGroup("Output configuration options");
135  arg_desc->AddDefaultKey(kArgOutput, "output_file", "Output file name",
137 
138  arg_desc->SetCurrentGroup("Miscellaneous options");
139  arg_desc->AddFlag(kArgParseDeflines,
140  "Should the query and subject defline(s) be parsed?", true);
141  arg_desc->SetCurrentGroup("");
142 
143  CDebugArgs debug_args;
144  debug_args.SetArgumentDescriptions(*arg_desc);
145 
146  SetupArgDescriptions(arg_desc.release());
147 }
148 
149 SSeqLoc
151  CRef<CScope> scope)
152 {
153  static bool first_time = true;
154  _ASSERT(scope);
155 
156  if ( !HasRawSequenceData(bioseq) && first_time ) {
159  first_time = false;
160  }
161  else {
162  scope->AddBioseq(bioseq);
163  }
164  CRef<CSeq_loc> seqloc(new CSeq_loc);
165  seqloc->SetWhole().Assign(*bioseq.GetFirstId());
166  return SSeqLoc(seqloc, scope);
167 }
168 
171 {
172  if ( !pssm.HasQuery() ) {
173  throw runtime_error("PSSM has no query");
174  }
176  const CSeq_entry& seq_entry = pssm.GetQuery();
177  if ( !seq_entry.IsSeq() ) {
178  throw runtime_error("Cannot have multiple queries in a PSSM");
179  }
180  SSeqLoc ssl = x_QueryBioseqToSSeqLoc(seq_entry.GetSeq(), scope);
182  retval.Reset(new CBlastSearchQuery(*ssl.seqloc, *ssl.scope));
183  _ASSERT(ssl.scope.GetPointer() == scope.GetPointer());
184  return retval;
185 }
186 
189 {
191  _ASSERT(b4_queries);
192  const size_t kNumQueries = b4_queries->GetNumQueries();
193 
195 
196  SDataLoaderConfig dlconfig(query_is_protein, SDataLoaderConfig::eUseNoDataLoaders);
199 
200  if (b4_queries->IsPssm()) {
201  retval->AddQuery(x_BuildQueryFromPssm(b4_queries->GetPssm()));
202  } else if (b4_queries->IsSeq_loc_list()) {
205  b4_queries->GetSeq_loc_list()) {
206  _ASSERT( !(*seqloc)->GetId()->IsLocal() );
208  *scope));
209  retval->AddQuery(query);
210  }
211  } else if (b4_queries->IsBioseq_set()) {
213  eDetectLoops));
215  for (; itr; ++itr) {
216  SSeqLoc ssl = x_QueryBioseqToSSeqLoc(*itr, scope);
218  *ssl.scope));
219  retval->AddQuery(query);
220  }
221  }
222 
223  (void)kNumQueries; // eliminate compiler warning;
224  _ASSERT(kNumQueries == retval->size());
225  return retval;
226 }
227 
228 /// Extracts the subject sequence data from remote_blast into a TSeqLocVector.
229 /// All subjects are added to/use the same CScope object
230 /// @param remote_blast Source of subject sequences
231 static TSeqLocVector
233 {
234  TSeqLocVector retval;
235  CRef<CScope> subj_scope(new CScope(*CObjectManager::GetInstance()));
236  if (remote_blast->GetSubjectSeqLocs().empty()) {
237  const list<CRef<CBioseq> > subjects =
238  remote_blast->GetSubjectSequences();
239  ITERATE(list<CRef<CBioseq> >, bioseq, subjects) {
240  subj_scope->AddBioseq(**bioseq);
241  CRef<CSeq_id> seqid = FindBestChoice((*bioseq)->GetId(),
243  const TSeqPos length = (*bioseq)->GetInst().GetLength();
244  CRef<CSeq_loc> sl(new CSeq_loc(*seqid, 0, length-1));
245  retval.push_back(SSeqLoc(sl, subj_scope));
246  }
247  } else {
248  const CBlast4_subject::TSeq_loc_list seqlocs =
249  remote_blast->GetSubjectSeqLocs();
251  retval.push_back(SSeqLoc(*sl, subj_scope));
252  }
253  }
254  return retval;
255 }
256 
257 bool
260  CRef<blast::CLocalDbAdapter>& db_adapter,
261  CRef<objects::CScope>& scope)
262 {
263  bool isRemote = false;
264  db_adapter.Reset();
265 
266  _ASSERT(db_args.NotEmpty());
267  CRef<CSearchDatabase> search_db = db_args->GetSearchDatabase();
268 
269  if (scope.Empty()) {
271  }
272 
273  CRef<IQueryFactory> subjects;
274  if ( (subjects = db_args->GetSubjects(scope)) ) {
275  _ASSERT(search_db.Empty());
276  char* bl2seq_legacy = getenv("BL2SEQ_LEGACY");
277  if (bl2seq_legacy) {
278  db_adapter.Reset(new CLocalDbAdapter(subjects, opts_hndl, false));
279  }
280  else {
281  db_adapter.Reset(new CLocalDbAdapter(subjects, opts_hndl, true));
282  }
283  } else {
284  _ASSERT(search_db.NotEmpty());
285  try {
286  // Try to open the BLAST database even for remote searches, as if
287  // it is available locally, it will be better to fetch the
288  // sequence data for formatting from this (local) source
289  CRef<CSeqDB> seqdb = search_db->GetSeqDb();
290  db_adapter.Reset(new CLocalDbAdapter(*search_db));
292  LOG_POST(Info <<"Add local loader " << search_db->GetDatabaseName());
293  } catch (const CSeqDBException&) {
295  string remote_loader = kEmptyStr;
296  try {
297  db_adapter.Reset(new CLocalDbAdapter(*search_db));
300  search_db->GetDatabaseName(),
303  .GetLoader()->GetName();
304  scope->AddDataLoader(remote_loader, CBlastDatabaseArgs::kSubjectsDataLoaderPriority);
306  isRemote = true;
307  LOG_POST(Info <<"Remote " << search_db->GetDatabaseName());
308  }
309  catch (CException & ) {
311  NCBI_THROW(CException, eUnknown, "Fail to initialize local or remote DB" );
312  }
313  }
314  }
315  try {
316  const int kGenbankLoaderPriority = 99;
317  CRef<CReader> reader(new CId2Reader);
318  reader->SetPreopenConnection(false);
319  string genbank_loader = CGBDataLoader::RegisterInObjectManager
321  scope->AddDataLoader(genbank_loader, kGenbankLoaderPriority);
322  } catch (const CException& ) {
323  LOG_POST(Info << "Failed to add genbank dataloader");
324  // It's ok not to have genbank loader
325  }
326  return isRemote;
327 }
328 
330 {
331  int retval = 0;
332  const CArgs& args = GetArgs();
333  const string& kRid = args[kArgRid].HasValue()
334  ? args[kArgRid].AsString() : kEmptyStr;
335  CNcbiOstream& out = args[kArgOutput].AsOutputFile();
336  CFormattingArgs fmt_args(false, CFormattingArgs::eIsSAM) ;
337 
339  CBlastOptions& opts = opts_handle->SetOptions();
340  fmt_args.ExtractAlgorithmOptions(args, opts);
341  {{
342  CDebugArgs debug_args;
343  debug_args.ExtractAlgorithmOptions(args, opts);
344  if (debug_args.ProduceDebugOutput()) {
345  opts.DebugDumpText(NcbiCerr, "BLAST options", 1);
346  }
347  }}
348 
349 
350  const EBlastProgramType p = opts.GetProgramType();
351  if((fmt_args.GetFormattedOutputChoice() == CFormattingArgs::eSAM) &&
352  (p != eBlastTypeBlastn )) {
353  NCBI_THROW(CInputException, eInvalidInput,
354  "SAM format is only applicable to blastn results" );
355  }
356 
357  CRef<CBlastQueryVector> queries =
358  x_ExtractQueries(Blast_QueryIsProtein(p)?true:false);
359  CRef<CScope> scope = queries->GetScope(0);
360  _ASSERT(queries);
361 
362  CRef<CBlastDatabaseArgs> db_args(new CBlastDatabaseArgs()); // FIXME, what about rpsblast?
363  int filtering_algorithm = -1;
364  if (m_RmtBlast->IsDbSearch())
365  {
367  _ASSERT(db);
368  _TRACE("Fetching results for " + Blast_ProgramNameFromType(p) + " on "
369  + db->GetName());
370  filtering_algorithm = m_RmtBlast->GetDbFilteringAlgorithmId();
371  CRef<CSearchDatabase> search_db(new CSearchDatabase(db->GetName(), db->IsProtein()
374 
375  if(m_RmtBlast->GetTaxidList().size() > 0) {
376  CSeqDBGiList *gilist = new CSeqDBGiList();
377  gilist->AddTaxIds(m_RmtBlast->GetTaxidList());
378  search_db->SetGiList(gilist);
379  }
380 
381  if(m_RmtBlast->GetNegativeTaxidList().size() > 0) { // search excluded some taxids: rebuild that exclusion filter
382  CSeqDBGiList *gilist = new CSeqDBGiList();
383  gilist->AddTaxIds(m_RmtBlast->GetNegativeTaxidList()); // must come from the negative list; the positive list is handled in the block above
384  search_db->SetNegativeGiList(gilist);
385  }
386  db_args->SetSearchDatabase(search_db);
387  }
388  else
389  {
391  CRef<IQueryFactory> subject_factory(new CObjMgr_QueryFactory(subjects));
392  CRef<CScope> subj_scope = subjects.front().scope;
393  db_args->SetSubjects(subject_factory, subj_scope,
394  Blast_SubjectIsProtein(p)?true:false);
395  }
396 
397  CRef<CLocalDbAdapter> db_adapter;
398  bool isRemoteLoader = s_InitializeSubject(db_args, opts_handle, db_adapter, scope);
399 
400  const string kTask = m_RmtBlast->GetTask();
401 
402  CBlastFormat formatter(opts, *db_adapter,
403  fmt_args.GetFormattedOutputChoice(),
404  static_cast<bool>(args[kArgParseDeflines]),
405  out,
406  fmt_args.GetNumDescriptions(),
407  fmt_args.GetNumAlignments(),
408  *scope,
409  opts.GetMatrixName(),
410  fmt_args.ShowGis(),
411  fmt_args.DisplayHtmlOutput(),
412  opts.GetQueryGeneticCode(),
413  opts.GetDbGeneticCode(),
414  opts.GetSumStatisticsMode(),
415  (!kRid.empty() || isRemoteLoader),
416  filtering_algorithm,
417  fmt_args.GetCustomOutputFormatSpec(),
418  kTask == "megablast",
419  opts.GetMBIndexLoaded(),
420  NULL, NULL,
422  formatter.SetLineLength(fmt_args.GetLineLength());
423  formatter.SetHitsSortOption(fmt_args.GetHitsSortOption());
424  formatter.SetHspsSortOption(fmt_args.GetHspsSortOption());
425  formatter.SetCustomDelimiter(fmt_args.GetCustomDelimiter());
426  if(UseXInclude(fmt_args, args[kArgOutput].AsString())) {
427  formatter.SetBaseFile(args[kArgOutput].AsString());
428  }
430  formatter.PrintProlog();
431  bool isPsiBlast = ("psiblast" == kTask);
432  if (fmt_args.ArchiveFormatRequested(args))
433  {
434  if(isPsiBlast)
435  {
437  if(!pssm.Empty())
438  {
439  formatter.WriteArchive(*pssm, *opts_handle, *results, m_RmtBlast->GetPsiNumberOfIterations());
440  }
441  else
442  {
443  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(*queries));
444  formatter.WriteArchive(*query_factory, *opts_handle, *results, m_RmtBlast->GetPsiNumberOfIterations());
445  }
446  }
447  else
448  {
449  CRef<IQueryFactory> query_factory(new CObjMgr_QueryFactory(*queries));
450  formatter.WriteArchive(*query_factory, *opts_handle, *results);
451  }
452  } else {
453  while (1)
454  {
455  BlastFormatter_PreFetchSequenceData(*results, scope,
456  fmt_args.GetFormattedOutputChoice());
457  ITERATE(CSearchResultSet, result, *results) {
458  if(isPsiBlast)
459  {
460  formatter.PrintOneResultSet(**result, queries,
462  }
463  else
464  {
465  formatter.PrintOneResultSet(**result, queries);
466  }
467  }
468  // The entire archive file (multiple sets) is formatted in this loop for XML.
469  // That does not work for other formats. Ugly, but that's where it's at now.
475  || !m_RmtBlast->LoadFromArchive()) {
476  break;
477  }
478  // Reset these for next set from archive
479  results.Reset(m_RmtBlast->GetResultSet());
480  queries.Reset(x_ExtractQueries(Blast_QueryIsProtein(p)?true:false));
481  _ASSERT(queries);
483  scope.Reset(queries->GetScope(0));
484  }
485  else {
486  scope->AddScope(*(queries->GetScope(0)));
487  }
488  s_InitializeSubject(db_args, opts_handle, db_adapter, scope);
489  }
490  }
491  formatter.PrintEpilog(opts);
492  return retval;
493 }
494 
495 #define EXIT_CODE__UNKNOWN_RID 1
496 #define EXIT_CODE__SEARCH_PENDING 2
497 #define EXIT_CODE__SEARCH_FAILED 3
498 
500 {
501  int status = 0;
502  const CArgs& args = GetArgs();
503 
504  try {
506  if (args[kArgArchive].HasValue()) {
507  CNcbiIstream& istr = args[kArgArchive].AsInputFile();
508  try { m_RmtBlast.Reset(new CRemoteBlast(istr)); }
509  catch (const CBlastException& e) {
511  NCBI_RETHROW(e, CInputException, eInvalidInput,
512  "Invalid input format for BLAST Archive.");
513  }
514  }
515 
516  m_LoadFromArchive = true;
517  try {
518  while (m_RmtBlast->LoadFromArchive()) {
519  if(!m_RmtBlast->IsErrMsgArchive()) {
520  status = PrintFormattedOutput();
521  }
522  }
523  } catch (const CSerialException& e) {
524  NCBI_RETHROW(e, CInputException, eInvalidInput,
525  "Invalid input format for BLAST Archive.");
526  }
527  x_AddCmdOptions();
529  return status;
530  }
531 
532  const string kRid = args[kArgRid].AsString();
533  m_RmtBlast.Reset(new CRemoteBlast(kRid));
534  {{
535  CDebugArgs debug_args;
536  CBlastOptions dummy_options;
537  debug_args.ExtractAlgorithmOptions(args, dummy_options);
538  if (debug_args.ProduceDebugRemoteOutput()) {
540  }
541  }}
542 
543  switch (m_RmtBlast->CheckStatus()) {
545  cerr << "Unknown/invalid RID '" << kRid << "'." << endl;
546  status = EXIT_CODE__UNKNOWN_RID;
547  break;
548 
550  status = PrintFormattedOutput();
551  break;
552 
554  cerr << "RID '" << kRid << "' is still pending." << endl;
555  status = EXIT_CODE__SEARCH_PENDING;
556  break;
557 
559  cerr << "RID '" << kRid << "' has failed" << endl;
560  cerr << m_RmtBlast->GetErrors() << endl;
561  status = EXIT_CODE__SEARCH_FAILED;
562  break;
563 
564  default:
565  abort();
566  }
567 
568  } CATCH_ALL(status)
569  x_AddCmdOptions();
571  return status;
572 }
573 
575 {
576  const CArgs & args = GetArgs();
577  if (args[kArgRid].HasValue()) {
579  }
580  else if (args[kArgArchive].HasValue()) {
582  }
583 
584  if(args["outfmt"].HasValue()) {
585  m_UsageReport.AddParam(CBlastUsageReport::eOutputFmt, args["outfmt"].AsString());
586  }
587 }
588 
589 
590 #ifndef SKIP_DOXYGEN_PROCESSING
591 int main(int argc, const char* argv[] /*, const char* envp[]*/)
592 {
593  return CBlastFormatterApp().AppMain(argc, argv); // run the formatter application; AppMain()'s result is the process exit code
594 }
595 #endif /* SKIP_DOXYGEN_PROCESSING */
Declares singleton objects to store the version and reference for the BLAST engine.
Produce formatted blast output for command line applications.
Data loader implementation that uses the blast databases remotely.
string RegisterOMDataLoader(CRef< CSeqDB > db_handle)
Register the BLAST database data loader using the already initialized CSeqDB object.
string GetCmdlineArgs(const CNcbiArguments &a)
void BlastFormatter_PreFetchSequenceData(const blast::CSearchResultSet &results, CRef< CScope > scope, blast::CFormattingArgs::EOutputFormat format_type)
This method optimizes the retrieval of sequence data to scope.
bool UseXInclude(const CFormattingArgs &f, const string &s)
Utility functions for BLAST command line applications.
#define CATCH_ALL(exit_code)
Standard catch statement for all BLAST command line programs.
USING_SCOPE(blast)
bool s_InitializeSubject(CRef< blast::CBlastDatabaseArgs > db_args, CRef< blast::CBlastOptionsHandle > opts_hndl, CRef< blast::CLocalDbAdapter > &db_adapter, CRef< objects::CScope > &scope)
#define EXIT_CODE__UNKNOWN_RID
static TSeqLocVector s_ConvertSubjects2TSeqLocVector(CRef< CRemoteBlast > remote_blast)
Extracts the subject sequence data from remote_blast into a TSeqLocVector.
#define EXIT_CODE__SEARCH_PENDING
int main(int argc, const char *argv[])
USING_NCBI_SCOPE
#define EXIT_CODE__SEARCH_FAILED
Auxiliary classes/functions for BLAST input library.
bool HasRawSequenceData(const objects::CBioseq &bioseq)
Returns true if the Bioseq passed as argument has the full, raw sequence data in its Seq-inst field.
Boolean Blast_SubjectIsProtein(EBlastProgramType p)
Returns true if the subject is protein.
Definition: blast_program.c:50
Boolean Blast_QueryIsProtein(EBlastProgramType p)
Returns true if the query is protein.
Definition: blast_program.c:40
EBlastProgramType
Defines the engine's notion of the different applications of the BLAST algorithm.
Definition: blast_program.h:72
@ eBlastTypeBlastn
Definition: blast_program.h:74
CArgDescriptions –.
Definition: ncbiargs.hpp:541
CArgs –.
Definition: ncbiargs.hpp:379
const CSeq_id * GetFirstId() const
Definition: Bioseq.cpp:271
bool IsProtein() const
Returns true if the database is protein.
size_t GetNumQueries() const
Retrieve the number of queries in this object.
Argument class to collect database/subject arguments.
Definition: blast_args.hpp:889
static const int kSubjectsDataLoaderPriority
The default priority for subjects, should be used for subjects/databases.
Definition: blast_args.hpp:893
void SetSubjects(CRef< IQueryFactory > subjects, CRef< CScope > scope, bool is_protein)
Sets the subject sequences.
Definition: blast_args.hpp:946
void SetSearchDatabase(CRef< CSearchDatabase > search_db)
Set the search database information.
Definition: blast_args.hpp:939
@ eNucleotide
nucleotide database
Definition: bdbloader.hpp:58
@ eProtein
protein database
Definition: bdbloader.hpp:59
Defines BLAST error codes (user errors included)
This class formats the BLAST results for command line applications.
void SetHitsSortOption(int hitsSortOption)
void SetHspsSortOption(int hspsSortOption)
void PrintOneResultSet(const blast::CSearchResults &results, CConstRef< blast::CBlastQueryVector > queries, unsigned int itr_num=numeric_limits< unsigned int >::max(), blast::CPsiBlastIterationState::TSeqIds prev_seqids=blast::CPsiBlastIterationState::TSeqIds(), bool is_deltablast_domain_result=false)
Print all alignment information for a single query sequence along with any errors or warnings (errors...
void SetCustomDelimiter(string customDelim)
void PrintEpilog(const blast::CBlastOptions &options)
Print the footer of the blast report.
void SetBaseFile(string base)
For use by XML2 only.
void SetLineLength(size_t len)
Set Alignment Length.
void WriteArchive(blast::IQueryFactory &queries, blast::CBlastOptionsHandle &options_handle, const blast::CSearchResultSet &results, unsigned int num_iters=0, const list< CRef< objects::CBlast4_error > > &msg=list< CRef< objects::CBlast4_error > >())
Writes out the query and results as an "archive" format.
void PrintProlog()
Print the header of the blast report.
The application class.
bool m_LoadFromArchive
Tracks whether results come from an archive file.
virtual int Run()
@inheritDoc
virtual void Init()
@inheritDoc
CRef< CRemoteBlast > m_RmtBlast
Our link to the NCBI BLAST service.
CBlastUsageReport m_UsageReport
CRef< CBlastQueryVector > x_ExtractQueries(bool query_is_protein)
Extracts the queries to be formatted.
int PrintFormattedOutput(void)
Prints the BLAST formatted output.
CRef< CBlastScopeSource > m_QueryScopeSource
The source of CScope objects for queries.
SSeqLoc x_QueryBioseqToSSeqLoc(const CBioseq &bioseq, CRef< CScope > scope)
Package a scope and Seq-loc into a SSeqLoc from a Bioseq.
CBlastFormatterApp()
@inheritDoc
CRef< CBlastSearchQuery > x_BuildQueryFromPssm(const CPssmWithParameters &pssm)
Build the query from a PSSM.
Encapsulates ALL the BLAST algorithm's options.
Query Vector.
Definition: sseqloc.hpp:276
void AddQuery(CRef< CBlastSearchQuery > q)
Add a query to the set.
Definition: sseqloc.hpp:293
CRef< objects::CScope > GetScope(size_type i) const
Get the scope containing a query by index.
Definition: sseqloc.hpp:322
size_type size() const
Identical to Size, provided to facilitate STL-style iteration.
Definition: sseqloc.hpp:382
Class whose purpose is to create CScope objects which have data loaders added with different prioriti...
void AddDataLoaders(CRef< objects::CScope > scope)
Add the data loader configured in the object to the provided scope.
CRef< objects::CScope > NewScope()
Create a new, properly configured CScope.
Search Query.
Definition: sseqloc.hpp:147
void AddParam(EUsageParams p, int val)
Keeps track of the version of the BLAST engine in the NCBI C++ toolkit.
Definition: version.hpp:53
Argument class to collect debugging options.
bool ProduceDebugRemoteOutput() const
Return whether debug (verbose) output should be produced on remote searches (only available when comp...
virtual void ExtractAlgorithmOptions(const CArgs &cmd_line_args, CBlastOptions &options)
Interface method.
bool ProduceDebugOutput() const
Return whether debug (verbose) output should be produced (only available when compiled with _DEBUG)
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method.
void DebugDumpText(ostream &out, const string &bundle, unsigned int depth) const
Definition: ddumpable.cpp:56
Argument class to collect formatting options, use this to create a CBlastFormat object.
int GetHitsSortOption() const
virtual bool ArchiveFormatRequested(const CArgs &args) const
virtual void SetArgumentDescriptions(CArgDescriptions &arg_desc)
Interface method.
string GetCustomOutputFormatSpec() const
Retrieve the string that specifies the custom output format for tabular and comma-separated value.
EOutputFormat GetFormattedOutputChoice() const
Get the choice of formatted output.
virtual void ExtractAlgorithmOptions(const CArgs &args, CBlastOptions &opts)
Interface method.
@ eJson
JSON XInclude.
@ eXml2
XML2 XInclude.
@ eSAM
SAM format.
@ eXml2_S
XML2 single file.
@ eJson_S
JSON2 single file.
@ eXml
XML output.
int GetHspsSortOption() const
TSeqPos GetNumAlignments() const
Number of alignments to show in traditional BLAST output.
bool ShowGis() const
Display the NCBI GIs in formatted output?
TSeqPos GetNumDescriptions() const
Number of one-line descriptions to show in traditional BLAST output.
size_t GetLineLength() const
bool DisplayHtmlOutput() const
Display HTML output?
string GetCustomDelimiter()
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, CReader *reader=0, CObjectManager::EIsDefault is_default=CObjectManager::eDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
Definition: gbloader.cpp:366
Defines user input exceptions.
Interface to create a BlastSeqSrc suitable for use in CORE BLAST from a variety of BLAST database/s...
NCBI C++ Object Manager dependent implementation of IQueryFactory.
const CSeq_entry & GetQuery() const
Retrieve the query sequence.
bool HasQuery() const
Has this PSSM a query in it?
void SetPreopenConnection(bool preopen=true)
Definition: reader.cpp:207
CRef –.
Definition: ncbiobj.hpp:618
static TRegisterLoaderInfo RegisterInObjectManager(CObjectManager &om, const string &dbname="nr", const EDbType dbtype=eUnknown, bool use_fixed_size_slices=true, CObjectManager::EIsDefault is_default=CObjectManager::eNonDefault, CObjectManager::TPriority priority=CObjectManager::kPriority_NotSet)
API for Remote Blast Requests.
CScope –.
Definition: scope.hpp:92
Blast Search Subject.
Search Results for All Queries.
CSeqDBException.
Definition: seqdbcommon.hpp:73
CSeqDBGiList.
void AddTaxIds(const set< TTaxId > &tax_ids)
Definition: Seq_entry.hpp:56
Root class for all serialization exceptions.
Definition: exception.hpp:50
CStopWatch –.
Definition: ncbitime.hpp:1938
Template class for iteration on objects of class C (non-modifiable version)
Definition: iterator.hpp:767
size_type size() const
Definition: set.hpp:132
const string kArgOutput
Output file name.
const string kArgArchive
Argument to blast_formatter to request BLAST archive file name.
const string kTask
Task to perform.
const string kArgParseDeflines
Argument to specify if the query and subject sequences defline should be parsed.
const string kArgRid
Argument to blast_formatter to request RID.
void Print(const CCompactSAMApplication::AlignInfo &ai)
std::ofstream out("events_result.xml")
main entry point for tests
bool LoadFromArchive()
Loads next chunk of archive from file.
CRef< CBlastOptionsHandle > GetSearchOptions()
Get the search options used for this search.
string GetDatabaseName() const
Accessor for the database name.
CRef< objects::CPssmWithParameters > GetPSSM(void)
Get the PSSM produced by the search.
int GetDbGeneticCode() const
void SetNegativeGiList(CSeqDBGiList *gilist)
Mutator for the negative gi list.
string GetErrors(void)
This returns a string containing any errors that were produced by the search.
set< TTaxId > & GetNegativeTaxidList()
CRef< CSeqDB > GetSeqDb() const
Obtain a reference to the database.
ESearchStatus CheckStatus()
Returns the status of a previously submitted search/RID.
void SetVerbose(EDebugMode verb=eDebug)
Adjust the debugging level.
string GetTask() const
Returns the task used to create the remote search (if any)
bool IsProtein() const
Determine whether this database contains protein sequences or not.
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
CRef< objects::CBlast4_queries > GetQueries()
Get the queries used for this search.
EBlastProgramType GetProgramType() const
Returns the CORE BLAST notion of program type.
int GetQueryGeneticCode() const
CRef< objects::CBlast4_database > GetDatabases()
Get the database used by the search.
bool GetSumStatisticsMode() const
Sum statistics options.
CRef< CSearchResultSet > GetResultSet()
Submit the search (if necessary) and return the results.
bool GetMBIndexLoaded() const
string Blast_ProgramNameFromType(EBlastProgramType program)
Returns a string program name, given a blast::EBlastProgramType enumeration.
Definition: blast_aux.cpp:813
CBlast4_subject::TSeq_loc_list GetSubjectSeqLocs()
set< TTaxId > & GetTaxidList()
bool IsErrMsgArchive(void)
Int4 GetDbFilteringAlgorithmId() const
Returns the filtering algorithm ID used in the database.
list< CRef< objects::CBioseq > > GetSubjectSequences()
Returns subject sequences if "bl2seq" mode used.
const char * GetMatrixName() const
bool IsDbSearch()
Return values states whether GetDatabases or GetSubjectSequences call should be used.
unsigned int GetPsiNumberOfIterations(void)
void SetGiList(CSeqDBGiList *gilist)
Mutator for the gi list.
@ eInvalidArgument
Invalid argument to some function/method (could be programmer error - prefer assertions in those case...
@ eStatus_Failed
Completed but failed, call GetErrors/GetErrorVector()
@ eStatus_Pending
Not completed yet.
@ eStatus_Unknown
Never submitted or purged from the system.
@ eStatus_Done
Completed successfully.
@ eBlastDbIsNucleotide
nucleotide
@ eBlastDbIsProtein
protein
void SetFullVersion(CRef< CVersionAPI > version)
Set version data for the program.
Definition: ncbiapp.cpp:1154
void HideStdArgs(THideStdArgs hide_mask)
Set the hide mask for the Hide Std Flags.
Definition: ncbiapp.cpp:1292
unsigned int TSeqPos
Type for sequence locations and lengths.
Definition: ncbimisc.hpp:875
virtual const CArgs & GetArgs(void) const
Get parsed command line arguments.
Definition: ncbiapp.cpp:285
int AppMain(int argc, const char *const *argv, const char *const *envp=0, EAppDiagStream diag=eDS_Default, const char *conf=NcbiEmptyCStr, const string &name=NcbiEmptyString)
Main function (entry point) for the NCBI application.
Definition: ncbiapp.cpp:799
CVersionInfo GetVersion(void) const
Get the program version information.
Definition: ncbiapp.cpp:1164
virtual void SetupArgDescriptions(CArgDescriptions *arg_desc)
Setup the command line argument descriptions.
Definition: ncbiapp.cpp:1175
#define ITERATE(Type, Var, Cont)
ITERATE macro to sequence through container elements.
Definition: ncbimisc.hpp:815
const CNcbiArguments & GetArguments(void) const
Get the application's cached unprocessed command-line arguments.
@ fHideXmlHelp
Hide XML help description.
@ fHideLogfile
Hide log file description.
@ fHideFullVersion
Hide full version description.
@ fHideDryRun
Hide dryrun description.
@ fHideConffile
Hide configuration file description.
@ eExcludes
One argument excludes another.
Definition: ncbiargs.hpp:957
@ eInputFile
Name of file (must exist and be readable)
Definition: ncbiargs.hpp:595
@ eString
An arbitrary string.
Definition: ncbiargs.hpp:589
@ eOutputFile
Name of file (must be writable)
Definition: ncbiargs.hpp:596
#define NULL
Definition: ncbistd.hpp:225
#define _TRACE(message)
Definition: ncbidbg.hpp:122
EDiagSev SetDiagPostLevel(EDiagSev post_sev=eDiag_Error)
Set the threshold severity for posting the messages.
Definition: ncbidiag.cpp:6129
#define LOG_POST(message)
This macro is deprecated and it's strongly recommended to move in all projects (except tests) to macro...
Definition: ncbidiag.hpp:226
@ eDiag_Warning
Warning message.
Definition: ncbidiag.hpp:652
@ eDiag_Critical
Critical error message.
Definition: ncbidiag.hpp:654
TErrCode GetErrCode(void) const
Get error code.
Definition: ncbiexpt.cpp:453
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define NCBI_RETHROW(prev_exception, exception_class, err_code, message)
Generic macro to re-throw an exception.
Definition: ncbiexpt.hpp:737
void Info(CExceptionArgs_Base &args)
Definition: ncbiexpt.hpp:1185
@ eUnknown
Definition: app_popup.hpp:72
static int BestRank(const CRef< CSeq_id > &id)
Definition: Seq_id.hpp:742
void SetWhole(TWhole &v)
Definition: Seq_loc.hpp:982
CConstBeginInfo ConstBegin(const C &obj)
Get starting point of non-modifiable object hierarchy.
Definition: iterator.hpp:1012
@ eDetectLoops
Definition: iterator.hpp:998
TLoader * GetLoader(void) const
Get pointer to the loader.
CBioseq_Handle AddBioseq(CBioseq &bioseq, TPriority pri=kPriority_Default, EExist action=eExist_Throw)
Add bioseq, return bioseq handle.
Definition: scope.cpp:530
static CRef< CObjectManager > GetInstance(void)
Return the existing object manager or create one.
void AddScope(CScope &scope, TPriority pri=kPriority_Default)
Add the scope's datasources as a single group with the given priority All data sources (data loaders ...
Definition: scope.cpp:516
TObjectType * GetPointer(void) THROWS_NONE
Get pointer.
Definition: ncbiobj.hpp:998
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
bool NotEmpty(void) const THROWS_NONE
Check if CRef is not empty – pointing to an object and has a non-null value.
Definition: ncbiobj.hpp:726
bool Empty(void) const THROWS_NONE
Check if CRef is empty – not pointing to any object, which means having a null value.
Definition: ncbiobj.hpp:719
bool IsEnabled(void)
Indicates whether application usage statistics collection is enabled for a current reporter instance.
IO_PREFIX::ostream CNcbiOstream
Portable alias for ostream.
Definition: ncbistre.hpp:149
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
#define NcbiCerr
Definition: ncbistre.hpp:544
#define kEmptyStr
Definition: ncbistr.hpp:123
double Elapsed(void) const
Return time elapsed since first Start() or last Restart() call (in seconds).
Definition: ncbitime.hpp:2776
void Start(void)
Start the timer.
Definition: ncbitime.hpp:2765
C::value_type FindBestChoice(const C &container, F score_func)
Find the best choice (lowest score) for values in a container.
Definition: ncbiutil.hpp:250
#define CVersion
bool IsSeq_loc_list(void) const
Check if variant Seq_loc_list is selected.
const TName & GetName(void) const
Get the Name member data.
bool IsPssm(void) const
Check if variant Pssm is selected.
list< CRef< CSeq_loc > > TSeq_loc_list
const TBioseq_set & GetBioseq_set(void) const
Get the variant data.
bool IsBioseq_set(void) const
Check if variant Bioseq_set is selected.
const TSeq_loc_list & GetSeq_loc_list(void) const
Get the variant data.
const TPssm & GetPssm(void) const
Get the variant data.
list< CRef< CSeq_loc > > TSeq_loc_list
const TSeq & GetSeq(void) const
Get the variant data.
Definition: Seq_entry_.cpp:102
bool IsSeq(void) const
Check if variant Seq is selected.
Definition: Seq_entry_.hpp:257
static int version
Definition: mdb_load.c:29
Defines the CNcbiApplication and CAppException classes for creating NCBI applications.
NCBI C++ stream class wrappers for triggering between "new" and "old" C++ stream libraries.
void abort()
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares the CRemoteBlast class.
vector< SSeqLoc > TSeqLocVector
Vector of sequence locations.
Definition: sseqloc.hpp:129
Configuration structure for the CBlastScopeSource.
void OptimizeForWholeLargeSequenceRetrieval(bool value=true)
Configures the BLAST database data loader to optimize the retrieval of *entire* large sequences.
@ eUseNoDataLoaders
Do not add any data loaders.
Structure to represent a single sequence to be fed to BLAST.
Definition: sseqloc.hpp:47
CConstRef< objects::CSeq_loc > seqloc
Seq-loc describing the sequence to use as query/subject to BLAST The types of Seq-loc currently suppo...
Definition: sseqloc.hpp:50
CRef< objects::CScope > scope
Scope where the sequence referenced can be found by the toolkit's object manager.
Definition: sseqloc.hpp:54
static string query
#define _ASSERT
else result
Definition: token2.c:20
Modified on Thu Nov 30 04:55:02 2023 by modify_doxy.py rev. 669887