NCBI C++ ToolKit
blast_options_handle.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_options_handle.cpp 99389 2023-03-21 13:02:48Z fongah2 $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Christiam Camacho
27  *
28  */
29 
30 /// @file blast_options_handle.cpp
31 /// Implementation for the CBlastOptionsHandle and the
32 /// CBlastOptionsFactory classes.
33 
34 #include <ncbi_pch.hpp>
52 
53 /** @addtogroup AlgoBlast
54  *
55  * @{
56  */
57 
59 BEGIN_SCOPE(blast)
60 
62  : m_DefaultsMode(false) // defaults mode starts out disabled
63 {
 // Allocate a new CBlastOptions for the requested locality and store it in
 // m_Opts, the options object this handle controls access to.
64  m_Opts.Reset(new CBlastOptions(locality));
65 }
66 
68  : m_Opts(opt), // wrap the options object supplied by the caller; nothing is allocated here
69  m_DefaultsMode(false) // defaults mode starts out disabled
70 {
71 }
72 
/// NOTE(review): presumably CBlastOptionsHandle::SetDefaults(), which resets
/// the object to all default values -- confirm against the header. The
/// statements between the two SetDefaultsMode() calls are not visible here.
73 void
75 {
 // Turn defaults mode on in the underlying options object; while it is on,
 // remote options ignore "Set" calls.
77  m_Opts->SetDefaultsMode(true);
 // Turn defaults mode off again so that later "Set" calls are honored.
86  m_Opts->SetDefaultsMode(false);
87  }
89 }
90 
/// Validate the options contained in this object.
/// @return whatever the underlying CBlastOptions::Validate() call returns.
91 bool
93 {
94  return m_Opts->Validate();
95 }
96 
/// Return the filtering string used, as reported by the underlying
/// CBlastOptions object.
/// NOTE(review): ownership of the returned char* is not visible in this
/// file -- confirm against CBlastOptions::GetFilterString() before freeing
/// or retaining it.
97 char*
99 {
100  return m_Opts->GetFilterString(); /* NCBI_FAKE_WARNING */
101 }
102 
/// Set the filter string on the underlying CBlastOptions object.
/// @param f      filter string, forwarded unchanged
/// @param clear  forwarded unchanged; defaults to true in the declaration
103 void
104 CBlastOptionsHandle::SetFilterString(const char* f, bool clear /* = true */)
105 {
106  m_Opts->SetFilterString(f, clear); /* NCBI_FAKE_WARNING */
107 }
108 
111 {
 // Factory body: build the options handle subclass that corresponds to the
 // requested `program`, constructed for the given `locality`, and return it
 // as a CBlastOptionsHandle*. Throws CBlastException (eInvalidArgument) for
 // eBlastNotSet and calls abort() for any value not listed below.
 // NOTE(review): the handle is allocated with plain `new`; how callers take
 // ownership is not visible in this file.
112  CBlastOptionsHandle* retval = NULL;
113 
114  switch (program) {
115  case eBlastn:
116  {
118  new CBlastNucleotideOptionsHandle(locality);
120  retval = opts;
121  break;
122  }
123 
124  case eBlastp:
125  retval = new CBlastAdvancedProteinOptionsHandle(locality);
126  break;
127 
128  case eBlastx:
129  retval = new CBlastxOptionsHandle(locality);
130  break;
131 
132  case eTblastn:
133  retval = new CTBlastnOptionsHandle(locality);
134  break;
135 
136  case eTblastx:
137  retval = new CTBlastxOptionsHandle(locality);
138  break;
139 
 // Megablast uses the same handle class as eBlastn.
140  case eMegablast:
141  {
143  new CBlastNucleotideOptionsHandle(locality);
145  retval = opts;
146  break;
147  }
148 
149  case eDiscMegablast:
150  retval = new CDiscNucleotideOptionsHandle(locality);
151  break;
152 
153  case eRPSBlast:
154  retval = new CBlastRPSOptionsHandle(locality);
155  break;
156 
157  case eRPSTblastn:
158  retval = new CRPSTBlastnOptionsHandle(locality);
159  break;
160 
161  case ePSIBlast:
162  retval = new CPSIBlastOptionsHandle(locality);
163  break;
164 
 // PSI-tblastn reuses the PSI-BLAST handle and then applies the
 // PSI-tblastn defaults. retval was constructed as a
 // CPSIBlastOptionsHandle on the previous line, so the cast targets the
 // object's own type.
165  case ePSITblastn:
166  retval = new CPSIBlastOptionsHandle(locality);
167  (dynamic_cast<CPSIBlastOptionsHandle *> (retval))->SetPSITblastnDefaults();
168  break;
169 
170  case ePHIBlastp:
171  retval = new CPHIBlastProtOptionsHandle(locality);
172  break;
173 
174  case ePHIBlastn:
175  retval = new CPHIBlastNuclOptionsHandle(locality);
176  break;
177 
178  case eDeltaBlast:
179  retval = new CDeltaBlastOptionsHandle(locality);
180  break;
181 
 // VecScreen is a nucleotide handle with the VecScreen defaults applied.
182  case eVecScreen:
183  {
185  new CBlastNucleotideOptionsHandle(locality);
186  opts->SetVecScreenDefaults();
187  retval = opts;
188  break;
189  }
190 
191  case eMapper:
192  retval = new CMagicBlastOptionsHandle(locality);
193  break;
194 
195  case eKBlastp:
196  retval = new CBlastpKmerOptionsHandle(locality);
197  break;
198 
 // "Not set" is rejected with an exception; the break after the throw
 // is never reached.
199  case eBlastNotSet:
200  NCBI_THROW(CBlastException, eInvalidArgument,
201  "eBlastNotSet may not be used as argument");
202  break;
203 
204  default:
205  abort(); // should never happen
206  }
207 
208  return retval;
209 }
210 
213 {
 // Build the set of supported task names for the requested task set.
 // eAll yields the union of every group below; eNuclNucl, eProtProt and
 // eMapping each yield only their own group.
214  set<string> retval;
215  if (choice == eNuclNucl || choice == eAll) {
216  retval.insert("blastn");
217  retval.insert("blastn-short");
218  retval.insert("megablast");
219  retval.insert("dc-megablast");
220  retval.insert("vecscreen");
221  // -RMH-
222  retval.insert("rmblastn");
223  }
224 
225  if (choice == eProtProt || choice == eAll) {
226  retval.insert("blastp");
227  retval.insert("blastp-short");
228  retval.insert("blastp-fast");
 // "kblastp" is left out of the protein-protein set; it is only
 // reported under eAll (see below).
229  // retval.insert("kblastp");
230  }
231 
 // Tasks that are reported only when every task is requested.
232  if (choice == eAll) {
233  retval.insert("psiblast");
234  //retval.insert("phiblastn"); // not supported yet
235  retval.insert("phiblastp");
236  retval.insert("rpsblast");
237  retval.insert("rpstblastn");
238  retval.insert("blastx");
239  retval.insert("blastx-fast");
240  retval.insert("deltablast");
241  retval.insert("tblastn");
242  retval.insert("tblastn-fast");
243  retval.insert("psitblastn");
244  retval.insert("tblastx");
245  retval.insert("kblastp");
246  }
247 
248  if (choice == eMapping || choice == eAll) {
249  retval.insert("mapper");
250  retval.insert("mapr2g");
251  retval.insert("mapr2r");
252  retval.insert("mapg2g");
253  }
254 
255  return retval;
256 }
257 
258 string
260 {
261  string task(task_name);
262  NStr::ToLower(task);
263  string retval;
264 
265  if (task == "blastn") {
266  retval.assign("Traditional BLASTN requiring an exact match of 11");
267  } else if (task == "blastn-short") {
268  retval.assign("BLASTN program optimized for sequences shorter than ");
269  retval += "50 bases";
270  } else if (task == "vecscreen") {
271  retval.assign("BLASTN with several options re-set for running VecScreen");
272  } else if (task == "rmblastn") {
273  retval.assign("BLASTN with complexity adjusted scoring and masklevel");
274  retval += "filtering";
275  } else if (task == "blastp") {
276  retval.assign("Traditional BLASTP to compare a protein query to a ");
277  retval += "protein database";
278  } else if (task == "blastp-short") {
279  retval.assign("BLASTP optimized for queries shorter than 30 residues");
280  } else if (task == "blastp-fast") {
281  retval.assign("BLASTP optimized for faster runtime");
282  } else if (task == "blastx") {
283  retval.assign("Search of a (translated) nucleotide query against a ");
284  retval += "protein database";
285  } else if (task == "blastx-fast") {
286  retval.assign("Search of a (translated) nucleotide query against a ");
287  retval += "protein database with parameters optimized for faster runtime";
288  } else if (task == "dc-megablast") {
289  retval.assign("Discontiguous megablast used to find more distant ");
290  retval += "(e.g., interspecies) sequences";
291  } else if (task == "megablast") {
292  retval.assign("Traditional megablast used to find very similar ");
293  retval += "(e.g., intraspecies or closely related species) sequences";
294  } else if (NStr::StartsWith(task, "phiblast")) {
295  retval.assign("Limits BLASTP search to those subjects with a ");
296  retval += "pattern matching one in the query";
297  } else if (task == "psiblast") {
298  retval.assign("PSIBLAST that searches a (protein) profile against ");
299  retval += "a protein database";
300  } else if (task == "rpsblast") {
301  retval.assign("Search of a protein query against a database of motifs");
302  } else if (task == "rpstblastn") {
303  retval.assign("Search of a (translated) nucleotide query against ");
304  retval.append("a database of motifs");
305  } else if (task == "tblastn") {
306  retval.assign("Search of a protein query against a (translated) ");
307  retval += "nucleotide database";
308  } else if (task == "tblastn-fast") {
309  retval.assign("Search of a protein query against a (translated) ");
310  retval += "nucleotide database with parameters optimized for faster runtime";
311  } else if (task == "psitblastn") {
312  retval.assign("Search of a PSSM against a (translated) ");
313  retval += "nucleotide database";
314  } else if (task == "tblastx") {
315  retval.assign("Search of a (translated) nucleotide query against ");
316  retval += "a (translated) nucleotide database";
317  } else if (task == "deltablast") {
318  retval.assign("DELTA-BLAST builds profile using conserved domain ");
319  retval += "and uses this profile to search protein database";
320  } else if (task == "mapper") {
321  retval.assign("Map short reads to a genome");
322  } else if (task == "mapr2g") {
323  retval.assign("Map RNA-seq sequence to a genome");
324  } else if (task == "mapr2r") {
325  retval.assign("Map RNA-seq sequences to an mRNA database");
326  } else if (task == "mapg2g") {
327  retval.assign("Map genomic reads to a genome");
328  } else if (task == "kblastp") {
329  retval.assign("Kmer screenign followed by BLASTP");
330  } else {
331  retval.assign("Unknown task");
332  }
333  return retval;
334 }
335 
338 {
 // Factory body: build an options handle configured for the named task.
 // Most tasks delegate to Create() with the matching EProgram value; the
 // "-short", "-fast", "vecscreen", "rmblastn" and "mapr2*" variants start
 // from their base handle and then override individual settings.
 // ThrowIfInvalidTask() throws a CBlastException for an unknown task, so
 // the final abort() is only a safety net.
339  CBlastOptionsHandle* retval = NULL;
340 
 // NOTE(review): `task` is a by-value parameter and the ToLower overload
 // taking string& converts its argument in place, so `task` itself appears
 // to be lower-cased here as well as lc_task. The plain `task == "..."`
 // comparisons further down rely on that -- confirm which overload is
 // selected before changing this line.
341  string lc_task(NStr::ToLower(task));
342  ThrowIfInvalidTask(lc_task);
343 
344  if (!NStr::CompareNocase(task, "blastn") ||
345  !NStr::CompareNocase(task, "blastn-short") ||
346  // -RMH-
347  !NStr::CompareNocase(task, "rmblastn") ||
348  !NStr::CompareNocase(task, "vecscreen"))
349  {
351  dynamic_cast<CBlastNucleotideOptionsHandle*>
353  _ASSERT(opts);
354  if (!NStr::CompareNocase(task, "blastn-short"))
355  {
 // Overrides applied for the blastn-short task.
356  opts->SetMatchReward(1);
357  opts->SetMismatchPenalty(-3);
358  opts->SetEvalueThreshold(1000);
359  opts->SetWordSize(7);
360  opts->ClearFilterOptions();
361  }
362  else if (!NStr::CompareNocase(task, "vecscreen"))
363  {
364  opts->SetVecScreenDefaults();
365  }else if ( !NStr::CompareNocase(task, "rmblastn") )
366  {
367  // -RMH- This blastn only supports full matrix scoring.
368  opts->SetMatchReward(0);
369  opts->SetMismatchPenalty(0);
370  }
371  retval = opts;
372  }
373  else if (!NStr::CompareNocase(task, "megablast"))
374  {
375  retval = CBlastOptionsFactory::Create(eMegablast, locality);
376  }
377  else if (!NStr::CompareNocase(task, "dc-megablast"))
378  {
379  retval = CBlastOptionsFactory::Create(eDiscMegablast, locality);
380  }
381  else if (!NStr::CompareNocase(task, "blastp") ||
382  !NStr::CompareNocase(task, "blastp-short") ||
383  !NStr::CompareNocase(task, "blastp-fast"))
384  {
386  dynamic_cast<CBlastAdvancedProteinOptionsHandle*>
388  if (task == "blastp-short") {
 // Overrides applied for the blastp-short task.
389  opts->SetMatrixName("PAM30");
390  opts->SetGapOpeningCost(9);
391  opts->SetGapExtensionCost(1);
392  opts->SetEvalueThreshold(20000);
393  opts->SetWordSize(2);
394  opts->ClearFilterOptions();
395  } else if (task == "blastp-fast") {
396  opts->SetWordSize(5);
399  }
400  retval = opts;
401  }
402  else if (!NStr::CompareNocase(task, "psiblast"))
403  {
404  retval = CBlastOptionsFactory::Create(ePSIBlast, locality);
405  }
406  else if (!NStr::CompareNocase(task, "psitblastn"))
407  {
408  retval = CBlastOptionsFactory::Create(ePSITblastn, locality);
409  }
410  else if (!NStr::CompareNocase(task, "phiblastp"))
411  {
412  retval = CBlastOptionsFactory::Create(ePHIBlastp, locality);
413  }
414  else if (!NStr::CompareNocase(task, "rpsblast"))
415  {
416  retval = CBlastOptionsFactory::Create(eRPSBlast, locality);
417  }
418  else if (!NStr::CompareNocase(task, "rpstblastn"))
419  {
420  retval = CBlastOptionsFactory::Create(eRPSTblastn, locality);
421  }
422  else if (!NStr::CompareNocase(task, "blastx") ||
423  !NStr::CompareNocase(task, "blastx-fast"))
424  {
425  CBlastxOptionsHandle* opts =
426  dynamic_cast<CBlastxOptionsHandle*>
428  if (task == "blastx-fast") {
429  opts->SetWordSize(5);
432  }
433  retval = opts;
434  }
435  else if (!NStr::CompareNocase(task, "tblastn") ||
436  !NStr::CompareNocase(task, "tblastn-fast"))
437  {
438  CTBlastnOptionsHandle* opts =
439  dynamic_cast<CTBlastnOptionsHandle*>
441  if(task == "tblastn-fast") {
442  opts->SetWordSize(5);
445  }
446  retval = opts;
447  }
448  else if (!NStr::CompareNocase(task, "tblastx"))
449  {
450  retval = CBlastOptionsFactory::Create(eTblastx, locality);
451  }
452  else if (!NStr::CompareNocase(task, "deltablast"))
453  {
454  retval = CBlastOptionsFactory::Create(eDeltaBlast, locality);
455  }
456  else if (!NStr::CompareNocase(task, "mapper") ||
457  !NStr::CompareNocase(task, "mapr2g") ||
458  !NStr::CompareNocase(task, "mapr2r") ||
459  !NStr::CompareNocase(task, "mapg2g")) {
460 
462  dynamic_cast<CMagicBlastOptionsHandle*>
464 
 // Pick the defaults preset that matches the mapping task.
465  if (!NStr::CompareNocase(task, "mapr2g")) {
466  opts->SetRNAToGenomeDefaults();
467  }
468  else if (!NStr::CompareNocase(task, "mapr2r")) {
469  opts->SetRNAToRNADefaults();
470  }
471  else {
473  }
474 
475  retval = opts;
476  }
477  else if (!NStr::CompareNocase(task, "kblastp"))
478  {
479  retval = CBlastOptionsFactory::Create(eKBlastp, locality);
480  }
481  else
482  {
483  abort(); // should never get here
484  }
485  return retval;
486 }
487 
488 END_SCOPE(blast)
490 
491 
492 /* @} */
Declares the CBlastAdvancedProteinOptionsHandle class.
Declares the BLAST exception class.
Declares the CBlastNucleotideOptionsHandle class.
#define BLAST_WORD_THRESHOLD_BLASTP_FAST
@ eCompressedAaLookupTable
compressed alphabet (blastp) lookup table
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
Declares the CBlastProteinOptionsHandle class.
Declares the CBlastRPSOptionsHandle class.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
@ eBlastNotSet
Not yet set.
Definition: blast_types.hpp:57
@ eVecScreen
Vector screening.
Definition: blast_types.hpp:72
@ eTblastx
Translated nucl-Translated nucl.
Definition: blast_types.hpp:62
@ eBlastn
Nucl-Nucl (traditional blastn)
Definition: blast_types.hpp:58
@ eRPSBlast
protein-pssm (reverse-position-specific BLAST)
Definition: blast_types.hpp:63
@ ePHIBlastn
Nucleotide PHI BLAST.
Definition: blast_types.hpp:70
@ eBlastp
Protein-Protein.
Definition: blast_types.hpp:59
@ ePHIBlastp
Protein PHI BLAST.
Definition: blast_types.hpp:69
@ eMapper
Jumper alignment for mapping.
Definition: blast_types.hpp:73
@ ePSIBlast
PSI Blast.
Definition: blast_types.hpp:67
@ eTblastn
Protein-Translated nucl.
Definition: blast_types.hpp:61
@ eMegablast
Nucl-Nucl (traditional megablast)
Definition: blast_types.hpp:65
@ eDeltaBlast
Delta Blast.
Definition: blast_types.hpp:71
@ ePSITblastn
PSI Tblastn.
Definition: blast_types.hpp:68
@ eKBlastp
KMER screening and BLASTP.
Definition: blast_types.hpp:74
@ eDiscMegablast
Nucl-Nucl using discontiguous megablast.
Definition: blast_types.hpp:66
@ eRPSTblastn
nucleotide-pssm (RPS blast with translated query)
Definition: blast_types.hpp:64
@ eBlastx
Translated nucl-Protein.
Definition: blast_types.hpp:60
Declares the CBlastpKmerOptionsHandle class.
Declares the CBlastxOptionsHandle class.
Handle to the Advanced BLASTP options.
Defines BLAST error codes (user errors included)
Handle to the nucleotide-nucleotide options to the BLAST algorithm.
Handle to the options to the BLAST algorithm.
Encapsulates ALL the BLAST algorithm's options.
EAPILocality
Enumerates the possible contexts in which objects of this type can be used.
@ eRemote
To be used when running BLAST remotely.
Handle to the rpsblast options to the BLAST algorithm.
Handle to the KMER BLASTP options.
Handle to the translated nucleotide-protein options to the BLAST algorithm.
Handle to the protein-protein options to the BLAST algorithm.
Handle to the nucleotide-nucleotide options to the discontiguous BLAST algorithm.
Handle to the nucleotide mapping options to the BLAST algorithm.
Handle to the nucleotide PHI BLAST options.
Handle to the protein PHI BLAST options.
Handle to the protein-protein options to the BLAST algorithm.
Handle to the options for translated nucleotide-RPS blast.
Handle to the protein-translated nucleotide options to the BLAST algorithm.
Handle to the translated nucleotide-translated nucleotide options to the BLAST algorithm.
iterator_bool insert(const value_type &val)
Definition: set.hpp:149
Declares the CDeltaBlastOptionsHandle class.
Declares the CDiscNucleotideOptionsHandle class.
#define false
Definition: bool.h:36
void SetGapExtensionCost(int e)
Sets GapExtensionCost.
void SetEvalueThreshold(double eval)
Sets EvalueThreshold.
void SetWordThreshold(double wt)
Sets WordThreshold.
virtual void SetScoringOptionsDefaults()=0
Sets ScoringOptionsDefaults.
void SetWordSize(int ws)
Sets WordSize.
EAPILocality GetLocality() const
Return the locality used when the object was created.
virtual void SetLookupTableDefaults()=0
Sets LookupTableDefaults.
void SetMatchReward(int r)
Sets MatchReward.
virtual void SetSubjectSequenceOptionsDefaults()=0
Sets SubjectSequenceOptionsDefaults.
virtual void SetGappedExtensionDefaults()=0
Sets GappedExtensionDefaults.
static string GetDocumentation(const string &task_name)
Return the documentation for the provided task.
void SetGapOpeningCost(int g)
Sets GapOpeningCost.
virtual void SetQueryOptionDefaults()=0
Sets QueryOptionDefaults.
CBlastOptionsHandle(EAPILocality locality)
Default c-tor.
CRef< CBlastOptions > m_Opts
Data type this class controls access to.
void SetVecScreenDefaults()
Sets default options for VecScreen.
static CBlastOptionsHandle * Create(EProgram program, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested program,...
CBlastOptions & SetOptions()
Returns a reference to the internal options class which this object is a handle for.
virtual void SetRNAToGenomeDefaults()
void SetTraditionalBlastnDefaults()
Sets TraditionalBlastnDefaults.
void SetMismatchPenalty(int p)
Sets MismatchPenalty.
virtual void SetEffectiveLengthsOptionsDefaults()=0
Sets EffectiveLengthsOptionsDefaults.
bool Validate() const
Validate the options.
virtual void SetGenomeToGenomeDefaults()
static set< string > GetTasks(ETaskSets choice=eAll)
Retrieve the set of supported tasks.
void SetTraditionalMegablastDefaults()
Sets TraditionalMegablastDefaults.
void SetMatrixName(const char *matrix)
Sets MatrixName.
char * GetFilterString() const
Return the filtering string used.
void SetFilterString(const char *f, bool clear=true)
virtual void SetInitialWordOptionsDefaults()=0
Sets InitialWordOptionsDefaults.
virtual void SetRemoteProgramAndService_Blast3()=0
Set the program and service name for remote blast.
bool Validate() const
Validate the options contained in this object.
void SetDefaultsMode(bool dmode)
If this is true, remote options will ignore "Set" calls.
void SetFilterString(const char *f, bool clear=true)
Sets FilterString.
void SetWordSize(int ws)
Sets WordSize.
virtual void SetHitSavingOptionsDefaults()=0
Sets HitSavingOptionsDefaults.
virtual void SetDefaults()
Resets the state of the object to all default values.
void ThrowIfInvalidTask(const string &task)
Validates that the task provided is indeed a valid task, otherwise throws a CBlastException.
Definition: blast_aux.cpp:662
ETaskSets
Sets of tasks for the command line BLAST binaries.
char * GetFilterString() const
Returns FilterString.
void ClearFilterOptions()
Clears the filtering options.
void SetLookupTableType(ELookupTableType type)
static CBlastOptionsHandle * CreateTask(string task, EAPILocality locality=CBlastOptions::eLocal)
Creates an options handle object configured with default options for the requested task,...
@ eProtProt
Protein-protein tasks.
@ eNuclNucl
Nucleotide-nucleotide tasks.
@ eAll
Retrieve all available tasks.
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
static int CompareNocase(const CTempString s1, SIZE_TYPE pos, SIZE_TYPE n, const char *s2)
Case-insensitive compare of a substring with another string.
Definition: ncbistr.cpp:219
static bool StartsWith(const CTempString str, const CTempString start, ECase use_case=eCase)
Check if a string starts with a specified prefix value.
Definition: ncbistr.hpp:5412
static string & ToLower(string &str)
Convert string to lower case – string& version.
Definition: ncbistr.cpp:405
void abort()
double f(double x_, const double &y_)
Definition: njn_root.hpp:188
Declares the CPHIBlastNuclOptionsHandle class.
Declares the CPHIBlastProtOptionsHandle class.
Declares the CPSIBlastOptionsHandle class.
Declares the CRPSTBlastnOptionsHandle class.
Declares the CTBlastnOptionsHandle class.
Declares the CTBlastxOptionsHandle class.
#define _ASSERT
Modified on Sat Apr 13 11:43:08 2024 by modify_doxy.py rev. 669887