NCBI C++ ToolKit
blast_services.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef ALGO_BLAST_API___REMOTE_SERVICES__HPP
2 #define ALGO_BLAST_API___REMOTE_SERVICES__HPP
3 
4 /* $Id: blast_services.hpp 84663 2018-11-27 18:22:00Z ucko $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Christiam Camacho, Kevin Bealer
30  *
31  */
32 
33 /// @file blast_services.hpp
34 /// Declares the CBlastServices class.
35 
36 #include <corelib/ncbistd.hpp>
37 #include <corelib/ncbiobj.hpp>
40 #include <objects/blast/names.hpp>
42 
43 /** @addtogroup AlgoBlast
44  *
45  * @{
46  */
47 
49 
51  /// Forward declarations of ASN.1 object classes
52  class CBioseq_set;
53  class CSeq_loc;
54  class CSeq_id;
55  class CSeq_align_set;
57 
58 using namespace ncbi::objects;
59 
60 #ifndef NCBI_MODULE
61 #define NCBI_MODULE NETBLAST
62 #endif
63 
64 /// CBlastServicesException
65 /// Exception class derived from CException; see EErrCode for its error codes.
66 
68 public:
69  /// Errors are classified into one of the following types.
70  enum EErrCode {
71  /// Argument validation failed.
73 
74  /// Files were missing or contents were incorrect.
76 
77  /// Request failed
79 
80  /// Memory allocation failed.
81  eMemErr
82  };
83 
84  /// Get a message describing the situation leading to the throw.
85  virtual const char* GetErrCodeString() const override
86  {
87  switch ( GetErrCode() ) {
88  case eArgErr: return "eArgErr";
89  case eFileErr: return "eFileErr";
90  case eRequestErr: return "eRequestErr";
91  default: return CException::GetErrCodeString();
92  }
93  }
94 
95  /// Include standard NCBI exception behavior.
97 };
98 
99 
100 
101 /// API for Remote Blast Services
102 ///
103 /// Class to obtain information and data from the Remote BLAST service that is
104 /// not associated with a specific BLAST search
105 
107 {
108 public:
109  /// Default constructor; the verbose flag (m_Verbose) starts out false.
110  CBlastServices() { m_Verbose = false; }
111 
112  /// Analogous to CRemoteBlast::SetVerbose. @param value new state of the verbose flag; defaults to true [in]
113  void SetVerbose(bool value = true) { m_Verbose = value; }
114 
115  /// Returns true if the BLAST database specified exists in the NCBI servers
116  /// @param dbname BLAST database name [in]
117  /// @param is_protein is this a protein database? [in]
118  bool IsValidBlastDb(const string& dbname, bool is_protein);
119 
120  /// Retrieve detailed information for one BLAST database
121  /// If information about multiple databases is needed, use
122  /// the other GetDatabaseInfo method.
123  ///
124  /// @param blastdb object describing the database for which to get
125  /// detailed information
126  /// @return Detailed information for the requested BLAST database or an
127  /// empty object if the requested database wasn't found
129  GetDatabaseInfo(CRef<objects::CBlast4_database> blastdb);
130 
131  /// Retrieve detailed information for databases listed
132  /// in the string. If more than one database is supplied,
133  /// they should be separated by spaces (e.g., "nt wgs est").
134  ///
135  /// @param dbname string listing the database(s)
136  /// @param is_protein is a protein for true, otherwise dna
137  /// @param found_all true if all databases were found.
138  /// @param missing_names pointer to an array with missing database(s)
139  /// @return Detailed information for the requested BLAST databases or an
140  /// empty vector if no databases were found.
141  vector< CRef<objects::CBlast4_database_info> >
142  GetDatabaseInfo(const string& dbname, bool is_protein, bool *found_all,
143  vector<string> *missing_names = NULL);
144  /// Same as GetDatabaseInfo, but retrieves the whole list of databases
145  vector< CRef<objects::CBlast4_database_info> >
146  GetDatabaseInfoLegacy(const string& dbname, bool is_protein, bool *found_all,
147  vector<string> *missing_names = NULL);
148  /// Retrieve organism specific repeats databases
149  vector< CRef<objects::CBlast4_database_info> >
150  GetOrganismSpecificRepeatsDatabases();
151 
152  /// Retrieve a list of NCBI taxonomy IDs for which there exists
153  /// windowmasker masking data to support an alternative organism specific
154  /// filtering
155  objects::CBlast4_get_windowmasked_taxids_reply::Tdata
157 
158  /// Defines a std::vector of CRef<CSeq_id>
159  typedef vector< CRef<objects::CSeq_id> > TSeqIdVector;
160  /// Defines a std::vector of CRef<CBioseq>
161  typedef vector< CRef<objects::CBioseq> > TBioseqVector;
162 
163  /// Get a set of Bioseqs without their sequence data given an input set of
164  /// Seq-ids.
165  ///
166  /// @param seqids A vector of Seq-ids for which Bioseqs are requested.
167  /// @param database A list of databases from which to get the sequences.
168  /// @param seqtype The residue type, 'p' for protein, 'n' for nucleotide.
169  /// @param bioseqs The vector used to return the requested Bioseqs.
170  /// @param errors A null-separated list of errors.
171  /// @param warnings A null-separated list of warnings.
172  /// @param verbose Produce verbose output. [in]
173  /// @param target_only Filter the defline to include only the requested id. [in]
174  /// @todo FIXME: Add retry logic in case of transient errors
175  static void
176  GetSequencesInfo(TSeqIdVector& seqids, // in
177  const string& database, // in
178  char seqtype, // 'p' or 'n'
179  TBioseqVector& bioseqs, // out
180  string& errors, // out
181  string& warnings, // out
182  bool verbose = false, // in
183  bool target_only = false); // in
184 
185  /// Get a set of Bioseqs given an input set of Seq-ids.
186  ///
187  /// This retrieves the Bioseqs corresponding to the given Seq-ids
188  /// from the blast4 server. Normally this will be much faster
189  /// than consulting ID1 separately for each sequence. Sometimes
190  /// there are multiple sequences for a given Seq-id. In such
191  /// cases, there are always 'non-ambiguous' ids available. This
192  /// interface does not currently address this issue, and will
193  /// simply return the Bioseqs corresponding to one of the
194  /// sequences. Errors will be returned if the operation cannot be
195  /// completed (or started). In the case of a sequence that cannot
196  /// be found, the error will indicate the index of (and Seq-id of)
197  /// the missing sequence; processing will continue, and the
198  /// sequences that can be found will be returned along with the
199  /// error.
200  ///
201  /// @param seqids A vector of Seq-ids for which Bioseqs are requested.
202  /// @param database A list of databases from which to get the sequences.
203  /// @param seqtype The residue type, 'p' for protein, 'n' for nucleotide.
204  /// @param bioseqs The vector used to return the requested Bioseqs.
205  /// @param errors A null-separated list of errors.
206  /// @param warnings A null-separated list of warnings.
207  /// @param verbose Produce verbose output. [in]
208  /// @param target_only Filter the defline to include only the requested id. [in]
209  /// @todo FIXME: Add retry logic in case of transient errors
210  static void
211  GetSequences(TSeqIdVector& seqids, // in
212  const string& database, // in
213  char seqtype, // 'p' or 'n'
214  TBioseqVector& bioseqs, // out
215  string& errors, // out
216  string& warnings, // out
217  bool verbose = false, // in
218  bool target_only = false); // in
219  /// Defines a std::vector of CRef<CSeq_interval>
220  typedef vector< CRef<objects::CSeq_interval> > TSeqIntervalVector;
221  /// Defines a std::vector of CRef<CSeq_data>
222  typedef vector< CRef<objects::CSeq_data> > TSeqDataVector;
223 
224  /// This retrieves (partial) sequence data from the remote BLAST server.
225  ///
226  /// @param seqids
227  /// A vector of Seq-intervals (TSeqIntervalVector) for which sequence data are requested. [in]
228  /// @param database
229  /// A list of databases from which to get the sequences. [in]
230  /// @param seqtype
231  /// The residue type, 'p' for protein, 'n' for nucleotide. [in]
232  /// @param ids
233  /// The sequence IDs for those sequences for which the seq data was
234  /// obtained successfully [out]
235  /// @param seq_data
236  /// Sequence data in CSeq_data format. [out]
237  /// @param errors
238  /// An error message (if any). [out]
239  /// @param warnings
240  /// A warning (if any). [out]
241  /// @param verbose
242  /// Produce verbose output. [in]
243  /// @todo FIXME: Add retry logic in case of transient errors
244  static void
245  GetSequenceParts(const TSeqIntervalVector & seqids, // in
246  const string & database, // in
247  char seqtype, // 'p' or 'n'
248  TSeqIdVector & ids, // out
249  TSeqDataVector & seq_data, // out
250  string & errors, // out
251  string & warnings, // out
252  bool verbose = false);// in
253 
254 private:
255 
256  /// Retrieve the BLAST databases available for searching
257  void x_GetAvailableDatabases();
258 
259  /// Look for a database matching this method's argument and return
260  /// detailed information about it.
261  /// @param blastdb database description
262  /// @return detailed information about the database requested or an empty
263  /// CRef<> if the database was not found
265  x_FindDbInfoFromAvailableDatabases(CRef<objects::CBlast4_database> blastdb);
266 
267  /// Prohibit copy construction.
269 
270  /// Prohibit assignment.
272 
273 
274  // Data
275 
276  /// BLAST databases available to search
277  objects::CBlast4_get_databases_reply::Tdata m_AvailableDatabases;
278  /// Taxonomy IDs for which there's windowmasker masking data at NCBI
279  objects::CBlast4_get_windowmasked_taxids_reply::Tdata m_WindowMaskedTaxIds;
280  /// Display verbose output to stdout?
281  bool m_Verbose;
282 };
283 
284 #undef NCBI_MODULE
285 
287 
288 /* @} */
289 
290 #endif /* ALGO_BLAST_API___REMOTE_SERVICES__HPP */
User-defined methods of the data storage class.
RemoteServicesException.
API for Remote Blast Services.
CObject –.
Definition: ncbiobj.hpp:180
CRef –.
Definition: ncbiobj.hpp:618
Include a standard set of the NCBI C++ Toolkit most basic headers.
CBlastServices & operator=(const CBlastServices &)
Prohibit assignment.
bool m_Verbose
Display verbose output to stdout?
CBlastServices()
Default constructor.
vector< CRef< objects::CSeq_data > > TSeqDataVector
Defines a std::vector of CRef<CSeq_data>
CBlastServices(const CBlastServices &)
Prohibit copy construction.
void GetTaxIdWithWindowMaskerSupport(set< int > &supported_taxids)
This function returns a list of NCBI taxonomy IDs for which there exists windowmasker masking data to...
vector< CRef< objects::CBioseq > > TBioseqVector
Defines a std::vector of CRef<CBioseq>
void SetVerbose(bool value=true)
Analogous to CRemoteBlast::SetVerbose.
objects::CBlast4_get_windowmasked_taxids_reply::Tdata m_WindowMaskedTaxIds
Taxonomy IDs for which there's windowmasker masking data at NCBI.
NCBI_EXCEPTION_DEFAULT(CBlastServicesException, CException)
Include standard NCBI exception behavior.
vector< CRef< objects::CSeq_interval > > TSeqIntervalVector
Defines a std::vector of CRef<CSeq_interval>
vector< CRef< objects::CSeq_id > > TSeqIdVector
Defines a std::vector of CRef<CSeq_id>
objects::CBlast4_get_databases_reply::Tdata m_AvailableDatabases
BLAST databases available to search.
virtual const char * GetErrCodeString() const override
Get a message describing the situation leading to the throw.
@ eArgErr
Argument validation failed.
@ eRequestErr
Request failed.
@ eFileErr
Files were missing or contents were incorrect.
#define NULL
Definition: ncbistd.hpp:225
EErrCode
Error types that an application can generate.
Definition: ncbiexpt.hpp:884
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
CSeq_loc(void)
constructors
Definition: Seq_loc.hpp:906
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define NCBI_XOBJREAD_EXPORT
Definition: ncbi_export.h:1315
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
use only n Cassandra database for the lookups</td > n</tr > n< tr > n< td > yes</td > n< td > do not use tables BIOSEQ_INFO and BLOB_PROP in the Cassandra database
Names used in blast4 network communications.
const GenericPointer< typename T::ValueType > T2 value
Definition: pointer.h:1227
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
true_type verbose
Definition: processing.cpp:890
Modified on Mon May 20 04:59:06 2024 by modify_doxy.py rev. 669887