NCBI C++ ToolKit
uniform_search.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: uniform_search.hpp 84663 2018-11-27 18:22:00Z ucko $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Kevin Bealer
27  *
28  */
29 
30 /** @file uniform_search.hpp
31  * Uniform BLAST Search Interface.
32  * NOTE: This file contains work in progress and the APIs are likely to change;
33  * please do not rely on them until this notice is removed.
34  */
35 
36 #ifndef ALGO_BLAST_API___UNIFORM_SEARCH_HPP
37 #define ALGO_BLAST_API___UNIFORM_SEARCH_HPP
38 
45 
46 /** @addtogroup AlgoBlast
47  *
48  * @{
49  */
50 
52 
54  class CSeq_id;
55  class CSeq_align_set;
57 
58 BEGIN_SCOPE(blast)
59 
60 
61 // Errors & Warnings:
62 //
63 // An error is defined as a condition that halts or severely affects
64 // processing of one or more queries, and is represented as a string.
65 // A warning is defined as a detected condition or event that is
66 // reported by the search code, and may influence interpretation of
67 // the output, but does not stop the search or invalidate the
68 // results.
69 
70 /// Exception class
71 ///
72 /// Searches throw this when an error condition is detected in the
73 /// usage or execution of a search. An example of a case where an
74 /// exception is appropriate is when a database cannot be found for a
75 /// local search, or if a memory allocation fails. An example of a
76 /// non-exception error is if a search is completely masked.
77 
78 class CSearchException : public CException {
79 public:
80  /// Errors are classified into one of three types (eConfigErr, eMemErr, eInternal).
81  enum EErrCode {
82  /// Argument validation failed (eConfigErr).
84 
85  /// Memory allocation failed (eMemErr).
87 
88  /// Internal error (e.g. unimplemented methods).
89  eInternal
90  };
91 
92  /// Get the symbolic name of the current error code as a C string.
93  virtual const char* GetErrCodeString() const override
94  {
95  switch ( GetErrCode() ) {
96  case eConfigErr: return "eConfigErr";
97  case eMemErr: return "eMemErr";
98  case eInternal: return "eInternal";
99  default: return CException::GetErrCodeString(); // any other code: defer to the base class
100  }
101  }
102 
103  /// Include standard NCBI exception behavior (NCBI_EXCEPTION_DEFAULT).
105 };
106 
107 
108 /// Blast Search Subject (CSearchDatabase): a BLAST database name and molecule
    /// type, plus optional entrez-query, gi-list and filtering settings.
110 public:
111  /// Define a list of gis
112  typedef vector<TGi> TGiList;
113 
114  /// Molecule of the BLAST database (EMoleculeType)
116  eBlastDbIsProtein, ///< protein
117  eBlastDbIsNucleotide ///< nucleotide
118  };
119 
120  /// Simple constructor
121  /// @param dbname database name [in]
122  /// @param mol_type molecule type [in]
123  CSearchDatabase(const string& dbname, EMoleculeType mol_type);
124 
125  /// Constructor which allows an entrez query to be specified
126  /// @param dbname database name [in]
127  /// @param mol_type molecule type [in]
128  /// @param entrez_query entrez query string [in]
129  CSearchDatabase(const string& dbname, EMoleculeType mol_type,
130  const string& entrez_query);
131 
132  /// Mutator for the database name
133  /// @param dbname database name [in]
134  void SetDatabaseName(const string& dbname);
135  /// Accessor for the database name
136  string GetDatabaseName() const;
137 
138  /// Mutator for the molecule type
139  /// @param mol_type molecule type [in]
140  void SetMoleculeType(EMoleculeType mol_type);
141  /// Accessor for the molecule type
142  EMoleculeType GetMoleculeType() const;
143  /// Determine whether this database contains protein sequences or not
144  bool IsProtein() const {
145  return GetMoleculeType() == eBlastDbIsProtein;
146  }
147 
148  /// Mutator for the entrez query
149  /// @param entrez_query entrez query string [in]
150  void SetEntrezQueryLimitation(const string& entrez_query);
151  /// Accessor for the entrez query
152  string GetEntrezQueryLimitation() const;
153 
154  /// Mutator for the gi list
155  /// @param gilist list of gis [in]
156  void SetGiList(CSeqDBGiList * gilist);
157  /// Accessor for the gi list (returns the CRef by const reference)
158  const CRef<CSeqDBGiList>& GetGiList() const;
159  const TGiList GetGiListLimitation() const; ///< gi list limitation as a TGiList (returned by value)
160  /// Mutator for the negative gi list
161  /// @param gilist list of gis [in]
162  void SetNegativeGiList(CSeqDBGiList * gilist);
163  /// Accessor for the negative gi list (returns the CRef by const reference)
164  const CRef<CSeqDBGiList>& GetNegativeGiList() const;
165  const TGiList GetNegativeGiListLimitation() const; ///< negative gi list limitation as a TGiList (returned by value)
166 
167  /// Deprecated overload: temporary fix for backwards compatibility with other 6.0 SCs
168  NCBI_DEPRECATED void SetFilteringAlgorithm(int filt_algorithm_id);
169 
170  /// Mutator for the filtering algorithm (by numeric ID)
171  /// @param filt_algorithm_id filtering algorithm ID [in]
172  /// @param mask_type type of mask [in]
173  void SetFilteringAlgorithm(int filt_algorithm_id, ESubjectMaskingType mask_type);
174  /// Mutator for the filtering algorithm (by string)
175  /// @param filt_algorithm filtering algorithm string [in]
176  /// @param mask_type type of mask [in]
177  void SetFilteringAlgorithm(const string &filt_algorithm,
178  ESubjectMaskingType mask_type);
179  /// Accessor for the filtering algorithm ID
180  int GetFilteringAlgorithm() const;
181  /// Accessor for the filtering algorithm key
182  string GetFilteringAlgorithmKey() const;
183  ESubjectMaskingType GetMaskType() const; ///< Accessor for the subject masking type
184 
185  /// Mutator for the seqdb
186  /// @param seqdb reference to an initialized db [in]
187  void SetSeqDb(CRef<CSeqDB> seqdb);
188  /// Obtain a reference to the database
189  CRef<CSeqDB> GetSeqDb() const;
190 
191 private:
192  string m_DbName; ///< database name
193  EMoleculeType m_MolType; ///< molecule type
194  string m_EntrezQueryLimitation; ///< entrez query
195  // N.B.: only one of the 2 below should be specified
196  mutable CRef<CSeqDBGiList> m_GiList; ///< gi list
197  mutable CRef<CSeqDBGiList> m_NegativeGiList; ///< negative gi list
199  /// filtering to apply to database sequences (describes m_FilteringAlgorithmString)
204  mutable bool m_DbInitialized; ///< NOTE(review): presumably records whether x_InitializeDb() has run - confirm
206 
207  /// Translate string algorithm id to numeric id
208  void x_TranslateFilteringAlgorithm() const;
209  /// Initialize the database
210  void x_InitializeDb() const;
211  /// Validates that the specified filtering algorithm ID is supported by the
212  /// specified BLAST database
213  /// @throws CBlastException if validation fails
214  void x_ValidateMaskingAlgorithm() const;
215 };
216 
217 
218 /// Single Iteration Blast Database Search
219 ///
220 /// This class is the top-level Uniform Search interface for blast
221 /// searches. Concrete subclasses of this class will accept blast
222 /// options, perform blast searches, and produce CSearchResultSets
223 /// as output. This class does not accept queries, however, so code
224 /// that provides the queries to the search will need to work with the
225 /// interfaces derived from this class.
226 
227 class ISearch : public CObject {
228 public:
229  // Configuration
230 
231  /// Configure the search (pure virtual)
232  /// @param options The search will be configured with these options.
233  virtual void SetOptions(CRef<CBlastOptionsHandle> options) = 0;
234 
235  /// Set the subject database(s) to search (SetSubject, pure virtual; takes a CConstRef<CSearchDatabase>)
237 
238  /// Run the search to completion (Run, pure virtual; returns a CRef<CSearchResultSet>).
240 };
241 
242 
243 /// Single Iteration Search of Sequence(s) Against Blast Database(s)
244 ///
245 /// This interface class adds query-specific information to the
246 /// ISearch interface. This version works with sequence queries.
247 
248 class ISeqSearch : public ISearch {
249 public:
250  /// Virtual destructor (empty body)
251  virtual ~ISeqSearch() {}
252 
253  // Inputs
254 
255  /// Set the queries to search (pure virtual)
256  /// @param query_factory This supplies the queries for which to search.
257  virtual void SetQueryFactory(CRef<IQueryFactory> query_factory) = 0;
258 };
259 
260 
261 /// Experimental interface (since this does not provide a full interface to
262 /// PSI-BLAST)
263 /// @note the CSearchResultSet that is returned from the Run method will
264 /// always contain 0 or 1 CSearchResults objects, as PSI-BLAST cannot do
265 /// multiple-PSSM searches
266 
267 class IPssmSearch : public ISearch {
268 public:
269 
270  /// Set the queries to search (SetQuery, pure virtual)
271  /// @param pssm PSSM, passed as a CRef<objects::CPssmWithParameters> [in]
273 };
274 
275 
276 /// Factory for ISearch.
277 ///
278 /// This class is an abstract factory class for the ISearch class.
279 /// Concrete subclasses of this class will create and return concrete
280 /// subclasses of the ISearch class. Use this class when you need to
281 /// write code that describes an algorithm over the abstract ISearch
282 /// API, and is ignorant of the concrete type of search it is
283 /// performing (i.e.: local vs. remote search).
284 
285 class ISearchFactory : public CObject {
286 public:
287  /// Create a new search object with a sequence-based query (GetSeqSearch, pure virtual).
288  ///
289  /// A search object will be constructed and configured for a
290  /// search using a query that consists of one or more sequences.
291  ///
292  /// @return
293  /// A search object for a sequence search (CRef<ISeqSearch>).
295 
296  /// Create a new search object with a pssm-based query (GetPssmSearch, pure virtual).
297  ///
298  /// A search object will be constructed and configured for a
299  /// search using a PSSM query.
300  ///
301  /// @return
302  /// A search object for a PSSM search (CRef<IPssmSearch>).
304 
305  /// Create a CBlastOptionsHandle (GetOptions, pure virtual)
306  ///
307  /// This creates a CBlastOptionsHandle for the specified program
308  /// value. The options can be used to configure a search created
309  /// by the GetSeqSearch() or GetPssmSearch() methods. The search
310  /// object and the CBlastOptionsHandle object should be created by
311  /// the same ISearchFactory subclass.
312  ///
313  /// @param program
314  /// The program type for this search (EProgram).
315  /// @return
316  /// An options handle object for this program and factory type (CRef<CBlastOptionsHandle>).
318 };
319 
320 
321 END_SCOPE(BLAST)
323 
324 /* @} */
325 
326 #endif /* ALGO_BLAST_API___UNIFORM_SEARCH_HPP */
327 
ESubjectMaskingType
Define the possible subject masking types.
Definition: blast_def.h:235
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
Declares the CBlastOptionsHandle and CBlastOptionsFactory classes.
Definition of classes which constitute the results of running a BLAST search.
Definitions of special type used in BLAST.
EProgram
This enumeration is to evolve into a task/program specific list that specifies sets of default parame...
Definition: blast_types.hpp:56
CObject –.
Definition: ncbiobj.hpp:180
Blast Search Subject.
Exception class.
CSeqDBGiList.
Experimental interface (since this does not provide a full interface to PSI-BLAST)
Factory for ISearch.
Single Iteration Blast Database Search.
Single Iteration Search of Sequence(s) Against Blast Database(s)
virtual void SetSubject(CConstRef< CSearchDatabase > subject)=0
Set the subject database(s) to search.
bool m_NeedsFilteringTranslation
virtual void SetOptions(CRef< CBlastOptionsHandle > options)=0
Configure the search.
string m_EntrezQueryLimitation
entrez query
string m_FilteringAlgorithmString
filtering to apply to database sequences
virtual CRef< ISeqSearch > GetSeqSearch()=0
Create a new search object with a sequence-based query.
CRef< CSeqDBGiList > m_GiList
gi list
virtual void SetQuery(CRef< objects::CPssmWithParameters > pssm)=0
Set the queries to search.
virtual CRef< CBlastOptionsHandle > GetOptions(EProgram program)=0
Create a CBlastOptionsHandle.
CRef< CSeqDB > m_SeqDb
virtual void SetQueryFactory(CRef< IQueryFactory > query_factory)=0
Set the queries to search.
bool IsProtein() const
Determine whether this database contains protein sequences or not.
string m_DbName
database name
EMoleculeType m_MolType
molecule type
vector< TGi > TGiList
Define a list of gis.
virtual const char * GetErrCodeString() const override
Get a message describing the situation leading to the throw.
virtual CRef< CSearchResultSet > Run()=0
Run the search to completion.
virtual ~ISeqSearch()
Destructor.
CRef< CSeqDBGiList > m_NegativeGiList
negative gi list
ESubjectMaskingType m_MaskType
virtual CRef< IPssmSearch > GetPssmSearch()=0
Create a new search object with a pssm-based query.
NCBI_EXCEPTION_DEFAULT(CSearchException, CException)
Include standard NCBI exception behavior.
EMoleculeType
Molecule of the BLAST database.
EErrCode
Errors are classified into one of two types.
@ eBlastDbIsProtein
protein
@ eMemErr
Memory allocation failed.
@ eConfigErr
Argument validation failed.
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
#define NCBI_DEPRECATED
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
char * dbname(DBPROCESS *dbproc)
Get name of current database.
Definition: dblib.c:6929
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Defines BLAST database access classes.
Defines exception class and several constants for SeqDB.
static string subject
Modified on Sun Apr 14 05:28:34 2024 by modify_doxy.py rev. 669887