NCBI C++ ToolKit
setup_factory.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: setup_factory.hpp 78930 2017-07-31 13:06:45Z dicuccio $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho, Kevin Bealer
27  *
28  */
29 
30 /** @file setup_factory.hpp
31  * NOTE: This file contains work in progress and the APIs are likely to change,
32  * please do not rely on them until this notice is removed.
33  */
34 
35 #ifndef ALGO_BLAST_API___SETUP_FACTORY_HPP
36 #define ALGO_BLAST_API___SETUP_FACTORY_HPP
37 
39 #include <algo/blast/api/rps_aux.hpp> // for CBlastRPSInfo
44 
45 /** @addtogroup AlgoBlast
46  *
47  * @{
48  */
49 
51 BEGIN_SCOPE(blast)
52 
53 /// Forward declarations
55 class CSearchDatabase;
56 
57 // -- RATIONALE: --
58 //
59 // This is a wrapper for a (C language) struct pointer, providing
60 // optional-at-runtime deletion / ownership semantics plus sharing.
61 // The simplest way to explain what it is for is to explain why the
62 // other smart pointer classes were not used or not used directly.
63 //
64 // CObject/CRef: These require the base object to be a CObject.
65 // Because of our requirement of continuing to work with a mixture of
66 // C and C++, we cannot make these particular structs into CObjects.
67 //
68 // auto_ptr and AutoPtr: One of the requirements is simultaneous
69 // ownership -- these classes cannot do this.
70 //
71 // CObjectFor: This does not provide configurable deletion, cannot
72 // control deletion at runtime, and copies data by value.
73 //
74 // DECLARE_AUTO_CLASS_WRAPPER: This lacks sharing semantics. It is
75 // also a macro, and requires more work to use than CStructWrapper.
76 //
77 // Combining two of these approaches would probably work. For
78 // example, something like CObjectFor< AutoPtr<T> > is almost good
79 // enough, but wrapping it to provide the optional deletion semantics
80 // would result in code the same size as that below.
81 
82 
83 // CStructWrapper
84 //
85 // This template wraps a C or C++ object in a CObject. A deletion
86 // function can be provided to the constructor, and if so, will be
87 // used to delete the object. The signature must be "T* D(T *)".
88 //
89 // CStructWrapper<T>(T *, TDelete * d) -> Uses "d(x)"
90 // CStructWrapper<T>(T *, 0) -> Non-deleting version
91 //
92 
93 template<class TData>
94 class CStructWrapper : public CObject {
95 public:
96  /// Type of a function that deallocates the memory associated with
97  /// an object.
98  /// This function's return value is ignored; it would be void,
99  /// except that most existing deletion functions return "NULL".
100  typedef TData* (TDelete)(TData*);
101 
102  /// Constructor
103  /// @param obj object to wrap [in]
104  /// @param dfun deallocation function for the object above, or NULL for a non-deleting wrapper [in]
105  CStructWrapper(TData * obj, TDelete * dfun)
106  : m_Data(obj), m_DeleteFunction(dfun)
107  {
108  }
109 
110  /// Destructor; the deletion function is only used when both it and the wrapped pointer are non-NULL
112  {
113  if (m_Data && m_DeleteFunction) {
115  }
116  m_Data = NULL;
117  }
118 
119  /// Returns a pointer to the wrapped object
120  TData * GetPointer()
121  {
122  return m_Data;
123  }
124 
125  /// Returns a reference to the wrapped object (dereferences the stored pointer without a NULL check)
126  TData & operator*()
127  {
128  return *m_Data;
129  }
130 
131  /// Returns a pointer to the wrapped object
132  TData * operator->()
133  {
134  return m_Data;
135  }
136 
137 private:
138  /// Prohibit copy constructor
140  /// Prohibit assignment operator
142 
143  /// the pointer managed by this object
144  TData * m_Data;
145  /// deallocation function for the pointer above
147 };
148 
149 
150 /// Auxiliary function that allocates (with new) a CStructWrapper for a pointer to an object
151 /// @param obj pointer to wrap [in]
152 /// @param del deallocation function, or NULL for a non-deleting wrapper [in]
153 template<class TData>
155 WrapStruct(TData * obj, TData* (*del)(TData*))
156 {
157  return new CStructWrapper<TData>(obj, del);
158 }
159 
160 /// Class that supports setting the number of threads to use with a given
161 /// algorithm. Ensures that this number is greater than or equal to 1.
163 {
164 public:
165  /// Never have fewer than 1 thread
166  enum { kMinNumThreads = 1 };
167 
168  /// Default ctor; the thread count starts at kMinNumThreads
169  CThreadable(void) : m_NumThreads(kMinNumThreads) {}
170  /// Our virtual destructor
171  virtual ~CThreadable(void) {}
172  /// Mutator for the number of threads
173  /// @param nthreads number of threads to use; the inline implementation in this file stores kMinNumThreads when 0 is passed
174  virtual void SetNumberOfThreads(size_t nthreads);
175  /// Accessor for the number of threads to use
176  size_t GetNumberOfThreads(void) const;
177  /// Returns true if more than 1 thread is specified
178  bool IsMultiThreaded(void) const;
179 
180 protected:
181  size_t m_NumThreads; ///< Keep track of how many threads should be used
182 };
183 
184 
185 /// Auxiliary class to create the various C structures to set up the
186 /// preliminary and/or traceback stages of the search.
187 // Topological sort for calling these routines (after setting up queries):
188 // 1. RPS (if any)
189 // 2. ScoreBlk
190 // 3. LookupTable
191 // 4. diags, hspstream
193 public:
194  /// Initializes RPS-BLAST data structures
195  /// @param rps_dbname Name of the RPS-BLAST database [in]
196  /// @param options BLAST options (matrix name and gap costs will be
197  /// modified with data read from the RPS-BLAST auxiliary file) [in|out]
198  static CRef<CBlastRPSInfo>
199  CreateRpsStructures(const string& rps_dbname, CRef<CBlastOptions> options);
200 
201  /// Initializes the BlastScoreBlk. Caller owns the return value.
202  /// @param opts_memento Memento options object [in]
203  /// @param query_data source of query sequence data [in]
204  /// @param lookup_segments query segments to be searched because they were
205  /// not filtered, needed for the lookup table creation (otherwise pass
206  /// NULL). If this is passed to this function it should also be passed to
207  /// CreateLookupTable [in|out]
208  /// @param search_messages Error/warning messages [in|out]
209  /// @param masked_query_regions Regions of the query which were masked
210  /// including those masked outside the CORE. If non-NULL they will be
211  /// populated and caller assumes ownership of the object [in|out]
212  /// @param rps_info RPS-BLAST data structures as obtained from
213  /// CreateRpsStructures [in]
214  /// @todo need to convert the lookup_segments to some kind of c++ object
215  static BlastScoreBlk*
216  CreateScoreBlock(const CBlastOptionsMemento* opts_memento,
217  CRef<ILocalQueryData> query_data,
218  BlastSeqLoc** lookup_segments,
219  TSearchMessages& search_messages,
220  TSeqLocInfoVector* masked_query_regions = NULL,
221  const CBlastRPSInfo* rps_info = NULL);
222 
223  /// Initialize the lookup table. Note that for the case of PSI-BLAST the
224  /// PSSM must be initialized in the BlastScoreBlk for it to be recognized
225  /// properly by the lookup table code. Caller owns the return value.
226  /// @param query_data source of query sequence data [in]
227  /// @param opts_memento Memento options object [in]
228  /// @param score_blk BlastScoreBlk structure, as obtained in
229  /// CreateScoreBlock [in]
230  /// @param lookup_segments query segments to be searched because they were
231  /// not filtered, needed for the lookup table creation (otherwise pass
232  /// NULL) [in|out]
233  /// @todo need to convert the lookup_segments to some kind of c++ object
234  /// @param rps_info RPS-BLAST data structures as obtained from
235  /// CreateRpsStructures [in]
236  /// @param seqsrc BlastSeqSrc structure, only needed when performing
237  /// megablast indexed-database searches [in]
238  /// @param num_threads Number of threads to use. Multithreaded
239  /// implementation is only available for Magic-BLAST lookup tables. [in]
240  static LookupTableWrap*
241  CreateLookupTable(CRef<ILocalQueryData> query_data,
242  const CBlastOptionsMemento* opts_memento,
243  BlastScoreBlk* score_blk,
244  CRef< CBlastSeqLocWrap > lookup_segments,
245  const CBlastRPSInfo* rps_info = NULL,
246  BlastSeqSrc* seqsrc = NULL,
247  size_t num_threads = 1);
248 
249  /// Create and initialize the BlastDiagnostics structure for
250  /// single-threaded applications
251  static BlastDiagnostics* CreateDiagnosticsStructure();
252 
253  /// Create and initialize the BlastDiagnostics structure for
254  /// multi-threaded applications
255  static BlastDiagnostics* CreateDiagnosticsStructureMT();
256 
257  /// Create and initialize the BlastHSPStream structure
258  /// @param opts_memento Memento options object [in]
259  /// @param number_of_queries number of queries involved in the search [in]
260  /// @param writer writer to be used within this stream [in]
261  static BlastHSPStream*
262  CreateHspStream(const CBlastOptionsMemento* opts_memento,
263  size_t number_of_queries,
264  BlastHSPWriter *writer);
265 
266  /// Create a writer to be registered for use by stream
267  /// @param opts_memento Memento options object [in]
268  /// @param query Concatenated query sequence [in]
269  /// @param query_info Information about queries [in]
270  static BlastHSPWriter*
271  CreateHspWriter(const CBlastOptionsMemento* opts_memento,
273  BlastQueryInfo* query_info);
274 
275  /// Create a pipe to be registered for use by stream
276  /// @param opts_memento Memento options object [in]
277  /// @param query_info Information about queries [in]
278  static BlastHSPPipe*
279  CreateHspPipe(const CBlastOptionsMemento* opts_memento,
280  BlastQueryInfo* query_info);
281 
282  /// Create a BlastSeqSrc from a CSearchDatabase (uses CSeqDB)
283  /// @param db description of BLAST database to search [in]
284  static BlastSeqSrc*
285  CreateBlastSeqSrc(const CSearchDatabase& db);
286 
287  /// Create a BlastSeqSrc from an existing CSeqDB object
288  /// @param db Existing CSeqDB object for the searched BLAST database [in]
289  static BlastSeqSrc*
290  CreateBlastSeqSrc(CSeqDB * db, int filt_algo = -1,
291  ESubjectMaskingType mask_type = eNoSubjMasking);
292 
293  /// Initialize a megablast BLAST database index
294  /// @param options BLAST options (will be modified to record the fact that
295  /// the database index has been initialized) [in|out]
296  static void
297  InitializeMegablastDbIndex(CRef<CBlastOptions> options);
298 
299 };
300 
301 #ifndef SKIP_DOXYGEN_PROCESSING
308 
309 #endif /* SKIP_DOXYGEN_PROCESSING */
310 
311 /// Lightweight wrapper to enclose C structures needed for running the
312 /// preliminary and traceback stages of the BLAST search
314 {
315  /// Default ctor
316  SInternalData();
317 
318  /// The query sequence data; these fields are "borrowed" from the query
319  /// factory (which owns them)
321  /// The query information structure
323 
324  /// BLAST score block structure
326 
327  /// Lookup table, usually only needed in the preliminary stage of the
328  /// search, but for PHI-BLAST it's also needed in the traceback stage.
330 
331  /// Diagnostic output from preliminary and traceback stages
333 
334  /// HSP output of the preliminary stage goes here
336 
337  /// The source of subject sequence data
339 
340  /// The RPS-BLAST related data
342 
343  /// The interrupt callback, used to determine whether the search should proceed or be interrupted
345 
346  /// The user data structure to aid in progress monitoring
348 };
349 
350 /// Structure to hold results of the preliminary (database scanning phase)
351 /// part of the search that are needed for the traceback.
352 /// Generally this structure will be used if the preliminary and traceback parts
353 /// are done as separate processes (or even machines).
355 {
356  /// Default ctor
358 
359  /// Set to -1 in the ctor; indicates that m_NumPatOccurInDB is unset or not applicable.
361 
362  /// Number of times the pattern was found to occur in the database (for PHI-BLAST only).
364 };
365 
366 inline void
368 {
369  m_NumThreads = nthreads == 0 ? static_cast<size_t>(kMinNumThreads) : nthreads; // a request for 0 threads falls back to kMinNumThreads
370 }
371 
372 inline size_t
374 {
376  return m_NumThreads;
377 }
378 
379 inline bool
381 {
382  return m_NumThreads > kMinNumThreads; // strictly more than the minimum (1) thread
383 }
384 
385 END_SCOPE(blast)
387 
388 /* @} */
389 
390 #endif /* ALGO_BLAST_API___SETUP_FACTORY_HPP */
391 
Declarations for indexed blast databases.
ESubjectMaskingType
Define the possible subject masking types.
Definition: blast_def.h:235
@ eNoSubjMasking
Definition: blast_def.h:236
Boolean(* TInterruptFnPtr)(SBlastProgress *progress_info)
Prototype for function pointer to determine whether the BLAST search should proceed or be interrupted...
Definition: blast_def.h:354
#define NCBI_XBLAST_EXPORT
NULL operations for other cases.
Definition: blast_export.h:65
Declaration of ADT to save and retrieve lists of HSPs in the BLAST engine.
Class that allows the transfer of data structures from the CBlastOptionsLocal class to either the BLA...
Wrapper class to manage the BlastRPSInfo structure, as currently there aren't any allocation or deall...
Definition: rps_aux.hpp:68
CObject –.
Definition: ncbiobj.hpp:180
Blast Search Subject.
CSeqDB.
Definition: seqdb.hpp:161
Auxiliary class to create the various C structures to set up the preliminary and/or traceback stages ...
Class that supports setting the number of threads to use with a given algorithm.
typedef for the messages for an entire BLAST search, which could be comprised of multiple query seque...
TData * m_Data
the pointer managed by this object
CStructWrapper< BlastDiagnostics > TBlastDiagnostics
CStructWrapper(TData *obj, TDelete *dfun)
Constructor.
CStructWrapper & operator=(CStructWrapper< TData > &x)
Prohibit assignment operator.
size_t GetNumberOfThreads(void) const
Accessor for the number of threads to use.
virtual void SetNumberOfThreads(size_t nthreads)
Mutator for the number of threads.
~CStructWrapper()
Destructor.
TData *() TDelete(TData *)
type definition for a function that deallocated memory associated with an object.
CStructWrapper< LookupTableWrap > TLookupTableWrap
virtual ~CThreadable(void)
Our virtual destructor.
CStructWrapper< BlastHSPStream > TBlastHSPStream
CStructWrapper< BlastScoreBlk > TBlastScoreBlk
TDelete * m_DeleteFunction
deallocation function for the pointer above
CRef< TBlastSeqSrc > m_SeqSrc
The source of subject sequence data.
TData * operator->()
Returns a pointer to the wrapped object.
CStructWrapper< TData > * WrapStruct(TData *obj, TData *(*del)(TData *))
Auxiliary function to create a CStructWrapper for a pointer to an object.
bool IsMultiThreaded(void) const
Returns true if more than 1 thread is specified.
TData & operator*()
Returns a reference to the wrapped object.
CRef< TLookupTableWrap > m_LookupTable
Lookup table, usually only needed in the preliminary stage of the search, but for PHI-BLAST it's also...
CStructWrapper< SPHIPatternSearchBlk > TSPHIPatternSearchBlk
CStructWrapper(CStructWrapper< TData > &x)
Prohibit copy constructor.
CStructWrapper< BlastSeqSrc > TBlastSeqSrc
CRef< CBlastRPSInfo > m_RpsData
The RPS-BLAST related data.
CRef< TBlastScoreBlk > m_ScoreBlk
BLAST score block structure.
TData * GetPointer()
Returns a pointer to the wrapped object.
size_t m_NumThreads
Keep track of how many threads should be used.
TInterruptFnPtr m_FnInterrupt
The interrupt callback.
CRef< CSBlastProgress > m_ProgressMonitor
The user data structure to aid in progress monitoring.
BLAST_SequenceBlk * m_Queries
The query sequence data, these fields are "borrowed" from the query factory (which owns them)
BlastQueryInfo * m_QueryInfo
The query information structure.
CRef< TBlastDiagnostics > m_Diagnostics
Diagnostic output from preliminary and traceback stages.
const int kNoPhiBlastPattern
set to -1 in ctor, indicate that m_NumPatOccurInDB is unset or not applicable.
CThreadable(void)
Default ctor.
int m_NumPatOccurInDB
Number of times pattern found to occur in database (for phi-blast only).
CRef< TBlastHSPStream > m_HspStream
HSP output of the preliminary stage goes here.
#define NULL
Definition: ncbistd.hpp:225
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define END_SCOPE(ns)
End the previously defined scope.
Definition: ncbistl.hpp:75
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define BEGIN_SCOPE(ns)
Define a new scope.
Definition: ncbistl.hpp:72
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
Functions for finding pattern matches in sequence (PHI-BLAST).
NOTE: This file contains work in progress and the APIs are likely to change, please do not rely on th...
Declares auxiliary classes to manage RPS-BLAST related C-structures.
Defines BLAST database access classes.
vector< TMaskedQueryRegions > TSeqLocInfoVector
Collection of masked regions for all queries in a BLAST search.
Definition: seqlocinfo.hpp:139
Structure to hold a sequence.
Definition: blast_def.h:242
Return statistics from the BLAST search.
ADT definition of BlastHSPPipe.
Default implementation of BlastHSPStream.
ADT definition of BlastHSPWriter.
The query related information.
Structure used for scoring calculations.
Definition: blast_stat.h:177
Used to hold a set of positions, mostly used for filtering.
Definition: blast_def.h:204
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Wrapper structure for different types of BLAST lookup tables.
Definition: lookup_wrap.h:50
Structure to hold results of the preliminary (databases scanning phase) part of the search that are n...
Lightweight wrapper to enclose C structures needed for running the preliminary and traceback stages o...
static string query
Modified on Fri Mar 01 10:04:55 2024 by modify_doxy.py rev. 669887