NCBI C++ ToolKit
blast_seqsrc_impl.h
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: blast_seqsrc_impl.h 52776 2012-01-26 19:01:24Z maning $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Christiam Camacho
27  *
28  */
29 
30 /** @file blast_seqsrc_impl.h
31  * Definitions needed for implementing the BlastSeqSrc interface and low level
32  * details of the implementation of the BlastSeqSrc framework
33  */
34 
35 #ifndef ALGO_BLAST_CORE__BLAST_SEQSRC_IMPL__H
36 #define ALGO_BLAST_CORE__BLAST_SEQSRC_IMPL__H
37 
40 
41 #ifdef __cplusplus
42 extern "C" {
43 #endif
44 
45 /** Function pointer typedef to create a new BlastSeqSrc structure.
46  * BlastSeqSrcNew uses this function pointer and the ctor_argument (both
47  * obtained from the BlastSeqSrcNewInfo structure) after allocating the
48  * BlastSeqSrc structure.
49  * Client implementations MUST return a non-NULL BlastSeqSrc (the one that is
50  * actually passed in) even if initialization of the BlastSeqSrc
51  * implementation fails, in which case only the functionality to retrieve an
52  * initialization error message and to deallocate the BlastSeqSrc structure
53  * must be defined (C++ implementations must NOT throw exceptions!).
54  * If initialization of the BlastSeqSrc implementation succeeds, then this
55  * function should initialize all the function pointers and appropriate data
56  * fields for the BlastSeqSrc using the _BlastSeqSrcImpl_* functions
57  * declared by the macros at the end of this file.
58  */
59 typedef BlastSeqSrc* (*BlastSeqSrcConstructor)
60  (BlastSeqSrc* seqsrc, /**< pointer to an already allocated structure to
61  be populated with implementation's function
62  pointers and data structures */
63  void* arg /**< placeholder argument to pass arguments to the
64  client-defined BlastSeqSrc implementation */
65  );
66 
67 /** Complete type definition of the structure used to create a new
68  * BlastSeqSrc */
69 struct BlastSeqSrcNewInfo {
70  BlastSeqSrcConstructor constructor; /**< User-defined function to initialize
71  a BlastSeqSrc structure */
72  void* ctor_argument; /**< Argument to the above function */
73 };
74 
75 /** Function pointer typedef to deallocate a BlastSeqSrc structure, always
76  * returns NULL. This function's implementation should free resources allocated
77  * in the BlastSeqSrcConstructor; the BlastSeqSrc structure itself is freed by
78  * BlastSeqSrcFree */
79 typedef BlastSeqSrc* (*BlastSeqSrcDestructor)
80  (BlastSeqSrc* seqsrc /**< BlastSeqSrc structure to free */
81  );
82 
83 /** Function pointer typedef to modify the contents of a BlastSeqSrc
84  * structure, copied by BlastSeqSrcCopy, to achieve multi-thread safety.
85  * Argument is a pointer to the BlastSeqSrc structure to be modified.
86  * Returns the same structure, with modified contents.
87  */
88 typedef BlastSeqSrc* (*BlastSeqSrcCopier) (BlastSeqSrc*);
89 
90 /** Function pointer typedef to set a 4-byte integer. */
91 typedef void (*SetInt4FnPtr)
92  (void* seqsrc_impl, /**< BlastSeqSrc implementation's data structure */
93  int arg /**< integer value to be set */);
94 
95 /** Function pointer typedef to return a 4-byte integer. */
96 typedef Int4 (*GetInt4FnPtr)
97  (void* seqsrc_impl, /**< BlastSeqSrc implementation's data structure */
98  void* arg /**< place holder argument to pass arguments to the
99  client-defined BlastSeqSrc implementation */
100  );
101 
102 /** Function pointer typedef to return an 8-byte integer. */
103 typedef Int8 (*GetInt8FnPtr)
104  (void* seqsrc_impl, /**< BlastSeqSrc implementation's data structure */
105  void* arg /**< place holder argument to pass arguments to the
106  client-defined BlastSeqSrc implementation */
107  );
108 
109 /** Function pointer typedef to return a null terminated string, used to return
110  * the name of a BlastSeqSrc implementation (e.g.: BLAST database name).
111  */
112 typedef const char* (*GetStrFnPtr)
113  (void* seqsrc_impl, /**< BlastSeqSrc implementation's data structure */
114  void* arg /**< place holder argument to pass arguments to the
115  client-defined BlastSeqSrc implementation */
116  );
117 
118 /** Function pointer typedef to return a boolean value, used to return whether
119  * a given BlastSeqSrc implementation contains protein or nucleotide sequences
120  * (e.g.: BlastSeqSrcGetIsProt).
121  */
122 typedef Boolean (*GetBoolFnPtr)
123  (void* seqsrc_impl, /**< BlastSeqSrc implementation's data structure */
124  void* arg /**< place holder argument to pass arguments to the
125  client-defined BlastSeqSrc implementation */
126  );
127 
128 /** Function pointer typedef to set partial fetching range */
129 typedef void (*SetSeqRangeFnPtr)
130  (void* seqsrc_impl, /**< BlastSeqSrc implementation's data structure */
131  BlastSeqSrcSetRangesArg* arg /**< arguments to set partial fetching ranges */
132  );
133 
134 /** Function pointer typedef to retrieve sequences from data structure embedded
135  * in the BlastSeqSrc structure. Return value is one of the BLAST_SEQSRC_*
136  * defines @sa BlastSeqSrcGetSeqArg */
137 typedef Int2 (*GetSeqBlkFnPtr)
138  (void* seqsrc_impl, /**< BlastSeqSrc implementation's data structure */
139  BlastSeqSrcGetSeqArg* arg /**< arguments to fetch sequence data from a
140  client-defined BlastSeqSrc implementation */
141  );
142 
143 /** Function pointer typedef to release sequences obtained from the data
144  * structure embedded in the BlastSeqSrc structure.
145  * @sa BlastSeqSrcGetSeqArg */
146 typedef void (*ReleaseSeqBlkFnPtr)
147  (void* seqsrc_impl, /**< BlastSeqSrc implementation's data structure */
148  BlastSeqSrcGetSeqArg* arg /**< arguments to fetch sequence data from a
149  client-defined BlastSeqSrc implementation */
150  );
151 
152 #ifdef KAPPA_PRINT_DIAGNOSTICS
153 /** Function pointer typedef to retrieve gis for a given ordinal id */
154 typedef Blast_GiList* (*GetGisFnPtr)
155  (void* seqsrc_impl, /**< BlastSeqSrc implementation's data structure */
156  void* arg /**< place holder argument to pass arguments to the
157  client-defined BlastSeqSrc implementation */
158  );
159 #endif /* KAPPA_PRINT_DIAGNOSTICS */
160 
161 /******************** BlastSeqSrcIterator API *******************************/
162 
163 /** Defines the type of data contained in the BlastSeqSrcIterator structure */
164 typedef enum BlastSeqSrcItrType {
165  eOidList, /**< Data is a list of discontiguous ordinal ids (indices) */
166  eOidRange /**< Data is a range of contiguous ordinal ids (indices) */
167 } BlastSeqSrcItrType;
168
169 /** Complete type definition of Blast Sequence Source Iterator */
170 struct BlastSeqSrcIterator {
171  /** Indicates which member to access: oid_list or oid_range */
172  BlastSeqSrcItrType itr_type;
173 
174  /** Array of ordinal ids used when itr_type is eOidList */
175  int* oid_list;
176  /** This is a half-closed interval [a,b) */
177  int oid_range[2];
178 
179  /** Keep track of this iterator's current position, implementations use
180  * UINT4_MAX to indicate this is uninitialized/invalid */
181  unsigned int current_pos;
182  /** Size of the chunks to advance over the BlastSeqSrc, also size of
183  * oid_list member, this is provided to reduce mutex contention when
184  * implementing MT-safe iteration */
185  unsigned int chunk_sz;
186 };
187 
188 /** Function pointer typedef to obtain the next ordinal id to fetch from the
189  * BlastSeqSrc structure.
190  * Return value is the next ordinal id, or BLAST_SEQSRC_EOF if no more
191  * sequences are available. This is to be used in the oid field of the
192  * BlastSeqSrcGetSeqArg structure to indicate an index into the BlastSeqSrc
193  * from which the next sequence should be retrieved using
194  * BlastSeqSrcGetSequence
195  */
196 typedef Int4 (*AdvanceIteratorFnPtr)
197  (void* seqsrc_impl, /**< BlastSeqSrc implementation's data structure */
198  BlastSeqSrcIterator* itr /**< iterator which contains the state of the
199  iteration being performed */
200  );
201 
202 /** Function pointer typedef to obtain the next chunk of ordinal ids for the
203  * BLAST engine to search. By calling this function with a given chunk size
204  * (stored in the iterator structure), one reduces the number of calls which
205  * have to be guarded by a mutex in a multi-threaded environment by examining
206  * the BlastSeqSrc structure infrequently, i.e.: not every implementation of
207  * the BlastSeqSrc needs to provide this if this does not help in satisfying
208  * the MT-safe iteration requirement of the BlastSeqSrc interface.
209  * Return value is one of the BLAST_SEQSRC_* defines
210  */
211 typedef Int2 (*GetNextChunkFnPtr)
212  (void* seqsrc_impl, /**< BlastSeqSrc implementation's data structure */
213  BlastSeqSrcIterator* itr /**< iterator which contains the state of the
214  iteration being performed */
215  );
216 
217 /** Function pointer typedef to reset the internal "bookmark" of the last chunk
218  * provided for iteration by the data structure embedded in the BlastSeqSrc
219  * structure.
220  */
221 typedef void (*ResetChunkIteratorFnPtr)
222  (void* seqsrc_impl); /**< BlastSeqSrc implementation's data structure */
223 
224 /*****************************************************************************/
225 
226 #ifndef SKIP_DOXYGEN_PROCESSING
227 
228 /* The following macros provide access to the BlastSeqSrc structure's data
229  This is provided to allow some basic error checking (no NULL pointer
230  dereferencing or assignment).
231  These "member functions" of the BlastSeqSrc should be called by
232  implementations of the interface to set the appropriate function pointers
233  and data structures.
234  */
235 
236 #define DECLARE_BLAST_SEQ_SRC_MEMBER_FUNCTIONS(member_type, member) \
237 DECLARE_BLAST_SEQ_SRC_ACCESSOR(member_type, member); \
238 DECLARE_BLAST_SEQ_SRC_MUTATOR(member_type, member)
239 
240 #define DECLARE_BLAST_SEQ_SRC_ACCESSOR(member_type, member) \
241 NCBI_XBLAST_EXPORT \
242 member_type _BlastSeqSrcImpl_Get##member(const BlastSeqSrc* var)
243 
/* Declares the prototype of the _BlastSeqSrcImpl_Set<member> mutator; the macro user supplies the terminating ';'. The last line deliberately has no line continuation so the macro body cannot absorb whatever follows it. */
244 #define DECLARE_BLAST_SEQ_SRC_MUTATOR(member_type, member) \
245 NCBI_XBLAST_EXPORT \
246 void _BlastSeqSrcImpl_Set##member(BlastSeqSrc* var, member_type arg)
247 
248 
252 
261 
264 
265 DECLARE_BLAST_SEQ_SRC_MEMBER_FUNCTIONS(GetBoolFnPtr, GetSupportsPartialFetching);
267 
271 
273 #ifdef KAPPA_PRINT_DIAGNOSTICS
274 DECLARE_BLAST_SEQ_SRC_MEMBER_FUNCTIONS(GetGisFnPtr, GetGis);
275 #endif /* KAPPA_PRINT_DIAGNOSTICS */
276 DECLARE_BLAST_SEQ_SRC_MEMBER_FUNCTIONS(ResetChunkIteratorFnPtr,
277  ResetChunkIterator);
278 
279 /* Not really member functions, but fields */
280 DECLARE_BLAST_SEQ_SRC_MEMBER_FUNCTIONS(void*, DataStructure);
281 DECLARE_BLAST_SEQ_SRC_MEMBER_FUNCTIONS(char*, InitErrorStr);
282 #endif
283 
284 /**
285  * @page _impl_blast_seqsrc_howto Implementing the BlastSeqSrc interface
286  *
287  * Implementations of this interface should provide life-cycle functions as
288  * well as functions which satisfy the BlastSeqSrc interface. These functions
289  * must have C linkage, as these functions are invoked by the BlastSeqSrc
290  * framework.
291  * An initialization function must also be provided, this is intended to be
292  * invoked by client code which wants to use a specific BlastSeqSrc
293  * implementation through the BlastSeqSrc interface.
294  * For example, MyDatabaseFormat implementation would define the following
295  * functions:
296  *
297  * - Initialization function
298  * @code
299  * // Calls BlastSeqSrcNew on behalf of client code, client should free using
300  * // BlastSeqSrcFree
301  * BlastSeqSrc* MyDatabaseFormatBlastSeqSrcInit(...);
302  * @endcode
303  *
304  * - Life-cycle functions
305  * @code
306  * extern "C" {
307  * // required signature: BlastSeqSrcConstructor
308  * BlastSeqSrc* MyDatabaseFormatNew(BlastSeqSrc*, void*);
309  * // required signature: BlastSeqSrcDestructor
310  * BlastSeqSrc* MyDatabaseFormatFree(BlastSeqSrc*);
311  * // required signature: BlastSeqSrcCopier
312  * BlastSeqSrc* MyDatabaseFormatCopy(BlastSeqSrc*);
313  * }
314  * @endcode
315  *
316  * - BlastSeqSrc interface
317  * @code
318  * extern "C" {
319  * // required signature: SetInt4FnPtr
320  * void MyDatabaseFormatSetNumberOfThreads(void*, int);
321  * // required signature: GetInt4FnPtr
322  * Int4 MyDatabaseFormatGetNumSeqs(void*, void*);
323  * // required signature: GetInt4FnPtr
324  * Int4 MyDatabaseFormatGetNumSeqsStats(void*, void*);
325  * // required signature: GetInt4FnPtr
326  * Int4 MyDatabaseFormatGetMaxSeqLen(void*, void*);
327  * // required signature: GetInt4FnPtr
328  * Int4 MyDatabaseFormatGetMinSeqLen(void*, void*);
329  * // required signature: GetInt4FnPtr
330  * Int4 MyDatabaseFormatGetAvgSeqLen(void*, void*);
331  * // required signature: GetInt8FnPtr
332  * Int8 MyDatabaseFormatGetTotLen(void*, void*);
333  * // required signature: GetInt8FnPtr
334  * Int8 MyDatabaseFormatGetTotLenStats(void*, void*);
335  * // required signature: GetStrFnPtr
336  * const char* MyDatabaseFormatGetName(void*, void*);
337  * // required signature: GetBoolFnPtr
338  * Boolean MyDatabaseFormatGetIsProt(void*, void*);
339  * // required signature: GetSeqBlkFnPtr
340  * Int2 MyDatabaseFormatGetSequence(void*, BlastSeqSrcGetSeqArg*);
341  * // required signature: GetInt4FnPtr
342  * Int4 MyDatabaseFormatGetSeqLen(void*, void*);
343  * // required signature: ReleaseSeqBlkFnPtr
344  * void MyDatabaseFormatReleaseSequence(void*, BlastSeqSrcGetSeqArg*);
345  * // required signature: AdvanceIteratorFnPtr
346  * Int4 MyDatabaseFormatItrNext(void*, BlastSeqSrcIterator* itr);
347  * // required signature: ResetChunkIteratorFnPtr
348  * void MyDatabaseFormatResetChunkIterator(void*);
349  * }
350  * @endcode
351  *
352  * Since the life-cycle and BlastSeqSrc interface functions above are
353  * called by the BlastSeqSrc framework (BlastSeqSrc* functions declared in
354  * blast_seqsrc.h), no exceptions should be thrown in C++ implementations.
355  * When not obvious, please see the required signature's documentation for
356  * determining what to implement (see blast_seqsrc_impl.h).
357  *
358  * For ease of maintenance, please follow the following conventions:
359  * - Client implementations' initialization function should be called
360  * \c XBlastSeqSrcInit, where \c X is the name of the implementation
361  * - Client implementations should reside in a file named \c seqsrc_X.[hc] or
362  * \c seqsrc_X.[ch]pp, where \c X is the name of the implementation.
363  */
364 
365 #ifdef __cplusplus
366 }
367 #endif
368 
369 #endif /* !ALGO_BLAST_CORE__BLAST_SEQSRC_IMPL__H */
Structures for BLAST messages.
const char *(* GetStrFnPtr)(void *seqsrc_impl, void *arg)
Function pointer typedef to return a null terminated string, used to return the name of a BlastSeqSrc...
BlastSeqSrc *(* BlastSeqSrcDestructor)(BlastSeqSrc *seqrc)
Function pointer typedef to deallocate a BlastSeqSrc structure, always returns NULL.
void(* SetInt4FnPtr)(void *seqsrc_impl, int arg)
Function pointer typedef to set a 4-byte integer.
void(* ReleaseSeqBlkFnPtr)(void *seqsrc_impl, BlastSeqSrcGetSeqArg *arg)
Function pointer typedef to release sequences obtained from the data structure embedded in the BlastS...
BlastSeqSrc *(* BlastSeqSrcConstructor)(BlastSeqSrc *seqsrc, void *arg)
Function pointer typedef to create a new BlastSeqSrc structure.
Int8(* GetInt8FnPtr)(void *seqsrc_impl, void *arg)
Function pointer typedef to return a 8-byte integer.
Boolean(* GetBoolFnPtr)(void *seqsrc_impl, void *arg)
Function pointer typedef to return a boolean value, used to return whether a given BlastSeqSrc implem...
Int4(* GetInt4FnPtr)(void *seqsrc_impl, void *arg)
Function pointer typedef to return a 4-byte integer.
void(* SetSeqRangeFnPtr)(void *seqsrc_impl, BlastSeqSrcSetRangesArg *arg)
Function pointer typedef to set partial fetching range.
#define DECLARE_BLAST_SEQ_SRC_MEMBER_FUNCTIONS(member_type, member)
void(* ResetChunkIteratorFnPtr)(void *seqsrc_impl)
Function pointer typedef to reset the internal "bookmark" of the last chunk provided for iteration by...
BlastSeqSrc *(* BlastSeqSrcCopier)(BlastSeqSrc *)
Function pointer typedef to modify the contents of a BlastSeqSrc structure, copied by BlastSeqSrcCopy...
Int2(* GetSeqBlkFnPtr)(void *seqsrc_impl, BlastSeqSrcGetSeqArg *arg)
Function pointer typedef to retrieve sequences from data structure embedded in the BlastSeqSrc struct...
Int4(* AdvanceIteratorFnPtr)(void *seqsrc_impl, BlastSeqSrcIterator *itr)
Function pointer typedef to obtain the next ordinal id to fetch from the BlastSeqSrc structure.
BlastSeqSrcItrType
Defines the type of data contained in the BlastSeqSrcIterator structure.
@ eOidRange
Data is a range of contiguous ordinal ids (indices)
@ eOidList
Data is a list of discontiguous ordinal ids (indices)
Int2(* GetNextChunkFnPtr)(void *seqsrc_impl, BlastSeqSrcIterator *itr)
Function pointer typedef to obtain the next chunk of ordinal ids for the BLAST engine to search.
SBlastSequence GetSequence(const objects::CSeq_loc &sl, EBlastEncoding encoding, objects::CScope *scope, objects::ENa_strand strand=objects::eNa_strand_plus, ESentinelType sentinel=eSentinels, std::string *warnings=NULL)
Retrieves a sequence using the object manager.
#define Boolean
Definition: ncbistd.hpp:136
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
Type and macro definitions from C toolkit that are not defined in C++ toolkit.
static Int4 GetSeqLen(DataBlkPtr entry)
Definition: sp_ascii.cpp:3987
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Definition: blast_seqsrc.h:257
Complete type definition of Blast Sequence Source Iterator.
unsigned int chunk_sz
Size of the chunks to advance over the BlastSeqSrc, also size of oid_list member, this is provided to...
BlastSeqSrcItrType itr_type
Indicates which member to access: oid_list or oid_range.
int oid_range[2]
This is a half-closed interval [a,b)
int * oid_list
Array of ordinal ids used when itr_type is eOidList.
unsigned int current_pos
Keep track of this iterator's current position, implementations use UINT4_MAX to indicate this is uni...
Complete type definition of the structure used to create a new BlastSeqSrc.
BlastSeqSrcConstructor constructor
User-defined function to initialize a BlastSeqSrc structure.
void * ctor_argument
Argument to the above function.
Structure used as the argument to function SetRanges.
Definition: blast_seqsrc.h:208
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Modified on Fri Sep 20 14:58:08 2024 by modify_doxy.py rev. 669887