NCBI C++ ToolKit
seqsrc_vdb.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id:
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Vahram Avagyan
27 *
28 */
29 
30 // Local includes
31 #include "vdbsequtil.h"
32 
33 #ifdef __cplusplus
34 extern "C" {
35 #endif
36 
37 // ==========================================================================//
38 // Definitions / Constants
39 
40 /// Empty database name returned by s_VDBSRC_GetName when the VDB data is not initialized.
41 static const char *const kEmptyVDBName = "";
42 
43 // ==========================================================================//
44 // BlastSeqSrc interface - Global properties
45 
46 /// Get the number of sequences in the sequence source.
47 static Int4
48 s_VDBSRC_GetNumSeqs(void* vdbDataHandle, void* dummy)
49 {
50  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
51  uint64_t retval = 0;
52 
53  ASSERT(vdbDataHandle);
54  if (vdbData->isInitialized)
55  retval = vdbData->numSeqs;
56 
57  if(retval > kMax_I4)
58  retval = VDBSRC_OVERFLOW_RV;
59 
60  return (Int4) retval;
61 }
62 
63 /// Get the maximum sequence length in the sequence source.
64 static Int4
65 s_VDBSRC_GetMaxLength(void* vdbDataHandle, void* dummy)
66 {
67  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
68  uint64_t retval = 0;
69 
70  ASSERT(vdbDataHandle);
71  if (vdbData->isInitialized)
72  retval = VDBSRC_GetMaxSeqLen(vdbData);
73 
74  if(retval > kMax_I4)
75  retval = VDBSRC_OVERFLOW_RV;
76 
77  return (Int4) retval;
78 
79 }
80 
81 /// Get the average sequence length in the sequence source.
82 static Int4
83 s_VDBSRC_GetAvgLength(void* vdbDataHandle, void* dummy)
84 {
85  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
86  uint64_t retval = 0;
87 
88  ASSERT(vdbDataHandle);
89  if (vdbData->isInitialized)
90  retval = VDBSRC_GetAvgSeqLen(vdbData);
91 
92  if(retval > kMax_I4)
93  retval = VDBSRC_OVERFLOW_RV;
94 
95  return (Int4) retval;
96 
97 }
98 
99 /// Get the total sequence length in the sequence source.
100 static Int8
101 s_VDBSRC_GetTotLen(void* vdbDataHandle, void* dummy)
102 {
103  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
104  uint64_t retval = 0;
105 
106  ASSERT(vdbDataHandle);
107  if (vdbData->isInitialized)
108  retval = VDBSRC_GetTotSeqLen(vdbData);
109 
110  if(retval > kMax_I8)
111  retval = VDBSRC_OVERFLOW_RV;
112 
113  return (Int8) retval;
114 }
115 
116 /// Get the sequence source name (VDB run accessions).
117 static const char*
118 s_VDBSRC_GetName(void* vdbDataHandle, void* dummy)
119 {
120  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
121  const char* retval = kEmptyVDBName;
122 
123  ASSERT(vdbDataHandle);
124  if (vdbData->isInitialized)
125  {
126  retval = vdbData->names;
127  }
128 
129  return retval;
130 }
131 
132 // ==========================================================================//
133 // BlastSeqSrc interface - Sequence retrieval and properties
134 
135 /// Get the sequence from the sequence source given the OID and encoding.
136 static Int2
137 s_VDBSRC_GetSequence(void* vdbDataHandle, BlastSeqSrcGetSeqArg* args)
138 {
139  // Init the error message and error return value
140  TVDBErrMsg vdbErrMsg;
141  Int2 retval = BLAST_SEQSRC_ERROR;
142 
143  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
144  oid_t oid;
145  EBlastEncoding encoding;
146  TNuclDataRequest req;
147  Boolean bufferAlloc;
148  TByteArray byteArray;
149  Int2 resSetUpBlk;
150 
151  // Note 1: We might need to return an empty sequence in this function
152  // instead of returning an error (also applies to OIDs that map to
153  // empty or tiny reads).
154  //
155  // Note 2: Currently the 2na format doesn't take care of the partial
156  // terminal bytes (basically it reports extra bases on the edges).
157  // These should be ignored at the traceback stage anyway, so we won't
158  // worry about it here.
159 
160  if (!vdbDataHandle || !args)
161  return BLAST_SEQSRC_ERROR;
162 
163  // Init the error message and error return value
164  VDBSRC_InitEmptyErrorMsg(&vdbErrMsg);
165 
166  // Read the arguments
167  oid = args->oid;
168  encoding = args->encoding;
169 
170  // Clean up the existing sequence
171  if (args->seq)
172  BlastSequenceBlkClean(args->seq);
173 
174  // Prepare the nucleotide data request structure
175 
176  req.read4na = (encoding == eBlastEncodingNucleotide ||
177  encoding == eBlastEncodingNcbi4na);
178  req.copyData = req.read4na;
179  req.hasSentinelBytes = (encoding == eBlastEncodingNucleotide);
181  req.readId = oid;
182 
183  bufferAlloc = req.copyData;
184  VDBSRC_InitByteArray_Empty(&byteArray);
185 
186  //Get the sequence buffer and length
187  if (req.read4na)
188  {
189  if(vdbData->reader_4na == NULL)
190  {
191  VDBSRC_Init4naReader(vdbData, &vdbErrMsg);
192 
193  if(vdbErrMsg.isError)
194  {
195  VDBSRC_ResetReader(vdbData);
196  return retval;
197  }
198  }
199 
200  if(!VDBSRC_GetSeq4naCopy(vdbData, &byteArray, &req, &vdbErrMsg))
201  return retval;
202  }
203  else
204  {
205  if(vdbData->reader_2na == NULL)
206  {
207  VDBSRC_InitErrorMsg(&vdbErrMsg, retval, eVDBSRC_READER_2NA_ERROR);
208  return retval;
209  }
210 
211  if(!VDBSRC_GetSeq2na(vdbData, &byteArray, &req, &vdbErrMsg))
212  return retval;
213  }
214 
215  // Set up the sequence data
216  resSetUpBlk =
217  BlastSetUp_SeqBlkNew(byteArray.data, byteArray.basesTotal, &args->seq, bufferAlloc);
218 
219  if (resSetUpBlk == 0)
220  {
221  // Set the sequence oid
222  args->seq->oid = oid;
223 
224  // Final adjustments to the sequence data
225  // (similar to the SeqDB BlastSeqSrc implementation)
226  if (bufferAlloc) {
227  if (!req.hasSentinelBytes)
228  args->seq->sequence = args->seq->sequence_start;
229  }
230  else {
231  if(! req.read4na)
232  {
233  args->seq->bases_offset = 4- byteArray.basesFirstByte ;
234  // Increase length to correct for seq end
235  args->seq->length += args->seq->bases_offset;
236  }
237  }
238  // Sequence data was successfully initialized
239  retval = BLAST_SEQSRC_SUCCESS;
240  }
241 
242  VDBSRC_ReleaseErrorMsg(&vdbErrMsg);
243  return retval;
244 }
245 
246 /// Get the sequence length from the sequence source given the OID.
247 Int4
248 s_VDBSRC_GetSeqLen(void* vdbDataHandle, void* oid)
249 {
250  // Access the SRA data
251  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
252 
253  // Read the sequence as string
254  Int4 * id = (Int4 *) oid;
255  uint64_t retval = 0;
256 
257  if (!vdbData || oid == NULL) {
258  return 0;
259  }
260 
261  retval = VDBSRC_GetSeqLen(vdbData, (uint64_t) (*id));
262 
263  if(retval > kMax_I4) {
264  retval = VDBSRC_OVERFLOW_RV;
265  }
266 
267  return (Int4) retval;
268 
269 }
270 
271 /// Release the sequence from the sequence source given the OID.
272 static void
274 {
275  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
276  if (args->seq->sequence_start_allocated) {
277  sfree(args->seq->sequence_start);
279  args->seq->sequence_start = NULL;
280  }
281  if (args->seq->sequence_allocated) {
282  sfree(args->seq->sequence);
283  args->seq->sequence_allocated = FALSE;
284  args->seq->sequence = NULL;
285  }
286  if(( vdbData->range_list != NULL) && (args->oid == vdbData->range_list->oid)) {
288  }
289  return;
290 }
291 
292 // ==========================================================================//
293 // BlastSeqSrc interface - Iteration
294 
295 /// Get the next sequence OID and increment the BlastSeqSrc iterator.
296 static Int4
297 s_VDBSRC_IteratorNext(void* vdbDataHandle, BlastSeqSrcIterator* itr)
298 {
299  Int4 retval = BLAST_SEQSRC_ERROR;
300  TVDBErrMsg vdbErrMsg;
301  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
302  TVDB2naICReader * r2na;
303  uint64_t read_id;
304 
305  if (!vdbDataHandle || !itr)
306  return retval;
307 
308  if (!vdbData->isInitialized)
309  return retval;
310 
311  VDBSRC_InitEmptyErrorMsg(&vdbErrMsg);
312 
313  if(vdbData->reader_2na == NULL)
314  {
315  VDBSRC_Init2naReader(vdbData, &vdbErrMsg);
316 
317  if(vdbErrMsg.isError)
318  {
319  VDBSRC_ResetReader(vdbData);
320  return retval;
321  }
322  }
323 
324  r2na = vdbData->reader_2na;
325 
326  r2na->current_index ++;
327  if(r2na->current_index >= r2na->max_index)
328  {
329  r2na->max_index = 0;
330  r2na->current_index = 0;
331 
332  retval = VDBSRC_Load2naSeqToBuffer(vdbData, &vdbErrMsg);
333  if((vdbErrMsg.isError) || (retval == BLAST_SEQSRC_EOF))
334  {
335  return retval;
336  }
337  }
338 
339  read_id = r2na->buffer[r2na->current_index].read_id;
340  if(read_id > kMax_I4)
341  {
342  retval = BLAST_SEQSRC_ERROR;
343  }
344 
345  itr->current_pos = read_id;
346 
347  return read_id;
348 }
349 
/// Reset the internal bookmark iterator. This implementation keeps no
/// chunk bookkeeping, so the call does nothing.
/// @see s_MultiSeqResetChunkIter
/// @param vdbDataHandle  Not used [in]
static void
s_VDBSRC_ResetChunkIterator(void* vdbDataHandle)
{
    // Intentionally empty: chunks are not tracked
    (void)vdbDataHandle;
}
358 
359 // ==========================================================================//
360 // BlastSeqSrc interface - Unsupported or trivial functions
361 
362 /// GetNumSeqsStats - Not supported.
363 static Int4
364 s_VDBSRC_GetNumSeqsStats(void* vdbDataHandle, void* dummy)
365 {
366  // Not supported
367  return 0;
368 }
369 
370 /// GetTotLenStats - Not supported.
371 static Int8
372 s_VDBSRC_GetTotLenStats(void* vdbDataHandle, void* dummy)
373 {
374  // Not supported
375  return 0;
376 }
377 
378 /// GetIsProt - always returns FALSE in our case.
379 static Boolean
380 s_VDBSRC_GetIsProt(void* vdbDataHandle, void* dummy)
381 {
382  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
383  ASSERT(vdbDataHandle);
384  return VDBSRC_GetIsProtein(vdbData);
385 }
386 
387 // ==========================================================================//
388 // Copy
389 
390 /// Copy the BlastSeqSrc and its internal SRA data access object.
391 static BlastSeqSrc*
393 {
394  TVDBData* vdbData;
395  TVDBData* vdbDataNew;
396  TVDBErrMsg vdbErrMsg;
397  if (!seqSrc)
398  return NULL;
399 
400  vdbData =
402 
403  VDBSRC_InitEmptyErrorMsg(&vdbErrMsg);
404 
405  vdbDataNew = VDBSRC_CopyData(vdbData, &vdbErrMsg);
406 
407  // set the new copy of SRA data in BlastSeqSrc
408  _BlastSeqSrcImpl_SetDataStructure(seqSrc, (void*)vdbDataNew);
409 
410  VDBSRC_ReleaseErrorMsg(&vdbErrMsg);
411  return seqSrc;
412 }
413 
414 // ==========================================================================//
415 // Destruction
416 
417 /// Release the BlastSeqSrc and its internal SRA data access object.
418 static BlastSeqSrc*
420 {
421  TVDBData* vdbData;
422  TVDBErrMsg vdbErrMsg;
423  if (!seqSrc)
424  return NULL;
425 
426  vdbData =
428 
429  VDBSRC_InitEmptyErrorMsg(&vdbErrMsg);
430  VDBSRC_FreeData(vdbData);
431  VDBSRC_ReleaseErrorMsg(&vdbErrMsg);
432  return NULL;
433 }
434 
435 static Boolean
436 s_VDBSRC_GetSupportsPartialFetching(void* vdbDataHandle, void* dummy)
437 {
438  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
439  if(vdbData->refSet != NULL) {
440  return true;
441  }
442  return false;
443 }
444 
445 static void s_VDBSRC_SetSeqRange(void* vdbDataHandle, BlastSeqSrcSetRangesArg* args)
446 {
447  TVDBData* vdbData = (TVDBData*)vdbDataHandle;
448  ASSERT(vdbDataHandle);
449 
450  if(args== NULL) {
451  return;
452  }
454  if(vdbData->range_list && vdbData->range_list->ranges) {
455  memcpy(vdbData->range_list->ranges, args->ranges, 2*args->num_ranges*sizeof(Int4));
456  vdbData->range_list->oid = args->oid;
457  }
458  return;
459 }
460 
461 // ==========================================================================//
462 // Construction / Initialization
463 
464 /// Set the BlastSeqSrc function pointers and the SRA data access object.
465 static void
467 {
468  ASSERT(seqSrc);
469  ASSERT(vdbData);
470 
471  /* initialize the BlastSeqSrc structure fields with user-defined function
472  * pointers and SRA data */
475  _BlastSeqSrcImpl_SetDataStructure (seqSrc, (void*) vdbData);
491 }
492 
493 /// Initialize the SRA data access object and set up the BlastSeqSrc.
494 static BlastSeqSrc*
495 s_VDBSRC_SrcNew(BlastSeqSrc* seqSrc, void* args)
496 {
497  TVDBNewArgs* vdbArgs = (TVDBNewArgs*)args;
498  TVDBErrMsg vdbErrMsg;
499  TVDBData* vdbData;
500 
501  ASSERT(seqSrc);
502  ASSERT(args);
503  ASSERT(vdbArgs);
504 
505  // error message to be reported in case of a failure
506 
507  VDBSRC_InitEmptyErrorMsg(&vdbErrMsg);
508 
509  // allocate and initialize the SRA data structure
510  vdbData = VDBSRC_NewData(&vdbErrMsg);
511  if (!vdbErrMsg.isError)
512  {
513  if(vdbArgs->isCSRA)
514  VDBSRC_InitCSRAData(vdbData, vdbArgs, &vdbErrMsg, true);
515  else
516  VDBSRC_InitData(vdbData, vdbArgs, &vdbErrMsg, true);
517  }
518 
519  // set up the BlastSeqSrc structure
520  if (vdbData->isInitialized)
521  {
522  // set the VDB data and function pointers in BlastSeqSrc
523  s_InitVDBBlastSeqSrcFields(seqSrc, vdbData);
524  }
525  else
526  {
527  // format the error message
528  char* errMsg = 0;
529  VDBSRC_FormatErrorMsg(&errMsg, &vdbErrMsg);
530 
531  // store the error message in BlastSeqSrc
532  // (it is assumed that in case of an error the client will
533  // never invoke any other BlastSeqSrc functionality,
534  // as required by the BlastSeqSrc interface in blast_seqsrc.h)
535  _BlastSeqSrcImpl_SetInitErrorStr(seqSrc, errMsg);
536 
537  // clean up
538  VDBSRC_FreeData(vdbData);
539  }
540 
541  VDBSRC_ReleaseErrorMsg(&vdbErrMsg);
542  return seqSrc;
543 }
544 
545 BlastSeqSrc*
546 SRABlastSeqSrcInit(const char** vdbRunAccessions, Uint4 numRuns,
547  Boolean isProtein, Boolean * excluded_runs,
548  Uint4* status, Boolean isCSRA, Boolean include_filtered_reads)
549 {
550  BlastSeqSrcNewInfo bssNewInfo;
551  BlastSeqSrc* seqSrc = NULL;
552 
553  TVDBNewArgs vdbArgs;
554  vdbArgs.vdbRunAccessions = vdbRunAccessions;
555  vdbArgs.numRuns = numRuns;
556  vdbArgs.isProtein = isProtein;
557  vdbArgs.isRunExcluded = excluded_runs;
558  vdbArgs.status = 0;
559  vdbArgs.isCSRA = isCSRA;
560  vdbArgs.includeFilteredReads = include_filtered_reads;
561 
562  bssNewInfo.constructor = &s_VDBSRC_SrcNew;
563  bssNewInfo.ctor_argument = (void*)&vdbArgs;
564  seqSrc = BlastSeqSrcNew(&bssNewInfo);
565 
566  *status = vdbArgs.status;
567  return seqSrc;
568 }
569 
570 // ==========================================================================//
571 
572 #ifdef __cplusplus
573 }
574 #endif
575 
static CBioSource dummy
#define sfree(x)
Safe free a pointer: belongs to a higher level header.
Definition: blast_def.h:112
#define BLAST_SEQSRC_ERROR
Error while retrieving sequence.
Definition: blast_seqsrc.h:291
BlastSeqSrc * BlastSeqSrcNew(const BlastSeqSrcNewInfo *bssn_info)
Allocates memory for a BlastSeqSrc structure and then invokes the constructor function defined in its...
Definition: blast_seqsrc.c:90
#define BLAST_SEQSRC_SUCCESS
Successful sequence retrieval.
Definition: blast_seqsrc.h:293
#define BLAST_SEQSRC_EOF
No more sequences available.
Definition: blast_seqsrc.h:292
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetReleaseSequence(BlastSeqSrc *var, ReleaseSeqBlkFnPtr arg)
Definition: blast_seqsrc.c:574
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetResetChunkIterator(BlastSeqSrc *var, ResetChunkIteratorFnPtr arg)
Definition: blast_seqsrc.c:581
NCBI_XBLAST_EXPORT void * _BlastSeqSrcImpl_GetDataStructure(const BlastSeqSrc *var)
Definition: blast_seqsrc.c:555
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetTotLenStats(BlastSeqSrc *var, GetInt8FnPtr arg)
Definition: blast_seqsrc.c:564
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSequence(BlastSeqSrc *var, GetSeqBlkFnPtr arg)
Definition: blast_seqsrc.c:572
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetNumSeqsStats(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:559
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetTotLen(BlastSeqSrc *var, GetInt8FnPtr arg)
Definition: blast_seqsrc.c:563
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:573
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetIsProt(BlastSeqSrc *var, GetBoolFnPtr arg)
Definition: blast_seqsrc.c:567
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetDataStructure(BlastSeqSrc *var, void *arg)
Definition: blast_seqsrc.c:555
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetCopyFnPtr(BlastSeqSrc *var, BlastSeqSrcCopier arg)
Definition: blast_seqsrc.c:553
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetAvgSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:562
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetIterNext(BlastSeqSrc *var, AdvanceIteratorFnPtr arg)
Definition: blast_seqsrc.c:576
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetInitErrorStr(BlastSeqSrc *var, char *arg)
Definition: blast_seqsrc.c:556
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetDeleteFnPtr(BlastSeqSrc *var, BlastSeqSrcDestructor arg)
Definition: blast_seqsrc.c:552
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetName(BlastSeqSrc *var, GetStrFnPtr arg)
Definition: blast_seqsrc.c:566
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetMaxSeqLen(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:560
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetNumSeqs(BlastSeqSrc *var, GetInt4FnPtr arg)
Definition: blast_seqsrc.c:558
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetSetSeqRange(BlastSeqSrc *var, SetSeqRangeFnPtr arg)
Definition: blast_seqsrc.c:570
NCBI_XBLAST_EXPORT void _BlastSeqSrcImpl_SetGetSupportsPartialFetching(BlastSeqSrc *var, GetBoolFnPtr arg)
Definition: blast_seqsrc.c:569
Int2 BlastSetUp_SeqBlkNew(const Uint1 *buffer, Int4 length, BLAST_SequenceBlk **seq_blk, Boolean buffer_allocated)
Allocates memory for *sequence_blk and then populates it.
Definition: blast_util.c:101
void BlastSequenceBlkClean(BLAST_SequenceBlk *seq_blk)
Deallocate memory only for the sequence in the sequence block.
Definition: blast_util.c:220
#define VDBSRC_OVERFLOW_RV
Definition: common_priv.h:109
Int4 oid_t
OID type (must be a SIGNED integer type).
Definition: common_priv.h:119
void VDBSRC_InitErrorMsg(TVDBErrMsg *vdbErrMsg, uint32_t rc, TVDBErrCode localCode)
Initialize an Error message.
Definition: error_priv.c:72
void VDBSRC_ReleaseErrorMsg(TVDBErrMsg *vdbErrMsg)
Release the Error message.
Definition: error_priv.c:142
void VDBSRC_InitEmptyErrorMsg(TVDBErrMsg *vdbErrMsg)
Initialize an empty Error message (No Error).
Definition: error_priv.c:131
void VDBSRC_FormatErrorMsg(char **errMsg, const TVDBErrMsg *vdbErrMsg)
Format the error message as a single human-readable string.
Definition: error_priv.c:154
@ eVDBSRC_READER_2NA_ERROR
Failed to process the NCBI-2na data.
Definition: error_priv.h:64
Uint8 uint64_t
EBlastEncoding
Different types of sequence encodings for sequence retrieval from the BLAST database.
@ eBlastEncodingNcbi4na
NCBI4na.
@ eBlastEncodingNucleotide
Special encoding for preliminary stage of BLAST: permutation of NCBI4na.
#define NULL
Definition: ncbistd.hpp:225
int16_t Int2
2-byte (16-bit) signed integer
Definition: ncbitype.h:100
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
#define kMax_I8
Definition: ncbi_limits.h:221
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
#define kMax_I4
Definition: ncbi_limits.h:218
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
Int4 s_VDBSRC_GetSeqLen(void *vdbDataHandle, void *oid)
Get the sequence length from the sequence source given the OID.
Definition: seqsrc_vdb.c:248
static Int8 s_VDBSRC_GetTotLenStats(void *vdbDataHandle, void *dummy)
GetTotLenStats - Not supported.
Definition: seqsrc_vdb.c:372
static Int2 s_VDBSRC_GetSequence(void *vdbDataHandle, BlastSeqSrcGetSeqArg *args)
Get the sequence from the sequence source given the OID and encoding.
Definition: seqsrc_vdb.c:137
static Int4 s_VDBSRC_IteratorNext(void *vdbDataHandle, BlastSeqSrcIterator *itr)
Get the next sequence OID and increment the BlastSeqSrc iterator.
Definition: seqsrc_vdb.c:297
BlastSeqSrc * SRABlastSeqSrcInit(const char **vdbRunAccessions, Uint4 numRuns, Boolean isProtein, Boolean *excluded_runs, Uint4 *status, Boolean isCSRA, Boolean include_filtered_reads)
Allocate and initialize the SRA BlastSeqSrc object.
Definition: seqsrc_vdb.c:546
static BlastSeqSrc * s_VDBSRC_SrcCopy(BlastSeqSrc *seqSrc)
Copy the BlastSeqSrc and its internal SRA data access object.
Definition: seqsrc_vdb.c:392
static BlastSeqSrc * s_VDBSRC_SrcFree(BlastSeqSrc *seqSrc)
Release the BlastSeqSrc and its internal SRA data access object.
Definition: seqsrc_vdb.c:419
static Int4 s_VDBSRC_GetNumSeqsStats(void *vdbDataHandle, void *dummy)
GetNumSeqsStats - Not supported.
Definition: seqsrc_vdb.c:364
static Boolean s_VDBSRC_GetSupportsPartialFetching(void *vdbDataHandle, void *dummy)
Definition: seqsrc_vdb.c:436
static Boolean s_VDBSRC_GetIsProt(void *vdbDataHandle, void *dummy)
GetIsProt - returns the result of VDBSRC_GetIsProtein for the VDB data.
Definition: seqsrc_vdb.c:380
static Int8 s_VDBSRC_GetTotLen(void *vdbDataHandle, void *dummy)
Get the total sequence length in the sequence source.
Definition: seqsrc_vdb.c:101
static Int4 s_VDBSRC_GetMaxLength(void *vdbDataHandle, void *dummy)
Get the maximum sequence length in the sequence source.
Definition: seqsrc_vdb.c:65
static void s_VDBSRC_ResetChunkIterator(void *vdbDataHandle)
Resets the internal bookmark iterator (not applicable in our case).
Definition: seqsrc_vdb.c:353
static const char * s_VDBSRC_GetName(void *vdbDataHandle, void *dummy)
Get the sequence source name (VDB run accessions).
Definition: seqsrc_vdb.c:118
static void s_VDBSRC_SetSeqRange(void *vdbDataHandle, BlastSeqSrcSetRangesArg *args)
Definition: seqsrc_vdb.c:445
static const char *const kEmptyVDBName
Database name string returned by BlastSeqSrc (normally empty).
Definition: seqsrc_vdb.c:41
static Int4 s_VDBSRC_GetAvgLength(void *vdbDataHandle, void *dummy)
Get the average sequence length in the sequence source.
Definition: seqsrc_vdb.c:83
static BlastSeqSrc * s_VDBSRC_SrcNew(BlastSeqSrc *seqSrc, void *args)
Initialize the SRA data access object and set up the BlastSeqSrc.
Definition: seqsrc_vdb.c:495
static Int4 s_VDBSRC_GetNumSeqs(void *vdbDataHandle, void *dummy)
Get the number of sequences in the sequence source.
Definition: seqsrc_vdb.c:48
static void s_VDBSRC_ReleaseSequence(void *vdbDataHandle, BlastSeqSrcGetSeqArg *args)
Release the sequence from the sequence source given the OID.
Definition: seqsrc_vdb.c:273
static void s_InitVDBBlastSeqSrcFields(BlastSeqSrc *seqSrc, TVDBData *vdbData)
Set the BlastSeqSrc function pointers and the SRA data access object.
Definition: seqsrc_vdb.c:466
Uint1 * sequence_start
Start of sequence, usually one byte before sequence as that byte is a NULL sentinel byte.
Definition: blast_def.h:244
Int4 oid
The ordinal id of the current sequence.
Definition: blast_def.h:250
Boolean sequence_allocated
TRUE if memory has been allocated for sequence.
Definition: blast_def.h:251
Int4 length
Length of sequence.
Definition: blast_def.h:246
Uint1 * sequence
Sequence used for search (could be translation).
Definition: blast_def.h:243
Boolean sequence_start_allocated
TRUE if memory has been allocated for sequence_start.
Definition: blast_def.h:253
Structure used as the second argument to functions satisfying the GetSeqBlkFnPtr signature,...
Definition: blast_seqsrc.h:257
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Definition: blast_seqsrc.h:259
EBlastEncoding encoding
Encoding of sequence, i.e.
Definition: blast_seqsrc.h:263
BLAST_SequenceBlk * seq
Sequence to return, if NULL, it should be allocated by GetSeqBlkFnPtr (using BlastSeqBlkNew or BlastSetU...
Definition: blast_seqsrc.h:284
Complete type definition of Blast Sequence Source Iterator.
unsigned int current_pos
Keep track of this iterator's current position, implementations use UINT4_MAX to indicate this is uni...
Complete type definition of the structure used to create a new BlastSeqSrc.
BlastSeqSrcConstructor constructor
User-defined function to initialize a BlastSeqSrc structure.
void * ctor_argument
Argument to the above function.
Structure used as the argument to function SetRanges.
Definition: blast_seqsrc.h:208
Int4 * ranges
Ranges in sorted order [in].
Definition: blast_seqsrc.h:219
Int4 num_ranges
Number of actual ranges contained.
Definition: blast_seqsrc.h:216
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Definition: blast_seqsrc.h:210
Complete type definition of Blast Sequence Source ADT.
Definition: blast_seqsrc.c:43
Structure providing top-level VDB data access.
Definition: vdb_priv.h:78
VdbBlast4naReader * reader_4na
Definition: vdb_priv.h:95
Boolean isInitialized
Is the object initialized.
Definition: vdb_priv.h:100
TVDB2naICReader * reader_2na
Definition: vdb_priv.h:94
uint64_t numSeqs
Definition: vdb_priv.h:85
VdbBlastReferenceSet * refSet
Definition: vdb_priv.h:88
TVDBPartialFetchingRanges * range_list
Definition: vdb_priv.h:97
char * names
Names of the VDB data represented by this object (usually this will include all the SVDB run accessio...
Definition: vdb_priv.h:92
Structure used for passing data in VDB APIs.
Definition: vdbsequtil.h:53
uint64_t basesTotal
Number of bases.
Definition: vdbsequtil.h:65
uint8_t * data
Pointer to the first byte of the sequence data array.
Definition: vdbsequtil.h:55
uint8_t basesFirstByte
Number of bases stored in the first byte (1, 2, 3, or 4).
Definition: vdbsequtil.h:61
Structure describing the error messages the library can generate.
Definition: error_priv.h:89
Boolean isError
True if the object describes an error.
Definition: error_priv.h:90
Structure used for initializing the SRA data access.
Definition: vdb_priv.h:109
const char ** vdbRunAccessions
Array of SRA accession strings identifying the runs to open.
Definition: vdb_priv.h:111
Boolean isCSRA
Definition: vdb_priv.h:130
uint32_t status
Definition: vdb_priv.h:127
Boolean includeFilteredReads
Definition: vdb_priv.h:132
uint32_t numRuns
Number of runs to open.
Definition: vdb_priv.h:113
Boolean * isRunExcluded
Definition: vdb_priv.h:121
Boolean isProtein
Definition: vdb_priv.h:115
Structure describing the properties of requested nucleotide data.
Definition: vdbsequtil.h:76
Boolean read4na
Retrieve the data in NCBI-4na format (if FALSE, use NCBI-2na).
Definition: vdbsequtil.h:78
Boolean hasSentinelBytes
Append sentinel bytes to both ends of the data.
Definition: vdbsequtil.h:80
Boolean convertDataToBlastna
Convert the data to the Blastna format (used in Blast engine).
Definition: vdbsequtil.h:82
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Definition: vdb_priv.h:62
Packed2naRead * buffer
Definition: vdb_priv.h:51
void VDBSRC_FillPartialFetchingList(TVDBData *vdbData, Int4 num_ranges)
Definition: vdb_priv.c:323
void VDBSRC_FreeData(TVDBData *vdbData)
Release the SRA data and free the memory allocated for the object.
Definition: vdb_priv.c:337
uint64_t VDBSRC_GetAvgSeqLen(TVDBData *vdbData)
Get the average sequence length in the open SRA data.
Definition: vdb_priv.c:67
void VDBSRC_ResetReader(TVDBData *vdbData)
Definition: vdb_priv.c:298
uint64_t VDBSRC_GetSeqLen(TVDBData *vdbData, uint64_t oid)
Get sequence length by oid.
Definition: vdb_priv.c:108
void VDBSRC_InitCSRAData(TVDBData *vdbData, TVDBNewArgs *vdbArgs, TVDBErrMsg *vdbErrMsg, Boolean getStats)
Definition: vdb_priv.c:759
TVDBData * VDBSRC_CopyData(TVDBData *vdbData, TVDBErrMsg *vdbErrMsg)
Need to call free data if error is returned.
Definition: vdb_priv.c:581
TVDBData * VDBSRC_NewData(TVDBErrMsg *vdbErrMsg)
Allocate a new SRA data object, flag it as not initialized.
Definition: vdb_priv.c:141
bool VDBSRC_GetIsProtein(TVDBData *vdbData)
Get if run set is protein or nucl.
Definition: vdb_priv.c:653
Int2 VDBSRC_Load2naSeqToBuffer(TVDBData *vdbData, TVDBErrMsg *vdbErrMsg)
Definition: vdb_priv.c:485
void VDBSRC_InitData(TVDBData *vdbData, TVDBNewArgs *vdbArgs, TVDBErrMsg *vdbErrMsg, Boolean getStats)
Initialize the VDB data.
Definition: vdb_priv.c:160
void VDBSRC_ResetPartialFetchingList(TVDBData *vdbData)
Definition: vdb_priv.c:304
void VDBSRC_Init2naReader(TVDBData *vdbData, TVDBErrMsg *vdbErrMsg)
Definition: vdb_priv.c:404
void VDBSRC_Init4naReader(TVDBData *vdbData, TVDBErrMsg *vdbErrMsg)
Definition: vdb_priv.c:462
uint64_t VDBSRC_GetTotSeqLen(TVDBData *vdbData)
Get the total sequence length in the open SRA data.
Definition: vdb_priv.c:84
uint64_t VDBSRC_GetMaxSeqLen(TVDBData *vdbData)
Get the maximum sequence length in the open SRA data.
Definition: vdb_priv.c:50
Boolean VDBSRC_GetSeq2na(TVDBData *vdbData, TByteArray *dataSeq, TNuclDataRequest *req2na, TVDBErrMsg *vdbErrMsg)
Get the specified subsequence in NCBI-2na format.
Definition: vdbsequtil.c:122
Boolean VDBSRC_GetSeq4naCopy(TVDBData *vdbData, TByteArray *dataSeq, TNuclDataRequest *req4na, TVDBErrMsg *vdbErrMsg)
Get the specified subsequence in NCBI-4na format.
Definition: vdbsequtil.c:469
void VDBSRC_InitByteArray_Empty(TByteArray *byteArray)
Initialize an empty ByteArray object.
Definition: vdbsequtil.c:85
Modified on Wed Apr 17 13:10:18 2024 by modify_doxy.py rev. 669887