NCBI C++ ToolKit
vdbsequtil.c
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: vdbsequtil.c 94176 2021-07-02 13:47:15Z fongah2 $
2 * ===========================================================================
3 *
4 * PUBLIC DOMAIN NOTICE
5 * National Center for Biotechnology Information
6 *
7 * This software/database is a "United States Government Work" under the
8 * terms of the United States Copyright Act. It was written as part of
9 * the author's official duties as a United States Government employee and
10 * thus cannot be copyrighted. This software/database is freely available
11 * to the public for use. The National Library of Medicine and the U.S.
12 * Government have not placed any restriction on its use or reproduction.
13 *
14 * Although all reasonable efforts have been taken to ensure the accuracy
15 * and reliability of the software and data, the NLM and the U.S.
16 * Government do not and cannot warrant the performance or results that
17 * may be obtained by using this software or data. The NLM and the U.S.
18 * Government disclaim all warranties, express or implied, including
19 * warranties of performance, merchantability or fitness for any particular
20 * purpose.
21 *
22 * Please cite the author in any work or product based on this material.
23 *
24 * ===========================================================================
25 *
26 * Author: Vahram Avagyan
27 *
28 */
29 
30 #include "vdbsequtil.h"
31 #include <ncbi/vdb-blast.h>
32 #include <math.h>
33 
34 
35 #ifdef __cplusplus
36 extern "C" {
37 #endif
38 
39 // ==========================================================================//
40 // Definitions / Constants
41 
42 /// Array of 4-character strings corresponding to NCBI-2na bytes.
/// The table is indexed by the packed byte value (0..255). Each entry spells
/// the four bases held in that byte, with the first character taken from the
/// two most significant bits. As the entries show, the 2-bit codes are
/// A = 0, C = 1, G = 2, T = 3 (e.g. index 0 is "AAAA", index 27 is "ACGT",
/// index 255 is "TTTT").
43 static const char* SRASEQUTIL_2NA_BYTE_TO_STRING[256] =
44 {
45  "AAAA", "AAAC", "AAAG", "AAAT", "AACA", "AACC", "AACG", "AACT",
46  "AAGA", "AAGC", "AAGG", "AAGT", "AATA", "AATC", "AATG", "AATT",
47  "ACAA", "ACAC", "ACAG", "ACAT", "ACCA", "ACCC", "ACCG", "ACCT",
48  "ACGA", "ACGC", "ACGG", "ACGT", "ACTA", "ACTC", "ACTG", "ACTT",
49  "AGAA", "AGAC", "AGAG", "AGAT", "AGCA", "AGCC", "AGCG", "AGCT",
50  "AGGA", "AGGC", "AGGG", "AGGT", "AGTA", "AGTC", "AGTG", "AGTT",
51  "ATAA", "ATAC", "ATAG", "ATAT", "ATCA", "ATCC", "ATCG", "ATCT",
52  "ATGA", "ATGC", "ATGG", "ATGT", "ATTA", "ATTC", "ATTG", "ATTT",
53  "CAAA", "CAAC", "CAAG", "CAAT", "CACA", "CACC", "CACG", "CACT",
54  "CAGA", "CAGC", "CAGG", "CAGT", "CATA", "CATC", "CATG", "CATT",
55  "CCAA", "CCAC", "CCAG", "CCAT", "CCCA", "CCCC", "CCCG", "CCCT",
56  "CCGA", "CCGC", "CCGG", "CCGT", "CCTA", "CCTC", "CCTG", "CCTT",
57  "CGAA", "CGAC", "CGAG", "CGAT", "CGCA", "CGCC", "CGCG", "CGCT",
58  "CGGA", "CGGC", "CGGG", "CGGT", "CGTA", "CGTC", "CGTG", "CGTT",
59  "CTAA", "CTAC", "CTAG", "CTAT", "CTCA", "CTCC", "CTCG", "CTCT",
60  "CTGA", "CTGC", "CTGG", "CTGT", "CTTA", "CTTC", "CTTG", "CTTT",
61  "GAAA", "GAAC", "GAAG", "GAAT", "GACA", "GACC", "GACG", "GACT",
62  "GAGA", "GAGC", "GAGG", "GAGT", "GATA", "GATC", "GATG", "GATT",
63  "GCAA", "GCAC", "GCAG", "GCAT", "GCCA", "GCCC", "GCCG", "GCCT",
64  "GCGA", "GCGC", "GCGG", "GCGT", "GCTA", "GCTC", "GCTG", "GCTT",
65  "GGAA", "GGAC", "GGAG", "GGAT", "GGCA", "GGCC", "GGCG", "GGCT",
66  "GGGA", "GGGC", "GGGG", "GGGT", "GGTA", "GGTC", "GGTG", "GGTT",
67  "GTAA", "GTAC", "GTAG", "GTAT", "GTCA", "GTCC", "GTCG", "GTCT",
68  "GTGA", "GTGC", "GTGG", "GTGT", "GTTA", "GTTC", "GTTG", "GTTT",
69  "TAAA", "TAAC", "TAAG", "TAAT", "TACA", "TACC", "TACG", "TACT",
70  "TAGA", "TAGC", "TAGG", "TAGT", "TATA", "TATC", "TATG", "TATT",
71  "TCAA", "TCAC", "TCAG", "TCAT", "TCCA", "TCCC", "TCCG", "TCCT",
72  "TCGA", "TCGC", "TCGG", "TCGT", "TCTA", "TCTC", "TCTG", "TCTT",
73  "TGAA", "TGAC", "TGAG", "TGAT", "TGCA", "TGCC", "TGCG", "TGCT",
74  "TGGA", "TGGC", "TGGG", "TGGT", "TGTA", "TGTC", "TGTG", "TGTT",
75  "TTAA", "TTAC", "TTAG", "TTAT", "TTCA", "TTCC", "TTCG", "TTCT",
76  "TTGA", "TTGC", "TTGG", "TTGT", "TTTA", "TTTC", "TTTG", "TTTT",
77 };
78 
79 // ==========================================================================//
80 // Byte array functions
81 
82 
83 
84 void
86 {
87  ASSERT(byteArray);
88 
89  byteArray->data = NULL;
90  byteArray->size = 0;
91  byteArray->basesPerByte = 1;
92  byteArray->basesFirstByte = 0;
93  byteArray->basesLastByte = 0;
94  byteArray->basesTotal = 0;
95 }
96 
97 void
99  uint8_t* data,
100  uint64_t size,
101  uint8_t basesPerByte,
102  uint8_t basesFirstByte,
103  uint8_t basesLastByte,
104  uint64_t basesTotal)
105 {
106  ASSERT(byteArray);
107  ASSERT(data);
108  ASSERT(size > 0);
109  ASSERT(basesPerByte == 1 || basesPerByte == 2 || basesPerByte == 4);
110 
111  byteArray->data = data;
112  byteArray->size = size;
113  byteArray->basesPerByte = basesPerByte;
114  byteArray->basesFirstByte = basesFirstByte;
115  byteArray->basesLastByte = basesLastByte;
116  byteArray->basesTotal = basesTotal;
117 }
118 
119 // ==========================================================================//
120 // SRA Sequence-related functions - NCBI 2na
121 
123  TByteArray* dataSeq,
124  TNuclDataRequest* req2na,
125  TVDBErrMsg * vdbErrMsg)
126 {
127  uint32_t status = eVdbBlastNoErr;
128  TVDB2naICReader * r2na = vdbData->reader_2na;
129  Packed2naRead * read;
130  uint8_t * dataOut;
131  uint8_t offsetBits;
132  uint32_t sizeBits;
133  uint8_t basesFirstByte;
134  uint32_t offsetEnd; // last bit's offset
135  uint8_t bitsInLastByte; // 1 <= bitsInLastByte <= 8
136  uint8_t basesLastByte;
137  uint64_t buf_size = 2;
138 
139 
140  ASSERT(dataSeq);
141  ASSERT(vdbData);
142 
143  ASSERT(r2na->current_index < r2na->max_index);
144  read = r2na->buffer + r2na->current_index;
145 
146  if(req2na->readId != read->read_id)
147  {
148  VDBSRC_InitErrorMsg(vdbErrMsg, status, eVDBSRC_READ_ID_MISMATCH);
149  return FALSE;
150  }
151 
152  dataOut = (uint8_t *) read->starting_byte;
153  offsetBits = read->offset_to_first_bit;
154  sizeBits = read->length_in_bases * 2;
155 
156  // If offsetBits is an odd number (which I'm not sure it can be,
157  // but nothing in the interface or documentation says it can't)
158  // there is nothing we can do to avoid copying the data over to a
159  // new array, which is forbidden in this function.
160  // Therefore, we will only consider offsets that are off
161  // by 0, 2, 4, or 6 bits within the first byte they point to.
162  if (offsetBits & 0x01)
163  {
165  return FALSE;
166  }
167 
168  basesFirstByte = 4 - (offsetBits >> 1);
169  offsetEnd = offsetBits + sizeBits - 1; // last bit's offset
170  bitsInLastByte = 1 + (offsetEnd & 0x07); // 1 <= bitsInLastByte <= 8
171  basesLastByte = bitsInLastByte >> 1;
172 
173  if((read->length_in_bases - basesFirstByte - basesLastByte) > 0)
174  buf_size +=(read->length_in_bases - basesFirstByte - basesLastByte)/4;
175 
176 
177  // Set up the output sequence data
178  VDBSRC_InitByteArray_Data (dataSeq, dataOut, buf_size,
179  4, basesFirstByte, basesLastByte, read->length_in_bases);
180 
181  return TRUE;
182 }
183 
185  char** strData)
186 {
187  uint32_t sizeStr;
188  uint32_t iByteIndex = 0;
189  uint32_t iBaseIndex = 0;
190  char * str = NULL;
191 
192  ASSERT(dataSeq);
193 
194  if (!dataSeq->data || dataSeq->size == 0) {
195  return FALSE;
196  }
197 
198  sizeStr = dataSeq->size * 4 + 32;
199  str = (char*)calloc(sizeStr, sizeof(char));
200  if (str == NULL) {
201  return FALSE;
202  }
203  *strData = str;
204  // Convert the first (partial) byte to string
205  if(dataSeq->basesFirstByte != 4) {
206  unsigned int tmp = 4 - dataSeq->basesFirstByte;
207  iByteIndex =1;
208  iBaseIndex = dataSeq->basesFirstByte;
209  const char * tmp_ptr = SRASEQUTIL_2NA_BYTE_TO_STRING[dataSeq->data[0]] + tmp;
210  memcpy(str, tmp_ptr, dataSeq->basesFirstByte);
211  }
212  // Convert the array of (full) bytes to string
213 
214  for (; iByteIndex < (dataSeq->size - 1); iByteIndex++) {
215  memcpy(&str[iBaseIndex], SRASEQUTIL_2NA_BYTE_TO_STRING[dataSeq->data[iByteIndex]], 4);
216  iBaseIndex +=4;
217  }
218 
219  // Convert the last (partial) byte to string
220  memcpy(&str[iBaseIndex], SRASEQUTIL_2NA_BYTE_TO_STRING[dataSeq->data[iByteIndex]], dataSeq->basesLastByte);
221  iBaseIndex += dataSeq->basesLastByte;
222 
223  str[iBaseIndex] ='\0';
224  return TRUE;
225 }
226 
227 // ==========================================================================//
228 // SRA Sequence-related functions - NCBI 4na
229 
231  TNuclDataRequest* req4na, TVDBErrMsg * vdbErrMsg)
232 {
233  uint32_t status = eVdbBlastNoErr;
234  uint64_t seqLength = 0;
235  uint8_t * data = NULL;
236  uint64_t sizeOut=0;
237  uint8_t* start_byte = NULL;
238  Boolean sentinel = req4na->hasSentinelBytes;
239  uint64_t readId = req4na->readId;
240 
241  ASSERT(dataSeq);
242  ASSERT(req4na);
243  ASSERT(req4na->read4na);
244  ASSERT(vdbData->reader_4na != NULL);
245 
246  data = (uint8_t *) calloc(VDB_4NA_CHUNK_BUF_SIZE + (sentinel? 2:0), sizeof(uint8_t));
247  if(data == NULL) {
249  return FALSE;
250  }
251  if (vdbData->refSet!= NULL) {
252  readId |= REF_SEQ_ID_MASK;
253  }
254  start_byte = sentinel ? &(data[1]) : data;
255 
256  seqLength = VdbBlast4naReaderRead(vdbData->reader_4na, &status,
257  readId , 0, start_byte, VDB_4NA_CHUNK_BUF_SIZE);
258  if(status != eVdbBlastNoErr) {
260  return FALSE;
261  }
262 
263  if(seqLength == VDB_4NA_CHUNK_BUF_SIZE) {
264  uint8_t tmp[2];
265  uint64_t tl = VdbBlast4naReaderRead(vdbData->reader_4na, &status,
266  readId, seqLength, tmp, 2);
267  if(tl > 0) {
269  return FALSE;
270  }
271  }
272 
273  if((eVdbBlastNoErr != status) || (seqLength == 0)) {
274  if((status == eVdbBlastErr) && (seqLength == 0))
275  VDBSRC_InitErrorMsg(vdbErrMsg, status, eVDBSRC_ID_OUT_OF_RANGE);
276  else
278 
279  return FALSE;
280  }
281 
282  sizeOut = seqLength;
283 
284  // Add sentinel byte
285  if (sentinel) {
286  sizeOut+=2;
287  data[0] = kNuclSentinel;
288  data[sizeOut-1] = kNuclSentinel;
289  }
290 
291  if(req4na->convertDataToBlastna) {
292  for(int i=0; i< seqLength; i++ )
293  start_byte[i] = NCBI4NA_TO_BLASTNA[start_byte[i]];
294  }
295 
296  // Set up the sub-sequence data based on the newly created array
297  VDBSRC_InitByteArray_Data (dataSeq, data, sizeOut, 1, 1, 1, seqLength);
298 
299  return TRUE;
300 }
301 
303  TNuclDataRequest* req4na, TVDBErrMsg * vdbErrMsg)
304 {
305  uint32_t status = eVdbBlastNoErr;
306  uint8_t * data = NULL;
307  uint8_t* seq_start = NULL;
308  Boolean sentinel = req4na->hasSentinelBytes;
309  uint64_t readId = req4na->readId;
310  TVDBPartialFetchingRanges *pf_list = vdbData->range_list;
311  int64_t num_ranges = pf_list->num_ranges;
312  int64_t region_end = pf_list->ranges[num_ranges * 2 -1];
313  uint64_t buffer_length = 0;
314  uint64_t seq_length = 0;
315 
316  ASSERT(dataSeq);
317  ASSERT(req4na);
318  ASSERT(req4na->read4na);
319  ASSERT(vdbData->reader_4na != NULL);
320 
321  if (vdbData->refSet!= NULL) {
322  readId |= REF_SEQ_ID_MASK;
323  }
324  seq_length = VDBSRC_GetSeqLen(vdbData, readId);
325  ASSERT(seq_length);
326  region_end = MIN (seq_length, pf_list->ranges[num_ranges * 2 -1]);
327  //buffer_length = region_end + (sentinel ? 2:0);
328  buffer_length = seq_length + (sentinel ? 2:0);
329  pf_list->ranges[num_ranges * 2 -1] = region_end;
330  data = (uint8_t *) calloc(buffer_length, sizeof(uint8_t));
331  //printf("Paritial fetching add %ld, oid %d\n", data,req4na->readId);
332  if(data == NULL) {
334  return FALSE;
335  }
336 
337  seq_start = data + (sentinel ? 1:0);
338 
339  /* Place the fence sentinel aorund ranges first; so the range data is freeto reaplce the
340  * fence byte if adjacent rnages overlap, skipping first 'from' and last 'to'
341  */
342  for(unsigned int i =0; i < num_ranges * 2; i+=2) {
343  Int4 begin = pf_list->ranges[i];
344  Int4 end = pf_list->ranges[i+1];
345  if (begin > 0) {
346  seq_start[begin-1] =(char) FENCE_SENTRY;
347  }
348  if(end < seq_length) {
349  seq_start[end] =(char) FENCE_SENTRY;
350  }
351  }
352 
353  for(unsigned int i =0; i < num_ranges * 2; i+=2) {
354  int64_t range_start = pf_list->ranges[i];
355  uint64_t range_length = pf_list->ranges[i+1] - range_start;
356  uint8_t* buf_start = &(seq_start[range_start]);
357  uint64_t rl = VdbBlast4naReaderRead(vdbData->reader_4na, &status, readId,
358  range_start, buf_start, range_length);
359  //printf("Range start %ld, end %ld, length %ld\n", range_start, range_start +range_length, range_length);
360  if(((eVdbBlastNoErr != status) && (eVdbBlastCircularSequence != status)) || (rl == 0)) {
361  if((status == eVdbBlastErr) && (rl == 0)){
362  VDBSRC_InitErrorMsg(vdbErrMsg, status, eVDBSRC_ID_OUT_OF_RANGE);
363  }
364  else {
366  }
367  return FALSE;
368  }
369  if(rl != range_length){
371  return FALSE;
372  }
373 
374  if(req4na->convertDataToBlastna) {
375  for(int i=0; i< range_length; i++ ) {
376  buf_start[i] = NCBI4NA_TO_BLASTNA[buf_start[i]];
377  }
378  }
379  }
380 
381  // Add sentinel byte
382  if (sentinel) {
383  data[0] = kNuclSentinel;
384  data[buffer_length-1] = kNuclSentinel;
385  }
386 
387  // Set up the sub-sequence data based on the newly created array
388  VDBSRC_InitByteArray_Data (dataSeq, data, buffer_length, 1, 1, 1, seq_length);
389 
390  return TRUE;
391 }
392 
393 
395  TNuclDataRequest* req4na, TVDBErrMsg * vdbErrMsg)
396 {
397  uint32_t status = eVdbBlastNoErr;
398  size_t seqLength = 0;
399  const uint8_t * data;
400  uint64_t sizeOut;
401  uint8_t* dataOut;
402  uint8_t* start_byte;
403 
404  ASSERT(dataSeq);
405  ASSERT(req4na);
406  ASSERT(req4na->read4na);
407  ASSERT(vdbData->reader_4na != NULL);
408 
409 
410  //Note seq in 4na format but but has been expanded to 1 nucl per byte already
411  data = VdbBlast4naReaderData(vdbData->reader_4na, &status, req4na->readId, &seqLength);
412 
413  if((data == NULL) && (eVdbBlastChunkedSequence == status)) {
414  return s_GetSeq4naChunkSeq(vdbData, dataSeq, req4na, vdbErrMsg);
415  }
416 
417  if((eVdbBlastNoErr != status) || (seqLength == 0))
418  {
419  if((status == eVdbBlastInvalidId) && (seqLength == 0))
420  VDBSRC_InitErrorMsg(vdbErrMsg, status, eVDBSRC_FILTERED_READ);
421  else if((status == eVdbBlastErr) && (seqLength == 0))
422  VDBSRC_InitErrorMsg(vdbErrMsg, status, eVDBSRC_ID_OUT_OF_RANGE);
423  else
425 
426  return FALSE;
427  }
428 
429  sizeOut = seqLength;
430 
431  if (req4na->hasSentinelBytes)
432  sizeOut += 2;
433 
434  dataOut = (uint8_t*)calloc(sizeOut, sizeof(uint8_t));
435 
436  if(NULL == dataOut)
437  {
439  return false;
440  }
441 
442  // Add sentinel byte
443  start_byte = dataOut;
444  if (req4na->hasSentinelBytes)
445  {
446  dataOut[0] = kNuclSentinel;
447  dataOut[sizeOut-1] = kNuclSentinel;
448  memcpy(&dataOut[1], data, seqLength);
449  start_byte = &dataOut[1];
450  }
451 
452  if(req4na->convertDataToBlastna)
453  {
454  int i;
455  for(i=0; i< seqLength; i++ )
456  start_byte[i] = NCBI4NA_TO_BLASTNA[data[i]];
457  }
458  else
459  {
460  memcpy(start_byte, data, seqLength);
461  }
462 
463  // Set up the sub-sequence data based on the newly created array
464  VDBSRC_InitByteArray_Data (dataSeq, dataOut, sizeOut, 1, 1, 1, seqLength);
465 
466  return TRUE;
467 }
468 
470  TByteArray* dataSeq,
471  TNuclDataRequest* req4na,
472  TVDBErrMsg * vdbErrMsg)
473 {
474  if(vdbData->refSet == NULL) {
475  return s_GetSeq4na(vdbData, dataSeq, req4na, vdbErrMsg);
476  }
477  else {
478 
479  if((vdbData->range_list != NULL) &&
480  (vdbData->range_list->num_ranges > 0) &&
481  (vdbData->range_list->oid == req4na->readId)) {
482  return s_GetSeq4naChunkSeq_PartialFetching(vdbData, dataSeq, req4na, vdbErrMsg);
483  }
484  else {
485  return s_GetSeq4naChunkSeq(vdbData, dataSeq, req4na, vdbErrMsg);
486  }
487 
488  }
489 }
490 
492  char** strData)
493 {
494  uint32_t sizeStr;
495  uint32_t iByteIndex=0;
496  uint32_t lastIndex;
497  ASSERT(dataSeq);
498 
499  if (!dataSeq->data ||
500  dataSeq->size == 0 ||
501  dataSeq->basesPerByte != 1)
502  {
503  return FALSE;
504  }
505 
506  sizeStr = dataSeq->size + 1;
507  lastIndex = dataSeq->size;
508  if (req4na->hasSentinelBytes)
509  {
510  sizeStr -=2;
511  iByteIndex = 1;
512  lastIndex -=1;
513  }
514 
515  *strData = (char*)calloc(sizeStr, sizeof(char));
516  if (!(*strData))
517  {
518  return FALSE;
519  }
520 
521  // Convert the array of bytes to string
522  for (; iByteIndex < lastIndex; iByteIndex++)
523  {
524  uint8_t byteCur = dataSeq->data[iByteIndex] & 0x0F;
525 
526  (*strData)[iByteIndex] = NCBI4NA_TO_IUPACNA[byteCur];
527  }
528 
529  (*strData)[iByteIndex] = '\0';
530 
531  return TRUE;
532 }
533 Boolean
535  uint64_t oid,
536  char** seqIupacna,
537  TVDBErrMsg * vdbErrMsg)
538 {
539  TNuclDataRequest req;
540  TByteArray dataSeq;
541  ASSERT(vdbData);
542  ASSERT(seqIupacna);
543  // Open the read for given OID
544  req.read4na = TRUE;
545  req.copyData = TRUE;
546  req.hasSentinelBytes = FALSE;
548  req.readId = oid;
549 
550  if(vdbData->reader_4na == NULL) {
551  VDBSRC_Init4naReader(vdbData, vdbErrMsg);
552  }
553  if(vdbErrMsg->isError)
554  {
555  return FALSE;
556  }
557 
558  VDBSRC_InitByteArray_Empty(&dataSeq);
559  //clock_t start = clock(), diff;
560  if(!VDBSRC_GetSeq4naCopy( vdbData, &dataSeq, &req, vdbErrMsg))
561  return FALSE;
562 
563  /*
564  diff = clock() - start;
565  int msec = diff * 1000 / CLOCKS_PER_SEC;
566  printf("Time taken %d seconds %d milliseconds\n", msec/1000, msec%1000);
567  */
568  if (!VDBSRC_Convert4naToString(&dataSeq, &req, seqIupacna))
569  {
570  free(dataSeq.data);
572  return FALSE;
573  }
574 
575  free(dataSeq.data);
576  return TRUE;
577 }
578 
579 // Note that 2na reader can only read in sequential order
580 // The oid is provided as sanity check to make sure
581 // the application iterations count is in sycn with the
582 // internal count in the vdb lib
583 Boolean
585  uint64_t oid,
586  char** seqIupacna,
587  TVDBErrMsg * vdbErrMsg)
588 {
589  TNuclDataRequest req;
590  TByteArray dataSeq;
591  int8_t rv;
592  char* strData = 0;
593  ASSERT(vdbData);
594  ASSERT(seqIupacna);
595 
596  // Open the read for given OID
597 
598  req.read4na = FALSE;
599  req.copyData = FALSE;
600  req.hasSentinelBytes = FALSE;
602  req.readId = oid;
603 
604  VDBSRC_InitByteArray_Empty(&dataSeq);
605  rv = VDBSRC_GetSeq2na(vdbData, &dataSeq, &req, vdbErrMsg);
606 
607  if(rv == BLAST_SEQSRC_ERROR)
608  return FALSE;
609 
610  // Convert it to its string representation
611 
612  if (!VDBSRC_Convert2naToString(&dataSeq, &strData))
613  {
615  return FALSE;
616  }
617 
618  *seqIupacna = strData;
619  return TRUE;
620 }
621 // ==========================================================================//
622 
623 #ifdef __cplusplus
624 }
625 #endif
626 
#define BLAST_SEQSRC_ERROR
Error while retrieving sequence.
Definition: blast_seqsrc.h:291
#define FENCE_SENTRY
This sentry value is used as a 'fence' around the valid portions of partially decoded sequences.
Definition: blast_util.h:364
void VDBSRC_InitErrorMsg(TVDBErrMsg *vdbErrMsg, uint32_t rc, TVDBErrCode localCode)
Initialize an Error message.
Definition: error_priv.c:72
void VDBSRC_InitLocalErrorMsg(TVDBErrMsg *vdbErrMsg, TVDBErrCode localCode)
Initialize an Error message that is local to this library.
Definition: error_priv.c:113
@ eVDBSRC_FILTERED_READ
oid corresponds to a filtered read
Definition: error_priv.h:78
@ eVDBSRC_4NA_SEQ_STRING_ERROR
4na convert to string error
Definition: error_priv.h:75
@ eVDBSRC_ID_OUT_OF_RANGE
oid is out of range
Definition: error_priv.h:79
@ eVDBSRC_READ_2NA_CACHE_ERROR
Failed to read 2na (cache)
Definition: error_priv.h:66
@ eVDBSRC_READ_ID_MISMATCH
Read id mismatch (requested id != retrieved id)
Definition: error_priv.h:74
@ eVDBSRC_4NA_REF_SEQ_BUF_OVERFLOW
4na seq overflow
Definition: error_priv.h:80
@ eVDBSRC_NO_MEM_FOR_VDBDATA
No memory for the VDB data.
Definition: error_priv.h:70
@ eVDBSRC_READ_4NA_COPY_ERROR
Failed to read 4na (copy buffer)
Definition: error_priv.h:69
@ eVDBSRC_2NA_SEQ_STRING_ERROR
2na convert to string error
Definition: error_priv.h:76
@ eVDBSRC_NO_MEM_FOR_CHUNK_SEQ
No memory for chunk seq.
Definition: error_priv.h:72
static const char * str(char *buf, int n)
Definition: stats.c:84
static char tmp[3200]
Definition: utf8.c:42
char data[12]
Definition: iconv.c:80
Uint8 uint64_t
Int8 int64_t
unsigned char uint8_t
signed char int8_t
Uint4 uint32_t
const char NCBI4NA_TO_IUPACNA[]
Translates between ncbi4na and iupacna.
const Uint1 NCBI4NA_TO_BLASTNA[]
Translates between ncbi4na and blastna.
const Uint1 kNuclSentinel
Sentinel nibble for nucleotide sequences.
#define NULL
Definition: ncbistd.hpp:225
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
int i
if(yy_accept[yy_current_state])
const struct ncbi::grid::netcache::search::fields::SIZE size
#define MIN(a, b)
returns smaller of a and b.
Definition: ncbi_std.h:112
Uint1 Boolean
bool replacement for C
Definition: ncbi_std.h:94
#define TRUE
bool replacement for C indicating true.
Definition: ncbi_std.h:97
#define FALSE
bool replacement for C indicating false.
Definition: ncbi_std.h:101
#define ASSERT
macro for assert.
Definition: ncbi_std.h:107
#define uint8_t
Definition: config.h:54
Structure providing top-level VDB data access.
Definition: vdb_priv.h:78
VdbBlast4naReader * reader_4na
Definition: vdb_priv.h:95
TVDB2naICReader * reader_2na
Definition: vdb_priv.h:94
VdbBlastReferenceSet * refSet
Definition: vdb_priv.h:88
TVDBPartialFetchingRanges * range_list
Definition: vdb_priv.h:97
Structure used for passing data in VDB APIs.
Definition: vdbsequtil.h:53
uint64_t basesTotal
Number of bases.
Definition: vdbsequtil.h:65
uint8_t basesPerByte
Compression ratio, i.e. number of bases packed in a byte (1, 2, or 4).
Definition: vdbsequtil.h:59
uint8_t * data
Pointer to the first byte of the sequence data array.
Definition: vdbsequtil.h:55
uint8_t basesFirstByte
Number of bases stored in the first byte (1, 2, 3, or 4).
Definition: vdbsequtil.h:61
uint64_t size
Size of the sequence data array in bytes.
Definition: vdbsequtil.h:57
uint8_t basesLastByte
Number of bases stored in the last byte (1, 2, 3, or 4).
Definition: vdbsequtil.h:63
Structure describing the error messages the library can generate.
Definition: error_priv.h:89
Boolean isError
True if the object describes an error.
Definition: error_priv.h:90
Structure describing the properties of requested nucleotide data.
Definition: vdbsequtil.h:76
Boolean read4na
Retrieve the data in NCBI-4na format (if FALSE, use NCBI-2na).
Definition: vdbsequtil.h:78
Boolean hasSentinelBytes
Append sentinel bytes to both ends of the data.
Definition: vdbsequtil.h:80
Boolean convertDataToBlastna
Convert the data to the Blastna format (used in Blast engine).
Definition: vdbsequtil.h:82
Int4 oid
Oid in BLAST database, index in an array of sequences, etc [in].
Definition: vdb_priv.h:62
Int4 num_ranges
Number of actual ranges contained.
Definition: vdb_priv.h:64
Packed2naRead * buffer
Definition: vdb_priv.h:51
uint64_t VDBSRC_GetSeqLen(TVDBData *vdbData, uint64_t oid)
Get sequence length by oid.
Definition: vdb_priv.c:108
void VDBSRC_Init4naReader(TVDBData *vdbData, TVDBErrMsg *vdbErrMsg)
Definition: vdb_priv.c:462
#define VDB_4NA_CHUNK_BUF_SIZE
Definition: vdb_priv.h:276
#define REF_SEQ_ID_MASK
Definition: vdb_priv.h:277
Boolean VDBSRC_Get4naSequenceAsString(TVDBData *vdbData, uint64_t oid, char **seqIupacna, TVDBErrMsg *vdbErrMsg)
Access and convert the selected sequence to a human-readable string.
Definition: vdbsequtil.c:534
void VDBSRC_InitByteArray_Data(TByteArray *byteArray, uint8_t *data, uint64_t size, uint8_t basesPerByte, uint8_t basesFirstByte, uint8_t basesLastByte, uint64_t basesTotal)
Initialize a ByteArray object from the given sequence data.
Definition: vdbsequtil.c:98
Boolean VDBSRC_Convert2naToString(TByteArray *dataSeq, char **strData)
Definition: vdbsequtil.c:184
Boolean s_GetSeq4na(TVDBData *vdbData, TByteArray *dataSeq, TNuclDataRequest *req4na, TVDBErrMsg *vdbErrMsg)
Definition: vdbsequtil.c:394
Boolean s_GetSeq4naChunkSeq_PartialFetching(TVDBData *vdbData, TByteArray *dataSeq, TNuclDataRequest *req4na, TVDBErrMsg *vdbErrMsg)
Definition: vdbsequtil.c:302
Boolean VDBSRC_GetSeq2na(TVDBData *vdbData, TByteArray *dataSeq, TNuclDataRequest *req2na, TVDBErrMsg *vdbErrMsg)
Get the specified subsequence in NCBI-2na format.
Definition: vdbsequtil.c:122
Boolean VDBSRC_GetSeq4naCopy(TVDBData *vdbData, TByteArray *dataSeq, TNuclDataRequest *req4na, TVDBErrMsg *vdbErrMsg)
Get the specified subsequence in NCBI-4na format.
Definition: vdbsequtil.c:469
void VDBSRC_InitByteArray_Empty(TByteArray *byteArray)
Initialize an empty ByteArray object.
Definition: vdbsequtil.c:85
Boolean s_GetSeq4naChunkSeq(TVDBData *vdbData, TByteArray *dataSeq, TNuclDataRequest *req4na, TVDBErrMsg *vdbErrMsg)
Definition: vdbsequtil.c:230
Boolean VDBSRC_Get2naSequenceAsString(TVDBData *vdbData, uint64_t oid, char **seqIupacna, TVDBErrMsg *vdbErrMsg)
Definition: vdbsequtil.c:584
Boolean VDBSRC_Convert4naToString(TByteArray *dataSeq, TNuclDataRequest *req4na, char **strData)
Definition: vdbsequtil.c:491
static const char * SRASEQUTIL_2NA_BYTE_TO_STRING[256]
Array of 4-character strings corresponding to NCBI-2na bytes.
Definition: vdbsequtil.c:43
void free(voidpf ptr)
voidp calloc(uInt items, uInt size)
Modified on Tue Apr 16 20:08:23 2024 by modify_doxy.py rev. 669887