NCBI C++ ToolKit
seqdbblob.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: seqdbblob.cpp 100101 2023-06-15 14:10:29Z merezhuk $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Author: Kevin Bealer
27  *
28  */
29 
30 /// @file seqdbblob.cpp
31 /// Defines BlastDb `Blob' class for SeqDB and WriteDB.
32 #include <ncbi_pch.hpp>
36 
38 
39 
41  : m_Owner(true), m_ReadOffset(0), m_WriteOffset(0)
42 {
43  if (size) {
44  m_DataHere.reserve(size);
45  }
46 }
47 
49  : m_Owner(copy), m_ReadOffset(0), m_WriteOffset(0)
50 {
51  if (m_Owner) {
52  m_DataHere.assign(data.data(), data.data() + data.size());
53  } else {
54  m_DataRef = data;
55  }
56 }
57 
59 {
60  m_Owner = true;
61  m_ReadOffset = 0;
62  m_WriteOffset = 0;
63  m_DataHere.resize(0);
64  m_DataRef = CTempString("");
65  m_Lifetime.Reset();
66 }
67 
69 {
70  m_Owner = false;
71  m_DataRef = data;
72  m_Lifetime.Reset();
73 }
74 
76 {
77  m_Owner = false;
78  m_DataRef = data;
79  m_Lifetime = lifetime;
80 }
81 
83 {
84  return x_ReadVarInt(& m_ReadOffset);
85 }
86 
88 {
89  return x_ReadVarInt(& offset);
90 }
91 
92 Int8 CBlastDbBlob::x_ReadVarInt(int * offsetp) const
93 {
94  CTempString all = Str();
95  Int8 rv(0);
96 
97  for(size_t i = *offsetp; i < all.size(); i++) {
98  int ch = all[i];
99 
100  if (ch & 0x80) {
101  // middle
102  rv = (rv << 7) | (ch & 0x7F);
103  } else {
104  // end
105  rv = (rv << 6) | (ch & 0x3F);
106  *offsetp = static_cast<int>(i+1);
107 
108  return (ch & 0x40) ? -rv : rv;
109  }
110  }
111 
113  eFileErr,
114  "CBlastDbBlob::ReadVarInt: eof while reading integer.");
115 }
116 
117 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
118  (!defined(NCBI_COMPILER_MIPSPRO)) )
120 {
121  return x_ReadIntFixed<int,1>(& m_ReadOffset);
122 }
123 
125 {
126  return x_ReadIntFixed<int, 1>(& offset);
127 }
128 
130 {
131  return x_ReadIntFixed<int,2>(& m_ReadOffset);
132 }
133 
135 {
136  return x_ReadIntFixed<int, 2>(& offset);
137 }
138 
140 {
141  return x_ReadIntFixed<Int4,4>(& m_ReadOffset);
142 }
143 
145 {
146  return x_ReadIntFixed<Int4, 4>(& offset);
147 }
148 
150 {
151  return x_ReadIntFixed<Int8, 8>(& m_ReadOffset);
152 }
153 
155 {
156  return x_ReadIntFixed<Int8, 8>(& offset);
157 }
158 
160 {
161  return x_ReadString(fmt, & m_ReadOffset);
162 }
163 
165 {
166  return x_ReadString(fmt, & offset);
167 }
168 
170 {
171  int sz = 0;
172 
173  if (fmt == eSize4) {
174  sz = x_ReadIntFixed<int,4>(offsetp);
175  } else if (fmt == eSizeVar) {
176  sz = static_cast<int>( x_ReadVarInt(offsetp));
177  }
178 
179  const char * datap = "";
180 
181  if (fmt == eNUL) {
182  CTempString ts = Str();
183  int zoffset = -1;
184 
185  for(size_t i = *offsetp; i < ts.size(); i++) {
186  if (ts[i] == (char)0) {
187  zoffset = static_cast<int>(i);
188  break;
189  }
190  }
191 
192  if (zoffset == -1) {
194  eFileErr,
195  "CBlastDbBlob::ReadString: Unterminated string.");
196  }
197 
198  datap = ts.data() + *offsetp;
199  sz = zoffset - *offsetp;
200  *offsetp = zoffset+1;
201  } else {
202  datap = x_ReadRaw(sz, offsetp);
203  }
204 
205  return CTempString(datap, sz);
206 }
207 #endif
208 
209 const char * CBlastDbBlob::x_ReadRaw(int size, int * offsetp) const
210 {
211  _ASSERT(offsetp);
212  _ASSERT(size >= 0);
213 
214  CTempString s = Str();
215 
216  int begin = *offsetp;
217  int end = begin + size;
218 
219  if (begin > end || end > (int)s.size()) {
221  eFileErr,
222  "CBlastDbBlob::x_ReadRaw: hit end of data");
223  }
224 
225  *offsetp = end;
226  return s.data() + begin;
227 }
228 
229 
230 // Variable length format for an integer. The most significant byte
231 // is first. 7 bits are encoded per byte, except for the last byte,
232 // where the sign is encoded using the 0x40 bit, with 6 bits of other
233 // data. Termination is handled by the 0x80 bit, which is on for all
234 // bytes except the last (the least significant byte). The data looks
235 // like Uint1 for values 0 to 63.
236 //
237 // 23 -> 23
238 // 84 -> 82 04
239 // 55 -> 81 15
240 // -55 -> 81 55
241 // 01 01 -> 84 01
242 
244 {
245  return x_WriteVarInt(x, NULL);
246 }
247 
249 {
250  return x_WriteVarInt(x, & offset);
251 }
252 
253 int CBlastDbBlob::x_WriteVarInt(Int8 x, int * offsetp)
254 {
255  // The variable length integer is written into the end of the 16
256  // byte array shown below.
257 
258  _ASSERT(((x >> 62) == -1) || ((x >> 62) == 0));
259 
260  char buf[16];
261  int end_ptr = sizeof(buf);
262  int ptr = end_ptr;
263 
264  Uint8 ux = (Uint8)((x >= 0) ? x : -x);
265 
266  buf[--ptr] = (ux & 0x3F);
267  ux >>= 6;
268 
269  if (x < 0) {
270  buf[ptr] |= 40;
271  }
272 
273  while(ux) {
274  buf[--ptr] = (ux & 0x7F) | 0x80;
275  ux >>= 7;
276  }
277 
278  int bytes = end_ptr - ptr;
279 
280  x_WriteRaw(buf + ptr, bytes, offsetp);
281 
282  return offsetp ? (bytes + *offsetp) : m_WriteOffset;
283 }
284 
286 {
287  // Compute storage length of a variable-length integer.
288 
289  int bytes = 1;
290 
291  Uint8 ux = ((Uint8)((x >= 0) ? x : -x)) >> 6;
292 
293  while(ux) {
294  ux >>= 7;
295  bytes++;
296  }
297 
298  return bytes;
299 }
300 
301 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
302  (!defined(NCBI_COMPILER_MIPSPRO)) )
304 {
305  x_WriteIntFixed<int,1>(x, NULL);
306 }
307 
309 {
310  x_WriteIntFixed<int,1>(x, & offset);
311 }
312 
314 {
315  x_WriteIntFixed<int,2>(x, NULL);
316 }
317 
319 {
320  x_WriteIntFixed<int,2>(x, & offset);
321 }
322 
324 {
325  x_WriteIntFixed<int,4>(x, NULL);
326 }
327 
329 {
330  x_WriteIntFixed<int,4>(x, & offset);
331 }
332 
334 {
335  x_WriteIntFixed<Int8, 8>(x, NULL);
336 }
337 
339 {
340  x_WriteIntFixed<Int8, 8>(x, & offset);
341 }
342 
344 {
345  x_WriteIntFixed_LE<int,1>(x, NULL);
346 }
347 
349 {
350  x_WriteIntFixed_LE<int,1>(x, & offset);
351 }
352 
354 {
355  x_WriteIntFixed_LE<int,2>(x, NULL);
356 }
357 
359 {
360  x_WriteIntFixed_LE<int,2>(x, & offset);
361 }
362 
364 {
365  x_WriteIntFixed_LE<int,4>(x, NULL);
366 }
367 
369 {
370  x_WriteIntFixed_LE<int,4>(x, & offset);
371 }
372 
374 {
375  x_WriteIntFixed_LE<Int8, 8>(x, NULL);
376 }
377 
379 {
380  x_WriteIntFixed_LE<Int8, 8>(x, & offset);
381 }
382 
384 {
385  return x_WriteString(str, fmt, NULL);
386 }
387 
389 {
390  return x_WriteString(str, fmt, & offset);
391 }
392 
394 {
395  int start_off = offsetp ? *offsetp : m_WriteOffset;
396 
397  if (fmt == eSize4) {
398  x_WriteIntFixed<int,4>(static_cast<int>(str.size()), offsetp);
399  } else if (fmt == eSizeVar) {
400  x_WriteVarInt(str.size(), offsetp);
401  }
402 
403  x_WriteRaw(str.data(),static_cast<int>( str.size()), offsetp);
404 
405  if (fmt == eNUL) {
406  char buf = 0;
407  x_WriteRaw(& buf, 1, offsetp);
408  }
409 
410  int end_off = offsetp ? *offsetp : m_WriteOffset;
411 
412  return end_off - start_off;
413 }
414 #endif
415 
416 const char * CBlastDbBlob::ReadRaw(int size)
417 {
418  return x_ReadRaw(size, &m_ReadOffset);
419 }
420 
421 void CBlastDbBlob::WriteRaw(const char * begin, int size)
422 {
423  x_WriteRaw(begin, size, NULL);
424 }
425 
426 void CBlastDbBlob::WriteRaw(const char * begin, int size, int offset)
427 {
428  x_WriteRaw(begin, size, & offset);
429 }
430 
431 void CBlastDbBlob::x_WriteRaw(const char * data, int size, int * offsetp)
432 {
433  int orig_size = size;
434 
435  if (offsetp == NULL) {
436  offsetp = & m_WriteOffset;
437  }
438 
439  int off = *offsetp;
440 
441  _ASSERT(data != NULL);
442  _ASSERT(off >= 0);
443  _ASSERT(size >= 0);
444 
445  // x_Reserve guarantees m_Owner == true.
446  x_Reserve(off + size);
447  _ASSERT(m_Owner);
448 
449  int overlap = int(m_DataHere.size()) - off;
450 
451  // If inserting past end of buffer, increase the buffer size.
452 
453  if (overlap < 0) {
454  m_DataHere.insert(m_DataHere.end(), -overlap, (char) 0);
455  overlap = 0;
456  }
457 
458  // If data is partly or wholly written into existing array space,
459  // memcpy the data into that space.
460 
461  if (overlap > 0) {
462  int len = std::min(overlap, size);
463 
464  memcpy(& m_DataHere[off], data, len);
465 
466  size -= len;
467  data += len;
468  off += len;
469  }
470 
471  if (size) {
472  m_DataHere.insert(m_DataHere.end(), data, data + size);
473  }
474 
475  *offsetp += orig_size;
476 }
477 
478 void CBlastDbBlob::x_Copy(int total)
479 {
480  _ASSERT(! m_Owner);
481  _ASSERT(! m_DataHere.size());
482 
483  if (total < (int)m_DataRef.size()) {
484  total = static_cast<int>(m_DataRef.size());
485  }
486 
487  m_Owner = true;
488  const char * ptr = m_DataRef.data();
489 
490  m_DataHere.reserve(total);
491  m_DataHere.assign(ptr, ptr + m_DataRef.size());
492  m_DataRef = CTempString("");
493 
494  m_Lifetime.Reset();
495 }
496 
498 {
499  if (! m_Owner) {
500  x_Copy(need);
501  } else {
502  int cur_cap = static_cast<int>(m_DataHere.capacity());
503 
504  if (cur_cap < need) {
505  // Skip the first few reallocations.
506 
507  int new_cap = 64;
508 
509  while(new_cap < need) {
510  new_cap *= 2;
511  }
512 
513  m_DataHere.reserve(new_cap);
514  }
515  }
516 }
517 
519 {
520  if (m_Owner) {
521  return static_cast<int>(m_DataHere.size());
522  }
523  return static_cast<int>(m_DataRef.size());
524 }
525 
527 {
528  if (m_Owner) {
529  if (m_DataHere.size()) {
530  const char * p = & m_DataHere[0];
531  return CTempString(p, m_DataHere.size());
532  }
533  } else {
534  if (m_DataRef.size()) {
535  return m_DataRef;
536  }
537  }
538 
539  return CTempString("");
540 }
541 
543 {
545 }
546 
548 {
550 }
551 
553 {
554  return m_WriteOffset;
555 }
556 
558 {
559  return m_ReadOffset;
560 }
561 
563 {
564  vector<char> pad;
566 
567  int pads = align ? (m_WriteOffset % align) : 0;
568 
569  if (fmt == eSimple) {
570  pads = pads ? (align - pads) : 0;
571  } else {
572  pads = align - pads;
573  }
574 
575  if (fmt == eSimple) {
576  for(int i = 0; i < pads; i++) {
577  x_WriteRaw("#", 1, NULL);
578  }
579  } else {
580  for(int i = 1; i < pads; i++) {
581  x_WriteRaw("#", 1, NULL);
582  }
583  char ch = (char)0;
584  x_WriteRaw(& ch, 1, NULL);
585  }
586 
587  _ASSERT(! (m_WriteOffset % align));
588 }
589 
591 {
592  if (fmt == eString) {
593 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
594  (!defined(NCBI_COMPILER_MIPSPRO)) )
595  ReadString(eNUL);
596 #endif
597  } else {
598  _ASSERT(fmt == eSimple);
599 
600  int pads = align ? (m_ReadOffset % align) : 0;
601  pads = pads ? (align-pads) : 0;
602 
603  CTempString tmp(x_ReadRaw(pads, & m_ReadOffset), pads);
604 
605  for(int i = 0; i < (int)tmp.size(); i++) {
606  SEQDB_FILE_ASSERT(tmp[i] == '#');
607  }
608  }
609 }
610 
612 
#define true
Definition: bool.h:35
static void pad(Char *s)
Definition: bzip2.c:908
void SkipPadBytes(int align, EPadding fmt)
Align the offset by skipping bytes.
Definition: seqdbblob.cpp:590
int GetReadOffset() const
Get the current read pointer offset.
Definition: seqdbblob.cpp:557
CTempString m_DataRef
Non-owned data (only used for `read' streams).
Definition: seqdbblob.hpp:501
Int4 ReadInt4()
Read a 4 byte integer at the pointer (and move the pointer).
Definition: seqdbblob.cpp:139
int ReadInt2()
Read a 2 byte integer at the pointer (and move the pointer).
Definition: seqdbblob.cpp:129
int ReadInt1()
Read a 1 byte integer at the pointer (and move the pointer).
Definition: seqdbblob.cpp:119
CBlastDbBlob(int size=0)
Create a new object, reserving 'size' bytes for writing.
Definition: seqdbblob.cpp:40
void x_WriteRaw(const char *ptr, int size, int *offsetp)
Write raw bytes as ptr + size at a given offset.
Definition: seqdbblob.cpp:431
EPadding
Padding style.
Definition: seqdbblob.hpp:270
@ eString
Pad using NUL terminated string of '#' bytes.
Definition: seqdbblob.hpp:272
@ eSimple
Just write '#' bytes until aligned.
Definition: seqdbblob.hpp:271
static int VarIntSize(Int8 x)
Compute bytes used for a variable length integer.
Definition: seqdbblob.cpp:285
int Size() const
Get size of blob contents.
Definition: seqdbblob.cpp:518
void ReferTo(CTempString data)
Refer to an existing memory area.
Definition: seqdbblob.cpp:68
int GetWriteOffset() const
Get the current write pointer offset.
Definition: seqdbblob.cpp:552
void WriteRaw(const char *begin, int size)
Write raw data to the blob (moving the write pointer).
Definition: seqdbblob.cpp:421
void WritePadBytes(int align, EPadding fmt)
Align the offset by writing pad bytes.
Definition: seqdbblob.cpp:562
void WriteInt2_LE(int x)
Definition: seqdbblob.cpp:353
EStringFormat
String termination style.
Definition: seqdbblob.hpp:233
@ eSizeVar
Write string length as VarInt, then string data.
Definition: seqdbblob.hpp:237
@ eNUL
Write a NUL terminated string.
Definition: seqdbblob.hpp:235
@ eSize4
Write string length as Int4, then string data.
Definition: seqdbblob.hpp:236
void WriteInt2(int x)
Write a 2 byte integer to the blob.
Definition: seqdbblob.cpp:313
CTempString ReadString(EStringFormat fmt)
Read string data from the blob (moving the read pointer).
Definition: seqdbblob.cpp:159
void x_Copy(int total)
Copy referenced data to owned data.
Definition: seqdbblob.cpp:478
void Clear()
Clear all owned data and reference an empty string.
Definition: seqdbblob.cpp:58
void WriteInt4(Int4 x)
Write a 4 byte integer to the blob.
Definition: seqdbblob.cpp:323
int x_WriteVarInt(Int8 x, int *offsetp)
Write a variable length integer into the buffer.
Definition: seqdbblob.cpp:253
void WriteInt8(Int8 x)
Write an 8 byte integer to the blob.
Definition: seqdbblob.cpp:333
int m_WriteOffset
The `write pointer' for stream-like access.
Definition: seqdbblob.hpp:495
Int8 x_ReadVarInt(int *offsetp) const
Read a variable length integer from the buffer.
Definition: seqdbblob.cpp:92
void WriteInt8_LE(Int8 x)
Definition: seqdbblob.cpp:373
vector< char > m_DataHere
Data owned by this object.
Definition: seqdbblob.hpp:498
Int8 ReadInt8()
Read an 8 byte integer at the pointer (and move the pointer).
Definition: seqdbblob.cpp:149
void SeekRead(int offset)
Move the read pointer to a specific location.
Definition: seqdbblob.cpp:547
CRef< CObject > m_Lifetime
Lifetime maintenance object for referenced data.
Definition: seqdbblob.hpp:504
int x_WriteString(CTempString str, EStringFormat fmt, int *offsetp)
Write string data to the blob.
Definition: seqdbblob.cpp:393
void WriteInt1(int x)
Write a 1 byte integer to the blob.
Definition: seqdbblob.cpp:303
void x_Reserve(int size)
Make sure the blob owns its data and has capacity for at least 'size' bytes.
Definition: seqdbblob.cpp:497
CTempString Str() const
Get blob contents as a CTempString.
Definition: seqdbblob.cpp:526
void WriteInt4_LE(Int4 x)
Definition: seqdbblob.cpp:363
int WriteString(CTempString str, EStringFormat fmt)
Write string data to the blob.
Definition: seqdbblob.cpp:383
void SeekWrite(int offset)
Seek write pointer to a specific location.
Definition: seqdbblob.cpp:542
const char * x_ReadRaw(int size, int *offsetp) const
Read raw bytes from a given offset.
Definition: seqdbblob.cpp:209
const char * ReadRaw(int size)
Read raw data (moving the read pointer).
Definition: seqdbblob.cpp:416
CTempString x_ReadString(EStringFormat fmt, int *offsetp) const
Read string data from the blob.
Definition: seqdbblob.cpp:169
void WriteInt1_LE(int x)
Definition: seqdbblob.cpp:343
bool m_Owner
True if this object owns the target data.
Definition: seqdbblob.hpp:489
int m_ReadOffset
The `read pointer' for stream-like access.
Definition: seqdbblob.hpp:492
Int8 ReadVarInt()
Read a variable length integer from the blob.
Definition: seqdbblob.cpp:82
int WriteVarInt(Int8 x)
Write a variable length integer to the blob.
Definition: seqdbblob.cpp:243
CSeqDBException.
Definition: seqdbcommon.hpp:73
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
#define NULL
Definition: ncbistd.hpp:225
#define NCBI_THROW(exception_class, err_code, message)
Generic macro to throw an exception, given the exception class, error code and message string.
Definition: ncbiexpt.hpp:704
void Reset(void)
Reset reference object.
Definition: ncbiobj.hpp:773
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
const char * data(void) const
Return a pointer to the array represented.
Definition: tempstr.hpp:313
size_type size(void) const
Return the length of the represented array.
Definition: tempstr.hpp:327
unsigned int
A callback function used to compare two keys in a database.
Definition: types.hpp:1210
char * buf
int i
int len
const struct ncbi::grid::netcache::search::fields::SIZE size
T min(T x_, T y_)
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
static char tmp[2048]
Definition: utf8.c:42
int offset
Definition: replacements.h:160
Defines BlastDb `Blob' class for SeqDB and WriteDB.
Defines exception class and several constants for SeqDB.
This file defines several SeqDB utility functions related to byte order and file system portability.
#define SEQDB_FILE_ASSERT(YESNO)
static const char * str(char *buf, int n)
Definition: stats.c:84
#define _ASSERT
Modified on Tue Nov 28 02:22:06 2023 by modify_doxy.py rev. 669887