NCBI C++ ToolKit
seqdbblob.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef OBJTOOLS_BLAST_SEQDB_READER___SEQDBBLOB__HPP
2 #define OBJTOOLS_BLAST_SEQDB_READER___SEQDBBLOB__HPP
3 
4 /* $Id: seqdbblob.hpp 44485 2010-01-19 15:47:58Z maning $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Kevin Bealer
30  *
31  */
32 
33 /// @file seqdbblob.hpp
34 /// Defines BlastDb `Blob' class for SeqDB and WriteDB.
35 ///
36 /// Defines classes:
37 /// CBlastDbBlob
38 ///
39 /// Implemented for: UNIX, MS-Windows
40 
41 #include <ncbiconf.h>
42 #include <corelib/ncbiobj.hpp>
43 
45 
46 
47 /// `Blob' Class for SeqDB (and WriteDB).
48 ///
49 /// This manages serialization and deserialization of binary data of
50 /// unspecified size and format, known in RDBMS jargon as `blob' data.
51 /// The primary goals here are to standardize the encoding of data
52 /// types and to optimize performance. Read and write operations are
53 /// handled by the same class. For both read and write operations,
54 /// random and stream access are supported.
55 
57 public:
58  /// Create a new object, reserving 'size' bytes for writing.
59  /// @param size This many bytes will be reserved for writing.
60  CBlastDbBlob(int size = 0);
61 
62  /// Create a readable object containing the specified data.
63  ///
64  /// If `copy' is specified as false, only a reference to the data
65  /// will be kept, and write operations are illegal. If `copy' is
66  /// specified as true, the data is copied into an internal buffer,
67  /// and both read and write operations are legal.
68  ///
69  /// @param data The data to refer to.
70  /// @param copy Specify true to copy the data to a buffer.
71  CBlastDbBlob(CTempString data, bool copy);
72 
73  /// Get blob contents as a CTempString.
74  CTempString Str() const;
75 
76  /// Get size of blob contents.
77  int Size() const;
78 
79  /// Clear all owned data and reference an empty string.
80  void Clear();
81 
82  /// Refer to an existing memory area.
83  ///
84  /// This method causes this blob to refer to an existing area of
85  /// memory without copying it. The caller should guarantee that
86  /// the referenced data is valid until past the last read of the
87  /// data. If such a guarantee is not possible, then Clear() and
88  /// WriteRaw() can be substituted (at the cost of an additional
89  /// copy operation). Alternately, the two-argument ReferTo()
90  /// operation can be used to provide `lifetime' management.
91  ///
92  /// @param data Specifies the referenced memory region.
93  void ReferTo(CTempString data);
94 
95  /// Refer to an existing memory area.
96  ///
97  /// This method causes this blob to refer to an existing area of
98  /// memory without copying it. This version allows the caller to
99  /// specify a CObject that maintains the lifetime of the memory
100  /// region. This object will keep a reference to the CObject as
101  /// long as it references the specified memory region, after which
102  /// the CObject should be released. The specified CObject should
103  /// be allocated on the heap.
104  ///
105  /// @param data Specifies the referenced memory region.
106  /// @param lifetime The lifetime management object.
107  void ReferTo(CTempString data, CRef<CObject> lifetime);
108 
109  /// Read a variable length integer from the blob.
110  /// @return The integer value that was read (the value itself,
111  /// not a byte count).
112  Int8 ReadVarInt();
113 
114  /// Read a variable length integer from the blob.
115  /// @param offset The offset to read the integer at.
116  /// @return The integer value that was read (the value itself,
117  /// not a byte count).
118  Int8 ReadVarInt(int offset) const;
119 
120 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
121  (!defined(NCBI_COMPILER_MIPSPRO)) )
122  /// Read a 1 byte integer at the pointer (and move the pointer).
123  int ReadInt1();
124 
125  /// Read a 1 byte integer from the given offset.
126  /// @param offset The offset from which to read the integer.
127  /// @return The 1 byte value read from the data.
128  int ReadInt1(int offset) const;
129 
130  /// Read a 2 byte integer at the pointer (and move the pointer).
131  int ReadInt2();
132 
133  /// Read a 2 byte integer from the given offset.
134  /// @param offset The offset from which to read the integer.
135  /// @return The 2 byte value read from the data.
136  int ReadInt2(int offset) const;
137 
138  /// Read a 4 byte integer at the pointer (and move the pointer).
139  Int4 ReadInt4();
140 
141  /// Read a 4 byte integer from the given offset.
142  /// @param offset The offset from which to read the integer.
143  /// @return The four byte value read from the data.
144  Int4 ReadInt4(int offset) const;
145 
146  /// Read an 8 byte integer at the pointer (and move the pointer).
147  Int8 ReadInt8();
148 
149  /// Read an 8 byte integer from the given offset.
150  /// @param offset The offset from which to read the integer.
151  /// @return The eight byte value read from the data.
152  Int8 ReadInt8(int offset) const;
153 #endif
154 
155  /// Move the read pointer to a specific location.
156  /// @param offset The new read offset.
157  void SeekRead(int offset);
158 
159 
160  /// Write a variable length integer to the blob.
161  /// @param x The integer to write.
162  /// @return The number of bytes written.
163  int WriteVarInt(Int8 x);
164 
165  /// Write a variable length integer to the blob.
166  /// @param x The integer to write.
167  /// @param offset The offset to write the integer at.
168  /// @return The number of bytes written.
169  int WriteVarInt(Int8 x, int offset);
170 
171  /// Compute bytes used for a variable length integer.
172  /// @param x The integer value.
173  /// @return The number of bytes that would be written.
174  static int VarIntSize(Int8 x);
175 
176 
177 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
178  (!defined(NCBI_COMPILER_MIPSPRO)) )
179  /// Write a 1 byte integer to the blob.
180  /// @param x The integer to write.
181  void WriteInt1(int x);
182  void WriteInt1_LE(int x);
183 
184  /// Write a 1 byte integer to the blob.
185  /// @param x The integer to write.
186  /// @param offset The offset to write the integer at.
187  void WriteInt1(int x, int offset);
188  void WriteInt1_LE(int x, int offset);
189 
190 
191  /// Write a 2 byte integer to the blob.
192  /// @param x The integer to write.
193  void WriteInt2(int x);
194  void WriteInt2_LE(int x);
195 
196  /// Write a 2 byte integer to the blob.
197  /// @param x The integer to write.
198  /// @param offset The offset to write the integer at.
199  void WriteInt2(int x, int offset);
200  void WriteInt2_LE(int x, int offset);
201 
202 
203  /// Write a 4 byte integer to the blob.
204  /// @param x The integer to write.
205  void WriteInt4(Int4 x);
206  void WriteInt4_LE(Int4 x);
207 
208  /// Write a 4 byte integer into the blob at a given offset.
209  /// @param x The integer to write.
210  /// @param offset The offset to write the integer at.
211  void WriteInt4(Int4 x, int offset);
212  void WriteInt4_LE(Int4 x, int offset);
213 
214 
215  /// Write an 8 byte integer to the blob.
216  /// @param x The integer to write.
217  void WriteInt8(Int8 x);
218  void WriteInt8_LE(Int8 x);
219 
220  /// Write an 8 byte integer into the blob at a given offset.
221  /// @param x The integer to write.
222  /// @param offset The offset to write the integer at.
223  void WriteInt8(Int8 x, int offset);
224  void WriteInt8_LE(Int8 x, int offset);
225 #endif
226 
227  /// Seek write pointer to a specific location.
228  /// @param offset The new write offset.
229  void SeekWrite(int offset);
230 
231 
232  /// String termination style.
234  eNone, ///< Write the string as-is.
235  eNUL, ///< Write a NUL terminated string.
236  eSize4, ///< Write string length as Int4, then string data.
237  eSizeVar ///< Write string length as VarInt, then string data.
238  };
239 
240 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
241  (!defined(NCBI_COMPILER_MIPSPRO)) )
242  /// Read string data from the blob (moving the read pointer).
243  /// @param fmt String termination criteria.
244  /// @return The string that was read, as a CTempString
245  /// (the return value is the string itself, not a byte count).
246  CTempString ReadString(EStringFormat fmt);
247 
248  /// Read string data from the blob at a given offset.
249  /// @param fmt String termination criteria.
250  /// @param offset The offset to read from.
251  /// @return The string that was read, as a CTempString
252  /// (the return value is the string itself, not a byte count).
253  CTempString ReadString(EStringFormat fmt, int offset) const;
254 
255  /// Write string data to the blob.
256  /// @param str The string to write.
257  /// @param fmt String termination criteria.
258  /// @return The number of bytes written.
259  int WriteString(CTempString str, EStringFormat fmt);
260 
261  /// Write string data to the blob at a specific offset.
262  /// @param str The string to write.
263  /// @param fmt String termination criteria.
264  /// @param offset The offset to write at.
265  /// @return The number of bytes written.
266  int WriteString(CTempString str, EStringFormat fmt, int offset);
267 #endif
268 
269  /// Padding style.
270  enum EPadding {
271  eSimple, ///< Just write NUL bytes until aligned.
272  eString ///< Pad using NUL terminated string of '#' bytes.
273  };
274 
275  /// Align the offset by writing pad bytes.
276  ///
277  /// One of two padding methods is used. If eSimple is specified,
278  /// zero or more NUL bytes are written. This uses less overhead
279  /// but readers of the blob need to know the alignment to read
280  /// fields found after the padding. If eString is specified, a
281  /// normal string write is used with eNUL termination and a string
282  /// that will result in the requested alignment. This is self
283  /// describing but requires at least one byte.
284  ///
285  /// @param align Pad to a multiple of this size.
286  /// @param fmt Padding style (eSimple or eString).
287  void WritePadBytes(int align, EPadding fmt);
288 
289  /// Align the offset by skipping bytes.
290  ///
291  /// This works just like WritePadBytes, but verifies that the pad
292  /// bytes exist and have the correct values, and skips over them,
293  /// rather than writing them. If fmt is eString, the alignment
294  /// value is ignored.
295  ///
296  /// @param align Pad to a multiple of this size.
297  /// @param fmt Padding style (eSimple or eString).
298  void SkipPadBytes(int align, EPadding fmt);
299 
300 
301  /// Read raw data (moving the read pointer).
302  /// @param size Number of bytes to move the pointer.
303  const char * ReadRaw(int size);
304 
305  /// Write raw data to the blob (moving the write pointer).
306  /// @param begin Pointer to the start of the data.
307  /// @param size Number of bytes to copy.
308  void WriteRaw(const char * begin, int size);
309 
310  /// Write raw data to the blob at the given offset.
311  /// @param begin Pointer to the start of the data.
312  /// @param size Number of bytes to copy.
313  /// @param offset Location to write data at.
314  void WriteRaw(const char * begin, int size, int offset);
315 
316  /// Get the current write pointer offset.
317  /// @return The offset at which the next write would occur.
318  int GetWriteOffset() const;
319 
320  /// Get the current read pointer offset.
321  /// @return The offset at which the next read would occur.
322  int GetReadOffset() const;
323 
324 private:
325  /// Copy referenced data to owned data.
326  ///
327  /// This handles the Copy part of Copy On Write. To reduce the
328  /// allocation count, the `total' parameter can be used to request
329  /// the total number of bytes needed. If `total' is less than the
330  /// current size, the current size will be used instead.
331  ///
332  /// @param total Total space needed.
333  void x_Copy(int total);
334 
335  /// Reserve space in the blob (NOTE(review): inferred from the name; confirm in seqdbblob.cpp).
336  /// @param size Number of bytes -- presumably the amount to reserve; confirm.
337  void x_Reserve(int size);
338 
339  /// Write raw bytes as ptr + size at a given offset.
340  /// @param ptr Pointer to the start of the bytes to write.
341  /// @param size Number of bytes to write.
342  /// @param offsetp Offset to write at (NULL means use write pointer).
343  void x_WriteRaw(const char * ptr, int size, int * offsetp);
344 
345  /// Read raw bytes from a given offset.
346  ///
347  /// This method checks that enough bytes exist, updates the read
348  /// pointer, and returns a pointer to the given data. Unlike with
349  /// x_WriteRaw, do not use NULL for the read pointer, instead the
350  /// internal read pointer should be provided if the user did not
351  /// provide one. This method will throw an exception if there is
352  /// not enough data.
353  ///
354  /// @param size Number of bytes needed by caller.
355  /// @param offsetp Offset from which to read (should not be NULL).
356  /// @return Pointer to beginning of requested data.
357  const char * x_ReadRaw(int size, int * offsetp) const;
358 
359  /// Write a variable length integer into the buffer.
360  /// @param x The integer to write.
361  /// @param offsetp The offset to write at (or NULL).
362  /// @return The number of bytes written.
363  int x_WriteVarInt(Int8 x, int * offsetp);
364 
365  /// Read a variable length integer from the buffer.
366  /// @param offsetp The offset to read at (should not be NULL).
367  /// @return The integer value.
368  Int8 x_ReadVarInt(int * offsetp) const;
369 
370 #if ((!defined(NCBI_COMPILER_WORKSHOP) || (NCBI_COMPILER_VERSION > 550)) && \
371  (!defined(NCBI_COMPILER_MIPSPRO)) )
372  /// Write string data to the blob.
373  /// @param str The string to write.
374  /// @param fmt String termination criteria.
375  /// @param offsetp The offset to write at (NULL to use write pointer).
376  /// @return The number of bytes written.
377  int x_WriteString(CTempString str, EStringFormat fmt, int * offsetp);
378 
379  /// Read string data from the blob.
380  /// @param fmt String termination criteria.
381  /// @param offsetp The offset to read at (should not be NULL).
382  /// @return The string that was read.
383  CTempString x_ReadString(EStringFormat fmt, int * offsetp) const;
384 
385  /// Read a fixed length big-endian integer occupying TBytes bytes.
386  /// @param offsetp The offset to read at; forwarded to x_ReadRaw.
387  /// @return The value that was read.
388  template<typename TValue, int TBytes>
389  TValue x_ReadIntFixed(int * offsetp) const
390  {
391  // Fetch TBytes raw bytes; vbuf is biased so the last byte read is always vbuf[7].
392  unsigned char * buf = (unsigned char*) x_ReadRaw(TBytes, offsetp);
393  unsigned char * vbuf = buf - 8 + TBytes; // NOTE(review): points before buf when TBytes < 8; only vbuf[8-TBytes..7] are accessed
394 
395  TValue x = vbuf[7];
396 
397  switch(TBytes) { // each case falls through, OR-ing in bytes vbuf[8-TBytes] .. vbuf[6]
398  case 8: x |= Uint8(vbuf[0]) << 56;
399  case 7: x |= Uint8(vbuf[1]) << 48;
400  case 6: x |= Uint8(vbuf[2]) << 40;
401  case 5: x |= Uint8(vbuf[3]) << 32;
402  case 4: x |= Uint4(vbuf[4]) << 24;
403  case 3: x |= Uint4(vbuf[5]) << 16;
404  case 2: x |= Uint4(vbuf[6]) << 8;
405  case 1:
406  break;
407  default:
408  _ASSERT(0);
409  }
410 
411  if (TBytes < sizeof(TValue)) {
412  // This only applies to 'shortened' formats, such as an
413  // Int8 packed in 5 bytes or an Int4 packed in 3 bytes.
414  // It only affects signed values, its purpose is to fix
415  // the numeric sign. The current design does not use
416  // shortened formats anywhere.
417 
418  int bits = (sizeof(TValue)-TBytes) * 8;
419  x = (x << bits) >> bits; // NOTE(review): sign extension here depends on how shifts of a signed TValue behave
420  }
421 
422  return x;
423  }
424 
425  /// Write a fixed length integer in big endian, using exactly TBytes bytes.
426  /// @param x The value to write.
427  /// @param offsetp The offset at which to write; forwarded to x_WriteRaw.
428  template<typename TValue, int TBytes>
429  void x_WriteIntFixed(TValue x, int * offsetp)
430  {
431  // Range check: passes when x fits in TBytes bytes as either a signed or an unsigned value.
432  _ASSERT(((Int8(x) >> (TBytes*8-1)) >> 1) ==
433  ((Int8(x) >> (TBytes*8-1)) >> 2));
434 
435  unsigned char buf[8];
436 
437  switch(TBytes) { // each case falls through, so buf[8-TBytes] .. buf[7] get filled
438  case 8: buf[0] = Uint8(x) >> 56;
439  case 7: buf[1] = Uint8(x) >> 48;
440  case 6: buf[2] = Uint8(x) >> 40;
441  case 5: buf[3] = Uint8(x) >> 32;
442  case 4: buf[4] = Uint4(x) >> 24;
443  case 3: buf[5] = Uint4(x) >> 16;
444  case 2: buf[6] = Uint4(x) >> 8;
445  case 1: buf[7] = Uint4(x);
446  break;
447  default:
448  _ASSERT(0);
449  }
450 
451  x_WriteRaw((char*)(buf + 8 - TBytes), TBytes, offsetp); // emit only the trailing TBytes bytes, most significant first
452  }
453 
454  /// Write a fixed length integer in little endian, using exactly TBytes bytes.
455  /// @param x The value to write.
456  /// @param offsetp The offset at which to write; forwarded to x_WriteRaw.
457  template<typename TValue, int TBytes>
458  void x_WriteIntFixed_LE(TValue x, int * offsetp)
459  {
460  // Range check: passes when x fits in TBytes bytes as either a signed or an unsigned value.
461  _ASSERT(((Int8(x) >> (TBytes*8-1)) >> 1) ==
462  ((Int8(x) >> (TBytes*8-1)) >> 2));
463 
464  unsigned char buf[8];
465 
466  switch(TBytes) { // each case falls through, so buf[TBytes-1] .. buf[0] get filled
467  case 8: buf[7] = Uint8(x) >> 56;
468  case 7: buf[6] = Uint8(x) >> 48;
469  case 6: buf[5] = Uint8(x) >> 40;
470  case 5: buf[4] = Uint8(x) >> 32;
471  case 4: buf[3] = Uint4(x) >> 24;
472  case 3: buf[2] = Uint4(x) >> 16;
473  case 2: buf[1] = Uint4(x) >> 8;
474  case 1: buf[0] = Uint4(x);
475  break;
476  default:
477  _ASSERT(0);
478  }
479 
480  x_WriteRaw((char*)(buf), TBytes, offsetp); // emit the leading TBytes bytes, least significant first
481  }
482 
483 #endif
484 
485 
486  // Data
487 
488  /// True if this object owns the target data.
489  bool m_Owner;
490 
491  /// The `read pointer' for stream-like access.
493 
494  /// The `write pointer' for stream-like access.
496 
497  /// Data owned by this object.
498  vector<char> m_DataHere;
499 
500  /// Non-owned data (only used for `read' streams).
502 
503  /// Lifetime maintenance object for referenced data.
505 };
506 
507 
509 
510 #endif // OBJTOOLS_BLAST_SEQDB_READER___SEQDBBLOB__HPP
511 
void ReadString(CStreamBuffer &b)
Definition: test.cpp:196
`Blob' Class for SeqDB (and WriteDB).
Definition: seqdbblob.hpp:56
CTempString m_DataRef
Non-owned data (only used for `read' streams).
Definition: seqdbblob.hpp:501
void x_WriteIntFixed(TValue x, int *offsetp)
Write a fixed length integer in big endian.
Definition: seqdbblob.hpp:429
EPadding
Padding style.
Definition: seqdbblob.hpp:270
@ eSimple
Just write NUL bytes until aligned.
Definition: seqdbblob.hpp:271
EStringFormat
String termination style.
Definition: seqdbblob.hpp:233
@ eNone
Write the string as-is.
Definition: seqdbblob.hpp:234
@ eNUL
Write a NUL terminated string.
Definition: seqdbblob.hpp:235
@ eSize4
Write string length as Int4, then string data.
Definition: seqdbblob.hpp:236
TValue x_ReadIntFixed(int *offsetp) const
Read a fixed length integer.
Definition: seqdbblob.hpp:389
int m_WriteOffset
The `write pointer' for stream-like access.
Definition: seqdbblob.hpp:495
vector< char > m_DataHere
Data owned by this object.
Definition: seqdbblob.hpp:498
CRef< CObject > m_Lifetime
Lifetime maintenance object for referenced data.
Definition: seqdbblob.hpp:504
bool m_Owner
True if this object owns the target data.
Definition: seqdbblob.hpp:489
int m_ReadOffset
The `read pointer' for stream-like access.
Definition: seqdbblob.hpp:492
void x_WriteIntFixed_LE(TValue x, int *offsetp)
Write a fixed length integer in small endian.
Definition: seqdbblob.hpp:458
CObject –.
Definition: ncbiobj.hpp:180
CTempString implements a light-weight string on top of a storage buffer whose lifetime management is ...
Definition: tempstr.hpp:65
int32_t Int4
4-byte (32-bit) signed integer
Definition: ncbitype.h:102
uint32_t Uint4
4-byte (32-bit) unsigned integer
Definition: ncbitype.h:103
int64_t Int8
8-byte (64-bit) signed integer
Definition: ncbitype.h:104
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
#define NCBI_XOBJREAD_EXPORT
Definition: ncbi_export.h:1315
char * buf
const struct ncbi::grid::netcache::search::fields::SIZE size
Front end for a platform-specific configuration summary.
Portable reference counted smart and weak pointers using CWeakRef, CRef, CObject and CObjectEx.
void copy(Njn::Matrix< S > *matrix_, const Njn::Matrix< T > &matrix0_)
Definition: njn_matrix.hpp:613
int offset
Definition: replacements.h:160
static const char * str(char *buf, int n)
Definition: stats.c:84
#define _ASSERT
Modified on Tue Feb 27 05:52:39 2024 by modify_doxy.py rev. 669887