NCBI C++ ToolKit
zlib.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef UTIL_COMPRESS__ZLIB__HPP
2 #define UTIL_COMPRESS__ZLIB__HPP
3 
4 /* $Id: zlib.hpp 101434 2023-12-13 16:37:07Z ivanov $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Author: Vladimir Ivanov
30  *
31  */
32 
33 /// @file zlib.hpp
34 ///
35 /// ZLib Compression API
36 ///
37 /// CZipCompression - base methods for compression/decompression
38 /// memory buffers and files.
39 /// CZipCompressionFile - allow read/write operations on files in
40 /// zlib or gzip (.gz) format.
41 /// CZipCompressor - zlib based compressor
42 /// (used in CZipStreamCompressor).
43 /// CZipDecompressor - zlib based decompressor
44 /// (used in CZipStreamDecompressor).
45 /// CZipStreamCompressor - zlib based compression stream processor
46 /// (see util/compress/stream.hpp for details).
47 /// CZipStreamDecompressor - zlib based decompression stream processor
48 /// (see util/compress/stream.hpp for details).
49 ///
50 /// The zlib documentation can be found here:
51 /// http://zlib.org, or
52 /// http://www.gzip.org/zlib/manual.html
53 
54 
55 #include <util/compress/stream.hpp>
56 
57 /** @addtogroup Compression
58  *
59  * @{
60  */
61 
63 
64 
65 // Use default values, defined in zlib library
66 // @deprecated Please don't use, will be deleted later
67 const int kZlibDefaultWbits = -1;
68 const int kZlibDefaultMemLevel = -1;
69 const int kZlibDefaultStrategy = -1;
70 const int kZlibDefaultCompression = -1;
71 
72 
73 /////////////////////////////////////////////////////////////////////////////
74 ///
75 /// CZipCompression --
76 ///
77 /// Defines base methods for compression/decompression of memory buffers
78 /// and files.
79 
81 {
82 public:
83  /// Initialize the compression library (kept for API compatibility; zlib doesn't need it, so this always returns true).
84  static bool Initialize(void) { return true; };
85 
86  /// Compression/decompression flags.
87  enum EFlags {
88  /// Allow transparent reading of data from a buffer/file/stream
89  /// regardless of whether it is compressed or not. But be aware:
90  /// if the data source contains broken data and the API cannot detect that
91  /// it is compressed data, you can get binary data instead of
92  /// decompressed data. By default this flag is OFF.
93  /// Note: zlib v1.1.4 and earlier have a bug in decoding.
94  /// In some cases the decompressor can produce output data from invalid
95  /// compressed data. So, it is not recommended to use this flag
96  /// with old zlib versions.
97  fAllowTransparentRead = (1<<0),
98  /// Allow to "compress/decompress" empty data. Buffer compression
99  /// functions then return TRUE instead of FALSE for zero-length
100  /// input. And, if this flag is used together with fWriteGZipFormat,
101  /// the output will have a gzip header and footer only.
102  fAllowEmptyData = (1<<1),
103  /// Check (and skip) the gzip file header on the decompression stage.
104  fCheckFileHeader = (1<<2),
105  /// Use the gzip (.gz) file format to write into a compression stream
106  /// (the archive can also store the file name and file modification
107  /// date in this format). Note: the gzip file header and footer will be
108  /// omitted by default if no input data is provided, and you will
109  /// have empty output, which may not be acceptable to tools like
110  /// gunzip, etc. -- in this case use fAllowEmptyData.
111  fWriteGZipFormat = (1<<3),
112  /// Allow concatenated gzip files.
113  /// Multiple compressed files can be concatenated into one file.
114  /// In this case, the decompressor will try to extract all members
115  /// at once. But note that better compression can usually be
116  /// obtained if all members are decompressed and then recompressed
117  /// in a single step.
118  fAllowConcatenatedGZip = (1<<4),
119  /// Set of flags for gzip file support. See each flag's description above.
120  fGZip = fCheckFileHeader | fWriteGZipFormat | fAllowConcatenatedGZip,
121  /// This flag can be used only with DecompressFile[IntoDir]().
122  /// It allows restoring the original file name and/or time stamp stored
123  /// in the file header, if present.
124  /// @sa DecompressFile, DecompressFileIntoDir
125  fRestoreFileAttr = (1<<5)
126  };
127  typedef CZipCompression::TFlags TZipFlags; ///< Bitwise OR of EFlags
128 
129  /// Constructor.
130  /// @note
131  /// For setting up advanced compression parameters see Set*() methods.
132  CZipCompression(ELevel level = eLevel_Default);
133 
134  /// Destructor.
135  virtual ~CZipCompression(void);
136 
137  /// Return name and version of the compression library.
138  virtual CVersionInfo GetVersion(void) const;
139 
140  /// Return the default compression level for this compression algorithm (eLevel_Default).
141  virtual ELevel GetDefaultLevel(void) const
142  { return ELevel(eLevel_Default); };
143 
144  /// Check if the compression has support for a specified feature.
145  virtual bool HaveSupport(ESupportFeature feature);
146 
147 
148  //=======================================================================
149  // Utility functions
150  //=======================================================================
151 
152  /// Compress data in the buffer.
153  ///
154  /// @param src_buf
155  /// Source buffer.
156  /// @param src_len
157  /// Size of data in source buffer.
158  /// @param dst_buf
159  /// Destination buffer.
160  /// @param dst_size
161  /// Size of destination buffer.
162  /// In some cases, for small source data or badly compressible data for example,
163  /// it should be a little larger than the size of the source buffer.
164  /// @param dst_len
165  /// Size of compressed data in destination buffer.
166  /// @return
167  /// Return TRUE if the operation was successful, or FALSE otherwise.
168  /// On success, 'dst_buf' contains compressed data of 'dst_len' size.
169  /// @sa
170  /// EstimateCompressionBufferSize, DecompressBuffer
171  virtual bool CompressBuffer(
172  const void* src_buf, size_t src_len,
173  void* dst_buf, size_t dst_size,
174  /* out */ size_t* dst_len
175  );
176 
177  /// Decompress data in the buffer.
178  ///
179  /// @note
180  /// The decompressor stops and returns TRUE if it finds the logical
181  /// end of the compressed data, even if not all compressed data was processed.
182  /// Only when decompressing concatenated gzip files in memory does it
183  /// try to decompress data beyond the logical end of the current gzip chunk,
184  /// to check for the next portion of data. See the descriptions of the
185  /// fCheckFileHeader, fAllowConcatenatedGZip and fGZip flags.
186  /// @param src_buf
187  /// Source buffer.
188  /// @param src_len
189  /// Size of data in source buffer.
190  /// @param dst_buf
191  /// Destination buffer.
192  /// It must be large enough to hold all of the uncompressed data for the operation to complete.
193  /// @param dst_size
194  /// Size of destination buffer.
195  /// @param dst_len
196  /// Size of decompressed data in destination buffer.
197  /// @return
198  /// Return TRUE if the operation was successful, or FALSE otherwise.
199  /// On success, 'dst_buf' contains decompressed data of dst_len size.
200  /// @sa
201  /// CompressBuffer, EFlags
202  virtual bool DecompressBuffer(
203  const void* src_buf, size_t src_len,
204  void* dst_buf, size_t dst_size,
205  /* out */ size_t* dst_len
206  );
207 
208  /// Estimate buffer size for data compression.
209  ///
210  /// The function shall estimate the size of buffer required to compress
211  /// specified number of bytes of data using the CompressBuffer() function.
212  /// This function may return a conservative value that may be larger
213  /// than 'src_len'.
214  ///
215  /// @param src_len
216  /// Size of data in source buffer.
217  /// @return
218  /// Estimated buffer size. 0 if unable to determine.
219  /// @note
220  /// This method ignores used dictionary.
221  /// @sa
222  /// CompressBuffer
223  virtual size_t EstimateCompressionBufferSize(size_t src_len);
224 
225  /// Get recommended buffer sizes for stream/file I/O.
226  ///
227  /// These buffer sizes are softly recommended. They are not required, (de)compression
228  /// streams accepts any reasonable buffer size, for both input and output.
229  /// Respecting the recommended size just makes it a bit easier for (de)compressor,
230  /// reducing the amount of memory shuffling and buffering, resulting in minor
231  /// performance savings. If compression library doesn't have preferences about
232  /// I/O buffer sizes, kCompressionDefaultBufSize will be used.
233  /// @param round_up_by
234  /// If specified, round up a returned value by specified amount.
235  /// Useful for better memory management. For example you can round up to virtual
236  /// memory page size.
237  /// @return
238  /// Structure with recommended buffer sizes.
239  /// @note
240  /// Applicable for streaming/file operations.
241  /// @sa
242  /// kCompressionDefaultBufSize, CSystemInfo::GetVirtualMemoryPageSize()
243  ///
245 
246  /// Compress file.
247  ///
248  /// @param src_file
249  /// File name of source file.
250  /// @param dst_file
251  /// File name of result file.
252  /// @param file_io_bufsize
253  /// Size of the buffer used to read from a source file.
254  /// Writing happens immediately on receiving some data from a compressor.
255  /// @param compression_in_bufsize
256  /// Size of the internal buffer holding input data to be compressed.
257  /// It can be different from 'file_io_bufsize' depending on a using
258  /// compression method, OS and file system.
259  /// @param compression_out_bufsize
260  /// Size of the internal buffer to receive data from a compressor.
261  /// @return
262  /// Return TRUE on success, FALSE on error.
263  /// @note
264  /// This method, as well as some gzip utilities, always keeps the original
265  /// file name and timestamp in the compressed file. At the moment, the
266  /// DecompressFile() method does not use the original file name at all,
267  /// but be aware: if you assign a different base name to the destination
268  /// compressed file, the behavior of decompression utilities
269  /// on different platforms may differ. For example, WinZip on MS Windows
270  /// always restores the original file name and timestamp stored in the file.
271  /// UNIX gunzip has the -N option for this, but does not use it by default,
272  /// and just creates a decompressed file with the name of the compressed
273  /// file without the .gz extension.
274  /// @sa
275  /// DecompressFile, DecompressFileIntoDir, GetRecommendedBufferSizes,
276  /// CZipCompressionFile
277  ///
278  virtual bool CompressFile(
279  const string& src_file,
280  const string& dst_file,
281  size_t file_io_bufsize = kCompressionDefaultBufSize,
282  size_t compression_in_bufsize = kCompressionDefaultBufSize,
283  size_t compression_out_bufsize = kCompressionDefaultBufSize
284  );
285 
286  /// Decompress file.
287  ///
288  /// @param src_file
289  /// File name of source file.
290  /// @param dst_file
291  /// File name of result file.
292  /// @param file_io_bufsize
293  /// Size of the buffer used to read from a source file.
294  /// Writing happens immediately on receiving some data from a decompressor.
295  /// @param decompression_in_bufsize
296  /// Size of the internal buffer holding input data to be decompressed.
297  /// It can be different from 'file_io_bufsize' depending on a using
298  /// compression method, OS and file system.
299  /// @param decompression_out_bufsize
300  /// Size of the internal buffer to receive data from a decompressor.
301  /// @return
302  /// Return TRUE on success, FALSE on error.
303  /// @sa
304  /// CompressFile, DecompressFileIntoDir, GetRecommendedBufferSizes, CZipCompressionFile
305  /// @note
306  /// CompressFile() method, as well as some gzip utilities, always keeps
307  /// the original file name and timestamp in the compressed file.
308  /// If the fRestoreFileAttr flag is set, the timestamp stored in the file
309  /// header will be restored. The original file name cannot be restored here;
310  /// see DecompressFileIntoDir().
311  ///
312  virtual bool DecompressFile(
313  const string& src_file,
314  const string& dst_file,
315  size_t file_io_bufsize = kCompressionDefaultBufSize,
316  size_t decompression_in_bufsize = kCompressionDefaultBufSize,
317  size_t decompression_out_bufsize = kCompressionDefaultBufSize
318  );
319 
320  /// Decompress file into specified directory.
321  ///
322  /// @param src_file
323  /// File name of source file.
324  /// @param dst_dir
325  /// Destination directory.
326  /// @param file_io_bufsize
327  /// Size of the buffer used to read from a source file.
328  /// Writing happens immediately on receiving some data from a decompressor.
329  /// @param decompression_in_bufsize
330  /// Size of the internal buffer holding input data to be decompressed.
331  /// It can be different from 'file_io_bufsize' depending on a using
332  /// compression method, OS and file system.
333  /// @param decompression_out_bufsize
334  /// Size of the internal buffer to receive data from a decompressor.
335  /// @return
336  /// Return TRUE on success, FALSE on error.
337  /// @sa
338  /// CompressFile, DecompressFile, GetRecommendedBufferSizes, CZipCompressionFile
339  /// @note
340  /// CompressFile() method, as well as some gzip utilities, always keeps
341  /// the original file name and timestamp in the compressed file.
342  /// If the fRestoreFileAttr flag is set, the original file name and timestamp
343  /// stored in the file header will be restored. If not, the destination
344  /// file will be named after the archive, without its extension.
345  ///
346  virtual bool DecompressFileIntoDir(
347  const string& src_file,
348  const string& dst_dir,
349  size_t file_io_bufsize = kCompressionDefaultBufSize,
350  size_t decompression_in_bufsize = kCompressionDefaultBufSize,
351  size_t decompression_out_bufsize = kCompressionDefaultBufSize
352  );
353 
354  /// Structure to keep compressed file information.
355  struct SFileInfo {
356  string name; ///< File name (presumably the name stored in the gzip header -- TODO confirm).
357  string comment; ///< Comment text (presumably the gzip header comment field -- TODO confirm).
358  time_t mtime; ///< Modification time; set to 0 by the default constructor.
359  SFileInfo(void) : mtime(0) {};
360  };
361 
362  /// Set a dictionary for all compression/decompression operations.
363  ///
364  /// Using dictionary can significantly reduce the size of the compressed data.
365  /// Refer to the C++ documentation how to choose/prepare a dictionary.
366  ///
367  /// @param dict
368  /// Dictionary to use. New dictionary will be used for all subsequent
369  /// compression/decompression buffer and file operations. NULL value
370  /// invalidates previous dictionary, meaning "return to no-dictionary mode".
371  /// @param own
372  /// If set to eTakeOwnership the dictionary will be owned by CCompression and
373  /// automatically deleted when necessary.
374  /// @return
375  /// Return TRUE on success, FALSE on error.
376  /// FALSE usually means that dictionaries are not supported by the current compression.
377  /// @note
378  /// Each compression algorithm has its own dictionary format, so a dictionary cannot
379  /// be reused by another compression algorithm.
380  /// @note
381  /// Same dictionary should be used to compress and decompress data.
382  /// @note
383  /// .gz files don't store a dictionary inside, so you will be unable to decompress
384  /// files created using CompressFile() or streams with an active dictionary using
385  /// any external utilities like 'gunzip'. But they will be still decompressible
386  /// with DecompressFile() using the same dictionary.
387  /// @note
388  /// If decompressed data have concatenated gzip files, and it is allowed to process
389  /// them all, each gzip file should be compressed with the same dictionary.
390  /// It is allowed to mix dictionary and non-dictionary compressed gzip files,
391  /// the dictionary will be applied only when necessary.
392  /// @sa
393  /// CompressBuffer, DecompressBuffer, CompressFile, DecompressFile
394  ///
395  virtual bool SetDictionary(
396  CCompressionDictionary& dict,
398  );
399 
400  //=======================================================================
401  // Advanced compression-specific parameters
402  //=======================================================================
403  // Allow to tune up (de)compression for a specific needs.
404  //
405  // - Pin down compression parameters to some specific values, so these
406  // values are no longer dynamically selected by the compressor.
407  // - All setting parameters should be in the range [min,max],
408  // or equal to default.
409  // - All parameters should be set before starting (de)compression,
410  // or it will be ignored for current operation.
411  //=======================================================================
412  // You can use listed Z_* parameters after #include <zlib.h>.
413 
414  /// Compression strategy.
415  ///
416  /// The strategy parameter is used to tune the compression algorithm.
417  /// - Z_DEFAULT_STRATEGY
418  /// for normal data;
419  /// - Z_HUFFMAN_ONLY
420  /// to force Huffman encoding only (no string match);
421  /// - Z_RLE
422  /// run-length encoding (RLE) compression;
423  /// - Z_FILTERED
424  /// for data produced by a filter (or predictor);
425  /// Filtered data consists mostly of small values with a somewhat
426  /// random distribution. In this case, the compression algorithm
427  /// is tuned to compress them better. The effect of Z_FILTERED is
428  /// to force more Huffman coding and less string matching;
429  /// it is somewhat intermediate between Z_DEFAULT and Z_HUFFMAN_ONLY.
430  /// - Z_FIXED
431  /// prevents the use of dynamic Huffman codes, allowing for a simpler
432  /// decoder for special applications;
433  ///
434  /// The strategy parameter only affects the compression ratio but not the
435  /// correctness of the compressed output even if it is not set appropriately.
436  /// Used for compression only.
437  ///
438  void SetStrategy(int strategy) {
439  m_c_Strategy = (strategy == kZlibDefaultStrategy ) ? GetStrategyDefault() : strategy; // kZlibDefaultStrategy is replaced by the value of GetStrategyDefault()
440  }
441  int GetStrategy(void) const { return m_c_Strategy; } ///< Return the stored strategy value.
442  static int GetStrategyDefault(void);
443  static int GetStrategyMin(void);
444  static int GetStrategyMax(void);
445 
446  /// Memory level.
447  ///
448  /// The "mem_level" parameter specifies how much memory should be
449  /// allocated for the internal compression state. Low levels use
450  /// less memory but are slow and reduce the compression ratio; the maximum level
451  /// uses maximum memory for optimal speed. See zconf.h for total memory usage
452  /// as a function of windowBits and memLevel.
453  ///
454  void SetMemoryLevel(int mem_level) {
455  m_c_MemLevel = (mem_level == kZlibDefaultMemLevel) ? GetMemoryLevelDefault() : mem_level; // kZlibDefaultMemLevel is replaced by the value of GetMemoryLevelDefault()
456  }
457  int GetMemoryLevel(void) const { return m_c_MemLevel; } ///< Return the stored memory level.
458  static int GetMemoryLevelDefault(void);
459  static int GetMemoryLevelMin(void);
460  static int GetMemoryLevelMax(void);
461 
462  /// Window bits.
463  ///
464  /// This parameter is the base two logarithm of the window size
465  /// (the size of the history buffer). Larger values of this parameter result
466  /// in better compression at the expense of memory usage.
467  /// Used for compression and decompression. By default it is set to the maximum
468  /// allowed value. Reducing the window bits from the default can make it impossible
469  /// to extract .gz files created by gzip.
470  /// @note
471  /// The API supports only positive values for this parameter. RAW deflate
472  /// data is processed by default; for the gzip format there are special
473  /// flags, see the descriptions of fGZip, fCheckFileHeader, fWriteGZipFormat.
474  ///
475  void SetWindowBits(int window_bits) {
476  m_cd_WindowBits = (window_bits == kZlibDefaultWbits) ? GetWindowBitsDefault() : window_bits; // kZlibDefaultWbits is replaced by the value of GetWindowBitsDefault()
477  }
478  int GetWindowBits(void) const { return m_cd_WindowBits; } ///< Return the stored window bits value.
479  static int GetWindowBitsDefault(void);
480  static int GetWindowBitsMin(void);
481  static int GetWindowBitsMax(void);
482 
483 protected:
484  /// Format string with last error description.
485  /// If pos == 0, the internal m_Stream's position is used in the report.
486  string FormatErrorMessage(string where, size_t pos = 0) const;
487 
488 protected:
489  void* m_Stream; ///< Compressor stream.
490  int m_cd_WindowBits; ///< The base two logarithm of the window size.
491  int m_c_MemLevel; ///< The allocation memory level for the compression.
492  int m_c_Strategy; ///< The parameter to tune up a compression algorithm.
493 
494 private:
495  /// Private copy constructor to prohibit copy.
497  /// Private assignment operator to prohibit assignment.
498  CZipCompression& operator= (const CZipCompression&);
499 };
500 
501 
502 /////////////////////////////////////////////////////////////////////////////
503 ///
504 /// CZipCompressionFile --
505 ///
506 /// Allow read/write operations on files in zlib or gzip (.gz) formats.
507 /// Throw exceptions on critical errors.
508 
510  public CCompressionFile
511 {
512 public:
513  /// Constructor.
514  ///
515  /// Automatically calls Open() with given file name, mode and compression level.
516  /// @note
517  /// This constructor doesn't allow using any advanced compression parameters
518  /// or a dictionary. If you need to set any of them, please use the simplified
519  /// conventional constructor, set the advanced parameters, and then call Open().
520  ///
522  const string& file_name,
523  EMode mode,
524  ELevel level = eLevel_Default,
525  size_t compression_in_bufsize = kCompressionDefaultBufSize,
526  size_t compression_out_bufsize = kCompressionDefaultBufSize
527  );
528  /// Conventional constructor.
530  ELevel level = eLevel_Default
531  );
532 
533  /// Destructor
535 
536  /// Opens a compressed file for reading or writing.
537  ///
538  /// @param file_name
539  /// File name of the file to open.
540  /// @param mode
541  /// File open mode.
542  /// @param compression_in_bufsize
543  /// Size of the internal buffer holding input data to be (de)compressed.
544  /// @param compression_out_bufsize
545  /// Size of the internal buffer to receive data from a (de)compressor.
546  /// @return
547  /// TRUE if file was opened successfully or FALSE otherwise.
548  /// @sa
549  /// CZipCompression, Read, Write, Close
550  /// @note
551  /// All advanced compression parameters or a dictionary should be set before
552  /// Open() method, otherwise they will not have any effect.
553  ///
554  virtual bool Open(
555  const string& file_name,
556  EMode mode,
557  size_t compression_in_bufsize = kCompressionDefaultBufSize,
558  size_t compression_out_bufsize = kCompressionDefaultBufSize
559  );
560 
561  /// Opens a compressed file for reading or writing.
562  ///
563  /// Do the same as standard Open(), but can also get/set file info.
564  /// @param file_name
565  /// File name of the file to open.
566  /// @param mode
567  /// File open mode.
568  /// @param info
569  /// Pointer to a file information structure. If it is not NULL,
570  /// it will be used to get information about the compressed file
571  /// in read mode, and to set it in write mode for gzip files.
572  /// @param compression_in_bufsize
573  /// Size of the internal buffer holding input data to be (de)compressed.
574  /// @param compression_out_bufsize
575  /// Size of the internal buffer to receive data from a (de)compressor.
576  /// @return
577  /// TRUE if file was opened successfully or FALSE otherwise.
578  /// @sa
579  /// CZipCompression, Read, Write, Close
580  ///
581  virtual bool Open(
582  const string& file_name,
583  EMode mode,
584  SFileInfo* info,
585  size_t compression_in_bufsize = kCompressionDefaultBufSize,
586  size_t compression_out_bufsize = kCompressionDefaultBufSize
587  );
588 
589  /// Read data from compressed file.
590  ///
591  /// Read up to "len" uncompressed bytes from the compressed file "file"
592  /// into the buffer "buf".
593  /// @param buf
594  /// Buffer for requested data.
595  /// @param len
596  /// Number of bytes to read.
597  /// @return
598  /// Number of bytes actually read (0 for end of file, -1 for error).
599  /// The number of really read bytes can be less than requested.
600  /// @sa
601  /// Open, Write, Close
602  ///
603  virtual long Read(void* buf, size_t len);
604 
605  /// Write data to compressed file.
606  ///
607  /// Writes the given number of uncompressed bytes from the buffer
608  /// into the compressed file.
609  /// @param buf
610  /// Buffer with written data.
611  /// @param len
612  /// Number of bytes to write.
613  /// @return
614  /// Number of bytes actually written or -1 for error.
615  /// Returned value can be less than "len".
616  /// @sa
617  /// Open, Read, Close
618  ///
619  virtual long Write(const void* buf, size_t len);
620 
621  /// Close compressed file.
622  ///
623  /// Flushes all pending output if necessary, closes the compressed file.
624  /// @return
625  /// TRUE on success, FALSE on error.
626  /// @sa
627  /// Open, Read, Write
628  ///
629  virtual bool Close(void);
630 
631 protected:
632  /// Get error code/description of last stream operation (m_Stream).
633  /// It can be received using GetErrorCode()/GetErrorDescription() methods.
634  void GetStreamError(void);
635 
636 protected:
637  EMode m_Mode; ///< I/O mode (read/write).
638  CNcbiFstream* m_File; ///< File stream.
639  CCompressionIOStream* m_Stream; ///< [De]compression stream.
640 
641 private:
642  /// Private copy constructor to prohibit copy.
644  /// Private assignment operator to prohibit assignment.
646 };
647 
648 
649 /////////////////////////////////////////////////////////////////////////////
650 ///
651 /// CZipCompressor -- zlib based compressor
652 ///
653 /// Used in CZipStreamCompressor.
654 /// @sa CZipStreamCompressor, CZipCompression, CCompressionProcessor
655 
657  public CCompressionProcessor
658 {
659 public:
660  /// Constructor.
662  ELevel level = eLevel_Default,
663  TZipFlags flags = 0
664  );
665 
666  /// Destructor.
667  virtual ~CZipCompressor(void);
668 
669  /// Set information about compressed file.
670  ///
671  /// Used for compression of gzip files.
672  void SetFileInfo(const SFileInfo& info);
673 
674  /// Return TRUE if the fAllowEmptyData flag is set.
675  /// @note
676  /// Used by the stream buffer, which doesn't have access to
677  /// implementation-specific compression flags.
678  virtual bool AllowEmptyData() const
679  { return (GetFlags() & fAllowEmptyData) == fAllowEmptyData; }
680 
681 protected:
682  virtual EStatus Init (void);
683  virtual EStatus Process(const char* in_buf, size_t in_len,
684  char* out_buf, size_t out_size,
685  /* out */ size_t* in_avail,
686  /* out */ size_t* out_avail);
687  virtual EStatus Flush (char* out_buf, size_t out_size,
688  /* out */ size_t* out_avail);
689  virtual EStatus Finish (char* out_buf, size_t out_size,
690  /* out */ size_t* out_avail);
691  virtual EStatus End (int abandon = 0);
692 
693 private:
694  unsigned long m_CRC32; ///< CRC32 for compressed data.
695  string m_Cache; ///< Buffer to cache small pieces of data.
697  ///< Is true if needed to write a file header.
698  SFileInfo m_FileInfo; ///< Compressed file info.
699 };
700 
701 
702 
703 /////////////////////////////////////////////////////////////////////////////
704 ///
705 /// CZipDecompressor -- zlib based decompressor
706 ///
707 /// Used in CZipStreamDecompressor.
708 /// @sa CZipStreamDecompressor, CZipCompression, CCompressionProcessor
709 
711  public CCompressionProcessor
712 {
713 public:
714  /// Constructor.
716 
717  /// Destructor.
718  virtual ~CZipDecompressor(void);
719 
720  /// Return TRUE if the fAllowEmptyData flag is set.
721  /// @note
722  /// Used by the stream buffer, which doesn't have access to
723  /// implementation-specific compression flags.
724  virtual bool AllowEmptyData() const
725  { return (GetFlags() & fAllowEmptyData) == fAllowEmptyData; }
726 
727 protected:
728  virtual EStatus Init (void);
729  virtual EStatus Process(const char* in_buf, size_t in_len,
730  char* out_buf, size_t out_size,
731  /* out */ size_t* in_avail,
732  /* out */ size_t* out_avail);
733  virtual EStatus Flush (char* out_buf, size_t out_size,
734  /* out */ size_t* out_avail);
735  virtual EStatus Finish (char* out_buf, size_t out_size,
736  /* out */ size_t* out_avail);
737  virtual EStatus End (int abandon = 0);
738 
739 private:
740  bool m_NeedCheckHeader; ///< TRUE if the file header needs to be checked.
741  bool m_IsGZ; ///< TRUE if the data is in gzip format.
742  size_t m_SkipInput; ///< Number of bytes to skip from the input stream.
743  ///< Used to process concatenated .gz files.
744  string m_Cache; ///< Buffer to cache small pieces of data.
745 };
746 
747 
748 
749 /////////////////////////////////////////////////////////////////////////////
750 ///
751 /// CZipStreamCompressor -- zlib based compression stream processor
752 ///
753 /// See util/compress/stream.hpp for details of stream processing.
754 /// @note
755 /// Compression/decompression flags (CZipCompression::EFlags) can greatly
756 /// affect CZipStreamCompressor behavior. By default, the compressor
757 /// produces plain zip data, which is not compatible with the gzip/gunzip utilities.
758 /// Please use the appropriate flags in the constructor to change the default behavior.
759 /// @sa CCompressionStreamProcessor
760 
763 {
764 public:
765  /// Full constructor
768  streamsize in_bufsize,
769  streamsize out_bufsize,
771  )
773  new CZipCompressor(level, flags), eDelete, in_bufsize, out_bufsize)
774  {}
775 
776  /// Conventional constructor.
777  /// Uses default buffer sizes for I/O, that can be not ideal for some scenarios.
781  )
783  new CZipCompressor(level, flags),
785  {}
786 
787  /// Conventional constructor.
788  /// Uses default buffer sizes for I/O, that can be not ideal for some scenarios.
791  new CZipCompressor(CZipCompression::eLevel_Default, flags),
793  {}
794 
795  /// Return a pointer to compressor.
796  /// Can be used mostly for setting an advanced compression-specific parameters.
798  return dynamic_cast<CZipCompressor*>(GetProcessor());
799  }
800 };
801 
802 
803 /////////////////////////////////////////////////////////////////////////////
804 ///
805 /// CZipStreamDecompressor -- zlib based decompression stream processor
806 ///
807 /// See util/compress/stream.hpp for details of stream processing.
808 /// @note
809 /// Compression/decompression flags (CZipCompression::EFlags) can greatly
810 /// affect CZipStreamDecompressor behavior. By default, the decompressor
811 /// does not allow data in gzip format. Please use the appropriate flags
812 /// in the constructor to change the default behavior.
813 /// @sa CCompressionStreamProcessor
814 
817 {
818 public:
819  /// Full constructor
821  streamsize in_bufsize,
822  streamsize out_bufsize,
824  )
826  new CZipDecompressor(flags), eDelete, in_bufsize, out_bufsize)
827  {}
828 
829  /// Conventional constructor.
830  /// Uses default buffer sizes for I/O, that can be not ideal for some scenarios.
833  new CZipDecompressor(flags),
835  {}
836 
837  /// Return a pointer to decompressor.
838  /// Can be used mostly for setting an advanced compression-specific parameters.
840  return dynamic_cast<CZipDecompressor*>(GetProcessor());
841  }
842 };
843 
844 
845 //////////////////////////////////////////////////////////////////////////////
846 //
847 // Global functions
848 //
849 
850 /// Get list of positions of separate gzip files in the concatenated gzip file.
851 /// Return results via user defined handler.
852 /// Throw CCoreException/CCompressionException on error.
853 ///
854 /// @param is
855 /// Opened input stream to scan (should be opened in binary mode).
856 /// @param handler
857 /// Call handler's IChunkHandler::OnChunk() method and pass position
858 /// of each new gzip file inside a stream and size of uncompressed data
859 /// on that moment.
860 /// @note
861 /// This function doesn't support concatenated .gz files compressed with a dictionary.
862 ///
865 
866 
868 
869 
870 /* @} */
871 
872 #endif /* UTIL_COMPRESS__ZLIB__HPP */
CVersionInfo –.
CZipCompressionFile –.
Definition: zlib.hpp:511
CZipCompression –.
Definition: zlib.hpp:81
CZipCompressor – zlib based compressor.
Definition: zlib.hpp:658
CZipDecompressor – zlib based decompressor.
Definition: zlib.hpp:712
CZipStreamCompressor – zlib based compression stream processor.
Definition: zlib.hpp:763
CZipStreamDecompressor – zlib based decompression stream processor.
Definition: zlib.hpp:817
Interface class to scan data source for seekable data chunks.
Definition: compress.hpp:631
void(*)(CSeq_entry_Handle seh, IWorkbench *wb, const CSerialObject &obj) handler
static uch flags
const char * file_name[]
@ eNoOwnership
No ownership is assumed.
Definition: ncbi_types.h:135
virtual long Write(const void *buf, size_t len)
Write data to compressed file.
virtual bool DecompressFileIntoDir(const string &src_file, const string &dst_dir, size_t file_io_bufsize=kCompressionDefaultBufSize, size_t decompression_in_bufsize=kCompressionDefaultBufSize, size_t decompression_out_bufsize=kCompressionDefaultBufSize)
Decompress file into specified directory.
void SetFileInfo(const SFileInfo &info)
Set information about compressed file.
void SetMemoryLevel(int mem_level)
Memory level.
Definition: zlib.hpp:454
void g_GZip_ScanForChunks(CNcbiIstream &is, IChunkHandler &handler)
Get list of positions of separate gzip files in the concatenated gzip file.
void GetStreamError(void)
Get error code/description of last stream operation (m_Stream).
static SRecommendedBufferSizes GetRecommendedBufferSizes(size_t round_up=0)
Get recommended buffer sizes for stream/file I/O.
virtual size_t EstimateCompressionBufferSize(size_t src_len)
Estimate buffer size for data compression.
void SetWindowBits(int window_bits)
Window bits.
Definition: zlib.hpp:475
unsigned long m_CRC32
CRC32 for compressed data.
Definition: zlib.hpp:694
virtual EStatus End(int abandon=0)
Free all dynamically allocated data structures.
bool m_NeedWriteHeader
TRUE if a file header needs to be written.
Definition: zlib.hpp:696
size_t m_SkipInput
Number of bytes to skip from input stream.
Definition: zlib.hpp:742
virtual bool Open(const string &file_name, EMode mode, size_t compression_in_bufsize=kCompressionDefaultBufSize, size_t compression_out_bufsize=kCompressionDefaultBufSize)
Opens a compressed file for reading or writing.
CZipStreamDecompressor(CZipCompression::TZipFlags flags=0)
Conventional constructor.
Definition: zlib.hpp:831
CZipCompressionFile(const CZipCompressionFile &)
Private copy constructor to prohibit copy.
string m_Cache
Buffer to cache small pieces of data.
Definition: zlib.hpp:695
virtual EStatus Flush(char *out_buf, size_t out_size, size_t *out_avail)
Flush compressed/decompressed data from the output buffer.
void * m_Stream
Compressor stream.
Definition: zlib.hpp:489
static int GetMemoryLevelMax(void)
virtual TFlags GetFlags(void) const
Get/set flags.
Definition: compress.cpp:105
const int kZlibDefaultStrategy
Definition: zlib.hpp:69
CZipCompressor(ELevel level=eLevel_Default, TZipFlags flags=0)
Constructor.
static int GetMemoryLevelMin(void)
ELevel
Compression level.
Definition: compress.hpp:142
static int GetWindowBitsMin(void)
int GetMemoryLevel(void) const
Definition: zlib.hpp:457
virtual EStatus Init(void)
Initialize the internal stream state for compression/decompression.
CZipCompression::TFlags TZipFlags
Bitwise OR of EFlags.
Definition: zlib.hpp:127
virtual EStatus Finish(char *out_buf, size_t out_size, size_t *out_avail)
Finish the compression/decompression process.
virtual EStatus End(int abandon=0)
Free all dynamically allocated data structures.
virtual bool CompressFile(const string &src_file, const string &dst_file, size_t file_io_bufsize=kCompressionDefaultBufSize, size_t compression_in_bufsize=kCompressionDefaultBufSize, size_t compression_out_bufsize=kCompressionDefaultBufSize)
Compress file.
virtual long Read(void *buf, size_t len)
Read data from compressed file.
const streamsize kCompressionDefaultBufSize
Default compression I/O stream buffer size.
Definition: compress.hpp:111
virtual EStatus Flush(char *out_buf, size_t out_size, size_t *out_avail)
Flush compressed/decompressed data from the output buffer.
const int kZlibDefaultCompression
Definition: zlib.hpp:70
CZipDecompressor * GetDecompressor(void) const
Return a pointer to decompressor.
Definition: zlib.hpp:839
virtual bool DecompressFile(const string &src_file, const string &dst_file, size_t file_io_bufsize=kCompressionDefaultBufSize, size_t decompression_in_bufsize=kCompressionDefaultBufSize, size_t decompression_out_bufsize=kCompressionDefaultBufSize)
Decompress file.
CZipStreamCompressor(CZipCompression::ELevel level, CZipCompression::TZipFlags flags=0)
Conventional constructor.
Definition: zlib.hpp:778
SFileInfo m_FileInfo
Compressed file info.
Definition: zlib.hpp:698
static int GetWindowBitsMax(void)
CNcbiFstream * m_File
File stream.
Definition: zlib.hpp:638
virtual bool CompressBuffer(const void *src_buf, size_t src_len, void *dst_buf, size_t dst_size, size_t *dst_len)
Compress data in the buffer.
int m_cd_WindowBits
The base two logarithm of the window size.
Definition: zlib.hpp:490
virtual bool AllowEmptyData() const
Return TRUE if fAllowEmptyData flag is set.
Definition: zlib.hpp:678
CZipCompressionFile(const string &file_name, EMode mode, ELevel level=eLevel_Default, size_t compression_in_bufsize=kCompressionDefaultBufSize, size_t compression_out_bufsize=kCompressionDefaultBufSize)
Constructor.
virtual EStatus Process(const char *in_buf, size_t in_len, char *out_buf, size_t out_size, size_t *in_avail, size_t *out_avail)
Compress/decompress as much data as possible, and stops when the input buffer becomes empty or the ou...
int m_c_MemLevel
The allocation memory level for the compression.
Definition: zlib.hpp:491
int GetWindowBits(void) const
Definition: zlib.hpp:478
CZipDecompressor(TZipFlags flags=0)
Constructor.
const int kZlibDefaultWbits
Definition: zlib.hpp:67
CCompressionIOStream * m_Stream
[De]compression stream.
Definition: zlib.hpp:639
virtual bool Open(const string &file_name, EMode mode, SFileInfo *info, size_t compression_in_bufsize=kCompressionDefaultBufSize, size_t compression_out_bufsize=kCompressionDefaultBufSize)
Opens a compressed file for reading or writing.
CZipCompression(ELevel level=eLevel_Default)
Constructor.
virtual ~CZipCompression(void)
Destructor.
~CZipCompressionFile(void)
Destructor.
string m_Cache
Buffer to cache small pieces of data.
Definition: zlib.hpp:744
bool m_IsGZ
TRUE if data have gzip format.
Definition: zlib.hpp:741
unsigned int TFlags
Compression flags.
Definition: compress.hpp:160
virtual ~CZipCompressor(void)
Destructor.
CZipStreamCompressor(CZipCompression::TZipFlags flags=0)
Conventional constructor.
Definition: zlib.hpp:789
virtual EStatus Process(const char *in_buf, size_t in_len, char *out_buf, size_t out_size, size_t *in_avail, size_t *out_avail)
Compress/decompress as much data as possible, and stops when the input buffer becomes empty or the ou...
const int kZlibDefaultMemLevel
Definition: zlib.hpp:68
int m_c_Strategy
The parameter to tune up a compression algorithm.
Definition: zlib.hpp:492
virtual EStatus Finish(char *out_buf, size_t out_size, size_t *out_avail)
Finish the compression/decompression process.
virtual bool SetDictionary(CCompressionDictionary &dict, ENcbiOwnership own=eNoOwnership)
Set a dictionary for all compression/decompression operations.
virtual bool Close(void)
Close compressed file.
virtual bool AllowEmptyData() const
Return TRUE if fAllowEmptyData flag is set.
Definition: zlib.hpp:724
virtual CVersionInfo GetVersion(void) const
Return name and version of the compression library.
static int GetStrategyMax(void)
virtual bool HaveSupport(ESupportFeature feature)
Check if compression have support for a specified feature.
string FormatErrorMessage(string where, size_t pos=0) const
Format string with last error description.
virtual ELevel GetDefaultLevel(void) const
Returns default compression level for a compression algorithm.
Definition: zlib.hpp:141
EStatus
Type of the result of all basic functions.
Definition: compress.hpp:467
void SetStrategy(int strategy)
Compression strategy.
Definition: zlib.hpp:438
static int GetStrategyMin(void)
virtual EStatus Init(void)
Initialize the internal stream state for compression/decompression.
bool m_NeedCheckHeader
TRUE if the file header needs to be checked.
Definition: zlib.hpp:740
static int GetWindowBitsDefault(void)
CZipStreamDecompressor(streamsize in_bufsize, streamsize out_bufsize, CZipCompression::TZipFlags flags=0)
Full constructor.
Definition: zlib.hpp:820
CZipCompressionFile(ELevel level=eLevel_Default)
Conventional constructor.
static bool Initialize(void)
Initialize compression library (for API compatibility, zlib doesn't need it).
Definition: zlib.hpp:84
int GetStrategy(void) const
Definition: zlib.hpp:441
static int GetMemoryLevelDefault(void)
CZipCompression(const CZipCompression &)
Private copy constructor to prohibit copy.
CZipCompressor * GetCompressor(void) const
Return a pointer to compressor.
Definition: zlib.hpp:797
static int GetStrategyDefault(void)
CZipStreamCompressor(CZipCompression::ELevel level, streamsize in_bufsize, streamsize out_bufsize, CZipCompression::TZipFlags flags=0)
Full constructor.
Definition: zlib.hpp:766
EMode m_Mode
I/O mode (read/write).
Definition: zlib.hpp:637
virtual ~CZipDecompressor(void)
Destructor.
ESupportFeature
Supported features.
Definition: compress.hpp:191
EMode
File open mode.
Definition: compress.hpp:371
EFlags
Compression/decompression flags.
Definition: zlib.hpp:87
virtual bool DecompressBuffer(const void *src_buf, size_t src_len, void *dst_buf, size_t dst_size, size_t *dst_len)
Decompress data in the buffer.
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::fstream CNcbiFstream
Portable alias for fstream.
Definition: ncbistre.hpp:538
ENcbiOwnership
Ownership relations between objects.
Definition: ncbi_types.h:134
strategy
Block allocation strategies.
Definition: bmconst.h:146
char * buf
int len
static MDB_envinfo info
Definition: mdb_load.c:37
mdb_mode_t mode
Definition: lmdb++.h:38
NCBI_XUTIL_EXPORT
Parameter to control printing diagnostic message about conversion of static array data from a differe...
Definition: static_set.hpp:72
Structure to get information about recommended buffer sizes for file/stream I/O to tune up a (de)comp...
Definition: compress.hpp:299
Structure to keep compressed file information.
Definition: zlib.hpp:355
Modified on Wed Sep 04 15:02:02 2024 by modify_doxy.py rev. 669887