NCBI C++ ToolKit
tar.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef UTIL_COMPRESS__TAR__HPP
2 #define UTIL_COMPRESS__TAR__HPP
3 
4 /* $Id: tar.hpp 86638 2019-05-31 23:39:51Z lavr $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Vladimir Ivanov
30  * Anton Lavrentiev
31  *
32  * File Description:
33  * Tar archive API
34  */
35 
36 /// @file
37 /// Tar archive API.
38 ///
39 /// Supports subsets of POSIX.1-1988 (ustar), POSIX 1003.1-2001 (posix), old
40 /// GNU (POSIX 1003.1), and V7 formats (all partially but reasonably). New
41 /// archives are created using POSIX (genuine ustar) format, using GNU
42 /// extensions for long names/links only when unavoidable. It cannot,
43 /// however, handle all the exotics like sparse files (except for GNU/1.0
44 /// sparse PAX extension) and contiguous files (yet still can work around both
45 /// of them gracefully, if needed), multivolume / incremental archives, etc.
46 /// but just regular files, devices (character or block), FIFOs, directories,
47 /// and limited links: can extract both hard- and symlinks, but can store
48 /// symlinks only. Also, this implementation is only minimally PAX(Portable
49 /// Archive eXchange)-aware for file extractions (and does not yet use any PAX
50 /// extensions to store the files).
51 ///
52 
53 #include <corelib/ncbifile.hpp>
54 #include <utility>
55 
56 
57 /** @addtogroup Compression
58  *
59  * @{
60  */
61 
62 
64 
65 
66 /////////////////////////////////////////////////////////////////////////////
67 ///
68 /// TTarMode --
69 ///
70 /// Permission bits as defined in tar
71 ///
72 
74  // Special mode bits
75  fTarSetUID = 04000, ///< set UID on execution
76  fTarSetGID = 02000, ///< set GID on execution
77  fTarSticky = 01000, ///< reserved (sticky bit)
78  // File permissions
79  fTarURead = 00400, ///< read by owner
80  fTarUWrite = 00200, ///< write by owner
81  fTarUExecute = 00100, ///< execute/search by owner
82  fTarGRead = 00040, ///< read by group
83  fTarGWrite = 00020, ///< write by group
84  fTarGExecute = 00010, ///< execute/search by group
85  fTarORead = 00004, ///< read by other
86  fTarOWrite = 00002, ///< write by other
87  fTarOExecute = 00001 ///< execute/search by other
88 };
89 typedef unsigned int TTarMode; ///< Bitwise OR of ETarModeBits
90 
91 
92 /////////////////////////////////////////////////////////////////////////////
93 ///
94 /// CTarException --
95 ///
96 /// Define exceptions generated by the API.
97 /// Exception text may include detailed dump of a tar header (when appropriate)
98 /// if fDumpEntryHeaders is set in the archive flags.
99 ///
100 /// CTarException inherits its basic functionality from CCoreException
101 /// and defines additional error codes for tar archive operations.
102 ///
103 /// @sa
104 /// CTar::SetFlags
105 
107 {
108 public:
109  /// Error types that file operations can generate.
110  enum EErrCode {
123  eRestoreAttrs
124  };
125 
126  /// Translate from an error code value to its string representation (the name of the enumerator).
127  virtual const char* GetErrCodeString(void) const override
128  {
129  switch (GetErrCode()) {
130  case eUnsupportedTarFormat: return "eUnsupportedTarFormat";
131  case eUnsupportedEntryType: return "eUnsupportedEntryType";
132  case eUnsupportedSource: return "eUnsupportedSource";
133  case eNameTooLong: return "eNameTooLong";
134  case eChecksum: return "eChecksum";
135  case eBadName: return "eBadName";
136  case eCreate: return "eCreate";
137  case eOpen: return "eOpen";
138  case eRead: return "eRead";
139  case eWrite: return "eWrite";
140  case eBackup: return "eBackup";
141  case eMemory: return "eMemory";
142  case eRestoreAttrs: return "eRestoreAttrs";
143  default: return CException::GetErrCodeString();  // any code not listed above is delegated to CException
144  }
145  }
146 
147  // Standard exception boilerplate code.
149 };
150 
151 
152 //////////////////////////////////////////////////////////////////////////////
153 ///
154 /// CTarEntryInfo class
155 ///
156 /// Information about a tar archive entry.
157 
159 {
160 public:
161  /// Archive entry type.
162  enum EType {
163  eFile = CDirEntry::eFile, ///< Regular file
164  eDir = CDirEntry::eDir, ///< Directory
165  eSymLink = CDirEntry::eSymLink, ///< Symbolic link
166  ePipe = CDirEntry::ePipe, ///< Pipe (FIFO)
167  eCharDev = CDirEntry::eCharSpecial, ///< Character device
168  eBlockDev = CDirEntry::eBlockSpecial, ///< Block device
169  eUnknown = CDirEntry::eUnknown, ///< Unknown type
170  eHardLink, ///< Hard link
171  eVolHeader, ///< Volume header
172  ePAXHeader, ///< PAX extended header
173  eSparseFile, ///< GNU/STAR sparse file
174  eGNULongName, ///< GNU long name
175  eGNULongLink ///< GNU long link
176  };
177 
178  /// Position type.
179  enum EPos {
181  ePos_Data
182  };
183 
184  // No setters -- they are not needed for access by the user, and thus are
185  // done directly from CTar for the sake of performance and code clarity.
186 
187  // Getters only!
188  EType GetType(void) const { return m_Type; }  // entry type (m_Type)
189  const string& GetName(void) const { return m_Name; }  // entry name, returned by reference to the member
190  const string& GetLinkName(void) const { return m_LinkName; }  // link name; per the member's comment, used if type is e{Sym|Hard}Link
191  const string& GetUserName(void) const { return m_UserName; }  // user name (cf. GetUserId() for the numeric id)
192  const string& GetGroupName(void) const { return m_GroupName; }  // group name (cf. GetGroupId() for the numeric id)
193  time_t GetModificationTime(void) const  // time_t form of the modification time
194  { return m_Stat.orig.st_mtime; }  // the nanosecond part is kept separately in m_Stat.mtime_nsec
196  { CTime mtime(m_Stat.orig.st_mtime);
197  mtime.SetNanoSecond(m_Stat.mtime_nsec);
198  return mtime; }
199  time_t GetLastAccessTime(void) const  // time_t form of the last access time
200  { return m_Stat.orig.st_atime; }  // the nanosecond part is kept separately in m_Stat.atime_nsec
202  { CTime atime(m_Stat.orig.st_atime);
203  atime.SetNanoSecond(m_Stat.atime_nsec);
204  return atime; }
205  time_t GetCreationTime(void) const  // NOTE(review): returns st_ctime, which on POSIX is the status-change time rather than a creation time -- confirm intended meaning
206  { return m_Stat.orig.st_ctime; }  // the nanosecond part is kept separately in m_Stat.ctime_nsec
208  { CTime ctime(m_Stat.orig.st_ctime);
209  ctime.SetNanoSecond(m_Stat.ctime_nsec);
210  return ctime; }
211  Uint8 GetSize(void) const  // size as recorded in the stat block
212  { return m_Stat.orig.st_size; }  // NOTE(review): st_size is presumably a signed off_t converted implicitly to Uint8 -- confirm it can never be negative here
213  TTarMode GetMode(void) const;// Raw mode as stored in tar
214  void GetMode(CDirEntry::TMode* user_mode,
215  CDirEntry::TMode* group_mode = 0,
216  CDirEntry::TMode* other_mode = 0,
217  CDirEntry::TSpecialModeBits* special_bits = 0) const;
218  unsigned int GetMajor(void) const;
219  unsigned int GetMinor(void) const;
220  unsigned int GetUserId(void) const  // numeric owner id (the name is available via GetUserName())
221  { return m_Stat.orig.st_uid; }
222  unsigned int GetGroupId(void) const  // numeric group id (the name is available via GetGroupName())
223  { return m_Stat.orig.st_gid; }
224  Uint8 GetPosition(EPos which) const  // ePos_Header: position of the entry itself (m_Pos); any other value: m_Pos advanced by m_HeaderSize
225  { return which == ePos_Header ? m_Pos : m_Pos + m_HeaderSize; }  // m_HeaderSize is the total size of all headers for the entry
226 
227  // Comparison operator: true only if every data member of both objects matches.
228  bool operator == (const CTarEntryInfo& info) const
229  { return (m_Type == info.m_Type &&
230  m_Name == info.m_Name &&
231  m_LinkName == info.m_LinkName &&
232  m_UserName == info.m_UserName &&
233  m_GroupName == info.m_GroupName &&
234  m_HeaderSize == info.m_HeaderSize &&
235  memcmp(&m_Stat,&info.m_Stat, sizeof(m_Stat)) == 0 &&  // raw byte-wise comparison of the whole stat structure
236  m_Pos == info.m_Pos ? true : false); }  // "?:" binds looser than "&&", so it applies to the entire chain above
237 
238 protected:
239  // Constructor.
241  : m_Type(eUnknown), m_HeaderSize(0), m_Pos(pos)
242  { memset(&m_Stat, 0, sizeof(m_Stat)); }
243 
244  EType m_Type; ///< Type
245  string m_Name; ///< Entry name
246  string m_LinkName; ///< Link name if type is e{Sym|Hard}Link
247  string m_UserName; ///< User name
248  string m_GroupName; ///< Group name
249  streamsize m_HeaderSize; ///< Total size of all headers for the entry
250  CDirEntry::SStat m_Stat; ///< Direntry-compatible info
251  Uint8 m_Pos; ///< Entry (not data!) position in archive
252 
253  friend class CTar; // Setter
254 };
255 
256 
257 /// User-creatable info for streaming into a tar.
258 /// Since the entry info is built largely incomplete, all getters have been
259 /// disabled; should some be needed they could be brought back by subclassing
260 /// and redeclaring the necessary one(s) in the public part of the new class.
262 {
263 public:
264  CTarUserEntryInfo(const string& name, Uint8 size)  // fills in only the name and the size; nothing else is assigned here
265  {
266  m_Name = name;  // entry name
267  m_Stat.orig.st_size = size;  // the value later reported by GetSize()
268  }
269 
270  friend class CTar; // Accessor
271 };
272 
273 
274 /// Nice TOC(table of contents) printout.
275 NCBI_XUTIL_EXPORT ostream& operator << (ostream&, const CTarEntryInfo&);
276 
277 
278 /// Forward declaration of a tar header used internally.
279 struct STarHeader;
280 
281 
282 //////////////////////////////////////////////////////////////////////////////
283 ///
284 /// CTar class
285 ///
286 /// (Throws exceptions on most errors.)
287 /// Note that if stream constructor is used, then CTar can only perform one
288 /// pass over the archive. This means that only one full action will succeed
289 /// (and if the action was to update -- e.g. append -- the archive, it has to
290 /// be explicitly followed by Close() when no more appends are expected).
291 /// Before the next read/update action, the stream position has to be reset
292 /// explicitly to the beginning of the archive, or it may also remain at the
293 /// end of the archive for a series of successive append operations.
294 
296 {
297 public:
298  /// General flags
299  enum EFlags {
300  // --- Extract/List/Test ---
301  /// Ignore blocks of zeros in archive.
302  // Generally, 2 or more consecutive zero blocks indicate EOT.
303  fIgnoreZeroBlocks = (1<<1),
304 
305  // --- Extract/Append/Update ---
306  /// Follow symbolic links (instead of storing/extracting them)
307  fFollowLinks = (1<<2),
308 
309  // --- Extract --- (NB: fUpdate also applies to Update)
310  /// Allow to overwrite destinations with entries from the archive
311  fOverwrite = (1<<3),
312  /// Only update entries that are older than those already existing
313  fUpdate = (1<<4) | fOverwrite,
314  /// Backup destinations if they exist (all entries including dirs)
315  fBackup = (1<<5) | fOverwrite,
316  /// If destination entry exists, it must have the same type as source
317  fEqualTypes = (1<<6),
318  /// Create extracted files with the original ownership
319  fPreserveOwner = (1<<7),
320  /// Create extracted files with the original permissions
321  fPreserveMode = (1<<8),
322  /// Preserve date/times for extracted files
323  fPreserveTime = (1<<9),
324  /// Preserve all file attributes
325  fPreserveAll = fPreserveOwner | fPreserveMode | fPreserveTime,
326  /// Preserve absolute path instead of stripping the leading slash('/')
327  fKeepAbsolutePath = (1<<12),
328  /// Do not extract PAX GNU/1.0 sparse files (treat 'em as unsupported)
329  fSparseUnsupported = (1<<13),
330 
331  // --- Extract/List ---
332  /// Skip unsupported entries rather than make files out of them when
333  /// extracting (the latter is the default behavior required by POSIX)
334  fSkipUnsupported = (1<<15),
335 
336  // --- Append ---
337  /// Ignore unreadable files/dirs (still warn about them, but don't stop)
338  fIgnoreUnreadable = (1<<17),
339  /// Always use OldGNU headers for long names (default:only when needed)
340  fLongNameSupplement = (1<<18),
341 
342  // --- Debugging ---
343  fDumpEntryHeaders = (1<<20),
344  fSlowSkipWithRead = (1<<21),
345 
346  // --- Miscellaneous ---
347  /// Stream tar data through
348  fStreamPipeThrough = (1<<24),
349  /// Do not trim tar file size after append/update
350  fTarfileNoTruncate = (1<<26),
351  /// Suppress NCBI signatures in entry headers
352  fStandardHeaderOnly = (1<<28),
353 
354  /// Default flags
355  fDefault = fOverwrite | fPreserveAll
356  };
357  typedef unsigned int TFlags; ///< Bitwise OR of EFlags
358 
359  /// Mask type enumerator.
360  /// @enum eExtractMask
361  /// CMask can select both inclusions and exclusions (in this order) of
362  /// fully-qualified archive entries for listing or extraction, so that
363  /// e.g. ".svn" does not match an entry like "a/.svn" for processing.
364  /// @enum eExcludeMask
365  /// CMask can select both exclusions and inclusions (in this order) of
366  /// patterns of the archive entries for all operations (excepting eTest),
367  /// and so that ".svn" matches "a/b/c/.svn".
368  enum EMaskType {
369  eExtractMask = 0, ///< exact for list or extract
370  eExcludeMask ///< pattern for all but test
371  };
372 
373  /// Constructors
374  CTar(const string& filename, size_t blocking_factor = 20);
375  /// Stream version does not at all use stream positioning and so is safe on
376  /// non-positionable streams, like pipes/sockets (or magnetic tapes :-I).
377  CTar(CNcbiIos& stream, size_t blocking_factor = 20);
378 
379  /// Destructor (finalize the archive if currently open).
380  /// @sa
381  /// Close
382  virtual ~CTar();
383 
384 
385  /// Define a list of entries.
386  typedef list<CTarEntryInfo> TEntries;
387 
388  /// Define a list of files with sizes (directories and specials, such as
389  /// devices, must be given with sizes of 0; symlinks -- with the sizes
390  /// of the names they are linking to).
391  typedef pair<string, Uint8> TFile;
392  typedef list<TFile> TFiles;
393 
394 
395  //------------------------------------------------------------------------
396  // Main functions
397  //------------------------------------------------------------------------
398 
399  /// Create a new empty archive.
400  ///
401  /// If a file with such a name already exists it will be overwritten.
402  /// @sa
403  /// Append
404  void Create(void);
405 
406  /// Close the archive making sure all pending output is flushed.
407  ///
408  /// Normally, direct call of this method need _not_ intersperse successive
409  /// archive manipulations by other methods, as they open and close the
410  /// archive automagically as needed. Rather, this call is to make sure the
411  /// archive is complete earlier than it would otherwise usually be done
412  /// automatically in the destructor of the CTar object.
413  /// @sa
414  /// ~CTar
415  void Close(void);
416 
417  /// Append an entry at the end of the archive that already exists.
418  ///
419  /// Appended entry can be either a file, a directory, a symbolic link,
420  /// a device special file (block or character), or a FIFO special file,
421  /// subject to any exclusions as set by SetMask() with eExcludeMask.
422  /// The name is taken with respect to the base directory, if any set.
423  ///
424  /// Adding a directory results in all its files and subdirectories (subject
425  /// to the exclusion mask) to get added: examine the return value to find
426  /// out what has been added.
427  ///
428  /// Note that the final name of an entry may not contain embedded '..'.
429  /// Leading slash in the absolute paths will be retained. The names of
430  /// all appended entries will be converted to Unix format (that is, to
431  /// have only forward slashes in the paths, and drive letter, if any on
432  /// MS-Windows, stripped). All entries will be added at the logical end
433  /// (not always EOF) of the archive, when appending to a non-empty one.
434  ///
435  /// @note Adding to a stream archive does not seek to the logical end of
436  /// the archive but begins at the current position right away.
437  ///
438  /// @return
439  /// A list of entries appended.
440  /// @sa
441  /// Create, Update, SetBaseDir, SetMask
442  unique_ptr<TEntries> Append(const string& name);
443 
444  /// Append an entry from a stream (exactly entry.GetSize() bytes).
445  /// @note
446  /// Name masks (if any set with SetMask()) are all ignored.
447  /// @return
448  /// A list (containing this one entry) with full archive info filled in
449  /// @sa
450  /// Append
451  unique_ptr<TEntries> Append(const CTarUserEntryInfo& entry,
452  CNcbiIstream& is);
453 
454  /// Look whether more recent copies of the archive members are available in
455  /// the file system, and if so, append them to the archive:
456  ///
457  /// - if fUpdate is set in processing flags, only the existing archive
458  /// entries (including directories) will be updated; that is, Update(".")
459  /// won't recursively add "." if "." is not an archive member; it will,
460  /// however, do the recursive update should "." be found in the archive;
461  ///
462  /// - if fUpdate is unset, the existing entries will be updated (if their
463  /// file system counterparts are newer), and nonexistent entries will be
464  /// added to the archive; that is, Update(".") will recursively scan "."
465  /// to update both existing entries (if newer files found), and also add
466  /// new entries for any files/directories, which are currently not in.
467  ///
468  /// @note Updating stream archive may (and most certainly will) cause
469  /// zero-filled gaps in the archive (can be read with "ignore zeroes").
470  ///
471  /// @return
472  /// A list of entries that have been updated.
473  /// @sa
474  /// Append, SetBaseDir, SetMask, SetFlags
475  unique_ptr<TEntries> Update(const string& name);
476 
477  /// Extract the entire archive (into either current directory or a
478  /// directory otherwise specified by SetBaseDir()).
479  ///
480  /// If the same-named files exist, they will be replaced (subject to
481  /// fOverwrite) or backed up (fBackup), unless fUpdate is set, which would
482  /// cause the replacement / backup only if the files are older than the
483  /// archive entries. Note that if fOverwrite is stripped, no matching
484  /// files will be updated / backed up / overwritten, but skipped.
485  ///
486  /// Extract all archive entries, whose names match the pre-set mask.
487  /// @note
488  /// Unlike Append(), extracting a matching directory does *not*
489  /// automatically extract all files within: for them to be extracted,
490  /// they still must match the mask. So if there is a directory "dir/"
491  /// stored in the archive, the extract mask can be "dir/*" for the
492  /// entire subtree to be extracted. Note that "dir/" will only extract
493  /// the directory itself, and "dir" won't cause that directory to be
494  /// extracted at all (mismatch due to the trailing slash '/' missing).
495  /// @return
496  /// A list of entries that have been actually extracted.
497  /// @sa
498  /// SetMask, SetBaseDir, SetFlags
499  unique_ptr<TEntries> Extract(void);
500 
501  /// Get information about all matching archive entries.
502  ///
503  /// @return
504  /// A list containing information on those archive entries, whose
505  /// names match the pre-set mask.
506  /// @sa
507  /// SetMask
508  unique_ptr<TEntries> List(void);
509 
510  /// Verify archive integrity.
511  ///
512  /// Read through the archive without actually extracting anything from it.
513  /// Flag fDumpEntryHeaders causes most of archive headers to be dumped to
514  /// the log (with eDiag_Info) as the Test() advances through the archive.
515  /// @sa
516  /// SetFlags
517  void Test(void);
518 
519 
520  //------------------------------------------------------------------------
521  // Utility functions
522  //------------------------------------------------------------------------
523 
524  /// Get processing flags.
525  TFlags GetFlags(void) const;
526 
527  /// Set processing flags.
528  void SetFlags(TFlags flags);
529 
530  /// Get current stream position.
531  Uint8 GetCurrentPosition(void) const;
532 
533  /// Set name mask.
534  ///
535  /// The set of masks is used to process existing entries in the archive:
536  /// both the extract and exclude masks apply to the list and extract
537  /// operations, and only the exclude mask applies to the named append.
538  /// If masks are not defined then all archive entries will be processed.
539  ///
540  /// @note Unset mask means wildcard processing (all entries match).
541  ///
542  /// @param mask
543  /// Set of masks (0 to unset the current set without setting a new one).
544  /// @param own
545  /// Whether to take ownership of the mask (delete upon CTar destruction).
546  /// @sa
547  /// SetFlags
548  void SetMask(CMask* mask,
549  EOwnership own = eNoOwnership,
550  EMaskType type = eExtractMask,
551  NStr::ECase acase = NStr::eCase);
552 
553  /// Get base directory to use for files while extracting from/adding to
554  /// the archive, and in the latter case used only for relative paths.
555  /// @sa
556  /// SetBaseDir
557  const string& GetBaseDir(void) const;
558 
559  /// Set base directory to use for files while extracting from/adding to
560  /// the archive, and in the latter case used only for relative paths.
561  /// @sa
562  /// GetBaseDir
563  void SetBaseDir(const string& dirname);
564 
565  /// Return archive size as if all specified input entries were put in it.
566  /// Note that the return value is not the exact but the upper bound of
567  /// what the archive size can be expected. This call does not recurse
568  /// into any subdirectories but relies solely upon the information as
569  /// passed via the parameter.
570  ///
571  /// The returned size includes all necessary alignments and padding.
572  /// @return
573  /// An upper estimate of archive size given that all specified files
574  /// were stored in it (the actual size may turn out to be smaller).
575  static Uint8 EstimateArchiveSize(const TFiles& files,
576  size_t blocking_factor = 20,
577  const string& base_dir = kEmptyStr);
578 
579 
580  //------------------------------------------------------------------------
581  // Streaming
582  //------------------------------------------------------------------------
583 
584  /// Iterate over the archive forward and return first (or next) entry.
585  ///
586  /// When using this method (possibly along with GetNextEntryData()), the
587  /// archive stream (if any) must not be accessed outside the CTar API,
588  /// because otherwise inconsistency in data may result.
589  /// An application may call GetNextEntryData() to stream some or all of the
590  /// data out of this entry, or it may call GetNextEntryInfo() again to skip
591  /// to the next archive entry, etc.
592  /// Note that the archive can contain multiple versions of the same entry
593  /// (in case an update was done on it), all of which but the last one
594  /// are to be ignored. This call traverses through all those entry
595  /// versions, and sequentially exposes them to the application level.
596  /// See test suite (in test/test_tar.cpp) for a usage example.
597  /// @return
598  /// Pointer to next entry info in the archive or 0 if EOF encountered.
599  /// @sa
600  /// CTarEntryInfo, GetNextEntryData
601  const CTarEntryInfo* GetNextEntryInfo(void);
602 
603  /// Create and return an IReader, which can extract the current archive
604  /// entry that has been previously returned via GetNextEntryInfo.
605  ///
606  /// The returned pointer is non-zero only if the current entry is a file
607  /// (even of size 0). The ownership of the pointer is passed to the caller
608  /// (so it has to be explicitly deleted when no longer needed).
609  /// The IReader may be used to read all or part of data out of the entry
610  /// without affecting GetNextEntryInfo()'s ability to find any following
611  /// entry in the archive.
612  /// See test suite (in test/test_tar.cpp) for a usage example.
613  /// @return
614  /// Pointer to IReader, or 0 if the current entry is not a file.
615  /// @sa
616  /// GetNextEntryData, IReader, CRStream
617  IReader* GetNextEntryData(void);
618 
619  /// Create and return an IReader, which can extract contents of one named
620  /// file (which can be requested by a name mask in the "name" parameter).
621  ///
622  /// The tar archive is deemed to be in the specified stream "is", properly
623  /// positioned (either at the beginning of the archive, or at any
624  /// CTarEntryInfo::GetPosition(ePos_Header)'s result possibly off-set
625  /// with some fixed archive base position, e.g. if there is any preamble).
626  /// The extraction is done at the first matching entry only, then stops.
627  /// @note fStreamPipeThrough will be ignored if passed in flags.
628  /// See test suite (in test/test_tar.cpp) for a usage example.
629  /// @return
630  /// IReader interface to read the file contents with; 0 on error.
631  /// @sa
632  /// CTarEntryInfo::GetPosition, Extract, SetMask, SetFlags,
633  /// GetNextEntryInfo, GetNextEntryData, IReader, CRStream
634  static IReader* Extract(CNcbiIstream& is, const string& name,
635  TFlags flags = fSkipUnsupported);
636 
637 protected:
638  //------------------------------------------------------------------------
639  // User-redefinable callback
640  //------------------------------------------------------------------------
641 
642  /// Return false to skip the current entry when reading;
643  /// the return code gets ignored when writing.
644  ///
645  /// Note that the callback can encounter multiple entries of the same file
646  /// in case the archive has been updated (so only the last occurrence is
647  /// the actual copy of the file when extracted).
648  virtual bool Checkpoint(const CTarEntryInfo& /*current*/,
649  bool /*ifwrite: write==true, read==false*/)
650  { return true; }  // default implementation ignores both arguments and always returns true
651 
652 private:
653  /// Archive open mode and action
654  enum EOpenMode {
655  eNone = 0,
656  eWO = 1,
657  eRO = 2,
658  eRW = eRO | eWO
659  };
660  enum EAction {
662  eList = (1 << 2) | eRO,
663  eAppend = (1 << 3) | eRW,
664  eUpdate = eList | eAppend,
665  eExtract = (1 << 4) | eRO,
666  eTest = eList | eExtract,
667  eCreate = (1 << 5) | eWO,
668  eInternal = (1 << 6) | eRO
669  };
670  /// I/O completion code
671  enum EStatus {
672  eFailure = -1,
673  eSuccess = 0,
676  eEOF
677  };
678  /// Mask storage
679  struct SMask {
683 
684  SMask(void)  // default state: null mask pointer, NStr::eNocase, eNoOwnership
685  : mask(0), acase(NStr::eNocase), owned(eNoOwnership)
686  { }
687  };
688 
689  // Common part of initialization.
690  void x_Init(void);
691 
692  // Open/close the archive.
693  void x_Open(EAction action);
694  void x_Close(bool truncate); // NB: "truncate" effects file archives only
695 
696  // Flush the archive (w/EOT); return "true" if it is okay to truncate
697  bool x_Flush(bool nothrow = false);
698 
699  // Backspace and fast-forward the archive.
700  void x_Backspace(EAction action); // NB: m_ZeroBlockCount blocks back
701  void x_Skip(Uint8 blocks); // NB: Can do by either skip or read
702 
703  // Parse in extended entry information (PAX) for the current entry.
704  EStatus x_ParsePAXData(const string& data);
705 
706  // Read information about current entry in the archive.
707  EStatus x_ReadEntryInfo(bool dump, bool pax);
708 
709  // Pack current name or linkname into archive entry header.
710  bool x_PackCurrentName(STarHeader* header, bool link);
711 
712  // Write information for current entry into the archive.
713  void x_WriteEntryInfo(const string& name);
714 
715  // Read the archive and do the requested "action" on current entry.
716  unique_ptr<TEntries> x_ReadAndProcess(EAction action);
717 
718  // Process current entry from the archive (the actual size passed in).
719  // If action != eExtract, then just skip the entry without any processing.
720  // Return true iff the entry was successfully extracted (ie with eExtract).
721  bool x_ProcessEntry(EAction action, Uint8 size, const TEntries* done);
722 
723  // Extract current entry (archived size passed in) from the archive into
724  // the file system, and update the size still remaining in the archive, if
725  // any. Return true if the extraction succeeded, false otherwise.
726  bool x_ExtractEntry(Uint8& size, const CDirEntry* dst,
727  const CDirEntry* src);
728 
729  // Extract file data from the archive.
730  void x_ExtractPlainFile (Uint8& size, const CDirEntry* dst);
731  bool x_ExtractSparseFile(Uint8& size, const CDirEntry* dst,
732  bool dump = false);
733 
734  // Restore attributes of an entry in the file system.
735  // If "path" is not specified, then the destination path will be
736  // constructed from "info", and the base directory (if any). Otherwise,
737  // "path" will be used "as is", assuming it corresponds to "info".
738  void x_RestoreAttrs(const CTarEntryInfo& info,
739  TFlags what,
740  const CDirEntry* path = 0,
741  TTarMode perm = 0/*override*/) const;
742 
743  // Read a text string terminated with '\n'.
744  string x_ReadLine(Uint8& size, const char*& data, size_t& nread);
745 
746  // Read/write specified number of bytes from/to the archive.
747  const char* x_ReadArchive (size_t& n);
748  void x_WriteArchive(size_t n, const char* buffer = 0);
749 
750  // Append an entry from the file system to the archive.
751  unique_ptr<TEntries> x_Append(const string& name, const TEntries* toc = 0);
752 
753  // Append an entry from an istream to the archive.
754  unique_ptr<TEntries> x_Append(const CTarUserEntryInfo& entry,
755  CNcbiIstream& is);
756 
757  // Append data from an istream to the archive.
758  void x_AppendStream(const string& name, CNcbiIstream& is);
759 
760  // Append a regular file to the archive.
761  bool x_AppendFile(const string& file);
762 
763 private:
764  string m_FileName; ///< Tar archive file name (only if file)
765  CNcbiFstream* m_FileStream; ///< File stream of the archive (if file)
766  CNcbiIos& m_Stream; ///< Archive stream (used for all I/O)
767  size_t m_ZeroBlockCount; ///< Zero blocks seen in between entries
768  const size_t m_BufferSize; ///< Buffer(record) size for I/O operations
769  size_t m_BufferPos; ///< Position within the record
770  Uint8 m_StreamPos; ///< Position in stream (0-based)
771  char* m_BufPtr; ///< Page-unaligned buffer pointer
772  char* m_Buffer; ///< I/O buffer (page-aligned)
773  SMask m_Mask[2]; ///< Entry masks for operations
774  EOpenMode m_OpenMode; ///< What was it opened for
775  bool m_Modified; ///< True after at least one write
776  bool m_Bad; ///< True if a fatal output error occurred
777  TFlags m_Flags; ///< Bitwise OR of flags
778  string m_BaseDir; ///< Base directory for relative paths
779  CTarEntryInfo m_Current; ///< Current entry being processed
780 
781 private:
782  // Prohibit assignment and copy
783  CTar& operator=(const CTar&);
784  CTar(const CTar&);
785 
786  friend class CTarReader;
787 };
788 
789 
790 //////////////////////////////////////////////////////////////////////////////
791 //
792 // Inline methods
793 //
794 
// Create(): consists solely of opening the archive with the eCreate action
// (declared as a write-only action in EAction).
795 inline
796 void CTar::Create(void)
797 {
798  x_Open(eCreate);
799 }
800 
// Close(): flush first, then close.  x_Flush() is called with its default
// argument (nothrow = false); its result -- documented as "true if it is okay
// to truncate" -- is passed on as the "truncate" argument of x_Close().
801 inline
802 void CTar::Close(void)
803 {
804  x_Close(x_Flush());
805 }
806 
// Append(name): open the archive with the eAppend action, then append the
// named file system entry.  x_Append() is called with its "toc" argument left
// at the default of 0; its result is returned to the caller unchanged.
807 inline
808 unique_ptr<CTar::TEntries> CTar::Append(const string& name)
809 {
810  x_Open(eAppend);
811  return x_Append(name);
812 }
813 
// Append(entry, is): open the archive with the eAppend action, then forward
// the user-supplied entry info and the input stream to the stream overload of
// x_Append(); its result is returned to the caller unchanged.
814 inline
815 unique_ptr<CTar::TEntries> CTar::Append(const CTarUserEntryInfo& entry,
816  CNcbiIstream& is)
817 {
818  x_Open(eAppend);
819  return x_Append(entry, is);
820 }
821 
// Update(name): open the archive with the eUpdate action, read through it with
// x_ReadAndProcess(eUpdate), and pass the resulting entry list to x_Append()
// as its "toc" argument together with the name.
822 inline
823 unique_ptr<CTar::TEntries> CTar::Update(const string& name)
824 {
825  x_Open(eUpdate);
826  return x_Append(name, x_ReadAndProcess(eUpdate).get());  // the temporary unique_ptr owns the list until the end of this full expression, so the raw pointer stays valid throughout the x_Append() call
827 }
828 
// List(): open the archive with the eList action (a read-only action in
// EAction) and return whatever x_ReadAndProcess(eList) produces.
829 inline
830 unique_ptr<CTar::TEntries> CTar::List(void)
831 {
832  x_Open(eList);
833  return x_ReadAndProcess(eList);
834 }
835 
836 inline
837 void CTar::Test(void)
838 {
839  x_Open(eTest);
841 }
842 
843 inline
845 {
846  return m_Flags;
847 }
848 
849 inline
851 {
852  m_Flags = flags;
853 }
854 
// GetCurrentPosition(): plain accessor for m_StreamPos, the 0-based position
// in the stream.
855 inline Uint8 CTar::GetCurrentPosition(void) const
856 {
857  return m_StreamPos;
858 }
859 
// GetBaseDir(): plain accessor; returns a reference to the m_BaseDir member
// (no copy of the string is made).
860 inline
861 const string& CTar::GetBaseDir(void) const
862 {
863  return m_BaseDir;
864 }
865 
866 
868 
869 
870 /* @} */
871 
872 
873 #endif /* UTIL_COMPRESS__TAR__HPP */
EStatus
ncbi::TMaskedQueryRegions mask
CCoreException –.
Definition: ncbiexpt.hpp:1476
CDirEntry –.
Definition: ncbifile.hpp:262
CMask –.
Definition: ncbi_mask.hpp:59
CTarEntryInfo class.
Definition: tar.hpp:159
CTarException –.
Definition: tar.hpp:107
User-creatable info for streaming into a tar.
Definition: tar.hpp:262
CTar class.
Definition: tar.hpp:296
CTime –.
Definition: ncbitime.hpp:296
A very basic data-read interface.
NStr –.
Definition: ncbistr.hpp:243
static void Test(const char *bind1, SQLSMALLINT type1, const char *bind2, SQLSMALLINT type2)
Definition: convert_error.c:15
static uch flags
bool operator==(const CEquivRange &A, const CEquivRange &B)
@ eNoOwnership
No ownership is assumed.
Definition: ncbi_types.h:135
void Create(void)
Create a new empty archive.
Definition: tar.hpp:796
void Test(void)
Verify archive integrity.
Definition: tar.hpp:837
const size_t m_BufferSize
Buffer(record) size for I/O operations.
Definition: tar.hpp:768
string m_GroupName
Group name.
Definition: tar.hpp:248
string m_LinkName
Link name if type is e{Sym|Hard}Link.
Definition: tar.hpp:246
SMask(void)
Definition: tar.hpp:684
void x_Close(bool truncate)
Definition: tar.cpp:1485
const string & GetLinkName(void) const
Definition: tar.hpp:190
const string & GetUserName(void) const
Definition: tar.hpp:191
bool m_Bad
True if a fatal output error occurred.
Definition: tar.hpp:776
CTarEntryInfo m_Current
Current entry being processed.
Definition: tar.hpp:779
bool m_Modified
True after at least one write.
Definition: tar.hpp:775
Uint8 m_StreamPos
Position in stream (0-based)
Definition: tar.hpp:770
void x_Open(EAction action)
Definition: tar.cpp:1506
CTar(const CTar &)
EOpenMode
Archive open mode and action.
Definition: tar.hpp:654
time_t GetLastAccessTime(void) const
Definition: tar.hpp:199
Uint8 GetCurrentPosition(void) const
Get current stream position.
Definition: tar.hpp:855
unique_ptr< TEntries > x_Append(const string &name, const TEntries *toc=0)
Definition: tar.cpp:4116
char * m_Buffer
I/O buffer (page-aligned)
Definition: tar.hpp:772
EType GetType(void) const
Definition: tar.hpp:188
string m_UserName
User name.
Definition: tar.hpp:247
pair< string, Uint8 > TFile
Define a list of files with sizes (directories and specials, such as devices, must be given with size...
Definition: tar.hpp:391
const string & GetGroupName(void) const
Definition: tar.hpp:192
const string & GetBaseDir(void) const
Get base directory to use for files while extracting from/adding to the archive, and in the latter ca...
Definition: tar.hpp:861
CNcbiIos & m_Stream
Archive stream (used for all I/O)
Definition: tar.hpp:766
Uint8 GetSize(void) const
Definition: tar.hpp:211
virtual bool Checkpoint(const CTarEntryInfo &, bool)
Return false to skip the current entry when reading; the return code gets ignored when writing.
Definition: tar.hpp:648
ETarModeBits
TTarMode –.
Definition: tar.hpp:73
virtual const char * GetErrCodeString(void) const override
Translate from an error code value to its string representation.
Definition: tar.hpp:127
CTar & operator=(const CTar &)
TFlags m_Flags
Bitwise OR of flags.
Definition: tar.hpp:777
unsigned int GetGroupId(void) const
Definition: tar.hpp:222
CTime GetModificationCTime(void) const
Definition: tar.hpp:195
list< CTarEntryInfo > TEntries
Define a list of entries.
Definition: tar.hpp:386
Uint8 GetPosition(EPos which) const
Definition: tar.hpp:224
NCBI_EXCEPTION_DEFAULT(CTarException, CCoreException)
CTime GetCreationCTime(void) const
Definition: tar.hpp:207
char * m_BufPtr
Page-unaligned buffer pointer.
Definition: tar.hpp:771
EType
Archive entry type.
Definition: tar.hpp:162
unsigned int TTarMode
Bitwise OR of ETarModeBits.
Definition: tar.hpp:89
time_t GetModificationTime(void) const
Definition: tar.hpp:193
CTime GetLastAccessCTime(void) const
Definition: tar.hpp:201
CMask * mask
Definition: tar.hpp:680
const string & GetName(void) const
Definition: tar.hpp:189
size_t m_ZeroBlockCount
Zero blocks seen in between entries.
Definition: tar.hpp:767
CTarUserEntryInfo(const string &name, Uint8 size)
Definition: tar.hpp:264
Uint8 m_Pos
Entry (not data!) position in archive.
Definition: tar.hpp:251
unique_ptr< TEntries > List(void)
Get information about all matching archive entries.
Definition: tar.hpp:830
void SetFlags(TFlags flags)
Set processing flags.
Definition: tar.hpp:850
void Close(void)
Close the archive making sure all pending output is flushed.
Definition: tar.hpp:802
string m_Name
Entry name.
Definition: tar.hpp:245
EOwnership owned
Definition: tar.hpp:682
EOpenMode m_OpenMode
What was it opened for.
Definition: tar.hpp:774
unique_ptr< TEntries > Append(const string &name)
Append an entry at the end of the archive that already exists.
Definition: tar.hpp:808
EType m_Type
Type.
Definition: tar.hpp:244
TFlags GetFlags(void) const
Get processing flags.
Definition: tar.hpp:844
list< TFile > TFiles
Definition: tar.hpp:392
EMaskType
Definition: tar.hpp:368
unsigned int GetUserId(void) const
Definition: tar.hpp:220
unsigned int TFlags
Bitwise OR of EFlags.
Definition: tar.hpp:357
unique_ptr< TEntries > Update(const string &name)
Look whether more recent copies of the archive members are available in the file system,...
Definition: tar.hpp:823
string m_FileName
Tar archive file name (only if file)
Definition: tar.hpp:764
EPos
Position type.
Definition: tar.hpp:179
EAction
Definition: tar.hpp:660
CTarEntryInfo(Uint8 pos=0)
Definition: tar.hpp:240
string m_BaseDir
Base directory for relative paths.
Definition: tar.hpp:778
bool x_Flush(bool nothrow=false)
Definition: tar.cpp:1396
CDirEntry::SStat m_Stat
Direntry-compatible info.
Definition: tar.hpp:250
unique_ptr< TEntries > x_ReadAndProcess(EAction action)
Definition: tar.cpp:2943
streamsize m_HeaderSize
Total size of all headers for the entry.
Definition: tar.hpp:249
EStatus
I/O completion code.
Definition: tar.hpp:671
size_t m_BufferPos
Position within the record.
Definition: tar.hpp:769
EFlags
General flags.
Definition: tar.hpp:299
NStr::ECase acase
Definition: tar.hpp:681
time_t GetCreationTime(void) const
Definition: tar.hpp:205
ostream & operator<<(ostream &, const CTarEntryInfo &)
Nice TOC(table of contents) printout.
CNcbiFstream * m_FileStream
File stream of the archive (if file)
Definition: tar.hpp:765
@ fTarOWrite
write by other
Definition: tar.hpp:86
@ fTarUWrite
write by owner
Definition: tar.hpp:80
@ fTarGRead
read by group
Definition: tar.hpp:82
@ fTarORead
read by other
Definition: tar.hpp:85
@ fTarURead
read by owner
Definition: tar.hpp:79
@ fTarGExecute
execute/search by group
Definition: tar.hpp:84
@ fTarGWrite
write by group
Definition: tar.hpp:83
@ fTarOExecute
execute/search by other
Definition: tar.hpp:87
@ fTarSetGID
set GID on execution
Definition: tar.hpp:76
@ fTarSetUID
set UID on execution
Definition: tar.hpp:75
@ fTarSticky
reserved (sticky bit)
Definition: tar.hpp:77
@ fTarUExecute
execute/search by owner
Definition: tar.hpp:81
@ eSparseFile
GNU/STAR sparse file.
Definition: tar.hpp:173
@ eHardLink
Hard link.
Definition: tar.hpp:170
@ eGNULongName
GNU long name.
Definition: tar.hpp:174
@ ePAXHeader
PAX extended header.
Definition: tar.hpp:172
@ eVolHeader
Volume header.
Definition: tar.hpp:171
@ ePos_Header
Definition: tar.hpp:180
@ eUpdate
Definition: tar.hpp:664
@ eList
Definition: tar.hpp:662
@ eCreate
Definition: tar.hpp:667
@ eAppend
Definition: tar.hpp:663
@ eTest
Definition: tar.hpp:666
@ eZeroBlock
Definition: tar.hpp:675
@ eContinue
Definition: tar.hpp:674
@ eUnsupportedEntryType
Definition: tar.hpp:112
@ eUnsupportedSource
Definition: tar.hpp:113
@ eUnsupportedTarFormat
Definition: tar.hpp:111
@ eNameTooLong
Definition: tar.hpp:114
virtual void x_Init(const CDiagCompileInfo &info, const string &message, const CException *prev_exception, EDiagSev severity)
Helper method for initializing exception data.
Definition: ncbiexpt.cpp:509
EErrCode
Error types that an application can generate.
Definition: ncbiexpt.hpp:884
TErrCode GetErrCode(void) const
Definition: ncbiexpt.hpp:1493
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
unsigned int TSpecialModeBits
Bitwise OR of ESpecialModeBits.
Definition: ncbifile.hpp:1179
unsigned int TMode
Bitwise OR of "EMode".
Definition: ncbifile.hpp:1172
TNcbiSys_stat orig
Original stat structure.
Definition: ncbifile.hpp:823
@ eDir
Directory.
Definition: ncbifile.hpp:784
@ eFile
Regular file.
Definition: ncbifile.hpp:783
@ eSymLink
Symbolic link (UNIX only)
Definition: ncbifile.hpp:787
@ eUnknown
Unknown type.
Definition: ncbifile.hpp:793
@ eBlockSpecial
Block special (UNIX only)
Definition: ncbifile.hpp:790
@ eCharSpecial
Character special.
Definition: ncbifile.hpp:791
@ ePipe
Pipe.
Definition: ncbifile.hpp:785
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ios CNcbiIos
Portable alias for ios.
Definition: ncbistre.hpp:140
IO_PREFIX::fstream CNcbiFstream
Portable alias for fstream.
Definition: ncbistre.hpp:538
#define kEmptyStr
Definition: ncbistr.hpp:123
ECase
Which type of string comparison.
Definition: ncbistr.hpp:1204
@ eCase
Case sensitive compare.
Definition: ncbistr.hpp:1205
void SetNanoSecond(long nanosecond)
Set nanoseconds.
Definition: ncbitime.cpp:1126
enum ENcbiOwnership EOwnership
Ownership relations between objects.
#define dump(b)
FILE * file
yy_size_t n
static MDB_envinfo info
Definition: mdb_load.c:37
const struct ncbi::grid::netcache::search::fields::SIZE size
static EIO_Status x_Flush(CONN conn, const STimeout *timeout, int isflush)
static void x_Close(SHttpConnector *uuu)
static const char * x_ReadLine(const char *path, char *line, size_t size)
Definition: ncbi_namerd.c:1081
static SERV_ITER x_Open(const char *service, int ismask, TSERV_Type types, unsigned int preferred_host, unsigned short preferred_port, double preference, const SConnNetInfo *net_info, SSERV_InfoCPtr skip[], size_t n_skip, int external, const char *arg, const char *val, SSERV_Info **info, HOST_INFO *host_info)
Definition: ncbi_service.c:253
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
@ eRead
Definition: ns_types.hpp:56
static pcre_uint8 * buffer
Definition: pcretest.c:1051
@ eSuccess
Successfully retrieved.
NCBI_XUTIL_EXPORT
Parameter to control printing diagnostic message about conversion of static array data from a differe...
Definition: static_set.hpp:72
Alternate stat structure for use instead of the standard struct stat.
Definition: ncbifile.hpp:822
Mask storage.
Definition: tar.hpp:679
POSIX "ustar" tar archive member header.
Definition: tar.cpp:392
static DP_BlockInfo * blocks
Definition: type.c:6
done
Definition: token1.c:1
Modified on Fri Dec 01 04:43:59 2023 by modify_doxy.py rev. 669887