NCBI C++ ToolKit
tar.hpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 #ifndef UTIL_COMPRESS__TAR__HPP
2 #define UTIL_COMPRESS__TAR__HPP
3 
4 /* $Id: tar.hpp 102739 2024-07-03 15:40:31Z lavr $
5  * ===========================================================================
6  *
7  * PUBLIC DOMAIN NOTICE
8  * National Center for Biotechnology Information
9  *
10  * This software/database is a "United States Government Work" under the
11  * terms of the United States Copyright Act. It was written as part of
12  * the author's official duties as a United States Government employee and
13  * thus cannot be copyrighted. This software/database is freely available
14  * to the public for use. The National Library of Medicine and the U.S.
15  * Government have not placed any restriction on its use or reproduction.
16  *
17  * Although all reasonable efforts have been taken to ensure the accuracy
18  * and reliability of the software and data, the NLM and the U.S.
19  * Government do not and cannot warrant the performance or results that
20  * may be obtained by using this software or data. The NLM and the U.S.
21  * Government disclaim all warranties, express or implied, including
22  * warranties of performance, merchantability or fitness for any particular
23  * purpose.
24  *
25  * Please cite the author in any work or product based on this material.
26  *
27  * ===========================================================================
28  *
29  * Authors: Vladimir Ivanov
30  * Anton Lavrentiev
31  *
32  * File Description:
33  * Tar archive API
34  */
35 
36 /// @file
37 /// Tar archive API.
38 ///
39 /// Supports subsets of POSIX.1-1988 (ustar), POSIX 1003.1-2001 (posix), old
40 /// GNU (POSIX 1003.1), and V7 formats (all partially but reasonably). New
41 /// archives are created using POSIX (genuine ustar) format, using GNU
42 /// extensions for long names/links only when unavoidable. It cannot,
43 /// however, handle all the exotics like sparse files (except for GNU/1.0
44 /// sparse PAX extension) and contiguous files (yet still can work around both
45 /// of them gracefully, if needed), multivolume / incremental archives, etc.
46 /// but just regular files, devices (character or block), FIFOs, directories,
47 /// and limited links: can extract both hard- and symlinks, but can store
48 /// symlinks only. Also, this implementation is only minimally PAX(Portable
49 /// Archive eXchange)-aware for file extractions (and does not yet use any PAX
50 /// extensions to store the files).
51 ///
52 
53 #include <corelib/ncbifile.hpp>
54 #include <utility>
55 
56 
57 /** @addtogroup Compression
58  *
59  * @{
60  */
61 
62 
64 
65 
66 /////////////////////////////////////////////////////////////////////////////
67 ///
68 /// TTarMode --
69 ///
70 /// Permission bits as defined in tar
71 ///
72 
74  // Special mode bits
75  fTarSetUID = 04000, ///< set UID on execution
76  fTarSetGID = 02000, ///< set GID on execution
77  fTarSticky = 01000, ///< reserved (sticky bit)
78  // File permissions
79  fTarURead = 00400, ///< read by owner
80  fTarUWrite = 00200, ///< write by owner
81  fTarUExecute = 00100, ///< execute/search by owner
82  fTarGRead = 00040, ///< read by group
83  fTarGWrite = 00020, ///< write by group
84  fTarGExecute = 00010, ///< execute/search by group
85  fTarORead = 00004, ///< read by other
86  fTarOWrite = 00002, ///< write by other
87  fTarOExecute = 00001 ///< execute/search by other
88 };
89 typedef unsigned int TTarMode; ///< Bitwise OR of ETarModeBits
90 
91 
92 /////////////////////////////////////////////////////////////////////////////
93 ///
94 /// CTarException --
95 ///
96 /// Define exceptions generated by the API.
97 /// Exception text may include detailed dump of a tar header (when appropriate)
98 /// if fDumpEntryHeaders is set in the archive flags.
99 ///
100 /// CTarException inherits its basic functionality from CCoreException
101 /// and defines additional error codes for tar archive operations.
102 ///
103 /// @sa
104 /// CTar::SetFlags
105 
107 {
108 public:
109  /// Error types that file operations can generate.
110  enum EErrCode {
123  eRestoreAttrs
124  };
125 
126  /// Translate from an error code value to its string representation.
127  virtual const char* GetErrCodeString(void) const override
128  {
129  switch (GetErrCode()) {
130  case eUnsupportedTarFormat: return "eUnsupportedTarFormat";
131  case eUnsupportedEntryType: return "eUnsupportedEntryType";
132  case eUnsupportedSource: return "eUnsupportedSource";
133  case eNameTooLong: return "eNameTooLong";
134  case eChecksum: return "eChecksum";
135  case eBadName: return "eBadName";
136  case eCreate: return "eCreate";
137  case eOpen: return "eOpen";
138  case eRead: return "eRead";
139  case eWrite: return "eWrite";
140  case eBackup: return "eBackup";
141  case eMemory: return "eMemory";
142  case eRestoreAttrs: return "eRestoreAttrs";
143  default: return CException::GetErrCodeString();
144  }
145  }
146 
147  // Standard exception boilerplate code.
149 };
150 
151 
152 //////////////////////////////////////////////////////////////////////////////
153 ///
154 /// CTarEntryInfo class
155 ///
156 /// Information about a tar archive entry.
157 
159 {
160 public:
161  /// Archive entry type.
162  enum EType {
163  eFile = CDirEntry::eFile, ///< Regular file
164  eDir = CDirEntry::eDir, ///< Directory
165  eSymLink = CDirEntry::eSymLink, ///< Symbolic link
166  ePipe = CDirEntry::ePipe, ///< Pipe (FIFO)
167  eCharDev = CDirEntry::eCharSpecial, ///< Character device
168  eBlockDev = CDirEntry::eBlockSpecial, ///< Block device
169  eUnknown = CDirEntry::eUnknown, ///< Unknown type
170  eHardLink, ///< Hard link
171  eVolHeader, ///< Volume header
172  ePAXHeader, ///< PAX extended header
173  eSparseFile, ///< GNU/STAR sparse file
174  eGNULongName, ///< GNU long name
175  eGNULongLink ///< GNU long link
176  };
177 
178  /// Position type.
179  enum EPos {
181  ePos_Data
182  };
183 
184  // No setters -- they are not needed for access by the user, and thus are
185  // done directly from CTar for the sake of performance and code clarity.
186 
187  // Getters only!
188  EType GetType(void) const { return m_Type; }
189  const string& GetName(void) const { return m_Name; }
190  const string& GetLinkName(void) const { return m_LinkName; }
191  const string& GetUserName(void) const { return m_UserName; }
192  const string& GetGroupName(void) const { return m_GroupName; }
193  time_t GetModificationTime(void) const
194  { return m_Stat.orig.st_mtime; }
196  { CTime mtime(m_Stat.orig.st_mtime);
197  mtime.SetNanoSecond(m_Stat.mtime_nsec);
198  return mtime; }
199  time_t GetLastAccessTime(void) const
200  { return m_Stat.orig.st_atime; }
202  { CTime atime(m_Stat.orig.st_atime);
203  atime.SetNanoSecond(m_Stat.atime_nsec);
204  return atime; }
205  time_t GetCreationTime(void) const
206  { return m_Stat.orig.st_ctime; }
208  { CTime ctime(m_Stat.orig.st_ctime);
209  ctime.SetNanoSecond(m_Stat.ctime_nsec);
210  return ctime; }
211  Uint8 GetSize(void) const
212  { return m_Stat.orig.st_size; }
213  TTarMode GetMode(void) const;// Raw mode as stored in tar
214  void GetMode(CDirEntry::TMode* user_mode,
215  CDirEntry::TMode* group_mode = 0,
216  CDirEntry::TMode* other_mode = 0,
217  CDirEntry::TSpecialModeBits* special_bits = 0) const;
218  unsigned int GetMajor(void) const;
219  unsigned int GetMinor(void) const;
220  unsigned int GetUserId(void) const
221  { return m_Stat.orig.st_uid; }
222  unsigned int GetGroupId(void) const
223  { return m_Stat.orig.st_gid; }
224  Uint8 GetPosition(EPos which) const
225  { return which == ePos_Header ? m_Pos : m_Pos + m_HeaderSize; }
226 
227  // Non-empty only if filesystem was involved (append/extract/update)
228  const string& GetPath(void) const { return m_Path; }
229 
230  // Comparison operator.
231  bool operator == (const CTarEntryInfo& info) const
232  { return (m_Type == info.m_Type &&
233  m_Name == info.m_Name &&
234  m_LinkName == info.m_LinkName &&
235  m_UserName == info.m_UserName &&
236  m_GroupName == info.m_GroupName &&
237  m_HeaderSize == info.m_HeaderSize &&
238  memcmp(&m_Stat,&info.m_Stat, sizeof(m_Stat)) == 0 &&
239  m_Pos == info.m_Pos ? true : false); }
240 
241 protected:
242  // Constructor.
244  : m_Type(eUnknown), m_HeaderSize(0), m_Pos(pos)
245  { memset(&m_Stat, 0, sizeof(m_Stat)); }
246 
247  EType m_Type; ///< Type
248  string m_Name; ///< Entry name
249  string m_LinkName; ///< Link name if type is e{Sym|Hard}Link
250  string m_UserName; ///< User name
251  string m_GroupName; ///< Group name
252  streamsize m_HeaderSize; ///< Total size of all headers for the entry
253  CDirEntry::SStat m_Stat; ///< Direntry-compatible info
254  Uint8 m_Pos; ///< Entry (not data!) position in archive
255 
256  string m_Path; ///< Iff filesystem involved in processing
257 
258  friend class CTar; // Setter
259 };
260 
261 
262 /// User-creatable info for streaming into a tar.
263 /// Since the entry info is built largely incomplete, all getters have been
264 /// disabled; should some be needed they could be brought back by subclassing
265 /// and redeclaring the necessary one(s) in the public part of the new class.
267 {
268 public:
269  CTarUserEntryInfo(const string& name, Uint8 size)
270  {
271  m_Name = name;
272  m_Stat.orig.st_size = size;
273  }
274 
275  friend class CTar; // Accessor
276 };
277 
278 
279 /// Nice TOC(table of contents) printout.
280 NCBI_XUTIL_EXPORT ostream& operator << (ostream&, const CTarEntryInfo&);
281 
282 
283 /// Forward declaration of a tar header used internally.
284 struct STarHeader;
285 
286 
287 //////////////////////////////////////////////////////////////////////////////
288 ///
289 /// CTar class
290 ///
291 /// (Throws exceptions on most errors.)
292 /// Note that if stream constructor is used, then CTar can only perform one
293 /// pass over the archive. This means that only one full action will succeed
294 /// (and if the action was to update -- e.g. append -- the archive, it has to
295 /// be explicitly followed by Close() when no more appends are expected).
296 /// Before the next read/update action, the stream position has to be reset
297 /// explicitly to the beginning of the archive, or it may also remain at the
298 /// end of the archive for a series of successive append operations.
299 
301 {
302 public:
303  /// General flags
304  enum EFlags {
305  // --- Extract/List/Test ---
306  /// Ignore blocks of zeros in archive.
307  // Generally, 2 or more consecutive zero blocks indicate EOT.
308  fIgnoreZeroBlocks = (1<<1),
309 
310  // --- Extract/Append/Update ---
311  /// Follow symbolic links (instead of storing/extracting them)
312  fFollowLinks = (1<<2),
313 
314  // --- Extract --- (NB: fUpdate also applies to Update)
315  /// Allow to overwrite destinations with entries from the archive
316  fOverwrite = (1<<3),
317  /// Only update entries that are older than those already existing
318  fUpdate = (1<<4) | fOverwrite,
319  /// Backup destinations if they exist (all entries including dirs)
320  fBackup = (1<<5) | fOverwrite,
321  /// If destination entry exists, it must have the same type as source
322  fEqualTypes = (1<<6),
323  /// Create extracted files with the original ownership
324  fPreserveOwner = (1<<7),
325  /// Create extracted files with the original permissions
326  fPreserveMode = (1<<8),
327  /// Preserve date/times for extracted files
328  fPreserveTime = (1<<9),
329  /// Preserve all file attributes
330  fPreserveAll = fPreserveOwner | fPreserveMode | fPreserveTime,
331  /// Preserve absolute path instead of stripping the leading slash('/')
332  fKeepAbsolutePath = (1<<12),
333  /// Do not extract PAX GNU/1.0 sparse files (treat 'em as unsupported)
334  fSparseUnsupported = (1<<13),
335 
336  // --- Extract/List ---
337  /// Skip unsupported entries rather than make files out of them when
338  /// extracting (the latter is the default behavior required by POSIX)
339  fSkipUnsupported = (1<<15),
340 
341  // --- Append ---
342  /// Ignore unreadable files/dirs (still warn about them, but don't stop)
343  fIgnoreUnreadable = (1<<17),
344  /// Always use OldGNU headers for long names (default:only when needed)
345  fLongNameSupplement = (1<<18),
346 
347  // --- Debugging ---
348  fDumpEntryHeaders = (1<<20),
349  fSlowSkipWithRead = (1<<21),
350 
351  // --- Miscellaneous ---
352  /// Ignore case difference in names
353  fIgnoreNameCase = (1<<24),
354  /// Conflict overwrite allowed for entries
355  fConflictOverwrite = (1<<25),
356 
357  /// Stream tar data through
358  fStreamPipeThrough = (1<<28),
359  /// Do not trim tar file size after append/update
360  fTarfileNoTruncate = (1<<29),
361 
362  /// Suppress NCBI signatures in entry headers
363  fStandardHeaderOnly = (1<<30),
364 
365  /// Default flags
366  fDefault = fOverwrite | fPreserveAll
367  };
368  typedef unsigned int TFlags; ///< Bitwise OR of EFlags
369 
370  /// Mask type enumerator.
371  /// @enum eExtractMask
372  /// CMask can select both inclusions and exclusions (in this order) of
373  /// fully-qualified archive entries for listing or extraction, so that
374  /// e.g. ".svn" does not match an entry like "a/.svn" for processing.
375  /// @enum eExcludeMask
376  /// CMask can select both exclusions and inclusions (in this order) of
377  /// patterns of the archive entries for all operations (excepting eTest),
378  /// and so that ".svn" matches "a/b/c/.svn".
379  enum EMaskType {
380  eExtractMask = 0, ///< exact for list or extract
381  eExcludeMask ///< pattern for all but test
382  };
383 
384  /// Constructors
385  CTar(const string& filename, size_t blocking_factor = 20);
386  /// Stream version does not at all use stream positioning and so is safe on
387  /// non-positionable streams, like pipes/sockets (or magnetic tapes :-I).
388  CTar(CNcbiIos& stream, size_t blocking_factor = 20);
389 
390  /// Destructor (finalize the archive if currently open).
391  /// @sa
392  /// Close
393  virtual ~CTar();
394 
395 
396  /// Define a list of entries.
397  typedef list<CTarEntryInfo> TEntries;
398 
399  /// Define a list of files with sizes (directories and specials, such as
400  /// devices, must be given with sizes of 0; symlinks -- with the sizes
401  /// of the names they are linking to).
402  typedef pair<string, Uint8> TFile;
403  typedef list<TFile> TFiles;
404 
405 
406  //------------------------------------------------------------------------
407  // Main functions
408  //------------------------------------------------------------------------
409 
410  /// Create a new empty archive.
411  ///
412  /// If a file with such a name already exists it will be overwritten.
413  /// @sa
414  /// Append
415  void Create(void);
416 
417  /// Close the archive making sure all pending output is flushed.
418  ///
419  /// Normally, direct call of this method need _not_ intersperse successive
420  /// archive manipulations by other methods, as they open and close the
421  /// archive automagically as needed. Rather, this call is to make sure the
422  /// archive is complete earlier than it would otherwise usually be done
423  /// automatically in the destructor of the CTar object.
424  /// @sa
425  /// ~CTar
426  void Close(void);
427 
428  /// Append an entry at the end of the archive that already exists.
429  ///
430  /// Appended entry can be either a file, a directory, a symbolic link,
431  /// a device special file (block or character), or a FIFO special file,
432  /// subject to any exclusions as set by SetMask() with eExcludeMask.
433  /// The name is taken with respect to the base directory, if any set.
434  ///
435  /// Adding a directory results in all its files and subdirectories (subject
436  /// to the exclusion mask) to get added: examine the return value to find
437  /// out what has been added.
438  ///
439  /// Note that the final name of an entry may not contain embedded '..'.
440  /// Leading slash in the absolute paths will be retained. The names of
441  /// all appended entries will be converted to Unix format (that is, to
442  /// have only forward slashes in the paths, and drive letter, if any on
443  /// MS-Windows, stripped). All entries will be added at the logical end
444  /// (not always EOF) of the archive, when appending to a non-empty one.
445  ///
446  /// @warning Adding to a stream archive does not seek to the logical end
447  /// of the archive but begins at the current position right away.
448  ///
449  /// @return
450  /// A list of entries appended.
451  /// @sa
452  /// Create, Update, SetBaseDir, SetMask
453  unique_ptr<TEntries> Append(const string& name);
454 
455  /// Append an entry from a stream (exactly entry.GetSize() bytes).
456  /// @note
457  /// The base directory and name masks (if any set with SetBaseDir() or
458  /// SetMask(), respectively) are all ignored.
459  /// @return
460  /// A list (containing this one entry) with full archive info filled in
461  /// @sa
462  /// Append
463  unique_ptr<TEntries> Append(const CTarUserEntryInfo& entry,
464  CNcbiIstream& is);
465 
466  /// Look whether more recent copies of the archive members are available in
467  /// the file system, and if so, append them to the archive:
468  ///
469  /// - if fUpdate is set in processing flags, only the existing archive
470  /// entries (including directories) will be updated; that is, Update(".")
471  /// won't recursively add "." if "." is not an archive member; it will,
472  /// however, do the recursive update should "." be found in the archive;
473  ///
474  /// - if fUpdate is unset, the existing entries will be updated (if their
475  /// file system counterparts are newer), and nonexistent entries will be
476  /// added to the archive; that is, Update(".") will recursively scan "."
477  /// to update both existing entries (if newer files found), and also add
478  /// new entries for any files/directories, which are currently not in.
479  ///
480  /// @note Updating stream archive may (and most certainly will) cause
481  /// zero-filled gaps in the archive (can be read with "ignore zeroes").
482  ///
483  /// @return
484  /// A list of entries that have been updated.
485  /// @sa
486  /// Append, SetBaseDir, SetMask, SetFlags
487  unique_ptr<TEntries> Update(const string& name);
488 
489  /// Extract the entire archive (into either current directory or a
490  /// directory otherwise specified by SetBaseDir()).
491  ///
492  /// If the same-named files exist, they will be replaced (subject to
493  /// fOverwrite) or backed up (fBackup), unless fUpdate is set, which would
494  /// cause the replacement / backup only if the files are older than the
495  /// archive entries. Note that if the fOverwrite bit is stripped, then no
496  /// matching files will be updated / backed up / overwritten, but skipped.
497  ///
498  /// Extract all archive entries, whose names match the pre-set mask.
499  /// @note
500  /// Unlike Append(), extracting a matching directory does *not*
501  /// automatically extract all files within: for them to be extracted,
502  /// they still must match the mask. So if there is a directory "dir/"
503  /// stored in the archive, the extract mask can be "dir/*" for the
504  /// entire subtree to be extracted. Note that "dir/" will only extract
505  /// the directory itself, and "dir" won't cause that directory to be
506  /// extracted at all (mismatch due to the trailing slash '/' missing).
507  /// @return
508  /// A list of entries that have been actually extracted.
509  /// @sa
510  /// SetMask, SetBaseDir, SetFlags
511  unique_ptr<TEntries> Extract(void);
512 
513  /// Get information about all matching archive entries.
514  ///
515  /// @return
516  /// A list containing information on those archive entries, whose
517  /// names match the pre-set mask.
518  /// @sa
519  /// SetMask
520  unique_ptr<TEntries> List(void);
521 
522  /// Verify archive integrity.
523  ///
524  /// Read through the archive without actually extracting anything from it.
525  /// Flag fDumpEntryHeaders causes most of archive headers to be dumped to
526  /// the log (with eDiag_Info) as the Test() advances through the archive.
527  /// @sa
528  /// SetFlags
529  void Test(void);
530 
531 
532  //------------------------------------------------------------------------
533  // Utility functions
534  //------------------------------------------------------------------------
535 
536  /// Get processing flags.
537  TFlags GetFlags(void) const;
538 
539  /// Set processing flags.
540  void SetFlags(TFlags flags);
541 
542  /// Get current stream position.
543  Uint8 GetCurrentPosition(void) const;
544 
545  /// Set name mask.
546  ///
547  /// The set of masks is used to process existing entries in the archive:
548  /// both the extract and exclude masks apply to the list and extract
549  /// operations, and only the exclude mask applies to the named append.
550  /// If masks are not defined then all archive entries will be processed.
551  ///
552  /// @note Unset mask means wildcard processing (all entries match).
553  ///
554  /// @param mask
555  /// Set of masks (0 to unset the current set without setting a new one).
556  /// @param own
557  /// Whether to take ownership of the mask (delete upon CTar destruction).
558  /// @sa
559  /// SetFlags
560  void SetMask(CMask* mask,
561  EOwnership own = eNoOwnership,
562  EMaskType type = eExtractMask,
563  NStr::ECase acase = NStr::eCase);
564 
565  /// Get base directory to use for files while extracting from/adding to
566  /// the archive, and in the latter case used only for relative paths.
567  /// @sa
568  /// SetBaseDir
569  const string& GetBaseDir(void) const;
570 
571  /// Set base directory to use for files while extracting from/adding to
572  /// the archive, and in the latter case used only for relative paths.
573  /// @sa
574  /// GetBaseDir
575  void SetBaseDir(const string& dirname);
576 
577  /// Return archive size as if all specified input entries were put in it.
578  /// Note that the return value is not the exact but the upper bound of
579  /// what the archive size can be expected. This call does not recurse
580  /// into any subdirectories but relies solely upon the information as
581  /// passed via the parameter.
582  ///
583  /// The returned size includes all necessary alignments and padding.
584  /// @return
585  /// An upper estimate of archive size given that all specified files
586  /// were stored in it (the actual size may turn out to be smaller).
587  static Uint8 EstimateArchiveSize(const TFiles& files,
588  size_t blocking_factor = 20,
589  const string& base_dir = kEmptyStr);
590 
591 
592  //------------------------------------------------------------------------
593  // Streaming
594  //------------------------------------------------------------------------
595 
596  /// Iterate over the archive forward and return first (or next) entry.
597  ///
598  /// When using this method (possibly along with GetNextEntryData()), the
599  /// archive stream (if any) must not be accessed outside the CTar API,
600  /// because otherwise inconsistency in data may result.
601  /// An application may call GetNextEntryData() to stream some or all of the
602  /// data out of this entry, or it may call GetNextEntryInfo() again to skip
603  /// to the next archive entry, etc.
604  /// Note that the archive can contain multiple versions of the same entry
605  /// (in case if an update was done on it), all of which but the last one
606  /// are to be ignored. This call traverses through all those entry
607  /// versions, and sequentially exposes them to the application level.
608  /// See test suite (in test/test_tar.cpp) for a usage example.
609  /// @return
610  /// Pointer to next entry info in the archive or 0 if EOF encountered.
611  /// @sa
612  /// CTarEntryInfo, GetNextEntryData
613  const CTarEntryInfo* GetNextEntryInfo(void);
614 
615  /// Create and return an IReader, which can extract the current archive
616  /// entry that has been previously returned via GetNextEntryInfo.
617  ///
618  /// The returned pointer is non-zero only if the current entry is a file
619  /// (even of size 0). The ownership of the pointer is passed to the caller
620  /// (so it has to be explicitly deleted when no longer needed).
621  /// The IReader may be used to read all or part of data out of the entry
622  /// without affecting GetNextEntryInfo()'s ability to find any following
623  /// entry in the archive.
624  /// See test suite (in test/test_tar.cpp) for a usage example.
625  /// @return
626  /// Pointer to IReader, or 0 if the current entry is not a file.
627  /// @sa
628  /// GetNextEntryData, IReader, CRStream
629  IReader* GetNextEntryData(void);
630 
631  /// Create and return an IReader, which can extract contents of one named
632  /// file (which can be requested by a name mask in the "name" parameter).
633  ///
634  /// The tar archive is deemed to be in the specified stream "is", properly
635  /// positioned (either at the beginning of the archive, or at any
636  /// CTarEntryInfo::GetPosition(ePos_Header)'s result possibly off-set
637  /// with some fixed archive base position, e.g. if there is any preamble).
638  /// The extraction is done at the first matching entry only, then stops.
639  /// @note fStreamPipeThrough will be ignored if passed in flags.
640  /// See test suite (in test/test_tar.cpp) for a usage example.
641  /// @return
642  /// IReader interface to read the file contents with; 0 on error.
643  /// @sa
644  /// CTarEntryInfo::GetPosition, Extract, SetMask, SetFlags,
645  /// GetNextEntryInfo, GetNextEntryData, IReader, CRStream
646  static IReader* Extract(CNcbiIstream& is, const string& name,
647  TFlags flags = fSkipUnsupported);
648 
649 protected:
650  //------------------------------------------------------------------------
651  // User-redefinable callback
652  //------------------------------------------------------------------------
653 
654  /// Return false to skip the current entry when reading;
655  /// the return code gets ignored when writing.
656  ///
657  /// Note that the callback can encounter multiple entries of the same file
658  /// in case the archive has been updated (so only the last occurrence is
659  /// the actual copy of the file when extracted).
660  virtual bool Checkpoint(const CTarEntryInfo& /*current*/,
661  bool /*ifwrite: write==true, read==false*/)
662  { return true; }
663 
664 private:
665  /// Archive open mode and action
666  enum EOpenMode {
667  eNone = 0,
668  eWO = 1,
669  eRO = 2,
670  eRW = eRO | eWO
671  };
672  enum EAction {
674  eList = (1 << 2) | eRO,
675  eAppend = (1 << 3) | eRW,
676  eUpdate = eList | eAppend,
677  eExtract = (1 << 4) | eRO,
678  eTest = eList | eExtract,
679  eCreate = (1 << 5) | eWO,
680  eInternal = (1 << 6) | eRO
681  };
682  /// I/O completion code
683  enum EStatus {
684  eFailure = -1,
685  eSuccess = 0,
688  eEOF
689  };
690  /// Mask storage
691  struct SMask {
695 
696  SMask(void)
697  : mask(0), acase(NStr::eNocase), owned(eNoOwnership)
698  { }
699  };
700 
701  // Common part of initialization.
702  void x_Init(void);
703 
704  // Open/close the archive.
705  void x_Open(EAction action);
706  void x_Close(bool truncate); // NB: "truncate" affects file archives only
707 
708  // Flush the archive (w/EOT); return "true" if it is okay to truncate
709  bool x_Flush(bool nothrow = false);
710 
711  // Backspace and fast-forward the archive.
712  void x_Backspace(EAction action); // NB: m_ZeroBlockCount blocks back
713  void x_Skip(Uint8 blocks); // NB: Can do by either skip or read
714 
715  // Parse in extended entry information (PAX) for the current entry.
716  EStatus x_ParsePAXData(const string& data);
717 
718  // Read information about current entry in the archive.
719  EStatus x_ReadEntryInfo(bool dump, bool pax);
720 
721  // Pack current name or linkname into archive entry header.
722  bool x_PackCurrentName(STarHeader* header, bool link);
723 
724  // Write information for current entry into the archive.
725  void x_WriteEntryInfo(const string& name);
726 
727  // Read the archive and do the requested "action" on current entry.
728  unique_ptr<TEntries> x_ReadAndProcess(EAction action);
729 
730  // Process current entry from the archive (the actual size passed in).
731  // If action != eExtract, then just skip the entry without any processing.
732  // Return true iff the entry was successfully extracted (ie with eExtract).
733  bool x_ProcessEntry(EAction action, Uint8 size, const TEntries* done);
734 
735  // Extract current entry (archived size passed in) from the archive into
736  // the file system, and update the size still remaining in the archive, if
737  // any. Return true if the extraction succeeded, false otherwise.
738  bool x_ExtractEntry(Uint8& size, const CDirEntry* dst,
739  const CDirEntry* src);
740 
741  // Extract file data from the archive.
742  void x_ExtractPlainFile (Uint8& size, const CDirEntry* dst);
743  bool x_ExtractSparseFile(Uint8& size, const CDirEntry* dst,
744  bool dump = false);
745 
746  // Restore attributes of an entry in the file system.
747  // If "path" is not specified, then the destination path will be
748  // constructed from "info", and the base directory (if any). Otherwise,
749  // "path" will be used "as is", assuming it corresponds to "info".
750  void x_RestoreAttrs(const CTarEntryInfo& info,
751  TFlags what,
752  const CDirEntry* path = 0,
753  TTarMode perm = 0/*override*/) const;
754 
755  // Read a text string terminated with '\n'.
756  string x_ReadLine(Uint8& size, const char*& data, size_t& nread);
757 
758  // Read/write specified number of bytes from/to the archive.
759  const char* x_ReadArchive (size_t& n);
760  void x_WriteArchive(size_t n, const char* buffer = 0);
761 
762  // Append an entry from the file system to the archive ("toc" when update).
763  unique_ptr<TEntries> x_Append(const string& name, const TEntries* toc = 0);
764 
765  // Append an entry from an istream to the archive.
766  unique_ptr<TEntries> x_Append(const CTarUserEntryInfo& entry,
767  CNcbiIstream& is);
768 
769  // Append data from an istream to the archive.
770  void x_AppendStream(const string& name, CNcbiIstream& is);
771 
772  // Append a regular file to the archive.
773  bool x_AppendFile(const string& file);
774 
775 private:
776  string m_FileName; ///< Tar archive file name (only if file)
777  CNcbiFstream* m_FileStream; ///< File stream of the archive (if file)
778  CNcbiIos& m_Stream; ///< Archive stream (used for all I/O)
779  size_t m_ZeroBlockCount; ///< Zero blocks seen in between entries
780  const size_t m_BufferSize; ///< Buffer(record) size for I/O operations
781  size_t m_BufferPos; ///< Position within the record
782  Uint8 m_StreamPos; ///< Position in stream (0-based)
783  char* m_BufPtr; ///< Page-unaligned buffer pointer
784  char* m_Buffer; ///< I/O buffer (page-aligned)
785  SMask m_Mask[2]; ///< Entry masks for operations
786  EOpenMode m_OpenMode; ///< What was it opened for
787  bool m_Modified; ///< True after at least one write
788  bool m_Bad; ///< True if a fatal output error occurred
789  TFlags m_Flags; ///< Bitwise OR of flags
790  string m_BaseDir; ///< Base directory for relative paths
791  CTarEntryInfo m_Current; ///< Current entry being processed
792 
793 private:
794  // Prohibit assignment and copy
795  CTar& operator=(const CTar&);
796  CTar(const CTar&);
797 
798  friend class CTarReader;
799 };
800 
801 
802 //////////////////////////////////////////////////////////////////////////////
803 //
804 // Inline methods
805 //
806 
807 inline
808 void CTar::Create(void)
809 {
810  x_Open(eCreate);
811 }
812 
813 inline
814 void CTar::Close(void)
815 {
816  x_Close(x_Flush());
817 }
818 
819 inline
820 unique_ptr<CTar::TEntries> CTar::Append(const string& name)
821 {
822  x_Open(eAppend);
823  return x_Append(name);
824 }
825 
826 inline
827 unique_ptr<CTar::TEntries> CTar::Append(const CTarUserEntryInfo& entry,
828  CNcbiIstream& is)
829 {
830  x_Open(eAppend);
831  return x_Append(entry, is);
832 }
833 
834 inline
835 unique_ptr<CTar::TEntries> CTar::Update(const string& name)
836 {
837  x_Open(eUpdate);
838  return x_Append(name, x_ReadAndProcess(eUpdate).get());
839 }
840 
841 inline
842 unique_ptr<CTar::TEntries> CTar::List(void)
843 {
844  x_Open(eList);
845  return x_ReadAndProcess(eList);
846 }
847 
848 inline
849 void CTar::Test(void)
850 {
851  x_Open(eTest);
853 }
854 
855 inline
857 {
858  return m_Flags;
859 }
860 
861 inline
863 {
864  m_Flags = flags;
865 }
866 
867 inline Uint8 CTar::GetCurrentPosition(void) const
868 {
869  return m_StreamPos;
870 }
871 
872 inline
873 const string& CTar::GetBaseDir(void) const
874 {
875  return m_BaseDir;
876 }
877 
878 
880 
881 
882 /* @} */
883 
884 
885 #endif /* UTIL_COMPRESS__TAR__HPP */
EStatus
ncbi::TMaskedQueryRegions mask
CCoreException –.
Definition: ncbiexpt.hpp:1476
CDirEntry –.
Definition: ncbifile.hpp:262
CMask –.
Definition: ncbi_mask.hpp:59
CTarEntryInfo class.
Definition: tar.hpp:159
CTarException –.
Definition: tar.hpp:107
User-creatable info for streaming into a tar.
Definition: tar.hpp:267
CTar class.
Definition: tar.hpp:301
CTime –.
Definition: ncbitime.hpp:296
A very basic data-read interface.
NStr –.
Definition: ncbistr.hpp:243
static uch flags
bool operator==(const CEquivRange &A, const CEquivRange &B)
static void Test(const char *bind1, SQLSMALLINT type1, const char *bind2, SQLSMALLINT type2)
Definition: convert_error.c:15
char data[12]
Definition: iconv.c:80
@ eNoOwnership
No ownership is assumed.
Definition: ncbi_types.h:135
const string & GetPath(void) const
Definition: tar.hpp:228
void Create(void)
Create a new empty archive.
Definition: tar.hpp:808
void Test(void)
Verify archive integrity.
Definition: tar.hpp:849
const size_t m_BufferSize
Buffer(record) size for I/O operations.
Definition: tar.hpp:780
string m_GroupName
Group name.
Definition: tar.hpp:251
string m_LinkName
Link name if type is e{Sym|Hard}Link.
Definition: tar.hpp:249
SMask(void)
Definition: tar.hpp:696
void x_Close(bool truncate)
Definition: tar.cpp:1486
const string & GetLinkName(void) const
Definition: tar.hpp:190
const string & GetUserName(void) const
Definition: tar.hpp:191
bool m_Bad
True if a fatal output error occurred.
Definition: tar.hpp:788
CTarEntryInfo m_Current
Current entry being processed.
Definition: tar.hpp:791
bool m_Modified
True after at least one write.
Definition: tar.hpp:787
Uint8 m_StreamPos
Position in stream (0-based)
Definition: tar.hpp:782
void x_Open(EAction action)
Definition: tar.cpp:1507
CTar(const CTar &)
EOpenMode
Archive open mode and action.
Definition: tar.hpp:666
time_t GetLastAccessTime(void) const
Definition: tar.hpp:199
Uint8 GetCurrentPosition(void) const
Get current stream position.
Definition: tar.hpp:867
unique_ptr< TEntries > x_Append(const string &name, const TEntries *toc=0)
Definition: tar.cpp:4190
char * m_Buffer
I/O buffer (page-aligned)
Definition: tar.hpp:784
string m_Path
Iff filesystem involved in processing.
Definition: tar.hpp:256
EType GetType(void) const
Definition: tar.hpp:188
string m_UserName
User name.
Definition: tar.hpp:250
pair< string, Uint8 > TFile
Define a list of files with sizes (directories and specials, such as devices, must be given with size...
Definition: tar.hpp:402
const string & GetGroupName(void) const
Definition: tar.hpp:192
const string & GetBaseDir(void) const
Get base directory to use for files while extracting from/adding to the archive, and in the latter ca...
Definition: tar.hpp:873
CNcbiIos & m_Stream
Archive stream (used for all I/O)
Definition: tar.hpp:778
Uint8 GetSize(void) const
Definition: tar.hpp:211
virtual bool Checkpoint(const CTarEntryInfo &, bool)
Return false to skip the current entry when reading; the return code gets ignored when writing.
Definition: tar.hpp:660
ETarModeBits
TTarMode –.
Definition: tar.hpp:73
virtual const char * GetErrCodeString(void) const override
Translate from an error code value to its string representation.
Definition: tar.hpp:127
CTar & operator=(const CTar &)
TFlags m_Flags
Bitwise OR of flags.
Definition: tar.hpp:789
unsigned int GetGroupId(void) const
Definition: tar.hpp:222
CTime GetModificationCTime(void) const
Definition: tar.hpp:195
list< CTarEntryInfo > TEntries
Define a list of entries.
Definition: tar.hpp:397
Uint8 GetPosition(EPos which) const
Definition: tar.hpp:224
NCBI_EXCEPTION_DEFAULT(CTarException, CCoreException)
CTime GetCreationCTime(void) const
Definition: tar.hpp:207
char * m_BufPtr
Page-unaligned buffer pointer.
Definition: tar.hpp:783
EType
Archive entry type.
Definition: tar.hpp:162
unsigned int TTarMode
Bitwise OR of ETarModeBits.
Definition: tar.hpp:89
time_t GetModificationTime(void) const
Definition: tar.hpp:193
CTime GetLastAccessCTime(void) const
Definition: tar.hpp:201
CMask * mask
Definition: tar.hpp:692
const string & GetName(void) const
Definition: tar.hpp:189
size_t m_ZeroBlockCount
Zero blocks seen in between entries.
Definition: tar.hpp:779
CTarUserEntryInfo(const string &name, Uint8 size)
Definition: tar.hpp:269
Uint8 m_Pos
Entry (not data!) position in archive.
Definition: tar.hpp:254
unique_ptr< TEntries > List(void)
Get information about all matching archive entries.
Definition: tar.hpp:842
void SetFlags(TFlags flags)
Set processing flags.
Definition: tar.hpp:862
void Close(void)
Close the archive making sure all pending output is flushed.
Definition: tar.hpp:814
string m_Name
Entry name.
Definition: tar.hpp:248
EOwnership owned
Definition: tar.hpp:694
EOpenMode m_OpenMode
What was it opened for.
Definition: tar.hpp:786
unique_ptr< TEntries > Append(const string &name)
Append an entry at the end of the archive that already exists.
Definition: tar.hpp:820
EType m_Type
Type.
Definition: tar.hpp:247
TFlags GetFlags(void) const
Get processing flags.
Definition: tar.hpp:856
list< TFile > TFiles
Definition: tar.hpp:403
EMaskType
Definition: tar.hpp:379
unsigned int GetUserId(void) const
Definition: tar.hpp:220
unsigned int TFlags
Bitwise OR of EFlags.
Definition: tar.hpp:368
unique_ptr< TEntries > Update(const string &name)
Look whether more recent copies of the archive members are available in the file system,...
Definition: tar.hpp:835
string m_FileName
Tar archive file name (only if file)
Definition: tar.hpp:776
EPos
Position type.
Definition: tar.hpp:179
EAction
Definition: tar.hpp:672
CTarEntryInfo(Uint8 pos=0)
Definition: tar.hpp:243
string m_BaseDir
Base directory for relative paths.
Definition: tar.hpp:790
bool x_Flush(bool nothrow=false)
Definition: tar.cpp:1397
CDirEntry::SStat m_Stat
Direntry-compatible info.
Definition: tar.hpp:253
unique_ptr< TEntries > x_ReadAndProcess(EAction action)
Definition: tar.cpp:2940
streamsize m_HeaderSize
Total size of all headers for the entry.
Definition: tar.hpp:252
EStatus
I/O completion code.
Definition: tar.hpp:683
size_t m_BufferPos
Position within the record.
Definition: tar.hpp:781
EFlags
General flags.
Definition: tar.hpp:304
NStr::ECase acase
Definition: tar.hpp:693
time_t GetCreationTime(void) const
Definition: tar.hpp:205
ostream & operator<<(ostream &, const CTarEntryInfo &)
Nice TOC(table of contents) printout.
CNcbiFstream * m_FileStream
File stream of the archive (if file)
Definition: tar.hpp:777
@ fTarOWrite
write by other
Definition: tar.hpp:86
@ fTarUWrite
write by owner
Definition: tar.hpp:80
@ fTarGRead
read by group
Definition: tar.hpp:82
@ fTarORead
read by other
Definition: tar.hpp:85
@ fTarURead
read by owner
Definition: tar.hpp:79
@ fTarGExecute
execute/search by group
Definition: tar.hpp:84
@ fTarGWrite
write by group
Definition: tar.hpp:83
@ fTarOExecute
execute/search by other
Definition: tar.hpp:87
@ fTarSetGID
set GID on execution
Definition: tar.hpp:76
@ fTarSetUID
set UID on execution
Definition: tar.hpp:75
@ fTarSticky
reserved (sticky bit)
Definition: tar.hpp:77
@ fTarUExecute
execute/search by owner
Definition: tar.hpp:81
@ eSparseFile
GNU/STAR sparse file.
Definition: tar.hpp:173
@ eHardLink
Hard link.
Definition: tar.hpp:170
@ eGNULongName
GNU long name.
Definition: tar.hpp:174
@ ePAXHeader
PAX extended header.
Definition: tar.hpp:172
@ eVolHeader
Volume header.
Definition: tar.hpp:171
@ ePos_Header
Definition: tar.hpp:180
@ eUpdate
Definition: tar.hpp:676
@ eList
Definition: tar.hpp:674
@ eCreate
Definition: tar.hpp:679
@ eAppend
Definition: tar.hpp:675
@ eTest
Definition: tar.hpp:678
@ eZeroBlock
Definition: tar.hpp:687
@ eContinue
Definition: tar.hpp:686
@ eUnsupportedEntryType
Definition: tar.hpp:112
@ eUnsupportedSource
Definition: tar.hpp:113
@ eUnsupportedTarFormat
Definition: tar.hpp:111
@ eNameTooLong
Definition: tar.hpp:114
virtual void x_Init(const CDiagCompileInfo &info, const string &message, const CException *prev_exception, EDiagSev severity)
Helper method for initializing exception data.
Definition: ncbiexpt.cpp:509
EErrCode
Error types that an application can generate.
Definition: ncbiexpt.hpp:884
TErrCode GetErrCode(void) const
Definition: ncbiexpt.hpp:1493
virtual const char * GetErrCodeString(void) const
Get error code interpreted as text.
Definition: ncbiexpt.cpp:444
unsigned int TSpecialModeBits
Bitwise OR of ESpecialModeBits.
Definition: ncbifile.hpp:1180
unsigned int TMode
Bitwise OR of "EMode".
Definition: ncbifile.hpp:1173
TNcbiSys_stat orig
Original stat structure.
Definition: ncbifile.hpp:824
@ eDir
Directory.
Definition: ncbifile.hpp:785
@ eFile
Regular file.
Definition: ncbifile.hpp:784
@ eSymLink
Symbolic link (UNIX only)
Definition: ncbifile.hpp:788
@ eUnknown
Unknown type.
Definition: ncbifile.hpp:794
@ eBlockSpecial
Block special (UNIX only)
Definition: ncbifile.hpp:791
@ eCharSpecial
Character special.
Definition: ncbifile.hpp:792
@ ePipe
Pipe.
Definition: ncbifile.hpp:786
uint64_t Uint8
8-byte (64-bit) unsigned integer
Definition: ncbitype.h:105
#define END_NCBI_SCOPE
End previously defined NCBI scope.
Definition: ncbistl.hpp:103
#define BEGIN_NCBI_SCOPE
Define ncbi namespace.
Definition: ncbistl.hpp:100
IO_PREFIX::istream CNcbiIstream
Portable alias for istream.
Definition: ncbistre.hpp:146
IO_PREFIX::ios CNcbiIos
Portable alias for ios.
Definition: ncbistre.hpp:140
IO_PREFIX::fstream CNcbiFstream
Portable alias for fstream.
Definition: ncbistre.hpp:538
#define kEmptyStr
Definition: ncbistr.hpp:123
ECase
Which type of string comparison.
Definition: ncbistr.hpp:1204
@ eCase
Case sensitive compare.
Definition: ncbistr.hpp:1205
void SetNanoSecond(long nanosecond)
Set nanoseconds.
Definition: ncbitime.cpp:1127
enum ENcbiOwnership EOwnership
Ownership relations between objects.
#define dump(b)
FILE * file
yy_size_t n
static MDB_envinfo info
Definition: mdb_load.c:37
const struct ncbi::grid::netcache::search::fields::SIZE size
static EIO_Status x_Flush(CONN conn, const STimeout *timeout, int isflush)
static void x_Close(SHttpConnector *uuu)
static const char * x_ReadLine(const char *path, char *line, size_t size)
Definition: ncbi_namerd.c:1084
static SERV_ITER x_Open(const char *service, int ismask, TSERV_Type types, unsigned int preferred_host, unsigned short preferred_port, double preference, const SConnNetInfo *net_info, SSERV_InfoCPtr skip[], size_t n_skip, int external, const char *arg, const char *val, SSERV_Info **info, HOST_INFO *host_info)
Definition: ncbi_service.c:253
Defines classes: CDirEntry, CFile, CDir, CSymLink, CMemoryFile, CFileUtil, CFileLock,...
@ eRead
Definition: ns_types.hpp:56
static uint8_t * buffer
Definition: pcre2test.c:1016
@ eSuccess
Successfully retrieved.
NCBI_XUTIL_EXPORT
Parameter to control printing diagnostic message about conversion of static array data from a differe...
Definition: static_set.hpp:72
Alternate stat structure for use instead of the standard struct stat.
Definition: ncbifile.hpp:823
Mask storage.
Definition: tar.hpp:691
POSIX "ustar" tar archive member header.
Definition: tar.cpp:392
static DP_BlockInfo * blocks
Definition: type.c:6
done
Definition: token1.c:1
Modified on Fri Sep 20 14:57:40 2024 by modify_doxy.py rev. 669887