NCBI C++ ToolKit
tar.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: tar.cpp 102756 2024-07-08 15:16:10Z lavr $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Vladimir Ivanov
27  * Anton Lavrentiev
28  *
29  * File Description:
30  * Tar archive API.
31  *
32  * Supports subsets of POSIX.1-1988 (ustar), POSIX 1003.1-2001 (posix), old
33  * GNU (POSIX 1003.1), and V7 formats (all partially but reasonably). New
34  * archives are created using POSIX (genuine ustar) format, using GNU
35  * extensions for long names/links only when unavoidable. It cannot,
36  * however, handle all the exotics like sparse files (except for GNU/1.0
37  * sparse PAX extension) and contiguous files (yet still can work around both
38  * of them gracefully, if needed), multivolume / incremental archives, etc.
39  * but just regular files, devices (character or block), FIFOs, directories,
40  * and limited links: can extract both hard- and symlinks, but can store
41  * symlinks only. Also, this implementation is only minimally PAX (Portable
42  * Archive eXchange)-aware for file extractions (and does not yet use any PAX
43  * extensions to store the files).
44  *
45  */
46 
47 #include <ncbi_pch.hpp>
48 // Cancel __wur (warn unused result) ill effects in GCC
49 #ifdef _FORTIFY_SOURCE
50 # undef _FORTIFY_SOURCE
51 #endif /*_FORTIFY_SOURCE*/
52 #define _FORTIFY_SOURCE 0
53 #include <util/compress/tar.hpp>
54 #include <util/error_codes.hpp>
55 
56 #if !defined(NCBI_OS_UNIX) && !defined(NCBI_OS_MSWIN)
57 # error "Class CTar can be defined on UNIX and MS-Windows platforms only!"
58 #endif
59 
60 #if defined(NCBI_OS_UNIX)
61 # include "../../../corelib/ncbi_os_unix_p.hpp"
62 # include <grp.h>
63 # include <pwd.h>
64 # include <unistd.h>
65 # ifdef NCBI_OS_IRIX
66 # include <sys/mkdev.h>
67 # endif //NCBI_OS_IRIX
68 # ifdef HAVE_SYS_SYSMACROS_H
69 # include <sys/sysmacros.h>
70 # endif //HAVE_SYS_SYSMACROS_H
71 # ifdef NCBI_OS_DARWIN
72 // macOS supplies these as inline functions rather than macros.
73 # define major major
74 # define minor minor
75 # define makedev makedev
76 # endif
77 # if !defined(major) || !defined(minor) || !defined(makedev)
78 # error "Device macros undefined in this UNIX build!"
79 # endif
80 #elif defined(NCBI_OS_MSWIN)
81 # include "../../../corelib/ncbi_os_mswin_p.hpp"
82 # include <io.h>
83 typedef unsigned int mode_t;
84 typedef unsigned int uid_t;
85 typedef unsigned int gid_t;
86 #endif //NCBI_OS
87 
88 
89 #define NCBI_USE_ERRCODE_X Util_Compress
90 #define NCBI_MODULE NCBITAR
91 
92 
94 
95 
96 /////////////////////////////////////////////////////////////////////////////
97 //
98 // TAR helper routines
99 //
100 
101 // Convert a number to an octal string padded to the left
102 // with [leading] zeros ('0') and having _no_ trailing '\0'.
103 static bool s_NumToOctal(Uint8 val, char* ptr, size_t len)
104 {
105  _ASSERT(len > 0);
106  do {
107  ptr[--len] = char('0' + char(val & 7));
108  val >>= 3;
109  } while (len);
110  return val ? false : true;
111 }
112 
113 
114 // Convert an octal number (possibly preceded by spaces) to numeric form.
115 // Stop either at the end of the field or at first '\0' (if any).
116 static bool s_OctalToNum(Uint8& val, const char* ptr, size_t len)
117 {
118  _ASSERT(ptr && len > 0);
119  size_t i = *ptr ? 0 : 1;
120  while (i < len && ptr[i]) {
121  if (!isspace((unsigned char) ptr[i]))
122  break;
123  ++i;
124  }
125  val = 0;
126  bool okay = false;
127  while (i < len && '0' <= ptr[i] && ptr[i] <= '7') {
128  okay = true;
129  val <<= 3;
130  val |= ptr[i++] - '0';
131  }
132  while (i < len && ptr[i]) {
133  if (!isspace((unsigned char) ptr[i]))
134  return false;
135  ++i;
136  }
137  return okay;
138 }
139 
140 
141 static bool s_NumToBase256(Uint8 val, char* ptr, size_t len)
142 {
143  _ASSERT(len > 0);
144  do {
145  ptr[--len] = (unsigned char)(val & 0xFF);
146  val >>= 8;
147  } while (len);
148  *ptr |= '\x80'; // set base-256 encoding flag
149  return val ? false : true;
150 }
151 
152 
153 // Return 0 (false) if conversion failed; 1 if the value converted to
154 // conventional octal representation (perhaps, with terminating '\0'
155 // sacrificed), or -1 if the value converted using base-256.
156 static int s_EncodeUint8(Uint8 val, char* ptr, size_t len)
157 { // Max file size (for len == 12):
158  if (s_NumToOctal (val, ptr, len)) { // 8GiB-1
159  return 1/*okay*/;
160  }
161  if (s_NumToOctal (val, ptr, ++len)) { // 64GiB-1
162  return 1/*okay*/;
163  }
164  if (s_NumToBase256(val, ptr, len)) { // up to 2^94-1
165  return -1/*okay, base-256*/;
166  }
167  return 0/*failure*/;
168 }
169 
170 
171 // Return true if conversion succeeded; false otherwise.
172 static bool s_Base256ToNum(Uint8& val, const char* ptr, size_t len)
173 {
174  const Uint8 lim = kMax_UI8 >> 8;
175  if (*ptr & '\x40') { // negative base-256?
176  return false;
177  }
178  val = *ptr++ & '\x3F';
179  while (--len) {
180  if (val > lim) {
181  return false;
182  }
183  val <<= 8;
184  val |= (unsigned char)(*ptr++);
185  }
186  return true;
187 }
188 
189 
190 // Return 0 (false) if conversion failed; 1 if the value was read into
191 // as a conventional octal string (perhaps, without the terminating '\0');
192 // or -1 if base-256 representation used.
193 static int s_DecodeUint8(Uint8& val, const char* ptr, size_t len)
194 {
195  if (*ptr & '\x80') {
196  return s_Base256ToNum(val, ptr, len) ? -1/*okay*/ : 0/*failure*/;
197  } else {
198  return s_OctalToNum (val, ptr, len) ? 1/*okay*/ : 0/*failure*/;
199  }
200 }
201 
202 
203 static void s_TarToMode(TTarMode perm,
204  CDirEntry::TMode* usr_mode,
205  CDirEntry::TMode* grp_mode,
206  CDirEntry::TMode* oth_mode,
207  CDirEntry::TSpecialModeBits* special_bits)
208 {
209  // User
210  if (usr_mode) {
211  *usr_mode = ((perm & fTarURead ? CDirEntry::fRead : 0) |
212  (perm & fTarUWrite ? CDirEntry::fWrite : 0) |
213  (perm & fTarUExecute ? CDirEntry::fExecute : 0));
214  }
215 
216  // Group
217  if (grp_mode) {
218  *grp_mode = ((perm & fTarGRead ? CDirEntry::fRead : 0) |
219  (perm & fTarGWrite ? CDirEntry::fWrite : 0) |
220  (perm & fTarGExecute ? CDirEntry::fExecute : 0));
221  }
222 
223  // Others
224  if (oth_mode) {
225  *oth_mode = ((perm & fTarORead ? CDirEntry::fRead : 0) |
226  (perm & fTarOWrite ? CDirEntry::fWrite : 0) |
227  (perm & fTarOExecute ? CDirEntry::fExecute : 0));
228  }
229 
230  // Special bits
231  if (special_bits) {
232  *special_bits = ((perm & fTarSetUID ? CDirEntry::fSetUID : 0) |
233  (perm & fTarSetGID ? CDirEntry::fSetGID : 0) |
234  (perm & fTarSticky ? CDirEntry::fSticky : 0));
235  }
236 }
237 
238 
240 {
// NOTE(review): the declaration line preceding this body is missing from
// this listing; the body reads a `perm` argument and returns a mode_t.
//
// Build a native mode_t from tar permission bits.  Each native S_I* constant
// is used only if the platform defines it, so a flag with no native
// counterpart is silently dropped.  For the owner bits, the legacy
// S_IREAD / S_IWRITE / S_IEXEC names serve as fallbacks.
241  mode_t mode = (
242 #ifdef S_ISUID
243  (perm & fTarSetUID ? S_ISUID : 0) |
244 #endif
245 #ifdef S_ISGID
246  (perm & fTarSetGID ? S_ISGID : 0) |
247 #endif
248 #ifdef S_ISVTX
249  (perm & fTarSticky ? S_ISVTX : 0) |
250 #endif
251 #if defined(S_IRUSR)
252  (perm & fTarURead ? S_IRUSR : 0) |
253 #elif defined(S_IREAD)
254  (perm & fTarURead ? S_IREAD : 0) |
255 #endif
256 #if defined(S_IWUSR)
257  (perm & fTarUWrite ? S_IWUSR : 0) |
258 #elif defined(S_IWRITE)
259  (perm & fTarUWrite ? S_IWRITE : 0) |
260 #endif
261 #if defined(S_IXUSR)
262  (perm & fTarUExecute ? S_IXUSR : 0) |
263 #elif defined(S_IEXEC)
264  (perm & fTarUExecute ? S_IEXEC : 0) |
265 #endif
266 #ifdef S_IRGRP
267  (perm & fTarGRead ? S_IRGRP : 0) |
268 #endif
269 #ifdef S_IWGRP
270  (perm & fTarGWrite ? S_IWGRP : 0) |
271 #endif
272 #ifdef S_IXGRP
273  (perm & fTarGExecute ? S_IXGRP : 0) |
274 #endif
275 #ifdef S_IROTH
276  (perm & fTarORead ? S_IROTH : 0) |
277 #endif
278 #ifdef S_IWOTH
279  (perm & fTarOWrite ? S_IWOTH : 0) |
280 #endif
281 #ifdef S_IXOTH
282  (perm & fTarOExecute ? S_IXOTH : 0) |
283 #endif
// The trailing 0 terminates the OR-chain whichever branches were compiled in
284  0);
285  return mode;
286 }
287 
288 
290 {
// NOTE(review): the declaration line preceding this body is missing from
// this listing; the body reads a `mode` argument and returns a TTarMode.
//
// Build tar permission bits from a native mode.  Native S_I* constants are
// used only where the platform defines them.  Where group/other read bits
// are not defined, S_IREAD (if available) is used to emulate them.
291  // Keep in mind that the mode may be extracted on a different platform
292  TTarMode perm = (
293 #ifdef S_ISUID
294  (mode & S_ISUID ? fTarSetUID : 0) |
295 #endif
296 #ifdef S_ISGID
297  (mode & S_ISGID ? fTarSetGID : 0) |
298 #endif
299 #ifdef S_ISVTX
300  (mode & S_ISVTX ? fTarSticky : 0) |
301 #endif
302 #if defined(S_IRUSR)
303  (mode & S_IRUSR ? fTarURead : 0) |
304 #elif defined(S_IREAD)
305  (mode & S_IREAD ? fTarURead : 0) |
306 #endif
307 #if defined(S_IWUSR)
308  (mode & S_IWUSR ? fTarUWrite : 0) |
309 #elif defined(S_IWRITE)
310  (mode & S_IWRITE ? fTarUWrite : 0) |
311 #endif
312 #if defined(S_IXUSR)
313  (mode & S_IXUSR ? fTarUExecute : 0) |
314 #elif defined(S_IEXEC)
315  (mode & S_IEXEC ? fTarUExecute : 0) |
316 #endif
317 #if defined(S_IRGRP)
318  (mode & S_IRGRP ? fTarGRead : 0) |
319 #elif defined(S_IREAD)
320  // emulate read permission when file is readable
321  (mode & S_IREAD ? fTarGRead : 0) |
322 #endif
323 #ifdef S_IWGRP
324  (mode & S_IWGRP ? fTarGWrite : 0) |
325 #endif
326 #ifdef S_IXGRP
327  (mode & S_IXGRP ? fTarGExecute : 0) |
328 #endif
329 #if defined(S_IROTH)
330  (mode & S_IROTH ? fTarORead : 0) |
331 #elif defined(S_IREAD)
332  // emulate read permission when file is readable
333  (mode & S_IREAD ? fTarORead : 0) |
334 #endif
335 #ifdef S_IWOTH
336  (mode & S_IWOTH ? fTarOWrite : 0) |
337 #endif
338 #ifdef S_IXOTH
339  (mode & S_IXOTH ? fTarOExecute : 0) |
340 #endif
341  0);
342 #if defined(S_IFMT) || defined(_S_IFMT)
// NOTE(review): the line that declares `mask` is missing from this listing
// (presumably it is initialized from `mode`; confirm against the real file).
// The file-type bits are isolated below and merged into the result only if
// they do not overlap the low 12 permission bits (07777).
344 # ifdef S_IFMT
345  mask &= S_IFMT;
346 # else
347  mask &= _S_IFMT;
348 # endif
349  if (!(mask & 07777)) {
350  perm |= mask;
351  }
352 #endif
353  return perm;
354 }
355 
356 
/// Length of a possibly unterminated fixed-size string field.
/// @return the offset of the first '\0' within the first `maxsize` bytes of
///         `ptr`, or `maxsize` if there is no '\0' in that range.
static size_t s_Length(const char* ptr, size_t maxsize)
{
    const void* nul = memchr(ptr, '\0', maxsize);
    if (!nul) {
        return maxsize;
    }
    return (size_t)((const char*) nul - ptr);
}
362 
363 
364 //////////////////////////////////////////////////////////////////////////////
365 //
366 // Constants / macros / typedefs
367 //
368 
// Block arithmetic helpers.  BLOCK_SIZE is SIZE_OF(1), i.e. 1 << 9 = 512, so
// all of these are plain shifts / masks over a power-of-two block size.
//   ALIGN_SIZE(size) - round `size` up to a multiple of BLOCK_SIZE
//   OFFSET_OF(size)  - remainder of `size` within a block (size % BLOCK_SIZE)
//   BLOCK_OF(pos)    - byte position -> block count (pos / BLOCK_SIZE)
//   SIZE_OF(blk)     - block count -> byte size (blk * BLOCK_SIZE)
// BLOCK_SIZE is defined after the macros that mention it; that is fine since
// macros are expanded only at the point of use.
369 /// Round up to the nearest multiple of BLOCK_SIZE:
370 //#define ALIGN_SIZE(size) SIZE_OF(BLOCK_OF(size + (BLOCK_SIZE-1)))
371 #define ALIGN_SIZE(size) (((size) + (BLOCK_SIZE-1)) & ~(BLOCK_SIZE-1))
372 #define OFFSET_OF(size) ( (size) & (BLOCK_SIZE-1))
373 #define BLOCK_OF(pos) ((pos) >> 9)
374 #define SIZE_OF(blk) ((blk) << 9)
375 
376 /// Tar block size (512 bytes)
377 #define BLOCK_SIZE SIZE_OF(1)
378 
379 
380 /// Recognized TAR formats
// NOTE(review): the enum header and its first enumerators are missing from
// this listing; only the last two enumerators are visible here.  Their
// trailing comments indicate that both values include the eTar_Ustar bit.
386  eTar_Posix = 5, // |= eTar_Ustar
387  eTar_Star = 6 // |= eTar_Ustar
388 };
389 
390 
391 /// POSIX "ustar" tar archive member header
// All fields are fixed-size character arrays; the number in each trailing
// comment is the byte offset of the field within the header.  The fields up
// to and including `devminor` occupy bytes 0..344.  The trailing 155 bytes
// (345..499) are a union of three format-specific layouts: the plain
// `prefix`, the old GNU extras (`gnu`), and the star extras (`star`).
// The member order and sizes define the layout and must not be changed.
392 struct STarHeader { // byte offset
393  char name[100]; // 0
394  char mode[8]; // 100
395  char uid[8]; // 108
396  char gid[8]; // 116
397  char size[12]; // 124
398  char mtime[12]; // 136
399  char checksum[8]; // 148
400  char typeflag[1]; // 156
401  char linkname[100]; // 157
402  char magic[6]; // 257
403  char version[2]; // 263
404  char uname[32]; // 265
405  char gname[32]; // 297
406  char devmajor[8]; // 329
407  char devminor[8]; // 337
408  union { // 345
409  char prefix[155]; // NB: not valid with old GNU format (no need)
410  struct { // NB: old GNU format only
411  char atime[12];
412  char ctime[12]; // 357
413  char unused[17]; // 369
414  char sparse[96]; // 386 sparse map: ([12] offset + [12] size) x 4
415  char contind[1]; // 482 non-zero if continued in the next header
416  char realsize[12];// 483 true file size
417  } gnu;
418  struct {
419  char prefix[131]; // NB: prefix + 107: realsize (char[12]) for 'S'
420  char atime[12]; // 476
421  char ctime[12]; // 488
422  } star;
423  }; // 500
// The declared members end at offset 500; per the note below, the signature
// "NCBI" is kept in the last 4 bytes of the enclosing 512-byte block (508).
424  // NCBI in last 4 bytes // 508
425 };
426 
427 
428 /// Block as a header.
// NOTE(review): the member declarations of this union are missing from this
// listing.  Code in this file accesses it through two members: `buffer`
// (iterated byte-wise over sizeof(buffer)) and `header` (an STarHeader).
429 union TTarBlock {
432 };
433 
434 
435 static bool s_TarChecksum(TTarBlock* block, bool isgnu)
436 {
437  STarHeader* h = &block->header;
438  size_t len = sizeof(h->checksum) - (isgnu ? 2 : 1);
439 
440  // Compute the checksum
441  memset(h->checksum, ' ', sizeof(h->checksum));
442  unsigned long checksum = 0;
443  const unsigned char* p = (const unsigned char*) block->buffer;
444  for (size_t i = 0; i < sizeof(block->buffer); ++i) {
445  checksum += *p++;
446  }
447  // ustar: '\0'-terminated checksum
448  // GNU special: 6 digits, then '\0', then a space [already in place]
449  if (!s_NumToOctal(checksum, h->checksum, len)) {
450  return false;
451  }
452  h->checksum[len] = '\0';
453  return true;
454 }
455 
456 
457 
458 //////////////////////////////////////////////////////////////////////////////
459 //
460 // CTarEntryInfo
461 //
462 
464 {
// NOTE(review): the declaration line preceding this body is missing from
// this listing.
// Only the low 12 bits (mask 07777) of the stored st_mode are returned.
465  // Raw tar mode gets returned here (as kept in the info)
466  return (TTarMode)(m_Stat.orig.st_mode & 07777);
467 }
468 
469 
471  CDirEntry::TMode* grp_mode,
472  CDirEntry::TMode* oth_mode,
473  CDirEntry::TSpecialModeBits* special_bits) const
474 {
// NOTE(review): the first line of this declaration (return type, name and
// the `usr_mode` parameter) is missing from this listing.
// Delegates to the static s_TarToMode() helper, passing GetMode() as the
// permission bits and forwarding the four output pointers unchanged.
475  s_TarToMode(GetMode(), usr_mode, grp_mode, oth_mode, special_bits);
476 }
477 
478 
479 unsigned int CTarEntryInfo::GetMajor(void) const
480 {
481 #ifdef major
482  if (m_Type == eCharDev || m_Type == eBlockDev) {
483  return major(m_Stat.orig.st_rdev);
484  }
485 #else
486  if (sizeof(int) >= 4 && sizeof(m_Stat.orig.st_rdev) >= 4) {
487  return (*((unsigned int*) &m_Stat.orig.st_rdev) >> 16) & 0xFFFF;
488  }
489 #endif //major
490  return (unsigned int)(-1);
491 }
492 
493 
494 unsigned int CTarEntryInfo::GetMinor(void) const
495 {
496 #ifdef minor
497  if (m_Type == eCharDev || m_Type == eBlockDev) {
498  return minor(m_Stat.orig.st_rdev);
499  }
500 #else
501  if (sizeof(int) >= 4 && sizeof(m_Stat.orig.st_rdev) >= 4) {
502  return *((unsigned int*) &m_Stat.orig.st_rdev) & 0xFFFF;
503  }
504 #endif //minor
505  return (unsigned int)(-1);
506 }
507 
508 
510 {
// NOTE(review): the declaration line preceding this body is missing from
// this listing; the body reads a `mode` argument and returns a string.
//
// Render permission bits as a 9-character "rwxrwxrwx"-style string.
// Positions start out as '-' and are overwritten per flag.  The execute
// slot of each triplet also encodes the special bit: 's' / 't' when both
// execute and the special bit are set, 'S' / 'T' when only the special bit
// is set (set-UID for user, set-GID for group, sticky for others).
511  char buf[9];
512  memset(buf, '-', sizeof(buf));
513 
// Three consecutive 3-character views into buf: user, group, others
514  char* usr = buf;
515  char* grp = usr + 3;
516  char* oth = grp + 3;
517 
518  if (mode & fTarURead) {
519  usr[0] = 'r';
520  }
521  if (mode & fTarUWrite) {
522  usr[1] = 'w';
523  }
524  if (mode & fTarUExecute) {
525  usr[2] = mode & fTarSetUID ? 's' : 'x';
526  } else if (mode & fTarSetUID) {
527  usr[2] = 'S';
528  }
529  if (mode & fTarGRead) {
530  grp[0] = 'r';
531  }
532  if (mode & fTarGWrite) {
533  grp[1] = 'w';
534  }
535  if (mode & fTarGExecute) {
536  grp[2] = mode & fTarSetGID ? 's' : 'x';
537  } else if (mode & fTarSetGID) {
538  grp[2] = 'S';
539  }
540  if (mode & fTarORead) {
541  oth[0] = 'r';
542  }
543  if (mode & fTarOWrite) {
544  oth[1] = 'w';
545  }
546  if (mode & fTarOExecute) {
547  oth[2] = mode & fTarSticky ? 't' : 'x';
548  } else if (mode & fTarSticky) {
549  oth[2] = 'T';
550  }
551 
// buf is not '\0'-terminated, hence the explicit length
552  return string(buf, sizeof(buf));
553 }
554 
555 
557 {
// NOTE(review): the declaration line and most of the `case` labels are
// missing from this listing (only the eDir label survived), so the mapping
// from entry types to the characters below cannot be confirmed from here.
// Types without a dedicated case fall through to '?'.
558  switch (type) {
561  return '-';
563  return 'l';
564  case CTarEntryInfo::eDir:
565  return 'd';
567  return 'p';
569  return 'c';
571  return 'b';
573  return 'V';
575  return 'S';
576  default:
577  break;
578  }
579  return '?';
580 }
581 
582 
584 {
// NOTE(review): the declaration line preceding this body is missing from
// this listing; the body reads an `info` argument and returns a string.
//
// Produce "user/group" for the entry: the user and group names are used
// where non-empty, otherwise the numeric id is formatted in their place.
585  string user(info.GetUserName());
586  if (user.empty()) {
587  NStr::UIntToString(user, info.GetUserId());
588  }
589  string group(info.GetGroupName());
590  if (group.empty()) {
591  NStr::UIntToString(group, info.GetGroupId());
592  }
593  return user + '/' + group;
594 }
595 
596 
597 static string s_MajorMinor(unsigned int n)
598 {
599  return n != (unsigned int)(-1) ? NStr::UIntToString(n) : string(1, '?');
600 }
601 
602 
603 static string s_SizeOrMajorMinor(const CTarEntryInfo& info)
604 {
605  if (info.GetType() == CTarEntryInfo::eCharDev ||
606  info.GetType() == CTarEntryInfo::eBlockDev) {
607  unsigned int major = info.GetMajor();
608  unsigned int minor = info.GetMinor();
609  return s_MajorMinor(major) + ',' + s_MajorMinor(minor);
610  } else if (info.GetType() == CTarEntryInfo::eDir ||
611  info.GetType() == CTarEntryInfo::ePipe ||
612  info.GetType() == CTarEntryInfo::eSymLink ||
613  info.GetType() == CTarEntryInfo::eVolHeader) {
614  return string("-");
615  } else if (info.GetType() == CTarEntryInfo::eSparseFile &&
616  info.GetSize() == 0) {
617  return string("?");
618  }
619  return NStr::NumericToString(info.GetSize());
620 }
621 
622 
624 {
// NOTE(review): the declaration line preceding this body is missing from
// this listing; the body writes to a stream `os` using an `info` argument
// and returns `os`.
//
// Print one listing line for the entry: type character, permission string,
// "user/group" (width 17), size or device numbers (width 10), the
// modification time converted to local time, and the entry name.
// For symbolic and hard links, " -> " and the link target are appended.
625  CTime mtime(info.GetModificationTime());
626  os << s_TypeAsChar(info.GetType())
627  << s_ModeAsString(info.GetMode()) << ' '
628  << setw(17) << s_UserGroupAsString(info) << ' '
629  << setw(10) << s_SizeOrMajorMinor(info) << ' '
630  << mtime.ToLocalTime().AsString(" Y-M-D h:m:s ")
631  << info.GetName();
632  if (info.GetType() == CTarEntryInfo::eSymLink ||
633  info.GetType() == CTarEntryInfo::eHardLink) {
634  os << " -> " << info.GetLinkName();
635  }
636  return os;
637 }
638 
639 
640 
641 //////////////////////////////////////////////////////////////////////////////
642 //
643 // Debugging utilities
644 //
645 
646 static string s_OSReason(int x_errno)
647 {
648  static const char kUnknownError[] = "Unknown error";
649  const char* strerr;
650  char errbuf[80];
651  CNcbiError::SetErrno(x_errno);
652  if (!x_errno)
653  return kEmptyStr;
654  strerr = ::strerror(x_errno);
655  if (!strerr || !*strerr
656  || !NStr::strncasecmp(strerr,
657  kUnknownError, sizeof(kUnknownError) - 1)) {
658  if (x_errno > 0) {
659  ::sprintf(errbuf, "Error %d", x_errno);
660  } else if (x_errno != -1) {
661  ::sprintf(errbuf, "Error 0x%08X", (unsigned int) x_errno);
662  } else {
663  ::strcpy (errbuf, "Unknown error (-1)");
664  }
665  strerr = errbuf;
666  }
667  _ASSERT(strerr && *strerr);
668  return string(": ") + strerr;
669 }
670 
671 
672 static string s_PositionAsString(const string& file, Uint8 pos, size_t recsize,
673  const string& entryname)
674 {
675  _ASSERT(!OFFSET_OF(recsize));
676  _ASSERT(recsize >= BLOCK_SIZE);
677  string result;
678  if (!file.empty()) {
679  CDirEntry temp(file);
680  result = (temp.GetType() == CDirEntry::eFile ? temp.GetName() : file)
681  + ": ";
682  }
683  result += "At record " + NStr::NumericToString(pos / recsize);
684  if (recsize != BLOCK_SIZE) {
685  result +=
686  ", block " + NStr::NumericToString(BLOCK_OF(pos % recsize)) +
687  " [thru #" + NStr::NumericToString(BLOCK_OF(pos),
688  NStr::fWithCommas) + ']';
689  }
690  if (!entryname.empty()) {
691  result += ", while in '" + entryname + '\'';
692  }
693  return result + ":\n";
694 }
695 
696 
697 static string s_OffsetAsString(size_t offset)
698 {
699  char buf[20];
700  _ASSERT(offset < 1000);
701  _VERIFY(sprintf(buf, "%03u", (unsigned int) offset));
702  return buf;
703 }
704 
705 
/// Check whether the first `len` bytes at `s` contain any byte other than
/// `c`.
/// @return true if at least one byte differs from `c`; false if all `len`
///         bytes equal `c` (including when `len` is 0).
static bool memcchr(const char* s, char c, size_t len)
{
    const char* const end = s + len;
    while (s != end) {
        if (*s++ != c) {
            return true;
        }
    }
    return false;
}
714 
715 
716 static string s_Printable(const char* field, size_t maxsize, bool text)
717 {
718  bool check = false;
719  if (!text && maxsize > 1 && !*field) {
720  field++, maxsize--;
721  check = true;
722  }
723  size_t len = s_Length(field, maxsize);
724  string retval = NStr::PrintableString(CTempString(field,
725  memcchr(field + len,
726  '\0',
727  maxsize - len)
728  ? maxsize
729  : len));
730  return check && !retval.empty() ? "\\0" + retval : retval;
731 }
732 
733 
// Fallback offsetof() for compilers other than GCC-compatible ones, used
// only when the macro is not already defined.
734 #if !defined(__GNUC__) && !defined(offsetof)
735 # define offsetof(T, F) ((char*) &(((T*) 0)->F) - (char*) 0)
736 #endif
737 
// TAR_PRINTABLE_EX(field, text, size) expands to a string expression of the
// form  @<offset>[<field>]:<padding>"<printable value>"  for a member of
// STarHeader.  It relies on two names being in scope where it is used:
// `h` (pointer to the header) and `excpt` (OR'ed with `text` when calling
// s_Printable).  The padding width depends on the length of the field name.
// NOTE(review): NCBI_AS_STRING is defined elsewhere; presumably it turns
// its argument into a string literal -- confirm.
738 #define TAR_PRINTABLE_EX(field, text, size) \
739  "@" + s_OffsetAsString((size_t) offsetof(STarHeader, field)) + \
740  "[" NCBI_AS_STRING(field) "]:" + \
741  string(14 - sizeof(NCBI_AS_STRING(field)), ' ') + \
742  '"' + s_Printable(h->field, size, text || excpt) + '"'
743 
// Same as above, with the size taken from the field itself
744 #define TAR_PRINTABLE(field, text) \
745  TAR_PRINTABLE_EX(field, text, sizeof(h->field))
746 
747 
// Labels used when dumping the old GNU sparse map and its continuation flag
748 #define TAR_GNU_REGION "[gnu.region]: "
749 #define TAR_GNU_CONTIND "[gnu.contind]: "
750 
// Dump a sparse map stored in a header as a multi-line string.
// The map is walked in 24-byte slots from `sparse` up to `contind`; each
// slot is decoded as a 12-byte offset followed by a 12-byte length.
//   h        header that contains the map (used to compute field offsets)
//   sparse   first byte of the map
//   contind  the "continued" indicator byte that follows the map
//   excpt    passed on to s_Printable() for the raw field rendering
// All-zero slots are not printed.  Once an all-zero slot or a slot that
// fails to decode has been seen (`done`), any later non-zero slot is
// printed raw instead of being decoded.
751 static string s_DumpSparseMap(const STarHeader* h, const char* sparse,
752  const char* contind, bool excpt = false)
753 {
754  string dump;
755  size_t offset;
756  bool done = false;
757  string region(TAR_GNU_REGION);
758 
759  do {
760  if (memcchr(sparse, '\0', 24)) {
761  offset = (size_t)(sparse - (const char*) h);
762  if (!dump.empty())
763  dump += '\n';
764  dump += '@' + s_OffsetAsString(offset);
765  if (!done) {
766  Uint8 off, len;
767  int ok_off = s_DecodeUint8(off, sparse, 12);
768  int ok_len = s_DecodeUint8(len, sparse + 12, 12);
// Both results are 1, -1 or 0, so the bitwise AND is non-zero exactly when
// both conversions succeeded
769  if (ok_off & ok_len) {
770  dump += region;
// After the first region, the label is replaced with ':' plus padding
771  region = ':' + string(sizeof(TAR_GNU_REGION) - 2, ' ');
// The raw text is shown only for octal fields (positive result); base-256
// fields get blank padding instead
772  if (ok_off > 0) {
773  dump += '"';
774  dump += s_Printable(sparse, 12, excpt);
775  dump += "\" ";
776  } else {
777  dump += string(14, ' ');
778  }
779  sparse += 12;
780  if (ok_len > 0) {
781  dump += '"';
782  dump += s_Printable(sparse, 12, excpt);
783  dump += "\" ";
784  } else {
785  dump += string(14, ' ');
786  }
787  sparse += 12;
788  dump += "[@";
789  dump += NStr::NumericToString(off);
790  dump += ", ";
// NOTE(review): a statement appears to be missing from this listing here
// (presumably the one appending the decoded length) -- confirm.
792  dump += ']';
// `sparse` has already been advanced by 24 bytes in total
793  continue;
794  }
795  done = true;
796  }
797  dump += ':' + string(sizeof(TAR_GNU_REGION) - 2, ' ')
798  + '"' + NStr::PrintableString(string(sparse, 24)) + '"';
799  } else {
800  done = true;
801  }
802  sparse += 24;
803  } while (sparse < contind);
804  if (!dump.empty()) {
805  dump += '\n';
806  }
807  offset = (size_t)(contind - (const char*) h);
// NOTE(review): the start of the following statement is missing from this
// listing; what remains appends the printable `contind` byte and a
// "[to-be-cont'd]" / "[last]" marker depending on whether it is non-zero.
809  "\"" + NStr::PrintableString(string(contind, 1))
810  + (*contind ? "\" [to-be-cont'd]" : "\" [last]");
811  return dump;
812 }
813 
814 
815 static string s_DumpSparseMap(const vector< pair<Uint8, Uint8> >& bmap)
816 {
817  size_t size = bmap.size();
818  string dump("Regions: " + NStr::NumericToString(size));
819  for (size_t n = 0; n < size; ++n) {
820  dump += "\n [" + NStr::NumericToString(n) + "]: @"
821  + NStr::NumericToString(bmap[n].first) + ", "
822  + NStr::NumericToString(bmap[n].second);
823  }
824  return dump;
825 }
826 
827 
828 static string s_DumpHeader(const STarHeader* h, ETar_Format fmt,
829  bool excpt = false)
830 {
831  string dump;
832  Uint8 val;
833  int ok;
834 
835  dump += TAR_PRINTABLE(name, true);
836  dump += '\n';
837 
838  ok = s_OctalToNum(val, h->mode, sizeof(h->mode));
839  dump += TAR_PRINTABLE(mode, !ok);
840  if (ok && val) {
841  dump += " [" + s_ModeAsString((TTarMode) val) + ']';
842  }
843  dump += '\n';
844 
845  ok = s_DecodeUint8(val, h->uid, sizeof(h->uid));
846  dump += TAR_PRINTABLE(uid, ok <= 0);
847  if (ok && (ok < 0 || val > 7)) {
848  dump += " [" + NStr::NumericToString(val) + ']';
849  if (ok < 0) {
850  dump += " (base-256)";
851  }
852  }
853  dump += '\n';
854 
855  ok = s_DecodeUint8(val, h->gid, sizeof(h->gid));
856  dump += TAR_PRINTABLE(gid, ok <= 0);
857  if (ok && (ok < 0 || val > 7)) {
858  dump += " [" + NStr::NumericToString(val) + ']';
859  if (ok < 0) {
860  dump += " (base-256)";
861  }
862  }
863  dump += '\n';
864 
865  ok = s_DecodeUint8(val, h->size, sizeof(h->size));
866  dump += TAR_PRINTABLE(size, ok <= 0);
867  if (ok && (ok < 0 || val > 7)) {
868  dump += " [" + NStr::NumericToString(val) + ']';
869  if (ok && h->typeflag[0] == 'S' && fmt == eTar_OldGNU) {
870  dump += " w/o map(s)!";
871  }
872  if (ok < 0) {
873  dump += " (base-256)";
874  }
875  }
876  dump += '\n';
877 
878  ok = s_OctalToNum(val, h->mtime, sizeof(h->mtime));
879  dump += TAR_PRINTABLE(mtime, !ok);
880  if (ok && val) {
881  CTime mtime((time_t) val);
882  ok = (Uint8) mtime.GetTimeT() == val ? true : false;
883  if (ok || val > 7) {
884  dump += (" ["
885  + (val > 7 ? NStr::NumericToString(val) + ", " : "")
886  + (ok ? mtime.ToLocalTime().AsString("Y-M-D h:m:s") : "")
887  + ']');
888  }
889  }
890  dump += '\n';
891 
892  ok = s_OctalToNum(val, h->checksum, sizeof(h->checksum));
893  dump += TAR_PRINTABLE(checksum, !ok);
894  dump += '\n';
895 
896  // Classify to the extent possible to help debug the problem (if any)
897  dump += TAR_PRINTABLE(typeflag, true);
898  ok = false;
899  const char* tname = 0;
900  switch (h->typeflag[0]) {
901  case '\0':
902  case '0':
903  ok = true;
904  if (!(fmt & eTar_Ustar) && fmt != eTar_OldGNU) {
905  size_t namelen = s_Length(h->name, sizeof(h->name));
906  if (namelen && h->name[namelen - 1] == '/')
907  tname = "legacy regular entry (directory)";
908  }
909  if (!tname)
910  tname = "legacy regular entry (file)";
911  tname += h->typeflag[0] ? 7/*skip "legacy "*/ : 0;
912  break;
913  case '\1':
914  case '1':
915  ok = true;
916 #ifdef NCBI_OS_UNIX
917  tname = "legacy hard link";
918 #else
919  tname = "legacy hard link - not FULLY supported";
920 #endif //NCBI_OS_UNIX
921  tname += h->typeflag[0] != '\1' ? 7/*skip "legacy "*/ : 0;
922  break;
923  case '\2':
924  case '2':
925  ok = true;
926 #ifdef NCBI_OS_UNIX
927  tname = "legacy symbolic link";
928 #else
929  tname = "legacy symbolic link - not FULLY supported";
930 #endif //NCBI_OS_UNIX
931  tname += h->typeflag[0] != '\2' ? 7/*skip "legacy "*/ : 0;
932  break;
933  case '3':
934 #ifdef NCBI_OS_UNIX
935  ok = true;
936 #endif //NCBI_OS_UNIX
937  tname = "character device";
938  break;
939  case '4':
940 #ifdef NCBI_OS_UNIX
941  ok = true;
942 #endif //NCBI_OS_UNIX
943  tname = "block device";
944  break;
945  case '5':
946  ok = true;
947  tname = "directory";
948  break;
949  case '6':
950 #ifdef NCBI_OS_UNIX
951  ok = true;
952 #endif //NCBI_OS_UNIX
953  tname = "FIFO";
954  break;
955  case '7':
956  tname = "contiguous file";
957  break;
958  case 'g':
959  tname = "global extended header";
960  break;
961  case 'x':
962  case 'X':
963  if (fmt & eTar_Ustar) {
964  ok = true;
965  if (h->typeflag[0] == 'x') {
966  tname = "extended (POSIX 1003.1-2001 [PAX]) header"
967  " - not FULLY supported";
968  } else {
969  tname = "extended (POSIX 1003.1-2001 [PAX] by Sun) header"
970  " - not FULLY supported";
971  }
972  } else {
973  tname = "extended header";
974  }
975  break;
976  case 'A':
977  tname = "Solaris ACL";
978  break;
979  case 'D':
980  if (fmt == eTar_OldGNU) {
981  tname = "GNU extension: directory dump";
982  }
983  break;
984  case 'E':
985  tname = "Solaris extended attribute file";
986  break;
987  case 'I':
988  // CAUTION: Entry size shows actual file size in the filesystem but
989  // no actual data blocks stored in the archive following the header!
990  tname = "Inode metadata only";
991  break;
992  case 'K':
993  if (fmt == eTar_OldGNU) {
994  ok = true;
995  tname = "GNU extension: long link";
996  }
997  break;
998  case 'L':
999  if (fmt == eTar_OldGNU) {
1000  ok = true;
1001  tname = "GNU extension: long name";
1002  }
1003  break;
1004  case 'M':
1005  switch (fmt) {
1006  case eTar_OldGNU:
1007  tname = "GNU extension: multi-volume entry";
1008  break;
1009  case eTar_Star:
1010  tname = "STAR extension: multi-volume entry";
1011  break;
1012  default:
1013  break;
1014  }
1015  break;
1016  case 'N':
1017  if (fmt == eTar_OldGNU) {
1018  tname = "GNU extension (obsolete): long filename(s)";
1019  }
1020  break;
1021  case 'S':
1022  switch (fmt) {
1023  case eTar_OldGNU:
1024  // CAUTION: Entry size does not include sparse entry map stored in
1025  // additional (non-standard) headers that may follow this header!
1026  tname = "GNU extension: sparse file";
1027  break;
1028  case eTar_Star:
1029  // Entry size already includes size of additional sparse file maps
1030  // that may follow this header before the actual file data.
1031  tname = "STAR extension: sparse file";
1032  break;
1033  default:
1034  break;
1035  }
1036  break;
1037  case 'V':
1038  ok = true;
1039  tname = "Volume header";
1040  break;
1041  default:
1042  break;
1043  }
1044  if (!tname && 'A' <= h->typeflag[0] && h->typeflag[0] <= 'Z') {
1045  tname = "local vendor enhancement / user-defined extension";
1046  }
1047  dump += (" [" + string(tname ? tname : "reserved")
1048  + (ok
1049  ? "]\n"
1050  : " -- NOT SUPPORTED]\n"));
1051 
1052  dump += TAR_PRINTABLE(linkname, true);
1053  dump += '\n';
1054 
1055  switch (fmt) {
1056  case eTar_Legacy: // NCBI never writes this header
1057  tname = "legacy (V7)";
1058  break;
1059  case eTar_OldGNU:
1060  if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1061  tname = "old GNU (NCBI)";
1062  } else {
1063  tname = "old GNU";
1064  }
1065  break;
1066  case eTar_Ustar:
1067  if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1068  tname = "ustar (NCBI)";
1069  } else {
1070  tname = "ustar";
1071  }
1072  break;
1073  case eTar_Posix: // aka "pax"
1074  if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1075  tname = "posix (NCBI)";
1076  } else {
1077  tname = "posix";
1078  }
1079  break;
1080  case eTar_Star: // NCBI never writes this header
1081  tname = "star";
1082  break;
1083  default:
1084  tname = 0;
1085  break;
1086  }
1087  dump += TAR_PRINTABLE(magic, true);
1088  if (tname) {
1089  dump += " [" + string(tname) + ']';
1090  }
1091  dump += '\n';
1092 
1093  dump += TAR_PRINTABLE(version, true);
1094 
1095  if (fmt != eTar_Legacy) {
1096  dump += '\n';
1097 
1098  dump += TAR_PRINTABLE(uname, true);
1099  dump += '\n';
1100 
1101  dump += TAR_PRINTABLE(gname, true);
1102  dump += '\n';
1103 
1104  ok = s_OctalToNum(val, h->devmajor, sizeof(h->devmajor));
1105  dump += TAR_PRINTABLE(devmajor, !ok);
1106  if (ok && val > 7) {
1107  dump += " [" + NStr::NumericToString(val) + ']';
1108  }
1109  dump += '\n';
1110 
1111  ok = s_OctalToNum(val, h->devminor, sizeof(h->devminor));
1112  dump += TAR_PRINTABLE(devminor, !ok);
1113  if (ok && val > 7) {
1114  dump += " [" + NStr::NumericToString(val) + ']';
1115  }
1116  dump += '\n';
1117 
1118  switch (fmt) {
1119  case eTar_Star:
1120  if (h->typeflag[0] == 'S') {
1121  dump += TAR_PRINTABLE_EX(star.prefix, true, 107);
1122  const char* realsize = h->star.prefix + 107;
1123  ok = s_DecodeUint8(val, realsize, 12);
1124  dump += "@"
1125  + s_OffsetAsString((size_t)(realsize - (const char*) h))
1126  + "[star.realsize]:\""
1127  + s_Printable(realsize, 12, !ok || excpt) + '"';
1128  if (ok && (ok < 0 || val > 7)) {
1129  dump += " [" + NStr::NumericToString(val) + ']';
1130  if (ok < 0) {
1131  dump += " (base-256)";
1132  }
1133  }
1134  } else {
1135  dump += TAR_PRINTABLE(star.prefix, true);
1136  }
1137  dump += '\n';
1138 
1139  ok = s_OctalToNum(val, h->star.atime, sizeof(h->star.atime));
1140  dump += TAR_PRINTABLE(star.atime, !ok);
1141  if (ok && val) {
1142  CTime atime((time_t) val);
1143  ok = (Uint8) atime.GetTimeT() == val ? true : false;
1144  if (ok || val > 7) {
1145  dump += (" ["
1146  + (val > 7 ? NStr::NumericToString(val)+", " : "")
1147  + (ok
1148  ? atime.ToLocalTime().AsString("Y-M-D h:m:s")
1149  : "")
1150  + ']');
1151  }
1152  }
1153  dump += '\n';
1154 
1155  ok = s_OctalToNum(val, h->star.ctime, sizeof(h->star.ctime));
1156  dump += TAR_PRINTABLE(star.ctime, !ok);
1157  if (ok && val) {
1158  CTime ctime((time_t) val);
1159  ok = (Uint8) ctime.GetTimeT() == val ? true : false;
1160  if (ok || val > 7) {
1161  dump += (" ["
1162  + (val > 7 ? NStr::NumericToString(val)+", " : "")
1163  + (ok
1164  ? ctime.ToLocalTime().AsString("Y-M-D h:m:s")
1165  : "")
1166  + ']');
1167  }
1168  }
1169  tname = (const char*) &h->star + sizeof(h->star);
1170  break;
1171 
1172  case eTar_OldGNU:
1173  ok = s_OctalToNum(val, h->gnu.atime, sizeof(h->gnu.atime));
1174  dump += TAR_PRINTABLE(gnu.atime, !ok);
1175  if (ok && val) {
1176  CTime atime((time_t) val);
1177  ok = (Uint8) atime.GetTimeT() == val ? true : false;
1178  if (ok || val > 7) {
1179  dump += (" ["
1180  + (val > 7 ? NStr::NumericToString(val)+", " : "")
1181  + (ok
1182  ? atime.ToLocalTime().AsString("Y-M-D h:m:s")
1183  : "")
1184  + ']');
1185  }
1186  }
1187  dump += '\n';
1188 
1189  ok = s_OctalToNum(val, h->gnu.ctime, sizeof(h->gnu.ctime));
1190  dump += TAR_PRINTABLE(gnu.ctime, !ok);
1191  if (ok && val) {
1192  CTime ctime((time_t) val);
1193  ok = (Uint8) ctime.GetTimeT() == val ? true : false;
1194  if (ok || val > 7) {
1195  dump += (" ["
1196  + (val > 7 ? NStr::NumericToString(val)+", " : "")
1197  + (ok
1198  ? ctime.ToLocalTime().AsString("Y-M-D h:m:s")
1199  : "")
1200  + ']');
1201  }
1202  }
1203 
1204  if (h->typeflag[0] == 'S') {
1205  if (memcchr(h->gnu.unused, '\0', sizeof(h->gnu.unused))) {
1206  dump += '\n';
1207  dump += TAR_PRINTABLE(gnu.unused, true);
1208  }
1209  dump += '\n' + s_DumpSparseMap(h, h->gnu.sparse,
1210  h->gnu.contind, excpt);
1211  if (memcchr(h->gnu.realsize, '\0', sizeof(h->gnu.realsize))) {
1212  ok = s_DecodeUint8(val, h->gnu.realsize,
1213  sizeof(h->gnu.realsize));
1214  dump += '\n';
1215  dump += TAR_PRINTABLE(gnu.realsize, ok <= 0);
1216  if (ok && (ok < 0 || val > 7)) {
1217  dump += " [" + NStr::NumericToString(val) + ']';
1218  }
1219  if (ok < 0) {
1220  dump += " (base-256)";
1221  }
1222  }
1223  tname = (const char*) &h->gnu + sizeof(h->gnu);
1224  } else {
1225  tname = h->gnu.ctime + sizeof(h->gnu.ctime);
1226  }
1227  break;
1228 
1229  default:
1230  dump += TAR_PRINTABLE(prefix, true);
1231  tname = h->prefix + sizeof(h->prefix);
1232  break;
1233  }
1234  } else {
1235  tname = h->version + sizeof(h->version);
1236  }
1237 
1238  size_t n = 0;
1239  while (&tname[n] < (const char*) h + BLOCK_SIZE) {
1240  if (tname[n]) {
1241  size_t offset = (size_t)(&tname[n] - (const char*) h);
1242  size_t len = BLOCK_SIZE - offset;
1243  if (len & ~0xF) { // len > 16
1244  len = 0x10; // len = 16
1245  }
1246  const char* e = (const char*) memchr(&tname[n], '\0', len);
1247  if (e) {
1248  len = (size_t)(e - &tname[n]);
1249  ok = s_DecodeUint8(val, &tname[n], len);
1250  } else {
1251  if (len > (offset & 0xF)) {
1252  len -= (offset & 0xF);
1253  }
1254  ok = false;
1255  }
1256  _ASSERT(len);
1257  dump += "\n@" + s_OffsetAsString(offset) + ':' + string(15, ' ')
1258  + '"' + NStr::PrintableString(string(&tname[n], len)) + '"';
1259  if (ok) {
1260  CTime time((time_t) val);
1261  bool okaytime = (Uint8) time.GetTimeT() == val;
1262  if (ok < 0 || val > 7 || okaytime) {
1263  dump += " [";
1264  if (ok < 0 || val > 7) {
1266  }
1267  if (ok < 0) {
1268  dump += "] (base-256)";
1269  } else if (okaytime) {
1270  if (val > 7) {
1271  dump += ", ";
1272  }
1273  dump += time.ToLocalTime().AsString("Y-M-D h:m:s]");
1274  } else {
1275  dump += ']';
1276  }
1277  }
1278  }
1279  n += len;
1280  } else {
1281  n++;
1282  }
1283  }
1284 
1285  return dump;
1286 }
1287 
1288 #undef TAR_PRINTABLE
1289 
1290 #undef _STR
1291 
1292 
1293 inline void s_SetStateSafe(CNcbiIos& ios, IOS_BASE::iostate state) throw()
1294 {
1295  try {
1296  ios.setstate(state);
1297  } catch (IOS_BASE::failure&) {
1298  ;
1299  }
1300 }
1301 
1302 
1303 //////////////////////////////////////////////////////////////////////////////
1304 //
1305 // CTar
1306 //
1307 
1308 CTar::CTar(const string& filename, size_t blocking_factor)
1309  : m_FileName(filename),
1310  m_FileStream(new CNcbiFstream),
1311  m_Stream(*m_FileStream),
1312  m_ZeroBlockCount(0),
1313  m_BufferSize(SIZE_OF(blocking_factor)),
1314  m_BufferPos(0),
1315  m_StreamPos(0),
1316  m_BufPtr(0),
1317  m_Buffer(0),
1318  m_OpenMode(eNone),
1319  m_Modified(false),
1320  m_Bad(false),
1321  m_Flags(fDefault)
1322 {
1323  x_Init();
1324 }
1325 
1326 
1327 CTar::CTar(CNcbiIos& stream, size_t blocking_factor)
1328  : m_FileName(kEmptyStr),
1329  m_FileStream(0),
1330  m_Stream(stream),
1331  m_ZeroBlockCount(0),
1332  m_BufferSize(SIZE_OF(blocking_factor)),
1333  m_BufferPos(0),
1334  m_StreamPos(0),
1335  m_BufPtr(0),
1336  m_Buffer(0),
1337  m_OpenMode(eNone),
1338  m_Modified(false),
1339  m_Bad(false),
1340  m_Flags(fDefault)
1341 {
1342  x_Init();
1343 }
1344 
1345 
// NOTE(review): the signature line of this body is not visible here;
// presumably this is the CTar destructor -- confirm.
// Flushes (in no-throw mode) and closes the archive, then releases the file
// stream and the raw I/O buffer.
1347 {
1348  // Close stream(s)
  // NB: the result of x_Flush() is passed on as x_Close()'s "truncate" argument
1349  x_Close(x_Flush(true/*no_throw*/));
1350  delete m_FileStream;
1351  m_FileStream = 0;
1352 
1353  // Delete owned masks
1354  for (size_t i = 0; i < sizeof(m_Mask) / sizeof(m_Mask[0]); ++i) {
1356  }
1357 
1358  // Delete buffer
  // (m_BufPtr is the pointer obtained from new[]; m_Buffer merely points inside it)
1359  delete[] m_BufPtr;
1360  m_BufPtr = 0;
1361 }
1362 
1363 
// Throw CTarException with the given error code.  The exception text is the
// position string built by s_PositionAsString() from the file name, stream
// position, buffer size and current entry name of the CTar object pointed to
// by "who", followed by "message".
1364 #define TAR_THROW(who, errcode, message) \
1365  NCBI_THROW(CTarException, errcode, \
1366  s_PositionAsString(who->m_FileName, who->m_StreamPos, \
1367  who->m_BufferSize, \
1368  who->m_Current.GetName()) + (message))
1369 
// Same as TAR_THROW, but when "who" has fDumpEntryHeaders set in its flags,
// a dump of header "hdr" (interpreted using format "fmt") is appended to the
// message after ":\n".
1370 #define TAR_THROW_EX(who, errcode, message, hdr, fmt) \
1371  TAR_THROW(who, errcode, \
1372  who->m_Flags & fDumpEntryHeaders \
1373  ? string(message) + ":\n" + s_DumpHeader(hdr, fmt, true) \
1374  : string(message))
1375 
// Post a diagnostic message with the given error subcode and severity, using
// the same position prefix as TAR_THROW.  NB: refers to the members directly
// (m_FileName etc.), so it can only be used where those names are in scope.
1376 #define TAR_POST(subcode, severity, message) \
1377  ERR_POST_X(subcode, (severity) << \
1378  s_PositionAsString(m_FileName, m_StreamPos, m_BufferSize,\
1379  m_Current.GetName()) + (message))
1380 
1381 
// Allocate the archive I/O buffer and set m_Buffer to a page-aligned address
// within it.
1382 void CTar::x_Init(void)
1383 {
1385  size_t pagesize = (size_t) CSystemInfo::GetVirtualMemoryPageSize();
  // Fall back to 4096 if the reported page size is too small or not a power of 2
1386  if (pagesize < 4096 || (pagesize & (pagesize - 1))) {
1387  pagesize = 4096; // reasonable default
1388  }
1389  size_t pagemask = pagesize - 1;
  // Over-allocate by "pagemask" bytes so that m_BufferSize bytes remain
  // available after m_Buffer is rounded up to the next page boundary
1390  m_BufPtr = new char[m_BufferSize + pagemask];
1391  // Make m_Buffer page-aligned
1392  m_Buffer = m_BufPtr +
1393  ((((size_t) m_BufPtr + pagemask) & ~pagemask) - (size_t) m_BufPtr);
1394 }
1395 
1396 
// Flush pending archive output:  pad the buffer to the full record size,
// write end-of-archive zero blocks if not enough of them have been written
// yet, and sync the underlying stream buffer.
// @param no_throw
//   When true, write / sync errors are posted rather than thrown (the
//   special "src" value (const char*)(-1L) is passed to x_WriteArchive()).
// @return
//   false if nothing was attempted (archive is bad, not open, or one of the
//   early-exit checks hit);  true otherwise -- NB: including the case when
//   the stream sync failed in no-throw mode (m_Bad gets set then).
1397 bool CTar::x_Flush(bool no_throw)
1398 {
1399  m_Current.m_Name.clear();
1400  if (m_BufferPos == m_BufferSize) {
1401  m_Bad = true; // In case of unhandled exception(s)
1402  }
1403  if (m_Bad || !m_OpenMode) {
1404  return false;
1405  }
1406  if (!m_Modified &&
1408  return false;
1409  }
1410 
1412  if (m_BufferPos || m_ZeroBlockCount < 2) {
1413  // Assure proper blocking factor and pad the archive as necessary
  // NB: m_ZeroBlockCount is saved in "zbc" because x_WriteArchive() resets it
1414  size_t zbc = m_ZeroBlockCount;
1415  size_t pad = m_BufferSize - m_BufferPos;
1416  memset(m_Buffer + m_BufferPos, 0, pad);
1417  x_WriteArchive(pad, no_throw ? (const char*)(-1L) : 0);
1418  _ASSERT(!(m_BufferPos % m_BufferSize) // m_BufferSize if write error
1419  && !m_Bad == !m_BufferPos);
1420  if (!m_Bad && (zbc += BLOCK_OF(pad)) < 2) {
1421  // Write EOT (two zero blocks), if have not padded enough already
  // The last "pad" bytes of the buffer are still zero from the memset above
1422  memset(m_Buffer, 0, m_BufferSize - pad);
1423  x_WriteArchive(m_BufferSize, no_throw ? (const char*)(-1L) : 0);
1425  && !m_Bad == !m_BufferPos);
1426  if (!m_Bad && (zbc += BLOCK_OF(m_BufferSize)) < 2) {
1427  _ASSERT(zbc == 1 && m_BufferSize == BLOCK_SIZE);
1428  x_WriteArchive(BLOCK_SIZE, no_throw ? (const char*)(-1L) : 0);
1430  && !m_Bad == !m_BufferPos);
1431  }
1432  }
1433  m_ZeroBlockCount = zbc;
1434  }
1436 
  // Sync the stream buffer;  a non-zero result is treated as a flush failure
1437  if (!m_Bad && m_Stream.rdbuf()->PUBSYNC() != 0) {
1438  m_Bad = true;
1439  int x_errno = errno;
1441  if (!no_throw) {
1442  TAR_THROW(this, eWrite,
1443  "Archive flush failed" + s_OSReason(x_errno));
1444  }
1445  TAR_POST(83, Error,
1446  "Archive flush failed" + s_OSReason(x_errno));
1447  }
1448  if (!m_Bad) {
1449  m_Modified = false;
1450  }
1451  return true;
1452 }
1453 
1454 
1455 static int s_TruncateFile(const string& filename, Uint8 filesize)
1456 {
1457  int x_error = 0;
1458 #ifdef NCBI_OS_UNIX
1459  if (::truncate(filename.c_str(), (off_t) filesize) != 0)
1460  x_error = errno;
1461 #endif //NCBI_OS_UNIX
1462 #ifdef NCBI_OS_MSWIN
1463  TXString x_filename(_T_XSTRING(filename));
1464  HANDLE handle = ::CreateFile(x_filename.c_str(), GENERIC_WRITE,
1465  0/*sharing*/, NULL, OPEN_EXISTING,
1466  FILE_ATTRIBUTE_NORMAL, NULL);
1467  if (handle != INVALID_HANDLE_VALUE) {
1468  LARGE_INTEGER x_filesize;
1469  x_filesize.QuadPart = filesize;
1470  if (!::SetFilePointerEx(handle, x_filesize, NULL, FILE_BEGIN)
1471  || !::SetEndOfFile(handle)) {
1472  x_error = (int) ::GetLastError();
1473  }
1474  bool closed = ::CloseHandle(handle) ? true : false;
1475  if (!x_error && !closed) {
1476  x_error = (int) ::GetLastError();
1477  }
1478  } else {
1479  x_error = (int) ::GetLastError();
1480  }
1481 #endif //NCBI_OS_MSWIN
1482  return x_error;
1483 }
1484 
1485 
// Close the archive file stream (if this object has one and it is open),
// and reset the open mode, modification flag, buffer position and bad state.
// @param truncate
//   Considered only when the close succeeded and fTarfileNoTruncate is not
//   set in m_Flags (the statement that acts upon it is not visible here).
1486 void CTar::x_Close(bool truncate)
1487 {
1488  if (m_FileStream && m_FileStream->is_open()) {
1489  m_FileStream->close();
  // A close failure is only posted (not thrown), and only if the archive
  // was not already marked bad
1490  if (!m_Bad && m_FileStream->fail()) {
1491  int x_errno = errno;
1492  TAR_POST(104, Error,
1493  "Cannot close archive" + s_OSReason(x_errno));
1494  m_Bad = true;
1495  }
1496  if (!m_Bad && !(m_Flags & fTarfileNoTruncate) && truncate) {
1498  }
1499  }
  // NB: the state is reset unconditionally, including m_Bad
1500  m_OpenMode = eNone;
1501  m_Modified = false;
1502  m_BufferPos = 0;
1503  m_Bad = false;
1504 }
1505 
1506 
// Prepare the archive for the requested action.
// For a stream-based archive (no m_FileStream) only the state is validated
// and the open mode / positions are set up.  For a file-based archive the
// file is (re-)opened when the required access mode is not covered by the
// current one, or is rewound for actions other than eAppend / eInternal.
// Throws (via TAR_THROW, eOpen) if the stream is in a bad state or the file
// cannot be opened.
1507 void CTar::x_Open(EAction action)
1508 {
1509  _ASSERT(action);
  // "toend" requests a scan of the archive to its logical EOF (done at the
  // bottom of this function) before appending
1510  bool toend = false;
1511  // We can only open a named file here, and if an external stream is being
1512  // used as an archive, it must be explicitly repositioned by user's code
1513  // (outside of this class) before each archive operation.
1514  if (!m_FileStream) {
1515  if (!m_Modified) {
1516  // Check if Create() is followed by Append()
1517  if (m_OpenMode != eWO && action == eAppend
1518  && (m_Flags & fStreamPipeThrough)) {
1519  toend = true;
1520  }
1521  } else if (action != eAppend) {
1522  _ASSERT(m_OpenMode != eWO); // NB: Prev action != eCreate
1523  if (m_Flags & fStreamPipeThrough) {
1524  x_Flush(); // NB: resets m_Modified to false if successful
1525  }
1526  if (m_Modified) {
1527  if (!m_Bad) {
1528  TAR_POST(1, Warning,
1529  "Pending changes may be discarded"
1530  " upon reopen of in-stream archive");
1531  }
1532  m_Modified = false;
1533  }
1534  }
1535  m_Current.m_Name.clear();
  // Any stream state bit other than EOF, or a missing stream buffer, is fatal
1536  if (m_Bad || (m_Stream.rdstate() & ~NcbiEofbit) || !m_Stream.rdbuf()) {
1537  TAR_THROW(this, eOpen,
1538  "Archive I/O stream is in bad state");
1539  } else {
1540  m_OpenMode = EOpenMode(int(action) & eRW);
1541  _ASSERT(m_OpenMode != eNone);
1542  }
1543  if (action != eAppend && action != eInternal) {
1544  m_BufferPos = 0;
1545  m_StreamPos = 0;
1546  }
1547 #ifdef NCBI_OS_MSWIN
1548  if (&m_Stream == &cin) {
1549  HANDLE handle = (HANDLE) _get_osfhandle(_fileno(stdin));
1550  if (GetFileType(handle) != FILE_TYPE_DISK) {
1552  }
1553  }
1554 #endif //NCBI_OS_MSWIN
1555  } else {
  // File-based archive:  the access mode required is derived from the action
1557  EOpenMode mode = EOpenMode(int(action) & eRW);
1558  _ASSERT(mode != eNone);
1559  if (action != eAppend && action != eCreate/*mode == eWO*/) {
1560  x_Flush();
1561  } else {
1562  m_Current.m_Name.clear();
1563  }
1564  if (mode == eWO || m_OpenMode < mode) {
1565  // Need to (re-)open the archive file
1566  if (m_OpenMode != eWO && action == eAppend) {
1567  toend = true;
1568  }
1569  x_Close(false); // NB: m_OpenMode = eNone; m_Modified = false
1570  m_StreamPos = 0;
1571  switch (mode) {
1572  case eWO:
1573  // WO access
1574  _ASSERT(action == eCreate);
1575  // Note that m_Modified is untouched
1576  m_FileStream->open(m_FileName.c_str(),
1577  IOS_BASE::out |
1578  IOS_BASE::binary | IOS_BASE::trunc);
1579  break;
1580  case eRO:
1581  // RO access
1582  _ASSERT(action != eCreate);
1583  m_FileStream->open(m_FileName.c_str(),
1584  IOS_BASE::in |
1585  IOS_BASE::binary);
1586  break;
1587  case eRW:
1588  // RW access
1589  _ASSERT(action != eCreate);
1590  m_FileStream->open(m_FileName.c_str(),
1592  IOS_BASE::binary);
1593  break;
1594  default:
1595  _TROUBLE;
1596  break;
1597  }
1598  if (!m_FileStream->is_open() || !m_FileStream->good()) {
1599  int x_errno = errno;
1600  TAR_THROW(this, eOpen,
1601  "Cannot open archive" + s_OSReason(x_errno));
1602  } else {
1603  m_OpenMode = mode;
1604  }
1605  } else {
1606  // No need to reopen the archive file
1607  _ASSERT(m_OpenMode > eWO && action != eCreate);
1608  if (m_Bad) {
1609  TAR_THROW(this, eOpen,
1610  "Archive file is in bad state");
1611  }
  // Rewind to the beginning, except when appending or reading on internally
1612  if (action != eAppend && action != eInternal) {
1613  m_BufferPos = 0;
1614  m_StreamPos = 0;
1615  m_FileStream->seekg(0);
1616  }
1617  }
1618  }
1619  if (toend) {
1620  _ASSERT(!m_Modified && action == eAppend);
1621  // There may be an extra and unnecessary archive file scanning
1622  // if Append() follows Update() that caused no modifications;
1623  // but there is no way to distinguish this, currently :-/
1624  // Also, this sequence should be a real rarity in practice.
1625  x_ReadAndProcess(eAppend); // to position at logical EOF
1626  }
1627  _ASSERT(!(m_Stream.rdstate() & ~NcbiEofbit));
1628  _ASSERT(m_Stream.rdbuf());
1629 }
1630 
1631 
1632 unique_ptr<CTar::TEntries> CTar::Extract(void)
1633 {
1634  x_Open(eExtract);
1635  unique_ptr<TEntries> entries = x_ReadAndProcess(eExtract);
1636 
1637  // Restore attributes of "postponed" directories
1638  if (m_Flags & fPreserveAll) {
1639  ITERATE(TEntries, e, *entries) {
1640  if (e->GetType() == CTarEntryInfo::eDir) {
1641  x_RestoreAttrs(*e, m_Flags);
1642  }
1643  }
1644  }
1645 
1646  return entries;
1647 }
1648 
1649 
// NOTE(review): the signature line of this body is not visible here;
// presumably this is CTar::GetNextEntryInfo() -- confirm.
// Reads archive data via x_ReadAndProcess(eInternal) and returns a pointer
// to m_Current for the entry found, or 0 if the archive is in a bad state or
// no entry was obtained.
1651 {
1652  if (m_Bad) {
1653  return 0;
1654  }
  // Open the archive unless it is already open with read access
  // (the statements of the first branch are not visible here)
1655  if (m_OpenMode & eRO) {
1658  } else {
1659  x_Open(eInternal);
1660  }
1661  unique_ptr<TEntries> temp = x_ReadAndProcess(eInternal);
  // At most one entry is expected from a single eInternal pass
1662  _ASSERT(temp && temp->size() < 2);
1663  if (temp->size() < 1) {
1664  return 0;
1665  }
1666  _ASSERT(m_Current == temp->front());
1667  return &m_Current;
1668 }
1669 
1670 
1671 // Return a pointer to buffer, which is always block-aligned, and reflect the
1672 // number of bytes available via the parameter. Return NULL when unable to
1673 // read (either EOF or other read error).
// @param n
//   On entry, the number of bytes wanted (must be non-zero);  on return, the
//   number of bytes made available at the returned address (may be fewer).
// @return
//   Pointer into m_Buffer at the position before this call, or 0 if nothing
//   could be read at all (the stream then gets its EOF or bad bit set).
1674 const char* CTar::x_ReadArchive(size_t& n)
1675 {
1678  _ASSERT(n != 0);
1679  size_t nread;
  // The buffer gets refilled from the stream only when positioned at its start
1680  if (!m_BufferPos) {
1681  nread = 0;
1682  do {
1683  streamsize xread;
1684  IOS_BASE::iostate iostate = m_Stream.rdstate();
1685  if (!iostate) { // NB: good()
1686 #ifdef NCBI_COMPILER_MIPSPRO
1687  try {
1688  // Work around a bug in MIPSPro 7.3's streambuf::xsgetn()
1689  CNcbiIstream* is = dynamic_cast<CNcbiIstream*>(&m_Stream);
1690  _ASSERT(is);
1691  is->read (m_Buffer + nread,
1692  (streamsize)(m_BufferSize - nread));
1693  xread = is->gcount();
1694  if (xread > 0) {
1695  is->clear();
1696  }
1697  } catch (IOS_BASE::failure&) {
1698  xread = m_Stream.rdstate() & NcbiEofbit ? 0 : -1;
1699  }
1700 #else
1701  try {
1702  xread = m_Stream.rdbuf()->
1703  sgetn(m_Buffer + nread,
1704  (streamsize)(m_BufferSize - nread));
1705 # ifdef NCBI_COMPILER_WORKSHOP
1706  if (xread < 0) {
1707  xread = 0; // NB: WS6 is known to return -1 :-/
1708  }
1709 # endif //NCBI_COMPILER_WORKSHOP
1710  } catch (IOS_BASE::failure&) {
1711  xread = -1;
1712  }
1713 #endif //NCBI_COMPILER_MIPSPRO
1714  } else {
  // Stream is not good:  EOF alone counts as end of data, anything else as error
1715  xread = iostate == NcbiEofbit ? 0 : -1;
1716  }
1717  if (xread <= 0) {
  // A short read is only reported when header dumping is enabled
1718  if (nread && (m_Flags & fDumpEntryHeaders)) {
1719  TAR_POST(57, xread ? Error : Warning,
1720  "Short read (" + NStr::NumericToString(nread)
1721  + (xread ? ")" : "): EOF"));
1722  }
1723  s_SetStateSafe(m_Stream, xread < 0 ? NcbiBadbit : NcbiEofbit);
  // Partial data (if any) is still handed out to the caller
1724  if (nread) {
1725  break;
1726  }
1727  return 0;
1728  }
1729  nread += (size_t) xread;
1730  } while (nread < m_BufferSize);
  // Zero-fill whatever part of the buffer could not be read in
1731  memset(m_Buffer + nread, 0, m_BufferSize - nread);
1732  } else {
1733  nread = m_BufferSize - m_BufferPos;
1734  }
1735  if (n > nread) {
1736  n = nread;
1737  }
  // Advance by the aligned size, wrapping to 0 at the end of the buffer
1738  size_t xpos = m_BufferPos;
1739  m_BufferPos += ALIGN_SIZE(n);
1741  if (m_BufferPos == m_BufferSize) {
1742  m_BufferPos = 0;
1743  if (!m_FileStream && (m_Flags & fStreamPipeThrough)) {
  // m_ZeroBlockCount is saved and restored around statement(s) not visible here
1744  size_t zbc = m_ZeroBlockCount;
1747  _ASSERT(m_BufferPos == 0);
1748  m_ZeroBlockCount = zbc;
1749  }
1750  }
1752  return m_Buffer + xpos;
1753 }
1754 
1755 
1756 // All partial internal (i.e. in-buffer) block writes are _not_ block-aligned;
1757 // but all external writes (i.e. when "src" is provided) _are_ block-aligned.
// Account for "nwrite" bytes in the archive buffer, writing the buffer out to
// the stream each time it becomes full.
// @param nwrite
//   Number of bytes to write;  nothing is done if 0 or if the archive is bad.
// @param src
//   Data to copy into the buffer (padded with zeros up to the aligned size).
//   If 0 or (const char*)(-1L), no data is copied and only the buffer
//   position advances (the bytes are expected to be in the buffer already).
//   The value (const char*)(-1L) additionally selects the no-throw mode, in
//   which a write error is posted rather than thrown.
1758 void CTar::x_WriteArchive(size_t nwrite, const char* src)
1759 {
1760  if (!nwrite || m_Bad) {
1761  return;
1762  }
1763  m_Modified = true;
1764  m_ZeroBlockCount = 0;
1765  do {
  // Take as much as fits into the remainder of the buffer
1767  size_t avail = m_BufferSize - m_BufferPos;
1768  if (avail > nwrite) {
1769  avail = nwrite;
1770  }
1771  size_t advance = avail;
1772  if (src && src != (const char*)(-1L)) {
1773  memcpy(m_Buffer + m_BufferPos, src, avail);
1774  size_t pad = ALIGN_SIZE(avail) - avail;
1775  memset(m_Buffer + m_BufferPos + avail, 0, pad);
1776  advance += pad;
1777  src += avail;
1778  }
1779  m_BufferPos += advance;
1781  if (m_BufferPos == m_BufferSize) {
  // The buffer is full:  write it out, looping over partial writes
1782  size_t nwritten = 0;
1783  do {
1784  int x_errno = 0;
1785  streamsize xwritten;
1786  IOS_BASE::iostate iostate = m_Stream.rdstate();
1787  if (!(iostate & ~NcbiEofbit)) { // NB: good() OR eof()
1788  try {
1789  xwritten = m_Stream.rdbuf()
1790  ->sputn(m_Buffer + nwritten,
1791  (streamsize)(m_BufferSize - nwritten));
1792  } catch (IOS_BASE::failure&) {
1793  xwritten = -1;
1794  }
1795  if (xwritten <= 0) {
1796  x_errno = errno;
1797  } else if (iostate) {
  // Clear the EOF state once some data has been written
1798  m_Stream.clear();
1799  }
1800  } else {
1801  xwritten = -1;
1802  }
1803  if (xwritten <= 0) {
  // NB: m_BufferPos is left equal to m_BufferSize on this path
1804  m_Bad = true;
1806  if (src != (const char*)(-1L)) {
1807  TAR_THROW(this, eWrite,
1808  "Archive write failed" +s_OSReason(x_errno));
1809  }
1810  TAR_POST(84, Error,
1811  "Archive write failed" + s_OSReason(x_errno));
1812  return;
1813  }
1814  nwritten += (size_t) xwritten;
1815  } while (nwritten < m_BufferSize);
1816  m_BufferPos = 0;
1817  }
1818  m_StreamPos += advance;
1819  nwrite -= avail;
1820  } while (nwrite);
1822 }
1823 
1824 
1825 // PAX (Portable Archive Interchange) extraction support
1826 
1827 // Define bitmasks for extended numeric information (must fit in perm mask)
// Each visible enumerator is a distinct single bit;  a bit gets set in the
// parse result when the corresponding numeric PAX value was parsed okay.
// NB: the first enumerator line(s) of this enum are not visible here.
1828 enum EPAXBit {
1831  fPAXSparse = 1 << 1,
1832  fPAXMtime = 1 << 2,
1833  fPAXAtime = 1 << 3,
1834  fPAXCtime = 1 << 4,
1835  fPAXSize = 1 << 5,
1836  fPAXUid = 1 << 6,
1837  fPAXGid = 1 << 7
1838 };
1839 typedef unsigned int TPAXBits; // Bitwise-OR of EPAXBit(s)
1840 
1841 
1842 // Parse "len" bytes of "str" as numeric "valp[.fraq]"
1843 static bool s_ParsePAXNumeric(Uint8* valp, const char* str, size_t len,
1844  string* fraq, EPAXBit assign)
1845 {
1846  _ASSERT(valp && str[len] == '\n');
1847  if (!isdigit((unsigned char)(*str))) {
1848  return false;
1849  }
1850  const char* p = (const char*) memchr(str, '.', len);
1851  if (!p) {
1852  p = str + len;
1853  } else if (fraq == (string*)(-1L)) {
1854  // no decimal point allowed
1855  return false;
1856  }
1857  Uint8 val;
1858  try {
1859  val = NStr::StringToUInt8(CTempString(str, (size_t)(p - str)));
1860  } catch (...) {
1861  return false;
1862  }
1863  if (*p == '.' && ++p != str + len) {
1864  len -= (size_t)(p - str);
1865  _ASSERT(len);
1866  for (size_t n = 0; n < len; ++n) {
1867  if (!isdigit((unsigned char) p[n])) {
1868  return false;
1869  }
1870  }
1871  if (assign && fraq) {
1872  fraq->assign(p, len);
1873  }
1874  } // else (*p == '\n' || !*p)
1875  if (assign) {
1876  *valp = val;
1877  }
1878  return true;
1879 }
1880 
1881 
// Return true if each of the first "len" characters of "str" is a lowercase
// letter (trivially true when "len" is 0);  false as soon as any character
// fails the isalpha() / islower() checks.
static bool s_AllLowerCase(const char* str, size_t len)
{
    while (len-- > 0) {
        const unsigned char ch = (unsigned char)(*str++);
        if (!(isalpha(ch)  &&  islower(ch))) {
            return false;
        }
    }
    return true;
}
1891 
1892 
1893 // Raise 10 to the power of n
1894 static Uint8 ipow10(unsigned int n)
1895 {
1896  _ASSERT(n < 10);
1897  // for small n this is the fastest
1898  return n ? 10 * ipow10(n - 1) : 1;
1899 }
1900 
1901 
1902 // NB: assumes fraq is all digits
// Convert the digits of a decimal fraction (the part after the decimal
// point) into a count of nanoseconds.  An empty string yields 0.
// NB: the trailing argument lines of both StringToUInt8() calls are not
// visible here.
1903 static long s_FraqToNanosec(const string& fraq)
1904 {
1905  size_t len = fraq.size();
1906  if (!len)
1907  return 0;
1908  long result;
1909  if (len < 10) {
  // Fewer than 10 digits:  scale up to 9 decimal places
1910  Uint8 temp = NStr::StringToUInt8(fraq,
1913  result = (long)(temp * ipow10((unsigned int)(9 - len)));
1914  } else {
  // 10 or more digits:  take the first 10 and round to 9 decimal places
  // NOTE(review): if the first 10 digits are 9999999995 or above, the
  // rounding below yields 1000000000, which the range assertion that
  // follows rejects -- confirm whether a clamp is needed
1915  Uint8 temp = NStr::StringToUInt8(CTempString(fraq, 0, 10),
1918  result = (long)((temp + 5) / 10);
1919  }
1920  _ASSERT(0L <= result && result < 1000000000L);
1921  return result;
1922 }
1923 
1924 
// NOTE(review): the signature line of this body is not visible here;
// presumably this is the CTar method that parses PAX extended header data
// (the input is referred to as "data" below) -- confirm.
// Parses records of the form "<len> <key>=<value>\n", stores the recognized
// values into m_Current, and returns eContinue;  returns eFailure (after
// posting an error) if a record is malformed.
1926 {
1927  Uint8 major = 0, minor = 0, size = 0, sparse = 0, uid = 0, gid = 0;
1928  Uint8 mtime = 0, atime = 0, ctime = 0, dummy = 0;
1929  string mtime_fraq, atime_fraq, ctime_fraq;
1930  string path, linkpath, name, uname, gname;
  // Special marker for numeric keys that must not contain a decimal point
1931  string* nodot = (string*)(-1L);
  // Table of the recognized keys and where to store their values
1932  const struct SPAXParseTable {
1933  const char* key;
1934  Uint8* val; // non-null for numeric, else do as string
1935  string* str; // string or fraction part (if not -1)
1936  EPAXBit bit; // for numerics only
1937  } parser[] = {
1938  { "mtime", &mtime, &mtime_fraq, fPAXMtime }, // num w/fraq: assign
1939  { "atime", &atime, &atime_fraq, fPAXAtime },
1940  { "ctime", &ctime, &ctime_fraq, fPAXCtime },
1941  /*{ "dummy", &dummy, 0, fPAXSome },*/// num w/fraq: asg int
1942  /*{ "dummy", &dummy, &fraq or 0, fPAXNone },*/// num w/fraq: ck.only
1943  { "size", &size, nodot, fPAXSize }, // number: assign
1944  { "uid", &uid, nodot, fPAXUid },
1945  { "gid", &gid, nodot, fPAXGid },
1946  /*{ "dummy", &dummy, nodot, fPAXNone },*/// number: ck.only
1947  { "path", 0, &path, fPAXNone }, // string: assign
1948  { "linkpath", 0, &linkpath, fPAXNone },
1949  { "uname", 0, &uname, fPAXNone },
1950  { "gname", 0, &gname, fPAXNone },
1951  { "comment", 0, 0, fPAXNone }, // string: ck.only
1952  { "charset", 0, 0, fPAXNone },
1953  // GNU sparse extensions (NB: .size and .realsize don't go together)
1954  { "GNU.sparse.realsize", &sparse, nodot, fPAXSparse },
1955  { "GNU.sparse.major", &major, nodot, fPAXSparse },
1956  { "GNU.sparse.minor", &minor, nodot, fPAXSparse },
1957  { "GNU.sparse.size", &dummy, nodot, fPAXSparse },
1958  { "GNU.sparse.name", 0, &name, fPAXNone },
1959  // Other
1960  { "SCHILY.realsize", &sparse, nodot, fPAXSparse }
1961  };
1962  const char* s = data.c_str();
1963  TPAXBits parsed = fPAXNone;
1964  size_t l = data.size();
1965 
1966  _ASSERT(l && l == strlen(s));
  // Process the data one record (line) at a time
1967  do {
1968  unsigned long len;
1969  size_t klen, vlen;
1970  const char* e;
1971  char *k, *v;
1972 
  // "e" points to the end of the current record: newline or end of data
1973  if (!(e = (char*) memchr(s, '\n', l))) {
1974  e = s + l;
1975  }
1976  errno = 0;
  // Validate the record:  a non-zero decimal length that spans exactly up
  // to and including "e";  a space or tab after it;  then a non-empty key
  // with no spaces or tabs;  '=';  and a non-empty value
1977  if (!isdigit((unsigned char)(*s)) || !(len = strtoul(s, &k, 10))
1978  || errno || s + len - 1 != e || (*k != ' ' && *k != '\t')
1979  || !(v = (char*) memchr(k, '=', (size_t)(e - k))) // NB: k < e
1980  || !(klen = (size_t)(v++ - ++k))
1981  || memchr(k, ' ', klen) || memchr(k, '\t', klen)
1982  || !(vlen = (size_t)(e - v))) {
1983  TAR_POST(74, Error,
1984  "Skipping malformed PAX data");
1985  return eFailure;
1986  }
  // Look the key up in the table (exact match)
1987  bool done = false;
1988  for (size_t n = 0; n < sizeof(parser) / sizeof(parser[0]); ++n) {
1989  if (strlen(parser[n].key) == klen
1990  && memcmp(parser[n].key, k, klen) == 0) {
1991  if (!parser[n].val) {
1992  if (parser[n].str) {
1993  parser[n].str->assign(v, vlen);
1994  }
1995  } else if (!s_ParsePAXNumeric(parser[n].val, v, vlen,
1996  parser[n].str, parser[n].bit)) {
  // A bad numeric is reported but does not abort the parsing
1997  TAR_POST(75, Error,
1998  "Ignoring bad numeric \""
1999  + CTempString(v, vlen)
2000  + "\" in PAX value \""
2001  + CTempString(k, klen) + '"');
2002  } else {
2003  parsed |= parser[n].bit;
2004  }
2005  done = true;
2006  break;
2007  }
2008  }
  // Unknown keys are warned about only if they are all lowercase letters
2009  if (!done && s_AllLowerCase(k, klen)/*&& !memchr(k, '.', klen)*/) {
2010  TAR_POST(76, Warning,
2011  "Ignoring unrecognized PAX value \""
2012  + CTempString(k, klen) + '"');
2013  }
2014  if (!*e) {
2015  break;
2016  }
2017  l -= len;
2018  s = ++e;
2019  _ASSERT(l == strlen(s));
2020  } while (l);
2021 
  // Reconcile the sparse file sizes:  "sparse" holds GNU.sparse.realsize or
  // SCHILY.realsize, "dummy" holds GNU.sparse.size
2022  if ((parsed & fPAXSparse) && (sparse | dummy)) {
2023  if (sparse && dummy && sparse != dummy) {
2024  TAR_POST(95, Warning,
2025  "Ignoring PAX GNU sparse file size "
2027  + " when real size "
2028  + NStr::NumericToString(sparse)
2029  + " is also present");
2030  } else if (!dummy && major == 1 && minor == 0) {
2031  if (!(m_Flags & fSparseUnsupported)) {
2032  if (!name.empty()) {
2033  if (!path.empty()) {
2034  TAR_POST(96, Warning,
2035  "Replacing PAX file name \"" + path
2036  + "\" with GNU sparse file name \"" + name
2037  + '"');
2038  }
2039  path.swap(name);
2040  }
2041  parsed |= fPAXSparseGNU_1_0;
2042  }
2043  _ASSERT(sparse);
2044  } else if (!sparse) {
2045  sparse = dummy;
2046  }
2047  size = sparse;
2048  }
2049 
  // Store the results into the current entry
2050  m_Current.m_Name.swap(path);
2051  m_Current.m_LinkName.swap(linkpath);
2052  m_Current.m_UserName.swap(uname);
2053  m_Current.m_GroupName.swap(gname);
2057  m_Current.m_Stat.orig.st_mtime = (time_t) mtime;
2058  m_Current.m_Stat.orig.st_atime = (time_t) atime;
2059  m_Current.m_Stat.orig.st_ctime = (time_t) ctime;
2060  m_Current.m_Stat.orig.st_size = (off_t) size;
2061  m_Current.m_Stat.orig.st_uid = (uid_t) uid;
2062  m_Current.m_Stat.orig.st_gid = (gid_t) gid;
2063  m_Current.m_Pos = sparse; // real (expanded) file size
2064 
  // NB: the mode field is used here to carry the bitmask of parsed values
2065  m_Current.m_Stat.orig.st_mode = (mode_t) parsed;
2066  return eContinue;
2067 }
2068 
2069 
// Post (at Info severity) a dump of the entry header "h", prefixed with the
// archive position, and followed by the count of data blocks when non-zero
// (the count is omitted for an old GNU sparse entry, typeflag 'S', whose
// header has gnu.contind set).
// NB: "level" is defined by a statement that is not visible here.
2070 static void s_Dump(const string& file, Uint8 pos, size_t recsize,
2071  const string& entryname, const STarHeader* h,
2072  ETar_Format fmt, Uint8 datasize)
2073 {
2074  _ASSERT(!OFFSET_OF(pos));
  // Number of blocks that "datasize" bytes occupy once rounded up by ALIGN_SIZE()
2076  Uint8 blocks = BLOCK_OF(ALIGN_SIZE(datasize));
2077  ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2078  + s_DumpHeader(h, fmt) + '\n'
2079  + (blocks
2080  && (h->typeflag[0] != 'S'
2081  || fmt != eTar_OldGNU
2082  || !*h->gnu.contind)
2083  ? "Blocks of data: " + NStr::NumericToString(blocks) + '\n'
2084  : kEmptyStr));
2085  SetDiagPostLevel(level);
2086 }
2087 
2088 
// Post (at Info severity) a dump of a continuation block "h" of a GNU sparse
// file map, prefixed with the archive position.  The count of data blocks is
// added only when "contind" is not set (and the count is non-zero).
// NB: "level" is defined by a statement that is not visible here.
2089 static void s_DumpSparse(const string& file, Uint8 pos, size_t recsize,
2090  const string& entryname, const STarHeader* h,
2091  const char* contind, Uint8 datasize)
2092 {
2093  _ASSERT(!OFFSET_OF(pos));
2095  Uint8 blocks = !*contind ? BLOCK_OF(ALIGN_SIZE(datasize)) : 0;
2096  ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2097  + "GNU sparse file map header (cont'd):\n"
2098  + s_DumpSparseMap(h, (const char*) h, contind) + '\n'
2099  + (blocks
2100  ? "Blocks of data: " + NStr::NumericToString(blocks) + '\n'
2101  : kEmptyStr));
2102  SetDiagPostLevel(level);
2103 }
2104 
2105 
// Post (at Info severity) a dump of the PAX GNU/1.0 sparse file map "bmap",
// prefixed with the archive position.
// NB: "level" is defined by a statement that is not visible here.
2106 static void s_DumpSparse(const string& file, Uint8 pos, size_t recsize,
2107  const string& entryname,
2108  const vector< pair<Uint8, Uint8> >& bmap)
2109 {
2110  _ASSERT(!OFFSET_OF(pos));
2112  ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2113  + "PAX GNU/1.0 sparse file map data:\n"
2114  + s_DumpSparseMap(bmap) + '\n');
2115  SetDiagPostLevel(level);
2116 }
2117 
2118 
// Post (at Info severity) a note about a zero block or the end of archive,
// prefixed with the archive position:  "Zero block N" when "zeroblock_count"
// is non-zero;  otherwise "End-Of-Tape" if "eot" is set, else "End-Of-File".
// NB: "level" is defined by a statement that is not visible here.
2119 static void s_DumpZero(const string& file, Uint8 pos, size_t recsize,
2120  size_t zeroblock_count, bool eot = false)
2121 {
2122  _ASSERT(!OFFSET_OF(pos));
2124  ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, kEmptyStr)
2125  + (zeroblock_count
2126  ? "Zero block " + NStr::NumericToString(zeroblock_count)
2127  : (eot ? "End-Of-Tape" : "End-Of-File")) + '\n');
2128  SetDiagPostLevel(level);
2129 }
2130 
2131 
// Tell whether "c" is an octal digit, i.e. within '0' through '7' inclusive
static inline bool s_IsOctal(char c)
{
    if (c < '0') {
        return false;
    }
    return c <= '7';
}
2136 
2137 
2139 {
2140  // Read block
2141  const TTarBlock* block;
2142  size_t nread = sizeof(block->buffer);
2143  _ASSERT(sizeof(*block) == BLOCK_SIZE/*== sizeof(block->buffer)*/);
2144  if (!(block = (const TTarBlock*) x_ReadArchive(nread))) {
2145  return eEOF;
2146  }
2147  if (nread != BLOCK_SIZE) {
2148  TAR_THROW(this, eRead,
2149  "Unexpected EOF in archive");
2150  }
2151  const STarHeader* h = &block->header;
2152 
2153  // Check header format
2154  ETar_Format fmt = eTar_Unknown;
2155  if (memcmp(h->magic, "ustar", 6) == 0) {
2156  if ((h->star.prefix[sizeof(h->star.prefix) - 1] == '\0'
2157  && s_IsOctal(h->star.atime[0]) && h->star.atime[0] == ' '
2158  && s_IsOctal(h->star.ctime[0]) && h->star.ctime[0] == ' ')
2159  || strcmp(block->buffer + BLOCK_SIZE - 4, "tar") == 0) {
2160  fmt = eTar_Star;
2161  } else {
2162  fmt = pax ? eTar_Posix : eTar_Ustar;
2163  }
2164  } else if (memcmp(h->magic, "ustar ", 8) == 0) {
2165  // Here the magic is protruded into the adjacent version field
2166  fmt = eTar_OldGNU;
2167  } else if (memcmp(h->magic, "\0\0\0\0\0", 6) == 0) {
2168  // We'll use this also to speedup corruption checks w/checksum
2169  fmt = eTar_Legacy;
2170  } else {
2171  TAR_THROW_EX(this, eUnsupportedTarFormat,
2172  "Unrecognized header format", h, fmt);
2173  }
2174 
2175  Uint8 val;
2176  // Get checksum from header
2177  if (!s_OctalToNum(val, h->checksum, sizeof(h->checksum))) {
2178  // We must allow all zero bytes here in case of pad/zero blocks
2179  bool corrupt;
2180  if (fmt == eTar_Legacy) {
2181  corrupt = false;
2182  for (size_t i = 0; i < sizeof(block->buffer); ++i) {
2183  if (block->buffer[i]) {
2184  corrupt = true;
2185  break;
2186  }
2187  }
2188  } else {
2189  corrupt = true;
2190  }
2191  if (corrupt) {
2192  TAR_THROW_EX(this, eUnsupportedTarFormat,
2193  "Bad checksum", h, fmt);
2194  }
2195  m_StreamPos += BLOCK_SIZE; // NB: nread
2196  return eZeroBlock;
2197  }
2198  int checksum = int(val);
2199 
2200  // Compute both signed and unsigned checksums (for compatibility)
2201  int ssum = 0;
2202  unsigned int usum = 0;
2203  const char* p = block->buffer;
2204  for (size_t i = 0; i < sizeof(block->buffer); ++i) {
2205  ssum += *p;
2206  usum += (unsigned char)(*p);
2207  p++;
2208  }
2209  p = h->checksum;
2210  for (size_t j = 0; j < sizeof(h->checksum); ++j) {
2211  ssum -= *p - ' ';
2212  usum -= (unsigned char)(*p) - ' ';
2213  p++;
2214  }
2215 
2216  // Compare checksum(s)
2217  if (checksum != ssum && (unsigned int) checksum != usum) {
2218  string message = "Header checksum failed";
2219  if (m_Flags & fDumpEntryHeaders) {
2220  message += ", expected ";
2221  if (usum != (unsigned int) ssum) {
2222  message += "either ";
2223  }
2224  if (usum > 7) {
2225  message += "0";
2226  }
2227  message += NStr::NumericToString(usum, 0, 8);
2228  if (usum != (unsigned int) ssum) {
2229  message += " or ";
2230  if ((unsigned int) ssum > 7) {
2231  message += "0";
2232  }
2233  message += NStr::NumericToString((unsigned int) ssum, 0, 8);
2234  }
2235  }
2236  TAR_THROW_EX(this, eChecksum,
2237  message, h, fmt);
2238  }
2239 
2240  // Set all info members now (thus, validating the header block)
2241 
2243  unsigned char tflag = toupper((unsigned char) h->typeflag[0]);
2244 
2245  // Name
2246  if (m_Current.GetName().empty()) {
2247  if ((fmt & eTar_Ustar) && h->prefix[0] && tflag != 'X') {
2248  const char* prefix = fmt != eTar_Star ? h->prefix : h->star.prefix;
2249  size_t pfxlen = fmt != eTar_Star
2250  ? s_Length(h->prefix, sizeof(h->prefix))
2251  : s_Length(h->star.prefix, h->typeflag[0] == 'S'
2252  ? 107 : sizeof(h->star.prefix));
2254  = CDirEntry::ConcatPath(string(prefix, pfxlen),
2255  string(h->name,
2256  s_Length(h->name,
2257  sizeof(h->name))));
2258  } else {
2259  // Name prefix cannot be used
2260  m_Current.m_Name.assign(h->name,
2261  s_Length(h->name, sizeof(h->name)));
2262  }
2263  }
2264 
2265  // Mode
2266  if (!s_OctalToNum(val, h->mode, sizeof(h->mode))
2267  && (val || h->typeflag[0] != 'V')) {
2268  TAR_THROW_EX(this, eUnsupportedTarFormat,
2269  "Bad entry mode", h, fmt);
2270  }
2271  m_Current.m_Stat.orig.st_mode = (mode_t) val;
2272 
2273  // User Id
2274  if (!s_DecodeUint8(val, h->uid, sizeof(h->uid))
2275  && (val || h->typeflag[0] != 'V')) {
2276  TAR_THROW_EX(this, eUnsupportedTarFormat,
2277  "Bad user ID", h, fmt);
2278  }
2279  m_Current.m_Stat.orig.st_uid = (uid_t) val;
2280 
2281  // Group Id
2282  if (!s_DecodeUint8(val, h->gid, sizeof(h->gid))
2283  && (val || h->typeflag[0] != 'V')) {
2284  TAR_THROW_EX(this, eUnsupportedTarFormat,
2285  "Bad group ID", h, fmt);
2286  }
2287  m_Current.m_Stat.orig.st_gid = (gid_t) val;
2288 
2289  // Size
2290  if (!s_DecodeUint8(val, h->size, sizeof(h->size))
2291  && (val || h->typeflag[0] != 'V')) {
2292  TAR_THROW_EX(this, eUnsupportedTarFormat,
2293  "Bad entry size", h, fmt);
2294  }
2295  m_Current.m_Stat.orig.st_size = (off_t) val;
2296  if (m_Current.GetSize() != val) {
2297  ERR_POST_ONCE(Critical << "CAUTION:"
2298  " ***"
2299  " This run-time may not support large TAR entries"
2300  " (have you built it --with-lfs?)"
2301  " ***");
2302  }
2303 
2304  // Modification time
2305  if (!s_OctalToNum(val, h->mtime, sizeof(h->mtime))) {
2306  TAR_THROW_EX(this, eUnsupportedTarFormat,
2307  "Bad modification time", h, fmt);
2308  }
2309  m_Current.m_Stat.orig.st_mtime = (time_t) val;
2310 
2311  if (fmt == eTar_OldGNU || (fmt & eTar_Ustar)) {
2312  // User name
2313  m_Current.m_UserName.assign(h->uname,
2314  s_Length(h->uname, sizeof(h->uname)));
2315  // Group name
2316  m_Current.m_GroupName.assign(h->gname,
2317  s_Length(h->gname,sizeof(h->gname)));
2318  }
2319 
2320  if (fmt == eTar_OldGNU || fmt == eTar_Star) {
2321  // GNU times may not be valid so checks are relaxed
2322  const char* time;
2323  size_t tlen;
2324  time = fmt == eTar_Star ? h->star.atime : h->gnu.atime;
2325  tlen = fmt == eTar_Star ? sizeof(h->star.atime) : sizeof(h->gnu.atime);
2326  if (!s_OctalToNum(val, time, tlen)) {
2327  if (fmt == eTar_Star || memcchr(time, '\0', tlen)) {
2328  TAR_THROW_EX(this, eUnsupportedTarFormat,
2329  "Bad last access time", h, fmt);
2330  }
2331  } else {
2332  m_Current.m_Stat.orig.st_atime = (time_t) val;
2333  }
2334  time = fmt == eTar_Star ? h->star.ctime : h->gnu.ctime;
2335  tlen = fmt == eTar_Star ? sizeof(h->star.ctime) : sizeof(h->gnu.ctime);
2336  if (!s_OctalToNum(val, time, tlen)) {
2337  if (fmt == eTar_Star || memcchr(time, '\0', tlen)) {
2338  TAR_THROW_EX(this, eUnsupportedTarFormat,
2339  "Bad creation time", h, fmt);
2340  }
2341  } else {
2342  m_Current.m_Stat.orig.st_ctime = (time_t) val;
2343  }
2344  }
2345 
2346  // Entry type
2347  switch (h->typeflag[0]) {
2348  case '\0':
2349  case '0':
2350  if (!(fmt & eTar_Ustar) && fmt != eTar_OldGNU) {
2351  size_t namelen = s_Length(h->name, sizeof(h->name));
2352  if (namelen && h->name[namelen - 1] == '/') {
2354  m_Current.m_Stat.orig.st_size = 0;
2355  break;
2356  }
2357  }
2359  break;
2360  case '\1':
2361  case '\2':
2362  case '1':
2363  case '2':
2364  m_Current.m_Type = (h->typeflag[0] == '\2' || h->typeflag[0] == '2'
2367  m_Current.m_LinkName.assign(h->linkname,
2368  s_Length(h->linkname,sizeof(h->linkname)));
2369  if (m_Current.GetSize()) {
2371  // Mandatory to ignore
2372  m_Current.m_Stat.orig.st_size = 0;
2373  } else if (fmt != eTar_Posix) {
2374  TAR_POST(77, Trace,
2375  "Ignoring hard link size ("
2377  + ')');
2378  m_Current.m_Stat.orig.st_size = 0;
2379  } // else POSIX (re-)allowed hard links to be followed by file data
2380  }
2381  break;
2382  case '3':
2383  case '4':
2384  m_Current.m_Type = (h->typeflag[0] == '3'
2387  if (!s_OctalToNum(val, h->devminor, sizeof(h->devminor))) {
2388  TAR_THROW_EX(this, eUnsupportedTarFormat,
2389  "Bad device minor number", h, fmt);
2390  }
2391  usum = (unsigned int) val; // set aside
2392  if (!s_OctalToNum(val, h->devmajor, sizeof(h->devmajor))) {
2393  TAR_THROW_EX(this, eUnsupportedTarFormat,
2394  "Bad device major number", h, fmt);
2395  }
2396 #ifdef makedev
2397  m_Current.m_Stat.orig.st_rdev = makedev((unsigned int) val, usum);
2398 #else
2399  if (sizeof(int) >= 4 && sizeof(m_Current.m_Stat.orig.st_rdev) >= 4) {
2400  *((unsigned int*) &m_Current.m_Stat.orig.st_rdev) =
2401  (unsigned int)((val << 16) | usum);
2402  }
2403 #endif //makedev
2404  m_Current.m_Stat.orig.st_size = 0;
2405  break;
2406  case '5':
2408  m_Current.m_Stat.orig.st_size = 0;
2409  break;
2410  case '6':
2412  m_Current.m_Stat.orig.st_size = 0;
2413  break;
2414  case '7':
2415  ERR_POST_ONCE(Critical << "CAUTION:"
2416  " *** Contiguous TAR entries processed as regular files"
2417  " ***");
2419  break;
2420  case 'K':
2421  case 'L':
2422  case 'S':
2423  case 'x':
2424  case 'X':
2425  if ((tflag == 'X' && (fmt & eTar_Ustar)) ||
2426  (tflag != 'X' && fmt == eTar_OldGNU) ||
2427  (tflag == 'S' && fmt == eTar_Star)) {
2428  // Assign actual type
2429  switch (tflag) {
2430  case 'K':
2432  break;
2433  case 'L':
2435  break;
2436  case 'S':
2438  break;
2439  case 'X':
2440  if (pax) {
2441  TAR_POST(78, Warning,
2442  "Repetitious PAX headers,"
2443  " archive may be corrupt");
2444  }
2445  fmt = eTar_Posix; // upgrade
2447  break;
2448  default:
2449  _TROUBLE;
2450  break;
2451  }
2452 
2453  // Dump header
2454  size_t hsize = (size_t) m_Current.GetSize();
2455  if (dump) {
2457  m_Current.GetName(), h, fmt, hsize);
2458  }
2459  m_StreamPos += BLOCK_SIZE; // NB: nread
2460 
2462  const char* realsize = fmt != eTar_Star
2463  ? h->gnu.realsize : h->star.prefix + 107;
2464  size_t realsizelen = fmt != eTar_Star
2465  ? sizeof(h->gnu.realsize) : 12;
2466  // Real file size (if present)
2467  if (!s_DecodeUint8(val, realsize, realsizelen)) {
2468  val = 0;
2469  }
2470  if (fmt == eTar_Star) {
2471  // Archive file size includes sparse map, and already valid
2472  m_Current.m_Pos = val; // NB: real (expanded) file size
2473  return eSuccess;
2474  }
2475  // Skip all GNU sparse file headers (they are not counted
2476  // towards the sparse file size in the archive ("hsize")!)
2477  const char* contind = h->gnu.contind;
2478  while (*contind) {
2479  _ASSERT(nread == BLOCK_SIZE);
2480  if (!(block = (const TTarBlock*) x_ReadArchive(nread))
2481  || nread != BLOCK_SIZE) {
2482  TAR_THROW(this, eRead,
2483  "Unexpected EOF in GNU sparse file map"
2484  " extended header");
2485  }
2486  h = &block->header;
2487  contind = block->buffer + (24 * 21)/*504*/;
2488  if (dump) {
2490  m_Current.GetName(), h, contind, hsize);
2491  }
2493  m_StreamPos += BLOCK_SIZE; // NB: nread
2494  }
2495  m_Current.m_Pos = val; // NB: real (expanded) file size
2496  return eSuccess;
2497  }
2498 
2499  // Read in the extended header information
2500  val = ALIGN_SIZE(hsize);
2501  string data;
2502  while (hsize) {
2503  nread = hsize;
2504  const char* xbuf = x_ReadArchive(nread);
2505  if (!xbuf) {
2506  TAR_THROW(this, eRead,
2507  string("Unexpected EOF in ") +
2508  (m_Current.GetType()
2510  ? "PAX data" :
2511  m_Current.GetType()
2513  ? "long name"
2514  : "long link"));
2515  }
2516  _ASSERT(nread);
2517  data.append(xbuf, nread);
2518  hsize -= nread;
2519  m_StreamPos += ALIGN_SIZE(nread);
2520  }
2522  // Make sure there's no embedded '\0'(s)
2523  data.resize(strlen(data.c_str()));
2524  }
2525  if (dump) {
2528  m_StreamPos - val,
2529  m_BufferSize,
2530  m_Current.GetName())
2532  ? "PAX data:\n" :
2534  ? "Long name: \""
2535  : "Long link name: \"")
2537  m_Current.GetType()
2542  ? data.size() && data[data.size() - 1] == '\n'
2543  ? kEmptyStr : "\n" : "\"\n"));
2544  SetDiagPostLevel(level);
2545  }
2546  // Reset size because the data blocks have been all read
2548  m_Current.m_Stat.orig.st_size = 0;
2549  if (!val || !data.size()) {
2550  TAR_POST(79, Error,
2551  "Skipping " + string(val ? "empty" : "zero-sized")
2552  + " extended header data");
2553  return eFailure;
2554  }
2555  switch (m_Current.GetType()) {
2557  return x_ParsePAXData(data);
2559  m_Current.m_Name.swap(data);
2560  return eContinue;
2562  m_Current.m_LinkName.swap(data);
2563  return eContinue;
2564  default:
2565  _TROUBLE;
2566  break;
2567  }
2568  return eFailure;
2569  }
2570  /*FALLTHRU*/
2571  case 'V':
2572  case 'I':
2573  if (h->typeflag[0] == 'V' || h->typeflag[0] == 'I') {
2574  // Safety for no data to actually follow
2575  m_Current.m_Stat.orig.st_size = 0;
2576  if (h->typeflag[0] == 'V') {
2578  break;
2579  }
2580  }
2581  /*FALLTHRU*/
2582  default:
2584  break;
2585  }
2586 
2587  if (dump) {
2589  m_Current.GetName(), h, fmt, m_Current.GetSize());
2590  }
2591  m_StreamPos += BLOCK_SIZE; // NB: nread
2592 
2593  return eSuccess;
2594 }
2595 
2596 
2597 static inline void sx_Signature(TTarBlock* block)
2598 {
2599  _ASSERT(sizeof(block->header) + 4 < sizeof(block->buffer));
2600  memcpy(block->buffer + sizeof(*block) - 4, "NCBI", 4);
2601 }
2602 
2603 
// Build a single tar header block describing m_Current and write it to the
// archive.
//
// The block is zero-filled first, then populated field by field: name (and
// link name for symlinks), mode, user/group IDs, size, modification time,
// type flag, device numbers, user/group names, magic/version, the optional
// NCBI signature and finally the checksum.  After the block is written,
// m_Current.m_HeaderSize is updated and the Checkpoint() callback is invoked.
//
// @param name
//   Used in the visible code only to compose diagnostic messages.
// Throws (via TAR_THROW) eNameTooLong, eMemory or eUnsupportedEntryType when
// a field cannot be stored.
//
// NOTE(review): this listing is missing several lines of the function (e.g.
// the declaration of "type", some "case" labels and the heads of a few "if"
// statements), so comments below describe only what is visible.
2604 void CTar::x_WriteEntryInfo(const string& name)
2605 {
2606  // Prepare block info
2607  TTarBlock block;
2608  _ASSERT(sizeof(block) == BLOCK_SIZE/*== sizeof(block.buffer)*/);
2609  memset(block.buffer, 0, sizeof(block.buffer));
2610  STarHeader* h = &block.header;
2611 
2612  // Name(s) ('\0'-terminated if fit entirely, otherwise not)
2613  if (!x_PackCurrentName(h, false)) {
2614  TAR_THROW(this, eNameTooLong,
2615  "Name '" + m_Current.GetName()
2616  + "' too long in entry '" + name + '\'');
2617  }
2618 
2620 
     // Only symbolic links get their link name packed into the header
     // NOTE(review): "type" is declared on a line not visible here;
     // presumably it is the type of m_Current -- confirm.
2621  if (type == CTarEntryInfo::eSymLink && !x_PackCurrentName(h, true)) {
2622  TAR_THROW(this, eNameTooLong,
2623  "Link '" + m_Current.GetLinkName()
2624  + "' too long in entry '" + name + '\'');
2625  }
2626 
2627  /* NOTE: Although some sources on the Internet indicate that all but size,
2628  * mtime, and version numeric fields are '\0'-terminated, we could not
2629  * confirm that with existing tar programs, all of which we saw using
2630  * either '\0' or ' '-terminated values in both size and mtime fields.
2631  * For the ustar archive we have found a document that definitively tells
2632  * that _all_ numeric fields are '\0'-terminated, and that they can keep
2633  * up to "sizeof(field)-1" octal digits. We follow it here.
2634  * However, GNU and ustar checksums seem to be different indeed, so we
2635  * don't use a trailing space for ustar, but for GNU only.
2636  */
2637 
2638  // Mode
     // (the last byte of the field is left as the '\0' from the memset above)
2639  if (!s_NumToOctal(m_Current.GetMode(), h->mode, sizeof(h->mode) - 1)) {
2640  TAR_THROW(this, eMemory,
2641  "Cannot store file mode");
2642  }
2643 
2644  // Update format as we go
     // Starts out as ustar; switched to old GNU below whenever
     // s_EncodeUint8() reports a negative result
2645  ETar_Format fmt = eTar_Ustar;
2646  int ok;
2647 
2648  // User ID
     // ok == 0: value could not be stored;  ok < 0: stored, but presumably in
     // a non-octal (GNU) encoding -- confirm against s_EncodeUint8()
2649  ok = s_EncodeUint8(m_Current.GetUserId(), h->uid, sizeof(h->uid) - 1);
2650  if (!ok) {
2651  TAR_THROW(this, eMemory,
2652  "Cannot store user ID");
2653  }
2654  if (ok < 0) {
2655  fmt = eTar_OldGNU;
2656  }
2657 
2658  // Group ID
2659  ok = s_EncodeUint8(m_Current.GetGroupId(), h->gid, sizeof(h->gid) - 1);
2660  if (!ok) {
2661  TAR_THROW(this, eMemory,
2662  "Cannot store group ID");
2663  }
2664  if (ok < 0) {
2665  fmt = eTar_OldGNU;
2666  }
2667 
2668  // Size
2670  ok = s_EncodeUint8(m_Current.GetSize(), h->size, sizeof(h->size) - 1);
2671  if (!ok) {
2672  TAR_THROW(this, eMemory,
2673  "Cannot store file size");
2674  }
2675  if (ok < 0) {
2676  fmt = eTar_OldGNU;
2677  }
2678 
     // When the name was split and the prefix field is in use, the header is
     // kept as ustar regardless of how the numeric fields got encoded
2679  if (fmt != eTar_Ustar && h->prefix[0]) {
2680  // Cannot downgrade to reflect encoding
2681  fmt = eTar_Ustar;
2682  }
2683 
2684  // Modification time
     // NOTE(review): the head of this "if" is on a line missing from the
     // listing; only its continuation is visible.
2686  h->mtime, sizeof(h->mtime) - 1)) {
2687  TAR_THROW(this, eMemory,
2688  "Cannot store modification time");
2689  }
2690 
     // Set to true only for the entry types that store major/minor numbers
2691  bool device = false;
2692  // Type (GNU extension for SymLink)
2693  switch (type) {
2694  case CTarEntryInfo::eFile:
2695  h->typeflag[0] = '0';
2696  break;
2698  h->typeflag[0] = '2';
2699  break;
2702  h->typeflag[0] = type == CTarEntryInfo::eCharDev ? '3' : '4';
2704  h->devmajor, sizeof(h->devmajor) - 1)) {
2705  TAR_THROW(this, eMemory,
2706  "Cannot store major number");
2707  }
2709  h->devminor, sizeof(h->devminor) - 1)) {
2710  TAR_THROW(this, eMemory,
2711  "Cannot store minor number");
2712  }
2713  device = true;
2714  break;
2715  case CTarEntryInfo::eDir:
2716  h->typeflag[0] = '5';
2717  break;
2718  case CTarEntryInfo::ePipe:
2719  h->typeflag[0] = '6';
2720  break;
2721  default:
2722  _TROUBLE;
2723  TAR_THROW(this, eUnsupportedEntryType,
2724  "Do not know how to archive entry '" + name
2725  + "' of type #" + NStr::IntToString(int(type))
2726  + ": Internal error");
2727  /*NOTREACHED*/
2728  break;
2729  }
2730 
2731  // User and group
     // A name is copied only when strictly shorter than its field, so the
     // zero-filled remainder terminates it; a longer name is left out
2732  const string& usr = m_Current.GetUserName();
2733  size_t len = usr.size();
2734  if (len < sizeof(h->uname)) {
2735  memcpy(h->uname, usr.c_str(), len);
2736  }
2737  const string& grp = m_Current.GetGroupName();
2738  len = grp.size();
2739  if (len < sizeof(h->gname)) {
2740  memcpy(h->gname, grp.c_str(), len);
2741  }
2742 
2743  // Device numbers to complete the ustar header protocol (all fields ok)
2744  if (!device && fmt != eTar_OldGNU) {
2745  s_NumToOctal(0, h->devmajor, sizeof(h->devmajor) - 1);
2746  s_NumToOctal(0, h->devminor, sizeof(h->devminor) - 1);
2747  }
2748 
2749  if (fmt != eTar_OldGNU) {
2750  // Magic
2751  strcpy(h->magic, "ustar");
2752  // Version (EXCEPTION: not '\0' terminated)
2753  memcpy(h->version, "00", 2);
2754  } else {
2755  // NB: Old GNU magic protrudes into adjacent version field
2756  memcpy(h->magic, "ustar ", 8); // 2 spaces and '\0'-terminated
2757  }
2758 
2759  // NCBI signature if allowed
2760  if (!(m_Flags & fStandardHeaderOnly)) {
2761  sx_Signature(&block);
2762  }
2763 
2764  // Final step: checksumming
     // The second argument is true only for the old GNU header format
2765  if (!s_TarChecksum(&block, fmt == eTar_OldGNU ? true : false)) {
2766  TAR_THROW(this, eMemory,
2767  "Cannot store checksum");
2768  }
2769 
2770  // Write header
2771  x_WriteArchive(sizeof(block.buffer), block.buffer);
     // Header size is measured from the entry's start position, so it also
     // covers anything written for this entry before this block
2772  m_Current.m_HeaderSize = (streamsize)(m_StreamPos - m_Current.m_Pos);
2773 
2774  Checkpoint(m_Current, true/*write*/);
2775 }
2776 
2777 
2779 {
     // Store either the entry name (link == false) or the link name
     // (link == true) of m_Current into the header "h".
     //
     // Strategy, as visible below:
     //  1. if the name fits in the target field, copy it and return true;
     //  2. for entry names only, try to split at a '/' into the "prefix" and
     //     "name" header fields;
     //  3. otherwise (or additionally, when fLongNameSupplement is set) emit
     //     an extra "././@LongLink" header block (type 'L' for names, 'K' for
     //     links) followed by the full '\0'-terminated name.
     //
     // Returns false only when the long name's size cannot be encoded in the
     // extended header; true otherwise.
     //
     // NOTE(review): the function's signature line is not visible in this
     // listing; "h" and "link" are its parameters as used below.
2780  const string& name = link ? m_Current.GetLinkName() : m_Current.GetName();
2781  size_t size = link ? sizeof(h->linkname) : sizeof(h->name);
2782  char* dst = link ? h->linkname : h->name;
2783  const char* src = name.c_str();
2784  size_t len = name.size();
2785 
2786  if (len <= size) {
2787  // Name fits!
     // (no terminator is copied: when len == size the field is filled
     // completely and stays unterminated)
2788  memcpy(dst, src, len);
2789  return true;
2790  }
2791 
     // Set when the name has been successfully split into prefix + name
2792  bool packed = false;
2793  if (!link && len <= sizeof(h->prefix) + 1 + sizeof(h->name)) {
2794  // Try to split the long name into a prefix and a short name (POSIX)
2795  size_t i = len;
2796  if (i > sizeof(h->prefix)) {
2797  i = sizeof(h->prefix);
2798  }
     // Scan backwards for the last '/' that keeps the prefix within its field
2799  while (i > 0 && src[--i] != '/');
     // The split is usable only if a non-empty prefix was found (i != 0) and
     // the part after the separator fits into the name field
2800  if (i && len - i <= sizeof(h->name) + 1) {
2801  memcpy(h->prefix, src, i);
2802  memcpy(h->name, src + i + 1, len - i - 1);
2803  if (!(m_Flags & fLongNameSupplement))
2804  return true;
2805  packed = true;
2806  }
2807  }
2808 
2809  // Still, store the initial part in the original header
2810  if (!packed) {
2811  memcpy(dst, src, size);
2812  }
2813 
2814  // Prepare extended block header with the long name info (old GNU style)
     // The extended header is composed in place, inside the archive buffer at
     // the current buffer position; from here on "h" refers to that new block
2816  TTarBlock* block = (TTarBlock*)(m_Buffer + m_BufferPos);
2817  memset(block->buffer, 0, sizeof(block->buffer));
2818  h = &block->header;
2819 
2820  // See above for comments about header filling
2821  ++len; // write terminating '\0' as it can always be made to fit in
2822  strcpy(h->name, "././@LongLink");
2823  s_NumToOctal(0, h->mode, sizeof(h->mode) - 1);
2824  s_NumToOctal(0, h->uid, sizeof(h->uid) - 1);
2825  s_NumToOctal(0, h->gid, sizeof(h->gid) - 1);
2826  if (!s_EncodeUint8(len, h->size, sizeof(h->size) - 1)) {
2827  return false;
2828  }
2829  s_NumToOctal(0, h->mtime, sizeof(h->mtime)- 1);
2830  h->typeflag[0] = link ? 'K' : 'L';
2831 
2832  // Old GNU magic protrudes into adjacent version field
2833  memcpy(h->magic, "ustar ", 8); // 2 spaces and '\0'-terminated
2834 
2835  // NCBI signature if allowed
2836  if (!(m_Flags & fStandardHeaderOnly)) {
2837  sx_Signature(block);
2838  }
2839 
     // NB: the checksum result is not checked here
2840  s_TarChecksum(block, true);
2841 
2842  // Write the header
     // (no data pointer is passed: the block was built in the buffer already)
2843  x_WriteArchive(sizeof(block->buffer));
2844 
2845  // Store the full name in the extended block (will be aligned as necessary)
2846  x_WriteArchive(len, src);
2847 
2848  return true;
2849 }
2850 
2851 
2853 {
     // Step the archive position back over the zero blocks counted in
     // m_ZeroBlockCount, so that following writes overwrite them.
     //
     // For a non-file stream only the in-memory buffer position can be moved
     // back, so at most m_BufferPos bytes are reclaimed.  For a file stream
     // the record containing the new position is re-read if necessary and the
     // put position is reset to the start of that record.
     //
     // NOTE(review): the signature line and several statements of this
     // function (including the declaration of "rec") are missing from this
     // listing; "action" is a parameter as used below.
2856  m_Current.m_Name.clear();
2857  if (!m_ZeroBlockCount) {
     // Nothing to back up over
2858  return;
2859  }
2860 
     // Number of bytes occupied by the zero blocks seen
2861  size_t gap = SIZE_OF(m_ZeroBlockCount);
2862  if (!m_FileStream) {
2863  if (gap > m_BufferPos) {
     // Not all zero blocks are still in the buffer: back up as far as the
     // buffer allows and keep count of the blocks that remain
2864  if (action == eAppend || action == eUpdate) {
2865  TAR_POST(4, Warning,
2866  "In-stream update may result in gapped tar archive");
2867  }
2868  gap = m_BufferPos;
2869  m_ZeroBlockCount -= BLOCK_OF(gap);
2870  }
2871  m_BufferPos -= gap;
2872  m_StreamPos -= gap;
2873  return;
2874  }
2875 
2876  // Tarfile here
2877  m_StreamPos -= gap;
     // Offset of the new stream position within its record
     // NOTE(review): "rec", used below, is declared on a missing line;
     // presumably the index of that record -- confirm.
2879  size_t off = (size_t) (m_StreamPos % m_BufferSize);
2880  if (m_BufferPos == 0) {
2882  }
2883  if (gap > m_BufferPos) {
     // The target position lies in a record that is not the one currently
     // held in the buffer
2884  m_BufferPos = 0;
2885  size_t temp = BLOCK_SIZE;
2886  // Re-fetch the entire record
2887  if (!m_FileStream->seekg(rec * m_BufferSize)
2888  // NB: successful positioning guarantees the stream was !fail(),
2889  // which means it might have only been either good() or eof()
2890  || (m_FileStream->clear(), !x_ReadArchive(temp))
2891  || temp != BLOCK_SIZE) {
2892  TAR_POST(65, Error,
2893  "Archive backspace error in record reget");
2895  return;
2896  }
2897  m_BufferPos = off;
2898  } else {
2899  m_BufferPos -= gap;
2900  }
2902 
2903  // Always reset the put position there
2904 #if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 1101
2905  m_FileStream->clear(); // This is to only work around a bug
2906 #endif //_LIBCPP_VERSION
2907  if (!m_FileStream->seekp(rec * m_BufferSize)) {
2908  TAR_POST(80, Error,
2909  "Archive backspace error in record reset");
2911  return;
2912  }
     // All counted zero blocks have been backed over
2913  m_ZeroBlockCount = 0;
2914 }
2915 
2916 
2917 static bool s_MatchExcludeMask(const CTempString& name,
2918  const list<CTempString>& elems,
2919  const CMask* mask,
2920  NStr::ECase acase)
2921 {
2922  _ASSERT(!name.empty() && mask);
2923  if (elems.empty()) {
2924  return mask->Match(name, acase);
2925  }
2926  if (elems.size() == 1) {
2927  return mask->Match(elems.front(), acase);
2928  }
2929  string temp;
2930  REVERSE_ITERATE(list<CTempString>, e, elems) {
2931  temp = temp.empty() ? string(*e) : string(*e) + '/' + temp;
2932  if (mask->Match(temp, acase)) {
2933  return true;
2934  }
2935  }
2936  return false;
2937 }
2938 
2939 
// Main archive reading loop.
//
// Reads entry headers one at a time with x_ReadEntryInfo().  Extended header
// information is accumulated in "xinfo" across iterations and merged into
// m_Current once the header of the actual entry arrives.  Each complete entry
// is then passed to the Checkpoint() callback, matched against the masks, and
// handed to x_ProcessEntry().
//
// Returns the list of entries collected in "done".  The loop ends at EOF or
// at the end-of-archive marker (after calling x_Backspace()), or, for
// eInternal, right after the first entry that gets added.
//
// NOTE(review): many lines of this function are missing from this listing
// (several "case" labels and statements), so the comments below cover only
// what is visible.
2940 unique_ptr<CTar::TEntries> CTar::x_ReadAndProcess(EAction action)
2941 {
2942  unique_ptr<TEntries> done(new TEntries);
     // "pos" tracks where the entry being assembled begins in the stream
2944  Uint8 pos = m_StreamPos;
     // Extended header information pending for the next regular entry
2945  CTarEntryInfo xinfo;
2946 
2947  m_ZeroBlockCount = 0;
2948  for (;;) {
2949  // Next block is supposed to be a header
2950  m_Current = CTarEntryInfo(pos);
     // Carry over a name obtained from a preceding extended header, if any
2951  m_Current.m_Name = xinfo.GetName();
     // 1st argument: dump headers (only when testing with fDumpEntryHeaders);
     // 2nd argument: whether a PAX header has already been seen for this entry
2952  EStatus status = x_ReadEntryInfo
2953  (action == eTest && (m_Flags & fDumpEntryHeaders),
2954  xinfo.GetType() == CTarEntryInfo::ePAXHeader);
2955  switch (status) {
2956  case eFailure:
2957  case eSuccess:
2958  case eContinue:
2960  Uint8 save_pos = m_StreamPos;
2963  TAR_POST(5, Error,
2964  "Interspersing zero block ignored");
2965  m_StreamPos = save_pos;
2966  }
2967  break;
2968 
2969  case eZeroBlock:
2970  m_ZeroBlockCount++;
2971  if (action == eTest && (m_Flags & fDumpEntryHeaders)) {
2974  }
     // A lone zero block (or any number of them with fIgnoreZeroBlocks set)
     // does not end the archive: keep reading
2975  if ((m_Flags & fIgnoreZeroBlocks) || m_ZeroBlockCount < 2) {
2976  if (xinfo.GetType() == CTarEntryInfo::eUnknown) {
2977  // Not yet reading an entry -- advance
2978  pos += BLOCK_SIZE;
2979  }
2980  continue;
2981  }
2982  // Two zero blocks -> eEOF
2983  /*FALLTHRU*/
2984 
2985  case eEOF:
2986  if (action == eTest && (m_Flags & fDumpEntryHeaders)) {
2988  status != eEOF ? true : false);
2989  }
2990  if (xinfo.GetType() != CTarEntryInfo::eUnknown) {
     // Extended header(s) were read but no entry followed them
2991  TAR_POST(6, Error,
2992  "Orphaned extended information ignored");
2993  } else if (m_ZeroBlockCount < 2 && action != eAppend) {
     // Fewer than two terminating zero blocks: nothing read at all is an
     // error, otherwise only a warning
2994  if (!m_StreamPos) {
2995  TAR_THROW(this, eRead,
2996  "Unexpected EOF in archive");
2997  }
2998  TAR_POST(58, Warning,
3000  ? "Incomplete EOT in archive"
3001  : "Missing EOT in archive");
3002  }
3003  x_Backspace(action);
3004  return done;
3005  }
     // A header was read: any zero block counted before it was interspersed
3006  m_ZeroBlockCount = 0;
3007 
3008  //
3009  // Process entry
3010  //
3011  if (status == eContinue) {
3012  // Extended header information has just been read in
3014 
     // Latch what was just read into "xinfo" and go read the next header
3015  switch (m_Current.GetType()) {
3017  xinfo.m_Pos = m_Current.m_Pos; // NB: real (expanded) filesize
3018  m_Current.m_Pos = pos;
3019  if (xinfo.GetType() != CTarEntryInfo::eUnknown) {
3020  TAR_POST(7, Error,
3021  "Unused extended header replaced");
3022  }
3024  xinfo.m_Name.swap(m_Current.m_Name);
3025  xinfo.m_LinkName.swap(m_Current.m_LinkName);
3026  xinfo.m_UserName.swap(m_Current.m_UserName);
3027  xinfo.m_GroupName.swap(m_Current.m_GroupName);
3028  xinfo.m_Stat = m_Current.m_Stat;
3029  continue;
3030 
3032  if (xinfo.GetType() == CTarEntryInfo::ePAXHeader
3033  || !xinfo.GetName().empty()) {
3034  TAR_POST(8, Error,
3035  "Unused long name \"" + xinfo.GetName()
3036  + "\" replaced");
3037  }
3038  // Latch next long name here then just skip
3040  xinfo.m_Name.swap(m_Current.m_Name);
3041  continue;
3042 
3044  if (xinfo.GetType() == CTarEntryInfo::ePAXHeader
3045  || !xinfo.GetLinkName().empty()) {
3046  TAR_POST(9, Error,
3047  "Unused long link \"" + xinfo.GetLinkName()
3048  + "\" replaced");
3049  }
3050  // Latch next long link here then just skip
3052  xinfo.m_LinkName.swap(m_Current.m_LinkName);
3053  continue;
3054 
3055  default:
3056  _TROUBLE;
3057  NCBI_THROW(CCoreException, eCore, "Internal error");
3058  /*NOTREACHED*/
3059  break;
3060  }
3061  }
3062 
3063  // Fixup current 'info' with extended information obtained previously
3065  xinfo.m_HeaderSize = 0;
     // Names found in extended headers override those from the entry header
3066  if (!xinfo.GetName().empty()) {
3067  xinfo.m_Name.swap(m_Current.m_Name);
3068  xinfo.m_Name.clear();
3069  }
3070  if (!xinfo.GetLinkName().empty()) {
3071  xinfo.m_LinkName.swap(m_Current.m_LinkName);
3072  xinfo.m_LinkName.clear();
3073  }
     // Bit set of the fields that were parsed from a PAX header
3074  TPAXBits parsed;
3075  if (xinfo.GetType() == CTarEntryInfo::ePAXHeader) {
     // NB: the parsed-bits mask is read from the st_mode slot of xinfo's stat
3076  parsed = (TPAXBits) xinfo.m_Stat.orig.st_mode;
3077  if (!xinfo.GetUserName().empty()) {
3078  xinfo.m_UserName.swap(m_Current.m_UserName);
3079  xinfo.m_UserName.clear();
3080  }
3081  if (!xinfo.GetGroupName().empty()) {
3082  xinfo.m_GroupName.swap(m_Current.m_GroupName);
3083  xinfo.m_GroupName.clear();
3084  }
     // Copy over only those values that the PAX header actually provided
3085  if (parsed & fPAXMtime) {
3086  m_Current.m_Stat.orig.st_mtime = xinfo.m_Stat.orig.st_mtime;
3088  }
3089  if (parsed & fPAXAtime) {
3090  m_Current.m_Stat.orig.st_atime = xinfo.m_Stat.orig.st_atime;
3092  }
3093  if (parsed & fPAXCtime) {
3094  m_Current.m_Stat.orig.st_ctime = xinfo.m_Stat.orig.st_ctime;
3096  }
3097  if (parsed & fPAXSparse) {
3098  // Mark to post-process below
3100  }
3101  if (parsed & fPAXSize) {
3102  m_Current.m_Stat.orig.st_size = xinfo.m_Stat.orig.st_size;
3103  }
3104  if (parsed & fPAXUid) {
3105  m_Current.m_Stat.orig.st_uid = xinfo.m_Stat.orig.st_uid;
3106  }
3107  if (parsed & fPAXGid) {
3108  m_Current.m_Stat.orig.st_gid = xinfo.m_Stat.orig.st_gid;
3109  }
3110  } else {
3111  parsed = fPAXNone/*0*/;
3112  }
3115  if (xinfo.m_Pos < m_Current.m_Pos) {
3116  xinfo.m_Pos = m_Current.m_Pos; // NB: real (expanded) filesize
3117  }
3118  m_Current.m_Pos = pos;
3119  }
3120  Uint8 size = m_Current.GetSize(); // NB: archive size to read
3121  if (xinfo.GetType() == CTarEntryInfo::eSparseFile) {
3124  TAR_POST(103, Error,
3125  "Ignoring sparse data for non-plain file");
3126  } else if (parsed & fPAXSparseGNU_1_0) {
3127  m_Current.m_Stat.orig.st_size = size ? (off_t) xinfo.m_Pos : 0;
3129  } else {
     // Report the larger (expanded) size for the entry
3131  if (size < xinfo.m_Pos) {
3132  m_Current.m_Stat.orig.st_size = (off_t) xinfo.m_Pos;
3133  }
3134  }
3135  }
3136  xinfo.m_Pos = 0;
3138  _ASSERT(status == eFailure || status == eSuccess);
3139 
3140  // Last sanity check
3141  if (status != eFailure && m_Current.GetName().empty()) {
3142  TAR_THROW(this, eBadName,
3143  "Empty entry name in archive");
3144  }
3145  // User callback
     // A false return from the callback makes the entry be treated as failed
3146  if (!Checkpoint(m_Current, false/*read*/)) {
3147  status = eFailure;
3148  }
3149 
3150  // Match file name with the set of masks
     // A failed entry never matches; the extract mask is consulted only for
     // the list, extract and internal actions
3151  bool match = (status != eSuccess ? false
3152  : m_Mask[eExtractMask].mask && (action == eList ||
3153  action == eExtract ||
3154  action == eInternal)
3157  .acase)
3158  : true);
3159  if (match && m_Mask[eExcludeMask].mask && action != eTest) {
3160  list<CTempString> elems;
3161  _ASSERT(!m_Current.GetName().empty());
3162  NStr::Split(m_Current.GetName(), "/", elems,
3167  }
3168 
3169  // NB: match is 'false' when processing a failing entry
     // x_ProcessEntry() is not called for a matched entry in eInternal mode;
     // otherwise it is called with eExtract (matched and extracting), eTest
     // (testing) or eUndefined
3170  if ((match && action == eInternal)
3171  || x_ProcessEntry(match && action == eExtract ? eExtract :
3172  action == eTest ? eTest : eUndefined,
3173  size, done.get())
3174  || (match && (action == eList || action == eUpdate))) {
3175  _ASSERT(status == eSuccess && action != eTest);
3176  done->push_back(m_Current);
3177  if (action == eInternal) {
     // Internal mode stops at the first entry added
3178  break;
3179  }
3180  }
3181 
     // The next header is expected at the current stream position
3183  pos = m_StreamPos;
3184  }
3185 
3186  return done;
3187 }
3188 
3189 
3190 static string s_ToFilesystemPath(const string& base_dir, const string& name,
3191  bool noabs = false)
3192 {
3193  string path;
3194  _ASSERT(!name.empty());
3195  if (!base_dir.empty() && (!CDirEntry::IsAbsolutePath(name) || noabs)) {
3196  path = CDirEntry::ConcatPath(base_dir, name);
3197  } else {
3198  path = name;
3199  if (CDirEntry::IsAbsolutePath(path) && noabs) {
3200 #ifdef NCBI_OS_MSWIN
3201  if (isalpha((unsigned char) path[0]) && path[1] == ':') {
3202  // Drive
3203  path.erase(0, 2);
3204  } else if ((path[0] == '/' || path[0] == '\\') &&
3205  (path[1] == '/' || path[1] == '\\')) {
3206  // Network
3207  path.erase(0, path.find_first_of("/\\", 2));
3208  }
3209 #endif //NCBI_OS_MSWIN
3210  if (path[0] == '/' || path[0] == '\\') {
3211  path.erase(0, 1);
3212  }
3213  if (path.empty()) {
3214  path.assign(1, '.');
3215  }
3216  }
3217  }
3218  _ASSERT(!path.empty());
3219  return CDirEntry::NormalizePath(path);
3220 }
3221 
3222 
3223 static string s_ToArchiveName(const string& base_dir, const string& path)
3224 {
3225  // NB: Path assumed to have been normalized
3226  string retval = CDirEntry::AddTrailingPathSeparator(path);
3227 
3228 #if defined(NCBI_OS_MSWIN)
3229  // Convert to Unix format with all forward slashes
3230  NStr::ReplaceInPlace(retval, "\\", "/");
3231  const NStr::ECase how = NStr::eNocase;
3232 #elif defined(NCBI_OS_CYGWIN)
3233  const NStr::ECase how = NStr::eNocase;
3234 #else
3235  const NStr::ECase how = NStr::eCase;
3236 #endif //NCBI_OS
3237 
3238  SIZE_TYPE pos = 0;
3239 
3240  bool absolute;
3241  // Remove leading base dir from the path
3242  if (!base_dir.empty() && NStr::StartsWith(retval, base_dir, how)) {
3243  if (retval.size() > base_dir.size()) {
3244  retval.erase(0, base_dir.size()/*separator too*/);
3245  } else {
3246  retval.assign(1, '.');
3247  }
3248  absolute = false;
3249  } else {
3250  absolute = CDirEntry::IsAbsolutePath(retval);
3251 #ifdef NCBI_OS_MSWIN
3252  if (isalpha((unsigned char) retval[0]) && retval[1] == ':') {
3253  // Remove a disk name if present
3254  pos = 2;
3255  } else if (retval[0] == '/' && retval[1] == '/') {
3256  // Network name if present
3257  pos = retval.find('/', 2);
3258  absolute = true;
3259  }
3260 #endif //NCBI_OS_MSWIN
3261  }
3262 
3263  // Remove any leading and trailing slashes
3264  while (pos < retval.size() && retval[pos] == '/') {
3265  ++pos;
3266  }
3267  if (pos) {
3268  retval.erase(0, pos);
3269  }
3270  pos = retval.size();
3271  while (pos > 0 && retval[pos - 1] == '/') {
3272  --pos;
3273  }
3274  if (pos < retval.size()) {
3275  retval.erase(pos);
3276  }
3277 
3278  if (absolute) {
3279  retval.insert((SIZE_TYPE) 0, 1, '/');
3280  }
3281  return retval;
3282 }
3283 
3284 
3286 {
     // Helper that moves an existing directory entry out of the way for the
     // duration of an extraction.
     //
     // On construction the given entry is renamed to a temporary name (prefix
     // "xNCBItArX") generated in the same directory.  On destruction, if the
     // rename had succeeded, the original is either put back (while still
     // "pending") or the temporary is removed (after Release() was called).
     //
     // NOTE(review): the class header, the constructor/destructor signature
     // lines and the data member declarations are missing from this listing.
3287 public:
3289  : CDirEntry(GetTmpNameEx(entry.GetDir(), "xNCBItArX")),
3291  {
     // The temporary name must not be taken yet
3292  _ASSERT(!Exists());
     // Move the original entry aside; on success arm both flags and reset
     // the error code
3293  if (CDirEntry(m_Entry.GetPath()).Rename(GetPath())) {
3294  m_Activated = m_Pending = true;
3295  CNcbiError::SetErrno(errno = 0);
3296  }
3297  }
3298 
3300  {
     // Nothing to undo or clean up unless the rename had succeeded
3301  if (m_Activated) {
3302  (void)(m_Pending ? Restore() : Remove());
3303  }
3304  }
3305 
     // Put the original entry back in place: remove whatever currently sits
     // at the original path, then rename the temporary back to it.
     // Returns the result of that rename.  Also disarms the destructor.
3306  bool Restore(void)
3307  {
3308  m_Activated = false;
3309  m_Entry.Remove();
3310  CNcbiError::SetErrno(errno = 0);
3311  return Rename(m_Entry.GetPath());
3312  }
3313 
     // Confirm the replacement: the destructor will then remove the temporary
     // instead of restoring it.
3314  void Release(void)
3315  {
3316  m_Pending = false;
3317  }
3318 
3319 private:
3323 };
3324 
3325 
3326 static inline CTempString x_DropTrailingSlashes(const string& s)
3327 {
3328  size_t len = s.size();
3329  while (len > 1) {
3330  if (s[--len] != '/')
3331  break;
3332  }
3333  return CTempString(s, 0, len);
3334 }
3335 
3336 
3338  const CTar::TEntries* entries)
3339 {
3341  bool extract = action == eExtract;
3342 
3343  if (extract) {
3344  // Destination for extraction
3345  unique_ptr<CDirEntry> dst
3353  !(m_Flags & fKeepAbsolutePath))));
3354  // Source for extraction
3355  unique_ptr<CDirEntry> src;
3356  // Direntry pending removal
3357  AutoPtr<CTarTempDirEntry> pending;
3358 
3359  // Dereference symlink if requested
3360  if (type != CTarEntryInfo::eSymLink &&
3362  dst->DereferenceLink();
3363  }
3364 
3365  // Actual type in file system (if exists)
3366  CDirEntry::EType dst_type = dst->GetType();
3367 
3368  // Check if extraction is allowed (when the destination exists)
3369  if (dst_type != CDirEntry::eUnknown) {
3370  enum {
3371  eNotExtracted = 0,
3372  eExtracted = 1,
3373  eConflict = 2,
3374  eMisCase = 3
3375  } extracted = eNotExtracted; // Check if ours (e.g. a prev. revision extracted)
3376  CTempString dstname = x_DropTrailingSlashes(dst->GetPath());
3377  if (entries) {
3378  CDirEntry::SStat dst_stat;
3379  bool do_stat = dst->Stat(&dst_stat, eIgnoreLinks);
3380  ITERATE(TEntries, e, *entries) {
3381  CTempString tstname = x_DropTrailingSlashes(e->GetPath());
3382  if (!do_stat) {
3383  if (NStr::CompareCase(dstname, tstname) != 0) {
3384  continue;
3385  }
3386  if (e->GetType() != m_Current.GetType()) {
3387  extracted = eConflict;
3388  break;
3389  }
3390  } else {
3391  if (NStr::CompareNocase(dstname, tstname) != 0) {
3392  continue;
3393  }
3394  // Case-blind filesystems (e.g. NTFS) present a challenge
3395  CDirEntry tst(e->GetPath());
3396  CDirEntry::SStat tst_stat;
3397  if (!tst.Stat(&tst_stat, m_Flags & fFollowLinks ? eFollowLinks : eIgnoreLinks)
3398  || memcmp(&dst_stat.orig.st_dev, &tst_stat.orig.st_dev, sizeof(dst_stat.orig.st_dev)) != 0
3399  || memcmp(&dst_stat.orig.st_ino, &tst_stat.orig.st_ino, sizeof(dst_stat.orig.st_ino)) != 0) {
3400  continue;
3401  }
3402  if (e->GetType() != m_Current.GetType()) {
3403  extracted = eConflict;
3404  break;
3405  }
3406  if (NStr::CompareCase(dstname, tstname) != 0) {
3407  extracted = eMisCase;
3408  break;
3409  }
3410  }
3411  extracted = eExtracted;
3412  break;
3413  }
3414  }
3415  if (!extracted) {
3416  // Can overwrite it?
3417  if (!(m_Flags & fOverwrite)) {
3418  // File already exists, and cannot be changed
3419  extract = false;
3420  }
3421  // Can update?
3422  else if ((m_Flags & fUpdate) == fUpdate // NB: fOverwrite set
3423  && (type == CTarEntryInfo::eDir ||
3424  // Make sure that dst is not newer than the entry
3425  dst->IsNewer(m_Current.GetModificationCTime(),
3426  // NB: dst must exist
3428  extract = false;
3429  }
3430  // Have equal types?
3431  else if (m_Flags & fEqualTypes) {
3432  if (type == CTarEntryInfo::eHardLink) {
3433  src.reset(new CDirEntry
3436  !(m_Flags & fKeepAbsolutePath))));
3437  if (dst_type != src->GetType()) {
3438  extract = false;
3439  }
3440  } else if (CTarEntryInfo::EType(dst_type) != type) {
3441  extract = false;
3442  }
3443  }
3444  }
3445  if (extract && (type != CTarEntryInfo::eDir ||
3446  dst_type != CDirEntry::eDir)) {
3447  if (!extracted && (m_Flags & fBackup) == fBackup) {
3448  // Need to backup the existing destination?
3449  CDirEntry tmp(*dst);
3450  if (!tmp.Backup(kEmptyStr, CDirEntry::eBackup_Rename)) {
3451  int x_errno = CNcbiError::GetLast().Code();
3452  TAR_THROW(this, eBackup,
3453  "Failed to backup '" + dst->GetPath() + '\''
3454  + s_OSReason(x_errno));
3455  }
3456  } else {
3457  // Do removal safely until extraction is confirmed
3458  int x_errno;
3459  bool link = false;
3460  if (!(extracted & eConflict)
3461  || (!(link = (type == CTarEntryInfo::eSymLink ||
3463  && ((extracted == eMisCase && (m_Flags & fIgnoreNameCase)) ||
3464  (extracted == eConflict && (m_Flags & fConflictOverwrite))))) {
3465  pending.reset(new CTarTempDirEntry(*dst));
3466  x_errno = CNcbiError::GetLast().Code();
3467  } else {
3468  x_errno = 0;
3469  }
3470  if (/*!pending->Exists() ||*/ dst->Exists()) {
3471  // Security concern: do not attempt data extraction
3472  // into special files etc, which can harm the system.
3473  if (x_errno == 0) {
3474  x_errno = EEXIST;
3475  }
3476  if (x_errno != EEXIST || !link) {
3477  TAR_THROW(this, eWrite,
3478  "Cannot extract '" + dst->GetPath() + '\''
3479  + s_OSReason(x_errno));
3480  }
3481  string whatlink(type == CTarEntryInfo::eSymLink ? "symlink" : "hard link");
3482  TAR_POST(122, Error,
3483  "Cannot create " + whatlink + " '" + dst->GetPath() + "' -> '"
3484  + m_Current.GetLinkName() + '\''
3485  + s_OSReason(x_errno));
3486  extract = false;
3487  }
3488  }
3489  } else if (extract && extracted == eMisCase && !(m_Flags & fIgnoreNameCase)) {
3490  TAR_THROW(this, eWrite,
3491  "Cannot extract '" + dst->GetPath() + '\''
3492  + s_OSReason(EISDIR));
3493  }
3494  }
3495  if (extract) {
3496 #ifdef NCBI_OS_UNIX
3497  mode_t u;
3498  u = umask(022);
3499  umask(u & ~(S_IRUSR | S_IWUSR | S_IXUSR));
3500  try {
3501 #endif //NCBI_OS_UNIX
3502  extract = x_ExtractEntry(size, dst.get(), src.get());
3503 #ifdef NCBI_OS_UNIX
3504  } catch (...) {
3505  umask(u);
3506  throw;
3507  }
3508  umask(u);
3509 #endif //NCBI_OS_UNIX
3510  if (extract) {
3511  m_Current.m_Path = dst->GetPath();
3512  if (pending) {
3513  pending->Release();
3514  }
3515  } else if (pending && !pending->Restore()) { // Undo delete
3516  int x_errno = CNcbiError::GetLast().Code();
3517  TAR_THROW(this, eWrite,
3518  "Cannot restore '" + dst->GetPath()
3519  + "' back in place" + s_OSReason(x_errno));
3520  }
3521  }
3523  && action == eTest && (m_Flags & fDumpEntryHeaders)) {
3524  unique_ptr<CDirEntry> dst
3528  !(m_Flags & fKeepAbsolutePath))));
3529  (void) x_ExtractSparseFile(size, dst.get(), true);
3530  }
3531 
3533 
3534  return extract;
3535 }
3536 
3537 
// Skip forward in the archive by the given number of blocks, advancing
// m_StreamPos accordingly.  A stream seek is attempted first (when nothing is
// buffered and at least a full buffer is to be skipped); otherwise, or if the
// seek fails, the data is read and discarded one buffer at a time.
// Throws eRead if the archive ends before all blocks have been skipped.
// NOTE(review): the function header and a few statements are not visible in
// this excerpt -- confirm the exact signature against the full source.
3539 {
3541  while (blocks) {
3542 #ifndef NCBI_COMPILER_WORKSHOP
3543  // RogueWave RTL is buggy in seeking pipes -- it clobbers
3544  // (discards) streambuf data instead of leaving it alone..
      // Fast path condition: internal buffer is empty and there is at least
      // one whole buffer's worth of blocks still to skip.
3546  && m_BufferPos == 0 && blocks >= BLOCK_OF(m_BufferSize)) {
3547  CT_OFF_TYPE fskip =
3549  _ASSERT(ALIGN_SIZE(fskip) == fskip);
      // A result of (pos_type)(off_type)(-1) signals that the seek failed
3550  if (m_Stream.rdbuf()->PUBSEEKOFF(fskip, IOS_BASE::cur)
3551  != (CT_POS_TYPE)((CT_OFF_TYPE)(-1))) {
3552  blocks -= BLOCK_OF(fskip);
3553  m_StreamPos += fskip;
3554  continue;
3555  }
      // Seek failed: warn only for file-based archives, then fall through
      // to the read-and-discard path below
3556  if (m_FileStream) {
3557  TAR_POST(2, Warning,
3558  "Cannot fast skip in file archive,"
3559  " reverting to slow skip");
3560  }
3562  }
3563 #endif //NCBI_COMPILER_WORKSHOP
      // Slow path: read (and drop) up to one buffer of data per iteration
3564  size_t nskip = (blocks < BLOCK_OF(m_BufferSize)
3565  ? (size_t) SIZE_OF(blocks)
3566  : m_BufferSize);
3567  _ASSERT(ALIGN_SIZE(nskip) == nskip);
3568  if (!x_ReadArchive(nskip)) {
3569  TAR_THROW(this, eRead,
3570  "Archive skip failed (EOF)");
3571  }
3572  _ASSERT(nskip);
      // x_ReadArchive() may have changed nskip; account in whole blocks
3573  nskip = ALIGN_SIZE(nskip);
3574  blocks -= BLOCK_OF (nskip);
3575  m_StreamPos += nskip;
3576  }
3578 }
3579 
3580 
// Create the filesystem object for the current archive entry at `dst`,
// dispatching on the entry type: regular/sparse files and hard links,
// directories, symlinks, FIFOs and character/block devices.
// `src`, when given, is the hard link source; otherwise it is built here.
// Returns true if the entry was extracted, false if it was skipped (an error
// or warning has been posted).  Hard failures are reported via TAR_THROW.
// NOTE(review): the first line of the function header and several `case`
// labels / statements are not visible in this excerpt.
3581 // NB: Clobbers umask, must be restored after the call
3583  const CDirEntry* src)
3584 {
3586  unique_ptr<CDirEntry> src_ptr; // deleter
3587  bool extracted = true; // assume best
3588 
3590  // Conform to POSIX-mandated behavior to extract as files
3592  }
3593  switch (type) {
3594  case CTarEntryInfo::eSparseFile: // NB: only PAX GNU/1.0 sparse file here
3596  case CTarEntryInfo::eFile:
3597  _ASSERT(!dst->Exists());
3598  {{
3599  // Create base directory
3600  CDir dir(dst->GetDir());
3601  if (/*dir.GetPath() != "." && */!dir.CreatePath()) {
3602  int x_errno = CNcbiError::GetLast().Code();
3603  TAR_THROW(this, eCreate,
3604  "Cannot create directory '" + dir.GetPath() + '\''
3605  + s_OSReason(x_errno));
3606  }
3607 
      // Hard link: make sure the link source is known
3608  if (type == CTarEntryInfo::eHardLink) {
3609  if (!src) {
3610  src_ptr.reset(new CDirEntry
3613  !(m_Flags & fKeepAbsolutePath))));
3614  src = src_ptr.get();
3615  }
3616  if (src->GetType() == CDirEntry::eUnknown && size) {
3617  // Looks like a dangling hard link but luckily we have
3618  // the actual file data (POSIX extension) so use it here.
3620  }
3621  }
3622 
3623  if (type == CTarEntryInfo::eHardLink) {
3624  _ASSERT(src);
3625 #ifdef NCBI_OS_UNIX
      // Try a real hard link first
3626  if (link(src->GetPath().c_str(), dst->GetPath().c_str()) == 0){
3627  if (m_Flags & fPreserveAll) {
3629  }
3630  break;
3631  }
3632  int x_errno = errno;
      // These errors make a copy pointless too: give up on the entry
3633  if (x_errno == ENOENT || x_errno == EEXIST || x_errno == ENOTDIR) {
3634  extracted = false;
3635  }
3636  TAR_POST(10, extracted ? Warning : Error,
3637  "Cannot hard link '" + dst->GetPath()
3638  + "' -> '" + src->GetPath() + '\''
3639  + s_OSReason(x_errno)
3640  + (extracted ? ", trying to copy" : ""));
3641  if (!extracted) {
3642  break;
3643  }
3644 #endif //NCBI_OS_UNIX
      // Fallback (and the only way on non-Unix): emulate the link by copying
3645  if (!src->Copy(dst->GetPath(),
3648  int xx_errno = CNcbiError::GetLast().Code();
3649  TAR_POST(11, Error,
3650  "Cannot hard link '" + dst->GetPath()
3651  + "' -> '" + src->GetPath() + "' via copy"
3652  + s_OSReason(xx_errno));
3653  extracted = false;
3654  break;
3655  }
3656  } else if (type == CTarEntryInfo::eSparseFile && size) {
3657  if (!(extracted = x_ExtractSparseFile(size, dst)))
3658  break;
3659  } else {
3660  x_ExtractPlainFile(size, dst);
3661  }
3662 
3663  // Restore attributes
3664  if (m_Flags & fPreserveAll) {
3666  }
3667  }}
3668  break;
3669 
3670  case CTarEntryInfo::eDir:
3671  _ASSERT(size == 0);
3672  {{
      // dst is expected to actually be a CDir; a failed cast is reported
      // as an internal error
3673  const CDir* dir = dynamic_cast<const CDir*>(dst);
3674  if (!dir || !dir->CreatePath()) {
3675  int x_errno = !dir ? 0 : CNcbiError::GetLast().Code();
3676  TAR_THROW(this, eCreate,
3677  "Cannot create directory '" + dst->GetPath() + '\''
3678  + (!dir
3679  ? string(": Internal error")
3680  : s_OSReason(x_errno)));
3681  }
3682  // NB: Attributes for a directory must be set only after all of its
3683  // entries have been already extracted.
3684  }}
3685  break;
3686 
      // Symbolic link branch (its `case` label is not visible in this excerpt)
3688  _ASSERT(size == 0);
3689  {{
3690  const CSymLink* symlink = dynamic_cast<const CSymLink*>(dst);
3691  if (!symlink || !symlink->Create(m_Current.GetLinkName())) {
3692  int x_errno = !symlink ? 0 : CNcbiError::GetLast().Code();
3693  string error = "Cannot create symlink '" + dst->GetPath()
3694  + "' -> '" + m_Current.GetLinkName() + '\''
3695  + (!symlink
3696  ? string(": Internal error")
3697  : s_OSReason(x_errno));
      // Only an ENOTSUP failure with fSkipUnsupported set is downgraded
      // from an exception to a posted error
3698  if (!symlink || x_errno != ENOTSUP
3699  || !(m_Flags & fSkipUnsupported)) {
3700  TAR_THROW(this, eCreate, error);
3701  }
3702  TAR_POST(12, Error, error);
3703  extracted = false;
3704  }
3705  }}
3706  break;
3707 
3708  case CTarEntryInfo::ePipe:
3709  _ASSERT(size == 0);
3710  {{
3711 #ifdef NCBI_OS_UNIX
      // Clear the umask before mkfifo() (see the NB above the function)
3712  umask(0);
3713  if (mkfifo(dst->GetPath().c_str(), m_Current.GetMode()) == 0) {
3714  break;
3715  }
3716  int x_errno = errno;
3717  string reason = s_OSReason(x_errno);
3718 #else
3719  int x_errno = ENOTSUP;
3720  CNcbiError::SetErrno(x_errno);
3721  string reason = ": Feature not supported by host OS";
3722 #endif //NCBI_OS_UNIX
3723  extracted = false;
3724  string error
3725  = "Cannot create FIFO '" + dst->GetPath() + '\'' + reason;
3726  if (x_errno != ENOTSUP || !(m_Flags & fSkipUnsupported)) {
3727  TAR_THROW(this, eCreate, error);
3728  }
3729  TAR_POST(81, Error, error);
3730  }}
3731  break;
3732 
      // Character / block device branch (its `case` labels are not visible
      // in this excerpt)
3735  _ASSERT(size == 0);
3736  {{
3737 #ifdef NCBI_OS_UNIX
3738  umask(0);
      // Combine the permission bits with the device type for mknod()
3739  mode_t m = (m_Current.GetMode() |
3740  (type == CTarEntryInfo::eCharDev ? S_IFCHR : S_IFBLK));
3741  if (mknod(dst->GetPath().c_str(), m, m_Current.m_Stat.orig.st_rdev) == 0) {
3742  break;
3743  }
3744  int x_errno = errno;
3745  string reason = s_OSReason(x_errno);
3746 #else
3747  int x_errno = ENOTSUP;
3748  CNcbiError::SetErrno(x_errno);
3749  string reason = ": Feature not supported by host OS";
3750 #endif //NCBI_OS_UNIX
3751  extracted = false;
3752  string error
3753  = "Cannot create " + string(type == CTarEntryInfo::eCharDev
3754  ? "character" : "block")
3755  + " device '" + dst->GetPath() + '\'' + reason;
3756  if (x_errno != ENOTSUP || !(m_Flags & fSkipUnsupported)) {
3757  TAR_THROW(this, eCreate, error);
3758  }
3759  TAR_POST(82, Error, error);
3760  }}
3761  break;
3762 
      // Entry type that needs no action (its `case` label is not visible
      // in this excerpt)
3764  _ASSERT(size == 0);
3765  /*NOOP*/
3766  break;
3767 
3771  // Extended headers should have already been processed and not be here
3772  _TROUBLE;
3773  /*FALLTHRU*/
3774 
3775  default:
3776  TAR_POST(13, Error,
3777  "Skipping unsupported entry '" + m_Current.GetName()
3778  + "' of type #" + NStr::IntToString(int(type)));
3779  extracted = false;
3780  break;
3781  }
3782 
3783  return extracted;
3784 }
3785 
3786 
// Copy `size` bytes of entry data from the archive into the file at
// dst->GetPath(), which is created (truncated) in binary mode.
// Throws eCreate if the file cannot be opened, eRead if the archive ends
// prematurely, and eWrite if writing to or closing the file fails.
// NOTE(review): the function header is not visible in this excerpt.
3788 {
3789  // FIXME: Switch to CFileIO eventually to bypass ofstream's obscurity
3790  // w.r.t. errors, extra buffering etc.
3791  CNcbiOfstream ofs(dst->GetPath().c_str(),
3792  IOS_BASE::trunc |
3793  IOS_BASE::out |
3794  IOS_BASE::binary);
3795  if (!ofs) {
3796  int x_errno = errno;
3797  TAR_THROW(this, eCreate,
3798  "Cannot create file '" + dst->GetPath() + '\''
3799  + s_OSReason(x_errno));
3800  }
3801  if (m_Flags & fPreserveMode) { // NB: secure
3803  dst, fTarURead | fTarUWrite);
3804  }
3805 
3806  bool okay = ofs.good();
3807  if (okay) while (size) {
3808  // Read from the archive
3809  size_t nread = size < m_BufferSize ? (size_t) size : m_BufferSize;
3810  const char* data = x_ReadArchive(nread);
3811  if (!data) {
3812  TAR_THROW(this, eRead,
3813  "Unexpected EOF in archive");
3814  }
3815  _ASSERT(nread && ofs.good());
3816  // Write file to disk
      // (a stream failure, thrown or not, just ends the loop with okay=false)
3817  try {
3818  okay = ofs.write(data, (streamsize) nread) ? true : false;
3819  } catch (IOS_BASE::failure&) {
3820  okay = false;
3821  }
3822  if (!okay) {
3823  break;
3824  }
3825  size -= nread;
      // Archive position advances in whole blocks
3826  m_StreamPos += ALIGN_SIZE(nread);
3827  }
3828 
      // Close explicitly so that a failure to flush/close is detected here;
      // `okay` tells a write failure apart from a close failure in the message
3829  ofs.close();
3830  if (!okay || !ofs.good()) {
3831  int x_errno = errno;
3832  TAR_THROW(this, eWrite,
3833  "Cannot " + string(okay ? "close" : "write")
3834  + " file '" + dst->GetPath()+ '\'' + s_OSReason(x_errno));
3835  }
3836 }
3837 
3838 
3839 string CTar::x_ReadLine(Uint8& size, const char*& data, size_t& nread)
3840 {
3841  string line;
3842  for (;;) {
3843  size_t n;
3844  for (n = 0; n < nread; ++n) {
3845  if (!isprint((unsigned char) data[n])) {
3846  break;
3847  }
3848  }
3849  line.append(data, n);
3850  if (n < nread) {
3851  if (data[n] == '\n') {
3852  ++n;
3853  }
3854  data += n;
3855  nread -= n;
3856  break;
3857  }
3858  if (!(nread = size < BLOCK_SIZE ? size : BLOCK_SIZE)) {
3859  break;
3860  }
3861  if (!(data = x_ReadArchive(nread))) {
3862  return kEmptyStr;
3863  }
3864  _ASSERT(nread);
3865  if (size >= nread) {
3866  size -= nread;
3867  } else {
3868  size = 0;
3869  }
3870  m_StreamPos += ALIGN_SIZE(nread);
3871  }
3872  return line;
3873 }
3874 
3875 
3876 template<>
3877 struct Deleter<FILE>
3878 {
3879  static void Delete(FILE* fp) { fclose(fp); }
3880 };
3881 
3882 
3883 #ifdef NCBI_OS_MSWIN
3884 # define NCBI_FILE_WO "wb"
3885 #else
3886 # define NCBI_FILE_WO "w"
3887 #endif //NCBI_OS_MSWIN
3888 
// Expand a sparse file entry: read the textual sparse map that precedes the
// data (a region count followed by "offset numbytes" pairs, one number per
// line), then write each non-empty region at its offset in the file at
// dst->GetPath(), and finally set the file size to the highest region end.
// When `dump` is set, processing stops after the map has been read and the
// function returns false without creating any file.
// Returns true on success; false if the map is bad, or if reading/writing
// failed (in the latter case the partially written file is removed).
// Throws eRead if the archive ends before the map, eCreate if the output
// file cannot be created.
// NOTE(review): the function header and a few statements are not visible in
// this excerpt.
3890 {
3891  _ASSERT(size);
3892 
3893  // Read sparse map first
3894  Uint8 pos = m_StreamPos;
3895  size_t nread = size < BLOCK_SIZE ? (size_t) size : BLOCK_SIZE;
3896  const char* data = x_ReadArchive(nread);
3897  if (!data) {
3898  TAR_THROW(this, eRead,
3899  "Unexpected EOF in archive");
3900  }
3901  _ASSERT(nread);
3902  if (size >= nread) {
3903  size -= nread;
3904  } else {
3905  size = 0;
3906  }
3907 
3908  string num(x_ReadLine(size, data, nread)); // "numblocks"
3909  Uint8 n = NStr::StringToUInt8(num,
3912  if (!n) {
3913  TAR_POST(97, Error,
3914  "Cannot expand sparse file '" + dst->GetPath()
3915  + "': Region count is "
3916  + string(num.empty() ? "missing" : "invalid")
3917  + " (\"" + num + "\")");
3918  m_StreamPos += ALIGN_SIZE(nread);
3919  return false;
3920  }
3921  m_StreamPos += ALIGN_SIZE(nread);
      // NOTE(review): n comes straight from the archive and sizes this
      // vector with no visible upper bound -- confirm that a hostile
      // archive cannot force an excessive allocation here.
3922  vector< pair<Uint8, Uint8> > bmap(n);
3923 
3924  for (Uint8 i = 0; i < n; ++i) { // "offset numbytes" pairs
3925  Uint8 val[2];
3926  for (int k = 0; k < 2; ++k) {
3927  num = x_ReadLine(size, data, nread);
3928  try {
3929  val[k] = NStr::StringToUInt8(num);
3930  } catch (...) {
3931  TAR_POST(98, Error,
3932  "Cannot expand sparse file '" + dst->GetPath()
3933  + "': Sparse map "
3934  + string(k == 0 ? "offset" : "region size")
3935  + '[' + NStr::NumericToString(i) + "] is "
3936  + string(num.empty() ? "missing" : "invalid")
3937  + " (\"" + num + "\")");
3938  return false;
3939  }
3940  }
3941  bmap[i] = pair<Uint8, Uint8>(val[0], val[1]);
3942  }
3943  if (dump) {
3945  /* dontcare */
3946  return false;
3947  }
3948 
3949  // Write the file out
3950  AutoPtr<FILE> fp(::fopen(dst->GetPath().c_str(), NCBI_FILE_WO));
3951  if (!fp) {
3952  int x_errno = errno;
3953  TAR_THROW(this, eCreate,
3954  "Cannot create file '" + dst->GetPath() + '\''
3955  + s_OSReason(x_errno));
3956  }
3957  if (m_Flags & fPreserveMode) { // NB: secure
3959  dst, fTarURead | fTarUWrite);
3960  }
3961 
      // nread: bytes still unconsumed in the current archive chunk;
      // eof:   highest (offset + size) seen so far, i.e. the final file size;
      // x_error: 0 = okay, >0 = errno-style code to report,
      //          -1 = failure that has already been reported
3962  nread = 0;
3963  Uint8 eof = 0;
3964  int x_error = 0;
3965  for (Uint8 i = 0; i < n; ++i) {
      // NOTE(review): offset + size is not checked for Uint8 wrap-around
3966  Uint8 top = bmap[i].first + bmap[i].second;
3967  if (eof < top) {
3968  eof = top;
3969  }
3970  if (!bmap[i].second) {
3971  continue;
3972  }
3973  // non-empty region
      // NOTE(review): the offset is narrowed to long for fseek(); where long
      // is 32-bit, offsets of 2GB and above would not survive the cast --
      // confirm whether large sparse files need to be supported here.
3974  if (::fseek(fp.get(), (long) bmap[i].first, SEEK_SET) != 0) {
3975  if (!(x_error = errno))
3976  x_error = EIO; // Make sure non-zero
3977  break;
3978  }
3979  Uint8 done = 0;
3980  do {
      // Refill from the archive only when the previous chunk is used up
3981  if (!nread) {
3982  nread = size < m_BufferSize ? (size_t) size : m_BufferSize;
3983  if (!nread || !(data = x_ReadArchive(nread))) {
3984  x_error = errno;
3985  if (!nread)
3986  CNcbiError::SetErrno(x_error);
3987  TAR_POST(99, Error,
3988  "Cannot read archive data for sparse file '"
3989  + dst->GetPath() + "', region #"
3991  + (nread
3992  ? s_OSReason(x_error)
3993  : string(": End-of-data")));
      // Already reported: mark as failed-without-message, skip truncation
3994  x_error = -1;
3995  eof = 0;
3996  break;
3997  }
3998  _ASSERT(nread);
3999  size -= nread;
4000  m_StreamPos += ALIGN_SIZE(nread);
4001  }
      // Write no more than what is left of the current region
4002  size_t xread = nread;
4003  if (xread > bmap[i].second - done) {
4004  xread = (size_t)(bmap[i].second - done);
4005  }
4006  if (::fwrite(data, 1, xread, fp.get()) != xread) {
4007  if (!(x_error = errno)) {
4008  x_error = EIO; // Make sure non-zero
4009  }
4010  break;
4011  }
4012  done += xread;
4013  data += xread;
4014  nread -= xread;
4015  } while (done < bmap[i].second);
4016  if (x_error) {
4017  break;
4018  }
4019  }
4020 
4021  // Finalize the file
      // (ownership is released from the AutoPtr so the close result can be
      // checked here)
4022  bool closed = ::fclose(fp.release()) == 0 ? true : false;
4023  if (!x_error && !closed) {
4024  if (!(x_error = errno))
4025  x_error = EIO; // Make sure non-zero
4026  }
4027  string reason;
4028  if (x_error > 0) {
4029  reason = s_OSReason(x_error);
4030  } else if (eof) {
      // Set the final file size (covers any trailing hole)
4031  x_error = s_TruncateFile(dst->GetPath(), eof);
4032  if (x_error) {
4033 #ifdef NCBI_OS_MSWIN
      // Here x_error is treated as a Windows system error code: obtain
      // its message text from the OS
4034  TCHAR* str = NULL;
4035  DWORD rv = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
4036  FORMAT_MESSAGE_FROM_SYSTEM |
4037  FORMAT_MESSAGE_MAX_WIDTH_MASK |
4038  FORMAT_MESSAGE_IGNORE_INSERTS,
4039  NULL, (DWORD) x_error,
4040  MAKELANGID(LANG_NEUTRAL,SUBLANG_DEFAULT),
4041  (LPTSTR) &str, 0, NULL);
4042  if (str) {
4043  if (rv) {
4044  _ASSERT(*str);
4045  reason = string(": ") + _T_STDSTRING(str);
4046  }
4047  ::LocalFree((HLOCAL) str);
4048  }
4049  if (reason.empty()) {
4050  reason = ": Error 0x" + NStr::UIntToString(x_error, 0, 16);
4051  }
4052  x_error = EIO; // NB: Make sure no accidental sign bit in x_error
4053  CNcbiError::SetErrno(x_error);
4054 #else
4055  reason = s_OSReason(x_error);
4056 #endif //NCBI_OS_MSWIN
4057  }
4058  }
      // On any failure remove the incomplete file; post a message only if
      // the failure has not been reported yet (x_error > 0)
4059  if (x_error) {
4060  if (x_error > 0) {
4061  _ASSERT(!reason.empty());
4062  TAR_POST(100, Error,
4063  "Cannot write sparse file '" + dst->GetPath() + '\''+ reason);
4064  }
4065  dst->Remove();
4066  return false;
4067  }
4068 
4069  return true;
4070 }
4071 
4072 
// Apply the attributes recorded in `info` to a filesystem entry, in this
// order: date/time, owner, permissions.  `what` selects which groups to
// restore (fPreserveTime / fPreserveOwner / fPreserveMode).  `path` is the
// entry to modify; if null, one is built from info.GetPath().  A non-zero
// `perm` is used for the permissions in place of those stored in `info`.
// Throws eRestoreAttrs if the date/time or the permissions cannot be set;
// failures to set the owner are ignored.
// NOTE(review): the first line of the function header is not visible in this
// excerpt.
4074  TFlags what,
4075  const CDirEntry* path,
4076  TTarMode perm) const
4077 {
4078  unique_ptr<CDirEntry> path_ptr; // deleter
4079  if (!path) {
4080  path_ptr.reset(new CDirEntry(info.GetPath()));
4081  path = path_ptr.get();
4082  }
4083  _ASSERT(!path->GetPath().empty());
4084 
4085  // Date/time.
4086  // Set the time before permissions because on some platforms this setting
4087  // can also affect file permissions.
4088  if (what & fPreserveTime) {
4089  CTime modification(info.GetModificationTime());
4090  CTime last_access(info.GetLastAccessTime());
4091  CTime creation(info.GetCreationTime());
      // Add the sub-second parts kept separately in the stat structure
4092  modification.SetNanoSecond(info.m_Stat.mtime_nsec);
4093  last_access.SetNanoSecond(info.m_Stat.atime_nsec);
4094  creation.SetNanoSecond(info.m_Stat.ctime_nsec);
4095  if (!path->SetTime(&modification, &last_access, &creation)) {
4096  int x_errno = CNcbiError::GetLast().Code();
4097  TAR_THROW(this, eRestoreAttrs,
4098  "Cannot restore date/time for '" + path->GetPath() + '\''
4099  + s_OSReason(x_errno));
4100  }
4101  }
4102 
4103  // Owner.
4104  // This must precede changing permissions because on some systems chown()
4105  // clears the set[ug]id bits for non-superusers thus resulting in incorrect
4106  // file permissions.
4107  if (what & fPreserveOwner) {
4108  bool done = false;
4109  // 2-tier trial: first using the names, then using numeric IDs.
4110  // Note that it is often impossible to restore the original owner
4111  // without the super-user rights so no error checking is done here.
4112  if (!info.GetUserName().empty() || !info.GetGroupName().empty()) {
      // NOTE(review): uid/gid are not initialized here; the last
      // comparison below relies on SetOwner() having filled them in even
      // when it returned false -- confirm against SetOwner()'s contract.
4113  unsigned int uid, gid;
      // Done if: both names applied, or at least the group applied, or
      // the resulting numeric IDs already match the archived ones
4114  if (path->SetOwner(info.GetUserName(), info.GetGroupName(),
4115  eIgnoreLinks, &uid, &gid)
4116  || (!info.GetGroupName().empty()
4117  && path->SetOwner(kEmptyStr, info.GetGroupName(),
4118  eIgnoreLinks))
4119  || (uid == info.GetUserId() && gid == info.GetGroupId())) {
4120  done = true;
4121  }
4122  }
4123  if (!done) {
      // Second tier: numeric IDs passed as strings; group-only as last resort
4124  string user = NStr::UIntToString(info.GetUserId());
4125  string group = NStr::UIntToString(info.GetGroupId());
4126  if (!path->SetOwner(user, group, eIgnoreLinks)) {
4127  path->SetOwner(kEmptyStr, group, eIgnoreLinks);
4128  }
4129  }
4130  }
4131 
4132  // Mode.
4133  // Set them last.
      // (not applied to FIFOs and character/block devices)
4134  if ((what & fPreserveMode)
4135  && info.GetType() != CTarEntryInfo::ePipe
4136  && info.GetType() != CTarEntryInfo::eCharDev
4137  && info.GetType() != CTarEntryInfo::eBlockDev) {
4138  bool failed = false;
4139 #ifdef NCBI_OS_UNIX
4140  // We won't change permissions for symlinks because lchmod() is not
4141  // portable, and also is not implemented on majority of platforms.
4142  if (info.GetType() != CTarEntryInfo::eSymLink) {
4143  // Use raw mode here to restore most of the bits
4144  mode_t mode = s_TarToMode(perm ? perm : info.m_Stat.orig.st_mode);
4145  if (chmod(path->GetPath().c_str(), mode) != 0) {
4146  // May fail due to setuid/setgid bits -- strip'em and try again
4147  if (mode & (S_ISUID | S_ISGID)) {
4148  mode &= ~(S_ISUID | S_ISGID);
4149  failed = chmod(path->GetPath().c_str(), mode) != 0;
4150  } else {
4151  failed = true;
4152  }
4154  }
4155  }
4156 #else
      // Non-Unix: go through the portable CDirEntry mode API
4157  CDirEntry::TMode user, group, other;
4158  CDirEntry::TSpecialModeBits special_bits;
4159  if (perm) {
4160  s_TarToMode(perm, &user, &group, &other, &special_bits);
4161  } else {
4162  info.GetMode(&user, &group, &other, &special_bits);
4163  }
4164  failed = !path->SetMode(user, group, other, special_bits);
4165 #endif //NCBI_OS_UNIX
4166  if (failed) {
4167  int x_errno = CNcbiError::GetLast().Code();
4168  TAR_THROW(this, eRestoreAttrs,
4169  "Cannot " + string(perm ? "change" : "restore")
4170  + " permissions for '" + path->GetPath() + '\''
4171  + s_OSReason(x_errno));
4172  }
4173  }
4174 }
4175 
4176 
4177 static string s_BaseDir(const string& dirname)
4178 {
4179  string path = s_ToFilesystemPath(kEmptyStr, dirname);
4180 #ifdef NCBI_OS_MSWIN
4181  // Replace backslashes with forward slashes
4182  NStr::ReplaceInPlace(path, "\\", "/");
4183 #endif //NCBI_OS_MSWIN
4184  if (!NStr::EndsWith(path, '/'))
4185  path += '/';
4186  return path;
4187 }
4188 
4189 
// Append the filesystem entry `name` (resolved against m_BaseDir) to the
// archive.  Directories are processed recursively, entry by entry.
// When `toc` is given, it is searched (most recent entry first) to decide
// whether the entry needs to be written at all.
// Returns the list of entries that have been added (possibly empty).
// Throws eOpen if the entry cannot be stat'ed, eBadName for an empty name, a
// name containing "..", or an empty link name, eRead if a directory cannot
// be listed (unless fIgnoreUnreadable), and eUnsupportedSource for an entry
// of unknown type (unless fSkipUnsupported).
// NOTE(review): a number of statements are not visible in this excerpt.
4190 unique_ptr<CTar::TEntries> CTar::x_Append(const string& name,
4191  const TEntries* toc)
4192 {
4193  unique_ptr<TEntries> entries(new TEntries);
4194  unique_ptr<CDir::TEntries> dir;
4195 
4196  const EFollowLinks follow_links = (m_Flags & fFollowLinks ?
4198  unsigned int uid = 0, gid = 0;
4199  bool update = true;
4200  bool added = false;
4201 
4202  // Create the entry info
4204 
4205  // Compose entry name for relative names
4206  string path = s_ToFilesystemPath(m_BaseDir, name);
4207 
4208  // Get direntry information
4209  CDirEntry entry(path);
4211  if (!entry.Stat(&st, follow_links)) {
4212  int x_errno = errno;
4213  TAR_THROW(this, eOpen,
4214  "Cannot get status of '" + path + '\''+ s_OSReason(x_errno));
4215  }
4217 
      // Name of the entry as it is to appear in the archive
4218  string temp = s_ToArchiveName(m_BaseDir, path);
4219 
4220  if (temp.empty()) {
4221  TAR_THROW(this, eBadName,
4222  "Empty entry name not allowed");
4223  }
4224 
      // Reject names with a ".." component; honor the exclusion mask
4225  list<CTempString> elems;
4226  NStr::Split(temp, "/", elems,
4228  if (find(elems.begin(), elems.end(), "..") != elems.end()) {
4229  TAR_THROW(this, eBadName,
4230  "Name '" + temp + "' embeds parent directory (\"..\")");
4231  }
4232  if (m_Mask[eExcludeMask].mask
4233  && s_MatchExcludeMask(temp, elems,
4235  m_Mask[eExcludeMask].acase)) {
4236  goto out;
4237  }
4238  elems.clear();
      // Directory names carry a trailing slash in the archive
4239  if (type == CDirEntry::eDir && temp != "/") {
4240  temp += '/';
4241  }
4242 
      // NB: after the swap `temp` holds the previous m_Current.m_Name (the
      // code below asserts it to be empty) and gets reused as scratch
4243  m_Current.m_Name.swap(temp);
4246  _ASSERT(!follow_links);
4247  m_Current.m_LinkName = entry.LookupLink();
4248  if (m_Current.GetLinkName().empty()) {
4249  TAR_THROW(this, eBadName,
4250  "Empty link name not allowed");
4251  }
4252  }
4253 
4255  follow_links, &uid, &gid);
4256 #ifdef NCBI_OS_UNIX
      // Drop user/group names that are merely the numeric IDs spelled out
4257  if (NStr::UIntToString(uid) == m_Current.GetUserName()) {
4258  m_Current.m_UserName.clear();
4259  }
4260  if (NStr::UIntToString(gid) == m_Current.GetGroupName()) {
4261  m_Current.m_GroupName.clear();
4262  }
4263 #endif //NCBI_OS_UNIX
4264 #ifdef NCBI_OS_MSWIN
4265  // These are fake but we don't want to leave plain 0 (Unix root) in there
4266  st.orig.st_uid = (uid_t) uid;
4267  st.orig.st_gid = (gid_t) gid;
4268 #endif //NCBI_OS_MSWIN
4269 
4270  m_Current.m_Stat = st;
4271  // Fixup for permissions
4272  m_Current.m_Stat.orig.st_mode = (mode_t) s_ModeToTar(st.orig.st_mode);
4273 
4274  // Check if we need to update this entry in the archive
4275  if (toc) {
4276  bool found = false;
4277 
4278  if (type != CDirEntry::eUnknown) {
4279  // Start searching from the end of the list, to find
4280  // the most recent entry (if any) first
4281  _ASSERT(temp.empty());
4282  REVERSE_ITERATE(TEntries, e, *toc) {
      // Filesystem path of the archived entry: use the stored one if
      // available, else derive it from the entry name
4283  string entry_path;
4284  const string* entry_path_ptr;
4285  if (e->GetPath().empty()) {
4286  s_ToFilesystemPath(m_BaseDir, e->GetName()).swap(entry_path);
4287  entry_path_ptr = &entry_path;
4288  } else {
4289  entry_path_ptr = &e->GetPath();
4290  }
      // A non-empty `temp` is the target of a hard link matched earlier:
      // keep looking for the (non-hard-link) entry with that path
4291  if (!temp.empty()) {
4292  if (e->GetType() == CTarEntryInfo::eHardLink ||
4293  temp != *entry_path_ptr) {
4294  continue;
4295  }
4296  } else if (path == *entry_path_ptr) {
4297  found = true;
4298  if (e->GetType() == CTarEntryInfo::eHardLink) {
4299  s_ToFilesystemPath(m_BaseDir, e->GetLinkName()).swap(temp);
4300  continue;
4301  }
4302  } else {
4303  continue;
4304  }
      // Matching archive entry located: compare type, link target and
      // modification time to see whether an update is due
4305  if (m_Current.GetType() != e->GetType()) {
4306  if (m_Flags & fEqualTypes) {
4307  goto out;
4308  }
4309  } else if (m_Current.GetType() == CTarEntryInfo::eSymLink
4310  && m_Current.GetLinkName() == e->GetLinkName()) {
4311  goto out;
4312  }
4314  <= e->GetModificationCTime()) {
4315  update = false; // same(or older), no update
4316  }
4317  break;
4318  }
4319  }
4320 
4321  if (!update || (!found && (m_Flags & (fUpdate & ~fOverwrite)))) {
4323  goto out;
4324  }
4325  // Directories always get recursive treatment later
4326  update = false;
4327  }
4328  }
4329 
4330  // Append the entry
4331  switch (type) {
4332  case CDirEntry::eFile:
4333  _ASSERT(update);
4334  added = x_AppendFile(path);
4335  break;
4336 
4339  case CDirEntry::eSymLink:
4340  case CDirEntry::ePipe:
4341  _ASSERT(update);
      // These entry types are stored as a header only, with zero size
4342  m_Current.m_Stat.orig.st_size = 0;
4343  x_WriteEntryInfo(path);
4344  added = true;
4345  break;
4346 
4347  case CDirEntry::eDir:
4348  dir.reset(CDir(path).GetEntriesPtr(kEmptyStr, CDir::eIgnoreRecursive));
4349  if (!dir) {
4350  int x_errno = CNcbiError::GetLast().Code();
4351  string error =
4352  "Cannot list directory '" + path + '\'' + s_OSReason(x_errno);
4353  if (m_Flags & fIgnoreUnreadable) {
4354  TAR_POST(101, Error, error);
4355  break;
4356  }
4357  TAR_THROW(this, eRead, error);
4358  }
4359  if (update) {
4360  // NB: Can't use "added" here -- it'd be out of order
4361  m_Current.m_Stat.orig.st_size = 0;
4362  x_WriteEntryInfo(path);
4363  m_Current.m_Path.swap(path);
4364  entries->push_back(m_Current);
4365  }
4366  // Append/update all files from that directory
4367  ITERATE(CDir::TEntries, e, *dir) {
4368  unique_ptr<TEntries> add = x_Append((*e)->GetPath(), toc);
4369  entries->splice(entries->end(), *add);
4370  }
4371  break;
4372 
4373  case CDirEntry::eDoor:
4374  case CDirEntry::eSocket:
4375  // Tar does not have any provisions to store these kinds of entries
4376  if (!(m_Flags & fSkipUnsupported)) {
4377  TAR_POST(3, Warning,
4378  "Skipping non-archiveable "
4379  + string(type == CDirEntry::eSocket ? "socket" : "door")
4380  + " '" + path + '\'');
4381  }
4382  break;
4383 
4384  case CDirEntry::eUnknown:
4385  if (!(m_Flags & fSkipUnsupported)) {
4386  TAR_THROW(this, eUnsupportedSource,
4387  "Unable to archive '" + path + '\'');
4388  }
4389  /*FALLTHRU*/
4390 
4391  default:
4392  if (type != CDirEntry::eUnknown) {
4393  _TROUBLE;
4394  }
4395  TAR_POST(14, Error,
4396  "Skipping unsupported source '" + path
4397  + "' of type #" + NStr::IntToString(int(type)));
4398  break;
4399  }
      // Record the entry (non-directory case) once it is in the archive
4400  if (added) {
4401  m_Current.m_Path.swap(path);
4402  entries->push_back(m_Current);
4403  }
4404 
4405  out:
4406  return entries;
4407 }
4408 
4409 
// Append a single user-described entry to the archive, with its data taken
// from the stream `is`.  The entry name is converted to archive form with
// any trailing slashes removed; the entry's ownership and permissions are
// filled in with platform-specific defaults.
// Returns a one-element list with the entry added.
// Throws eBadName for an empty name or a name containing "..", and eRead if
// the input stream is not in a good state.
// NOTE(review): a number of statements are not visible in this excerpt.
4410 unique_ptr<CTar::TEntries> CTar::x_Append(const CTarUserEntryInfo& entry,
4411  CNcbiIstream& is)
4412 {
4413  unique_ptr<TEntries> entries(new TEntries);
4414 
4415  // Create a temp entry info first (for logging, if any)
4417 
4418  string temp = s_ToArchiveName(kEmptyStr, entry.GetName());
4419 
4420  while (NStr::EndsWith(temp, '/')) { // NB: directories are not allowed here
4421  temp.resize(temp.size() - 1);
4422  }
4423  if (temp.empty()) {
4424  TAR_THROW(this, eBadName,
4425  "Empty entry name not allowed");
4426  }
4427 
      // Reject names with a ".." component
4428  list<CTempString> elems;
4429  NStr::Split(temp, "/", elems,
4431  if (find(elems.begin(), elems.end(), "..") != elems.end()) {
4432  TAR_THROW(this, eBadName,
4433  "Name '" + temp + "' embeds parent directory (\"..\")");
4434  }
4435  elems.clear();
4436 
4437  // Recreate entry info
4438  m_Current = entry;
4439  m_Current.m_Name.swap(temp);
4442 
4443  if (!is.good()) {
4444  TAR_THROW(this, eRead,
4445  "Bad input file stream");
4446  }
4447 
      // All three timestamps are set to the same value (the ctime)
4450  m_Current.m_Stat.orig.st_mtime
4451  = m_Current.m_Stat.orig.st_atime
4452  = m_Current.m_Stat.orig.st_ctime;
4456 
4457 #ifdef NCBI_OS_UNIX
4458  // use regular file mode, adjusted with umask()
4460  fTarGRead | fTarGWrite |
4461  fTarORead | fTarOWrite);
4462  mode_t u;
4463 # ifdef HAVE_GETUMASK
4464  // NB: thread-safe
4465  u = getumask();
4466 # else
      // No getumask(): read the umask by setting it and restoring right away
4467  u = umask(022);
4468  umask(u);
4469 # endif //HAVE_GETUMASK
4470  mode &= ~u;
4471  m_Current.m_Stat.orig.st_mode = (mode_t) s_ModeToTar(mode);
4472 
      // The entry is owned by the effective user/group of this process
4473  m_Current.m_Stat.orig.st_uid = geteuid();
4474  m_Current.m_Stat.orig.st_gid = getegid();
4475 
4477  .swap(m_Current.m_UserName);
4479  .swap(m_Current.m_GroupName);
4480 #endif //NCBI_OS_UNIX
4481 #ifdef NCBI_OS_MSWIN
4482  // safe file mode
4483  m_Current.m_Stat.orig.st_mode = (fTarURead | fTarUWrite |
4484  fTarGRead | fTarORead);
4485 
4486  unsigned int uid = 0, gid = 0;
4488  SE_KERNEL_OBJECT,
4491  &uid, &gid);
4492  // These are fake but we don't want to leave plain 0 (Unix root) in there
4493  m_Current.m_Stat.orig.st_uid = (uid_t) uid;
4494  m_Current.m_Stat.orig.st_gid = (gid_t) gid;
4495 #endif //NCBI_OS_MSWIN
4496 
      // Write the header and the data; then report the entry as added
4497  x_AppendStream(entry.GetName(), is);
4498 
4499  entries->push_back(m_Current);
4500  return entries;
4501 }
4502 
4503 
// Write the header for the current entry followed by its data, read from
// the stream `is` directly into the archive buffer, and pad the data with
// zeros up to a multiple of BLOCK_SIZE.  `name` is passed on to
// x_WriteEntryInfo() and is also used in error messages.
// Throws eRead if the stream fails or yields no data while some is still
// expected.
// NOTE(review): a few statements (including the declaration of `size`) are
// not visible in this excerpt.
4504 // Regular entries only!
4505 void CTar::x_AppendStream(const string& name, CNcbiIstream& is)
4506 {
4508 
4509  // Write entry header
4510  x_WriteEntryInfo(name);
4511 
      // Reset errno so that a stale value is not reported for a read failure
4512  errno = 0;
4514  while (size) {
4515  // Write file contents
      // Read no more than fits in the buffer and no more than remains
4517  size_t avail = m_BufferSize - m_BufferPos;
4518  if (avail > size) {
4519  avail = (size_t) size;
4520  }
4521  // Read file
4522  int x_errno = 0;
4523  streamsize xread;
4524  if (is.good()) {
4525  try {
4526  if (!is.read(m_Buffer + m_BufferPos, (streamsize) avail)) {
4527  x_errno = errno;
4528  xread = -1;
4529  } else {
4530  xread = is.gcount();
4531  }
4532  } catch (IOS_BASE::failure&) {
4533  xread = -1;
4534  }
4535  } else {
4536  xread = -1;
4537  }
4538  if (xread <= 0) {
      // The cast only picks the wording ("file" vs "stream") for the message
4539  ifstream* ifs = dynamic_cast<ifstream*>(&is);
4540  TAR_THROW(this, eRead,
4541  "Cannot read "
4542  + string(ifs ? "file" : "stream")
4543  + " '" + name + '\'' + s_OSReason(x_errno));
4544  }
4545  // Write buffer to the archive
4546  avail = (size_t) xread;
4547  x_WriteArchive(avail);
4548  size -= avail;
4549  }
4550 
4551  // Write zeros to get the written size a multiple of BLOCK_SIZE
4552  size_t zero = ALIGN_SIZE(m_BufferPos) - m_BufferPos;
4553  memset(m_Buffer + m_BufferPos, 0, zero);
4554  x_WriteArchive(zero);
4556 }
4557 
4558 
// Open the file `file` for binary reading and append it to the archive via
// x_AppendStream().
// Returns true if the file has been appended; false if it could not be
// opened and fIgnoreUnreadable is set (an error is posted in that case).
// Throws eOpen if the file cannot be opened and fIgnoreUnreadable is not set.
// NOTE(review): one statement at the top of the body is not visible in this
// excerpt.
4559 // Regular files only!
4560 bool CTar::x_AppendFile(const string& file)
4561 {
4563 
4564  // FIXME: Switch to CFileIO eventually to avoid ifstream's obscurity
4565  // w.r.t. errors, an extra layer of buffering etc.
4566  CNcbiIfstream ifs;
4567 
4568  // Open file
4569  ifs.open(file.c_str(), IOS_BASE::binary | IOS_BASE::in);
4570  if (!ifs) {
4571  int x_errno = errno;
4572  string error
4573  = "Cannot open file '" + file + '\'' + s_OSReason(x_errno);
4574  if (m_Flags & fIgnoreUnreadable) {
4575  TAR_POST(102, Error, error);
4576  return false;
4577  }
4578  TAR_THROW(this, eOpen, error);
4579  }
4580 
      // Header and contents are written by the stream-based routine
4581  x_AppendStream(file, ifs);
4582  return true;
4583 }
4584 
4585 
// Install `mask` (with case sensitivity `acase`) in the m_Mask slot selected
// by `type`, deleting the mask previously held in that slot if it was owned.
// A null `mask` is always recorded as not owned.
// Throws eMemory if `type` does not index a valid m_Mask slot.
// NOTE(review): the first line of the function header (which declares `mask`
// and `own`) is not visible in this excerpt.
4587  EMaskType type, NStr::ECase acase)
4588 {
4589  int idx = int(type);
      // Bounds check against the number of elements in m_Mask
4590  if (idx < 0 || sizeof(m_Mask)/sizeof(m_Mask[0]) <= (size_t) idx){
4591  TAR_THROW(this, eMemory,
4592  "Mask type is out of range: " + NStr::IntToString(idx));
4593  }
4594  if (m_Mask[idx].owned) {
4595  delete m_Mask[idx].mask;
4596  }
4597  m_Mask[idx].mask = mask;
4598  m_Mask[idx].acase = acase;
4599  m_Mask[idx].owned = mask ? own : eNoOwnership;
4600 }
4601 
4602 
4603 void CTar::SetBaseDir(const string& dirname)
4604 {
4605  string dir = s_BaseDir(dirname);
4606  m_BaseDir.swap(dir);
4607 }
4608 
4609 
// Estimate the size of an archive that would hold the given files, where
// each element of `files` provides a name (first) and a size (second).
// Counted per file: one header block plus the size rounded up with
// ALIGN_SIZE, plus an extra header block and the aligned name length when
// the archive name (with its terminator) does not fit in the header's name
// field.  A non-empty result also includes two blocks for the end-of-archive
// mark and padding up to a multiple of the buffer size derived from
// `blocking_factor`.  Returns 0 if nothing was counted.
// NOTE(review): the first line of the function header is not visible in this
// excerpt.
4611  size_t blocking_factor,
4612  const string& base_dir)
4613 {
      // NOTE(review): if SIZE_OF(blocking_factor) evaluates to 0 (e.g. for
      // blocking_factor == 0), `result % buffer_size` below would divide by
      // zero -- confirm that callers never pass 0.
4614  const size_t buffer_size = SIZE_OF(blocking_factor);
4615  string prefix = s_BaseDir(base_dir);
4616  Uint8 result = 0;
4617 
4618  ITERATE(TFiles, f, files) {
4619  // Count in the file size
4620  result += BLOCK_SIZE/*header*/ + ALIGN_SIZE(f->second);
4621 
4622  // Count in the long name (if any)
4623  string path = s_ToFilesystemPath(prefix, f->first);
4624  string name = s_ToArchiveName (prefix, path);
4625  size_t namelen = name.size() + 1;
4626  if (namelen > sizeof(STarHeader::name)) {
4627  result += BLOCK_SIZE/*long name header*/ + ALIGN_SIZE(namelen);
4628  }
4629  }
4630  if (result) {
4631  result += BLOCK_SIZE << 1; // EOT
      // Round up to a whole number of buffers
4632  Uint8 padding = result % buffer_size;
4633  if (padding) {
4634  result += buffer_size - padding;
4635  }
4636  }
4637 
4638  return result;
4639 }
4640 
4641 
4642 class CTarReader : public IReader
4643 {
4644 public:
4646  : m_Read(0), m_Eof(false), m_Bad(false), m_Tar(tar, own)
4647  { }
4648 
4649  virtual ERW_Result Read(void* buf, size_t count, size_t* bytes_read = 0);
4650  virtual ERW_Result PendingCount(size_t* count);
4651 
4652 private:
4654  bool m_Eof;
4655  bool m_Bad;