NCBI C++ ToolKit
tar.cpp
Go to the documentation of this file.

Go to the SVN repository for this file.

1 /* $Id: tar.cpp 101109 2023-10-31 15:40:22Z lavr $
2  * ===========================================================================
3  *
4  * PUBLIC DOMAIN NOTICE
5  * National Center for Biotechnology Information
6  *
7  * This software/database is a "United States Government Work" under the
8  * terms of the United States Copyright Act. It was written as part of
9  * the author's official duties as a United States Government employee and
10  * thus cannot be copyrighted. This software/database is freely available
11  * to the public for use. The National Library of Medicine and the U.S.
12  * Government have not placed any restriction on its use or reproduction.
13  *
14  * Although all reasonable efforts have been taken to ensure the accuracy
15  * and reliability of the software and data, the NLM and the U.S.
16  * Government do not and cannot warrant the performance or results that
17  * may be obtained by using this software or data. The NLM and the U.S.
18  * Government disclaim all warranties, express or implied, including
19  * warranties of performance, merchantability or fitness for any particular
20  * purpose.
21  *
22  * Please cite the author in any work or product based on this material.
23  *
24  * ===========================================================================
25  *
26  * Authors: Vladimir Ivanov
27  * Anton Lavrentiev
28  *
29  * File Description:
30  * Tar archive API.
31  *
32  * Supports subsets of POSIX.1-1988 (ustar), POSIX 1003.1-2001 (posix), old
33  * GNU (POSIX 1003.1), and V7 formats (all partially but reasonably). New
34  * archives are created using POSIX (genuine ustar) format, using GNU
35  * extensions for long names/links only when unavoidable. It cannot,
36  * however, handle all the exotics like sparse files (except for GNU/1.0
37  * sparse PAX extension) and contiguous files (yet still can work around both
38  * of them gracefully, if needed), multivolume / incremental archives, etc.
39  * but just regular files, devices (character or block), FIFOs, directories,
40  * and limited links: can extract both hard- and symlinks, but can store
41  * symlinks only. Also, this implementation is only minimally PAX(Portable
42  * Archive eXchange)-aware for file extractions (and does not yet use any PAX
43  * extensions to store the files).
44  *
45  */
46 
47 #include <ncbi_pch.hpp>
48 // Cancel __wur (warn unused result) ill effects in GCC
49 #ifdef _FORTIFY_SOURCE
50 # undef _FORTIFY_SOURCE
51 #endif /*_FORTIFY_SOURCE*/
52 #define _FORTIFY_SOURCE 0
53 #include <util/compress/tar.hpp>
54 #include <util/error_codes.hpp>
55 
56 #if !defined(NCBI_OS_UNIX) && !defined(NCBI_OS_MSWIN)
57 # error "Class CTar can be defined on UNIX and MS-Windows platforms only!"
58 #endif
59 
60 #if defined(NCBI_OS_UNIX)
61 # include "../../../corelib/ncbi_os_unix_p.hpp"
62 # include <grp.h>
63 # include <pwd.h>
64 # include <unistd.h>
65 # ifdef NCBI_OS_IRIX
66 # include <sys/mkdev.h>
67 # endif //NCBI_OS_IRIX
68 # ifdef HAVE_SYS_SYSMACROS_H
69 # include <sys/sysmacros.h>
70 # endif //HAVE_SYS_SYSMACROS_H
71 # ifdef NCBI_OS_DARWIN
72 // macOS supplies these as inline functions rather than macros.
73 # define major major
74 # define minor minor
75 # define makedev makedev
76 # endif
77 # if !defined(major) || !defined(minor) || !defined(makedev)
78 # error "Device macros undefined in this UNIX build!"
79 # endif
80 #elif defined(NCBI_OS_MSWIN)
81 # include "../../../corelib/ncbi_os_mswin_p.hpp"
82 # include <io.h>
83 typedef unsigned int mode_t;
84 typedef unsigned int uid_t;
85 typedef unsigned int gid_t;
86 #endif //NCBI_OS...
87 
88 
89 #define NCBI_USE_ERRCODE_X Util_Compress
90 #define NCBI_MODULE NCBITAR
91 
92 
94 
95 
96 /////////////////////////////////////////////////////////////////////////////
97 //
98 // TAR helper routines
99 //
100 
101 // Convert a number to an octal string padded to the left
102 // with [leading] zeros ('0') and having _no_ trailing '\0'.
103 static bool s_NumToOctal(Uint8 val, char* ptr, size_t len)
104 {
105  _ASSERT(len > 0);
106  do {
107  ptr[--len] = char('0' + char(val & 7));
108  val >>= 3;
109  } while (len);
110  return val ? false : true;
111 }
112 
113 
114 // Convert an octal number (possibly preceded by spaces) to numeric form.
115 // Stop either at the end of the field or at first '\0' (if any).
116 static bool s_OctalToNum(Uint8& val, const char* ptr, size_t len)
117 {
118  _ASSERT(ptr && len > 0);
119  size_t i = *ptr ? 0 : 1;
120  while (i < len && ptr[i]) {
121  if (!isspace((unsigned char) ptr[i]))
122  break;
123  ++i;
124  }
125  val = 0;
126  bool okay = false;
127  while (i < len && '0' <= ptr[i] && ptr[i] <= '7') {
128  okay = true;
129  val <<= 3;
130  val |= ptr[i++] - '0';
131  }
132  while (i < len && ptr[i]) {
133  if (!isspace((unsigned char) ptr[i]))
134  return false;
135  ++i;
136  }
137  return okay;
138 }
139 
140 
141 static bool s_NumToBase256(Uint8 val, char* ptr, size_t len)
142 {
143  _ASSERT(len > 0);
144  do {
145  ptr[--len] = (unsigned char)(val & 0xFF);
146  val >>= 8;
147  } while (len);
148  *ptr |= '\x80'; // set base-256 encoding flag
149  return val ? false : true;
150 }
151 
152 
153 // Return 0 (false) if conversion failed; 1 if the value converted to
154 // conventional octal representation (perhaps, with terminating '\0'
155 // sacrificed), or -1 if the value converted using base-256.
156 static int s_EncodeUint8(Uint8 val, char* ptr, size_t len)
157 { // Max file size (for len == 12):
158  if (s_NumToOctal (val, ptr, len)) { // 8GiB-1
159  return 1/*okay*/;
160  }
161  if (s_NumToOctal (val, ptr, ++len)) { // 64GiB-1
162  return 1/*okay*/;
163  }
164  if (s_NumToBase256(val, ptr, len)) { // up to 2^94-1
165  return -1/*okay, base-256*/;
166  }
167  return 0/*failure*/;
168 }
169 
170 
171 // Return true if conversion succeeded; false otherwise.
172 static bool s_Base256ToNum(Uint8& val, const char* ptr, size_t len)
173 {
174  const Uint8 lim = kMax_UI8 >> 8;
175  if (*ptr & '\x40') { // negative base-256?
176  return false;
177  }
178  val = *ptr++ & '\x3F';
179  while (--len) {
180  if (val > lim) {
181  return false;
182  }
183  val <<= 8;
184  val |= (unsigned char)(*ptr++);
185  }
186  return true;
187 }
188 
189 
190 // Return 0 (false) if conversion failed; 1 if the value was read into
191 // as a conventional octal string (perhaps, without the terminating '\0');
192 // or -1 if base-256 representation used.
193 static int s_DecodeUint8(Uint8& val, const char* ptr, size_t len)
194 {
195  if (*ptr & '\x80') {
196  return s_Base256ToNum(val, ptr, len) ? -1/*okay*/ : 0/*failure*/;
197  } else {
198  return s_OctalToNum (val, ptr, len) ? 1/*okay*/ : 0/*failure*/;
199  }
200 }
201 
202 
203 static void s_TarToMode(TTarMode perm,
204  CDirEntry::TMode* usr_mode,
205  CDirEntry::TMode* grp_mode,
206  CDirEntry::TMode* oth_mode,
207  CDirEntry::TSpecialModeBits* special_bits)
208 {
209  // User
210  if (usr_mode) {
211  *usr_mode = ((perm & fTarURead ? CDirEntry::fRead : 0) |
212  (perm & fTarUWrite ? CDirEntry::fWrite : 0) |
213  (perm & fTarUExecute ? CDirEntry::fExecute : 0));
214  }
215 
216  // Group
217  if (grp_mode) {
218  *grp_mode = ((perm & fTarGRead ? CDirEntry::fRead : 0) |
219  (perm & fTarGWrite ? CDirEntry::fWrite : 0) |
220  (perm & fTarGExecute ? CDirEntry::fExecute : 0));
221  }
222 
223  // Others
224  if (oth_mode) {
225  *oth_mode = ((perm & fTarORead ? CDirEntry::fRead : 0) |
226  (perm & fTarOWrite ? CDirEntry::fWrite : 0) |
227  (perm & fTarOExecute ? CDirEntry::fExecute : 0));
228  }
229 
230  // Special bits
231  if (special_bits) {
232  *special_bits = ((perm & fTarSetUID ? CDirEntry::fSetUID : 0) |
233  (perm & fTarSetGID ? CDirEntry::fSetGID : 0) |
234  (perm & fTarSticky ? CDirEntry::fSticky : 0));
235  }
236 }
237 
238 
240 {
241  mode_t mode = (
242 #ifdef S_ISUID
243  (perm & fTarSetUID ? S_ISUID : 0) |
244 #endif
245 #ifdef S_ISGID
246  (perm & fTarSetGID ? S_ISGID : 0) |
247 #endif
248 #ifdef S_ISVTX
249  (perm & fTarSticky ? S_ISVTX : 0) |
250 #endif
251 #if defined(S_IRUSR)
252  (perm & fTarURead ? S_IRUSR : 0) |
253 #elif defined(S_IREAD)
254  (perm & fTarURead ? S_IREAD : 0) |
255 #endif
256 #if defined(S_IWUSR)
257  (perm & fTarUWrite ? S_IWUSR : 0) |
258 #elif defined(S_IWRITE)
259  (perm & fTarUWrite ? S_IWRITE : 0) |
260 #endif
261 #if defined(S_IXUSR)
262  (perm & fTarUExecute ? S_IXUSR : 0) |
263 #elif defined(S_IEXEC)
264  (perm & fTarUExecute ? S_IEXEC : 0) |
265 #endif
266 #ifdef S_IRGRP
267  (perm & fTarGRead ? S_IRGRP : 0) |
268 #endif
269 #ifdef S_IWGRP
270  (perm & fTarGWrite ? S_IWGRP : 0) |
271 #endif
272 #ifdef S_IXGRP
273  (perm & fTarGExecute ? S_IXGRP : 0) |
274 #endif
275 #ifdef S_IROTH
276  (perm & fTarORead ? S_IROTH : 0) |
277 #endif
278 #ifdef S_IWOTH
279  (perm & fTarOWrite ? S_IWOTH : 0) |
280 #endif
281 #ifdef S_IXOTH
282  (perm & fTarOExecute ? S_IXOTH : 0) |
283 #endif
284  0);
285  return mode;
286 }
287 
288 
290 {
291  // Keep in mind that the mode may be extracted on a different platform
292  TTarMode perm = (
293 #ifdef S_ISUID
294  (mode & S_ISUID ? fTarSetUID : 0) |
295 #endif
296 #ifdef S_ISGID
297  (mode & S_ISGID ? fTarSetGID : 0) |
298 #endif
299 #ifdef S_ISVTX
300  (mode & S_ISVTX ? fTarSticky : 0) |
301 #endif
302 #if defined(S_IRUSR)
303  (mode & S_IRUSR ? fTarURead : 0) |
304 #elif defined(S_IREAD)
305  (mode & S_IREAD ? fTarURead : 0) |
306 #endif
307 #if defined(S_IWUSR)
308  (mode & S_IWUSR ? fTarUWrite : 0) |
309 #elif defined(S_IWRITE)
310  (mode & S_IWRITE ? fTarUWrite : 0) |
311 #endif
312 #if defined(S_IXUSR)
313  (mode & S_IXUSR ? fTarUExecute : 0) |
314 #elif defined(S_IEXEC)
315  (mode & S_IEXEC ? fTarUExecute : 0) |
316 #endif
317 #if defined(S_IRGRP)
318  (mode & S_IRGRP ? fTarGRead : 0) |
319 #elif defined(S_IREAD)
320  // emulate read permission when file is readable
321  (mode & S_IREAD ? fTarGRead : 0) |
322 #endif
323 #ifdef S_IWGRP
324  (mode & S_IWGRP ? fTarGWrite : 0) |
325 #endif
326 #ifdef S_IXGRP
327  (mode & S_IXGRP ? fTarGExecute : 0) |
328 #endif
329 #if defined(S_IROTH)
330  (mode & S_IROTH ? fTarORead : 0) |
331 #elif defined(S_IREAD)
332  // emulate read permission when file is readable
333  (mode & S_IREAD ? fTarORead : 0) |
334 #endif
335 #ifdef S_IWOTH
336  (mode & S_IWOTH ? fTarOWrite : 0) |
337 #endif
338 #ifdef S_IXOTH
339  (mode & S_IXOTH ? fTarOExecute : 0) |
340 #endif
341  0);
342 #if defined(S_IFMT) || defined(_S_IFMT)
344 # ifdef S_IFMT
345  mask &= S_IFMT;
346 # else
347  mask &= _S_IFMT;
348 # endif
349  if (!(mask & 07777)) {
350  perm |= mask;
351  }
352 #endif
353  return perm;
354 }
355 
356 
// Length of the string held in a fixed-width field:  the number of bytes
// that precede the first '\0' within the first "maxsize" bytes at "ptr",
// or "maxsize" itself if the field contains no '\0' at all.
static size_t s_Length(const char* ptr, size_t maxsize)
{
    const void* nul = memchr(ptr, '\0', maxsize);
    if (!nul) {
        return maxsize;  // unterminated:  the whole field is in use
    }
    return (size_t)(static_cast<const char*>(nul) - ptr);
}
362 
363 
364 //////////////////////////////////////////////////////////////////////////////
365 //
366 // Constants / macros / typedefs
367 //
368 
/// Convert a byte position to a block number (and back): blocks are 2^9 bytes
#define BLOCK_OF(pos)     ((pos) >> 9)
#define SIZE_OF(blk)      ((blk) << 9)

/// Tar block size (512 bytes)
#define BLOCK_SIZE        SIZE_OF(1)

/// Offset within a block, i.e. the remainder of division by BLOCK_SIZE
#define OFFSET_OF(size)   ((size) & (BLOCK_SIZE - 1))

/// Round up to the nearest multiple of BLOCK_SIZE
/// (same as SIZE_OF(BLOCK_OF((size) + (BLOCK_SIZE - 1))), using a bit mask)
#define ALIGN_SIZE(size)  (((size) + (BLOCK_SIZE - 1)) & ~(BLOCK_SIZE - 1))
378 
379 
380 /// Recognized TAR formats
386  eTar_Posix = 5, // |= eTar_Ustar
387  eTar_Star = 6 // |= eTar_Ustar
388 };
389 
390 
/// POSIX "ustar" tar archive member header.
///
/// Every field is a fixed-width character array;  the "@N" note next to each
/// field is its byte offset from the beginning of the header.  The area
/// from offset 345 onwards is laid out differently depending on the archive
/// format, hence the union of three alternative views.
struct STarHeader {
    char name[100];                 ///< @0
    char mode[8];                   ///< @100
    char uid[8];                    ///< @108
    char gid[8];                    ///< @116
    char size[12];                  ///< @124
    char mtime[12];                 ///< @136
    char checksum[8];               ///< @148
    char typeflag[1];               ///< @156
    char linkname[100];             ///< @157
    char magic[6];                  ///< @257
    char version[2];                ///< @263
    char uname[32];                 ///< @265
    char gname[32];                 ///< @297
    char devmajor[8];               ///< @329
    char devminor[8];               ///< @337
    union {                         //   @345
        /// NB: not valid with old GNU format (no need)
        char prefix[155];
        /// NB: old GNU format only
        struct {
            char atime[12];         ///< @345
            char ctime[12];         ///< @357
            char unused[17];        ///< @369
            char sparse[96];        ///< @386 sparse map: ([12] offset + [12] size) x 4
            char contind[1];        ///< @482 non-zero if continued in the next header
            char realsize[12];      ///< @483 true file size
        } gnu;
        struct {
            /// NB: prefix + 107: realsize (char[12]) for 'S'
            char prefix[131];       ///< @345
            char atime[12];         ///< @476
            char ctime[12];         ///< @488
        } star;
    };                              //   @500 (end of the header fields)
    // NCBI in last 4 bytes (@508)
};
426 
427 
428 /// Block as a header.
429 union TTarBlock {
432 };
433 
434 
435 static bool s_TarChecksum(TTarBlock* block, bool isgnu)
436 {
437  STarHeader* h = &block->header;
438  size_t len = sizeof(h->checksum) - (isgnu ? 2 : 1);
439 
440  // Compute the checksum
441  memset(h->checksum, ' ', sizeof(h->checksum));
442  unsigned long checksum = 0;
443  const unsigned char* p = (const unsigned char*) block->buffer;
444  for (size_t i = 0; i < sizeof(block->buffer); ++i) {
445  checksum += *p++;
446  }
447  // ustar: '\0'-terminated checksum
448  // GNU special: 6 digits, then '\0', then a space [already in place]
449  if (!s_NumToOctal(checksum, h->checksum, len)) {
450  return false;
451  }
452  h->checksum[len] = '\0';
453  return true;
454 }
455 
456 
457 
458 //////////////////////////////////////////////////////////////////////////////
459 //
460 // CTarEntryInfo
461 //
462 
464 {
465  // Raw tar mode gets returned here (as kept in the info)
466  return (TTarMode)(m_Stat.orig.st_mode & 07777);
467 }
468 
469 
471  CDirEntry::TMode* grp_mode,
472  CDirEntry::TMode* oth_mode,
473  CDirEntry::TSpecialModeBits* special_bits) const
474 {
475  s_TarToMode(GetMode(), usr_mode, grp_mode, oth_mode, special_bits);
476 }
477 
478 
479 unsigned int CTarEntryInfo::GetMajor(void) const
480 {
481 #ifdef major
482  if (m_Type == eCharDev || m_Type == eBlockDev) {
483  return major(m_Stat.orig.st_rdev);
484  }
485 #else
486  if (sizeof(int) >= 4 && sizeof(m_Stat.orig.st_rdev) >= 4) {
487  return (*((unsigned int*) &m_Stat.orig.st_rdev) >> 16) & 0xFFFF;
488  }
489 #endif //major
490  return (unsigned int)(-1);
491 }
492 
493 
494 unsigned int CTarEntryInfo::GetMinor(void) const
495 {
496 #ifdef minor
497  if (m_Type == eCharDev || m_Type == eBlockDev) {
498  return minor(m_Stat.orig.st_rdev);
499  }
500 #else
501  if (sizeof(int) >= 4 && sizeof(m_Stat.orig.st_rdev) >= 4) {
502  return *((unsigned int*) &m_Stat.orig.st_rdev) & 0xFFFF;
503  }
504 #endif //minor
505  return (unsigned int)(-1);
506 }
507 
508 
510 {
511  char buf[9];
512  memset(buf, '-', sizeof(buf));
513 
514  char* usr = buf;
515  char* grp = usr + 3;
516  char* oth = grp + 3;
517 
518  if (mode & fTarURead) {
519  usr[0] = 'r';
520  }
521  if (mode & fTarUWrite) {
522  usr[1] = 'w';
523  }
524  if (mode & fTarUExecute) {
525  usr[2] = mode & fTarSetUID ? 's' : 'x';
526  } else if (mode & fTarSetUID) {
527  usr[2] = 'S';
528  }
529  if (mode & fTarGRead) {
530  grp[0] = 'r';
531  }
532  if (mode & fTarGWrite) {
533  grp[1] = 'w';
534  }
535  if (mode & fTarGExecute) {
536  grp[2] = mode & fTarSetGID ? 's' : 'x';
537  } else if (mode & fTarSetGID) {
538  grp[2] = 'S';
539  }
540  if (mode & fTarORead) {
541  oth[0] = 'r';
542  }
543  if (mode & fTarOWrite) {
544  oth[1] = 'w';
545  }
546  if (mode & fTarOExecute) {
547  oth[2] = mode & fTarSticky ? 't' : 'x';
548  } else if (mode & fTarSticky) {
549  oth[2] = 'T';
550  }
551 
552  return string(buf, sizeof(buf));
553 }
554 
555 
557 {
558  switch (type) {
561  return '-';
563  return 'l';
564  case CTarEntryInfo::eDir:
565  return 'd';
567  return 'p';
569  return 'c';
571  return 'b';
573  return 'V';
575  return 'S';
576  default:
577  break;
578  }
579  return '?';
580 }
581 
582 
584 {
585  string user(info.GetUserName());
586  if (user.empty()) {
587  NStr::UIntToString(user, info.GetUserId());
588  }
589  string group(info.GetGroupName());
590  if (group.empty()) {
591  NStr::UIntToString(group, info.GetGroupId());
592  }
593  return user + '/' + group;
594 }
595 
596 
597 static string s_MajorMinor(unsigned int n)
598 {
599  return n != (unsigned int)(-1) ? NStr::UIntToString(n) : string(1, '?');
600 }
601 
602 
603 static string s_SizeOrMajorMinor(const CTarEntryInfo& info)
604 {
605  if (info.GetType() == CTarEntryInfo::eCharDev ||
606  info.GetType() == CTarEntryInfo::eBlockDev) {
607  unsigned int major = info.GetMajor();
608  unsigned int minor = info.GetMinor();
609  return s_MajorMinor(major) + ',' + s_MajorMinor(minor);
610  } else if (info.GetType() == CTarEntryInfo::eDir ||
611  info.GetType() == CTarEntryInfo::ePipe ||
612  info.GetType() == CTarEntryInfo::eSymLink ||
613  info.GetType() == CTarEntryInfo::eVolHeader) {
614  return string("-");
615  } else if (info.GetType() == CTarEntryInfo::eSparseFile &&
616  info.GetSize() == 0) {
617  return string("?");
618  }
619  return NStr::NumericToString(info.GetSize());
620 }
621 
622 
624 {
625  CTime mtime(info.GetModificationTime());
626  os << s_TypeAsChar(info.GetType())
627  << s_ModeAsString(info.GetMode()) << ' '
628  << setw(17) << s_UserGroupAsString(info) << ' '
629  << setw(10) << s_SizeOrMajorMinor(info) << ' '
630  << mtime.ToLocalTime().AsString(" Y-M-D h:m:s ")
631  << info.GetName();
632  if (info.GetType() == CTarEntryInfo::eSymLink ||
633  info.GetType() == CTarEntryInfo::eHardLink) {
634  os << " -> " << info.GetLinkName();
635  }
636  return os;
637 }
638 
639 
640 
641 //////////////////////////////////////////////////////////////////////////////
642 //
643 // Debugging utilities
644 //
645 
646 static string s_OSReason(int x_errno)
647 {
648  static const char kUnknownError[] = "Unknown error";
649  const char* strerr;
650  char errbuf[80];
651  if (!x_errno)
652  return kEmptyStr;
653  strerr = ::strerror(x_errno);
654  if (!strerr || !*strerr
655  || !NStr::strncasecmp(strerr,
656  kUnknownError, sizeof(kUnknownError) - 1)) {
657  if (x_errno > 0) {
658  ::sprintf(errbuf, "Error %d", x_errno);
659  } else if (x_errno != -1) {
660  ::sprintf(errbuf, "Error 0x%08X", (unsigned int) x_errno);
661  } else {
662  ::strcpy (errbuf, "Unknown error (-1)");
663  }
664  strerr = errbuf;
665  }
666  _ASSERT(strerr && *strerr);
667  return string(": ") + strerr;
668 }
669 
670 
671 static string s_PositionAsString(const string& file, Uint8 pos, size_t recsize,
672  const string& entryname)
673 {
674  _ASSERT(!OFFSET_OF(recsize));
675  _ASSERT(recsize >= BLOCK_SIZE);
676  string result;
677  if (!file.empty()) {
678  CDirEntry temp(file);
679  result = (temp.GetType() == CDirEntry::eFile ? temp.GetName() : file)
680  + ": ";
681  }
682  result += "At record " + NStr::NumericToString(pos / recsize);
683  if (recsize != BLOCK_SIZE) {
684  result +=
685  ", block " + NStr::NumericToString(BLOCK_OF(pos % recsize)) +
686  " [thru #" + NStr::NumericToString(BLOCK_OF(pos),
687  NStr::fWithCommas) + ']';
688  }
689  if (!entryname.empty()) {
690  result += ", while in '" + entryname + '\'';
691  }
692  return result + ":\n";
693 }
694 
695 
696 static string s_OffsetAsString(size_t offset)
697 {
698  char buf[20];
699  _ASSERT(offset < 1000);
700  _VERIFY(sprintf(buf, "%03u", (unsigned int) offset));
701  return buf;
702 }
703 
704 
// Check whether any of the first "len" bytes at "s" differs from "c".
// Return true at the first such byte;  false if all bytes are equal to "c"
// (in particular, when "len" is 0).
static bool memcchr(const char* s, char c, size_t len)
{
    const char* const end = s + len;
    while (s != end) {
        if (*s != c) {
            return true;
        }
        ++s;
    }
    return false;
}
713 
714 
715 static string s_Printable(const char* field, size_t maxsize, bool text)
716 {
717  bool check = false;
718  if (!text && maxsize > 1 && !*field) {
719  field++, maxsize--;
720  check = true;
721  }
722  size_t len = s_Length(field, maxsize);
723  string retval = NStr::PrintableString(CTempString(field,
724  memcchr(field + len,
725  '\0',
726  maxsize - len)
727  ? maxsize
728  : len));
729  return check && !retval.empty() ? "\\0" + retval : retval;
730 }
731 
732 
// Fallback for non-GNU compilers that have not defined offsetof()
#if !defined(__GNUC__) && !defined(offsetof)
# define offsetof(T, F) ((char*) &(((T*) 0)->F) - (char*) 0)
#endif

// Format the first "size" bytes of a header field for a dump, as:
//   @<offset>[<field>]:<padding>"<printable contents>"
// Requires "h" (pointer to the STarHeader being dumped) and "excpt" (bool)
// to be in scope at the point of use;  the field is printed as text if
// either "text" or "excpt" is true.
// The "text" and "size" arguments, and the expansion as a whole, are
// parenthesized so that the macro remains correct regardless of the
// operators used in the arguments or around the macro call.
#define TAR_PRINTABLE_EX(field, text, size)                                 \
    ("@" + s_OffsetAsString((size_t) offsetof(STarHeader, field)) +         \
     "[" NCBI_AS_STRING(field) "]:" +                                       \
     string(14 - sizeof(NCBI_AS_STRING(field)), ' ') +                      \
     '"' + s_Printable(h->field, (size), (text)  ||  excpt) + '"')

// Same as above but for the entire field
#define TAR_PRINTABLE(field, text)                                          \
    TAR_PRINTABLE_EX(field, text, sizeof(h->field))


// Labels used when dumping the sparse map of an old GNU header
#define TAR_GNU_REGION "[gnu.region]: "
#define TAR_GNU_CONTIND "[gnu.contind]: "
749 
750 static string s_DumpSparseMap(const STarHeader* h, const char* sparse,
751  const char* contind, bool excpt = false)
752 {
753  string dump;
754  size_t offset;
755  bool done = false;
756  string region(TAR_GNU_REGION);
757 
758  do {
759  if (memcchr(sparse, '\0', 24)) {
760  offset = (size_t)(sparse - (const char*) h);
761  if (!dump.empty())
762  dump += '\n';
763  dump += '@' + s_OffsetAsString(offset);
764  if (!done) {
765  Uint8 off, len;
766  int ok_off = s_DecodeUint8(off, sparse, 12);
767  int ok_len = s_DecodeUint8(len, sparse + 12, 12);
768  if (ok_off & ok_len) {
769  dump += region;
770  region = ':' + string(sizeof(TAR_GNU_REGION) - 2, ' ');
771  if (ok_off > 0) {
772  dump += '"';
773  dump += s_Printable(sparse, 12, excpt);
774  dump += "\" ";
775  } else {
776  dump += string(14, ' ');
777  }
778  sparse += 12;
779  if (ok_len > 0) {
780  dump += '"';
781  dump += s_Printable(sparse, 12, excpt);
782  dump += "\" ";
783  } else {
784  dump += string(14, ' ');
785  }
786  sparse += 12;
787  dump += "[@";
788  dump += NStr::NumericToString(off);
789  dump += ", ";
791  dump += ']';
792  continue;
793  }
794  done = true;
795  }
796  dump += ':' + string(sizeof(TAR_GNU_REGION) - 2, ' ')
797  + '"' + NStr::PrintableString(string(sparse, 24)) + '"';
798  } else {
799  done = true;
800  }
801  sparse += 24;
802  } while (sparse < contind);
803  if (!dump.empty()) {
804  dump += '\n';
805  }
806  offset = (size_t)(contind - (const char*) h);
808  "\"" + NStr::PrintableString(string(contind, 1))
809  + (*contind ? "\" [to-be-cont'd]" : "\" [last]");
810  return dump;
811 }
812 
813 
814 static string s_DumpSparseMap(const vector< pair<Uint8, Uint8> >& bmap)
815 {
816  size_t size = bmap.size();
817  string dump("Regions: " + NStr::NumericToString(size));
818  for (size_t n = 0; n < size; ++n) {
819  dump += "\n [" + NStr::NumericToString(n) + "]: @"
820  + NStr::NumericToString(bmap[n].first) + ", "
821  + NStr::NumericToString(bmap[n].second);
822  }
823  return dump;
824 }
825 
826 
827 static string s_DumpHeader(const STarHeader* h, ETar_Format fmt,
828  bool excpt = false)
829 {
830  string dump;
831  Uint8 val;
832  int ok;
833 
834  dump += TAR_PRINTABLE(name, true);
835  dump += '\n';
836 
837  ok = s_OctalToNum(val, h->mode, sizeof(h->mode));
838  dump += TAR_PRINTABLE(mode, !ok);
839  if (ok && val) {
840  dump += " [" + s_ModeAsString((TTarMode) val) + ']';
841  }
842  dump += '\n';
843 
844  ok = s_DecodeUint8(val, h->uid, sizeof(h->uid));
845  dump += TAR_PRINTABLE(uid, ok <= 0);
846  if (ok && (ok < 0 || val > 7)) {
847  dump += " [" + NStr::NumericToString(val) + ']';
848  if (ok < 0) {
849  dump += " (base-256)";
850  }
851  }
852  dump += '\n';
853 
854  ok = s_DecodeUint8(val, h->gid, sizeof(h->gid));
855  dump += TAR_PRINTABLE(gid, ok <= 0);
856  if (ok && (ok < 0 || val > 7)) {
857  dump += " [" + NStr::NumericToString(val) + ']';
858  if (ok < 0) {
859  dump += " (base-256)";
860  }
861  }
862  dump += '\n';
863 
864  ok = s_DecodeUint8(val, h->size, sizeof(h->size));
865  dump += TAR_PRINTABLE(size, ok <= 0);
866  if (ok && (ok < 0 || val > 7)) {
867  dump += " [" + NStr::NumericToString(val) + ']';
868  if (ok && h->typeflag[0] == 'S' && fmt == eTar_OldGNU) {
869  dump += " w/o map(s)!";
870  }
871  if (ok < 0) {
872  dump += " (base-256)";
873  }
874  }
875  dump += '\n';
876 
877  ok = s_OctalToNum(val, h->mtime, sizeof(h->mtime));
878  dump += TAR_PRINTABLE(mtime, !ok);
879  if (ok && val) {
880  CTime mtime((time_t) val);
881  ok = (Uint8) mtime.GetTimeT() == val ? true : false;
882  if (ok || val > 7) {
883  dump += (" ["
884  + (val > 7 ? NStr::NumericToString(val) + ", " : "")
885  + (ok ? mtime.ToLocalTime().AsString("Y-M-D h:m:s") : "")
886  + ']');
887  }
888  }
889  dump += '\n';
890 
891  ok = s_OctalToNum(val, h->checksum, sizeof(h->checksum));
892  dump += TAR_PRINTABLE(checksum, !ok);
893  dump += '\n';
894 
895  // Classify to the extent possible to help debug the problem (if any)
896  dump += TAR_PRINTABLE(typeflag, true);
897  ok = false;
898  const char* tname = 0;
899  switch (h->typeflag[0]) {
900  case '\0':
901  case '0':
902  ok = true;
903  if (!(fmt & eTar_Ustar) && fmt != eTar_OldGNU) {
904  size_t namelen = s_Length(h->name, sizeof(h->name));
905  if (namelen && h->name[namelen - 1] == '/')
906  tname = "legacy regular entry (dir)";
907  }
908  if (!tname)
909  tname = "legacy regular entry (file)";
910  tname += h->typeflag[0] ? 7/*skip "legacy "*/ : 0;
911  break;
912  case '\1':
913  case '1':
914  ok = true;
915 #ifdef NCBI_OS_UNIX
916  tname = "legacy hard link";
917 #else
918  tname = "legacy hard link - not FULLY supported";
919 #endif //NCBI_OS_UNIX
920  tname += h->typeflag[0] != '\1' ? 7/*skip "legacy "*/ : 0;
921  break;
922  case '\2':
923  case '2':
924  ok = true;
925 #ifdef NCBI_OS_UNIX
926  tname = "legacy symbolic link";
927 #else
928  tname = "legacy symbolic link - not FULLY supported";
929 #endif //NCBI_OS_UNIX
930  tname += h->typeflag[0] != '\2' ? 7/*skip "legacy "*/ : 0;
931  break;
932  case '3':
933 #ifdef NCBI_OS_UNIX
934  ok = true;
935 #endif //NCBI_OS_UNIX
936  tname = "character device";
937  break;
938  case '4':
939 #ifdef NCBI_OS_UNIX
940  ok = true;
941 #endif //NCBI_OS_UNIX
942  tname = "block device";
943  break;
944  case '5':
945  ok = true;
946  tname = "directory";
947  break;
948  case '6':
949 #ifdef NCBI_OS_UNIX
950  ok = true;
951 #endif //NCBI_OS_UNIX
952  tname = "FIFO";
953  break;
954  case '7':
955  tname = "contiguous";
956  break;
957  case 'g':
958  tname = "global extended header";
959  break;
960  case 'x':
961  case 'X':
962  if (fmt & eTar_Ustar) {
963  ok = true;
964  if (h->typeflag[0] == 'x') {
965  tname = "extended (POSIX 1003.1-2001 [PAX]) header"
966  " - not FULLY supported";
967  } else {
968  tname = "extended (POSIX 1003.1-2001 [PAX] by Sun) header"
969  " - not FULLY supported";
970  }
971  } else {
972  tname = "extended header";
973  }
974  break;
975  case 'A':
976  tname = "Solaris ACL";
977  break;
978  case 'D':
979  if (fmt == eTar_OldGNU) {
980  tname = "GNU extension: directory dump";
981  }
982  break;
983  case 'E':
984  tname = "Solaris extended attribute file";
985  break;
986  case 'I':
987  // CAUTION: Entry size shows actual file size in the filesystem but
988  // no actual data blocks stored in the archive following the header!
989  tname = "Inode metadata only";
990  break;
991  case 'K':
992  if (fmt == eTar_OldGNU) {
993  ok = true;
994  tname = "GNU extension: long link";
995  }
996  break;
997  case 'L':
998  if (fmt == eTar_OldGNU) {
999  ok = true;
1000  tname = "GNU extension: long name";
1001  }
1002  break;
1003  case 'M':
1004  switch (fmt) {
1005  case eTar_OldGNU:
1006  tname = "GNU extension: multi-volume entry";
1007  break;
1008  case eTar_Star:
1009  tname = "STAR extension: multi-volume entry";
1010  break;
1011  default:
1012  break;
1013  }
1014  break;
1015  case 'N':
1016  if (fmt == eTar_OldGNU) {
1017  tname = "GNU extension (obsolete): long filename(s)";
1018  }
1019  break;
1020  case 'S':
1021  switch (fmt) {
1022  case eTar_OldGNU:
1023  // CAUTION: Entry size does not include sparse entry map stored in
1024  // additional (non-standard) headers that may follow this header!
1025  tname = "GNU extension: sparse file";
1026  break;
1027  case eTar_Star:
1028  // Entry size already includes size of additional sparse file maps
1029  // that may follow this header before the actual file data.
1030  tname = "STAR extension: sparse file";
1031  break;
1032  default:
1033  break;
1034  }
1035  break;
1036  case 'V':
1037  ok = true;
1038  tname = "Volume header";
1039  break;
1040  default:
1041  break;
1042  }
1043  if (!tname && 'A' <= h->typeflag[0] && h->typeflag[0] <= 'Z') {
1044  tname = "local vendor enhancement / user-defined extension";
1045  }
1046  dump += (" [" + string(tname ? tname : "reserved")
1047  + (ok
1048  ? "]\n"
1049  : " -- NOT SUPPORTED]\n"));
1050 
1051  dump += TAR_PRINTABLE(linkname, true);
1052  dump += '\n';
1053 
1054  switch (fmt) {
1055  case eTar_Legacy: // NCBI never writes this header
1056  tname = "legacy (V7)";
1057  break;
1058  case eTar_OldGNU:
1059  if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1060  tname = "old GNU (NCBI)";
1061  } else {
1062  tname = "old GNU";
1063  }
1064  break;
1065  case eTar_Ustar:
1066  if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1067  tname = "ustar (NCBI)";
1068  } else {
1069  tname = "ustar";
1070  }
1071  break;
1072  case eTar_Posix: // aka "pax"
1073  if (!NStr::strncasecmp((const char*) h + BLOCK_SIZE - 4, "NCBI", 4)) {
1074  tname = "posix (NCBI)";
1075  } else {
1076  tname = "posix";
1077  }
1078  break;
1079  case eTar_Star: // NCBI never writes this header
1080  tname = "star";
1081  break;
1082  default:
1083  tname = 0;
1084  break;
1085  }
1086  dump += TAR_PRINTABLE(magic, true);
1087  if (tname) {
1088  dump += " [" + string(tname) + ']';
1089  }
1090  dump += '\n';
1091 
1092  dump += TAR_PRINTABLE(version, true);
1093 
1094  if (fmt != eTar_Legacy) {
1095  dump += '\n';
1096 
1097  dump += TAR_PRINTABLE(uname, true);
1098  dump += '\n';
1099 
1100  dump += TAR_PRINTABLE(gname, true);
1101  dump += '\n';
1102 
1103  ok = s_OctalToNum(val, h->devmajor, sizeof(h->devmajor));
1104  dump += TAR_PRINTABLE(devmajor, !ok);
1105  if (ok && val > 7) {
1106  dump += " [" + NStr::NumericToString(val) + ']';
1107  }
1108  dump += '\n';
1109 
1110  ok = s_OctalToNum(val, h->devminor, sizeof(h->devminor));
1111  dump += TAR_PRINTABLE(devminor, !ok);
1112  if (ok && val > 7) {
1113  dump += " [" + NStr::NumericToString(val) + ']';
1114  }
1115  dump += '\n';
1116 
1117  switch (fmt) {
1118  case eTar_Star:
1119  if (h->typeflag[0] == 'S') {
1120  dump += TAR_PRINTABLE_EX(star.prefix, true, 107);
1121  const char* realsize = h->star.prefix + 107;
1122  ok = s_DecodeUint8(val, realsize, 12);
1123  dump += "@"
1124  + s_OffsetAsString((size_t)(realsize - (const char*) h))
1125  + "[star.realsize]:\""
1126  + s_Printable(realsize, 12, !ok || excpt) + '"';
1127  if (ok && (ok < 0 || val > 7)) {
1128  dump += " [" + NStr::NumericToString(val) + ']';
1129  if (ok < 0) {
1130  dump += " (base-256)";
1131  }
1132  }
1133  } else {
1134  dump += TAR_PRINTABLE(star.prefix, true);
1135  }
1136  dump += '\n';
1137 
1138  ok = s_OctalToNum(val, h->star.atime, sizeof(h->star.atime));
1139  dump += TAR_PRINTABLE(star.atime, !ok);
1140  if (ok && val) {
1141  CTime atime((time_t) val);
1142  ok = (Uint8) atime.GetTimeT() == val ? true : false;
1143  if (ok || val > 7) {
1144  dump += (" ["
1145  + (val > 7 ? NStr::NumericToString(val)+", " : "")
1146  + (ok
1147  ? atime.ToLocalTime().AsString("Y-M-D h:m:s")
1148  : "")
1149  + ']');
1150  }
1151  }
1152  dump += '\n';
1153 
1154  ok = s_OctalToNum(val, h->star.ctime, sizeof(h->star.ctime));
1155  dump += TAR_PRINTABLE(star.ctime, !ok);
1156  if (ok && val) {
1157  CTime ctime((time_t) val);
1158  ok = (Uint8) ctime.GetTimeT() == val ? true : false;
1159  if (ok || val > 7) {
1160  dump += (" ["
1161  + (val > 7 ? NStr::NumericToString(val)+", " : "")
1162  + (ok
1163  ? ctime.ToLocalTime().AsString("Y-M-D h:m:s")
1164  : "")
1165  + ']');
1166  }
1167  }
1168  tname = (const char*) &h->star + sizeof(h->star);
1169  break;
1170 
1171  case eTar_OldGNU:
1172  ok = s_OctalToNum(val, h->gnu.atime, sizeof(h->gnu.atime));
1173  dump += TAR_PRINTABLE(gnu.atime, !ok);
1174  if (ok && val) {
1175  CTime atime((time_t) val);
1176  ok = (Uint8) atime.GetTimeT() == val ? true : false;
1177  if (ok || val > 7) {
1178  dump += (" ["
1179  + (val > 7 ? NStr::NumericToString(val)+", " : "")
1180  + (ok
1181  ? atime.ToLocalTime().AsString("Y-M-D h:m:s")
1182  : "")
1183  + ']');
1184  }
1185  }
1186  dump += '\n';
1187 
1188  ok = s_OctalToNum(val, h->gnu.ctime, sizeof(h->gnu.ctime));
1189  dump += TAR_PRINTABLE(gnu.ctime, !ok);
1190  if (ok && val) {
1191  CTime ctime((time_t) val);
1192  ok = (Uint8) ctime.GetTimeT() == val ? true : false;
1193  if (ok || val > 7) {
1194  dump += (" ["
1195  + (val > 7 ? NStr::NumericToString(val)+", " : "")
1196  + (ok
1197  ? ctime.ToLocalTime().AsString("Y-M-D h:m:s")
1198  : "")
1199  + ']');
1200  }
1201  }
1202 
1203  if (h->typeflag[0] == 'S') {
1204  if (memcchr(h->gnu.unused, '\0', sizeof(h->gnu.unused))) {
1205  dump += '\n';
1206  dump += TAR_PRINTABLE(gnu.unused, true);
1207  }
1208  dump += '\n' + s_DumpSparseMap(h, h->gnu.sparse,
1209  h->gnu.contind, excpt);
1210  if (memcchr(h->gnu.realsize, '\0', sizeof(h->gnu.realsize))) {
1211  ok = s_DecodeUint8(val, h->gnu.realsize,
1212  sizeof(h->gnu.realsize));
1213  dump += '\n';
1214  dump += TAR_PRINTABLE(gnu.realsize, ok <= 0);
1215  if (ok && (ok < 0 || val > 7)) {
1216  dump += " [" + NStr::NumericToString(val) + ']';
1217  }
1218  if (ok < 0) {
1219  dump += " (base-256)";
1220  }
1221  }
1222  tname = (const char*) &h->gnu + sizeof(h->gnu);
1223  } else {
1224  tname = h->gnu.ctime + sizeof(h->gnu.ctime);
1225  }
1226  break;
1227 
1228  default:
1229  dump += TAR_PRINTABLE(prefix, true);
1230  tname = h->prefix + sizeof(h->prefix);
1231  break;
1232  }
1233  } else {
1234  tname = h->version + sizeof(h->version);
1235  }
1236 
1237  size_t n = 0;
1238  while (&tname[n] < (const char*) h + BLOCK_SIZE) {
1239  if (tname[n]) {
1240  size_t offset = (size_t)(&tname[n] - (const char*) h);
1241  size_t len = BLOCK_SIZE - offset;
1242  if (len & ~0xF) { // len > 16
1243  len = 0x10; // len = 16
1244  }
1245  const char* e = (const char*) memchr(&tname[n], '\0', len);
1246  if (e) {
1247  len = (size_t)(e - &tname[n]);
1248  ok = s_DecodeUint8(val, &tname[n], len);
1249  } else {
1250  if (len > (offset & 0xF)) {
1251  len -= (offset & 0xF);
1252  }
1253  ok = false;
1254  }
1255  _ASSERT(len);
1256  dump += "\n@" + s_OffsetAsString(offset) + ':' + string(15, ' ')
1257  + '"' + NStr::PrintableString(string(&tname[n], len)) + '"';
1258  if (ok) {
1259  CTime time((time_t) val);
1260  bool okaytime = (Uint8) time.GetTimeT() == val;
1261  if (ok < 0 || val > 7 || okaytime) {
1262  dump += " [";
1263  if (ok < 0 || val > 7) {
1265  }
1266  if (ok < 0) {
1267  dump += "] (base-256)";
1268  } else if (okaytime) {
1269  if (val > 7) {
1270  dump += ", ";
1271  }
1272  dump += time.ToLocalTime().AsString("Y-M-D h:m:s]");
1273  } else {
1274  dump += ']';
1275  }
1276  }
1277  }
1278  n += len;
1279  } else {
1280  n++;
1281  }
1282  }
1283 
1284  return dump;
1285 }
1286 
1287 #undef TAR_PRINTABLE
1288 
1289 #undef _STR
1290 
1291 
1292 inline void s_SetStateSafe(CNcbiIos& ios, IOS_BASE::iostate state) throw()
1293 {
1294  try {
1295  ios.setstate(state);
1296  } catch (IOS_BASE::failure&) {
1297  ;
1298  }
1299 }
1300 
1301 
1302 //////////////////////////////////////////////////////////////////////////////
1303 //
1304 // CTar
1305 //
1306 
1307 CTar::CTar(const string& filename, size_t blocking_factor)
1308  : m_FileName(filename),
1309  m_FileStream(new CNcbiFstream),
1310  m_Stream(*m_FileStream),
1311  m_ZeroBlockCount(0),
1312  m_BufferSize(SIZE_OF(blocking_factor)),
1313  m_BufferPos(0),
1314  m_StreamPos(0),
1315  m_BufPtr(0),
1316  m_Buffer(0),
1317  m_OpenMode(eNone),
1318  m_Modified(false),
1319  m_Bad(false),
1320  m_Flags(fDefault)
1321 {
1322  x_Init();
1323 }
1324 
1325 
1326 CTar::CTar(CNcbiIos& stream, size_t blocking_factor)
1327  : m_FileName(kEmptyStr),
1328  m_FileStream(0),
1329  m_Stream(stream),
1330  m_ZeroBlockCount(0),
1331  m_BufferSize(SIZE_OF(blocking_factor)),
1332  m_BufferPos(0),
1333  m_StreamPos(0),
1334  m_BufPtr(0),
1335  m_Buffer(0),
1336  m_OpenMode(eNone),
1337  m_Modified(false),
1338  m_Bad(false),
1339  m_Flags(fDefault)
1340 {
1341  x_Init();
1342 }
1343 
1344 
1346 {
// NOTE(review): the signature line for this body is absent from this listing;
// judging by the cleanup performed it is presumably CTar's destructor --
// confirm against the original file.
1347  // Close stream(s)
// Flush in non-throwing mode; the flush result is what x_Close() receives as
// its "truncate" argument.  The owned file stream (if any) is then deleted.
1348  x_Close(x_Flush(true/*no_throw*/));
1349  delete m_FileStream;
1350  m_FileStream = 0;
1351 
1352  // Delete owned masks
// NOTE(review): the loop body (one line) is absent from this listing.
1353  for (size_t i = 0; i < sizeof(m_Mask) / sizeof(m_Mask[0]); ++i) {
1355  }
1356 
1357  // Delete buffer
// m_BufPtr is the pointer obtained from new[] in x_Init(); m_Buffer is only
// an aligned pointer into that same allocation, so it is not deleted.
1358  delete[] m_BufPtr;
1359  m_BufPtr = 0;
1360 }
1361 
1362 
// Throw a CTarException with code "errcode".  The exception text is the
// archive position string (file name, stream position, buffer size, and the
// current entry name, all taken from the CTar object "who" points to)
// followed by "message".
1363 #define TAR_THROW(who, errcode, message) \
1364  NCBI_THROW(CTarException, errcode, \
1365  s_PositionAsString(who->m_FileName, who->m_StreamPos, \
1366  who->m_BufferSize, \
1367  who->m_Current.GetName()) + (message))
1368 
// Same as TAR_THROW, but when fDumpEntryHeaders is set in who->m_Flags the
// message is extended with a dump of the header "hdr" (interpreted per "fmt").
1369 #define TAR_THROW_EX(who, errcode, message, hdr, fmt) \
1370  TAR_THROW(who, errcode, \
1371  who->m_Flags & fDumpEntryHeaders \
1372  ? string(message) + ":\n" + s_DumpHeader(hdr, fmt, true) \
1373  : string(message))
1374 
// Post a diagnostic with the given error subcode and severity, prefixed with
// the archive position string.  Unlike TAR_THROW, this macro refers to the
// m_* members directly, so it can only be used inside CTar member functions.
1375 #define TAR_POST(subcode, severity, message) \
1376  ERR_POST_X(subcode, (severity) << \
1377  s_PositionAsString(m_FileName, m_StreamPos, m_BufferSize,\
1378  m_Current.GetName()) + (message))
1379 
1380 
// Allocate the archive I/O buffer.
// m_BufPtr receives the raw allocation (the pointer that is later passed to
// delete[]); m_Buffer is set to the first page-aligned address inside it.
1381 void CTar::x_Init(void)
1382 {
// NOTE(review): one line is absent from this listing at this point.
1384  size_t pagesize = (size_t) CSystemInfo::GetVirtualMemoryPageSize();
// Fall back to 4096 if the reported page size is smaller than that or is not
// a power of two (the mask arithmetic below requires a power of two).
1385  if (pagesize < 4096 || (pagesize & (pagesize - 1))) {
1386  pagesize = 4096; // reasonable default
1387  }
1388  size_t pagemask = pagesize - 1;
// Over-allocate by (pagesize - 1) bytes so that an aligned region of
// m_BufferSize bytes always fits within the allocation.
1389  m_BufPtr = new char[m_BufferSize + pagemask];
1390  // Make m_Buffer page-aligned
// Round the address of m_BufPtr up to the next multiple of pagesize and add
// the resulting offset to m_BufPtr.
1391  m_Buffer = m_BufPtr +
1392  ((((size_t) m_BufPtr + pagemask) & ~pagemask) - (size_t) m_BufPtr);
1393 }
1394 
1395 
// Flush pending archive output: pad the current record with zeros, make sure
// at least two zero blocks (EOT) have been written, then sync the stream.
//
// no_throw -- when true, write/sync failures are posted as errors instead of
//             being thrown (x_WriteArchive() is passed the (const char*)(-1L)
//             sentinel, which makes it post rather than throw).
//
// Returns false when nothing was attempted (archive in bad state, not open,
// or not modified); returns true once the flush was attempted -- NB: also
// when it failed in no_throw mode (m_Bad is set in that case).
//
// NOTE(review): several lines are absent from this listing (the embedded
// numbering skips 1406, 1410, 1423, 1428, 1434 and 1439), including the rest
// of the "!m_Modified &&" condition and the heads of two _ASSERT statements.
1396 bool CTar::x_Flush(bool no_throw)
1397 {
1398  m_Current.m_Name.clear();
// A completely full buffer at this point indicates an earlier write that did
// not complete; x_WriteArchive() resets m_BufferPos to 0 after a good write.
1399  if (m_BufferPos == m_BufferSize) {
1400  m_Bad = true; // In case of unhandled exception(s)
1401  }
1402  if (m_Bad || !m_OpenMode) {
1403  return false;
1404  }
1405  if (!m_Modified &&
1407  return false;
1408  }
1409 
1411  if (m_BufferPos || m_ZeroBlockCount < 2) {
1412  // Assure proper blocking factor and pad the archive as necessary
// zbc keeps the running count of trailing zero blocks, because
// x_WriteArchive() resets m_ZeroBlockCount to 0 on every call.
1413  size_t zbc = m_ZeroBlockCount;
1414  size_t pad = m_BufferSize - m_BufferPos;
1415  memset(m_Buffer + m_BufferPos, 0, pad);
1416  x_WriteArchive(pad, no_throw ? (const char*)(-1L) : 0);
1417  _ASSERT(!(m_BufferPos % m_BufferSize) // m_BufferSize if write error
1418  && !m_Bad == !m_BufferPos);
1419  if (!m_Bad && (zbc += BLOCK_OF(pad)) < 2) {
1420  // Write EOT (two zero blocks), if have not padded enough already
// Only the leading (m_BufferSize - pad) bytes need clearing here: the
// trailing "pad" bytes of the buffer were zeroed by the memset above.
1421  memset(m_Buffer, 0, m_BufferSize - pad);
1422  x_WriteArchive(m_BufferSize, no_throw ? (const char*)(-1L) : 0);
1424  && !m_Bad == !m_BufferPos);
// Still fewer than two zero blocks: per the assertion below this is expected
// only when the record is a single block, so one more block is written.
1425  if (!m_Bad && (zbc += BLOCK_OF(m_BufferSize)) < 2) {
1426  _ASSERT(zbc == 1 && m_BufferSize == BLOCK_SIZE);
1427  x_WriteArchive(BLOCK_SIZE, no_throw ? (const char*)(-1L) : 0);
1429  && !m_Bad == !m_BufferPos);
1430  }
1431  }
1432  m_ZeroBlockCount = zbc;
1433  }
1435 
// Push the stream buffer out to the underlying device; a non-zero result is
// treated as a flush failure.
1436  if (!m_Bad && m_Stream.rdbuf()->PUBSYNC() != 0) {
1437  m_Bad = true;
// errno is captured right away, before any further call can change it.
1438  int x_errno = errno;
1440  if (!no_throw) {
1441  TAR_THROW(this, eWrite,
1442  "Archive flush failed" + s_OSReason(x_errno));
1443  }
1444  TAR_POST(83, Error,
1445  "Archive flush failed" + s_OSReason(x_errno));
1446  }
1447  if (!m_Bad) {
1448  m_Modified = false;
1449  }
1450  return true;
1451 }
1452 
1453 
1454 static int s_TruncateFile(const string& filename, Uint8 filesize)
1455 {
1456  int x_error = 0;
1457 #ifdef NCBI_OS_UNIX
1458  if (::truncate(filename.c_str(), (off_t) filesize) != 0)
1459  x_error = errno;
1460 #endif //NCBI_OS_UNIX
1461 #ifdef NCBI_OS_MSWIN
1462  TXString x_filename(_T_XSTRING(filename));
1463  HANDLE handle = ::CreateFile(x_filename.c_str(), GENERIC_WRITE,
1464  0/*sharing*/, NULL, OPEN_EXISTING,
1465  FILE_ATTRIBUTE_NORMAL, NULL);
1466  if (handle != INVALID_HANDLE_VALUE) {
1467  LARGE_INTEGER x_filesize;
1468  x_filesize.QuadPart = filesize;
1469  if (!::SetFilePointerEx(handle, x_filesize, NULL, FILE_BEGIN)
1470  || !::SetEndOfFile(handle)) {
1471  x_error = (int) ::GetLastError();
1472  }
1473  bool closed = ::CloseHandle(handle) ? true : false;
1474  if (!x_error && !closed) {
1475  x_error = (int) ::GetLastError();
1476  }
1477  } else {
1478  x_error = (int) ::GetLastError();
1479  }
1480 #endif //NCBI_OS_MSWIN
1481  return x_error;
1482 }
1483 
1484 
// Close the archive file stream (if this object owns one and it is open) and
// reset the open mode, the modified and bad flags, and the buffer position.
//
// truncate -- together with a good state and the absence of the
//             fTarfileNoTruncate flag, enables the truncation branch below.
//             NOTE(review): the body of that branch (one line) is absent from
//             this listing; presumably it trims the file -- confirm.
//
// A failure to close is posted as an error (not thrown).
1485 void CTar::x_Close(bool truncate)
1486 {
1487  if (m_FileStream && m_FileStream->is_open()) {
1488  m_FileStream->close();
1489  if (!m_Bad && m_FileStream->fail()) {
1490  int x_errno = errno;
1491  TAR_POST(104, Error,
1492  "Cannot close archive" + s_OSReason(x_errno));
1493  m_Bad = true;
1494  }
1495  if (!m_Bad && !(m_Flags & fTarfileNoTruncate) && truncate) {
1497  }
1498  }
// The state is reset unconditionally, including m_Bad: a later open starts
// from a clean state.
1499  m_OpenMode = eNone;
1500  m_Modified = false;
1501  m_BufferPos = 0;
1502  m_Bad = false;
1503 }
1504 
1505 
// Prepare the archive for the requested action.
//
// External stream (m_FileStream is null): the stream cannot be (re)opened
// here, so only its state is validated, m_OpenMode is derived from the action
// bits, and the buffer/stream positions are reset unless the action is
// eAppend or eInternal.
//
// Named file (m_FileStream is non-null): pending output is flushed first
// (except for eAppend / eCreate); the file is then (re)opened if write-only
// access is requested or the current open mode is lower than the required
// one, otherwise the already open file is rewound (again, except for
// eAppend / eInternal).
//
// When "toend" gets set, the archive is scanned with
// x_ReadAndProcess(eAppend) so that appending starts at the logical EOF.
//
// Throws CTarException (eOpen) via TAR_THROW if the stream/file is in a bad
// state or the archive file cannot be opened.
//
// NOTE(review): a few lines are absent from this listing (the embedded
// numbering skips 1550, 1555 and 1590).
1506 void CTar::x_Open(EAction action)
1507 {
1508  _ASSERT(action);
1509  bool toend = false;
1510  // We can only open a named file here, and if an external stream is being
1511  // used as an archive, it must be explicitly repositioned by user's code
1512  // (outside of this class) before each archive operation.
1513  if (!m_FileStream) {
1514  if (!m_Modified) {
1515  // Check if Create() is followed by Append()
1516  if (m_OpenMode != eWO && action == eAppend
1517  && (m_Flags & fStreamPipeThrough)) {
1518  toend = true;
1519  }
1520  } else if (action != eAppend) {
1521  _ASSERT(m_OpenMode != eWO); // NB: Prev action != eCreate
1522  if (m_Flags & fStreamPipeThrough) {
1523  x_Flush(); // NB: resets m_Modified to false if successful
1524  }
// Still modified here means either no flush was done (not in pipe-through
// mode) or the flush did not succeed: warn unless already known to be bad.
1525  if (m_Modified) {
1526  if (!m_Bad) {
1527  TAR_POST(1, Warning,
1528  "Pending changes may be discarded"
1529  " upon reopen of in-stream archive");
1530  }
1531  m_Modified = false;
1532  }
1533  }
1534  m_Current.m_Name.clear();
// EOF alone is tolerated; any other state bit, or a missing stream buffer,
// makes the stream unusable.
1535  if (m_Bad || (m_Stream.rdstate() & ~NcbiEofbit) || !m_Stream.rdbuf()) {
1536  TAR_THROW(this, eOpen,
1537  "Archive I/O stream is in bad state");
1538  } else {
// The open mode is taken from the read/write bits of the action code.
1539  m_OpenMode = EOpenMode(int(action) & eRW);
1540  _ASSERT(m_OpenMode != eNone);
1541  }
1542  if (action != eAppend && action != eInternal) {
1543  m_BufferPos = 0;
1544  m_StreamPos = 0;
1545  }
1546 #ifdef NCBI_OS_MSWIN
// NOTE(review): the statement executed when stdin is not a disk file is
// absent from this listing.
1547  if (&m_Stream == &cin) {
1548  HANDLE handle = (HANDLE) _get_osfhandle(_fileno(stdin));
1549  if (GetFileType(handle) != FILE_TYPE_DISK) {
1551  }
1552  }
1553 #endif //NCBI_OS_MSWIN
1554  } else {
1556  EOpenMode mode = EOpenMode(int(action) & eRW);
1557  _ASSERT(mode != eNone);
1558  if (action != eAppend && action != eCreate/*mode == eWO*/) {
1559  x_Flush();
1560  } else {
1561  m_Current.m_Name.clear();
1562  }
1563  if (mode == eWO || m_OpenMode < mode) {
1564  // Need to (re-)open the archive file
1565  if (m_OpenMode != eWO && action == eAppend) {
1566  toend = true;
1567  }
1568  x_Close(false); // NB: m_OpenMode = eNone; m_Modified = false
1569  m_StreamPos = 0;
1570  switch (mode) {
1571  case eWO:
1572  // WO access
1573  _ASSERT(action == eCreate);
1574  // Note that m_Modified is untouched
1575  m_FileStream->open(m_FileName.c_str(),
1576  IOS_BASE::out |
1577  IOS_BASE::binary | IOS_BASE::trunc);
1578  break;
1579  case eRO:
1580  // RO access
1581  _ASSERT(action != eCreate);
1582  m_FileStream->open(m_FileName.c_str(),
1583  IOS_BASE::in |
1584  IOS_BASE::binary);
1585  break;
1586  case eRW:
1587  // RW access
1588  _ASSERT(action != eCreate);
// NOTE(review): the line carrying the leading open-mode flags of this call is
// absent from this listing.
1589  m_FileStream->open(m_FileName.c_str(),
1591  IOS_BASE::binary);
1592  break;
1593  default:
1594  _TROUBLE;
1595  break;
1596  }
1597  if (!m_FileStream->is_open() || !m_FileStream->good()) {
1598  int x_errno = errno;
1599  TAR_THROW(this, eOpen,
1600  "Cannot open archive" + s_OSReason(x_errno));
1601  } else {
1602  m_OpenMode = mode;
1603  }
1604  } else {
1605  // No need to reopen the archive file
1606  _ASSERT(m_OpenMode > eWO && action != eCreate);
1607  if (m_Bad) {
1608  TAR_THROW(this, eOpen,
1609  "Archive file is in bad state");
1610  }
1611  if (action != eAppend && action != eInternal) {
1612  m_BufferPos = 0;
1613  m_StreamPos = 0;
1614  m_FileStream->seekg(0);
1615  }
1616  }
1617  }
1618  if (toend) {
1619  _ASSERT(!m_Modified && action == eAppend);
1620  // There may be an extra and unnecessary archive file scanning
1621  // if Append() follows Update() that caused no modifications;
1622  // but there is no way to distinguish this, currently :-/
1623  // Also, this sequence should be a real rarity in practice.
1624  x_ReadAndProcess(eAppend); // to position at logical EOF
1625  }
1626  _ASSERT(!(m_Stream.rdstate() & ~NcbiEofbit));
1627  _ASSERT(m_Stream.rdbuf());
1628 }
1629 
1630 
1631 unique_ptr<CTar::TEntries> CTar::Extract(void)
1632 {
1633  x_Open(eExtract);
1634  unique_ptr<TEntries> entries = x_ReadAndProcess(eExtract);
1635 
1636  // Restore attributes of "postponed" directory entries
1637  if (m_Flags & fPreserveAll) {
1638  ITERATE(TEntries, e, *entries) {
1639  if (e->GetType() == CTarEntryInfo::eDir) {
1640  x_RestoreAttrs(*e, m_Flags);
1641  }
1642  }
1643  }
1644 
1645  return entries;
1646 }
1647 
1648 
1650 {
// NOTE(review): the signature line for this body is absent from this listing
// (presumably CTar::GetNextEntryInfo -- confirm), as are the two lines that
// make up the branch taken when the archive is already open for reading.
//
// Reads the next archive entry via x_ReadAndProcess(eInternal).  Returns a
// pointer to m_Current (owned by this object) on success, or 0 if the archive
// is in a bad state or no entry was returned.
1651  if (m_Bad) {
1652  return 0;
1653  }
1654  if (m_OpenMode & eRO) {
1657  } else {
1658  x_Open(eInternal);
1659  }
1660  unique_ptr<TEntries> temp = x_ReadAndProcess(eInternal);
// At most one entry is expected back from a single eInternal step.
1661  _ASSERT(temp && temp->size() < 2);
1662  if (temp->size() < 1) {
1663  return 0;
1664  }
1665  _ASSERT(m_Current == temp->front());
1666  return &m_Current;
1667 }
1668 
1669 
1670 // Return a pointer to buffer, which is always block-aligned, and reflect the
1671 // number of bytes available via the parameter. Return NULL when unable to
1672 // read (either EOF or other read error).
// n -- in: number of bytes wanted (must be non-zero);
//      out: number of bytes actually available at the returned pointer (it is
//      only ever reduced, never increased).
//
// NOTE(review): several lines are absent from this listing (the embedded
// numbering skips 1675-1676, 1739, 1744-1745 and 1750).
1673 const char* CTar::x_ReadArchive(size_t& n)
1674 {
1677  _ASSERT(n != 0);
1678  size_t nread;
// Buffer position 0 means a new record has to be read in from the stream.
1679  if (!m_BufferPos) {
1680  nread = 0;
// Keep reading until the entire buffer has been filled or EOF/error occurs.
1681  do {
// xread: > 0 -- bytes read; 0 -- EOF; < 0 -- read error.
1682  streamsize xread;
1683  IOS_BASE::iostate iostate = m_Stream.rdstate();
1684  if (!iostate) { // NB: good()
1685 #ifdef NCBI_COMPILER_MIPSPRO
1686  try {
1687  // Work around a bug in MIPSPro 7.3's streambuf::xsgetn()
1688  CNcbiIstream* is = dynamic_cast<CNcbiIstream*>(&m_Stream);
1689  _ASSERT(is);
1690  is->read (m_Buffer + nread,
1691  (streamsize)(m_BufferSize - nread));
1692  xread = is->gcount();
1693  if (xread > 0) {
1694  is->clear();
1695  }
1696  } catch (IOS_BASE::failure&) {
1697  xread = m_Stream.rdstate() & NcbiEofbit ? 0 : -1;
1698  }
1699 #else
1700  try {
1701  xread = m_Stream.rdbuf()->
1702  sgetn(m_Buffer + nread,
1703  (streamsize)(m_BufferSize - nread));
1704 # ifdef NCBI_COMPILER_WORKSHOP
1705  if (xread < 0) {
1706  xread = 0; // NB: WS6 is known to return -1 :-/
1707  }
1708 # endif //NCBI_COMPILER_WORKSHOP
1709  } catch (IOS_BASE::failure&) {
1710  xread = -1;
1711  }
1712 #endif //NCBI_COMPILER_MIPSPRO
1713  } else {
// Stream not good(): EOF alone counts as end of data, anything else is an
// error.
1714  xread = iostate == NcbiEofbit ? 0 : -1;
1715  }
1716  if (xread <= 0) {
// A partially filled record is only reported when header dumping is on.
1717  if (nread && (m_Flags & fDumpEntryHeaders)) {
1718  TAR_POST(57, xread ? Error : Warning,
1719  "Short read (" + NStr::NumericToString(nread)
1720  + (xread ? ")" : "): EOF"));
1721  }
1722  s_SetStateSafe(m_Stream, xread < 0 ? NcbiBadbit : NcbiEofbit);
// Whatever has been read so far is still handed out; only a completely empty
// read results in the NULL return.
1723  if (nread) {
1724  break;
1725  }
1726  return 0;
1727  }
1728  nread += (size_t) xread;
1729  } while (nread < m_BufferSize);
// Zero out the part of the buffer that was not filled (short read).
1730  memset(m_Buffer + nread, 0, m_BufferSize - nread);
1731  } else {
// Continue with what remains of the record that is already in the buffer.
1732  nread = m_BufferSize - m_BufferPos;
1733  }
1734  if (n > nread) {
1735  n = nread;
1736  }
1737  size_t xpos = m_BufferPos;
// The position advances by ALIGN_SIZE(n) rather than n (presumably n rounded
// up to a whole number of blocks -- the macro is defined outside this view).
1738  m_BufferPos += ALIGN_SIZE(n);
1740  if (m_BufferPos == m_BufferSize) {
// Record fully consumed: the next call will read a new one.
1741  m_BufferPos = 0;
// In pipe-through mode with an external stream, m_ZeroBlockCount is saved and
// restored around the lines that are absent from this listing.
1742  if (!m_FileStream && (m_Flags & fStreamPipeThrough)) {
1743  size_t zbc = m_ZeroBlockCount;
1746  _ASSERT(m_BufferPos == 0);
1747  m_ZeroBlockCount = zbc;
1748  }
1749  }
1751  return m_Buffer + xpos;
1752 }
1753 
1754 
1755 // All partial internal (i.e. in-buffer) block writes are _not_ block-aligned;
1756 // but all external writes (i.e. when "src" is provided) _are_ block-aligned.
// Add "nwrite" bytes to the archive buffer, writing the buffer out to the
// stream each time it becomes full.
//
// src -- a data pointer:      the bytes are copied into the buffer and the
//                             position is padded up to ALIGN_SIZE();
//        0:                   the bytes are already in the buffer, only the
//                             buffer position is advanced;
//        (const char*)(-1L):  same as 0, but a write failure is posted as an
//                             error instead of being thrown.
//
// Does nothing if "nwrite" is 0 or the archive is in a bad state.  On a write
// failure m_Bad is set, and CTarException (eWrite) is thrown unless "src" is
// the (const char*)(-1L) sentinel.
//
// NOTE(review): a few lines are absent from this listing (the embedded
// numbering skips 1765, 1779, 1808 and 1824).
1757 void CTar::x_WriteArchive(size_t nwrite, const char* src)
1758 {
1759  if (!nwrite || m_Bad) {
1760  return;
1761  }
1762  m_Modified = true;
// Any write resets the count of trailing zero blocks; x_Flush() keeps its own
// running count for that reason.
1763  m_ZeroBlockCount = 0;
1764  do {
// Take as much as fits in the remainder of the buffer.
1766  size_t avail = m_BufferSize - m_BufferPos;
1767  if (avail > nwrite) {
1768  avail = nwrite;
1769  }
1770  size_t advance = avail;
1771  if (src && src != (const char*)(-1L)) {
// External data: copy it in and zero-fill up to ALIGN_SIZE(avail).
1772  memcpy(m_Buffer + m_BufferPos, src, avail);
1773  size_t pad = ALIGN_SIZE(avail) - avail;
1774  memset(m_Buffer + m_BufferPos + avail, 0, pad);
1775  advance += pad;
1776  src += avail;
1777  }
1778  m_BufferPos += advance;
1780  if (m_BufferPos == m_BufferSize) {
// Buffer is full: write it out, looping to handle partial writes.
1781  size_t nwritten = 0;
1782  do {
1783  int x_errno;
1784  streamsize xwritten;
1785  IOS_BASE::iostate iostate = m_Stream.rdstate();
1786  if (!(iostate & ~NcbiEofbit)) { // NB: good() OR eof()
1787  try {
1788  xwritten = m_Stream.rdbuf()
1789  ->sputn(m_Buffer + nwritten,
1790  (streamsize)(m_BufferSize - nwritten));
1791  } catch (IOS_BASE::failure&) {
1792  xwritten = -1;
1793  }
1794  if (xwritten > 0) {
// A successful write clears a lingering EOF state on the stream.
1795  if (iostate) {
1796  m_Stream.clear();
1797  }
1798  x_errno = 0;
1799  } else {
1800  x_errno = errno;
1801  }
1802  } else {
// The stream was already in a failed state: no OS error code to report.
1803  xwritten = -1;
1804  x_errno = 0;
1805  }
1806  if (xwritten <= 0) {
// NB: m_BufferPos is left equal to m_BufferSize here; x_Flush() uses that to
// detect an incomplete write.
1807  m_Bad = true;
1809  if (src != (const char*)(-1L)) {
1810  TAR_THROW(this, eWrite,
1811  "Archive write failed" +s_OSReason(x_errno));
1812  }
1813  TAR_POST(84, Error,
1814  "Archive write failed" + s_OSReason(x_errno));
1815  return;
1816  }
1817  nwritten += (size_t) xwritten;
1818  } while (nwritten < m_BufferSize);
1819  m_BufferPos = 0;
1820  }
// The stream position accounts for the padding; the remaining byte count
// does not.
1821  m_StreamPos += advance;
1822  nwrite -= avail;
1823  } while (nwrite);
1825 }
1826 
1827 
1828 // PAX (Portable Archive Interchange) extraction support
1829 
1830 // Define bitmasks for extended numeric information (must fit in perm mask)
// Bit flags recording which numeric PAX fields were successfully parsed; the
// parser table in the PAX data parser assigns one of them to each numeric key.
// NOTE(review): two enumerator lines are absent from this listing (the
// embedded numbering skips 1832-1833); fPAXNone and fPAXSparseGNU_1_0 are
// used further down in this file but are not visible here.
1831 enum EPAXBit {
1834  fPAXSparse = 1 << 1,
1835  fPAXMtime = 1 << 2,
1836  fPAXAtime = 1 << 3,
1837  fPAXCtime = 1 << 4,
1838  fPAXSize = 1 << 5,
1839  fPAXUid = 1 << 6,
1840  fPAXGid = 1 << 7
1841 };
1842 typedef unsigned int TPAXBits; // Bitwise-OR of EPAXBit(s)
1843 
1844 
1845 // Parse "len" bytes of "str" as numeric "valp[.fraq]"
1846 static bool s_ParsePAXNumeric(Uint8* valp, const char* str, size_t len,
1847  string* fraq, EPAXBit assign)
1848 {
1849  _ASSERT(valp && str[len] == '\n');
1850  if (!isdigit((unsigned char)(*str))) {
1851  return false;
1852  }
1853  const char* p = (const char*) memchr(str, '.', len);
1854  if (!p) {
1855  p = str + len;
1856  } else if (fraq == (string*)(-1L)) {
1857  // no decimal point allowed
1858  return false;
1859  }
1860  Uint8 val;
1861  try {
1862  val = NStr::StringToUInt8(CTempString(str, (size_t)(p - str)));
1863  } catch (...) {
1864  return false;
1865  }
1866  if (*p == '.' && ++p != str + len) {
1867  len -= (size_t)(p - str);
1868  _ASSERT(len);
1869  for (size_t n = 0; n < len; ++n) {
1870  if (!isdigit((unsigned char) p[n])) {
1871  return false;
1872  }
1873  }
1874  if (assign && fraq) {
1875  fraq->assign(p, len);
1876  }
1877  } // else (*p == '\n' || !*p)
1878  if (assign) {
1879  *valp = val;
1880  }
1881  return true;
1882 }
1883 
1884 
/// Check whether the first "len" characters of "str" are all lowercase
/// letters.
///
/// @return  true if every character is alphabetic and lowercase (also true
///          for len == 0); false as soon as any other character is found.
static bool s_AllLowerCase(const char* str, size_t len)
{
    const char* const end = str + len;
    for (const char* p = str;  p != end;  ++p) {
        const unsigned char ch = (unsigned char)(*p);
        if (!(isalpha(ch)  &&  islower(ch))) {
            return false;
        }
    }
    return true;
}
1894 
1895 
1896 // Raise 10 to the power of n
1897 static Uint8 ipow10(unsigned int n)
1898 {
1899  _ASSERT(n < 10);
1900  // for small n this is the fastest
1901  return n ? 10 * ipow10(n - 1) : 1;
1902 }
1903 
1904 
1905 // NB: assumes fraq is all digits
// Convert the digits that follow a decimal point into a number of
// nanoseconds.  An empty string yields 0.
//
// NOTE(review): the trailing arguments of both StringToUInt8() calls are
// absent from this listing (the embedded numbering skips 1914-1915 and
// 1919-1920).
1906 static long s_FraqToNanosec(const string& fraq)
1907 {
1908  size_t len = fraq.size();
1909  if (!len)
1910  return 0;
1911  long result;
1912  if (len < 10) {
// Nine digits or fewer: scale up to nanoseconds, e.g. "5" -> 500000000.
1913  Uint8 temp = NStr::StringToUInt8(fraq,
1916  result = (long)(temp * ipow10((unsigned int)(9 - len)));
1917  } else {
// Ten digits or more: use the first ten digits and round to nine.
// NOTE(review): when the first ten digits are 9999999995 or greater, the
// rounding yields 1000000000, which violates the assertion below -- confirm
// whether such input can reach this point.
1918  Uint8 temp = NStr::StringToUInt8(CTempString(fraq, 0, 10),
1921  result = (long)((temp + 5) / 10);
1922  }
1923  _ASSERT(0L <= result && result < 1000000000L);
1924  return result;
1925 }
1926 
1927 
1929 {
// NOTE(review): the signature line for this body is absent from this listing
// (presumably CTar::x_ParsePAXData, taking the PAX data as "data" -- confirm).
// A few other lines are absent as well (the embedded numbering skips 2029 and
// 2057-2059).
//
// Parses PAX extended header data, a sequence of "<len> <key>=<value>\n"
// records, using the table below, and stores the results in m_Current.
// Returns eFailure on a malformed record, eContinue otherwise.
1930  Uint8 major = 0, minor = 0, size = 0, sparse = 0, uid = 0, gid = 0;
1931  Uint8 mtime = 0, atime = 0, ctime = 0, dummy = 0;
1932  string mtime_fraq, atime_fraq, ctime_fraq;
1933  string path, linkpath, name, uname, gname;
// Sentinel for the "str" column below: the numeric value must not contain a
// decimal point.
1934  string* nodot = (string*)(-1L);
1935  const struct SPAXParseTable {
1936  const char* key;
1937  Uint8* val; // non-null for numeric, else do as string
1938  string* str; // string or fraction part (if not -1)
1939  EPAXBit bit; // for numerics only
1940  } parser[] = {
1941  { "mtime", &mtime, &mtime_fraq, fPAXMtime }, // num w/fraq: assign
1942  { "atime", &atime, &atime_fraq, fPAXAtime },
1943  { "ctime", &ctime, &ctime_fraq, fPAXCtime },
1944  /*{ "dummy", &dummy, 0, fPAXSome },*/// num w/fraq: asg int
1945  /*{ "dummy", &dummy, &fraq or 0, fPAXNone },*/// num w/fraq: ck.only
1946  { "size", &size, nodot, fPAXSize }, // number: assign
1947  { "uid", &uid, nodot, fPAXUid },
1948  { "gid", &gid, nodot, fPAXGid },
1949  /*{ "dummy", &dummy, nodot, fPAXNone },*/// number: ck.only
1950  { "path", 0, &path, fPAXNone }, // string: assign
1951  { "linkpath", 0, &linkpath, fPAXNone },
1952  { "uname", 0, &uname, fPAXNone },
1953  { "gname", 0, &gname, fPAXNone },
1954  { "comment", 0, 0, fPAXNone }, // string: ck.only
1955  { "charset", 0, 0, fPAXNone },
1956  // GNU sparse extensions (NB: .size and .realsize don't go together)
1957  { "GNU.sparse.realsize", &sparse, nodot, fPAXSparse },
1958  { "GNU.sparse.major", &major, nodot, fPAXSparse },
1959  { "GNU.sparse.minor", &minor, nodot, fPAXSparse },
1960  { "GNU.sparse.size", &dummy, nodot, fPAXSparse },
1961  { "GNU.sparse.name", 0, &name, fPAXNone },
1962  // Other
1963  { "SCHILY.realsize", &sparse, nodot, fPAXSparse }
1964  };
1965  const char* s = data.c_str();
1966  TPAXBits parsed = fPAXNone;
1967  size_t l = data.size();
1968 
1969  _ASSERT(l && l == strlen(s));
// One iteration per record: "s" points at the record start, "l" is the number
// of bytes left.
1970  do {
1971  unsigned long len;
1972  size_t klen, vlen;
1973  const char* e;
1974  char *k, *v;
1975 
1976  if (!(e = (char*) memchr(s, '\n', l))) {
1977  e = s + l;
1978  }
1979  errno = 0;
// Validate the record layout:  a non-zero decimal length that spans exactly
// up to and including the end-of-record position "e", then a blank or tab,
// then a non-empty key without blanks or tabs, '=', and a non-empty value.
// On success "k"/"klen" delimit the key and "v"/"vlen" the value.
1980  if (!isdigit((unsigned char)(*s)) || !(len = strtoul(s, &k, 10))
1981  || errno || s + len - 1 != e || (*k != ' ' && *k != '\t')
1982  || !(v = (char*) memchr(k, '=', (size_t)(e - k))) // NB: k < e
1983  || !(klen = (size_t)(v++ - ++k))
1984  || memchr(k, ' ', klen) || memchr(k, '\t', klen)
1985  || !(vlen = (size_t)(e - v))) {
1986  TAR_POST(74, Error,
1987  "Skipping malformed PAX data");
1988  return eFailure;
1989  }
1990  bool done = false;
// Look the key up in the table (exact match only).
1991  for (size_t n = 0; n < sizeof(parser) / sizeof(parser[0]); ++n) {
1992  if (strlen(parser[n].key) == klen
1993  && memcmp(parser[n].key, k, klen) == 0) {
1994  if (!parser[n].val) {
1995  if (parser[n].str) {
1996  parser[n].str->assign(v, vlen);
1997  }
1998  } else if (!s_ParsePAXNumeric(parser[n].val, v, vlen,
1999  parser[n].str, parser[n].bit)) {
// A bad numeric is reported and ignored; it does not fail the whole parse.
2000  TAR_POST(75, Error,
2001  "Ignoring bad numeric \""
2002  + CTempString(v, vlen)
2003  + "\" in PAX value \""
2004  + CTempString(k, klen) + '"');
2005  } else {
2006  parsed |= parser[n].bit;
2007  }
2008  done = true;
2009  break;
2010  }
2011  }
// Only unrecognized keys made up entirely of lowercase letters are warned
// about; other unknown keys (e.g. ones containing dots or uppercase letters)
// are skipped silently.
2012  if (!done && s_AllLowerCase(k, klen)/*&& !memchr(k, '.', klen)*/) {
2013  TAR_POST(76, Warning,
2014  "Ignoring unrecognized PAX value \""
2015  + CTempString(k, klen) + '"');
2016  }
2017  if (!*e) {
2018  break;
2019  }
2020  l -= len;
2021  s = ++e;
2022  _ASSERT(l == strlen(s));
2023  } while (l);
2024 
// Reconcile the sparse-file sizes: "sparse" holds the real size
// (GNU.sparse.realsize or SCHILY.realsize), "dummy" holds GNU.sparse.size.
2025  if ((parsed & fPAXSparse) && (sparse | dummy)) {
2026  if (sparse && dummy && sparse != dummy) {
2027  TAR_POST(95, Warning,
2028  "Ignoring PAX GNU sparse file size "
2030  + " when real size "
2031  + NStr::NumericToString(sparse)
2032  + " is also present");
2033  } else if (!dummy && major == 1 && minor == 0) {
// Sparse format version 1.0:  unless sparse support is turned off, the GNU
// sparse name (if given) takes the place of the PAX path.
2034  if (!(m_Flags & fSparseUnsupported)) {
2035  if (!name.empty()) {
2036  if (!path.empty()) {
2037  TAR_POST(96, Warning,
2038  "Replacing PAX file name \"" + path
2039  + "\" with GNU sparse file name \"" + name
2040  + '"');
2041  }
2042  path.swap(name);
2043  }
2044  parsed |= fPAXSparseGNU_1_0;
2045  }
2046  _ASSERT(sparse);
2047  } else if (!sparse) {
2048  sparse = dummy;
2049  }
2050  size = sparse;
2051  }
2052 
// Hand the parsed values over to the current entry.
2053  m_Current.m_Name.swap(path);
2054  m_Current.m_LinkName.swap(linkpath);
2055  m_Current.m_UserName.swap(uname);
2056  m_Current.m_GroupName.swap(gname);
2060  m_Current.m_Stat.orig.st_mtime = (time_t) mtime;
2061  m_Current.m_Stat.orig.st_atime = (time_t) atime;
2062  m_Current.m_Stat.orig.st_ctime = (time_t) ctime;
2063  m_Current.m_Stat.orig.st_size = (off_t) size;
2064  m_Current.m_Stat.orig.st_uid = (uid_t) uid;
2065  m_Current.m_Stat.orig.st_gid = (gid_t) gid;
2066  m_Current.m_Pos = sparse; // real (expanded) file size
2067 
// The set of parsed-field bits is stored in the mode field of the entry.
2068  m_Current.m_Stat.orig.st_mode = (mode_t) parsed;
2069  return eContinue;
2070 }
2071 
2072 
// Post an Info-level diagnostic with the archive position, a dump of the
// entry header "h" (interpreted per "fmt"), and the number of data blocks
// that "datasize" occupies.  The block count line is left out when there are
// no blocks, or for an old GNU sparse ('S') entry whose gnu.contind is set.
//
// NOTE(review): "level" is not declared in the visible lines; the line
// following the assertion is absent from this listing (the embedded
// numbering skips 2078).
2073 static void s_Dump(const string& file, Uint8 pos, size_t recsize,
2074  const string& entryname, const STarHeader* h,
2075  ETar_Format fmt, Uint8 datasize)
2076 {
2077  _ASSERT(!OFFSET_OF(pos));
2079  Uint8 blocks = BLOCK_OF(ALIGN_SIZE(datasize));
2080  ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2081  + s_DumpHeader(h, fmt) + '\n'
2082  + (blocks
2083  && (h->typeflag[0] != 'S'
2084  || fmt != eTar_OldGNU
2085  || !*h->gnu.contind)
2086  ? "Blocks of data: " + NStr::NumericToString(blocks) + '\n'
2087  : kEmptyStr));
// Re-apply the diagnostic post level held in "level".
2088  SetDiagPostLevel(level);
2089 }
2090 
2091 
// Post an Info-level diagnostic with the archive position and a dump of a
// continuation block "h" of an old GNU sparse file map.  The number of data
// blocks is included only when "contind" is not set (first byte is zero).
//
// NOTE(review): "level" is not declared in the visible lines; the line
// following the assertion is absent from this listing (the embedded
// numbering skips 2097).
2092 static void s_DumpSparse(const string& file, Uint8 pos, size_t recsize,
2093  const string& entryname, const STarHeader* h,
2094  const char* contind, Uint8 datasize)
2095 {
2096  _ASSERT(!OFFSET_OF(pos));
2098  Uint8 blocks = !*contind ? BLOCK_OF(ALIGN_SIZE(datasize)) : 0;
2099  ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2100  + "GNU sparse file map header (cont'd):\n"
2101  + s_DumpSparseMap(h, (const char*) h, contind) + '\n'
2102  + (blocks
2103  ? "Blocks of data: " + NStr::NumericToString(blocks) + '\n'
2104  : kEmptyStr));
// Re-apply the diagnostic post level held in "level".
2105  SetDiagPostLevel(level);
2106 }
2107 
2108 
// Post an Info-level diagnostic with the archive position and a dump of a
// PAX GNU/1.0 sparse file map given as a vector of Uint8 pairs ("bmap").
//
// NOTE(review): "level" is not declared in the visible lines; the line
// following the assertion is absent from this listing (the embedded
// numbering skips 2114).
2109 static void s_DumpSparse(const string& file, Uint8 pos, size_t recsize,
2110  const string& entryname,
2111  const vector< pair<Uint8, Uint8> >& bmap)
2112 {
2113  _ASSERT(!OFFSET_OF(pos));
2115  ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, entryname)
2116  + "PAX GNU/1.0 sparse file map data:\n"
2117  + s_DumpSparseMap(bmap) + '\n');
// Re-apply the diagnostic post level held in "level".
2118  SetDiagPostLevel(level);
2119 }
2120 
2121 
// Post an Info-level diagnostic with the archive position and one of:
// "Zero block <N>" when "zeroblock_count" is non-zero, otherwise
// "End-Of-Tape" if "eot" is true, or "End-Of-File" if it is false.
//
// NOTE(review): "level" is not declared in the visible lines; the line
// following the assertion is absent from this listing (the embedded
// numbering skips 2126).
2122 static void s_DumpZero(const string& file, Uint8 pos, size_t recsize,
2123  size_t zeroblock_count, bool eot = false)
2124 {
2125  _ASSERT(!OFFSET_OF(pos));
2127  ERR_POST(Info << '\n' + s_PositionAsString(file, pos, recsize, kEmptyStr)
2128  + (zeroblock_count
2129  ? "Zero block " + NStr::NumericToString(zeroblock_count)
2130  : (eot ? "End-Of-Tape" : "End-Of-File")) + '\n');
// Re-apply the diagnostic post level held in "level".
2131  SetDiagPostLevel(level);
2132 }
2133 
2134 
/// Tell whether the character is an octal digit ('0' through '7').
static inline bool s_IsOctal(char c)
{
    return c >= '0'  &&  c <= '7';
}
2139 
2140 
2142 {
2143  // Read block
2144  const TTarBlock* block;
2145  size_t nread = sizeof(block->buffer);
2146  _ASSERT(sizeof(*block) == BLOCK_SIZE/*== sizeof(block->buffer)*/);
2147  if (!(block = (const TTarBlock*) x_ReadArchive(nread))) {
2148  return eEOF;
2149  }
2150  if (nread != BLOCK_SIZE) {
2151  TAR_THROW(this, eRead,
2152  "Unexpected EOF in archive");
2153  }
2154  const STarHeader* h = &block->header;
2155 
2156  // Check header format
2157  ETar_Format fmt = eTar_Unknown;
2158  if (memcmp(h->magic, "ustar", 6) == 0) {
2159  if ((h->star.prefix[sizeof(h->star.prefix) - 1] == '\0'
2160  && s_IsOctal(h->star.atime[0]) && h->star.atime[0] == ' '
2161  && s_IsOctal(h->star.ctime[0]) && h->star.ctime[0] == ' ')
2162  || strcmp(block->buffer + BLOCK_SIZE - 4, "tar") == 0) {
2163  fmt = eTar_Star;
2164  } else {
2165  fmt = pax ? eTar_Posix : eTar_Ustar;
2166  }
2167  } else if (memcmp(h->magic, "ustar ", 8) == 0) {
2168  // Here the magic is protruded into the adjacent version field
2169  fmt = eTar_OldGNU;
2170  } else if (memcmp(h->magic, "\0\0\0\0\0", 6) == 0) {
2171  // We'll use this also to speedup corruption checks w/checksum
2172  fmt = eTar_Legacy;
2173  } else {
2174  TAR_THROW_EX(this, eUnsupportedTarFormat,
2175  "Unrecognized header format", h, fmt);
2176  }
2177 
2178  Uint8 val;
2179  // Get checksum from header
2180  if (!s_OctalToNum(val, h->checksum, sizeof(h->checksum))) {
2181  // We must allow all zero bytes here in case of pad/zero blocks
2182  bool corrupt;
2183  if (fmt == eTar_Legacy) {
2184  corrupt = false;
2185  for (size_t i = 0; i < sizeof(block->buffer); ++i) {
2186  if (block->buffer[i]) {
2187  corrupt = true;
2188  break;
2189  }
2190  }
2191  } else {
2192  corrupt = true;
2193  }
2194  if (corrupt) {
2195  TAR_THROW_EX(this, eUnsupportedTarFormat,
2196  "Bad checksum", h, fmt);
2197  }
2198  m_StreamPos += BLOCK_SIZE; // NB: nread
2199  return eZeroBlock;
2200  }
2201  int checksum = int(val);
2202 
2203  // Compute both signed and unsigned checksums (for compatibility)
2204  int ssum = 0;
2205  unsigned int usum = 0;
2206  const char* p = block->buffer;
2207  for (size_t i = 0; i < sizeof(block->buffer); ++i) {
2208  ssum += *p;
2209  usum += (unsigned char)(*p);
2210  p++;
2211  }
2212  p = h->checksum;
2213  for (size_t j = 0; j < sizeof(h->checksum); ++j) {
2214  ssum -= *p - ' ';
2215  usum -= (unsigned char)(*p) - ' ';
2216  p++;
2217  }
2218 
2219  // Compare checksum(s)
2220  if (checksum != ssum && (unsigned int) checksum != usum) {
2221  string message = "Header checksum failed";
2222  if (m_Flags & fDumpEntryHeaders) {
2223  message += ", expected ";
2224  if (usum != (unsigned int) ssum) {
2225  message += "either ";
2226  }
2227  if (usum > 7) {
2228  message += "0";
2229  }
2230  message += NStr::NumericToString(usum, 0, 8);
2231  if (usum != (unsigned int) ssum) {
2232  message += " or ";
2233  if ((unsigned int) ssum > 7) {
2234  message += "0";
2235  }
2236  message += NStr::NumericToString((unsigned int) ssum, 0, 8);
2237  }
2238  }
2239  TAR_THROW_EX(this, eChecksum,
2240  message, h, fmt);
2241  }
2242 
2243  // Set all info members now (thus, validating the header block)
2244 
2246  unsigned char tflag = toupper((unsigned char) h->typeflag[0]);
2247 
2248  // Name
2249  if (m_Current.GetName().empty()) {
2250  if ((fmt & eTar_Ustar) && h->prefix[0] && tflag != 'X') {
2251  const char* prefix = fmt != eTar_Star ? h->prefix : h->star.prefix;
2252  size_t pfxlen = fmt != eTar_Star
2253  ? s_Length(h->prefix, sizeof(h->prefix))
2254  : s_Length(h->star.prefix, h->typeflag[0] == 'S'
2255  ? 107 : sizeof(h->star.prefix));
2257  = CDirEntry::ConcatPath(string(prefix, pfxlen),
2258  string(h->name,
2259  s_Length(h->name,
2260  sizeof(h->name))));
2261  } else {
2262  // Name prefix cannot be used
2263  m_Current.m_Name.assign(h->name,
2264  s_Length(h->name, sizeof(h->name)));
2265  }
2266  }
2267 
2268  // Mode
2269  if (!s_OctalToNum(val, h->mode, sizeof(h->mode))
2270  && (val || h->typeflag[0] != 'V')) {
2271  TAR_THROW_EX(this, eUnsupportedTarFormat,
2272  "Bad entry mode", h, fmt);
2273  }
2274  m_Current.m_Stat.orig.st_mode = (mode_t) val;
2275 
2276  // User Id
2277  if (!s_DecodeUint8(val, h->uid, sizeof(h->uid))
2278  && (val || h->typeflag[0] != 'V')) {
2279  TAR_THROW_EX(this, eUnsupportedTarFormat,
2280  "Bad user ID", h, fmt);
2281  }
2282  m_Current.m_Stat.orig.st_uid = (uid_t) val;
2283 
2284  // Group Id
2285  if (!s_DecodeUint8(val, h->gid, sizeof(h->gid))
2286  && (val || h->typeflag[0] != 'V')) {
2287  TAR_THROW_EX(this, eUnsupportedTarFormat,
2288  "Bad group ID", h, fmt);
2289  }
2290  m_Current.m_Stat.orig.st_gid = (gid_t) val;
2291 
2292  // Size
2293  if (!s_DecodeUint8(val, h->size, sizeof(h->size))
2294  && (val || h->typeflag[0] != 'V')) {
2295  TAR_THROW_EX(this, eUnsupportedTarFormat,
2296  "Bad entry size", h, fmt);
2297  }
2298  m_Current.m_Stat.orig.st_size = (off_t) val;
2299  if (m_Current.GetSize() != val) {
2300  ERR_POST_ONCE(Critical << "CAUTION:"
2301  " ***"
2302  " This run-time may not support large TAR entries"
2303  " (have you built it --with-lfs?)"
2304  " ***");
2305  }
2306 
2307  // Modification time
2308  if (!s_OctalToNum(val, h->mtime, sizeof(h->mtime))) {
2309  TAR_THROW_EX(this, eUnsupportedTarFormat,
2310  "Bad modification time", h, fmt);
2311  }
2312  m_Current.m_Stat.orig.st_mtime = (time_t) val;
2313 
2314  if (fmt == eTar_OldGNU || (fmt & eTar_Ustar)) {
2315  // User name
2316  m_Current.m_UserName.assign(h->uname,
2317  s_Length(h->uname, sizeof(h->uname)));
2318  // Group name
2319  m_Current.m_GroupName.assign(h->gname,
2320  s_Length(h->gname,sizeof(h->gname)));
2321  }
2322 
2323  if (fmt == eTar_OldGNU || fmt == eTar_Star) {
2324  // GNU times may not be valid so checks are relaxed
2325  const char* time;
2326  size_t tlen;
2327  time = fmt == eTar_Star ? h->star.atime : h->gnu.atime;
2328  tlen = fmt == eTar_Star ? sizeof(h->star.atime) : sizeof(h->gnu.atime);
2329  if (!s_OctalToNum(val, time, tlen)) {
2330  if (fmt == eTar_Star || memcchr(time, '\0', tlen)) {
2331  TAR_THROW_EX(this, eUnsupportedTarFormat,
2332  "Bad last access time", h, fmt);
2333  }
2334  } else {
2335  m_Current.m_Stat.orig.st_atime = (time_t) val;
2336  }
2337  time = fmt == eTar_Star ? h->star.ctime : h->gnu.ctime;
2338  tlen = fmt == eTar_Star ? sizeof(h->star.ctime) : sizeof(h->gnu.ctime);
2339  if (!s_OctalToNum(val, time, tlen)) {
2340  if (fmt == eTar_Star || memcchr(time, '\0', tlen)) {
2341  TAR_THROW_EX(this, eUnsupportedTarFormat,
2342  "Bad creation time", h, fmt);
2343  }
2344  } else {
2345  m_Current.m_Stat.orig.st_ctime = (time_t) val;
2346  }
2347  }
2348 
2349  // Entry type
2350  switch (h->typeflag[0]) {
2351  case '\0':
2352  case '0':
2353  if (!(fmt & eTar_Ustar) && fmt != eTar_OldGNU) {
2354  size_t namelen = s_Length(h->name, sizeof(h->name));
2355  if (namelen && h->name[namelen - 1] == '/') {
2357  m_Current.m_Stat.orig.st_size = 0;
2358  break;
2359  }
2360  }
2362  break;
2363  case '\1':
2364  case '\2':
2365  case '1':
2366  case '2':
2367  m_Current.m_Type = (h->typeflag[0] == '\2' || h->typeflag[0] == '2'
2370  m_Current.m_LinkName.assign(h->linkname,
2371  s_Length(h->linkname,sizeof(h->linkname)));
2372  if (m_Current.GetSize()) {
2374  // Mandatory to ignore
2375  m_Current.m_Stat.orig.st_size = 0;
2376  } else if (fmt != eTar_Posix) {
2377  TAR_POST(77, Warning,
2378  "Non-zero hard-link size ("
2380  + ") is ignored (non-PAX)");
2381  m_Current.m_Stat.orig.st_size = 0;
2382  } // else POSIX (re-)allowed hard links to be followed by file data
2383  }
2384  break;
2385  case '3':
2386  case '4':
2387  m_Current.m_Type = (h->typeflag[0] == '3'
2390  if (!s_OctalToNum(val, h->devminor, sizeof(h->devminor))) {
2391  TAR_THROW_EX(this, eUnsupportedTarFormat,
2392  "Bad device minor number", h, fmt);
2393  }
2394  usum = (unsigned int) val; // set aside
2395  if (!s_OctalToNum(val, h->devmajor, sizeof(h->devmajor))) {
2396  TAR_THROW_EX(this, eUnsupportedTarFormat,
2397  "Bad device major number", h, fmt);
2398  }
2399 #ifdef makedev
2400  m_Current.m_Stat.orig.st_rdev = makedev((unsigned int) val, usum);
2401 #else
2402  if (sizeof(int) >= 4 && sizeof(m_Current.m_Stat.orig.st_rdev) >= 4) {
2403  *((unsigned int*) &m_Current.m_Stat.orig.st_rdev) =
2404  (unsigned int)((val << 16) | usum);
2405  }
2406 #endif //makedev
2407  m_Current.m_Stat.orig.st_size = 0;
2408  break;
2409  case '5':
2411  m_Current.m_Stat.orig.st_size = 0;
2412  break;
2413  case '6':
2415  m_Current.m_Stat.orig.st_size = 0;
2416  break;
2417  case '7':
2418  ERR_POST_ONCE(Critical << "CAUTION:"
2419  " *** Contiguous TAR entries processed as regular files"
2420  " ***");
2422  break;
2423  case 'K':
2424  case 'L':
2425  case 'S':
2426  case 'x':
2427  case 'X':
2428  if ((tflag == 'X' && (fmt & eTar_Ustar)) ||
2429  (tflag != 'X' && fmt == eTar_OldGNU) ||
2430  (tflag == 'S' && fmt == eTar_Star)) {
2431  // Assign actual type
2432  switch (tflag) {
2433  case 'K':
2435  break;
2436  case 'L':
2438  break;
2439  case 'S':
2441  break;
2442  case 'X':
2443  if (pax) {
2444  TAR_POST(78, Warning,
2445  "Repetitious PAX headers,"
2446  " archive may be corrupt");
2447  }
2448  fmt = eTar_Posix; // upgrade
2450  break;
2451  default:
2452  _TROUBLE;
2453  break;
2454  }
2455 
2456  // Dump header
2457  size_t hsize = (size_t) m_Current.GetSize();
2458  if (dump) {
2460  m_Current.GetName(), h, fmt, hsize);
2461  }
2462  m_StreamPos += BLOCK_SIZE; // NB: nread
2463 
2465  const char* realsize = fmt != eTar_Star
2466  ? h->gnu.realsize : h->star.prefix + 107;
2467  size_t realsizelen = fmt != eTar_Star
2468  ? sizeof(h->gnu.realsize) : 12;
2469  // Real file size (if present)
2470  if (!s_DecodeUint8(val, realsize, realsizelen)) {
2471  val = 0;
2472  }
2473  if (fmt == eTar_Star) {
2474  // Archive file size includes sparse map, and already valid
2475  m_Current.m_Pos = val; // NB: real (expanded) file size
2476  return eSuccess;
2477  }
2478  // Skip all GNU sparse file headers (they are not counted
2479  // towards the sparse file size in the archive ("hsize")!)
2480  const char* contind = h->gnu.contind;
2481  while (*contind) {
2482  _ASSERT(nread == BLOCK_SIZE);
2483  if (!(block = (const TTarBlock*) x_ReadArchive(nread))
2484  || nread != BLOCK_SIZE) {
2485  TAR_THROW(this, eRead,
2486  "Unexpected EOF in GNU sparse file map"
2487  " extended header");
2488  }
2489  h = &block->header;
2490  contind = block->buffer + (24 * 21)/*504*/;
2491  if (dump) {
2493  m_Current.GetName(), h, contind, hsize);
2494  }
2496  m_StreamPos += BLOCK_SIZE; // NB: nread
2497  }
2498  m_Current.m_Pos = val; // NB: real (expanded) file size
2499  return eSuccess;
2500  }
2501 
2502  // Read in the extended header information
2503  val = ALIGN_SIZE(hsize);
2504  string data;
2505  while (hsize) {
2506  nread = hsize;
2507  const char* xbuf = x_ReadArchive(nread);
2508  if (!xbuf) {
2509  TAR_THROW(this, eRead,
2510  string("Unexpected EOF in ") +
2511  (m_Current.GetType()
2513  ? "PAX data" :
2514  m_Current.GetType()
2516  ? "long name"
2517  : "long link"));
2518  }
2519  _ASSERT(nread);
2520  data.append(xbuf, nread);
2521  hsize -= nread;
2522  m_StreamPos += ALIGN_SIZE(nread);
2523  }
2525  // Make sure there's no embedded '\0'(s)
2526  data.resize(strlen(data.c_str()));
2527  }
2528  if (dump) {
2531  m_StreamPos - val,
2532  m_BufferSize,
2533  m_Current.GetName())
2535  ? "PAX data:\n" :
2537  ? "Long name: \""
2538  : "Long link name: \"")
2539  + NStr::PrintableString(data,
2540  m_Current.GetType()
2545  ? data.size() && data[data.size() - 1] == '\n'
2546  ? kEmptyStr : "\n" : "\"\n"));
2547  SetDiagPostLevel(level);
2548  }
2549  // Reset size because the data blocks have been all read
2551  m_Current.m_Stat.orig.st_size = 0;
2552  if (!val || !data.size()) {
2553  TAR_POST(79, Error,
2554  "Skipping " + string(val ? "empty" : "zero-sized")
2555  + " extended header data");
2556  return eFailure;
2557  }
2558  switch (m_Current.GetType()) {
2560  return x_ParsePAXData(data);
2562  m_Current.m_Name.swap(data);
2563  return eContinue;
2565  m_Current.m_LinkName.swap(data);
2566  return eContinue;
2567  default:
2568  _TROUBLE;
2569  break;
2570  }
2571  return eFailure;
2572  }
2573  /*FALLTHRU*/
2574  case 'V':
2575  case 'I':
2576  if (h->typeflag[0] == 'V' || h->typeflag[0] == 'I') {
2577  // Safety for no data to actually follow
2578  m_Current.m_Stat.orig.st_size = 0;
2579  if (h->typeflag[0] == 'V') {
2581  break;
2582  }
2583  }
2584  /*FALLTHRU*/
2585  default:
2587  break;
2588  }
2589 
2590  if (dump) {
2592  m_Current.GetName(), h, fmt, m_Current.GetSize());
2593  }
2594  m_StreamPos += BLOCK_SIZE; // NB: nread
2595 
2596  return eSuccess;
2597 }
2598 
2599 
2600 static inline void sx_Signature(TTarBlock* block)
2601 {
2602  _ASSERT(sizeof(block->header) + 4 < sizeof(block->buffer));
2603  memcpy(block->buffer + sizeof(*block) - 4, "NCBI", 4);
2604 }
2605 
2606 
// Build one BLOCK_SIZE-sized tar header block for the current entry
// (m_Current) and write it to the archive.
//
// The block is zero-filled first, then the name(s), mode, uid, gid, size,
// mtime, type flag, device numbers, user/group names, magic, optional NCBI
// signature and finally the checksum are stored in it.
//
// @param name
//   Used here only to identify the entry in error messages.
//
// Throws (via TAR_THROW) eNameTooLong if the name or link name cannot be
// packed, eMemory if a numeric field or the checksum cannot be stored, and
// eUnsupportedEntryType for an entry type that has no type flag here.
//
// NOTE(review): this listing is missing some source lines (the numbering
// below has gaps), e.g. the one that declares 'type' and the ones that
// begin the mtime / devmajor / devminor store calls.
2607  void CTar::x_WriteEntryInfo(const string& name)
2608  {
2609  // Prepare block info
2610  TTarBlock block;
2611  _ASSERT(sizeof(block) == BLOCK_SIZE/*== sizeof(block.buffer)*/);
2612  memset(block.buffer, 0, sizeof(block.buffer));
2613  STarHeader* h = &block.header;
2614 
2615  // Name(s) ('\0'-terminated if fit entirely, otherwise not)
2616  if (!x_PackCurrentName(h, false)) {
2617  TAR_THROW(this, eNameTooLong,
2618  "Name '" + m_Current.GetName()
2619  + "' too long in entry '" + name + '\'');
2620  }
2621 
      // NOTE(review): 'type' tested below is declared on a line that is not
      // visible in this listing -- presumably the current entry's type.
2623 
      // Only symbolic links get their link name packed into the header
2624  if (type == CTarEntryInfo::eSymLink && !x_PackCurrentName(h, true)) {
2625  TAR_THROW(this, eNameTooLong,
2626  "Link '" + m_Current.GetLinkName()
2627  + "' too long in entry '" + name + '\'');
2628  }
2629 
2630  /* NOTE: Although some sources on the Internet indicate that all but size,
2631  * mtime, and version numeric fields are '\0'-terminated, we could not
2632  * confirm that with existing tar programs, all of which we saw using
2633  * either '\0' or ' '-terminated values in both size and mtime fields.
2634  * For the ustar archive we have found a document that definitively tells
2635  * that _all_ numeric fields are '\0'-terminated, and that they can keep
2636  * up to "sizeof(field)-1" octal digits. We follow it here.
2637  * However, GNU and ustar checksums seem to be different indeed, so we
2638  * don't use a trailing space for ustar, but for GNU only.
2639  */
2640 
2641  // Mode
2642  if (!s_NumToOctal(m_Current.GetMode(), h->mode, sizeof(h->mode) - 1)) {
2643  TAR_THROW(this, eMemory,
2644  "Cannot store file mode");
2645  }
2646 
2647  // Update format as we go
      // The header starts out as ustar;  a negative result from
      // s_EncodeUint8() switches it to old GNU, zero is a hard failure
      // (presumably negative means a non-octal encoding was needed -- confirm
      // against s_EncodeUint8()).
2648  ETar_Format fmt = eTar_Ustar;
2649  int ok;
2650 
2651  // User ID
2652  ok = s_EncodeUint8(m_Current.GetUserId(), h->uid, sizeof(h->uid) - 1);
2653  if (!ok) {
2654  TAR_THROW(this, eMemory,
2655  "Cannot store user ID");
2656  }
2657  if (ok < 0) {
2658  fmt = eTar_OldGNU;
2659  }
2660 
2661  // Group ID
2662  ok = s_EncodeUint8(m_Current.GetGroupId(), h->gid, sizeof(h->gid) - 1);
2663  if (!ok) {
2664  TAR_THROW(this, eMemory,
2665  "Cannot store group ID");
2666  }
2667  if (ok < 0) {
2668  fmt = eTar_OldGNU;
2669  }
2670 
2671  // Size
2673  ok = s_EncodeUint8(m_Current.GetSize(), h->size, sizeof(h->size) - 1);
2674  if (!ok) {
2675  TAR_THROW(this, eMemory,
2676  "Cannot store file size");
2677  }
2678  if (ok < 0) {
2679  fmt = eTar_OldGNU;
2680  }
2681 
      // If name packing has put anything into the prefix field, the header
      // is kept as ustar regardless of how the numbers were encoded
2682  if (fmt != eTar_Ustar && h->prefix[0]) {
2683  // Cannot downgrade to reflect encoding
2684  fmt = eTar_Ustar;
2685  }
2686 
2687  // Modification time
2689  h->mtime, sizeof(h->mtime) - 1)) {
2690  TAR_THROW(this, eMemory,
2691  "Cannot store modification time");
2692  }
2693 
      // 'device' records whether devmajor / devminor were filled in from the
      // entry, so that they are not overwritten with zeros further below
2694  bool device = false;
2695  // Type (GNU extension for SymLink)
2696  switch (type) {
2697  case CTarEntryInfo::eFile:
2698  h->typeflag[0] = '0';
2699  break;
2701  h->typeflag[0] = '2';
2702  break;
2705  h->typeflag[0] = type == CTarEntryInfo::eCharDev ? '3' : '4';
2707  h->devmajor, sizeof(h->devmajor) - 1)) {
2708  TAR_THROW(this, eMemory,
2709  "Cannot store major number");
2710  }
2712  h->devminor, sizeof(h->devminor) - 1)) {
2713  TAR_THROW(this, eMemory,
2714  "Cannot store minor number");
2715  }
2716  device = true;
2717  break;
2718  case CTarEntryInfo::eDir:
2719  h->typeflag[0] = '5';
2720  break;
2721  case CTarEntryInfo::ePipe:
2722  h->typeflag[0] = '6';
2723  break;
2724  default:
2725  _TROUBLE;
2726  TAR_THROW(this, eUnsupportedEntryType,
2727  "Do not know how to archive entry '" + name
2728  + "' of type #" + NStr::IntToString(int(type))
2729  + ": Internal error");
2730  /*NOTREACHED*/
2731  break;
2732  }
2733 
2734  // User and group
      // A name is copied only when strictly shorter than its field, so the
      // zero fill done by memset() above keeps it '\0'-terminated;  a name
      // that does not fit is not stored at all (the field stays empty)
2735  const string& usr = m_Current.GetUserName();
2736  size_t len = usr.size();
2737  if (len < sizeof(h->uname)) {
2738  memcpy(h->uname, usr.c_str(), len);
2739  }
2740  const string& grp = m_Current.GetGroupName();
2741  len = grp.size();
2742  if (len < sizeof(h->gname)) {
2743  memcpy(h->gname, grp.c_str(), len);
2744  }
2745 
2746  // Device numbers to complete the ustar header protocol (all fields ok)
2747  if (!device && fmt != eTar_OldGNU) {
2748  s_NumToOctal(0, h->devmajor, sizeof(h->devmajor) - 1);
2749  s_NumToOctal(0, h->devminor, sizeof(h->devminor) - 1);
2750  }
2751 
2752  if (fmt != eTar_OldGNU) {
2753  // Magic
2754  strcpy(h->magic, "ustar");
2755  // Version (EXCEPTION: not '\0' terminated)
2756  memcpy(h->version, "00", 2);
2757  } else {
2758  // NB: Old GNU magic protrudes into adjacent version field
      // NOTE(review): 8 bytes are copied and the comment speaks of 2 spaces,
      // but the literal as shown in this listing has a single space (7 bytes
      // with its '\0') -- verify that the real source has two spaces here.
2759  memcpy(h->magic, "ustar ", 8); // 2 spaces and '\0'-terminated
2760  }
2761 
2762  // NCBI signature if allowed
2763  if (!(m_Flags & fStandardHeaderOnly)) {
2764  sx_Signature(&block);
2765  }
2766 
2767  // Final step: checksumming
      // The second argument is 'true' for the old GNU format only (see the
      // NOTE above regarding the different checksum termination)
2768  if (!s_TarChecksum(&block, fmt == eTar_OldGNU ? true : false)) {
2769  TAR_THROW(this, eMemory,
2770  "Cannot store checksum");
2771  }
2772 
2773  // Write header
2774  x_WriteArchive(sizeof(block.buffer), block.buffer);
      // Header size is the distance from the entry's start position to
      // m_StreamPos as left by the write(s) done for this entry
2775  m_Current.m_HeaderSize = (streamsize)(m_StreamPos - m_Current.m_Pos);
2776 
2777  Checkpoint(m_Current, true/*write*/);
2778  }
2779 
2780 
2782 {
2783  const string& name = link ? m_Current.GetLinkName() : m_Current.GetName();
2784  size_t size = link ? sizeof(h->linkname) : sizeof(h->name);
2785  char* dst = link ? h->linkname : h->name;
2786  const char* src = name.c_str();
2787  size_t len = name.size();
2788 
2789  if (len <= size) {
2790  // Name fits!
2791  memcpy(dst, src, len);
2792  return true;
2793  }
2794 
2795  bool packed = false;
2796  if (!link && len <= sizeof(h->prefix) + 1 + sizeof(h->name)) {
2797  // Try to split the long name into a prefix and a short name (POSIX)
2798  size_t i = len;
2799  if (i > sizeof(h->prefix)) {
2800  i = sizeof(h->prefix);
2801  }
2802  while (i > 0 && src[--i] != '/');
2803  if (i && len - i <= sizeof(h->name) + 1) {
2804  memcpy(h->prefix, src, i);
2805  memcpy(h->name, src + i + 1, len - i - 1);
2806  if (!(m_Flags & fLongNameSupplement))
2807  return true;
2808  packed = true;
2809  }
2810  }
2811 
2812  // Still, store the initial part in the original header
2813  if (!packed) {
2814  memcpy(dst, src, size);
2815  }
2816 
2817  // Prepare extended block header with the long name info (old GNU style)
2819  TTarBlock* block = (TTarBlock*)(m_Buffer + m_BufferPos);
2820  memset(block->buffer, 0, sizeof(block->buffer));
2821  h = &block->header;
2822 
2823  // See above for comments about header filling
2824  ++len; // write terminating '\0' as it can always be made to fit in
2825  strcpy(h->name, "././@LongLink");
2826  s_NumToOctal(0, h->mode, sizeof(h->mode) - 1);
2827  s_NumToOctal(0, h->uid, sizeof(h->uid) - 1);
2828  s_NumToOctal(0, h->gid, sizeof(h->gid) - 1);
2829  if (!s_EncodeUint8(len, h->size, sizeof(h->size) - 1)) {
2830  return false;
2831  }
2832  s_NumToOctal(0, h->mtime, sizeof(h->mtime)- 1);
2833  h->typeflag[0] = link ? 'K' : 'L';
2834 
2835  // Old GNU magic protrudes into adjacent version field
2836  memcpy(h->magic, "ustar ", 8); // 2 spaces and '\0'-terminated
2837 
2838  // NCBI signature if allowed
2839  if (!(m_Flags & fStandardHeaderOnly)) {
2840  sx_Signature(block);
2841  }
2842 
2843  s_TarChecksum(block, true);
2844 
2845  // Write the header
2846  x_WriteArchive(sizeof(block->buffer));
2847 
2848  // Store the full name in the extended block (will be aligned as necessary)
2849  x_WriteArchive(len, src);
2850 
2851  return true;
2852 }
2853 
2854 
2856 {
2859  m_Current.m_Name.clear();
2860  if (!m_ZeroBlockCount) {
2861  return;
2862  }
2863 
2864  size_t gap = SIZE_OF(m_ZeroBlockCount);
2865  if (!m_FileStream) {
2866  if (gap > m_BufferPos) {
2867  if (action == eAppend || action == eUpdate) {
2868  TAR_POST(4, Warning,
2869  "In-stream update may result in gapped tar archive");
2870  }
2871  gap = m_BufferPos;
2872  m_ZeroBlockCount -= BLOCK_OF(gap);
2873  }
2874  m_BufferPos -= gap;
2875  m_StreamPos -= gap;
2876  return;
2877  }
2878 
2879  // Tarfile here
2880  m_StreamPos -= gap;
2882  size_t off = (size_t) (m_StreamPos % m_BufferSize);
2883  if (m_BufferPos == 0) {
2885  }
2886  if (gap > m_BufferPos) {
2887  m_BufferPos = 0;
2888  size_t temp = BLOCK_SIZE;
2889  // Re-fetch the entire record
2890  if (!m_FileStream->seekg(rec * m_BufferSize)
2891  // NB: successful positioning guarantees the stream was !fail(),
2892  // which means it might have only been either good() or eof()
2893  || (m_FileStream->clear(), !x_ReadArchive(temp))
2894  || temp != BLOCK_SIZE) {
2895  TAR_POST(65, Error,
2896  "Archive backspace error in record reget");
2898  return;
2899  }
2900  m_BufferPos = off;
2901  } else {
2902  m_BufferPos -= gap;
2903  }
2905 
2906  // Always reset the put position there
2907 #if defined(_LIBCPP_VERSION) && _LIBCPP_VERSION <= 1101
2908  m_FileStream->clear(); // This is to only work around a bug
2909 #endif //_LIBCPP_VERSION
2910  if (!m_FileStream->seekp(rec * m_BufferSize)) {
2911  TAR_POST(80, Error,
2912  "Archive backspace error in record reset");
2914  return;
2915  }
2916  m_ZeroBlockCount = 0;
2917 }
2918 
2919 
2920 static bool s_MatchExcludeMask(const CTempString& name,
2921  const list<CTempString>& elems,
2922  const CMask* mask,
2923  NStr::ECase acase)
2924 {
2925  _ASSERT(!name.empty() && mask);
2926  if (elems.empty()) {
2927  return mask->Match(name, acase);
2928  }
2929  if (elems.size() == 1) {
2930  return mask->Match(elems.front(), acase);
2931  }
2932  string temp;
2933  REVERSE_ITERATE(list<CTempString>, e, elems) {
2934  temp = temp.empty() ? string(*e) : string(*e) + '/' + temp;
2935  if (mask->Match(temp, acase)) {
2936  return true;
2937  }
2938  }
2939  return false;
2940 }
2941 
2942 
// Scan the archive from the current stream position, reading one header at
// a time with x_ReadEntryInfo().
//
// Extended information (status eContinue) is latched in the local 'xinfo'
// and merged into the regular entry that follows it.  Each regular entry is
// then passed to Checkpoint(), matched against the extract / exclude masks,
// and handed to x_ProcessEntry().
//
// @param action
//   What to do with the entries;  it affects header dumping (eTest), mask
//   use, the processing mode passed to x_ProcessEntry(), and which entries
//   are recorded in the result.
// @return
//   The list of entries recorded in 'done'.  The scan ends at EOF or at the
//   end-of-archive zero blocks (after calling x_Backspace()), or right after
//   the first recorded entry when 'action' is eInternal.
//
// NOTE(review): this listing is missing a number of source lines (the
// numbering below has gaps), so some conditions and statements are only
// partially visible here.
2943  unique_ptr<CTar::TEntries> CTar::x_ReadAndProcess(EAction action)
2944  {
2945  unique_ptr<TEntries> done(new TEntries);
2947  Uint8 pos = m_StreamPos;
2948  CTarEntryInfo xinfo;
2949 
2950  m_ZeroBlockCount = 0;
2951  for (;;) {
2952  // Next block is supposed to be a header
2953  m_Current = CTarEntryInfo(pos);
      // Carry over a long name latched earlier;  x_ReadEntryInfo() takes the
      // name from the header only when this one is empty
2954  m_Current.m_Name = xinfo.GetName();
2955  EStatus status = x_ReadEntryInfo
2956  (action == eTest && (m_Flags & fDumpEntryHeaders),
2957  xinfo.GetType() == CTarEntryInfo::ePAXHeader);
2958  switch (status) {
2959  case eFailure:
2960  case eSuccess:
2961  case eContinue:
      // NOTE(review): the condition guarding this report is on lines not
      // shown here;  the stream position is restored after the message
2963  Uint8 save_pos = m_StreamPos;
2966  TAR_POST(5, Error,
2967  "Interspersing zero block ignored");
2968  m_StreamPos = save_pos;
2969  }
2970  break;
2971 
2972  case eZeroBlock:
2973  m_ZeroBlockCount++;
2974  if (action == eTest && (m_Flags & fDumpEntryHeaders)) {
2977  }
      // A first zero block (or any, with fIgnoreZeroBlocks) is skipped over;
      // a second one in a row falls through to the EOF handling below
2978  if ((m_Flags & fIgnoreZeroBlocks) || m_ZeroBlockCount < 2) {
2979  if (xinfo.GetType() == CTarEntryInfo::eUnknown) {
2980  // Not yet reading an entry -- advance
2981  pos += BLOCK_SIZE;
2982  }
2983  continue;
2984  }
2985  // Two zero blocks -> eEOF
2986  /*FALLTHRU*/
2987 
2988  case eEOF:
2989  if (action == eTest && (m_Flags & fDumpEntryHeaders)) {
2991  status != eEOF ? true : false);
2992  }
      // Extended info with no entry to apply it to is reported and dropped;
      // otherwise, unless appending, an archive that lacks both closing zero
      // blocks is an error when nothing has been read at all, else a warning
2993  if (xinfo.GetType() != CTarEntryInfo::eUnknown) {
2994  TAR_POST(6, Error,
2995  "Orphaned extended information ignored");
2996  } else if (m_ZeroBlockCount < 2 && action != eAppend) {
2997  if (!m_StreamPos) {
2998  TAR_THROW(this, eRead,
2999  "Unexpected EOF in archive");
3000  }
3001  TAR_POST(58, Warning,
3003  ? "Incomplete EOT in archive"
3004  : "Missing EOT in archive");
3005  }
3006  x_Backspace(action);
3007  return done;
3008  }
3009  m_ZeroBlockCount = 0;
3010 
3011  //
3012  // Process entry
3013  //
3014  if (status == eContinue) {
3015  // Extended header information has just been read in
3017 
      // NOTE(review): the 'case' labels of this switch are on lines not
      // shown here;  each branch stores what was just read into 'xinfo' and
      // goes on to read the next header
3018  switch (m_Current.GetType()) {
3020  xinfo.m_Pos = m_Current.m_Pos; // NB: real (expanded) filesize
3021  m_Current.m_Pos = pos;
3022  if (xinfo.GetType() != CTarEntryInfo::eUnknown) {
3023  TAR_POST(7, Error,
3024  "Unused extended header replaced");
3025  }
3027  xinfo.m_Name.swap(m_Current.m_Name);
3028  xinfo.m_LinkName.swap(m_Current.m_LinkName);
3029  xinfo.m_UserName.swap(m_Current.m_UserName);
3030  xinfo.m_GroupName.swap(m_Current.m_GroupName);
3031  xinfo.m_Stat = m_Current.m_Stat;
3032  continue;
3033 
3035  if (xinfo.GetType() == CTarEntryInfo::ePAXHeader
3036  || !xinfo.GetName().empty()) {
3037  TAR_POST(8, Error,
3038  "Unused long name \"" + xinfo.GetName()
3039  + "\" replaced");
3040  }
3041  // Latch next long name here then just skip
3043  xinfo.m_Name.swap(m_Current.m_Name);
3044  continue;
3045 
3047  if (xinfo.GetType() == CTarEntryInfo::ePAXHeader
3048  || !xinfo.GetLinkName().empty()) {
3049  TAR_POST(9, Error,
3050  "Unused long link \"" + xinfo.GetLinkName()
3051  + "\" replaced");
3052  }
3053  // Latch next long link here then just skip
3055  xinfo.m_LinkName.swap(m_Current.m_LinkName);
3056  continue;
3057 
3058  default:
3059  _TROUBLE;
3060  NCBI_THROW(CCoreException, eCore, "Internal error");
3061  /*NOTREACHED*/
3062  break;
3063  }
3064  }
3065 
3066  // Fixup current 'info' with extended information obtained previously
      // Each latched string is moved into m_Current by swap and then cleared
      // in 'xinfo', so that it cannot be applied to a later entry as well
3068  xinfo.m_HeaderSize = 0;
3069  if (!xinfo.GetName().empty()) {
3070  xinfo.m_Name.swap(m_Current.m_Name);
3071  xinfo.m_Name.clear();
3072  }
3073  if (!xinfo.GetLinkName().empty()) {
3074  xinfo.m_LinkName.swap(m_Current.m_LinkName);
3075  xinfo.m_LinkName.clear();
3076  }
3077  TPAXBits parsed;
3078  if (xinfo.GetType() == CTarEntryInfo::ePAXHeader) {
      // NOTE(review): the bitmask of parsed PAX fields is read back from
      // st_mode -- presumably stored there by the PAX parser;  confirm
      // against x_ParsePAXData()
3079  parsed = (TPAXBits) xinfo.m_Stat.orig.st_mode;
3080  if (!xinfo.GetUserName().empty()) {
3081  xinfo.m_UserName.swap(m_Current.m_UserName);
3082  xinfo.m_UserName.clear();
3083  }
3084  if (!xinfo.GetGroupName().empty()) {
3085  xinfo.m_GroupName.swap(m_Current.m_GroupName);
3086  xinfo.m_GroupName.clear();
3087  }
      // Only the fields flagged in 'parsed' override the header's values
3088  if (parsed & fPAXMtime) {
3089  m_Current.m_Stat.orig.st_mtime = xinfo.m_Stat.orig.st_mtime;
3091  }
3092  if (parsed & fPAXAtime) {
3093  m_Current.m_Stat.orig.st_atime = xinfo.m_Stat.orig.st_atime;
3095  }
3096  if (parsed & fPAXCtime) {
3097  m_Current.m_Stat.orig.st_ctime = xinfo.m_Stat.orig.st_ctime;
3099  }
3100  if (parsed & fPAXSparse) {
3101  // Mark to post-process below
3103  }
3104  if (parsed & fPAXSize) {
3105  m_Current.m_Stat.orig.st_size = xinfo.m_Stat.orig.st_size;
3106  }
3107  if (parsed & fPAXUid) {
3108  m_Current.m_Stat.orig.st_uid = xinfo.m_Stat.orig.st_uid;
3109  }
3110  if (parsed & fPAXGid) {
3111  m_Current.m_Stat.orig.st_gid = xinfo.m_Stat.orig.st_gid;
3112  }
3113  } else {
3114  parsed = fPAXNone/*0*/;
3115  }
3118  if (xinfo.m_Pos < m_Current.m_Pos) {
3119  xinfo.m_Pos = m_Current.m_Pos; // NB: real (expanded) filesize
3120  }
3121  m_Current.m_Pos = pos;
3122  }
      // 'size' is what gets handed to x_ProcessEntry() below;  for sparse
      // files the entry's own st_size may then be changed to the expanded
      // size kept in xinfo.m_Pos
3123  Uint8 size = m_Current.GetSize(); // NB: archive size to read
3124  if (xinfo.GetType() == CTarEntryInfo::eSparseFile) {
3127  TAR_POST(103, Error,
3128  "Ignoring sparse data for non-plain file");
3129  } else if (parsed & fPAXSparseGNU_1_0) {
3130  m_Current.m_Stat.orig.st_size = size ? (off_t) xinfo.m_Pos : 0;
3132  } else {
3134  if (size < xinfo.m_Pos) {
3135  m_Current.m_Stat.orig.st_size = (off_t) xinfo.m_Pos;
3136  }
3137  }
3138  }
3139  xinfo.m_Pos = 0;
3141  _ASSERT(status == eFailure || status == eSuccess);
3142 
3143  // Last sanity check
3144  if (status != eFailure && m_Current.GetName().empty()) {
3145  TAR_THROW(this, eBadName,
3146  "Empty entry name in archive");
3147  }
3148  // User callback
      // A 'false' return from the callback makes the entry a failing one
3149  if (!Checkpoint(m_Current, false/*read*/)) {
3150  status = eFailure;
3151  }
3152 
3153  // Match file name with the set of masks
      // A failing entry never matches;  the extract mask is consulted only
      // for eList / eExtract / eInternal, and otherwise everything matches
3154  bool match = (status != eSuccess ? false
3155  : m_Mask[eExtractMask].mask && (action == eList ||
3156  action == eExtract ||
3157  action == eInternal)
3160  .acase)
3161  : true);
3162  if (match && m_Mask[eExcludeMask].mask && action != eTest) {
3163  list<CTempString> elems;
3164  _ASSERT(!m_Current.GetName().empty());
3165  NStr::Split(m_Current.GetName(), "/", elems,
3170  }
3171 
3172  // NB: match is 'false' when processing a failing entry
      // x_ProcessEntry() is not called for a matching entry in eInternal
      // mode (short-circuit);  otherwise it is asked to extract, test, or
      // do neither (eUndefined), depending on 'match' and 'action'
3173  if ((match && action == eInternal)
3174  || x_ProcessEntry(match && action == eExtract ? eExtract :
3175  action == eTest ? eTest : eUndefined,
3176  size, done.get())
3177  || (match && (action == eList || action == eUpdate))) {
3178  _ASSERT(status == eSuccess && action != eTest);
3179  done->push_back(m_Current);
3180  if (action == eInternal) {
3181  break;
3182  }
3183  }
3184 
      // The next header is expected at the current stream position
3186  pos = m_StreamPos;
3187  }
3188 
3189  return done;
3190  }
3191 
3192 
3193 static string s_ToFilesystemPath(const string& base_dir, const string& name,
3194  bool noabs = false)
3195 {
3196  string path;
3197  _ASSERT(!name.empty());
3198  if (!base_dir.empty() && (!CDirEntry::IsAbsolutePath(name) || noabs)) {
3199  path = CDirEntry::ConcatPath(base_dir, name);
3200  } else {
3201  path = name;
3202  if (CDirEntry::IsAbsolutePath(path) && noabs) {
3203 #ifdef NCBI_OS_MSWIN
3204  if (isalpha((unsigned char) path[0]) && path[1] == ':') {
3205  // Drive
3206  path.erase(0, 2);
3207  } else if ((path[0] == '/' || path[0] == '\\') &&
3208  (path[1] == '/' || path[1] == '\\')) {
3209  // Network
3210  path.erase(0, path.find_first_of("/\\", 2));
3211  }
3212 #endif //NCBI_OS_MSWIN
3213  if (path[0] == '/' || path[0] == '\\') {
3214  path.erase(0, 1);
3215  }
3216  if (path.empty()) {
3217  path.assign(1, '.');
3218  }
3219  }
3220  }
3221  _ASSERT(!path.empty());
3222  return CDirEntry::NormalizePath(path);
3223 }
3224 
3225 
3226 static string s_ToArchiveName(const string& base_dir, const string& path)
3227 {
3228  // NB: Path assumed to have been normalized
3229  string retval = CDirEntry::AddTrailingPathSeparator(path);
3230 
3231 #ifdef NCBI_OS_MSWIN
3232  // Convert to Unix format with forward slashes
3233  NStr::ReplaceInPlace(retval, "\\", "/");
3234  const NStr::ECase how = NStr::eNocase;
3235 #else
3236  const NStr::ECase how = NStr::eCase;
3237 #endif //NCBI_OS_MSWIN
3238 
3239  SIZE_TYPE pos = 0;
3240 
3241  bool absolute;
3242  // Remove leading base dir from the path
3243  if (!base_dir.empty() && NStr::StartsWith(retval, base_dir, how)) {
3244  if (retval.size() > base_dir.size()) {
3245  retval.erase(0, base_dir.size()/*separator too*/);
3246  } else {
3247  retval.assign(1, '.');
3248  }
3249  absolute = false;
3250  } else {
3251  absolute = CDirEntry::IsAbsolutePath(retval);
3252 #ifdef NCBI_OS_MSWIN
3253  if (isalpha((unsigned char) retval[0]) && retval[1] == ':') {
3254  // Remove a disk name if present
3255  pos = 2;
3256  } else if (retval[0] == '/' && retval[1] == '/') {
3257  // Network name if present
3258  pos = retval.find('/', 2);
3259  absolute = true;
3260  }
3261 #endif //NCBI_OS_MSWIN
3262  }
3263 
3264  // Remove any leading and trailing slashes
3265  while (pos < retval.size() && retval[pos] == '/') {
3266  ++pos;
3267  }
3268  if (pos) {
3269  retval.erase(0, pos);
3270  }
3271  pos = retval.size();
3272  while (pos > 0 && retval[pos - 1] == '/') {
3273  --pos;
3274  }
3275  if (pos < retval.size()) {
3276  retval.erase(pos);
3277  }
3278 
3279  if (absolute) {
3280  retval.insert((SIZE_TYPE) 0, 1, '/');
3281  }
3282  return retval;
3283 }
3284 
3285 
3287 {
3288 public:
3290  : CDirEntry(GetTmpNameEx(entry.GetDir(), "xNCBItArX")),
3292  {
3293  _ASSERT(!Exists() && m_Entry.GetType() != eDir);
3294  if (CDirEntry(m_Entry.GetPath()).Rename(GetPath())) {
3295  m_Activated = m_Pending = true;
3296  errno = 0;
3297  }
3298  }
3299 
3301  {
3302  if (m_Activated) {
3303  (void)(m_Pending ? Restore() : RemoveEntry());
3304  }
3305  }
3306 
3307  bool Restore(void)
3308  {
3309  m_Entry.Remove();
3310  errno = 0;
3311  bool renamed = Rename(m_Entry.GetPath());
3312  m_Activated = !renamed;
3313  m_Pending = false;
3314  return renamed;
3315  }
3316 
3317  void Release(void)
3318  {
3319  m_Pending = false;
3320  }
3321 
3322 private:
3326 };
3327 
3328 
3330  const CTar::TEntries* entries)
3331 {
3333  bool extract = action == eExtract;
3334 
3335  if (extract) {
3336  // Destination for extraction
3337  unique_ptr<CDirEntry> dst
3342  !(m_Flags & fKeepAbsolutePath))));
3343  // Source for extraction
3344  unique_ptr<CDirEntry> src;
3345  // Direntry pending removal
3346  AutoPtr<CTarTempDirEntry> pending;
3347 
3348  // Dereference symlink if requested
3349  if (type != CTarEntryInfo::eSymLink &&
3351  dst->DereferenceLink();
3352  }
3353 
3354  // Actual type in file system (if exists)
3355  CDirEntry::EType dst_type = dst->GetType();
3356 
3357  // Look if extraction is allowed (when the destination exists)
3358  if (dst_type != CDirEntry::eUnknown) {
3359  bool extracted = false; // check if ours (prev. revision extracted)
3360  if (entries) {
3361  ITERATE(TEntries, e, *entries) {
3362  if (e->GetName() == m_Current.GetName() &&
3363  e->GetType() == m_Current.GetType()) {
3364  extracted = true;
3365  break;
3366  }
3367  }
3368  }
3369  if (!extracted) {
3370  // Can overwrite it?
3371  if (!(m_Flags & fOverwrite)) {
3372  // File already exists, and cannot be changed
3373  extract = false;
3374  }
3375  // Can update?
3376  else if ((m_Flags & fUpdate) == fUpdate // NB: fOverwrite set
3377  && (type == CTarEntryInfo::eDir ||
3378  // Make sure that dst is not newer than the entry
3379  dst->IsNewer(m_Current.GetModificationCTime(),
3380  // NB: dst must exist
3382  extract = false;
3383  }
3384  // Have equal types?
3385  else if (m_Flags & fEqualTypes) {
3386  if (type == CTarEntryInfo::eHardLink) {
3387  src.reset(new CDirEntry
3390  !(m_Flags & fKeepAbsolutePath))));
3391  if (dst_type != src->GetType()) {
3392  extract = false;
3393  }
3394  } else if (dst_type != CDirEntry::EType(type)) {
3395  extract = false;
3396  }
3397  }
3398  }
3399  if (extract && (type != CTarEntryInfo::eDir ||
3400  dst_type != CDirEntry::eDir)) {
3401  if (!extracted && (m_Flags & fBackup) == fBackup) {
3402  // Need to backup the existing destination?
3403  CDirEntry tmp(*dst);
3404  if (!tmp.Backup(kEmptyStr, CDirEntry::eBackup_Rename)) {
3405  int x_errno = CNcbiError::GetLast().Code();
3406  TAR_THROW(this, eBackup,
3407  "Failed to backup '" + dst->GetPath() + '\''
3408  + s_OSReason(x_errno));
3409  }
3410  } else {
3411  // Do removal safely until extraction is confirmed
3412  pending.reset(new CTarTempDirEntry(*dst));
3413  if (/*!pending->Exists() ||*/ dst->Exists()) {
3414  // Security concern: do not attempt data extraction
3415  // into special files etc, which can harm the system.
3416 #ifdef __GNUC__
3417  int x_errno = errno ?: EEXIST;
3418 #else
3419  int x_errno = errno;
3420  if (x_errno == 0) {
3421  x_errno = EEXIST;
3422  }
3423 #endif //__GNUC__
3424  extract = false;
3425  TAR_THROW(this, eWrite,
3426  "Cannot extract '" + dst->GetPath() + '\''
3427  + s_OSReason(x_errno));
3428  }
3429  }
3430  }
3431  }
3432  if (extract) {
3433 #ifdef NCBI_OS_UNIX
3434  mode_t u;
3435  u = umask(022);
3436  umask(u & ~(S_IRUSR | S_IWUSR | S_IXUSR));
3437  try {
3438 #endif //NCBI_OS_UNIX
3439  extract = x_ExtractEntry(size, dst.get(), src.get());
3440 #ifdef NCBI_OS_UNIX
3441  } catch (...) {
3442  umask(u);
3443  throw;
3444  }
3445  umask(u);
3446 #endif //NCBI_OS_UNIX
3447  if (pending) {
3448  if (extract) {
3449  pending->Release();
3450  } else if (!pending->Restore()) { // Undo delete
3451  int x_errno = errno;
3452  TAR_THROW(this, eWrite,
3453  "Cannot restore '" + dst->GetPath()
3454  + "' back in place" + s_OSReason(x_errno));
3455  }
3456  }
3457  }
3459  && action == eTest && (m_Flags & fDumpEntryHeaders)) {
3460  unique_ptr<CDirEntry> dst
3464  !(m_Flags & fKeepAbsolutePath))));
3465  (void) x_ExtractSparseFile(size, dst.get(), true);
3466  }
3467 
3469 
3470  return extract;
3471 }
3472 
3473 
3475 {
      // Skip `blocks` archive blocks.  Each pass either seeks the stream forward
      // (fast path) or reads and discards up to one buffer's worth of data via
      // x_ReadArchive() (slow path); m_StreamPos is advanced either way.
      // Throws eRead if the archive runs out before all blocks are skipped.
3477  while (blocks) {
3478 #ifndef NCBI_COMPILER_WORKSHOP
3479  // RogueWave RTL is buggy in seeking pipes -- it clobbers
3480  // (discards) streambuf data instead of leaving it alone..
3482  && m_BufferPos == 0 && blocks >= BLOCK_OF(m_BufferSize)) {
3483  CT_OFF_TYPE fskip =
3485  _ASSERT(ALIGN_SIZE(fskip) == fskip);
3486  if (m_Stream.rdbuf()->PUBSEEKOFF(fskip, IOS_BASE::cur)
3487  != (CT_POS_TYPE)((CT_OFF_TYPE)(-1))) {
      // Seek succeeded: account for the skipped bytes and go around
3488  blocks -= BLOCK_OF(fskip);
3489  m_StreamPos += fskip;
3490  continue;
3491  }
      // Seek failed: warn if the archive is a file, then fall back to reading
3492  if (m_FileStream) {
3493  TAR_POST(2, Warning,
3494  "Cannot fast skip in file archive,"
3495  " reverting to slow skip");
3496  }
3498  }
3499 #endif //NCBI_COMPILER_WORKSHOP
      // Slow path: consume what is left, but at most one buffer per pass
3500  size_t nskip = (blocks < BLOCK_OF(m_BufferSize)
3501  ? (size_t) SIZE_OF(blocks)
3502  : m_BufferSize);
3503  _ASSERT(ALIGN_SIZE(nskip) == nskip);
3504  if (!x_ReadArchive(nskip)) {
3505  TAR_THROW(this, eRead,
3506  "Archive skip failed (EOF)");
3507  }
3508  _ASSERT(nskip);
      // x_ReadArchive() updates nskip (passed by reference, as the assert above
      // implies); round it up to whole blocks before accounting
3509  nskip = ALIGN_SIZE(nskip);
3510  blocks -= BLOCK_OF (nskip);
3511  m_StreamPos += nskip;
3512  }
3514 }
3515 
3516 
// Create the filesystem object for the entry being extracted at `dst`,
// dispatching on the entry type.  `size` is the amount of entry data in the
// archive; `src` (may be null) is the source of a hard link.
// Returns true if the entry was created, false if it was skipped or failed in
// a tolerated way (an error is posted); hard failures are thrown (eCreate).
3517 // NB: Clobbers umask, must be restored after the call
3519  const CDirEntry* src)
3520 {
3522  unique_ptr<CDirEntry> src_ptr; // deleter
3523  bool extracted = true; // assume best
3524 
3526  // Conform to POSIX-mandated behavior to extract as files
3528  }
3529  switch (type) {
3530  case CTarEntryInfo::eSparseFile: // NB: only PAX GNU/1.0 sparse file here
3532  case CTarEntryInfo::eFile:
3533  {{
3534  _ASSERT(!dst->Exists());
3535  // Create base directory
3536  CDir dir(dst->GetDir());
3537  if (/*dir.GetPath() != "." && */!dir.CreatePath()) {
3538  int x_errno = errno;
3539  TAR_THROW(this, eCreate,
3540  "Cannot create directory '" + dir.GetPath() + '\''
3541  + s_OSReason(x_errno));
3542  }
3543 
3544  if (type == CTarEntryInfo::eHardLink) {
3545  if (!src) {
3546  src_ptr.reset(new CDirEntry
3549  !(m_Flags & fKeepAbsolutePath))));
3550  src = src_ptr.get();
3551  }
3552  if (src->GetType() == CDirEntry::eUnknown && size) {
3553  // Looks like a dangling hard link but luckily we have
3554  // the actual file data (POSIX extension) so use it here.
3556  }
3557  }
3558 
      // Hard link: try link() first (UNIX only); if that fails, or on other
      // platforms, emulate the link by copying the source to the destination
3559  if (type == CTarEntryInfo::eHardLink) {
3560  _ASSERT(src);
3561 #ifdef NCBI_OS_UNIX
3562  if (link(src->GetPath().c_str(), dst->GetPath().c_str()) == 0){
3563  if (m_Flags & fPreserveAll) {
3565  }
3566  break;
3567  }
3568  int x_errno = errno;
3569  TAR_POST(10, Warning,
3570  "Cannot hard-link '" + src->GetPath()
3571  + "' and '" + dst->GetPath() + '\''
3572  + s_OSReason(x_errno) + ", trying to copy");
3573 #endif //NCBI_OS_UNIX
3574  if (!src->Copy(dst->GetPath(),
3577  TAR_POST(11, Error,
3578  "Cannot hard-link '" + src->GetPath()
3579  + "' and '" + dst->GetPath() + "' via copy");
3580  extracted = false;
3581  break;
3582  }
3583  } else if (type == CTarEntryInfo::eSparseFile && size) {
3584  if (!(extracted = x_ExtractSparseFile(size, dst)))
3585  break;
3586  } else {
3587  x_ExtractPlainFile(size, dst);
3588  }
3589 
3590  // Restore attributes
3591  if (m_Flags & fPreserveAll) {
3593  }
3594  }}
3595  break;
3596 
3597  case CTarEntryInfo::eDir:
3598  {{
      // `dst` must actually be a CDir object; anything else is reported as an
      // internal error
3599  const CDir* dir = dynamic_cast<const CDir*>(dst);
3600  if (!dir || !dir->CreatePath()) {
3601  int x_errno = !dir ? 0 : CNcbiError::GetLast().Code();
3602  TAR_THROW(this, eCreate,
3603  "Cannot create directory '" + dst->GetPath() + '\''
3604  + (!dir
3605  ? string(": Internal error")
3606  : s_OSReason(x_errno)));
3607  }
3608  // NB: Attributes for a directory must be set only after all of its
3609  // entries have been already extracted.
3610  _ASSERT(size == 0);
3611  }}
3612  break;
3613 
3615  {{
3616  const CSymLink* symlink = dynamic_cast<const CSymLink*>(dst);
3617  if (!symlink || !symlink->Create(m_Current.GetLinkName())) {
3618  int x_errno = !symlink ? 0 : CNcbiError::GetLast().Code();
3619  string error = "Cannot create symlink '" + dst->GetPath()
3620  + "' -> '" + m_Current.GetLinkName() + '\''
3621  + (!symlink
3622  ? string(": Internal error")
3623  : s_OSReason(x_errno));
      // Only an ENOTSUP failure with fSkipUnsupported set is tolerated (posted
      // as an error and the entry skipped); everything else is thrown
3624  if (!symlink || x_errno != ENOTSUP
3625  || !(m_Flags & fSkipUnsupported)) {
3626  TAR_THROW(this, eCreate, error);
3627  }
3628  TAR_POST(12, Error, error);
3629  extracted = false;
3630  }
3631  _ASSERT(size == 0);
3632  }}
3633  break;
3634 
3635  case CTarEntryInfo::ePipe:
3636  {{
3637  _ASSERT(size == 0);
3638 #ifdef NCBI_OS_UNIX
      // Clear the umask so that the mode passed to mkfifo() is not masked
3639  umask(0);
3640  int x_errno = 0;
3641  if (mkfifo(dst->GetPath().c_str(), m_Current.GetMode())/*!= 0*/) {
3642  x_errno = errno;
3643  extracted = false;
3644  }
3645  if (extracted) {
3646  break;
3647  }
3648  string reason = s_OSReason(x_errno);
3649 #else
3650  int x_errno = ENOTSUP;
3651  string reason = ": Feature not supported by host OS";
3652  extracted = false;
3653 #endif //NCBI_OS_UNIX
3654  string error
3655  = "Cannot create FIFO '" + dst->GetPath() + '\'' + reason;
3656  if (x_errno != ENOTSUP || !(m_Flags & fSkipUnsupported)) {
3657  TAR_THROW(this, eCreate, error);
3658  }
3659  TAR_POST(81, Error, error);
3660  }}
3661  break;
3662 
3665  {{
3666  _ASSERT(size == 0);
3667 #ifdef NCBI_OS_UNIX
      // Clear the umask so that the mode passed to mknod() is not masked
3668  umask(0);
3669  int x_errno = 0;
3670  mode_t m = (m_Current.GetMode() |
3671  (type == CTarEntryInfo::eCharDev ? S_IFCHR : S_IFBLK));
3672  if (mknod(dst->GetPath().c_str(),m,m_Current.m_Stat.orig.st_rdev)){
3673  x_errno = errno;
3674  extracted = false;
3675  }
3676  if (extracted) {
3677  break;
3678  }
3679  string reason = s_OSReason(x_errno);
3680 #else
3681  int x_errno = ENOTSUP;
3682  string reason = ": Feature not supported by host OS";
3683  extracted = false;
3684 #endif //NCBI_OS_UNIX
3685  string error
3686  = "Cannot create " + string(type == CTarEntryInfo::eCharDev
3687  ? "character" : "block")
3688  + " device '" + dst->GetPath() + '\'' + reason;
3689  if (x_errno != ENOTSUP || !(m_Flags & fSkipUnsupported)) {
3690  TAR_THROW(this, eCreate, error);
3691  }
3692  TAR_POST(82, Error, error);
3693  }}
3694  break;
3695 
3697  _ASSERT(size == 0);
3698  /*NOOP*/
3699  break;
3700 
3704  // Extended headers should have already been processed and not be here
3705  _TROUBLE;
3706  /*FALLTHRU*/
3707 
3708  default:
3709  TAR_POST(13, Error,
3710  "Skipping unsupported entry '" + m_Current.GetName()
3711  + "' of type #" + NStr::IntToString(int(type)));
3712  extracted = false;
3713  break;
3714  }
3715 
3716  return extracted;
3717 }
3718 
3719 
3721 {
      // Create (or truncate) the file at dst's path and fill it with `size`
      // bytes of entry data read from the archive.  Throws eCreate if the file
      // cannot be opened, eRead on a premature end of the archive, and eWrite
      // if writing or closing the file fails.
3722  // FIXME: Switch to CFileIO eventually to bypass ofstream's obscurity
3723  // w.r.t. errors, extra buffering etc.
3724  CNcbiOfstream ofs(dst->GetPath().c_str(),
3725  IOS_BASE::trunc |
3726  IOS_BASE::out |
3727  IOS_BASE::binary);
3728  if (!ofs) {
3729  int x_errno = errno;
3730  TAR_THROW(this, eCreate,
3731  "Cannot create file '" + dst->GetPath() + '\''
3732  + s_OSReason(x_errno));
3733  }
3734  if (m_Flags & fPreserveMode) { // NB: secure
3736  dst, fTarURead | fTarUWrite);
3737  }
3738 
      // Copy the data in chunks of at most m_BufferSize bytes; the loop is left
      // at the first failed write (a thrown stream failure counts as one)
3739  bool okay = ofs.good();
3740  if (okay) while (size) {
3741  // Read from the archive
3742  size_t nread = size < m_BufferSize ? (size_t) size : m_BufferSize;
3743  const char* data = x_ReadArchive(nread);
3744  if (!data) {
3745  TAR_THROW(this, eRead,
3746  "Unexpected EOF in archive");
3747  }
3748  _ASSERT(nread && ofs.good());
3749  // Write file to disk
3750  try {
3751  okay = ofs.write(data, (streamsize) nread) ? true : false;
3752  } catch (IOS_BASE::failure&) {
3753  okay = false;
3754  }
3755  if (!okay) {
3756  break;
3757  }
3758  size -= nread;
      // The archive position moves in whole blocks
3759  m_StreamPos += ALIGN_SIZE(nread);
3760  }
3761 
      // A failure to close is reported as well ("Cannot close ...")
3762  ofs.close();
3763  if (!okay || !ofs.good()) {
3764  int x_errno = errno;
3765  TAR_THROW(this, eWrite,
3766  "Cannot " + string(okay ? "close" : "write")
3767  + " file '" + dst->GetPath()+ '\'' + s_OSReason(x_errno));
3768  }
3769 }
3770 
3771 
3772 string CTar::x_ReadLine(Uint8& size, const char*& data, size_t& nread)
3773 {
3774  string line;
3775  for (;;) {
3776  size_t n;
3777  for (n = 0; n < nread; ++n) {
3778  if (!isprint((unsigned char) data[n])) {
3779  break;
3780  }
3781  }
3782  line.append(data, n);
3783  if (n < nread) {
3784  if (data[n] == '\n') {
3785  ++n;
3786  }
3787  data += n;
3788  nread -= n;
3789  break;
3790  }
3791  if (!(nread = size < BLOCK_SIZE ? size : BLOCK_SIZE)) {
3792  break;
3793  }
3794  if (!(data = x_ReadArchive(nread))) {
3795  return kEmptyStr;
3796  }
3797  _ASSERT(nread);
3798  if (size >= nread) {
3799  size -= nread;
3800  } else {
3801  size = 0;
3802  }
3803  m_StreamPos += ALIGN_SIZE(nread);
3804  }
3805  return line;
3806 }
3807 
3808 
3809 template<>
3810 struct Deleter<FILE>
3811 {
3812  static void Delete(FILE* fp) { fclose(fp); }
3813 };
3814 
3815 
3816 #ifdef NCBI_OS_MSWIN
3817 # define NCBI_FILE_WO "wb"
3818 #else
3819 # define NCBI_FILE_WO "w"
3820 #endif /*NCBI_OS_MSWIN*/
3821 
3823 {
      // Expand a sparse file entry of `size` data bytes into the file at `dst`.
      // The entry data begins with a textual map read line by line: a region
      // count followed by an offset and a size for each region.  Then each
      // non-empty region's bytes are written at its offset, and finally the
      // file is sized to the highest region end via s_TruncateFile().
      // With `dump` set only the map is parsed and false is returned.
      // Returns true on success; soft failures post an error and return false;
      // eRead / eCreate are thrown for the hard ones.
3824  _ASSERT(size);
3825 
3826  // Read sparse map first
3827  Uint8 pos = m_StreamPos;
3828  size_t nread = size < BLOCK_SIZE ? (size_t) size : BLOCK_SIZE;
3829  const char* data = x_ReadArchive(nread);
3830  if (!data) {
3831  TAR_THROW(this, eRead,
3832  "Unexpected EOF in archive");
3833  }
3834  _ASSERT(nread);
3835  if (size >= nread) {
3836  size -= nread;
3837  } else {
3838  size = 0;
3839  }
3840 
3841  string num(x_ReadLine(size, data, nread)); // "numblocks"
3842  Uint8 n = NStr::StringToUInt8(num,
3845  if (!n) {
3846  TAR_POST(97, Error,
3847  "Cannot expand sparse file '" + dst->GetPath()
3848  + "': Region count is "
3849  + string(num.empty() ? "missing" : "invalid")
3850  + " (\"" + num + "\")");
3851  m_StreamPos += ALIGN_SIZE(nread);
3852  return false;
3853  }
3854  m_StreamPos += ALIGN_SIZE(nread);
      // NOTE(review): n comes straight from the archive and is not bounded
      // here, so a hostile value can request a very large allocation -- confirm
      // whether an upper limit is wanted
3855  vector< pair<Uint8, Uint8> > bmap(n);
3856 
3857  for (Uint8 i = 0; i < n; ++i) { // "offset numbytes" pairs
3858  Uint8 val[2];
3859  for (int k = 0; k < 2; ++k) {
3860  num = x_ReadLine(size, data, nread);
3861  try {
3862  val[k] = NStr::StringToUInt8(num);
3863  } catch (...) {
3864  TAR_POST(98, Error,
3865  "Cannot expand sparse file '" + dst->GetPath()
3866  + "': Sparse map "
3867  + string(k == 0 ? "offset" : "region size")
3868  + '[' + NStr::NumericToString(i) + "] is "
3869  + string(num.empty() ? "missing" : "invalid")
3870  + " (\"" + num + "\")");
3871  return false;
3872  }
3873  }
3874  bmap[i] = pair<Uint8, Uint8>(val[0], val[1]);
3875  }
3876  if (dump) {
3878  /* dontcare */
3879  return false;
3880  }
3881 
3882  // Write the file out
3883  AutoPtr<FILE> fp(::fopen(dst->GetPath().c_str(), NCBI_FILE_WO));
3884  if (!fp) {
3885  int x_errno = errno;
3886  TAR_THROW(this, eCreate,
3887  "Cannot create file '" + dst->GetPath() + '\''
3888  + s_OSReason(x_errno));
3889  }
3890  if (m_Flags & fPreserveMode) { // NB: secure
3892  dst, fTarURead | fTarUWrite);
3893  }
3894 
      // nread now counts the unconsumed bytes at `data`; it is reset so that
      // region data is read afresh from the archive.
      // eof tracks the highest region end seen (the resulting file size).
      // x_error is 0 while all is well, an errno value on an I/O failure, or -1
      // when the failure was already reported / errno was not set.
3895  nread = 0;
3896  Uint8 eof = 0;
3897  int x_error = 0;
3898  for (Uint8 i = 0; i < n; ++i) {
      // NOTE(review): offset + size may wrap around for hostile map values
3899  Uint8 top = bmap[i].first + bmap[i].second;
3900  if (eof < top) {
3901  eof = top;
3902  }
3903  if (!bmap[i].second) {
3904  continue;
3905  }
3906  // non-empty region
      // NOTE(review): the (long) cast truncates offsets that do not fit in a
      // long (e.g. where long is 32 bits wide) -- confirm whether a 64-bit seek
      // should be used here
3907  if (::fseek(fp.get(), (long) bmap[i].first, SEEK_SET) != 0) {
3908  x_error = errno;
3909  break;
3910  }
3911  Uint8 done = 0;
3912  do {
      // Refill from the archive only when the previous chunk is used up; a
      // chunk may therefore span the end of one region and the start of the next
3913  if (!nread) {
3914  nread = size < m_BufferSize ? (size_t) size : m_BufferSize;
3915  if (!nread || !(data = x_ReadArchive(nread))) {
3916  x_error = errno;
3917  TAR_POST(99, Error,
3918  "Cannot read archive data for sparse file '"
3919  + dst->GetPath() + "', region #"
3921  + (nread
3922  ? s_OSReason(x_error)
3923  : string(": End-of-data")));
3924  x_error = -1;
3925  eof = 0;
3926  break;
3927  }
3928  _ASSERT(nread);
3929  size -= nread;
3930  m_StreamPos += ALIGN_SIZE(nread);
3931  }
      // Write no more than what the current region still needs
3932  size_t xread = nread;
3933  if (xread > bmap[i].second - done) {
3934  xread = (size_t)(bmap[i].second - done);
3935  }
3936  if (::fwrite(data, 1, xread, fp.get()) != xread) {
3937  if (!(x_error = errno)) {
3938  x_error = -1; // Make sure non-zero
3939  }
3940  break;
3941  }
3942  done += xread;
3943  data += xread;
3944  nread -= xread;
3945  } while (done < bmap[i].second);
3946  if (x_error) {
3947  break;
3948  }
3949  }
3950 
3951  // Finalize the file
      // The pointer is released from the AutoPtr first so that the file is not
      // closed a second time by the deleter
3952  bool closed = ::fclose(fp.release()) == 0 ? true : false;
3953  if (!x_error && !closed) {
3954  x_error = errno;
3955  }
3956  string reason;
3957  if (x_error) {
3958  reason = s_OSReason(x_error);
3959  } else if (eof) {
3960  x_error = s_TruncateFile(dst->GetPath(), eof);
3961  if (x_error) {
3962 #ifdef NCBI_OS_MSWIN
3963  TCHAR* str = NULL;
3964  DWORD rv = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER |
3965  FORMAT_MESSAGE_FROM_SYSTEM |
3966  FORMAT_MESSAGE_MAX_WIDTH_MASK |
3967  FORMAT_MESSAGE_IGNORE_INSERTS,
3968  NULL, (DWORD) x_error,
3969  MAKELANGID(LANG_NEUTRAL,SUBLANG_DEFAULT),
3970  (LPTSTR) &str, 0, NULL);
3971  if (str) {
3972  if (rv) {
3973  _ASSERT(*str);
3974  reason = string(": ") + _T_STDSTRING(str);
3975  }
3976  ::LocalFree((HLOCAL) str);
3977  }
3978  if (reason.empty()) {
3979  reason = ": Error 0x" + NStr::UIntToString(x_error, 0, 16);
3980  }
3981 #else
3982  reason = s_OSReason(x_error);
3983 #endif //NCBI_OS_MSWIN
3984  }
3985  }
      // On any failure the partially written file is removed
3986  if (x_error) {
3987  _ASSERT(!reason.empty());
3988  TAR_POST(100, Error,
3989  "Cannot write sparse file '" + dst->GetPath() + '\''+ reason);
3990  dst->Remove();
3991  return false;
3992  }
3993 
3994  return true;
3995 }
3996 
3997 
3999  TFlags what,
4000  const CDirEntry* path,
4001  TTarMode perm) const
4002 {
      // Apply the attributes recorded in `info` to `path`; if `path` is null,
      // the target is derived from m_BaseDir and the entry name.  `what`
      // selects which of time / owner / mode get applied (in that order).
      // A non-zero `perm` is used in place of the mode recorded in `info`.
      // Throws eRestoreAttrs if the time or the mode cannot be set; failures to
      // set the owner are ignored.
4003  unique_ptr<CDirEntry> path_ptr; // deleter
4004  if (!path) {
4005  path_ptr.reset(new CDirEntry(s_ToFilesystemPath
4006  (m_BaseDir, info.GetName(),
4007  !(m_Flags & fKeepAbsolutePath))));
4008  path = path_ptr.get();
4009  }
4010 
4011  // Date/time.
4012  // Set the time before permissions because on some platforms this setting
4013  // can also affect file permissions.
4014  if (what & fPreserveTime) {
4015  CTime modification(info.GetModificationTime());
4016  CTime last_access(info.GetLastAccessTime());
4017  CTime creation(info.GetCreationTime());
4018  modification.SetNanoSecond(info.m_Stat.mtime_nsec);
4019  last_access.SetNanoSecond(info.m_Stat.atime_nsec);
4020  creation.SetNanoSecond(info.m_Stat.ctime_nsec);
4021  if (!path->SetTime(&modification, &last_access, &creation)) {
4022  int x_errno = CNcbiError::GetLast().Code();
4023  TAR_THROW(this, eRestoreAttrs,
4024  "Cannot restore date/time of '" + path->GetPath() + '\''
4025  + s_OSReason(x_errno));
4026  }
4027  }
4028 
4029  // Owner.
4030  // This must precede changing permissions because on some systems chown()
4031  // clears the set[ug]id bits for non-superusers thus resulting in incorrect
4032  // file permissions.
4033  if (what & fPreserveOwner) {
4034  bool done = false;
4035  // 2-tier trial: first using the names, then using numeric IDs.
4036  // Note that it is often impossible to restore the original owner
4037  // without the super-user rights so no error checking is done here.
4038  if (!info.GetUserName().empty() || !info.GetGroupName().empty()) {
      // NOTE(review): uid / gid are not initialized here, yet they are compared
      // below when both SetOwner() attempts fail; this relies on the first
      // SetOwner() call storing them even on failure -- confirm
4039  unsigned int uid, gid;
4040  if (path->SetOwner(info.GetUserName(), info.GetGroupName(),
4041  eIgnoreLinks, &uid, &gid)
4042  || (!info.GetGroupName().empty()
4043  && path->SetOwner(kEmptyStr, info.GetGroupName(),
4044  eIgnoreLinks))
4045  || (uid == info.GetUserId() && gid == info.GetGroupId())) {
4046  done = true;
4047  }
4048  }
4049  if (!done) {
      // Second tier: numeric IDs passed as strings; if setting both fails, try
      // the group alone
4050  string user = NStr::UIntToString(info.GetUserId());
4051  string group = NStr::UIntToString(info.GetGroupId());
4052  if (!path->SetOwner(user, group, eIgnoreLinks)) {
4053  path->SetOwner(kEmptyStr, group, eIgnoreLinks);
4054  }
4055  }
4056  }
4057 
4058  // Mode.
4059  // Set them last.
      // Pipes and devices are excluded from mode changes here
4060  if ((what & fPreserveMode)
4061  && info.GetType() != CTarEntryInfo::ePipe
4062  && info.GetType() != CTarEntryInfo::eCharDev
4063  && info.GetType() != CTarEntryInfo::eBlockDev) {
4064  bool failed = false;
4065 #ifdef NCBI_OS_UNIX
4066  // We won't change permissions for sym.links because lchmod() is not
4067  // portable, and also is not implemented on majority of platforms.
4068  if (info.GetType() != CTarEntryInfo::eSymLink) {
4069  // Use raw mode here to restore most of the bits
4070  mode_t mode = s_TarToMode(perm ? perm : info.m_Stat.orig.st_mode);
4071  if (chmod(path->GetPath().c_str(), mode) != 0) {
4072  // May fail due to setuid/setgid bits -- strip'em and try again
4073  if (mode & (S_ISUID | S_ISGID)) {
4074  mode &= ~(S_ISUID | S_ISGID);
4075  failed = chmod(path->GetPath().c_str(), mode) != 0;
4076  } else {
4077  failed = true;
4078  }
4080  }
4081  }
4082 #else
4083  CDirEntry::TMode user, group, other;
4084  CDirEntry::TSpecialModeBits special_bits;
4085  if (perm) {
4086  s_TarToMode(perm, &user, &group, &other, &special_bits);
4087  } else {
4088  info.GetMode(&user, &group, &other, &special_bits);
4089  }
4090  failed = !path->SetMode(user, group, other, special_bits);
4091 #endif //NCBI_OS_UNIX
4092  if (failed) {
4093  int x_errno = CNcbiError::GetLast().Code();
4094  TAR_THROW(this, eRestoreAttrs,
4095  "Cannot " + string(perm ? "change" : "restore")
4096  + " mode bits of '" + path->GetPath() + '\''
4097  + s_OSReason(x_errno));
4098  }
4099  }
4100 }
4101 
4102 
4103 static string s_BaseDir(const string& dirname)
4104 {
4105  string path = s_ToFilesystemPath(kEmptyStr, dirname);
4106 #ifdef NCBI_OS_MSWIN
4107  // Replace backslashes with forward slashes
4108  NStr::ReplaceInPlace(path, "\\", "/");
4109 #endif //NCBI_OS_MSWIN
4110  if (!NStr::EndsWith(path, '/'))
4111  path += '/';
4112  return path;
4113 }
4114 
4115 
// Add the filesystem entry `name` (resolved against m_BaseDir) to the archive;
// a directory is followed by everything it contains, recursively.
// If `toc` is given, it is searched from its end (most recent entries first)
// for an entry with the same path in order to decide whether anything needs
// to be written at all.
// Returns the list of entries that have been written (possibly empty).
// Throws eOpen / eBadName / eRead / eUnsupportedSource on failures that are
// not tolerated by the current flags.
4116 unique_ptr<CTar::TEntries> CTar::x_Append(const string& name,
4117  const TEntries* toc)
4118 {
4119  unique_ptr<TEntries> entries(new TEntries);
4120  unique_ptr<CDir::TEntries> dir;
4121 
4122  const EFollowLinks follow_links = (m_Flags & fFollowLinks ?
4124  unsigned int uid = 0, gid = 0;
4125  bool update = true;
4126 
4127  // Create the entry info
4129 
4130  // Compose entry name for relative names
4131  string path = s_ToFilesystemPath(m_BaseDir, name);
4132 
4133  // Get direntry information
4134  CDirEntry entry(path);
4135  CDirEntry::SStat st;
4136  if (!entry.Stat(&st, follow_links)) {
4137  int x_errno = errno;
4138  TAR_THROW(this, eOpen,
4139  "Cannot get status of '" + path + '\''+ s_OSReason(x_errno));
4140  }
4142 
4143  string temp = s_ToArchiveName(m_BaseDir, path);
4144 
4145  if (temp.empty()) {
4146  TAR_THROW(this, eBadName,
4147  "Empty entry name not allowed");
4148  }
4149 
      // Reject names having ".." as one of their path components
4150  list<CTempString> elems;
4151  NStr::Split(temp, "/", elems,
4153  if (find(elems.begin(), elems.end(), "..") != elems.end()) {
4154  TAR_THROW(this, eBadName,
4155  "Name '" + temp + "' embeds parent directory (\"..\")");
4156  }
      // Entries matching the exclusion mask are left out without any message
4157  if (m_Mask[eExcludeMask].mask
4158  && s_MatchExcludeMask(temp, elems,
4160  m_Mask[eExcludeMask].acase)) {
4161  goto out;
4162  }
4163  elems.clear();
      // Directory names carry a trailing slash in the archive
4164  if (type == CDirEntry::eDir && temp != "/") {
4165  temp += '/';
4166  }
4167 
4168  m_Current.m_Name.swap(temp);
4171  _ASSERT(!follow_links);
4172  m_Current.m_LinkName = entry.LookupLink();
4173  if (m_Current.GetLinkName().empty()) {
4174  TAR_THROW(this, eBadName,
4175  "Empty link name not allowed");
4176  }
4177  }
4178 
4180  follow_links, &uid, &gid);
4181 #ifdef NCBI_OS_UNIX
      // Drop user / group names that are merely the numeric IDs spelled out
4182  if (NStr::UIntToString(uid) == m_Current.GetUserName()) {
4183  m_Current.m_UserName.clear();
4184  }
4185  if (NStr::UIntToString(gid) == m_Current.GetGroupName()) {
4186  m_Current.m_GroupName.clear();
4187  }
4188 #endif //NCBI_OS_UNIX
4189 #ifdef NCBI_OS_MSWIN
4190  // These are fake but we don't want to leave plain 0 (Unix root) in there
4191  st.orig.st_uid = (uid_t) uid;
4192  st.orig.st_gid = (gid_t) gid;
4193 #endif //NCBI_OS_MSWIN
4194 
4195  m_Current.m_Stat = st;
4196  // Fixup for mode bits
4197  m_Current.m_Stat.orig.st_mode = (mode_t) s_ModeToTar(st.orig.st_mode);
4198 
4199  // Check if we need to update this entry in the archive
4200  if (toc) {
4201  bool found = false;
4202 
4203  if (type != CDirEntry::eUnknown) {
4204  // Start searching from the end of the list, to find
4205  // the most recent entry (if any) first
      // `temp` is reused below: once the path has matched a hard link entry,
      // it holds the link's target path and the scan goes on looking for that
      // target (skipping hard links) instead
4206  _ASSERT(temp.empty());
4207  REVERSE_ITERATE(TEntries, e, *toc) {
4208  if (!temp.empty()) {
4209  if (e->GetType() == CTarEntryInfo::eHardLink ||
4210  temp != s_ToFilesystemPath(m_BaseDir, e->GetName())) {
4211  continue;
4212  }
4213  } else if (path == s_ToFilesystemPath(m_BaseDir,e->GetName())){
4214  found = true;
4215  if (e->GetType() == CTarEntryInfo::eHardLink) {
4216  temp = s_ToFilesystemPath(m_BaseDir, e->GetLinkName());
4217  continue;
4218  }
4219  } else {
4220  continue;
4221  }
4222  if (m_Current.GetType() != e->GetType()) {
4223  if (m_Flags & fEqualTypes) {
4224  goto out;
4225  }
4226  } else if (m_Current.GetType() == CTarEntryInfo::eSymLink
4227  && m_Current.GetLinkName() == e->GetLinkName()) {
4228  goto out;
4229  }
4231  <= e->GetModificationCTime()) {
4232  update = false; // same(or older), no update
4233  }
4234  break;
4235  }
4236  }
4237 
4238  if (!update || (!found && (m_Flags & (fUpdate & ~fOverwrite)))) {
4240  goto out;
4241  }
4242  // Directories always get recursive treatment later
4243  update = false;
4244  }
4245  }
4246 
4247  // Append the entry
4248  switch (type) {
4249  case CDirEntry::eFile:
4250  _ASSERT(update);
4251  if (x_AppendFile(path)) {
4252  entries->push_back(m_Current);
4253  }
4254  break;
4255 
4258  case CDirEntry::eSymLink:
4259  case CDirEntry::ePipe:
      // These entries have no data in the archive, just the header
4260  _ASSERT(update);
4261  m_Current.m_Stat.orig.st_size = 0;
4262  x_WriteEntryInfo(path);
4263  entries->push_back(m_Current);
4264  break;
4265 
4266  case CDirEntry::eDir:
4267  dir.reset(CDir(path).GetEntriesPtr(kEmptyStr, CDir::eIgnoreRecursive));
4268  if (!dir) {
4269  int x_errno = CNcbiError::GetLast().Code();
4270  string error =
4271  "Cannot list directory '" + path + '\'' + s_OSReason(x_errno);
4272  if (m_Flags & fIgnoreUnreadable) {
4273  TAR_POST(101, Error, error);
4274  break;
4275  }
4276  TAR_THROW(this, eRead, error);
4277  }
4278  if (update) {
4279  m_Current.m_Stat.orig.st_size = 0;
4280  x_WriteEntryInfo(path);
4281  entries->push_back(m_Current);
4282  }
4283  // Append/update all files from that directory
4284  ITERATE(CDir::TEntries, e, *dir) {
4285  unique_ptr<TEntries> add = x_Append((*e)->GetPath(), toc);
4286  entries->splice(entries->end(), *add);
4287  }
4288  break;
4289 
4290  case CDirEntry::eDoor:
4291  case CDirEntry::eSocket:
4292  // Tar does not have any provisions to store this kind of entries
4293  if (!(m_Flags & fSkipUnsupported)) {
4294  TAR_POST(3, Warning,
4295  "Skipping non-archiveable "
4296  + string(type == CDirEntry::eSocket ? "socket" : "door")
4297  + " '" + path + '\'');
4298  }
4299  break;
4300 
4301  case CDirEntry::eUnknown:
4302  if (!(m_Flags & fSkipUnsupported)) {
4303  TAR_THROW(this, eUnsupportedSource,
4304  "Unable to archive '" + path + '\'');
4305  }
4306  /*FALLTHRU*/
4307 
4308  default:
4309  if (type != CDirEntry::eUnknown) {
4310  _TROUBLE;
4311  }
4312  TAR_POST(14, Error,
4313  "Skipping unsupported source '" + path
4314  + "' of type #" + NStr::IntToString(int(type)));
4315  break;
4316  }
4317 
4318  out:
4319  return entries;
4320 }
4321 
4322 
// Add a single entry described by `entry`, with its data taken from the
// stream `is`.  The entry name is converted to the archive form and stripped
// of any trailing slashes; the mode and the owner are filled in from the
// current process / platform defaults rather than from a file.
// Returns a list holding the entry written.
// Throws eBadName for an empty name or a name with a ".." component, and
// eRead if `is` is not in a good state.
4323 unique_ptr<CTar::TEntries> CTar::x_Append(const CTarUserEntryInfo& entry,
4324  CNcbiIstream& is)
4325 {
4326  unique_ptr<TEntries> entries(new TEntries);
4327 
4328  // Create a temp entry info first
4330 
4331  string temp = s_ToArchiveName(kEmptyStr, entry.GetName());
4332 
4333  while (NStr::EndsWith(temp, '/')) { // NB: directories are not allowed here
4334  temp.resize(temp.size() - 1);
4335  }
4336  if (temp.empty()) {
4337  TAR_THROW(this, eBadName,
4338  "Empty entry name not allowed");
4339  }
4340 
4341  list<CTempString> elems;
4342  NStr::Split(temp, "/", elems,
4344  if (find(elems.begin(), elems.end(), "..") != elems.end()) {
4345  TAR_THROW(this, eBadName,
4346  "Name '" + temp + "' embeds parent directory (\"..\")");
4347  }
4348  elems.clear();
4349 
4350  // Recreate entry info
4351  m_Current = entry;
4352  m_Current.m_Name.swap(temp);
4355 
4356  if (!is.good()) {
4357  TAR_THROW(this, eRead,
4358  "Bad input file stream");
4359  }
4360 
      // Modification and access times are both set from st_ctime
4363  m_Current.m_Stat.orig.st_mtime
4364  = m_Current.m_Stat.orig.st_atime
4365  = m_Current.m_Stat.orig.st_ctime;
4369 
4370 #ifdef NCBI_OS_UNIX
4371  // use regular file mode, adjusted with umask()
4373  fTarGRead | fTarGWrite |
4374  fTarORead | fTarOWrite);
4375  mode_t u;
4376 # ifdef HAVE_GETUMASK
4377  // NB: thread-safe
4378  u = getumask();
4379 # else
      // The umask can only be read by setting it, so set a value temporarily
      // and put the original back right away
4380  u = umask(022);
4381  umask(u);
4382 # endif //HAVE_GETUMASK
4383  mode &= ~u;
4384  m_Current.m_Stat.orig.st_mode = (mode_t) s_ModeToTar(mode);
4385 
      // The entry is owned by the effective user / group of this process
4386  m_Current.m_Stat.orig.st_uid = geteuid();
4387  m_Current.m_Stat.orig.st_gid = getegid();
4388 
4390  .swap(m_Current.m_UserName);
4392  .swap(m_Current.m_GroupName);
4393 #endif //NCBI_OS_UNIX
4394 #ifdef NCBI_OS_MSWIN
4395  // safe file mode
4396  m_Current.m_Stat.orig.st_mode = (fTarURead | fTarUWrite |
4397  fTarGRead | fTarORead);
4398 
4399  unsigned int uid = 0, gid = 0;
4401  SE_KERNEL_OBJECT,
4404  &uid, &gid);
4405  // These are fake but we don't want to leave plain 0 (Unix root) in there
4406  m_Current.m_Stat.orig.st_uid = (uid_t) uid;
4407  m_Current.m_Stat.orig.st_gid = (gid_t) gid;
4408 #endif //NCBI_OS_MSWIN
4409 
      // NB: the name as given by the caller is passed on here (it is used by
      // x_AppendStream() in its error message, among other things)
4410  x_AppendStream(entry.GetName(), is);
4411 
4412  entries->push_back(m_Current);
4413  return entries;
4414 }
4415 
4416 
4417 // Regular entries only!
// Write the header for the current entry (m_Current) and then copy its data
// from `is` into the archive, padding the data with zeros up to the block
// boundary.  `name` is passed to x_WriteEntryInfo() and used in the error
// message.  Throws eRead if the stream fails or delivers no data while some
// is still expected.
4418 void CTar::x_AppendStream(const string& name, CNcbiIstream& is)
4419 {
4421 
4422  // Write entry header
4423  x_WriteEntryInfo(name);
4424 
      // Clear errno so that a stale value does not end up in the error message
4425  errno = 0;
4427  while (size) {
4428  // Write file contents
      // Read straight into the free part of the archive buffer, but not beyond
      // the data that is still expected
4430  size_t avail = m_BufferSize - m_BufferPos;
4431  if (avail > size) {
4432  avail = (size_t) size;
4433  }
4434  // Read file
4435  int x_errno = 0;
4436  streamsize xread;
4437  if (is.good()) {
4438  try {
4439  if (!is.read(m_Buffer + m_BufferPos, (streamsize) avail)) {
4440  x_errno = errno;
4441  xread = -1;
4442  } else {
4443  xread = is.gcount();
4444  }
4445  } catch (IOS_BASE::failure&) {
4446  xread = -1;
4447  }
4448  } else {
4449  xread = -1;
4450  }
4451  if (xread <= 0) {
      // The cast is only used to word the message ("file" vs. "stream")
4452  ifstream* ifs = dynamic_cast<ifstream*>(&is);
4453  TAR_THROW(this, eRead,
4454  "Cannot read "
4455  + string(ifs ? "file" : "stream")
4456  + " '" + name + '\'' + s_OSReason(x_errno));
4457  }
4458  // Write buffer to the archive
4459  avail = (size_t) xread;
4460  x_WriteArchive(avail);
4461  size -= avail;
4462  }
4463 
4464  // Write zeros to get the written size a multiple of BLOCK_SIZE
4465  size_t zero = ALIGN_SIZE(m_BufferPos) - m_BufferPos;
4466  memset(m_Buffer + m_BufferPos, 0, zero);
4467  x_WriteArchive(zero);
4469 }
4470 
4471 
4472 // Regular files only!
// Open `file` for binary reading and append it to the archive by means of
// x_AppendStream().  Returns true if the file has been appended.  If the file
// cannot be opened, an error is posted and false is returned when
// fIgnoreUnreadable is set; eOpen is thrown otherwise.
4473 bool CTar::x_AppendFile(const string& file)
4474 {
4476 
4477  // FIXME: Switch to CFileIO eventually to avoid ifstream's obscurity
4478  // w.r.t. errors, an extra layer of buffering etc.
4479  CNcbiIfstream ifs;
4480 
4481  // Open file
4482  ifs.open(file.c_str(), IOS_BASE::binary | IOS_BASE::in);
4483  if (!ifs) {
4484  int x_errno = errno;
4485  string error
4486  = "Cannot open file '" + file + '\'' + s_OSReason(x_errno);
4487  if (m_Flags & fIgnoreUnreadable) {
4488  TAR_POST(102, Error, error);
4489  return false;
4490  }
4491  TAR_THROW(this, eOpen, error);
4492  }
4493 
4494  x_AppendStream(file, ifs);
4495  return true;
4496 }
4497 
4498 
4500  EMaskType type, NStr::ECase acase)
4501 {
      // Install `mask` (which may be null) in the slot selected by `type`,
      // together with the case sensitivity `acase`.  A mask previously held in
      // that slot is deleted if it was owned.  The new mask is owned according
      // to `own`; a null mask is never recorded as owned.
      // Throws eMemory if `type` does not denote a valid slot.
4502  int idx = int(type);
4503  if (idx < 0 || sizeof(m_Mask)/sizeof(m_Mask[0]) <= (size_t) idx){
4504  TAR_THROW(this, eMemory,
4505  "Mask type is out of range: " + NStr::IntToString(idx));
4506  }
4507  if (m_Mask[idx].owned) {
4508  delete m_Mask[idx].mask;
4509  }
4510  m_Mask[idx].mask = mask;
4511  m_Mask[idx].acase = acase;
4512  m_Mask[idx].owned = mask ? own : eNoOwnership;
4513 }
4514 
4515 
4516 void CTar::SetBaseDir(const string& dirname)
4517 {
4518  string dir = s_BaseDir(dirname);
4519  m_BaseDir.swap(dir);
4520 }
4521 
4522 
4524  size_t blocking_factor,
4525  const string& base_dir)
4526 {
      // Estimate the size of an archive that would hold `files`, whose elements
      // pair a name (first) with a size (second).  Each file counts as one
      // header block plus its size rounded up to whole blocks; a name that,
      // with its terminating NUL, does not fit in STarHeader::name adds one
      // more header block plus the block-aligned name.  A non-empty result
      // also includes two end-of-archive blocks and the padding up to a
      // multiple of the buffer size implied by `blocking_factor`.
      // Returns 0 if nothing was counted.
4527  const size_t buffer_size = SIZE_OF(blocking_factor);
4528  string prefix = s_BaseDir(base_dir);
4529  Uint8 result = 0;
4530 
4531  ITERATE(TFiles, f, files) {
4532  // Count in the file size
4533  result += BLOCK_SIZE/*header*/ + ALIGN_SIZE(f->second);
4534 
4535  // Count in the long name (if any)
4536  string path = s_ToFilesystemPath(prefix, f->first);
4537  string name = s_ToArchiveName (prefix, path);
4538  size_t namelen = name.size() + 1;
4539  if (namelen > sizeof(STarHeader::name)) {
4540  result += BLOCK_SIZE/*long name header*/ + ALIGN_SIZE(namelen);
4541  }
4542  }
4543  if (result) {
4544  result += BLOCK_SIZE << 1; // EOT
4546  if (padding) {
4547  result += buffer_size - padding;
4548  }
4549  }
4550 
4551  return result;
4552 }
4553 
4554 
// IReader implementation on top of a CTar object (held in m_Tar, optionally
// owned).  It keeps count of the bytes read so far (m_Read) along with an
// end-of-data flag (m_Eof) and an error flag (m_Bad); Read() and
// PendingCount() are defined out of line.
4555 class CTarReader : public IReader
4556 {
4557 public:
4559  : m_Read(0), m_Eof(false), m_Bad(false), m_Tar(tar, own)
4560  { }
4561 
4562  virtual ERW_Result Read(void* buf, size_t count, size_t* bytes_read = 0);
4563  virtual ERW_Result PendingCount(size_t* count);
4564 
4565 private:
4567  bool m_Eof;
4568  bool m_Bad;
4570 };
4571 
4572 
// Read up to `count` bytes of the current archive entry's data into `buf`.
// The number of bytes stored is returned via `bytes_read` (if non-null).
// Returns eRW_Success when data was delivered and eRW_Eof once the entry
// data is exhausted.  In the bad state, or for a zero `count`, nothing is
// read; part of that return expression is on a line not visible here.
// If the archive cannot be read, the reader is marked bad and the error is
// raised (per the comment below) rather than reported as an end of data.
4573 ERW_Result CTarReader::Read(void* buf, size_t count, size_t* bytes_read)
4574 {
4575  if (m_Bad || !count) {
4576  if (bytes_read) {
4577  *bytes_read = 0;
4578  }
4579  return m_Bad ? eRW_Error
4581  : eRW_Eof;
4582  }
4583 
4584  size_t read;
4586  Uint8 left = m_Tar->m_Current.GetSize() - m_Read;
4587  if (!left) {
4588  m_Eof = true;
4589  read = 0;
4590  } else {
      // Never deliver more than what remains of the entry
4591  if (count > left) {
4592  count = (size_t) left;
4593  }
4594 
      // A non-zero offset means that the previous call stopped in the middle of
      // a block.  The rest of that block is served from the archive buffer
      // first: it is the block right before m_BufferPos, or the last block of
      // the buffer when m_BufferPos is 0.
4595  size_t off = (size_t) OFFSET_OF(m_Read);
4596  if (off) {
4597  read = BLOCK_SIZE - off;
4598  if (m_Tar->m_BufferPos) {
4599  off += m_Tar->m_BufferPos - BLOCK_SIZE;
4600  } else {
4601  off += m_Tar->m_BufferSize - BLOCK_SIZE;
4602  }
4603  if (read > count) {
4604  read = count;
4605  }
4606  memcpy(buf, m_Tar->m_Buffer + off, read);
4607  m_Read += read;
4608  count -= read;
4609  if (!count) {
4610  goto out;
4611  }
4612  buf = (char*) buf + read;
4613  } else {
4614  read = 0;
4615  }
4616 
      // Get the rest from the archive proper; `count` is updated by
      // x_ReadArchive() to what has actually become available
4617  off = m_Tar->m_BufferPos; // NB: x_ReadArchive() changes m_BufferPos
4618  if (m_Tar->x_ReadArchive(count)) {
4619  _ASSERT(count);
4620  memcpy(buf, m_Tar->m_Buffer + off, count);
4621  m_Read += count;
4622  read += count;
4623  m_Tar->m_StreamPos += ALIGN_SIZE(count);
4625  } else {
4626  m_Bad = true;
4627  _ASSERT(!m_Tar->m_Stream.good());
4628  // If we don't throw here, it may look like an ordinary EOF
4630  "Read error while streaming");
4631  }
4632  }
4633 
4634  out:
4635  _ASSERT(!m_Bad);
4636  if (bytes_read) {
4637  *bytes_read = read;
4638  }
4639  return m_Eof ? eRW_Eof : eRW_Success;
4640 }
4641 
4642 
4644 {
      // Report in `*count` how many bytes of the current entry are readily
      // available.  Returns eRW_Error in the bad state, eRW_Eof when the entry
      // data has been used up and the end of data was already seen, and
      // eRW_Success otherwise.
4645  if (m_Bad) {
4646  return eRW_Error;
4647  }
4649  Uint8 left = m_Tar->m_Current.GetSize() - m_Read;
4650  if (!left && m_Eof) {
4651  return eRW_Eof;
4652  }
      // Start with what remains of the block being read (a line that follows
      // is not visible here and may adjust this value)
4653  size_t avail = BLOCK_SIZE - (size_t) OFFSET_OF(m_Read);
4655  if (m_Tar->m_BufferPos) {
4656  avail += m_Tar->m_BufferSize - m_Tar->m_BufferPos;
4657  }
      // Nothing buffered by the archive: ask the stream buffer instead
4658  if (!avail && m_Tar->m_Stream.good()) {
4659  // NB: good() subsumes there's streambuf (bad() otherwise)
4660  streamsize sb_avail = m_Tar->m_Stream.rdbuf()->in_avail();
4661  if (sb_avail != -1) {
4662  avail = (size_t) sb_avail;
4663  }
4664  }
      // Never report more than what remains of the entry
4665  *count = avail > left ? (size_t) left : avail;
4666  return eRW_Success;
4667 }
4668 
4669 
4671  const string& name, CTar::TFlags flags)
4672 {
4673  unique_ptr<CTar> tar(new CTar(is, 1/*blocking factor*/));
4674  tar->SetFlags(flags & ~fStreamPipeThrough);
4675 
4676  unique_ptr<CMaskFileName> mask(new CMaskFileName);
4677  mask->Add(name);
4678  tar->SetMask(mask.get(), eTakeOwnership);
4679  mask.rele