tar: add support for --strip-components=N

function                                             old     new   delta
data_extract_all                                     882     995    +113
tar_longopts                                         290     309     +19
tar_main                                             938     942      +4
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 136/0)             Total: 136 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2015-10-22 01:07:13 +02:00
parent c47917865d
commit 6c563e370d
5 changed files with 156 additions and 46 deletions

View File

@ -8,9 +8,17 @@
void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
{
file_header_t *file_header = archive_handle->file_header;
int dst_fd;
int res;
#if ENABLE_FEATURE_TAR_LONG_OPTIONS
char *dst_name;
char *dst_link;
#else
# define dst_name (file_header->name)
# define dst_link (file_header->link_target)
#endif
#if ENABLE_FEATURE_TAR_SELINUX
char *sctx = archive_handle->tar__sctx[PAX_NEXT_FILE];
@ -23,11 +31,47 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
}
#endif
#if ENABLE_FEATURE_TAR_LONG_OPTIONS
dst_name = file_header->name;
dst_link = file_header->link_target;
if (archive_handle->tar__strip_components) {
unsigned n = archive_handle->tar__strip_components;
do {
dst_name = strchr(dst_name, '/');
if (!dst_name || dst_name[1] == '\0') {
data_skip(archive_handle);
return;
}
dst_name++;
/*
* Link target is shortened only for hardlinks:
* softlinks restored unchanged.
*/
if (S_ISREG(file_header->mode)
&& file_header->size == 0
&& dst_link
) {
// GNU tar 1.26 does not check that we reached end of link name:
// if "dir/hardlink" is hardlinked to "file",
// tar xvf a.tar --strip-components=1 says:
// tar: hardlink: Cannot hard link to '': No such file or directory
// and continues processing. We silently skip such entries.
dst_link = strchr(dst_link, '/');
if (!dst_link || dst_link[1] == '\0') {
data_skip(archive_handle);
return;
}
dst_link++;
}
} while (--n != 0);
}
#endif
if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) {
char *slash = strrchr(file_header->name, '/');
char *slash = strrchr(dst_name, '/');
if (slash) {
*slash = '\0';
bb_make_directory(file_header->name, -1, FILEUTILS_RECUR);
bb_make_directory(dst_name, -1, FILEUTILS_RECUR);
*slash = '/';
}
}
@ -38,8 +82,8 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
/* Is it hardlink?
* We encode hard links as regular files of size 0 with a symlink */
if (S_ISREG(file_header->mode)
&& file_header->link_target
&& file_header->size == 0
&& dst_link
) {
/* Ugly special case:
* tar cf t.tar hardlink1 hardlink2 hardlink1
@ -48,22 +92,22 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
* hardlink2 -> hardlink1
* hardlink1 -> hardlink1 <== !!!
*/
if (strcmp(file_header->link_target, file_header->name) == 0)
if (strcmp(dst_link, dst_name) == 0)
goto ret;
}
/* Proceed with deleting */
if (unlink(file_header->name) == -1
if (unlink(dst_name) == -1
&& errno != ENOENT
) {
bb_perror_msg_and_die("can't remove old file %s",
file_header->name);
dst_name);
}
}
}
else if (archive_handle->ah_flags & ARCHIVE_EXTRACT_NEWER) {
/* Remove the existing entry if its older than the extracted entry */
struct stat existing_sb;
if (lstat(file_header->name, &existing_sb) == -1) {
if (lstat(dst_name, &existing_sb) == -1) {
if (errno != ENOENT) {
bb_perror_msg_and_die("can't stat old file");
}
@ -73,30 +117,30 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
&& !S_ISDIR(file_header->mode)
) {
bb_error_msg("%s not created: newer or "
"same age file exists", file_header->name);
"same age file exists", dst_name);
}
data_skip(archive_handle);
goto ret;
}
else if ((unlink(file_header->name) == -1) && (errno != EISDIR)) {
else if ((unlink(dst_name) == -1) && (errno != EISDIR)) {
bb_perror_msg_and_die("can't remove old file %s",
file_header->name);
dst_name);
}
}
/* Handle hard links separately
* We encode hard links as regular files of size 0 with a symlink */
if (S_ISREG(file_header->mode)
&& file_header->link_target
&& file_header->size == 0
&& dst_link
) {
/* hard link */
res = link(file_header->link_target, file_header->name);
if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) {
/* Hard link */
res = link(dst_link, dst_name);
if (res != 0 && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) {
bb_perror_msg("can't create %slink "
"from %s to %s", "hard",
file_header->name,
file_header->link_target);
dst_name,
dst_link);
}
/* Hardlinks have no separate mode/ownership, skip chown/chmod */
goto ret;
@ -106,17 +150,17 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
switch (file_header->mode & S_IFMT) {
case S_IFREG: {
/* Regular file */
char *dst_name;
char *dst_nameN;
int flags = O_WRONLY | O_CREAT | O_EXCL;
if (archive_handle->ah_flags & ARCHIVE_O_TRUNC)
flags = O_WRONLY | O_CREAT | O_TRUNC;
dst_name = file_header->name;
dst_nameN = dst_name;
#ifdef ARCHIVE_REPLACE_VIA_RENAME
if (archive_handle->ah_flags & ARCHIVE_REPLACE_VIA_RENAME)
/* rpm-style temp file name */
dst_name = xasprintf("%s;%x", dst_name, (int)getpid());
dst_nameN = xasprintf("%s;%x", dst_name, (int)getpid());
#endif
dst_fd = xopen3(dst_name,
dst_fd = xopen3(dst_nameN,
flags,
file_header->mode
);
@ -124,32 +168,32 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
close(dst_fd);
#ifdef ARCHIVE_REPLACE_VIA_RENAME
if (archive_handle->ah_flags & ARCHIVE_REPLACE_VIA_RENAME) {
xrename(dst_name, file_header->name);
free(dst_name);
xrename(dst_nameN, dst_name);
free(dst_nameN);
}
#endif
break;
}
case S_IFDIR:
res = mkdir(file_header->name, file_header->mode);
res = mkdir(dst_name, file_header->mode);
if ((res == -1)
&& (errno != EISDIR) /* btw, Linux doesn't return this */
&& (errno != EEXIST)
&& !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
) {
bb_perror_msg("can't make dir %s", file_header->name);
bb_perror_msg("can't make dir %s", dst_name);
}
break;
case S_IFLNK:
/* Symlink */
//TODO: what if file_header->link_target == NULL (say, corrupted tarball?)
res = symlink(file_header->link_target, file_header->name);
if ((res == -1)
res = symlink(file_header->link_target, dst_name);
if (res != 0
&& !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
) {
bb_perror_msg("can't create %slink "
"from %s to %s", "sym",
file_header->name,
dst_name,
file_header->link_target);
}
break;
@ -157,11 +201,11 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
case S_IFBLK:
case S_IFCHR:
case S_IFIFO:
res = mknod(file_header->name, file_header->mode, file_header->device);
res = mknod(dst_name, file_header->mode, file_header->device);
if ((res == -1)
&& !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)
) {
bb_perror_msg("can't create node %s", file_header->name);
bb_perror_msg("can't create node %s", dst_name);
}
break;
default:
@ -186,20 +230,20 @@ void FAST_FUNC data_extract_all(archive_handle_t *archive_handle)
}
#endif
/* GNU tar 1.15.1 uses chown, not lchown */
chown(file_header->name, uid, gid);
chown(dst_name, uid, gid);
}
/* uclibc has no lchmod, glibc is even stranger -
* it has lchmod which seems to do nothing!
* so we use chmod... */
if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_PERM)) {
chmod(file_header->name, file_header->mode);
chmod(dst_name, file_header->mode);
}
if (archive_handle->ah_flags & ARCHIVE_RESTORE_DATE) {
struct timeval t[2];
t[1].tv_sec = t[0].tv_sec = file_header->mtime;
t[1].tv_usec = t[0].tv_usec = 0;
utimes(file_header->name, t);
utimes(dst_name, t);
}
}

View File

@ -418,6 +418,7 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle)
/* Everything up to and including last ".." component is stripped */
overlapping_strcpy(file_header->name, strip_unsafe_prefix(file_header->name));
//TODO: do the same for file_header->link_target?
/* Strip trailing '/' in directories */
/* Must be done after mode is set as '/' is used to check if it's a directory */

View File

@ -152,9 +152,12 @@
# define FNM_LEADING_DIR 0
#endif
//#define DBG(fmt, ...) bb_error_msg("%s: " fmt, __func__, ## __VA_ARGS__)
#define DBG(...) ((void)0)
#if 0
# define DBG(fmt, ...) bb_error_msg("%s: " fmt, __func__, ## __VA_ARGS__)
#else
# define DBG(...) ((void)0)
#endif
#define DBG_OPTION_PARSING 0
#define block_buf bb_common_bufsiz1
@ -855,6 +858,7 @@ enum {
IF_FEATURE_SEAMLESS_Z( OPTBIT_COMPRESS ,)
IF_FEATURE_TAR_NOPRESERVE_TIME(OPTBIT_NOPRESERVE_TIME,)
#if ENABLE_FEATURE_TAR_LONG_OPTIONS
OPTBIT_STRIP_COMPONENTS,
OPTBIT_NORECURSION,
IF_FEATURE_TAR_TO_COMMAND(OPTBIT_2COMMAND ,)
OPTBIT_NUMERIC_OWNER,
@ -879,12 +883,13 @@ enum {
OPT_GZIP = IF_FEATURE_SEAMLESS_GZ( (1 << OPTBIT_GZIP )) + 0, // z
OPT_XZ = IF_FEATURE_SEAMLESS_XZ( (1 << OPTBIT_XZ )) + 0, // J
OPT_COMPRESS = IF_FEATURE_SEAMLESS_Z( (1 << OPTBIT_COMPRESS )) + 0, // Z
OPT_NOPRESERVE_TIME = IF_FEATURE_TAR_NOPRESERVE_TIME((1 << OPTBIT_NOPRESERVE_TIME)) + 0, // m
OPT_NORECURSION = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NORECURSION )) + 0, // no-recursion
OPT_2COMMAND = IF_FEATURE_TAR_TO_COMMAND( (1 << OPTBIT_2COMMAND )) + 0, // to-command
OPT_NUMERIC_OWNER = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NUMERIC_OWNER )) + 0, // numeric-owner
OPT_NOPRESERVE_PERM = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NOPRESERVE_PERM)) + 0, // no-same-permissions
OPT_OVERWRITE = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_OVERWRITE )) + 0, // overwrite
OPT_NOPRESERVE_TIME = IF_FEATURE_TAR_NOPRESERVE_TIME((1 << OPTBIT_NOPRESERVE_TIME)) + 0, // m
OPT_STRIP_COMPONENTS = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_STRIP_COMPONENTS)) + 0, // strip-components
OPT_NORECURSION = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NORECURSION )) + 0, // no-recursion
OPT_2COMMAND = IF_FEATURE_TAR_TO_COMMAND( (1 << OPTBIT_2COMMAND )) + 0, // to-command
OPT_NUMERIC_OWNER = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NUMERIC_OWNER )) + 0, // numeric-owner
OPT_NOPRESERVE_PERM = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_NOPRESERVE_PERM)) + 0, // no-same-permissions
OPT_OVERWRITE = IF_FEATURE_TAR_LONG_OPTIONS((1 << OPTBIT_OVERWRITE )) + 0, // overwrite
OPT_ANY_COMPRESS = (OPT_BZIP2 | OPT_LZMA | OPT_GZIP | OPT_XZ | OPT_COMPRESS),
};
@ -928,6 +933,7 @@ static const char tar_longopts[] ALIGN1 =
# if ENABLE_FEATURE_TAR_NOPRESERVE_TIME
"touch\0" No_argument "m"
# endif
"strip-components\0" Required_argument "\xf9"
"no-recursion\0" No_argument "\xfa"
# if ENABLE_FEATURE_TAR_TO_COMMAND
"to-command\0" Required_argument "\xfb"
@ -973,11 +979,15 @@ int tar_main(int argc UNUSED_PARAM, char **argv)
"tt:vv:" // count -t,-v
IF_FEATURE_TAR_FROM("X::T::") // cumulative lists
#if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM
"\xff::" // cumulative lists for --exclude
"\xff::" // --exclude=PATTERN is a list
#endif
IF_FEATURE_TAR_CREATE("c:") "t:x:" // at least one of these is reqd
IF_FEATURE_TAR_CREATE("c--tx:t--cx:x--ct") // mutually exclusive
IF_NOT_FEATURE_TAR_CREATE("t--x:x--t"); // mutually exclusive
IF_NOT_FEATURE_TAR_CREATE("t--x:x--t") // mutually exclusive
#if ENABLE_FEATURE_TAR_LONG_OPTIONS
":\xf9+" // --strip-components=NUM
#endif
;
#if ENABLE_FEATURE_TAR_LONG_OPTIONS
applet_long_options = tar_longopts;
#endif
@ -1018,10 +1028,14 @@ int tar_main(int argc UNUSED_PARAM, char **argv)
IF_FEATURE_SEAMLESS_XZ( "J" )
IF_FEATURE_SEAMLESS_Z( "Z" )
IF_FEATURE_TAR_NOPRESERVE_TIME("m")
IF_FEATURE_TAR_LONG_OPTIONS("\xf9:") // --strip-components
, &base_dir // -C dir
, &tar_filename // -f filename
IF_FEATURE_TAR_FROM(, &(tar_handle->accept)) // T
IF_FEATURE_TAR_FROM(, &(tar_handle->reject)) // X
#if ENABLE_FEATURE_TAR_LONG_OPTIONS
, &tar_handle->tar__strip_components // --strip-components
#endif
IF_FEATURE_TAR_TO_COMMAND(, &(tar_handle->tar__to_command)) // --to-command
#if ENABLE_FEATURE_TAR_LONG_OPTIONS && ENABLE_FEATURE_TAR_FROM
, &excludes // --exclude
@ -1029,11 +1043,49 @@ int tar_main(int argc UNUSED_PARAM, char **argv)
, &verboseFlag // combined count for -t and -v
, &verboseFlag // combined count for -t and -v
);
//bb_error_msg("opt:%08x", opt);
#if DBG_OPTION_PARSING
bb_error_msg("opt: 0x%08x", opt);
# define showopt(o) bb_error_msg("opt & %s(%x): %x", #o, o, opt & o);
showopt(OPT_TEST );
showopt(OPT_EXTRACT );
showopt(OPT_BASEDIR );
showopt(OPT_TARNAME );
showopt(OPT_2STDOUT );
showopt(OPT_NOPRESERVE_OWNER);
showopt(OPT_P );
showopt(OPT_VERBOSE );
showopt(OPT_KEEP_OLD );
showopt(OPT_CREATE );
showopt(OPT_DEREFERENCE );
showopt(OPT_BZIP2 );
showopt(OPT_LZMA );
showopt(OPT_INCLUDE_FROM );
showopt(OPT_EXCLUDE_FROM );
showopt(OPT_GZIP );
showopt(OPT_XZ );
showopt(OPT_COMPRESS );
showopt(OPT_NOPRESERVE_TIME );
showopt(OPT_STRIP_COMPONENTS);
showopt(OPT_NORECURSION );
showopt(OPT_2COMMAND );
showopt(OPT_NUMERIC_OWNER );
showopt(OPT_NOPRESERVE_PERM );
showopt(OPT_OVERWRITE );
showopt(OPT_ANY_COMPRESS );
bb_error_msg("base_dir:'%s'", base_dir);
bb_error_msg("tar_filename:'%s'", tar_filename);
bb_error_msg("verboseFlag:%d", verboseFlag);
bb_error_msg("tar_handle->tar__to_command:'%s'", tar_handle->tar__to_command);
bb_error_msg("tar_handle->tar__strip_components:%u", tar_handle->tar__strip_components);
return 0;
# undef showopt
#endif
argv += optind;
if (verboseFlag) tar_handle->action_header = header_verbose_list;
if (verboseFlag == 1) tar_handle->action_header = header_list;
if (verboseFlag)
tar_handle->action_header = header_verbose_list;
if (verboseFlag == 1)
tar_handle->action_header = header_list;
if (opt & OPT_EXTRACT)
tar_handle->action_data = data_extract_all;

View File

@ -77,6 +77,9 @@ typedef struct archive_handle_t {
off_t offset;
/* Archiver specific. Can make it a union if it ever gets big */
#if ENABLE_FEATURE_TAR_LONG_OPTIONS
unsigned tar__strip_components;
#endif
#define PAX_NEXT_FILE 0
#define PAX_GLOBAL 1
#if ENABLE_TAR || ENABLE_DPKG || ENABLE_DPKG_DEB

View File

@ -53,6 +53,15 @@ dd if=/dev/zero bs=512 count=20 2>/dev/null | tar xvf - 2>&1; echo $?
"" ""
SKIP=
# "tar cf test.tar input input_dir/ input_hard1 input_hard2 input_hard1 input_dir/ input":
# GNU tar 1.26 records as hardlinks:
# input_hard2 -> input_hard1
# input_hard1 -> input_hard1 (!!!)
# input_dir/file -> input_dir/file
# input -> input
# As of 1.24.0, we don't record last two: for them, nlink==1
# and we check for "hardlink"ness only files with nlink!=1
# We also don't use "hrw-r--r--" notation for hardlinks in "tar tv" listing.
optional FEATURE_TAR_CREATE FEATURE_LS_SORTFILES
testing "tar hardlinks and repeated files" '\
rm -rf input_* test.tar 2>/dev/null
@ -64,6 +73,7 @@ chmod -R 644 *
chmod 755 input_dir
tar cf test.tar input input_dir/ input_hard1 input_hard2 input_hard1 input_dir/ input
tar tvf test.tar | sed "s/.*[0-9] input/input/"
rm -rf input_dir
tar xf test.tar 2>&1
echo Ok: $?
ls -l . input_dir/* | grep input_ | sed "s/\\(^[^ ]*\\) .* input/\\1 input/"