unzip: do not use CDF.extra_len, read local file header. Closes 9536

While at it, shorten many field and variable names.

function                                             old     new   delta
unzip_main                                          2334    2376     +42

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2017-01-08 14:14:19 +01:00
parent 86584e134e
commit 0ffac1cc22
2 changed files with 125 additions and 115 deletions

View File

@ -62,8 +62,8 @@
enum {
#if BB_BIG_ENDIAN
ZIP_FILEHEADER_MAGIC = 0x504b0304,
ZIP_CDF_MAGIC = 0x504b0102, /* central directory's file header */
ZIP_CDE_MAGIC = 0x504b0506, /* "end of central directory" record */
ZIP_CDF_MAGIC = 0x504b0102, /* CDF item */
ZIP_CDE_MAGIC = 0x504b0506, /* End of CDF */
ZIP_DD_MAGIC = 0x504b0708,
#else
ZIP_FILEHEADER_MAGIC = 0x04034b50,
@ -91,16 +91,16 @@ typedef union {
/* filename follows (not NUL terminated) */
/* extra field follows */
/* data follows */
} formatted PACKED;
} fmt PACKED;
} zip_header_t; /* PACKED - gcc 4.2.1 doesn't like it (spews warning) */
#define FIX_ENDIANNESS_ZIP(zip_header) \
#define FIX_ENDIANNESS_ZIP(zip) \
do { if (BB_BIG_ENDIAN) { \
(zip_header).formatted.crc32 = SWAP_LE32((zip_header).formatted.crc32 ); \
(zip_header).formatted.cmpsize = SWAP_LE32((zip_header).formatted.cmpsize ); \
(zip_header).formatted.ucmpsize = SWAP_LE32((zip_header).formatted.ucmpsize ); \
(zip_header).formatted.filename_len = SWAP_LE16((zip_header).formatted.filename_len); \
(zip_header).formatted.extra_len = SWAP_LE16((zip_header).formatted.extra_len ); \
(zip).fmt.crc32 = SWAP_LE32((zip).fmt.crc32 ); \
(zip).fmt.cmpsize = SWAP_LE32((zip).fmt.cmpsize ); \
(zip).fmt.ucmpsize = SWAP_LE32((zip).fmt.ucmpsize ); \
(zip).fmt.filename_len = SWAP_LE16((zip).fmt.filename_len); \
(zip).fmt.extra_len = SWAP_LE16((zip).fmt.extra_len ); \
}} while (0)
#define CDF_HEADER_LEN 42
@ -118,39 +118,39 @@ typedef union {
uint32_t crc32; /* 12-15 */
uint32_t cmpsize; /* 16-19 */
uint32_t ucmpsize; /* 20-23 */
uint16_t file_name_length; /* 24-25 */
uint16_t extra_field_length; /* 26-27 */
uint16_t filename_len; /* 24-25 */
uint16_t extra_len; /* 26-27 */
uint16_t file_comment_length; /* 28-29 */
uint16_t disk_number_start; /* 30-31 */
uint16_t internal_file_attributes; /* 32-33 */
uint32_t external_file_attributes PACKED; /* 34-37 */
uint16_t internal_attributes; /* 32-33 */
uint32_t external_attributes PACKED; /* 34-37 */
uint32_t relative_offset_of_local_header PACKED; /* 38-41 */
/* filename follows (not NUL terminated) */
/* extra field follows */
/* comment follows */
} formatted PACKED;
/* file comment follows */
} fmt PACKED;
} cdf_header_t;
#define FIX_ENDIANNESS_CDF(cdf_header) \
#define FIX_ENDIANNESS_CDF(cdf) \
do { if (BB_BIG_ENDIAN) { \
(cdf_header).formatted.version_made_by = SWAP_LE16((cdf_header).formatted.version_made_by); \
(cdf_header).formatted.version_needed = SWAP_LE16((cdf_header).formatted.version_needed); \
(cdf_header).formatted.method = SWAP_LE16((cdf_header).formatted.method ); \
(cdf_header).formatted.modtime = SWAP_LE16((cdf_header).formatted.modtime ); \
(cdf_header).formatted.moddate = SWAP_LE16((cdf_header).formatted.moddate ); \
(cdf_header).formatted.crc32 = SWAP_LE32((cdf_header).formatted.crc32 ); \
(cdf_header).formatted.cmpsize = SWAP_LE32((cdf_header).formatted.cmpsize ); \
(cdf_header).formatted.ucmpsize = SWAP_LE32((cdf_header).formatted.ucmpsize ); \
(cdf_header).formatted.file_name_length = SWAP_LE16((cdf_header).formatted.file_name_length); \
(cdf_header).formatted.extra_field_length = SWAP_LE16((cdf_header).formatted.extra_field_length); \
(cdf_header).formatted.file_comment_length = SWAP_LE16((cdf_header).formatted.file_comment_length); \
(cdf_header).formatted.external_file_attributes = SWAP_LE32((cdf_header).formatted.external_file_attributes); \
(cdf).fmt.version_made_by = SWAP_LE16((cdf).fmt.version_made_by); \
(cdf).fmt.version_needed = SWAP_LE16((cdf).fmt.version_needed); \
(cdf).fmt.method = SWAP_LE16((cdf).fmt.method ); \
(cdf).fmt.modtime = SWAP_LE16((cdf).fmt.modtime ); \
(cdf).fmt.moddate = SWAP_LE16((cdf).fmt.moddate ); \
(cdf).fmt.crc32 = SWAP_LE32((cdf).fmt.crc32 ); \
(cdf).fmt.cmpsize = SWAP_LE32((cdf).fmt.cmpsize ); \
(cdf).fmt.ucmpsize = SWAP_LE32((cdf).fmt.ucmpsize ); \
(cdf).fmt.filename_len = SWAP_LE16((cdf).fmt.filename_len); \
(cdf).fmt.extra_len = SWAP_LE16((cdf).fmt.extra_len ); \
(cdf).fmt.file_comment_length = SWAP_LE16((cdf).fmt.file_comment_length); \
(cdf).fmt.external_attributes = SWAP_LE32((cdf).fmt.external_attributes); \
}} while (0)
#define CDE_HEADER_LEN 16
#define CDE_LEN 16
typedef union {
uint8_t raw[CDE_HEADER_LEN];
uint8_t raw[CDE_LEN];
struct {
/* uint32_t signature; 50 4b 05 06 */
uint16_t this_disk_no;
@ -159,14 +159,14 @@ typedef union {
uint16_t cdf_entries_total;
uint32_t cdf_size;
uint32_t cdf_offset;
/* uint16_t file_comment_length; */
/* .ZIP file comment (variable size) */
} formatted PACKED;
} cde_header_t;
/* uint16_t archive_comment_length; */
/* archive comment follows */
} fmt PACKED;
} cde_t;
#define FIX_ENDIANNESS_CDE(cde_header) \
#define FIX_ENDIANNESS_CDE(cde) \
do { if (BB_BIG_ENDIAN) { \
(cde_header).formatted.cdf_offset = SWAP_LE32((cde_header).formatted.cdf_offset); \
(cde).fmt.cdf_offset = SWAP_LE32((cde).fmt.cdf_offset); \
}} while (0)
struct BUG {
@ -175,13 +175,13 @@ struct BUG {
* even though the elements are all in the right place.
*/
char BUG_zip_header_must_be_26_bytes[
offsetof(zip_header_t, formatted.extra_len) + 2
offsetof(zip_header_t, fmt.extra_len) + 2
== ZIP_HEADER_LEN ? 1 : -1];
char BUG_cdf_header_must_be_42_bytes[
offsetof(cdf_header_t, formatted.relative_offset_of_local_header) + 4
offsetof(cdf_header_t, fmt.relative_offset_of_local_header) + 4
== CDF_HEADER_LEN ? 1 : -1];
char BUG_cde_header_must_be_16_bytes[
sizeof(cde_header_t) == CDE_HEADER_LEN ? 1 : -1];
char BUG_cde_must_be_16_bytes[
sizeof(cde_t) == CDE_LEN ? 1 : -1];
};
@ -207,7 +207,7 @@ enum { zip_fd = 3 };
/* NB: does not preserve file position! */
static uint32_t find_cdf_offset(void)
{
cde_header_t cde_header;
cde_t cde;
unsigned char *buf;
unsigned char *p;
off_t end;
@ -228,7 +228,7 @@ static uint32_t find_cdf_offset(void)
found = BAD_CDF_OFFSET;
p = buf;
while (p <= buf + PEEK_FROM_END - CDE_HEADER_LEN - 4) {
while (p <= buf + PEEK_FROM_END - CDE_LEN - 4) {
if (*p != 'P') {
p++;
continue;
@ -240,19 +240,19 @@ static uint32_t find_cdf_offset(void)
if (*++p != 6)
continue;
/* we found CDE! */
memcpy(cde_header.raw, p + 1, CDE_HEADER_LEN);
FIX_ENDIANNESS_CDE(cde_header);
memcpy(cde.raw, p + 1, CDE_LEN);
FIX_ENDIANNESS_CDE(cde);
/*
* I've seen .ZIP files with seemingly valid CDEs
* where cdf_offset points past EOF - ??
* This check ignores such CDEs:
*/
if (cde_header.formatted.cdf_offset < end + (p - buf)) {
found = cde_header.formatted.cdf_offset;
if (cde.fmt.cdf_offset < end + (p - buf)) {
found = cde.fmt.cdf_offset;
dbg("Possible cdf_offset:0x%x at 0x%"OFF_FMT"x",
(unsigned)found, end + (p-3 - buf));
dbg(" cdf_offset+cdf_size:0x%x",
(unsigned)(found + SWAP_LE32(cde_header.formatted.cdf_size)));
(unsigned)(found + SWAP_LE32(cde.fmt.cdf_size)));
/*
* We do not "break" here because only the last CDE is valid.
* I've seen a .zip archive which contained a .zip file,
@ -266,7 +266,7 @@ static uint32_t find_cdf_offset(void)
return found;
};
static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf_ptr)
static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf)
{
uint32_t magic;
@ -276,23 +276,25 @@ static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf_ptr)
dbg("Reading CDF at 0x%x", (unsigned)cdf_offset);
xlseek(zip_fd, cdf_offset, SEEK_SET);
xread(zip_fd, &magic, 4);
/* Central Directory End? */
/* Central Directory End? Assume CDF has ended.
* (more correct method is to use cde.cdf_entries_total counter)
*/
if (magic == ZIP_CDE_MAGIC) {
dbg("got ZIP_CDE_MAGIC");
return 0; /* EOF */
}
xread(zip_fd, cdf_ptr->raw, CDF_HEADER_LEN);
xread(zip_fd, cdf->raw, CDF_HEADER_LEN);
FIX_ENDIANNESS_CDF(*cdf_ptr);
dbg(" file_name_length:%u extra_field_length:%u file_comment_length:%u",
(unsigned)cdf_ptr->formatted.file_name_length,
(unsigned)cdf_ptr->formatted.extra_field_length,
(unsigned)cdf_ptr->formatted.file_comment_length
FIX_ENDIANNESS_CDF(*cdf);
dbg(" filename_len:%u extra_len:%u file_comment_length:%u",
(unsigned)cdf->fmt.filename_len,
(unsigned)cdf->fmt.extra_len,
(unsigned)cdf->fmt.file_comment_length
);
cdf_offset += 4 + CDF_HEADER_LEN
+ cdf_ptr->formatted.file_name_length
+ cdf_ptr->formatted.extra_field_length
+ cdf_ptr->formatted.file_comment_length;
+ cdf->fmt.filename_len
+ cdf->fmt.extra_len
+ cdf->fmt.file_comment_length;
return cdf_offset;
};
@ -315,28 +317,28 @@ static void unzip_create_leading_dirs(const char *fn)
free(name);
}
static void unzip_extract(zip_header_t *zip_header, int dst_fd)
static void unzip_extract(zip_header_t *zip, int dst_fd)
{
if (zip_header->formatted.method == 0) {
if (zip->fmt.method == 0) {
/* Method 0 - stored (not compressed) */
off_t size = zip_header->formatted.ucmpsize;
off_t size = zip->fmt.ucmpsize;
if (size)
bb_copyfd_exact_size(zip_fd, dst_fd, size);
} else {
/* Method 8 - inflate */
transformer_state_t xstate;
init_transformer_state(&xstate);
xstate.bytes_in = zip_header->formatted.cmpsize;
xstate.bytes_in = zip->fmt.cmpsize;
xstate.src_fd = zip_fd;
xstate.dst_fd = dst_fd;
if (inflate_unzip(&xstate) < 0)
bb_error_msg_and_die("inflate error");
/* Validate decompression - crc */
if (zip_header->formatted.crc32 != (xstate.crc32 ^ 0xffffffffL)) {
if (zip->fmt.crc32 != (xstate.crc32 ^ 0xffffffffL)) {
bb_error_msg_and_die("crc error");
}
/* Validate decompression - size */
if (zip_header->formatted.ucmpsize != xstate.bytes_out) {
if (zip->fmt.ucmpsize != xstate.bytes_out) {
/* Don't die. Who knows, maybe len calculation
* was botched somewhere. After all, crc matched! */
bb_error_msg("bad length");
@ -563,7 +565,7 @@ int unzip_main(int argc, char **argv)
total_entries = 0;
cdf_offset = find_cdf_offset(); /* try to seek to the end, find CDE and CDF start */
while (1) {
zip_header_t zip_header;
zip_header_t zip;
mode_t dir_mode = 0777;
#if ENABLE_FEATURE_UNZIP_CDF
mode_t file_mode = 0666;
@ -589,7 +591,7 @@ int unzip_main(int argc, char **argv)
/* Check magic number */
xread(zip_fd, &magic, 4);
/* Central directory? It's at the end, so exit */
/* CDF item? Assume there are no more files, exit */
if (magic == ZIP_CDF_MAGIC) {
dbg("got ZIP_CDF_MAGIC");
break;
@ -605,71 +607,74 @@ int unzip_main(int argc, char **argv)
bb_error_msg_and_die("invalid zip magic %08X", (int)magic);
dbg("got ZIP_FILEHEADER_MAGIC");
xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN);
FIX_ENDIANNESS_ZIP(zip_header);
if ((zip_header.formatted.method != 0)
&& (zip_header.formatted.method != 8)
xread(zip_fd, zip.raw, ZIP_HEADER_LEN);
FIX_ENDIANNESS_ZIP(zip);
if ((zip.fmt.method != 0)
&& (zip.fmt.method != 8)
) {
/* TODO? method 12: bzip2, method 14: LZMA */
bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method);
bb_error_msg_and_die("unsupported method %d", zip.fmt.method);
}
if (zip_header.formatted.zip_flags & SWAP_LE16(0x0009)) {
if (zip.fmt.zip_flags & SWAP_LE16(0x0009)) {
bb_error_msg_and_die("zip flags 1 and 8 are not supported");
}
}
#if ENABLE_FEATURE_UNZIP_CDF
else {
/* cdf_offset is valid (and we know the file is seekable) */
cdf_header_t cdf_header;
cdf_offset = read_next_cdf(cdf_offset, &cdf_header);
cdf_header_t cdf;
cdf_offset = read_next_cdf(cdf_offset, &cdf);
if (cdf_offset == 0) /* EOF? */
break;
# if 0
# if 1
xlseek(zip_fd,
SWAP_LE32(cdf_header.formatted.relative_offset_of_local_header) + 4,
SWAP_LE32(cdf.fmt.relative_offset_of_local_header) + 4,
SEEK_SET);
xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN);
FIX_ENDIANNESS_ZIP(zip_header);
if (zip_header.formatted.zip_flags & SWAP_LE16(0x0008)) {
xread(zip_fd, zip.raw, ZIP_HEADER_LEN);
FIX_ENDIANNESS_ZIP(zip);
if (zip.fmt.zip_flags & SWAP_LE16(0x0008)) {
/* 0x0008 - streaming. [u]cmpsize can be reliably gotten
* only from Central Directory.
*/
zip_header.formatted.crc32 = cdf_header.formatted.crc32;
zip_header.formatted.cmpsize = cdf_header.formatted.cmpsize;
zip_header.formatted.ucmpsize = cdf_header.formatted.ucmpsize;
zip.fmt.crc32 = cdf.fmt.crc32;
zip.fmt.cmpsize = cdf.fmt.cmpsize;
zip.fmt.ucmpsize = cdf.fmt.ucmpsize;
}
# else
/* CDF has the same data as local header, no need to read the latter */
memcpy(&zip_header.formatted.version,
&cdf_header.formatted.version_needed, ZIP_HEADER_LEN);
/* CDF has the same data as local header, no need to read the latter...
* ...not really. An archive was seen with cdf.extra_len == 6 but
* zip.extra_len == 0.
*/
memcpy(&zip.fmt.version,
&cdf.fmt.version_needed, ZIP_HEADER_LEN);
xlseek(zip_fd,
SWAP_LE32(cdf_header.formatted.relative_offset_of_local_header) + 4 + ZIP_HEADER_LEN,
SWAP_LE32(cdf.fmt.relative_offset_of_local_header) + 4 + ZIP_HEADER_LEN,
SEEK_SET);
# endif
if ((cdf_header.formatted.version_made_by >> 8) == 3) {
if ((cdf.fmt.version_made_by >> 8) == 3) {
/* This archive is created on Unix */
dir_mode = file_mode = (cdf_header.formatted.external_file_attributes >> 16);
dir_mode = file_mode = (cdf.fmt.external_attributes >> 16);
}
}
#endif
if (zip_header.formatted.zip_flags & SWAP_LE16(0x0001)) {
if (zip.fmt.zip_flags & SWAP_LE16(0x0001)) {
/* 0x0001 - encrypted */
bb_error_msg_and_die("zip flag 1 (encryption) is not supported");
}
dbg("File cmpsize:0x%x extra_len:0x%x ucmpsize:0x%x",
(unsigned)zip_header.formatted.cmpsize,
(unsigned)zip_header.formatted.extra_len,
(unsigned)zip_header.formatted.ucmpsize
(unsigned)zip.fmt.cmpsize,
(unsigned)zip.fmt.extra_len,
(unsigned)zip.fmt.ucmpsize
);
/* Read filename */
free(dst_fn);
dst_fn = xzalloc(zip_header.formatted.filename_len + 1);
xread(zip_fd, dst_fn, zip_header.formatted.filename_len);
dst_fn = xzalloc(zip.fmt.filename_len + 1);
xread(zip_fd, dst_fn, zip.fmt.filename_len);
/* Skip extra header bytes */
unzip_skip(zip_header.formatted.extra_len);
unzip_skip(zip.fmt.extra_len);
/* Guard against "/abspath", "/../" and similar attacks */
overlapping_strcpy(dst_fn, strip_unsafe_prefix(dst_fn));
@ -684,32 +689,32 @@ int unzip_main(int argc, char **argv)
/* List entry */
char dtbuf[sizeof("mm-dd-yyyy hh:mm")];
sprintf(dtbuf, "%02u-%02u-%04u %02u:%02u",
(zip_header.formatted.moddate >> 5) & 0xf, // mm: 0x01e0
(zip_header.formatted.moddate) & 0x1f, // dd: 0x001f
(zip_header.formatted.moddate >> 9) + 1980, // yy: 0xfe00
(zip_header.formatted.modtime >> 11), // hh: 0xf800
(zip_header.formatted.modtime >> 5) & 0x3f // mm: 0x07e0
// seconds/2 are not shown, encoded in ----------- 0x001f
(zip.fmt.moddate >> 5) & 0xf, // mm: 0x01e0
(zip.fmt.moddate) & 0x1f, // dd: 0x001f
(zip.fmt.moddate >> 9) + 1980, // yy: 0xfe00
(zip.fmt.modtime >> 11), // hh: 0xf800
(zip.fmt.modtime >> 5) & 0x3f // mm: 0x07e0
// seconds/2 not shown, encoded in -- 0x001f
);
if (!verbose) {
// " Length Date Time Name\n"
// "--------- ---------- ----- ----"
printf( "%9u " "%s " "%s\n",
(unsigned)zip_header.formatted.ucmpsize,
(unsigned)zip.fmt.ucmpsize,
dtbuf,
dst_fn);
} else {
unsigned long percents = zip_header.formatted.ucmpsize - zip_header.formatted.cmpsize;
unsigned long percents = zip.fmt.ucmpsize - zip.fmt.cmpsize;
if ((int32_t)percents < 0)
percents = 0; /* happens if ucmpsize < cmpsize */
percents = percents * 100;
if (zip_header.formatted.ucmpsize)
percents /= zip_header.formatted.ucmpsize;
if (zip.fmt.ucmpsize)
percents /= zip.fmt.ucmpsize;
// " Length Method Size Cmpr Date Time CRC-32 Name\n"
// "-------- ------ ------- ---- ---------- ----- -------- ----"
printf( "%8u %s" "%9u%4u%% " "%s " "%08x " "%s\n",
(unsigned)zip_header.formatted.ucmpsize,
zip_header.formatted.method == 0 ? "Stored" : "Defl:N", /* Defl is method 8 */
(unsigned)zip.fmt.ucmpsize,
zip.fmt.method == 0 ? "Stored" : "Defl:N", /* Defl is method 8 */
/* TODO: show other methods?
* 1 - Shrunk
* 2 - Reduced with compression factor 1
@ -722,15 +727,16 @@ int unzip_main(int argc, char **argv)
* 10 - PKWARE Data Compression Library Imploding
* 11 - Reserved by PKWARE
* 12 - BZIP2
* 14 - LZMA
*/
(unsigned)zip_header.formatted.cmpsize,
(unsigned)zip.fmt.cmpsize,
(unsigned)percents,
dtbuf,
zip_header.formatted.crc32,
zip.fmt.crc32,
dst_fn);
total_size += zip_header.formatted.cmpsize;
total_size += zip.fmt.cmpsize;
}
total_usize += zip_header.formatted.ucmpsize;
total_usize += zip.fmt.ucmpsize;
i = 'n';
} else if (dst_fd == STDOUT_FILENO) {
/* Extracting to STDOUT */
@ -798,9 +804,11 @@ int unzip_main(int argc, char **argv)
#endif
case -1: /* Unzip */
if (!quiet) {
printf(" inflating: %s\n", dst_fn);
printf(/* zip.fmt.method == 0
? " extracting: %s\n"
: */ " inflating: %s\n", dst_fn);
}
unzip_extract(&zip_header, dst_fd);
unzip_extract(&zip, dst_fd);
if (dst_fd != STDOUT_FILENO) {
/* closing STDOUT is potentially bad for future business */
close(dst_fd);
@ -811,7 +819,7 @@ int unzip_main(int argc, char **argv)
overwrite = O_NEVER;
case 'n':
/* Skip entry data */
unzip_skip(zip_header.formatted.cmpsize);
unzip_skip(zip.fmt.cmpsize);
break;
case 'r':

View File

@ -34,7 +34,9 @@ rm foo.zip
optional FEATURE_UNZIP_CDF
testing "unzip (bad archive)" "uudecode; unzip bad.zip 2>&1; echo \$?" \
"Archive: bad.zip
unzip: short read
inflating: ]3j½r«IK-%Ix
unzip: corrupted data
unzip: inflate error
1
" \
"" "\