From e3c4db8b396e955ca01648ba51c0fd093f3cab56 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 5 Jan 2017 11:43:53 +0100 Subject: unzip: properly use CDF to find compressed files. Closes 9536 function old new delta unzip_main 2437 2350 -87 Signed-off-by: Denys Vlasenko --- archival/unzip.c | 285 ++++++++++++++++++++++++++++++++----------------------- 1 file changed, 165 insertions(+), 120 deletions(-) (limited to 'archival/unzip.c') diff --git a/archival/unzip.c b/archival/unzip.c index c540485ac..edef22f75 100644 --- a/archival/unzip.c +++ b/archival/unzip.c @@ -16,7 +16,6 @@ * TODO * Zip64 + other methods */ - //config:config UNZIP //config: bool "unzip" //config: default y @@ -24,8 +23,17 @@ //config: unzip will list or extract files from a ZIP archive, //config: commonly found on DOS/WIN systems. The default behavior //config: (with no options) is to extract the archive into the -//config: current directory. Use the `-d' option to extract to a -//config: directory of your choice. +//config: current directory. +//config: +//config:config FEATURE_UNZIP_CDF +//config: bool "Read and use Central Directory data" +//config: default y +//config: depends on UNZIP +//config: help +//config: If you know that you only need to deal with simple +//config: ZIP files without deleted/updated files, SFX archves etc, +//config: you can reduce code size by unselecting this option. +//config: To support less trivial ZIPs, say Y. //applet:IF_UNZIP(APPLET(unzip, BB_DIR_USR_BIN, BB_SUID_DROP)) //kbuild:lib-$(CONFIG_UNZIP) += unzip.o @@ -80,30 +88,20 @@ typedef union { uint32_t ucmpsize PACKED; /* 18-21 */ uint16_t filename_len; /* 22-23 */ uint16_t extra_len; /* 24-25 */ + /* filename follows (not NUL terminated) */ + /* extra field follows */ + /* data follows */ } formatted PACKED; } zip_header_t; /* PACKED - gcc 4.2.1 doesn't like it (spews warning) */ -/* Check the offset of the last element, not the length. This leniency - * allows for poor packing, whereby the overall struct may be too long, - * even though the elements are all in the right place. - */ -struct BUG_zip_header_must_be_26_bytes { - char BUG_zip_header_must_be_26_bytes[ - offsetof(zip_header_t, formatted.extra_len) + 2 - == ZIP_HEADER_LEN ? 1 : -1]; -}; - -#define FIX_ENDIANNESS_ZIP(zip_header) do { \ - (zip_header).formatted.version = SWAP_LE16((zip_header).formatted.version ); \ - (zip_header).formatted.method = SWAP_LE16((zip_header).formatted.method ); \ - (zip_header).formatted.modtime = SWAP_LE16((zip_header).formatted.modtime ); \ - (zip_header).formatted.moddate = SWAP_LE16((zip_header).formatted.moddate ); \ +#define FIX_ENDIANNESS_ZIP(zip_header) \ +do { if (BB_BIG_ENDIAN) { \ (zip_header).formatted.crc32 = SWAP_LE32((zip_header).formatted.crc32 ); \ (zip_header).formatted.cmpsize = SWAP_LE32((zip_header).formatted.cmpsize ); \ (zip_header).formatted.ucmpsize = SWAP_LE32((zip_header).formatted.ucmpsize ); \ (zip_header).formatted.filename_len = SWAP_LE16((zip_header).formatted.filename_len); \ (zip_header).formatted.extra_len = SWAP_LE16((zip_header).formatted.extra_len ); \ -} while (0) +}} while (0) #define CDF_HEADER_LEN 42 @@ -115,8 +113,8 @@ typedef union { uint16_t version_needed; /* 2-3 */ uint16_t cdf_flags; /* 4-5 */ uint16_t method; /* 6-7 */ - uint16_t mtime; /* 8-9 */ - uint16_t mdate; /* 10-11 */ + uint16_t modtime; /* 8-9 */ + uint16_t moddate; /* 10-11 */ uint32_t crc32; /* 12-15 */ uint32_t cmpsize; /* 16-19 */ uint32_t ucmpsize; /* 20-23 */ @@ -127,27 +125,27 @@ typedef union { uint16_t internal_file_attributes; /* 32-33 */ uint32_t external_file_attributes PACKED; /* 34-37 */ uint32_t relative_offset_of_local_header PACKED; /* 38-41 */ + /* filename follows (not NUL terminated) */ + /* extra field follows */ + /* comment follows */ } formatted PACKED; } cdf_header_t; -struct BUG_cdf_header_must_be_42_bytes { - char BUG_cdf_header_must_be_42_bytes[ - offsetof(cdf_header_t, formatted.relative_offset_of_local_header) + 4 - == CDF_HEADER_LEN ? 1 : -1]; -}; - -#define FIX_ENDIANNESS_CDF(cdf_header) do { \ +#define FIX_ENDIANNESS_CDF(cdf_header) \ +do { if (BB_BIG_ENDIAN) { \ + (cdf_header).formatted.version_made_by = SWAP_LE16((cdf_header).formatted.version_made_by); \ + (cdf_header).formatted.version_needed = SWAP_LE16((cdf_header).formatted.version_needed); \ + (cdf_header).formatted.method = SWAP_LE16((cdf_header).formatted.method ); \ + (cdf_header).formatted.modtime = SWAP_LE16((cdf_header).formatted.modtime ); \ + (cdf_header).formatted.moddate = SWAP_LE16((cdf_header).formatted.moddate ); \ (cdf_header).formatted.crc32 = SWAP_LE32((cdf_header).formatted.crc32 ); \ (cdf_header).formatted.cmpsize = SWAP_LE32((cdf_header).formatted.cmpsize ); \ (cdf_header).formatted.ucmpsize = SWAP_LE32((cdf_header).formatted.ucmpsize ); \ (cdf_header).formatted.file_name_length = SWAP_LE16((cdf_header).formatted.file_name_length); \ (cdf_header).formatted.extra_field_length = SWAP_LE16((cdf_header).formatted.extra_field_length); \ (cdf_header).formatted.file_comment_length = SWAP_LE16((cdf_header).formatted.file_comment_length); \ - IF_DESKTOP( \ - (cdf_header).formatted.version_made_by = SWAP_LE16((cdf_header).formatted.version_made_by); \ (cdf_header).formatted.external_file_attributes = SWAP_LE32((cdf_header).formatted.external_file_attributes); \ - ) \ -} while (0) +}} while (0) #define CDE_HEADER_LEN 16 @@ -166,20 +164,38 @@ typedef union { } formatted PACKED; } cde_header_t; -struct BUG_cde_header_must_be_16_bytes { +#define FIX_ENDIANNESS_CDE(cde_header) \ +do { if (BB_BIG_ENDIAN) { \ + (cde_header).formatted.cdf_offset = SWAP_LE32((cde_header).formatted.cdf_offset); \ +}} while (0) + +struct BUG { + /* Check the offset of the last element, not the length. This leniency + * allows for poor packing, whereby the overall struct may be too long, + * even though the elements are all in the right place. + */ + char BUG_zip_header_must_be_26_bytes[ + offsetof(zip_header_t, formatted.extra_len) + 2 + == ZIP_HEADER_LEN ? 1 : -1]; + char BUG_cdf_header_must_be_42_bytes[ + offsetof(cdf_header_t, formatted.relative_offset_of_local_header) + 4 + == CDF_HEADER_LEN ? 1 : -1]; char BUG_cde_header_must_be_16_bytes[ sizeof(cde_header_t) == CDE_HEADER_LEN ? 1 : -1]; }; -#define FIX_ENDIANNESS_CDE(cde_header) do { \ - (cde_header).formatted.cdf_offset = SWAP_LE32((cde_header).formatted.cdf_offset); \ -} while (0) enum { zip_fd = 3 }; -#if ENABLE_DESKTOP +/* This value means that we failed to find CDF */ +#define BAD_CDF_OFFSET ((uint32_t)0xffffffff) + +#if !ENABLE_FEATURE_UNZIP_CDF +# define find_cdf_offset() BAD_CDF_OFFSET + +#else /* Seen in the wild: * Self-extracting PRO2K3XP_32.exe contains 19078464 byte zip archive, * where CDE was nearly 48 kbytes before EOF. @@ -188,25 +204,26 @@ enum { zip_fd = 3 }; * To make extraction work, bumped PEEK_FROM_END from 16k to 64k. */ #define PEEK_FROM_END (64*1024) - -/* This value means that we failed to find CDF */ -#define BAD_CDF_OFFSET ((uint32_t)0xffffffff) - /* NB: does not preserve file position! */ static uint32_t find_cdf_offset(void) { cde_header_t cde_header; + unsigned char *buf; unsigned char *p; off_t end; - unsigned char *buf = xzalloc(PEEK_FROM_END); uint32_t found; - end = xlseek(zip_fd, 0, SEEK_END); + end = lseek(zip_fd, 0, SEEK_END); + if (end == (off_t) -1) + return BAD_CDF_OFFSET; + end -= PEEK_FROM_END; if (end < 0) end = 0; + dbg("Looking for cdf_offset starting from 0x%"OFF_FMT"x", end); xlseek(zip_fd, end, SEEK_SET); + buf = xzalloc(PEEK_FROM_END); full_read(zip_fd, buf, PEEK_FROM_END); found = BAD_CDF_OFFSET; @@ -252,30 +269,36 @@ static uint32_t find_cdf_offset(void) static uint32_t read_next_cdf(uint32_t cdf_offset, cdf_header_t *cdf_ptr) { off_t org; + uint32_t magic; - org = xlseek(zip_fd, 0, SEEK_CUR); + if (cdf_offset == BAD_CDF_OFFSET) + return cdf_offset; - if (!cdf_offset) - cdf_offset = find_cdf_offset(); - - if (cdf_offset != BAD_CDF_OFFSET) { - dbg("Reading CDF at 0x%x", (unsigned)cdf_offset); - xlseek(zip_fd, cdf_offset + 4, SEEK_SET); - xread(zip_fd, cdf_ptr->raw, CDF_HEADER_LEN); - FIX_ENDIANNESS_CDF(*cdf_ptr); - dbg(" file_name_length:%u extra_field_length:%u file_comment_length:%u", - (unsigned)cdf_ptr->formatted.file_name_length, - (unsigned)cdf_ptr->formatted.extra_field_length, - (unsigned)cdf_ptr->formatted.file_comment_length - ); - cdf_offset += 4 + CDF_HEADER_LEN - + cdf_ptr->formatted.file_name_length - + cdf_ptr->formatted.extra_field_length - + cdf_ptr->formatted.file_comment_length; + org = xlseek(zip_fd, 0, SEEK_CUR); + dbg("Reading CDF at 0x%x", (unsigned)cdf_offset); + xlseek(zip_fd, cdf_offset, SEEK_SET); + xread(zip_fd, &magic, 4); + /* Central Directory End? */ + if (magic == ZIP_CDE_MAGIC) { + dbg("got ZIP_CDE_MAGIC"); + return 0; /* EOF */ } + xread(zip_fd, cdf_ptr->raw, CDF_HEADER_LEN); + /* Caller doesn't need this: */ + /* dbg("Returning file position to 0x%"OFF_FMT"x", org); */ + /* xlseek(zip_fd, org, SEEK_SET); */ + + FIX_ENDIANNESS_CDF(*cdf_ptr); + dbg(" file_name_length:%u extra_field_length:%u file_comment_length:%u", + (unsigned)cdf_ptr->formatted.file_name_length, + (unsigned)cdf_ptr->formatted.extra_field_length, + (unsigned)cdf_ptr->formatted.file_comment_length + ); + cdf_offset += 4 + CDF_HEADER_LEN + + cdf_ptr->formatted.file_name_length + + cdf_ptr->formatted.extra_field_length + + cdf_ptr->formatted.file_comment_length; - dbg("Returning file position to 0x%"OFF_FMT"x", org); - xlseek(zip_fd, org, SEEK_SET); return cdf_offset; }; #endif @@ -324,6 +347,7 @@ static void unzip_extract(zip_header_t *zip_header, int dst_fd) bb_error_msg("bad length"); } } + /* TODO? method 12: bzip2, method 14: LZMA */ } static void my_fgets80(char *buf80) @@ -339,15 +363,12 @@ int unzip_main(int argc, char **argv) { enum { O_PROMPT, O_NEVER, O_ALWAYS }; - zip_header_t zip_header; smallint quiet = 0; - IF_NOT_DESKTOP(const) smallint verbose = 0; + IF_NOT_FEATURE_UNZIP_CDF(const) smallint verbose = 0; smallint listing = 0; smallint overwrite = O_PROMPT; smallint x_opt_seen; -#if ENABLE_DESKTOP uint32_t cdf_offset; -#endif unsigned long total_usize; unsigned long total_size; unsigned total_entries; @@ -430,7 +451,7 @@ int unzip_main(int argc, char **argv) break; case 'v': /* Verbose list */ - IF_DESKTOP(verbose++;) + IF_FEATURE_UNZIP_CDF(verbose++;) listing = 1; break; @@ -545,78 +566,102 @@ int unzip_main(int argc, char **argv) total_usize = 0; total_size = 0; total_entries = 0; -#if ENABLE_DESKTOP - cdf_offset = 0; -#endif + cdf_offset = find_cdf_offset(); /* try to seek to the end, find CDE and CDF start */ while (1) { - uint32_t magic; + zip_header_t zip_header; mode_t dir_mode = 0777; -#if ENABLE_DESKTOP +#if ENABLE_FEATURE_UNZIP_CDF mode_t file_mode = 0666; #endif - /* Check magic number */ - xread(zip_fd, &magic, 4); - /* Central directory? It's at the end, so exit */ - if (magic == ZIP_CDF_MAGIC) { - dbg("got ZIP_CDF_MAGIC"); - break; - } -#if ENABLE_DESKTOP - /* Data descriptor? It was a streaming file, go on */ - if (magic == ZIP_DD_MAGIC) { - dbg("got ZIP_DD_MAGIC"); - /* skip over duplicate crc32, cmpsize and ucmpsize */ - unzip_skip(3 * 4); - continue; - } -#endif - if (magic != ZIP_FILEHEADER_MAGIC) - bb_error_msg_and_die("invalid zip magic %08X", (int)magic); - dbg("got ZIP_FILEHEADER_MAGIC"); - - /* Read the file header */ - xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN); - FIX_ENDIANNESS_ZIP(zip_header); - if ((zip_header.formatted.method != 0) && (zip_header.formatted.method != 8)) { - bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method); - } -#if !ENABLE_DESKTOP - if (zip_header.formatted.zip_flags & SWAP_LE16(0x0009)) { - bb_error_msg_and_die("zip flags 1 and 8 are not supported"); - } -#else - if (zip_header.formatted.zip_flags & SWAP_LE16(0x0001)) { - /* 0x0001 - encrypted */ - bb_error_msg_and_die("zip flag 1 (encryption) is not supported"); - } + if (!ENABLE_FEATURE_UNZIP_CDF || cdf_offset == BAD_CDF_OFFSET) { + /* Normally happens when input is unseekable. + * + * Valid ZIP file has Central Directory at the end + * with central directory file headers (CDFs). + * After it, there is a Central Directory End structure. + * CDFs identify what files are in the ZIP and where + * they are located. This allows ZIP readers to load + * the list of files without reading the entire ZIP archive. + * ZIP files may be appended to, only files specified in + * the CD are valid. Scanning for local file headers is + * not a correct algorithm. + * + * We try to do the above, and resort to "linear" reading + * of ZIP file only if seek failed or CDE wasn't found. + */ + uint32_t magic; - if (cdf_offset != BAD_CDF_OFFSET) { + /* Check magic number */ + xread(zip_fd, &magic, 4); + /* Central directory? It's at the end, so exit */ + if (magic == ZIP_CDF_MAGIC) { + dbg("got ZIP_CDF_MAGIC"); + break; + } + /* Data descriptor? It was a streaming file, go on */ + if (magic == ZIP_DD_MAGIC) { + dbg("got ZIP_DD_MAGIC"); + /* skip over duplicate crc32, cmpsize and ucmpsize */ + unzip_skip(3 * 4); + continue; + } + if (magic != ZIP_FILEHEADER_MAGIC) + bb_error_msg_and_die("invalid zip magic %08X", (int)magic); + dbg("got ZIP_FILEHEADER_MAGIC"); + + xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN); + FIX_ENDIANNESS_ZIP(zip_header); + if ((zip_header.formatted.method != 0) + && (zip_header.formatted.method != 8) + ) { + /* TODO? method 12: bzip2, method 14: LZMA */ + bb_error_msg_and_die("unsupported method %d", zip_header.formatted.method); + } + if (zip_header.formatted.zip_flags & SWAP_LE16(0x0009)) { + bb_error_msg_and_die("zip flags 1 and 8 are not supported"); + } + } +#if ENABLE_FEATURE_UNZIP_CDF + else { + /* cdf_offset is valid (and we know the file is seekable) */ cdf_header_t cdf_header; cdf_offset = read_next_cdf(cdf_offset, &cdf_header); - /* - * Note: cdf_offset can become BAD_CDF_OFFSET after the above call. - */ + if (cdf_offset == 0) /* EOF? */ + break; +# if 0 + xlseek(zip_fd, + SWAP_LE32(cdf_header.formatted.relative_offset_of_local_header) + 4, + SEEK_SET); + xread(zip_fd, zip_header.raw, ZIP_HEADER_LEN); + FIX_ENDIANNESS_ZIP(zip_header); if (zip_header.formatted.zip_flags & SWAP_LE16(0x0008)) { /* 0x0008 - streaming. [u]cmpsize can be reliably gotten - * only from Central Directory. See unzip_doc.txt + * only from Central Directory. */ zip_header.formatted.crc32 = cdf_header.formatted.crc32; zip_header.formatted.cmpsize = cdf_header.formatted.cmpsize; zip_header.formatted.ucmpsize = cdf_header.formatted.ucmpsize; } +# else + /* CDF has the same data as local header, no need to read the latter */ + memcpy(&zip_header.formatted.version, + &cdf_header.formatted.version_needed, ZIP_HEADER_LEN); + xlseek(zip_fd, + SWAP_LE32(cdf_header.formatted.relative_offset_of_local_header) + 4 + ZIP_HEADER_LEN, + SEEK_SET); +# endif if ((cdf_header.formatted.version_made_by >> 8) == 3) { /* This archive is created on Unix */ dir_mode = file_mode = (cdf_header.formatted.external_file_attributes >> 16); } } - if (cdf_offset == BAD_CDF_OFFSET - && (zip_header.formatted.zip_flags & SWAP_LE16(0x0008)) - ) { - /* If it's a streaming zip, we _require_ CDF */ - bb_error_msg_and_die("can't find file table"); - } #endif + + if (zip_header.formatted.zip_flags & SWAP_LE16(0x0001)) { + /* 0x0001 - encrypted */ + bb_error_msg_and_die("zip flag 1 (encryption) is not supported"); + } dbg("File cmpsize:0x%x extra_len:0x%x ucmpsize:0x%x", (unsigned)zip_header.formatted.cmpsize, (unsigned)zip_header.formatted.extra_len, @@ -751,7 +796,7 @@ int unzip_main(int argc, char **argv) overwrite = O_ALWAYS; case 'y': /* Open file and fall into unzip */ unzip_create_leading_dirs(dst_fn); -#if ENABLE_DESKTOP +#if ENABLE_FEATURE_UNZIP_CDF dst_fd = xopen3(dst_fn, O_WRONLY | O_CREAT | O_TRUNC, file_mode); #else dst_fd = xopen(dst_fn, O_WRONLY | O_CREAT | O_TRUNC); -- cgit v1.2.3