From 8a6a2f9c9c214b94bd945acd97ac8b28c25e194e Mon Sep 17 00:00:00 2001
From: Denys Vlasenko
Date: Tue, 6 Mar 2012 16:27:48 +0100
Subject: update seamless uncompression code

This change makes "tar tf hello_world.txz" work without adding special-casing
for the ".txz" extension. It also removes the ever-growing magic checking code
in rpm2cpio and get_header_tar - we reuse the one that lives in
setup_unzip_on_fd.

function                                             old     new   delta
unpack_gz_stream                                       7     566    +559
check_signature16                                      -      70     +70
setup_unzip_on_fd                                     99     142     +43
handle_SIGCHLD                                         -      41     +41
unpack_bz2_stream                                    342     376     +34
unzip_main                                          2352    2385     +33
bbunpack                                             503     533     +30
open_transformer                                      74     102     +28
unpack_Z_stream                                     1278    1304     +26
unpack_gunzip                                        101     123     +22
init_transformer_aux_data                              -      18     +18
unpack_xz_stream                                    2388    2402     +14
open_zipped                                          131     141     +10
rpm_main                                            1358    1363      +5
get_header_tar_lzma                                   52      57      +5
get_header_tar_bz2                                    52      57      +5
unpack_lzma_stream                                  2698    2702      +4
hash_find                                            234     233      -1
get_header_tar                                      1759    1733     -26
get_header_tar_gz                                     92      57     -35
unpack_uncompress                                     51      12     -39
rpm2cpio_main                                        201     147     -54
unpack_unxz                                           67      12     -55
unpack_bz2_stream_prime                               55       -     -55
get_header_tar_Z                                      86       -     -86
unpack_gz_stream_with_info                           539       -    -539
------------------------------------------------------------------------------
(add/remove: 3/3 grow/shrink: 14/6 up/down: 947/-890)          Total: 57 bytes

Signed-off-by: Denys Vlasenko
---
 archival/libarchive/decompress_bunzip2.c    |  18 ++---
 archival/libarchive/decompress_gunzip.c     |  27 +++----
 archival/libarchive/decompress_uncompress.c |   5 +-
 archival/libarchive/decompress_unlzma.c     |   2 +-
 archival/libarchive/decompress_unxz.c       |  12 ++-
 archival/libarchive/get_header_tar.c        |  35 ++-------
 archival/libarchive/get_header_tar_bz2.c    |   2 +-
 archival/libarchive/get_header_tar_gz.c     |  17 +----
 archival/libarchive/get_header_tar_lzma.c   |   2 +-
 archival/libarchive/open_transformer.c      | 113 ++++++++++++++--------------
 10 files changed, 95 insertions(+), 138 deletions(-)

(limited to 'archival/libarchive')

diff --git a/archival/libarchive/decompress_bunzip2.c 
b/archival/libarchive/decompress_bunzip2.c index c4640d489..dc252bb82 100644 --- a/archival/libarchive/decompress_bunzip2.c +++ b/archival/libarchive/decompress_bunzip2.c @@ -721,7 +721,7 @@ void FAST_FUNC dealloc_bunzip(bunzip_data *bd) /* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */ IF_DESKTOP(long long) int FAST_FUNC -unpack_bz2_stream(int src_fd, int dst_fd) +unpack_bz2_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd) { IF_DESKTOP(long long total_written = 0;) bunzip_data *bd; @@ -729,6 +729,9 @@ unpack_bz2_stream(int src_fd, int dst_fd) int i; unsigned len; + if (check_signature16(aux, src_fd, BZIP2_MAGIC)) + return -1; + outbuf = xmalloc(IOBUF_SIZE); len = 0; while (1) { /* "Process one BZ... stream" loop */ @@ -794,17 +797,6 @@ unpack_bz2_stream(int src_fd, int dst_fd) return i ? i : IF_DESKTOP(total_written) + 0; } -IF_DESKTOP(long long) int FAST_FUNC -unpack_bz2_stream_prime(int src_fd, int dst_fd) -{ - uint16_t magic2; - xread(src_fd, &magic2, 2); - if (magic2 != BZIP2_MAGIC) { - bb_error_msg_and_die("invalid magic"); - } - return unpack_bz2_stream(src_fd, dst_fd); -} - #ifdef TESTING static char *const bunzip_errors[] = { @@ -819,7 +811,7 @@ int main(int argc, char **argv) int i; char c; - int i = unpack_bz2_stream_prime(0, 1); + int i = unpack_bz2_stream(0, 1); if (i < 0) fprintf(stderr, "%s\n", bunzip_errors[-i]); else if (read(STDIN_FILENO, &c, 1)) diff --git a/archival/libarchive/decompress_gunzip.c b/archival/libarchive/decompress_gunzip.c index 50873e3f6..f1c9a79e5 100644 --- a/archival/libarchive/decompress_gunzip.c +++ b/archival/libarchive/decompress_gunzip.c @@ -1034,22 +1034,22 @@ inflate_unzip_internal(STATE_PARAM int in, int out) /* For unzip */ IF_DESKTOP(long long) int FAST_FUNC -inflate_unzip(inflate_unzip_result *res, off_t compr_size, int in, int out) +inflate_unzip(transformer_aux_data_t *aux, int in, int out) { IF_DESKTOP(long long) int n; DECLARE_STATE; ALLOC_STATE; - to_read = compr_size; + 
to_read = aux->bytes_in; // bytebuffer_max = 0x8000; bytebuffer_offset = 4; bytebuffer = xmalloc(bytebuffer_max); n = inflate_unzip_internal(PASS_STATE in, out); free(bytebuffer); - res->crc = gunzip_crc; - res->bytes_out = gunzip_bytes_out; + aux->crc32 = gunzip_crc; + aux->bytes_out = gunzip_bytes_out; DEALLOC_STATE; return n; } @@ -1107,7 +1107,7 @@ static uint32_t buffer_read_le_u32(STATE_PARAM_ONLY) return res; } -static int check_header_gzip(STATE_PARAM unpack_info_t *info) +static int check_header_gzip(STATE_PARAM transformer_aux_data_t *aux) { union { unsigned char raw[8]; @@ -1169,8 +1169,8 @@ static int check_header_gzip(STATE_PARAM unpack_info_t *info) } } - if (info) - info->mtime = SWAP_LE32(header.formatted.mtime); + if (aux) + aux->mtime = SWAP_LE32(header.formatted.mtime); /* Read the header checksum */ if (header.formatted.flags & 0x02) { @@ -1182,12 +1182,15 @@ static int check_header_gzip(STATE_PARAM unpack_info_t *info) } IF_DESKTOP(long long) int FAST_FUNC -unpack_gz_stream_with_info(int src_fd, int dst_fd, unpack_info_t *info) +unpack_gz_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd) { uint32_t v32; IF_DESKTOP(long long) int total, n; DECLARE_STATE; + if (check_signature16(aux, src_fd, GZIP_MAGIC)) + return -1; + total = 0; ALLOC_STATE; @@ -1197,7 +1200,7 @@ unpack_gz_stream_with_info(int src_fd, int dst_fd, unpack_info_t *info) gunzip_src_fd = src_fd; again: - if (!check_header_gzip(PASS_STATE info)) { + if (!check_header_gzip(PASS_STATE aux)) { bb_error_msg("corrupted data"); total = -1; goto ret; @@ -1248,9 +1251,3 @@ unpack_gz_stream_with_info(int src_fd, int dst_fd, unpack_info_t *info) DEALLOC_STATE; return total; } - -IF_DESKTOP(long long) int FAST_FUNC -unpack_gz_stream(int in, int out) -{ - return unpack_gz_stream_with_info(in, out, NULL); -} diff --git a/archival/libarchive/decompress_uncompress.c b/archival/libarchive/decompress_uncompress.c index 289f9e233..e9bbfb9bd 100644 --- 
a/archival/libarchive/decompress_uncompress.c +++ b/archival/libarchive/decompress_uncompress.c @@ -73,7 +73,7 @@ */ IF_DESKTOP(long long) int FAST_FUNC -unpack_Z_stream(int src_fd, int dst_fd) +unpack_Z_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd) { IF_DESKTOP(long long total_written = 0;) IF_DESKTOP(long long) int retval = -1; @@ -103,6 +103,9 @@ unpack_Z_stream(int src_fd, int dst_fd) /* block compress mode -C compatible with 2.0 */ int block_mode; /* = BLOCK_MODE; */ + if (check_signature16(aux, src_fd, COMPRESS_MAGIC)) + return -1; + inbuf = xzalloc(IBUFSIZ + 64); outbuf = xzalloc(OBUFSIZ + 2048); htab = xzalloc(HSIZE); /* wasn't zeroed out before, maybe can xmalloc? */ diff --git a/archival/libarchive/decompress_unlzma.c b/archival/libarchive/decompress_unlzma.c index 3631b50cc..cfde8ea56 100644 --- a/archival/libarchive/decompress_unlzma.c +++ b/archival/libarchive/decompress_unlzma.c @@ -213,7 +213,7 @@ enum { IF_DESKTOP(long long) int FAST_FUNC -unpack_lzma_stream(int src_fd, int dst_fd) +unpack_lzma_stream(transformer_aux_data_t *aux UNUSED_PARAM, int src_fd, int dst_fd) { IF_DESKTOP(long long total_written = 0;) lzma_header_t header; diff --git a/archival/libarchive/decompress_unxz.c b/archival/libarchive/decompress_unxz.c index 3e5d4edca..79b48a152 100644 --- a/archival/libarchive/decompress_unxz.c +++ b/archival/libarchive/decompress_unxz.c @@ -38,7 +38,7 @@ static uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) #include "unxz/xz_dec_stream.c" IF_DESKTOP(long long) int FAST_FUNC -unpack_xz_stream(int src_fd, int dst_fd) +unpack_xz_stream(transformer_aux_data_t *aux, int src_fd, int dst_fd) { struct xz_buf iobuf; struct xz_dec *state; @@ -49,13 +49,17 @@ unpack_xz_stream(int src_fd, int dst_fd) global_crc32_table = crc32_filltable(NULL, /*endian:*/ 0); memset(&iobuf, 0, sizeof(iobuf)); - /* Preload XZ file signature */ - membuf = (void*) strcpy(xmalloc(2 * BUFSIZ), HEADER_MAGIC); + membuf = xmalloc(2 * BUFSIZ); iobuf.in = 
membuf; - iobuf.in_size = HEADER_MAGIC_SIZE; iobuf.out = membuf + BUFSIZ; iobuf.out_size = BUFSIZ; + if (!aux || aux->check_signature == 0) { + /* Preload XZ file signature */ + strcpy((char*)membuf, HEADER_MAGIC); + iobuf.in_size = HEADER_MAGIC_SIZE; + } /* else: let xz code read & check it */ + /* Limit memory usage to about 64 MiB. */ state = xz_dec_init(XZ_DYNALLOC, 64*1024*1024); diff --git a/archival/libarchive/get_header_tar.c b/archival/libarchive/get_header_tar.c index 8c699754b..80a709144 100644 --- a/archival/libarchive/get_header_tar.c +++ b/archival/libarchive/get_header_tar.c @@ -235,43 +235,18 @@ char FAST_FUNC get_header_tar(archive_handle_t *archive_handle) || memcmp(tar.magic, "\0\0\0\0", 5) != 0) ) { #if ENABLE_FEATURE_TAR_AUTODETECT - char FAST_FUNC (*get_header_ptr)(archive_handle_t *); - uint16_t magic2; - autodetect: - magic2 = *(bb__aliased_uint16_t*)tar.name; - /* tar gz/bz autodetect: check for gz/bz2 magic. - * If we see the magic, and it is the very first block, - * we can switch to get_header_tar_gz/bz2/lzma(). - * Needs seekable fd. I wish recv(MSG_PEEK) works - * on any fd... */ -# if ENABLE_FEATURE_SEAMLESS_GZ - if (magic2 == GZIP_MAGIC) { - get_header_ptr = get_header_tar_gz; - } else -# endif -# if ENABLE_FEATURE_SEAMLESS_BZ2 - if (magic2 == BZIP2_MAGIC - && tar.name[2] == 'h' && isdigit(tar.name[3]) - ) { /* bzip2 */ - get_header_ptr = get_header_tar_bz2; - } else -# endif -# if ENABLE_FEATURE_SEAMLESS_XZ - //TODO: if (magic2 == XZ_MAGIC1)... - //else -# endif - goto err; /* Two different causes for lseek() != 0: * unseekable fd (would like to support that too, but...), * or not first block (false positive, it's not .gz/.bz2!) 
*/ if (lseek(archive_handle->src_fd, -i, SEEK_CUR) != 0) goto err; - while (get_header_ptr(archive_handle) == EXIT_SUCCESS) - continue; - return EXIT_FAILURE; + if (setup_unzip_on_fd(archive_handle->src_fd, /*fail_if_not_detected:*/ 0) != 0) err: -#endif /* FEATURE_TAR_AUTODETECT */ + bb_error_msg_and_die("invalid tar magic"); + archive_handle->offset = 0; + goto again_after_align; +#endif bb_error_msg_and_die("invalid tar magic"); } diff --git a/archival/libarchive/get_header_tar_bz2.c b/archival/libarchive/get_header_tar_bz2.c index e012dec3b..0ee00df53 100644 --- a/archival/libarchive/get_header_tar_bz2.c +++ b/archival/libarchive/get_header_tar_bz2.c @@ -11,7 +11,7 @@ char FAST_FUNC get_header_tar_bz2(archive_handle_t *archive_handle) /* Can't lseek over pipes */ archive_handle->seek = seek_by_read; - open_transformer(archive_handle->src_fd, unpack_bz2_stream_prime, "bunzip2"); + open_transformer_with_sig(archive_handle->src_fd, unpack_bz2_stream, "bunzip2"); archive_handle->offset = 0; while (get_header_tar(archive_handle) == EXIT_SUCCESS) continue; diff --git a/archival/libarchive/get_header_tar_gz.c b/archival/libarchive/get_header_tar_gz.c index b9679b0bd..03284342b 100644 --- a/archival/libarchive/get_header_tar_gz.c +++ b/archival/libarchive/get_header_tar_gz.c @@ -8,25 +8,10 @@ char FAST_FUNC get_header_tar_gz(archive_handle_t *archive_handle) { -#if BB_MMU - uint16_t magic; -#endif - /* Can't lseek over pipes */ archive_handle->seek = seek_by_read; - /* Check gzip magic only if open_transformer will invoke unpack_gz_stream (MMU case). - * Otherwise, it will invoke an external helper "gunzip -cf" (NOMMU case) which will - * need the header. 
*/ -#if BB_MMU - xread(archive_handle->src_fd, &magic, 2); - /* Can skip this check, but error message will be less clear */ - if (magic != GZIP_MAGIC) { - bb_error_msg_and_die("invalid gzip magic"); - } -#endif - - open_transformer(archive_handle->src_fd, unpack_gz_stream, "gunzip"); + open_transformer_with_sig(archive_handle->src_fd, unpack_gz_stream, "gunzip"); archive_handle->offset = 0; while (get_header_tar(archive_handle) == EXIT_SUCCESS) continue; diff --git a/archival/libarchive/get_header_tar_lzma.c b/archival/libarchive/get_header_tar_lzma.c index 666700729..d565a217d 100644 --- a/archival/libarchive/get_header_tar_lzma.c +++ b/archival/libarchive/get_header_tar_lzma.c @@ -14,7 +14,7 @@ char FAST_FUNC get_header_tar_lzma(archive_handle_t *archive_handle) /* Can't lseek over pipes */ archive_handle->seek = seek_by_read; - open_transformer(archive_handle->src_fd, unpack_lzma_stream, "unlzma"); + open_transformer_with_sig(archive_handle->src_fd, unpack_lzma_stream, "unlzma"); archive_handle->offset = 0; while (get_header_tar(archive_handle) == EXIT_SUCCESS) continue; diff --git a/archival/libarchive/open_transformer.c b/archival/libarchive/open_transformer.c index 743ffee02..693ae9995 100644 --- a/archival/libarchive/open_transformer.c +++ b/archival/libarchive/open_transformer.c @@ -6,24 +6,36 @@ #include "libbb.h" #include "bb_archive.h" -#define ZIPPED (ENABLE_FEATURE_SEAMLESS_LZMA \ - || ENABLE_FEATURE_SEAMLESS_BZ2 \ - || ENABLE_FEATURE_SEAMLESS_GZ \ - /* || ENABLE_FEATURE_SEAMLESS_Z */ \ -) +void FAST_FUNC init_transformer_aux_data(transformer_aux_data_t *aux) +{ + memset(aux, 0, sizeof(*aux)); +} -#if ZIPPED -# include "bb_archive.h" +int FAST_FUNC check_signature16(transformer_aux_data_t *aux, int src_fd, unsigned magic16) +{ + if (aux && aux->check_signature) { + uint16_t magic2; + if (full_read(src_fd, &magic2, 2) != 2 || magic2 != magic16) { + bb_error_msg("invalid magic"); +#if 0 /* possible future extension */ + if (aux->check_signature > 1) + 
xfunc_die(); #endif + return -1; + } + } + return 0; +} /* transformer(), more than meets the eye */ -/* - * On MMU machine, the transform_prog is removed by macro magic - * in include/archive.h. On NOMMU, transformer is removed. - */ +#if BB_MMU void FAST_FUNC open_transformer(int fd, - IF_DESKTOP(long long) int FAST_FUNC (*transformer)(int src_fd, int dst_fd), - const char *transform_prog) + int check_signature, + IF_DESKTOP(long long) int FAST_FUNC (*transformer)(transformer_aux_data_t *aux, int src_fd, int dst_fd) +) +#else +void FAST_FUNC open_transformer(int fd, const char *transform_prog) +#endif { struct fd_pair fd_pipe; int pid; @@ -35,13 +47,18 @@ void FAST_FUNC open_transformer(int fd, close(fd_pipe.rd); /* we don't want to read from the parent */ // FIXME: error check? #if BB_MMU - transformer(fd, fd_pipe.wr); - if (ENABLE_FEATURE_CLEAN_UP) { - close(fd_pipe.wr); /* send EOF */ - close(fd); + { + transformer_aux_data_t aux; + init_transformer_aux_data(&aux); + aux.check_signature = check_signature; + transformer(&aux, fd, fd_pipe.wr); + if (ENABLE_FEATURE_CLEAN_UP) { + close(fd_pipe.wr); /* send EOF */ + close(fd); + } + /* must be _exit! bug was actually seen here */ + _exit(EXIT_SUCCESS); } - /* must be _exit! bug was actually seen here */ - _exit(EXIT_SUCCESS); #else { char *argv[4]; @@ -64,26 +81,21 @@ void FAST_FUNC open_transformer(int fd, } +#if SEAMLESS_COMPRESSION + /* Used by e.g. rpm which gives us a fd without filename, * thus we can't guess the format from filename's extension. 
*/ -#if ZIPPED -void FAST_FUNC setup_unzip_on_fd(int fd /*, int fail_if_not_detected*/) +int FAST_FUNC setup_unzip_on_fd(int fd, int fail_if_not_detected) { - const int fail_if_not_detected = 1; union { uint8_t b[4]; uint16_t b16[2]; uint32_t b32[1]; } magic; int offset = -2; -# if BB_MMU - IF_DESKTOP(long long) int FAST_FUNC (*xformer)(int src_fd, int dst_fd); - enum { xformer_prog = 0 }; -# else - enum { xformer = 0 }; - const char *xformer_prog; -# endif + USE_FOR_MMU(IF_DESKTOP(long long) int FAST_FUNC (*xformer)(transformer_aux_data_t *aux, int src_fd, int dst_fd);) + USE_FOR_NOMMU(const char *xformer_prog;) /* .gz and .bz2 both have 2-byte signature, and their * unpack_XXX_stream wants this header skipped. */ @@ -91,21 +103,15 @@ void FAST_FUNC setup_unzip_on_fd(int fd /*, int fail_if_not_detected*/) if (ENABLE_FEATURE_SEAMLESS_GZ && magic.b16[0] == GZIP_MAGIC ) { -# if BB_MMU - xformer = unpack_gz_stream; -# else - xformer_prog = "gunzip"; -# endif + USE_FOR_MMU(xformer = unpack_gz_stream;) + USE_FOR_NOMMU(xformer_prog = "gunzip";) goto found_magic; } if (ENABLE_FEATURE_SEAMLESS_BZ2 && magic.b16[0] == BZIP2_MAGIC ) { -# if BB_MMU - xformer = unpack_bz2_stream; -# else - xformer_prog = "bunzip2"; -# endif + USE_FOR_MMU(xformer = unpack_bz2_stream;) + USE_FOR_NOMMU(xformer_prog = "bunzip2";) goto found_magic; } if (ENABLE_FEATURE_SEAMLESS_XZ @@ -114,13 +120,8 @@ void FAST_FUNC setup_unzip_on_fd(int fd /*, int fail_if_not_detected*/) offset = -6; xread(fd, magic.b32, sizeof(magic.b32[0])); if (magic.b32[0] == XZ_MAGIC2) { -# if BB_MMU - xformer = unpack_xz_stream; - /* unpack_xz_stream wants fd at position 6, no need to seek */ - //xlseek(fd, offset, SEEK_CUR); -# else - xformer_prog = "unxz"; -# endif + USE_FOR_MMU(xformer = unpack_xz_stream;) + USE_FOR_NOMMU(xformer_prog = "unxz";) goto found_magic; } } @@ -132,24 +133,23 @@ void FAST_FUNC setup_unzip_on_fd(int fd /*, int fail_if_not_detected*/) IF_FEATURE_SEAMLESS_XZ("/xz") " magic"); xlseek(fd, offset, 
SEEK_CUR); - return; + return 1; found_magic: -# if !BB_MMU +# if BB_MMU + open_transformer_with_no_sig(fd, xformer); +# else /* NOMMU version of open_transformer execs * an external unzipper that wants * file position at the start of the file */ xlseek(fd, offset, SEEK_CUR); + open_transformer_with_sig(fd, xformer, xformer_prog); # endif - open_transformer(fd, xformer, xformer_prog); + return 0; } -#endif /* ZIPPED */ int FAST_FUNC open_zipped(const char *fname) { -#if !ZIPPED - return open(fname, O_RDONLY); -#else char *sfx; int fd; @@ -162,20 +162,21 @@ int FAST_FUNC open_zipped(const char *fname) sfx++; if (ENABLE_FEATURE_SEAMLESS_LZMA && strcmp(sfx, "lzma") == 0) /* .lzma has no header/signature, just trust it */ - open_transformer(fd, unpack_lzma_stream, "unlzma"); + open_transformer_with_sig(fd, unpack_lzma_stream, "unlzma"); else if ((ENABLE_FEATURE_SEAMLESS_GZ && strcmp(sfx, "gz") == 0) || (ENABLE_FEATURE_SEAMLESS_BZ2 && strcmp(sfx, "bz2") == 0) || (ENABLE_FEATURE_SEAMLESS_XZ && strcmp(sfx, "xz") == 0) ) { - setup_unzip_on_fd(fd /*, fail_if_not_detected: 1*/); + setup_unzip_on_fd(fd, /*fail_if_not_detected:*/ 1); } } return fd; -#endif } +#endif /* SEAMLESS_COMPRESSION */ + void* FAST_FUNC xmalloc_open_zipped_read_close(const char *fname, size_t *maxsz_p) { int fd; -- cgit v1.2.3