From caddfc83399ab783f032dbe23f3b10a5bd85414f Mon Sep 17 00:00:00 2001
From: Denys Vlasenko
Date: Thu, 28 Oct 2010 23:08:53 +0200
Subject: decompress_bunzip2: handle concatenated .bz2 files

function                                             old     new   delta
unpack_bz2_stream                                    207     307    +100
start_bunzip                                         199     209     +10
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 2/0 up/down: 110/0)             Total: 110 bytes

Signed-off-by: Denys Vlasenko
---
 archival/libunarchive/decompress_bunzip2.c | 74 ++++++++++++++++++++----------
 include/unarchive.h                        |  2 +-
 libbb/appletlib.c                          |  2 +-
 miscutils/bbconfig.c                       |  2 +-
 testsuite/bunzip2.tests                    | 31 ++++++++++++-
 5 files changed, 81 insertions(+), 30 deletions(-)

diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c
index 22015683c..549c8b19f 100644
--- a/archival/libunarchive/decompress_bunzip2.c
+++ b/archival/libunarchive/decompress_bunzip2.c
@@ -44,7 +44,7 @@
 #define RETVAL_LAST_BLOCK (-1)
 #define RETVAL_NOT_BZIP_DATA (-2)
 #define RETVAL_UNEXPECTED_INPUT_EOF (-3)
-#define RETVAL_SHORT_WRITE (-4)
+//#define RETVAL_SHORT_WRITE (-4)
 #define RETVAL_DATA_ERROR (-5)
 #define RETVAL_OUT_OF_MEMORY (-6)
 #define RETVAL_OBSOLETE_INPUT (-7)
@@ -584,8 +584,8 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
 /* Because bunzip2 is used for help text unpacking, and because bb_show_usage()
    should work for NOFORK applets too, we must be extremely careful to not leak
    any allocations!
*/
-int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf,
-		int len)
+int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd,
+		const void *inbuf, int len)
 {
 	bunzip_data *bd;
 	unsigned i;
@@ -606,9 +606,11 @@ int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *in
 	if (-1 == in_fd) {
 		/* in this case, bd->inbuf is read-only */
 		bd->inbuf = (void*)inbuf; /* cast away const-ness */
-		bd->inbufCount = len;
-	} else
+	} else {
 		bd->inbuf = (unsigned char *)(bd + 1);
+		memcpy(bd->inbuf, inbuf, len);
+	}
+	bd->inbufCount = len;
 
 	/* Init the CRC32 table (big endian) */
 	crc32_filltable(bd->crc32Table, 1);
@@ -652,37 +654,59 @@ IF_DESKTOP(long long) int FAST_FUNC
 unpack_bz2_stream(int src_fd, int dst_fd)
 {
 	IF_DESKTOP(long long total_written = 0;)
+	bunzip_data *bd;
 	char *outbuf;
-	bunzip_data *bd;
 	int i;
+	unsigned len;
 
 	outbuf = xmalloc(IOBUF_SIZE);
-	i = start_bunzip(&bd, src_fd, NULL, 0);
-	if (!i) {
-		for (;;) {
-			i = read_bunzip(bd, outbuf, IOBUF_SIZE);
-			if (i <= 0) break;
-			if (i != full_write(dst_fd, outbuf, i)) {
-				i = RETVAL_SHORT_WRITE;
-				break;
+	len = 0;
+	while (1) { /* "Process one BZ...
stream" loop */
+
+		i = start_bunzip(&bd, src_fd, outbuf + 2, len);
+
+		if (i == 0) {
+			while (1) { /* "Produce some output bytes" loop */
+				i = read_bunzip(bd, outbuf, IOBUF_SIZE);
+				if (i <= 0)
+					break;
+				if (i != full_write(dst_fd, outbuf, i)) {
+					bb_error_msg("short write");
+					goto release_mem;
+				}
+				IF_DESKTOP(total_written += i;)
 			}
-			IF_DESKTOP(total_written += i;)
 		}
-	}
 
-	/* Check CRC and release memory */
-
-	if (i == RETVAL_LAST_BLOCK) {
+		if (i != RETVAL_LAST_BLOCK) {
+			bb_error_msg("bunzip error %d", i);
+			break;
+		}
 		if (bd->headerCRC != bd->totalCRC) {
 			bb_error_msg("CRC error");
-		} else {
-			i = RETVAL_OK;
+			break;
 		}
-	} else if (i == RETVAL_SHORT_WRITE) {
-		bb_error_msg("short write");
-	} else {
-		bb_error_msg("bunzip error %d", i);
+
+		/* Successfully unpacked one BZ stream */
+		i = RETVAL_OK;
+
+		/* Do we have "BZ..." after last processed byte?
+		 * pbzip2 (parallelized bzip2) produces such files.
+		 */
+		len = bd->inbufCount - bd->inbufPos;
+		memcpy(outbuf, &bd->inbuf[bd->inbufPos], len);
+		if (len < 2) {
+			if (safe_read(src_fd, outbuf + len, 2 - len) != 2 - len)
+				break;
+			len = 2;
+		}
+		if (*(uint16_t*)outbuf != BZIP2_MAGIC) /* "BZ"?
*/
+			break;
+		dealloc_bunzip(bd);
+		len -= 2;
 	}
+ release_mem:
 	dealloc_bunzip(bd);
 	free(outbuf);
 
diff --git a/include/unarchive.h b/include/unarchive.h
index b55af6d9d..11d8c77a0 100644
--- a/include/unarchive.h
+++ b/include/unarchive.h
@@ -193,7 +193,7 @@ extern const llist_t *find_list_entry2(const llist_t *list, const char *filename
 
 /* A bit of bunzip2 internals are exposed for compressed help support: */
 typedef struct bunzip_data bunzip_data;
-int start_bunzip(bunzip_data **bdp, int in_fd, const unsigned char *inbuf, int len) FAST_FUNC;
+int start_bunzip(bunzip_data **bdp, int in_fd, const void *inbuf, int len) FAST_FUNC;
 int read_bunzip(bunzip_data *bd, char *outbuf, int len) FAST_FUNC;
 void dealloc_bunzip(bunzip_data *bd) FAST_FUNC;
 
diff --git a/libbb/appletlib.c b/libbb/appletlib.c
index b32ff8808..fab996ca0 100644
--- a/libbb/appletlib.c
+++ b/libbb/appletlib.c
@@ -75,7 +75,7 @@ static const char *unpack_usage_messages(void)
 
 	i = start_bunzip(&bd,
 			/* src_fd: */ -1,
-			/* inbuf: */ (void *)packed_usage,
+			/* inbuf: */ packed_usage,
 			/* len: */ sizeof(packed_usage));
 	/* read_bunzip can longjmp to start_bunzip, and ultimately
 	 * end up here with i != 0 on read data errors! Not trivial */
diff --git a/miscutils/bbconfig.c b/miscutils/bbconfig.c
index 286077168..1cb42edeb 100644
--- a/miscutils/bbconfig.c
+++ b/miscutils/bbconfig.c
@@ -15,7 +15,7 @@ int bbconfig_main(int argc UNUSED_PARAM, char **argv UNUSED_PARAM)
 	bunzip_data *bd;
 	int i = start_bunzip(&bd,
 			/* src_fd: */ -1,
-			/* inbuf: */ (void *)bbconfig_config_bz2,
+			/* inbuf: */ bbconfig_config_bz2,
 			/* len: */ sizeof(bbconfig_config_bz2));
 	/* read_bunzip can longjmp to start_bunzip, and ultimately
 	 * end up here with i != 0 on read data errors!
Not trivial */
diff --git a/testsuite/bunzip2.tests b/testsuite/bunzip2.tests
index 827aee867..fcfce1a31 100755
--- a/testsuite/bunzip2.tests
+++ b/testsuite/bunzip2.tests
@@ -463,6 +463,24 @@ $ECHO -ne "\x40\xa0\x00\x8b\x12\xe8\xfb\xb7\x27\xaa\xd3\x36\x0c\xfc\xe1\x40"
 $ECHO -ne "\x01\xff\x8b\xb9\x22\x9c\x28\x48\x5f\xa5\xca\xf3\x80"
 }
 
+pbzip_4m_zeros() {
+$ECHO -ne "\x42\x5a\x68\x31\x31\x41\x59\x26\x53\x59\x63\xe3\xec\xa2\x00\x06"
+$ECHO -ne "\xe4\xc1\x00\xc0\x00\x02\x00\x00\x08\x20\x00\x30\xcc\x09\xaa\x69"
+$ECHO -ne "\x94\xa1\x36\xa9\x28\x4f\x17\x72\x45\x38\x50\x90\x63\xe3\xec\xa2"
+$ECHO -ne "\x42\x5a\x68\x31\x31\x41\x59\x26\x53\x59\x63\xe3\xec\xa2\x00\x06"
+$ECHO -ne "\xe4\xc1\x00\xc0\x00\x02\x00\x00\x08\x20\x00\x30\xcc\x09\xaa\x69"
+$ECHO -ne "\x94\xa1\x36\xa9\x28\x4f\x17\x72\x45\x38\x50\x90\x63\xe3\xec\xa2"
+$ECHO -ne "\x42\x5a\x68\x31\x31\x41\x59\x26\x53\x59\x63\xe3\xec\xa2\x00\x06"
+$ECHO -ne "\xe4\xc1\x00\xc0\x00\x02\x00\x00\x08\x20\x00\x30\xcc\x09\xaa\x69"
+$ECHO -ne "\x94\xa1\x36\xa9\x28\x4f\x17\x72\x45\x38\x50\x90\x63\xe3\xec\xa2"
+$ECHO -ne "\x42\x5a\x68\x31\x31\x41\x59\x26\x53\x59\x63\xe3\xec\xa2\x00\x06"
+$ECHO -ne "\xe4\xc1\x00\xc0\x00\x02\x00\x00\x08\x20\x00\x30\xcc\x09\xaa\x69"
+$ECHO -ne "\x94\xa1\x36\xa9\x28\x4f\x17\x72\x45\x38\x50\x90\x63\xe3\xec\xa2"
+$ECHO -ne "\x42\x5a\x68\x31\x31\x41\x59\x26\x53\x59\xc9\xb5\x21\xef\x00\x04"
+$ECHO -ne "\x8d\x40\x20\xc0\x00\x01\x00\x00\x08\x20\x00\x30\xcc\x05\x29\xa6"
+$ECHO -ne "\x4a\x11\xb1\x4a\x11\xe2\xee\x48\xa7\x0a\x12\x19\x36\xa4\x3d\xe0"
+}
+
 prep() {
     rm -f t*
     hello_$ext >t1.$ext
@@ -520,9 +538,18 @@ if test "${0##*/}" = "bunzip2.tests"; then
     if test1_bz2 | ${bb}bunzip2 >/dev/null \
 	&& test "`test1_bz2 | ${bb}bunzip2 | md5sum`" = "61bbeee4be9c6f110a71447f584fda7b -"
     then
-	echo "PASS: $unpack: test bz2 file"
+	echo "PASS: $unpack: test_bz2 file"
+    else
+	echo "FAIL: $unpack: test_bz2 file"
+	FAILCOUNT=$((FAILCOUNT + 1))
+    fi
+
+    if pbzip_4m_zeros | ${bb}bunzip2 >/dev/null \
+	&& test "`pbzip_4m_zeros
| ${bb}bunzip2 | md5sum`" = "b5cfa9d6c8febd618f91ac2843d50a1c -"
+    then
+	echo "PASS: $unpack: pbzip_4m_zeros file"
     else
-	echo "FAIL: $unpack: test bz2 file"
+	echo "FAIL: $unpack: pbzip_4m_zeros file"
 	FAILCOUNT=$((FAILCOUNT + 1))
     fi
 fi
-- 
cgit v1.2.3