From ba73cfd28464f9ef926dfd27e264215d4c4f8b1f Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 20 Jun 2010 02:40:56 +0200 Subject: unxz: update from XZ embedded git function old new delta rc_reset - 21 +21 unpack_xz_stream 2342 2357 +15 lzma_reset 102 64 -38 lzma_len 506 443 -63 xz_dec_lzma2_run 1438 1374 -64 xz_dec_reset 73 - -73 lzma_main 2517 2183 -334 ------------------------------------------------------------------------------ (add/remove: 1/1 grow/shrink: 1/4 up/down: 36/-572) Total: -536 bytes Signed-off-by: Denys Vlasenko --- archival/libunarchive/decompress_unxz.c | 52 +++----- archival/libunarchive/unxz/xz.h | 139 +++++++++++++------- archival/libunarchive/unxz/xz_dec_lzma2.c | 200 ++++++++++++++++------------- archival/libunarchive/unxz/xz_dec_stream.c | 17 +-- archival/libunarchive/unxz/xz_private.h | 41 +++++- 5 files changed, 270 insertions(+), 179 deletions(-) (limited to 'archival') diff --git a/archival/libunarchive/decompress_unxz.c b/archival/libunarchive/decompress_unxz.c index 1302e29fb..800680fef 100644 --- a/archival/libunarchive/decompress_unxz.c +++ b/archival/libunarchive/decompress_unxz.c @@ -12,10 +12,11 @@ #include "libbb.h" #include "unarchive.h" -#define XZ_REALLOC_DICT_BUF(ptr, size) xrealloc(ptr, size) #define XZ_FUNC FAST_FUNC #define XZ_EXTERN static +#define XZ_DEC_DYNALLOC + /* Skip check (rather than fail) of unsupported hash functions */ #define XZ_DEC_ANY_CHECK 1 @@ -40,15 +41,9 @@ static uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) #define put_unaligned_le32(val, buf) move_to_unaligned16(buf, SWAP_LE32(val)) #define put_unaligned_be32(val, buf) move_to_unaligned16(buf, SWAP_BE32(val)) -#include "unxz/xz.h" -#include "unxz/xz_config.h" - #include "unxz/xz_dec_bcj.c" #include "unxz/xz_dec_lzma2.c" #include "unxz/xz_dec_stream.c" -#include "unxz/xz_lzma2.h" -#include "unxz/xz_private.h" -#include "unxz/xz_stream.h" IF_DESKTOP(long long) int FAST_FUNC unpack_xz_stream(int src_fd, int dst_fd) @@ 
-57,63 +52,50 @@ unpack_xz_stream(int src_fd, int dst_fd) struct xz_dec *state; unsigned char *membuf; IF_DESKTOP(long long) int total = 0; - enum { - IN_SIZE = 4 * 1024, - OUT_SIZE = 60 * 1024, - }; if (!crc32_table) crc32_table = crc32_filltable(NULL, /*endian:*/ 0); - membuf = xmalloc(IN_SIZE + OUT_SIZE); + membuf = xmalloc(2 * BUFSIZ); memset(&iobuf, 0, sizeof(iobuf)); iobuf.in = membuf; - iobuf.out = membuf + IN_SIZE; - iobuf.out_size = OUT_SIZE; + iobuf.out = membuf + BUFSIZ; + iobuf.out_size = BUFSIZ; - state = xz_dec_init(64*1024); /* initial dict of 64k */ + /* Limit memory usage to about 64 MiB. */ + state = xz_dec_init(XZ_DYNALLOC, 64*1024*1024); while (1) { enum xz_ret r; - int insz, rd, outpos; - iobuf.in_size -= iobuf.in_pos; - insz = iobuf.in_size; - if (insz) - memmove(membuf, membuf + iobuf.in_pos, insz); - iobuf.in_pos = 0; - rd = IN_SIZE - insz; - if (rd) { - rd = safe_read(src_fd, membuf + insz, rd); + if (iobuf.in_pos == iobuf.in_size) { + int rd = safe_read(src_fd, membuf, BUFSIZ); if (rd < 0) { bb_error_msg(bb_msg_read_error); total = -1; break; } - iobuf.in_size = insz + rd; + iobuf.in_size = rd; + iobuf.in_pos = 0; } // bb_error_msg(">in pos:%d size:%d out pos:%d size:%d", // iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size); r = xz_dec_run(state, &iobuf); // bb_error_msg(" 0, the decoder is initialized to work in multi-call mode. - * dict_max number of bytes of memory is preallocated for the LZMA2 - * dictionary. This way there is no risk that xz_dec_run() could run out - * of memory, since xz_dec_run() will never allocate any memory. Instead, - * if the preallocated dictionary is too small for decoding the given input - * stream, xz_dec_run() will return XZ_MEMLIMIT_ERROR. Thus, it is important - * to know what kind of data will be decoded to avoid allocating excessive - * amount of memory for the dictionary. - * - * LZMA2 dictionary is always 2^n bytes or 2^n + 2^(n-1) bytes (the latter - * sizes are less common in practice). 
In the kernel, dictionary sizes of - * 64 KiB, 128 KiB, 256 KiB, 512 KiB, and 1 MiB are probably the only - * reasonable values. - * - * If dict_max == 0, the decoder is initialized to work in single-call mode. - * In single-call mode, xz_dec_run() decodes the whole stream at once. The - * caller must provide enough output space or the decoding will fail. The - * output space is used as the dictionary buffer, which is why there is - * no need to allocate the dictionary as part of the decoder's internal - * state. + * multi-call decoding. This is ignored in single-call mode + * (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes + * or 2^n + 2^(n-1) bytes (the latter sizes are less common + * in practice), so other values for dict_max don't make sense. + * In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB, + * 512 KiB, and 1 MiB are probably the only reasonable values, + * except for kernel and initramfs images where a bigger + * dictionary can be fine and useful. + * + * Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at + * once. The caller must provide enough output space or the decoding will + * fail. The output space is used as the dictionary buffer, which is why + * there is no need to allocate the dictionary as part of the decoder's + * internal state. * * Because the output buffer is used as the workspace, streams encoded using - * a big dictionary are not a problem in single-call. It is enough that the - * output buffer is big enough to hold the actual uncompressed data; it + * a big dictionary are not a problem in single-call mode. It is enough that + * the output buffer is big enough to hold the actual uncompressed data; it * can be smaller than the dictionary size stored in the stream headers. * + * Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes + * of memory is preallocated for the LZMA2 dictionary. 
This way there is no + * risk that xz_dec_run() could run out of memory, since xz_dec_run() will + * never allocate any memory. Instead, if the preallocated dictionary is too + * small for decoding the given input stream, xz_dec_run() will return + * XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be + * decoded to avoid allocating excessive amount of memory for the dictionary. + * + * Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC): + * dict_max specifies the maximum allowed dictionary size that xz_dec_run() + * may allocate once it has parsed the dictionary size from the stream + * headers. This way excessive allocations can be avoided while still + * limiting the maximum memory usage to a sane value to prevent running the + * system out of memory when decompressing streams from untrusted sources. + * * On success, xz_dec_init() returns a pointer to struct xz_dec, which is - * ready to be used with xz_dec_run(). On error, xz_dec_init() returns NULL. + * ready to be used with xz_dec_run(). If memory allocation fails, + * xz_dec_init() returns NULL. */ -XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(uint32_t dict_max); +XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init( + enum xz_mode mode, uint32_t dict_max); /** * xz_dec_run() - Run the XZ decoder * @s: Decoder state allocated using xz_dec_init() * @b: Input and output buffers * - * In multi-call mode, this function may return any of the values listed in - * enum xz_ret. - * - * In single-call mode, this function never returns XZ_OK. If an error occurs - * in single-call mode (return value is not XZ_STREAM_END), b->in_pos and - * b->out_pos are not modified, and the contents of the output buffer from - * b->out[b->out_pos] onward are undefined. - * - * NOTE: In single-call mode, the contents of the output buffer are undefined - * also after XZ_BUF_ERROR. 
This is because with some filter chains, there - * may be a second pass over the output buffer, and this pass cannot be - * properly done if the output buffer is truncated. Thus, you cannot give - * the single-call decoder a too small buffer and then expect to get that - * amount valid data from the beginning of the stream. You must use the - * multi-call decoder if you don't want to uncompress the whole stream. + * The possible return values depend on build options and operation mode. + * See enum xz_ret for details. + * + * NOTE: If an error occurs in single-call mode (return value is not + * XZ_STREAM_END), b->in_pos and b->out_pos are not modified, and the + * contents of the output buffer from b->out[b->out_pos] onward are + * undefined. This is true even after XZ_BUF_ERROR, because with some filter + * chains, there may be a second pass over the output buffer, and this pass + * cannot be properly done if the output buffer is truncated. Thus, you + * cannot give the single-call decoder a too small buffer and then expect to + * get that amount valid data from the beginning of the stream. You must use + * the multi-call decoder if you don't want to uncompress the whole stream. */ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b); diff --git a/archival/libunarchive/unxz/xz_dec_lzma2.c b/archival/libunarchive/unxz/xz_dec_lzma2.c index 37de6fc32..da71cb4d4 100644 --- a/archival/libunarchive/unxz/xz_dec_lzma2.c +++ b/archival/libunarchive/unxz/xz_dec_lzma2.c @@ -34,7 +34,8 @@ * * In multi-call mode, also these are true: * end == size - * size <= allocated + * size <= size_max + * allocated <= size * * Most of these variables are size_t to support single-call mode, * in which the dictionary variables address the actual output @@ -74,11 +75,20 @@ struct dictionary { uint32_t size; /* - * Amount of memory allocated for the dictionary. 
A special - * value of zero indicates that we are in single-call mode, - * where the output buffer works as the dictionary. + * Maximum allowed dictionary size in multi-call mode. + * This is ignored in single-call mode. + */ + uint32_t size_max; + + /* + * Amount of memory currently allocated for the dictionary. + * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC, + * size_max is always the same as the allocated size.) */ uint32_t allocated; + + /* Operation mode */ + enum xz_mode mode; }; /* Range decoder */ @@ -120,31 +130,31 @@ struct lzma_len_dec { }; struct lzma_dec { - /* - * LZMA properties or related bit masks (number of literal - * context bits, a mask dervied from the number of literal - * position bits, and a mask dervied from the number - * position bits) - */ - uint32_t lc; - uint32_t literal_pos_mask; /* (1 << lp) - 1 */ - uint32_t pos_mask; /* (1 << pb) - 1 */ - - /* Types of the most recently seen LZMA symbols */ - enum lzma_state state; - /* Distances of latest four matches */ uint32_t rep0; uint32_t rep1; uint32_t rep2; uint32_t rep3; + /* Types of the most recently seen LZMA symbols */ + enum lzma_state state; + /* * Length of a match. This is updated so that dict_repeat can * be called again to finish repeating the whole match. */ uint32_t len; + /* + * LZMA properties or related bit masks (number of literal + * context bits, a mask derived from the number of literal + * position bits, and a mask derived from the number of + * position bits) + */ + uint32_t lc; + uint32_t literal_pos_mask; /* (1 << lp) - 1 */ + uint32_t pos_mask; /* (1 << pb) - 1 */ + /* If 1, it's a match. Otherwise it's a single 8-bit literal. */ uint16_t is_match[STATES][POS_STATES_MAX]; @@ -201,49 +211,59 @@ struct lzma_dec { uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; }; +struct lzma2_dec { + /* Position in xz_dec_lzma2_run().
*/ + enum lzma2_seq { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA_PREPARE, + SEQ_LZMA_RUN, + SEQ_COPY + } sequence; + + /* Next position after decoding the compressed size of the chunk. */ + enum lzma2_seq next_sequence; + + /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ + uint32_t uncompressed; + + /* + * Compressed size of LZMA chunk or compressed/uncompressed + * size of uncompressed chunk (64 KiB at maximum) + */ + uint32_t compressed; + + /* + * True if dictionary reset is needed. This is false before + * the first chunk (LZMA or uncompressed). + */ + bool need_dict_reset; + + /* + * True if new LZMA properties are needed. This is false + * before the first LZMA chunk. + */ + bool need_props; +}; + struct xz_dec_lzma2 { - /* LZMA2 */ - struct { - /* Position in xz_dec_lzma2_run(). */ - enum lzma2_seq { - SEQ_CONTROL, - SEQ_UNCOMPRESSED_1, - SEQ_UNCOMPRESSED_2, - SEQ_COMPRESSED_0, - SEQ_COMPRESSED_1, - SEQ_PROPERTIES, - SEQ_LZMA_PREPARE, - SEQ_LZMA_RUN, - SEQ_COPY - } sequence; - - /* - * Next position after decoding the compressed size of - * the chunk. - */ - enum lzma2_seq next_sequence; - - /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ - uint32_t uncompressed; - - /* - * Compressed size of LZMA chunk or compressed/uncompressed - * size of uncompressed chunk (64 KiB at maximum) - */ - uint32_t compressed; - - /* - * True if dictionary reset is needed. This is false before - * the first chunk (LZMA or uncompressed). - */ - bool need_dict_reset; - - /* - * True if new LZMA properties are needed. This is false - * before the first LZMA chunk. - */ - bool need_props; - } lzma2; + /* + * The order below is important on x86 to reduce code size and + * it shouldn't hurt on other platforms. Everything up to and + * including lzma.pos_mask are in the first 128 bytes on x86-32, + * which allows using smaller instructions to access those + * variables. 
On x86-64, fewer variables fit into the first 128 + * bytes, but this is still the best order without sacrificing + * the readability by splitting the structures. + */ + struct rc_dec rc; + struct dictionary dict; + struct lzma2_dec lzma2; + struct lzma_dec lzma; /* * Temporary buffer which holds small number of input bytes between @@ -253,10 +273,6 @@ struct xz_dec_lzma2 { uint32_t size; uint8_t buf[3 * LZMA_IN_REQUIRED]; } temp; - - struct dictionary dict; - struct rc_dec rc; - struct lzma_dec lzma; }; /************** @@ -269,7 +285,7 @@ struct xz_dec_lzma2 { */ static void XZ_FUNC dict_reset(struct dictionary *dict, struct xz_buf *b) { - if (dict->allocated == 0) { + if (DEC_IS_SINGLE(dict->mode)) { dict->buf = b->out + b->out_pos; dict->end = b->out_size - b->out_pos; } @@ -379,7 +395,7 @@ static void XZ_FUNC dict_uncompressed( if (dict->full < dict->pos) dict->full = dict->pos; - if (dict->allocated != 0) { + if (DEC_IS_MULTI(dict->mode)) { if (dict->pos == dict->end) dict->pos = 0; @@ -404,7 +420,7 @@ static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b) { size_t copy_size = dict->pos - dict->start; - if (dict->allocated != 0) { + if (DEC_IS_MULTI(dict->mode)) { if (dict->pos == dict->end) dict->pos = 0; @@ -422,7 +438,7 @@ static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b) *****************/ /* Reset the range decoder. */ -static __always_inline void XZ_FUNC rc_reset(struct rc_dec *rc) +static void XZ_FUNC rc_reset(struct rc_dec *rc) { rc->range = (uint32_t)-1; rc->code = 0; @@ -1088,28 +1104,27 @@ XZ_EXTERN NOINLINE enum xz_ret XZ_FUNC xz_dec_lzma2_run( return XZ_OK; } -XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(uint32_t dict_max) +XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create( + enum xz_mode mode, uint32_t dict_max) { - struct xz_dec_lzma2 *s; - - /* Maximum supported dictionary by this implementation is 3 GiB. 
*/ - if (dict_max > ((uint32_t)3 << 30)) - return NULL; - - s = kmalloc(sizeof(*s), GFP_KERNEL); + struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s == NULL) return NULL; - if (dict_max > 0) { + s->dict.mode = mode; + s->dict.size_max = dict_max; + + if (DEC_IS_PREALLOC(mode)) { s->dict.buf = vmalloc(dict_max); if (s->dict.buf == NULL) { kfree(s); return NULL; } + } else if (DEC_IS_DYNALLOC(mode)) { + s->dict.buf = NULL; + s->dict.allocated = 0; } - s->dict.allocated = dict_max; - return s; } @@ -1123,18 +1138,23 @@ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset( s->dict.size = 2 + (props & 1); s->dict.size <<= (props >> 1) + 11; - if (s->dict.allocated > 0 && s->dict.allocated < s->dict.size) { -#ifdef XZ_REALLOC_DICT_BUF - s->dict.buf = XZ_REALLOC_DICT_BUF(s->dict.buf, s->dict.size); - if (!s->dict.buf) - return XZ_MEMLIMIT_ERROR; - s->dict.allocated = s->dict.size; -#else - return XZ_MEMLIMIT_ERROR; -#endif - } + if (DEC_IS_MULTI(s->dict.mode)) { + if (s->dict.size > s->dict.size_max) + return XZ_MEMLIMIT_ERROR; - s->dict.end = s->dict.size; + s->dict.end = s->dict.size; + + if (DEC_IS_DYNALLOC(s->dict.mode)) { + if (s->dict.allocated < s->dict.size) { + vfree(s->dict.buf); + s->dict.buf = vmalloc(s->dict.size); + if (s->dict.buf == NULL) { + s->dict.allocated = 0; + return XZ_MEM_ERROR; + } + } + } + } s->lzma.len = 0; @@ -1148,7 +1168,7 @@ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset( XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s) { - if (s->dict.allocated > 0) + if (DEC_IS_MULTI(s->dict.mode)) vfree(s->dict.buf); kfree(s); diff --git a/archival/libunarchive/unxz/xz_dec_stream.c b/archival/libunarchive/unxz/xz_dec_stream.c index 21db283fb..bdcbf1ba3 100644 --- a/archival/libunarchive/unxz/xz_dec_stream.c +++ b/archival/libunarchive/unxz/xz_dec_stream.c @@ -48,8 +48,8 @@ struct xz_dec { /* Type of the integrity check calculated from uncompressed data */ enum xz_check check_type; - /* True if we are operating in single-call 
mode. */ - bool single_call; + /* Operation mode */ + enum xz_mode mode; /* * True if the next call to xz_dec_run() is allowed to return @@ -737,14 +737,14 @@ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b) size_t out_start; enum xz_ret ret; - if (s->single_call) + if (DEC_IS_SINGLE(s->mode)) xz_dec_reset(s); in_start = b->in_pos; out_start = b->out_pos; ret = dec_main(s, b); - if (s->single_call) { + if (DEC_IS_SINGLE(s->mode)) { if (ret == XZ_OK) ret = b->in_pos == b->in_size ? XZ_DATA_ERROR : XZ_BUF_ERROR; @@ -767,21 +767,22 @@ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b) return ret; } -XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(uint32_t dict_max) +XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init( + enum xz_mode mode, uint32_t dict_max) { struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s == NULL) return NULL; - s->single_call = dict_max == 0; + s->mode = mode; #ifdef XZ_DEC_BCJ - s->bcj = xz_dec_bcj_create(s->single_call); + s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); if (s->bcj == NULL) goto error_bcj; #endif - s->lzma2 = xz_dec_lzma2_create(dict_max); + s->lzma2 = xz_dec_lzma2_create(mode, dict_max); if (s->lzma2 == NULL) goto error_lzma2; diff --git a/archival/libunarchive/unxz/xz_private.h b/archival/libunarchive/unxz/xz_private.h index f4e0b4010..145649a83 100644 --- a/archival/libunarchive/unxz/xz_private.h +++ b/archival/libunarchive/unxz/xz_private.h @@ -53,6 +53,45 @@ # include "xz_config.h" #endif +/* If no specific decoding mode is requested, enable support for all modes. */ +#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \ + && !defined(XZ_DEC_DYNALLOC) +# define XZ_DEC_SINGLE +# define XZ_DEC_PREALLOC +# define XZ_DEC_DYNALLOC +#endif + +/* + * The DEC_IS_foo(mode) macros are used in "if" statements. If only some + * of the supported modes are enabled, these macros will evaluate to true or + * false at compile time and thus allow the compiler to omit unneeded code. 
+ */ +#ifdef XZ_DEC_SINGLE +# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE) +#else +# define DEC_IS_SINGLE(mode) (false) +#endif + +#ifdef XZ_DEC_PREALLOC +# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC) +#else +# define DEC_IS_PREALLOC(mode) (false) +#endif + +#ifdef XZ_DEC_DYNALLOC +# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC) +#else +# define DEC_IS_DYNALLOC(mode) (false) +#endif + +#if !defined(XZ_DEC_SINGLE) +# define DEC_IS_MULTI(mode) (true) +#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC) +# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE) +#else +# define DEC_IS_MULTI(mode) (false) +#endif + /* * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ. * XZ_DEC_BCJ is used to enable generic support for BCJ decoders. @@ -71,7 +110,7 @@ * before calling xz_dec_lzma2_run(). */ XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create( - uint32_t dict_max); + enum xz_mode mode, uint32_t dict_max); /* * Decode the LZMA2 properties (one byte) and reset the decoder. Return -- cgit v1.2.3