diff options
Diffstat (limited to 'archival/libunarchive/unxz')
-rw-r--r-- | archival/libunarchive/unxz/xz.h | 139 | ||||
-rw-r--r-- | archival/libunarchive/unxz/xz_dec_lzma2.c | 200 | ||||
-rw-r--r-- | archival/libunarchive/unxz/xz_dec_stream.c | 17 | ||||
-rw-r--r-- | archival/libunarchive/unxz/xz_private.h | 41 |
4 files changed, 253 insertions, 144 deletions
diff --git a/archival/libunarchive/unxz/xz.h b/archival/libunarchive/unxz/xz.h index eb82706b9..c6c071c4a 100644 --- a/archival/libunarchive/unxz/xz.h +++ b/archival/libunarchive/unxz/xz.h @@ -30,9 +30,42 @@ #endif /** + * enum xz_mode - Operation mode + * + * @XZ_SINGLE: Single-call mode. This uses less RAM than + * multi-call modes, because the LZMA2 + * dictionary doesn't need to be allocated as + * part of the decoder state. All required data + * structures are allocated at initialization, + * so xz_dec_run() cannot return XZ_MEM_ERROR. + * @XZ_PREALLOC: Multi-call mode with preallocated LZMA2 + * dictionary buffer. All data structures are + * allocated at initialization, so xz_dec_run() + * cannot return XZ_MEM_ERROR. + * @XZ_DYNALLOC: Multi-call mode. The LZMA2 dictionary is + * allocated once the required size has been + * parsed from the stream headers. If the + * allocation fails, xz_dec_run() will return + * XZ_MEM_ERROR. + * + * It is possible to enable support only for a subset of the above + * modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC, + * or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled + * with support for all operation modes, but the preboot code may + * be built with fewer features to minimize code size. + */ +enum xz_mode { + XZ_SINGLE, + XZ_PREALLOC, + XZ_DYNALLOC +}; + +/** * enum xz_ret - Return codes * @XZ_OK: Everything is OK so far. More input or more - * output space is required to continue. + * output space is required to continue. This + * return code is possible only in multi-call mode + * (XZ_PREALLOC or XZ_DYNALLOC). * @XZ_STREAM_END: Operation finished successfully. * @XZ_UNSUPPORTED_CHECK: Integrity check type is not supported. Decoding * is still possible in multi-call mode by simply @@ -42,8 +75,17 @@ * which is not used in the kernel. Unsupported * check types return XZ_OPTIONS_ERROR if * XZ_DEC_ANY_CHECK was not defined at build time. 
- * @XZ_MEMLIMIT_ERROR: Not enough memory was preallocated at decoder - * initialization time. + * @XZ_MEM_ERROR: Allocating memory failed. This return code is + * possible only if the decoder was initialized + * with XZ_DYNALLOC. The amount of memory that was + * tried to be allocated was no more than the + * dict_max argument given to xz_dec_init(). + * @XZ_MEMLIMIT_ERROR: A bigger LZMA2 dictionary would be needed than + * allowed by the dict_max argument given to + * xz_dec_init(). This return value is possible + * only in multi-call mode (XZ_PREALLOC or + * XZ_DYNALLOC); the single-call mode (XZ_SINGLE) + * ignores the dict_max argument. * @XZ_FORMAT_ERROR: File format was not recognized (wrong magic * bytes). * @XZ_OPTIONS_ERROR: This implementation doesn't support the requested @@ -72,6 +114,7 @@ enum xz_ret { XZ_OK, XZ_STREAM_END, XZ_UNSUPPORTED_CHECK, + XZ_MEM_ERROR, XZ_MEMLIMIT_ERROR, XZ_FORMAT_ERROR, XZ_OPTIONS_ERROR, @@ -112,61 +155,67 @@ struct xz_dec; /** * xz_dec_init() - Allocate and initialize a XZ decoder state + * @mode: Operation mode * @dict_max: Maximum size of the LZMA2 dictionary (history buffer) for - * multi-call decoding, or special value of zero to indicate - * single-call decoding mode. - * - * If dict_max > 0, the decoder is initialized to work in multi-call mode. - * dict_max number of bytes of memory is preallocated for the LZMA2 - * dictionary. This way there is no risk that xz_dec_run() could run out - * of memory, since xz_dec_run() will never allocate any memory. Instead, - * if the preallocated dictionary is too small for decoding the given input - * stream, xz_dec_run() will return XZ_MEMLIMIT_ERROR. Thus, it is important - * to know what kind of data will be decoded to avoid allocating excessive - * amount of memory for the dictionary. - * - * LZMA2 dictionary is always 2^n bytes or 2^n + 2^(n-1) bytes (the latter - * sizes are less common in practice). 
In the kernel, dictionary sizes of - * 64 KiB, 128 KiB, 256 KiB, 512 KiB, and 1 MiB are probably the only - * reasonable values. - * - * If dict_max == 0, the decoder is initialized to work in single-call mode. - * In single-call mode, xz_dec_run() decodes the whole stream at once. The - * caller must provide enough output space or the decoding will fail. The - * output space is used as the dictionary buffer, which is why there is - * no need to allocate the dictionary as part of the decoder's internal - * state. + * multi-call decoding. This is ignored in single-call mode + * (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes + * or 2^n + 2^(n-1) bytes (the latter sizes are less common + * in practice), so other values for dict_max don't make sense. + * In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB, + * 512 KiB, and 1 MiB are probably the only reasonable values, + * except for kernel and initramfs images where a bigger + * dictionary can be fine and useful. + * + * Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at + * once. The caller must provide enough output space or the decoding will + * fail. The output space is used as the dictionary buffer, which is why + * there is no need to allocate the dictionary as part of the decoder's + * internal state. * * Because the output buffer is used as the workspace, streams encoded using - * a big dictionary are not a problem in single-call. It is enough that the - * output buffer is big enough to hold the actual uncompressed data; it + * a big dictionary are not a problem in single-call mode. It is enough that + * the output buffer is big enough to hold the actual uncompressed data; it * can be smaller than the dictionary size stored in the stream headers. * + * Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes + * of memory is preallocated for the LZMA2 dictionary. 
This way there is no + * risk that xz_dec_run() could run out of memory, since xz_dec_run() will + * never allocate any memory. Instead, if the preallocated dictionary is too + * small for decoding the given input stream, xz_dec_run() will return + * XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be + * decoded to avoid allocating excessive amount of memory for the dictionary. + * + * Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC): + * dict_max specifies the maximum allowed dictionary size that xz_dec_run() + * may allocate once it has parsed the dictionary size from the stream + * headers. This way excessive allocations can be avoided while still + * limiting the maximum memory usage to a sane value to prevent running the + * system out of memory when decompressing streams from untrusted sources. + * * On success, xz_dec_init() returns a pointer to struct xz_dec, which is - * ready to be used with xz_dec_run(). On error, xz_dec_init() returns NULL. + * ready to be used with xz_dec_run(). If memory allocation fails, + * xz_dec_init() returns NULL. */ -XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(uint32_t dict_max); +XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init( + enum xz_mode mode, uint32_t dict_max); /** * xz_dec_run() - Run the XZ decoder * @s: Decoder state allocated using xz_dec_init() * @b: Input and output buffers * - * In multi-call mode, this function may return any of the values listed in - * enum xz_ret. - * - * In single-call mode, this function never returns XZ_OK. If an error occurs - * in single-call mode (return value is not XZ_STREAM_END), b->in_pos and - * b->out_pos are not modified, and the contents of the output buffer from - * b->out[b->out_pos] onward are undefined. - * - * NOTE: In single-call mode, the contents of the output buffer are undefined - * also after XZ_BUF_ERROR. 
This is because with some filter chains, there - * may be a second pass over the output buffer, and this pass cannot be - * properly done if the output buffer is truncated. Thus, you cannot give - * the single-call decoder a too small buffer and then expect to get that - * amount valid data from the beginning of the stream. You must use the - * multi-call decoder if you don't want to uncompress the whole stream. + * The possible return values depend on build options and operation mode. + * See enum xz_ret for details. + * + * NOTE: If an error occurs in single-call mode (return value is not + * XZ_STREAM_END), b->in_pos and b->out_pos are not modified, and the + * contents of the output buffer from b->out[b->out_pos] onward are + * undefined. This is true even after XZ_BUF_ERROR, because with some filter + * chains, there may be a second pass over the output buffer, and this pass + * cannot be properly done if the output buffer is truncated. Thus, you + * cannot give the single-call decoder a too small buffer and then expect to + * get that amount of valid data from the beginning of the stream. You must use + * the multi-call decoder if you don't want to uncompress the whole stream. */ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b); diff --git a/archival/libunarchive/unxz/xz_dec_lzma2.c b/archival/libunarchive/unxz/xz_dec_lzma2.c index 37de6fc32..da71cb4d4 100644 --- a/archival/libunarchive/unxz/xz_dec_lzma2.c +++ b/archival/libunarchive/unxz/xz_dec_lzma2.c @@ -34,7 +34,8 @@ * * In multi-call mode, also these are true: * end == size - * size <= allocated + * size <= size_max + * allocated <= size * * Most of these variables are size_t to support single-call mode, * in which the dictionary variables address the actual output @@ -74,11 +75,20 @@ struct dictionary { uint32_t size; /* - * Amount of memory allocated for the dictionary. 
A special - * value of zero indicates that we are in single-call mode, - * where the output buffer works as the dictionary. + * Maximum allowed dictionary size in multi-call mode. + * This is ignored in single-call mode. + */ + uint32_t size_max; + + /* + * Amount of memory currently allocated for the dictionary. + * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC, + * size_max is always the same as the allocated size.) */ uint32_t allocated; + + /* Operation mode */ + enum xz_mode mode; }; /* Range decoder */ @@ -120,31 +130,31 @@ struct lzma_len_dec { }; struct lzma_dec { - /* - * LZMA properties or related bit masks (number of literal - * context bits, a mask dervied from the number of literal - * position bits, and a mask dervied from the number - * position bits) - */ - uint32_t lc; - uint32_t literal_pos_mask; /* (1 << lp) - 1 */ - uint32_t pos_mask; /* (1 << pb) - 1 */ - - /* Types of the most recently seen LZMA symbols */ - enum lzma_state state; - /* Distances of latest four matches */ uint32_t rep0; uint32_t rep1; uint32_t rep2; uint32_t rep3; + /* Types of the most recently seen LZMA symbols */ + enum lzma_state state; + /* * Length of a match. This is updated so that dict_repeat can * be called again to finish repeating the whole match. */ uint32_t len; + /* + * LZMA properties or related bit masks (number of literal + * context bits, a mask derived from the number of literal + * position bits, and a mask derived from the number of + * position bits) + */ + uint32_t lc; + uint32_t literal_pos_mask; /* (1 << lp) - 1 */ + uint32_t pos_mask; /* (1 << pb) - 1 */ + /* If 1, it's a match. Otherwise it's a single 8-bit literal. */ uint16_t is_match[STATES][POS_STATES_MAX]; @@ -201,49 +211,59 @@ struct lzma_dec { uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; }; +struct lzma2_dec { + /* Position in xz_dec_lzma2_run(). 
*/ + enum lzma2_seq { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA_PREPARE, + SEQ_LZMA_RUN, + SEQ_COPY + } sequence; + + /* Next position after decoding the compressed size of the chunk. */ + enum lzma2_seq next_sequence; + + /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ + uint32_t uncompressed; + + /* + * Compressed size of LZMA chunk or compressed/uncompressed + * size of uncompressed chunk (64 KiB at maximum) + */ + uint32_t compressed; + + /* + * True if dictionary reset is needed. This is false before + * the first chunk (LZMA or uncompressed). + */ + bool need_dict_reset; + + /* + * True if new LZMA properties are needed. This is false + * before the first LZMA chunk. + */ + bool need_props; +}; + struct xz_dec_lzma2 { - /* LZMA2 */ - struct { - /* Position in xz_dec_lzma2_run(). */ - enum lzma2_seq { - SEQ_CONTROL, - SEQ_UNCOMPRESSED_1, - SEQ_UNCOMPRESSED_2, - SEQ_COMPRESSED_0, - SEQ_COMPRESSED_1, - SEQ_PROPERTIES, - SEQ_LZMA_PREPARE, - SEQ_LZMA_RUN, - SEQ_COPY - } sequence; - - /* - * Next position after decoding the compressed size of - * the chunk. - */ - enum lzma2_seq next_sequence; - - /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ - uint32_t uncompressed; - - /* - * Compressed size of LZMA chunk or compressed/uncompressed - * size of uncompressed chunk (64 KiB at maximum) - */ - uint32_t compressed; - - /* - * True if dictionary reset is needed. This is false before - * the first chunk (LZMA or uncompressed). - */ - bool need_dict_reset; - - /* - * True if new LZMA properties are needed. This is false - * before the first LZMA chunk. - */ - bool need_props; - } lzma2; + /* + * The order below is important on x86 to reduce code size and + * it shouldn't hurt on other platforms. Everything up to and + * including lzma.pos_mask are in the first 128 bytes on x86-32, + * which allows using smaller instructions to access those + * variables. 
On x86-64, fewer variables fit into the first 128 + * bytes, but this is still the best order without sacrificing + * the readability by splitting the structures. + */ + struct rc_dec rc; + struct dictionary dict; + struct lzma2_dec lzma2; + struct lzma_dec lzma; /* * Temporary buffer which holds small number of input bytes between @@ -253,10 +273,6 @@ struct xz_dec_lzma2 { uint32_t size; uint8_t buf[3 * LZMA_IN_REQUIRED]; } temp; - - struct dictionary dict; - struct rc_dec rc; - struct lzma_dec lzma; }; /************** @@ -269,7 +285,7 @@ struct xz_dec_lzma2 { */ static void XZ_FUNC dict_reset(struct dictionary *dict, struct xz_buf *b) { - if (dict->allocated == 0) { + if (DEC_IS_SINGLE(dict->mode)) { dict->buf = b->out + b->out_pos; dict->end = b->out_size - b->out_pos; } @@ -379,7 +395,7 @@ static void XZ_FUNC dict_uncompressed( if (dict->full < dict->pos) dict->full = dict->pos; - if (dict->allocated != 0) { + if (DEC_IS_MULTI(dict->mode)) { if (dict->pos == dict->end) dict->pos = 0; @@ -404,7 +420,7 @@ static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b) { size_t copy_size = dict->pos - dict->start; - if (dict->allocated != 0) { + if (DEC_IS_MULTI(dict->mode)) { if (dict->pos == dict->end) dict->pos = 0; @@ -422,7 +438,7 @@ static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b) *****************/ /* Reset the range decoder. */ -static __always_inline void XZ_FUNC rc_reset(struct rc_dec *rc) +static void XZ_FUNC rc_reset(struct rc_dec *rc) { rc->range = (uint32_t)-1; rc->code = 0; @@ -1088,28 +1104,27 @@ XZ_EXTERN NOINLINE enum xz_ret XZ_FUNC xz_dec_lzma2_run( return XZ_OK; } -XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create(uint32_t dict_max) +XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create( + enum xz_mode mode, uint32_t dict_max) { - struct xz_dec_lzma2 *s; - - /* Maximum supported dictionary by this implementation is 3 GiB. 
*/ - if (dict_max > ((uint32_t)3 << 30)) - return NULL; - - s = kmalloc(sizeof(*s), GFP_KERNEL); + struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s == NULL) return NULL; - if (dict_max > 0) { + s->dict.mode = mode; + s->dict.size_max = dict_max; + + if (DEC_IS_PREALLOC(mode)) { s->dict.buf = vmalloc(dict_max); if (s->dict.buf == NULL) { kfree(s); return NULL; } + } else if (DEC_IS_DYNALLOC(mode)) { + s->dict.buf = NULL; + s->dict.allocated = 0; } - s->dict.allocated = dict_max; - return s; } @@ -1123,18 +1138,23 @@ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset( s->dict.size = 2 + (props & 1); s->dict.size <<= (props >> 1) + 11; - if (s->dict.allocated > 0 && s->dict.allocated < s->dict.size) { -#ifdef XZ_REALLOC_DICT_BUF - s->dict.buf = XZ_REALLOC_DICT_BUF(s->dict.buf, s->dict.size); - if (!s->dict.buf) - return XZ_MEMLIMIT_ERROR; - s->dict.allocated = s->dict.size; -#else - return XZ_MEMLIMIT_ERROR; -#endif - } + if (DEC_IS_MULTI(s->dict.mode)) { + if (s->dict.size > s->dict.size_max) + return XZ_MEMLIMIT_ERROR; - s->dict.end = s->dict.size; + s->dict.end = s->dict.size; + + if (DEC_IS_DYNALLOC(s->dict.mode)) { + if (s->dict.allocated < s->dict.size) { + vfree(s->dict.buf); + s->dict.buf = vmalloc(s->dict.size); + if (s->dict.buf == NULL) { + s->dict.allocated = 0; + return XZ_MEM_ERROR; + } + } + } + } s->lzma.len = 0; @@ -1148,7 +1168,7 @@ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset( XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s) { - if (s->dict.allocated > 0) + if (DEC_IS_MULTI(s->dict.mode)) vfree(s->dict.buf); kfree(s); diff --git a/archival/libunarchive/unxz/xz_dec_stream.c b/archival/libunarchive/unxz/xz_dec_stream.c index 21db283fb..bdcbf1ba3 100644 --- a/archival/libunarchive/unxz/xz_dec_stream.c +++ b/archival/libunarchive/unxz/xz_dec_stream.c @@ -48,8 +48,8 @@ struct xz_dec { /* Type of the integrity check calculated from uncompressed data */ enum xz_check check_type; - /* True if we are operating in single-call 
mode. */ - bool single_call; + /* Operation mode */ + enum xz_mode mode; /* * True if the next call to xz_dec_run() is allowed to return @@ -737,14 +737,14 @@ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b) size_t out_start; enum xz_ret ret; - if (s->single_call) + if (DEC_IS_SINGLE(s->mode)) xz_dec_reset(s); in_start = b->in_pos; out_start = b->out_pos; ret = dec_main(s, b); - if (s->single_call) { + if (DEC_IS_SINGLE(s->mode)) { if (ret == XZ_OK) ret = b->in_pos == b->in_size ? XZ_DATA_ERROR : XZ_BUF_ERROR; @@ -767,21 +767,22 @@ XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b) return ret; } -XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init(uint32_t dict_max) +XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init( + enum xz_mode mode, uint32_t dict_max) { struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); if (s == NULL) return NULL; - s->single_call = dict_max == 0; + s->mode = mode; #ifdef XZ_DEC_BCJ - s->bcj = xz_dec_bcj_create(s->single_call); + s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); if (s->bcj == NULL) goto error_bcj; #endif - s->lzma2 = xz_dec_lzma2_create(dict_max); + s->lzma2 = xz_dec_lzma2_create(mode, dict_max); if (s->lzma2 == NULL) goto error_lzma2; diff --git a/archival/libunarchive/unxz/xz_private.h b/archival/libunarchive/unxz/xz_private.h index f4e0b4010..145649a83 100644 --- a/archival/libunarchive/unxz/xz_private.h +++ b/archival/libunarchive/unxz/xz_private.h @@ -53,6 +53,45 @@ # include "xz_config.h" #endif +/* If no specific decoding mode is requested, enable support for all modes. */ +#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \ + && !defined(XZ_DEC_DYNALLOC) +# define XZ_DEC_SINGLE +# define XZ_DEC_PREALLOC +# define XZ_DEC_DYNALLOC +#endif + +/* + * The DEC_IS_foo(mode) macros are used in "if" statements. If only some + * of the supported modes are enabled, these macros will evaluate to true or + * false at compile time and thus allow the compiler to omit unneeded code. 
+ */ +#ifdef XZ_DEC_SINGLE +# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE) +#else +# define DEC_IS_SINGLE(mode) (false) +#endif + +#ifdef XZ_DEC_PREALLOC +# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC) +#else +# define DEC_IS_PREALLOC(mode) (false) +#endif + +#ifdef XZ_DEC_DYNALLOC +# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC) +#else +# define DEC_IS_DYNALLOC(mode) (false) +#endif + +#if !defined(XZ_DEC_SINGLE) +# define DEC_IS_MULTI(mode) (true) +#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC) +# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE) +#else +# define DEC_IS_MULTI(mode) (false) +#endif + /* * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ. * XZ_DEC_BCJ is used to enable generic support for BCJ decoders. @@ -71,7 +110,7 @@ * before calling xz_dec_lzma2_run(). */ XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create( - uint32_t dict_max); + enum xz_mode mode, uint32_t dict_max); /* * Decode the LZMA2 properties (one byte) and reset the decoder. Return |