From 833d4e7f84f59099ee66eabfa3457ebb7d37eaa8 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 3 Nov 2010 02:38:31 +0100 Subject: rename archival/libunarchive -> archival/libarchive; move bz/ into it Signed-off-by: Denys Vlasenko --- archival/Kbuild.src | 2 +- archival/ar.c | 2 +- archival/bbunzip.c | 2 +- archival/bz/LICENSE | 44 - archival/bz/README | 90 -- archival/bz/blocksort.c | 1072 ----------------- archival/bz/bzlib.c | 431 ------- archival/bz/bzlib.h | 65 - archival/bz/bzlib_private.h | 219 ---- archival/bz/compress.c | 685 ----------- archival/bz/huffman.c | 229 ---- archival/bzip2.c | 14 +- archival/cpio.c | 2 +- archival/dpkg.c | 2 +- archival/dpkg_deb.c | 2 +- archival/gzip.c | 2 +- archival/libarchive/Kbuild.src | 64 + archival/libarchive/bz/LICENSE | 44 + archival/libarchive/bz/README | 90 ++ archival/libarchive/bz/blocksort.c | 1072 +++++++++++++++++ archival/libarchive/bz/bzlib.c | 431 +++++++ archival/libarchive/bz/bzlib.h | 65 + archival/libarchive/bz/bzlib_private.h | 219 ++++ archival/libarchive/bz/compress.c | 685 +++++++++++ archival/libarchive/bz/huffman.c | 229 ++++ archival/libarchive/data_align.c | 15 + archival/libarchive/data_extract_all.c | 200 ++++ archival/libarchive/data_extract_to_command.c | 134 +++ archival/libarchive/data_extract_to_stdout.c | 14 + archival/libarchive/data_skip.c | 12 + archival/libarchive/decompress_bunzip2.c | 822 +++++++++++++ archival/libarchive/decompress_uncompress.c | 307 +++++ archival/libarchive/decompress_unlzma.c | 465 ++++++++ archival/libarchive/decompress_unxz.c | 98 ++ archival/libarchive/decompress_unzip.c | 1252 ++++++++++++++++++++ archival/libarchive/filter_accept_all.c | 17 + archival/libarchive/filter_accept_list.c | 19 + archival/libarchive/filter_accept_list_reassign.c | 51 + archival/libarchive/filter_accept_reject_list.c | 36 + archival/libarchive/find_list_entry.c | 54 + archival/libarchive/get_header_ar.c | 133 +++ archival/libarchive/get_header_cpio.c | 186 +++ archival/libarchive/get_header_tar.c | 461 +++++++ archival/libarchive/get_header_tar_bz2.c | 21 + archival/libarchive/get_header_tar_gz.c | 36 + archival/libarchive/get_header_tar_lzma.c | 24 + archival/libarchive/header_list.c | 12 + archival/libarchive/header_skip.c | 10 + archival/libarchive/header_verbose_list.c | 69 ++ archival/libarchive/init_handle.c | 22 + archival/libarchive/liblzo.h | 93 ++ archival/libarchive/lzo1x_1.c | 35 + archival/libarchive/lzo1x_1o.c | 35 + archival/libarchive/lzo1x_9x.c | 921 ++++++++++++++ archival/libarchive/lzo1x_c.c | 296 +++++ archival/libarchive/lzo1x_d.c | 420 +++++++ archival/libarchive/open_transformer.c | 54 + archival/libarchive/seek_by_jump.c | 19 + archival/libarchive/seek_by_read.c | 16 + archival/libarchive/unpack_ar_archive.c | 22 + archival/libarchive/unxz/README | 135 +++ archival/libarchive/unxz/xz.h | 271 +++++ archival/libarchive/unxz/xz_config.h | 123 ++ archival/libarchive/unxz/xz_dec_bcj.c | 564 +++++++++ archival/libarchive/unxz/xz_dec_lzma2.c | 1175 ++++++++++++++++++ archival/libarchive/unxz/xz_dec_stream.c | 822 +++++++++++++ archival/libarchive/unxz/xz_lzma2.h | 204 ++++ archival/libarchive/unxz/xz_private.h | 159 +++ archival/libarchive/unxz/xz_stream.h | 57 + archival/libunarchive/Kbuild.src | 64 - archival/libunarchive/data_align.c | 15 - archival/libunarchive/data_extract_all.c | 200 ---- archival/libunarchive/data_extract_to_command.c | 134 --- archival/libunarchive/data_extract_to_stdout.c | 14 - archival/libunarchive/data_skip.c | 12 - archival/libunarchive/decompress_bunzip2.c | 822 ------------- archival/libunarchive/decompress_uncompress.c | 307 ----- archival/libunarchive/decompress_unlzma.c | 465 -------- archival/libunarchive/decompress_unxz.c | 98 -- archival/libunarchive/decompress_unzip.c | 1252 -------------------- archival/libunarchive/filter_accept_all.c | 17 - archival/libunarchive/filter_accept_list.c | 19 - .../libunarchive/filter_accept_list_reassign.c | 51 - archival/libunarchive/filter_accept_reject_list.c | 36 - archival/libunarchive/find_list_entry.c | 54 - archival/libunarchive/get_header_ar.c | 133 --- archival/libunarchive/get_header_cpio.c | 186 --- archival/libunarchive/get_header_tar.c | 461 ------- archival/libunarchive/get_header_tar_bz2.c | 21 - archival/libunarchive/get_header_tar_gz.c | 36 - archival/libunarchive/get_header_tar_lzma.c | 24 - archival/libunarchive/header_list.c | 12 - archival/libunarchive/header_skip.c | 10 - archival/libunarchive/header_verbose_list.c | 69 -- archival/libunarchive/init_handle.c | 22 - archival/libunarchive/liblzo.h | 93 -- archival/libunarchive/lzo1x_1.c | 35 - archival/libunarchive/lzo1x_1o.c | 35 - archival/libunarchive/lzo1x_9x.c | 921 -------------- archival/libunarchive/lzo1x_c.c | 296 ----- archival/libunarchive/lzo1x_d.c | 420 ------- archival/libunarchive/open_transformer.c | 54 - archival/libunarchive/seek_by_jump.c | 19 - archival/libunarchive/seek_by_read.c | 16 - archival/libunarchive/unpack_ar_archive.c | 22 - archival/libunarchive/unxz/README | 135 --- archival/libunarchive/unxz/xz.h | 271 ----- archival/libunarchive/unxz/xz_config.h | 123 -- archival/libunarchive/unxz/xz_dec_bcj.c | 564 --------- archival/libunarchive/unxz/xz_dec_lzma2.c | 1175 ------------------ archival/libunarchive/unxz/xz_dec_stream.c | 822 ------------- archival/libunarchive/unxz/xz_lzma2.h | 204 ---- archival/libunarchive/unxz/xz_private.h | 159 --- archival/libunarchive/unxz/xz_stream.h | 57 - archival/lzop.c | 2 +- archival/rpm.c | 2 +- archival/rpm2cpio.c | 2 +- archival/tar.c | 2 +- archival/unzip.c | 2 +- 119 files changed, 12809 insertions(+), 12809 deletions(-) delete mode 100644 archival/bz/LICENSE delete mode 100644 archival/bz/README delete mode 100644 archival/bz/blocksort.c delete mode 100644 archival/bz/bzlib.c delete mode 100644 archival/bz/bzlib.h delete mode 100644 archival/bz/bzlib_private.h delete mode 100644 archival/bz/compress.c delete mode 100644 archival/bz/huffman.c create mode 100644 archival/libarchive/Kbuild.src create mode 100644 archival/libarchive/bz/LICENSE create mode 100644 archival/libarchive/bz/README create mode 100644 archival/libarchive/bz/blocksort.c create mode 100644 archival/libarchive/bz/bzlib.c create mode 100644 archival/libarchive/bz/bzlib.h create mode 100644 archival/libarchive/bz/bzlib_private.h create mode 100644 archival/libarchive/bz/compress.c create mode 100644 archival/libarchive/bz/huffman.c create mode 100644 archival/libarchive/data_align.c create mode 100644 archival/libarchive/data_extract_all.c create mode 100644 archival/libarchive/data_extract_to_command.c create mode 100644 archival/libarchive/data_extract_to_stdout.c create mode 100644 archival/libarchive/data_skip.c create mode 100644 archival/libarchive/decompress_bunzip2.c create mode 100644 archival/libarchive/decompress_uncompress.c create mode 100644 archival/libarchive/decompress_unlzma.c create mode 100644 archival/libarchive/decompress_unxz.c create mode 100644 archival/libarchive/decompress_unzip.c create mode 100644 archival/libarchive/filter_accept_all.c create mode 100644 archival/libarchive/filter_accept_list.c create mode 100644 archival/libarchive/filter_accept_list_reassign.c create mode 100644 archival/libarchive/filter_accept_reject_list.c create mode 100644 archival/libarchive/find_list_entry.c create mode 100644 archival/libarchive/get_header_ar.c create mode 100644 archival/libarchive/get_header_cpio.c create mode 100644 archival/libarchive/get_header_tar.c create mode 100644 archival/libarchive/get_header_tar_bz2.c create mode 100644 archival/libarchive/get_header_tar_gz.c create mode 100644 archival/libarchive/get_header_tar_lzma.c create mode 100644 archival/libarchive/header_list.c create mode 100644 archival/libarchive/header_skip.c create mode 100644 archival/libarchive/header_verbose_list.c create mode 100644 archival/libarchive/init_handle.c create mode 100644 archival/libarchive/liblzo.h create mode 100644 archival/libarchive/lzo1x_1.c create mode 100644 archival/libarchive/lzo1x_1o.c create mode 100644 archival/libarchive/lzo1x_9x.c create mode 100644 archival/libarchive/lzo1x_c.c create mode 100644 archival/libarchive/lzo1x_d.c create mode 100644 archival/libarchive/open_transformer.c create mode 100644 archival/libarchive/seek_by_jump.c create mode 100644 archival/libarchive/seek_by_read.c create mode 100644 archival/libarchive/unpack_ar_archive.c create mode 100644 archival/libarchive/unxz/README create mode 100644 archival/libarchive/unxz/xz.h create mode 100644 archival/libarchive/unxz/xz_config.h create mode 100644 archival/libarchive/unxz/xz_dec_bcj.c create mode 100644 archival/libarchive/unxz/xz_dec_lzma2.c create mode 100644 archival/libarchive/unxz/xz_dec_stream.c create mode 100644 archival/libarchive/unxz/xz_lzma2.h create mode 100644 archival/libarchive/unxz/xz_private.h create mode 100644 archival/libarchive/unxz/xz_stream.h delete mode 100644 archival/libunarchive/Kbuild.src delete mode 100644 archival/libunarchive/data_align.c delete mode 100644 archival/libunarchive/data_extract_all.c delete mode 100644 archival/libunarchive/data_extract_to_command.c delete mode 100644 archival/libunarchive/data_extract_to_stdout.c delete mode 100644 archival/libunarchive/data_skip.c delete mode 100644 archival/libunarchive/decompress_bunzip2.c delete mode 100644 archival/libunarchive/decompress_uncompress.c delete mode 100644 archival/libunarchive/decompress_unlzma.c delete mode 100644 archival/libunarchive/decompress_unxz.c delete mode 100644 archival/libunarchive/decompress_unzip.c delete mode 100644 archival/libunarchive/filter_accept_all.c delete mode 100644 archival/libunarchive/filter_accept_list.c delete mode 100644 archival/libunarchive/filter_accept_list_reassign.c delete mode 100644 archival/libunarchive/filter_accept_reject_list.c delete mode 100644 archival/libunarchive/find_list_entry.c delete mode 100644 archival/libunarchive/get_header_ar.c delete mode 100644 archival/libunarchive/get_header_cpio.c delete mode 100644 archival/libunarchive/get_header_tar.c delete mode 100644 archival/libunarchive/get_header_tar_bz2.c delete mode 100644 archival/libunarchive/get_header_tar_gz.c delete mode 100644 archival/libunarchive/get_header_tar_lzma.c delete mode 100644 archival/libunarchive/header_list.c delete mode 100644 archival/libunarchive/header_skip.c delete mode 100644 archival/libunarchive/header_verbose_list.c delete mode 100644 archival/libunarchive/init_handle.c delete mode 100644 archival/libunarchive/liblzo.h delete mode 100644 archival/libunarchive/lzo1x_1.c delete mode 100644 archival/libunarchive/lzo1x_1o.c delete mode 100644 archival/libunarchive/lzo1x_9x.c delete mode 100644 archival/libunarchive/lzo1x_c.c delete mode 100644 archival/libunarchive/lzo1x_d.c delete mode 100644 archival/libunarchive/open_transformer.c delete mode 100644 archival/libunarchive/seek_by_jump.c delete mode 100644 archival/libunarchive/seek_by_read.c delete mode 100644 archival/libunarchive/unpack_ar_archive.c delete mode 100644 archival/libunarchive/unxz/README delete mode 100644 archival/libunarchive/unxz/xz.h delete mode 100644 archival/libunarchive/unxz/xz_config.h delete mode 100644 archival/libunarchive/unxz/xz_dec_bcj.c delete mode 100644 archival/libunarchive/unxz/xz_dec_lzma2.c delete mode 100644 archival/libunarchive/unxz/xz_dec_stream.c delete mode 100644 archival/libunarchive/unxz/xz_lzma2.h delete mode 100644 archival/libunarchive/unxz/xz_private.h delete mode 100644 archival/libunarchive/unxz/xz_stream.h (limited to 'archival') diff --git a/archival/Kbuild.src b/archival/Kbuild.src index e49d38538..3466452f7 100644 --- a/archival/Kbuild.src +++ b/archival/Kbuild.src @@ -4,7 +4,7 @@ # # Licensed under GPLv2, see file LICENSE in this source tree. -libs-y += libunarchive/ +libs-y += libarchive/ lib-y:= diff --git a/archival/ar.c b/archival/ar.c index 730d7c6c6..a2e3306ac 100644 --- a/archival/ar.c +++ b/archival/ar.c @@ -18,7 +18,7 @@ */ #include "libbb.h" -#include "unarchive.h" +#include "archive.h" #include "ar.h" #if ENABLE_FEATURE_AR_CREATE diff --git a/archival/bbunzip.c b/archival/bbunzip.c index 6b6457673..a69e1b3ca 100644 --- a/archival/bbunzip.c +++ b/archival/bbunzip.c @@ -5,7 +5,7 @@ * Licensed under GPLv2 or later, see file LICENSE in this source tree. */ #include "libbb.h" -#include "unarchive.h" +#include "archive.h" enum { OPT_STDOUT = 1 << 0, diff --git a/archival/bz/LICENSE b/archival/bz/LICENSE deleted file mode 100644 index da4346520..000000000 --- a/archival/bz/LICENSE +++ /dev/null @@ -1,44 +0,0 @@ -bzip2 applet in busybox is based on lightly-modified source -of bzip2 version 1.0.4. bzip2 source is distributed -under the following conditions (copied verbatim from LICENSE file) -=========================================================== - - -This program, "bzip2", the associated library "libbzip2", and all -documentation, are copyright (C) 1996-2006 Julian R Seward. All -rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions -are met: - -1. Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - -2. The origin of this software must not be misrepresented; you must - not claim that you wrote the original software. If you use this - software in a product, an acknowledgment in the product - documentation would be appreciated but is not required. - -3. Altered source versions must be plainly marked as such, and must - not be misrepresented as being the original software. - -4. The name of the author may not be used to endorse or promote - products derived from this software without specific prior written - permission. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS -OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE -ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE -GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING -NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -Julian Seward, Cambridge, UK. -jseward@bzip.org -bzip2/libbzip2 version 1.0.4 of 20 December 2006 diff --git a/archival/bz/README b/archival/bz/README deleted file mode 100644 index fffd47b8a..000000000 --- a/archival/bz/README +++ /dev/null @@ -1,90 +0,0 @@ -This file is an abridged version of README from bzip2 1.0.4 -Build instructions (which are not relevant to busyboxed bzip2) -are removed. -=========================================================== - - -This is the README for bzip2/libzip2. -This version is fully compatible with the previous public releases. - ------------------------------------------------------------------- -This file is part of bzip2/libbzip2, a program and library for -lossless, block-sorting data compression. - -bzip2/libbzip2 version 1.0.4 of 20 December 2006 -Copyright (C) 1996-2006 Julian Seward - -Please read the WARNING, DISCLAIMER and PATENTS sections in this file. - -This program is released under the terms of the license contained -in the file LICENSE. ------------------------------------------------------------------- - -Please read and be aware of the following: - - -WARNING: - - This program and library (attempts to) compress data by - performing several non-trivial transformations on it. - Unless you are 100% familiar with *all* the algorithms - contained herein, and with the consequences of modifying them, - you should NOT meddle with the compression or decompression - machinery. Incorrect changes can and very likely *will* - lead to disastrous loss of data. - - -DISCLAIMER: - - I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE - USE OF THIS PROGRAM/LIBRARY, HOWSOEVER CAUSED. - - Every compression of a file implies an assumption that the - compressed file can be decompressed to reproduce the original. - Great efforts in design, coding and testing have been made to - ensure that this program works correctly. However, the complexity - of the algorithms, and, in particular, the presence of various - special cases in the code which occur with very low but non-zero - probability make it impossible to rule out the possibility of bugs - remaining in the program. DO NOT COMPRESS ANY DATA WITH THIS - PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE POSSIBILITY, HOWEVER - SMALL, THAT THE DATA WILL NOT BE RECOVERABLE. - - That is not to say this program is inherently unreliable. - Indeed, I very much hope the opposite is true. bzip2/libbzip2 - has been carefully constructed and extensively tested. - - -PATENTS: - - To the best of my knowledge, bzip2/libbzip2 does not use any - patented algorithms. However, I do not have the resources - to carry out a patent search. Therefore I cannot give any - guarantee of the above statement. - - -I hope you find bzip2 useful. Feel free to contact me at - jseward@bzip.org -if you have any suggestions or queries. Many people mailed me with -comments, suggestions and patches after the releases of bzip-0.15, -bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1, -1.0.2 and 1.0.3, and the changes in bzip2 are largely a result of this -feedback. I thank you for your comments. - -bzip2's "home" is http://www.bzip.org/ - -Julian Seward -jseward@bzip.org -Cambridge, UK. - -18 July 1996 (version 0.15) -25 August 1996 (version 0.21) - 7 August 1997 (bzip2, version 0.1) -29 August 1997 (bzip2, version 0.1pl2) -23 August 1998 (bzip2, version 0.9.0) - 8 June 1999 (bzip2, version 0.9.5) - 4 Sept 1999 (bzip2, version 0.9.5d) - 5 May 2000 (bzip2, version 1.0pre8) -30 December 2001 (bzip2, version 1.0.2pre1) -15 February 2005 (bzip2, version 1.0.3) -20 December 2006 (bzip2, version 1.0.4) diff --git a/archival/bz/blocksort.c b/archival/bz/blocksort.c deleted file mode 100644 index f70c3701d..000000000 --- a/archival/bz/blocksort.c +++ /dev/null @@ -1,1072 +0,0 @@ -/* - * bzip2 is written by Julian Seward . - * Adapted for busybox by Denys Vlasenko . - * See README and LICENSE files in this directory for more information. - */ - -/*-------------------------------------------------------------*/ -/*--- Block sorting machinery ---*/ -/*--- blocksort.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ -This file is part of bzip2/libbzip2, a program and library for -lossless, block-sorting data compression. - -bzip2/libbzip2 version 1.0.4 of 20 December 2006 -Copyright (C) 1996-2006 Julian Seward - -Please read the WARNING, DISCLAIMER and PATENTS sections in the -README file. - -This program is released under the terms of the license contained -in the file LICENSE. ------------------------------------------------------------------- */ - -/* #include "bzlib_private.h" */ - -#define mswap(zz1, zz2) \ -{ \ - int32_t zztmp = zz1; \ - zz1 = zz2; \ - zz2 = zztmp; \ -} - -static -/* No measurable speed gain with inlining */ -/* ALWAYS_INLINE */ -void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn) -{ - while (zzn > 0) { - mswap(ptr[zzp1], ptr[zzp2]); - zzp1++; - zzp2++; - zzn--; - } -} - -static -ALWAYS_INLINE -int32_t mmin(int32_t a, int32_t b) -{ - return (a < b) ? a : b; -} - - -/*---------------------------------------------*/ -/*--- Fallback O(N log(N)^2) sorting ---*/ -/*--- algorithm, for repetitive blocks ---*/ -/*---------------------------------------------*/ - -/*---------------------------------------------*/ -static -inline -void fallbackSimpleSort(uint32_t* fmap, - uint32_t* eclass, - int32_t lo, - int32_t hi) -{ - int32_t i, j, tmp; - uint32_t ec_tmp; - - if (lo == hi) return; - - if (hi - lo > 3) { - for (i = hi-4; i >= lo; i--) { - tmp = fmap[i]; - ec_tmp = eclass[tmp]; - for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4) - fmap[j-4] = fmap[j]; - fmap[j-4] = tmp; - } - } - - for (i = hi-1; i >= lo; i--) { - tmp = fmap[i]; - ec_tmp = eclass[tmp]; - for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) - fmap[j-1] = fmap[j]; - fmap[j-1] = tmp; - } -} - - -/*---------------------------------------------*/ -#define fpush(lz,hz) { \ - stackLo[sp] = lz; \ - stackHi[sp] = hz; \ - sp++; \ -} - -#define fpop(lz,hz) { \ - sp--; \ - lz = stackLo[sp]; \ - hz = stackHi[sp]; \ -} - -#define FALLBACK_QSORT_SMALL_THRESH 10 -#define FALLBACK_QSORT_STACK_SIZE 100 - -static -void fallbackQSort3(uint32_t* fmap, - uint32_t* eclass, - int32_t loSt, - int32_t hiSt) -{ - int32_t unLo, unHi, ltLo, gtHi, n, m; - int32_t sp, lo, hi; - uint32_t med, r, r3; - int32_t stackLo[FALLBACK_QSORT_STACK_SIZE]; - int32_t stackHi[FALLBACK_QSORT_STACK_SIZE]; - - r = 0; - - sp = 0; - fpush(loSt, hiSt); - - while (sp > 0) { - AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004); - - fpop(lo, hi); - if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { - fallbackSimpleSort(fmap, eclass, lo, hi); - continue; - } - - /* Random partitioning. Median of 3 sometimes fails to - * avoid bad cases. Median of 9 seems to help but - * looks rather expensive. This too seems to work but - * is cheaper. Guidance for the magic constants - * 7621 and 32768 is taken from Sedgewick's algorithms - * book, chapter 35. - */ - r = ((r * 7621) + 1) % 32768; - r3 = r % 3; - if (r3 == 0) - med = eclass[fmap[lo]]; - else if (r3 == 1) - med = eclass[fmap[(lo+hi)>>1]]; - else - med = eclass[fmap[hi]]; - - unLo = ltLo = lo; - unHi = gtHi = hi; - - while (1) { - while (1) { - if (unLo > unHi) break; - n = (int32_t)eclass[fmap[unLo]] - (int32_t)med; - if (n == 0) { - mswap(fmap[unLo], fmap[ltLo]); - ltLo++; - unLo++; - continue; - }; - if (n > 0) break; - unLo++; - } - while (1) { - if (unLo > unHi) break; - n = (int32_t)eclass[fmap[unHi]] - (int32_t)med; - if (n == 0) { - mswap(fmap[unHi], fmap[gtHi]); - gtHi--; unHi--; - continue; - }; - if (n < 0) break; - unHi--; - } - if (unLo > unHi) break; - mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; - } - - AssertD(unHi == unLo-1, "fallbackQSort3(2)"); - - if (gtHi < ltLo) continue; - - n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n); - m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - if (n - lo > hi - m) { - fpush(lo, n); - fpush(m, hi); - } else { - fpush(m, hi); - fpush(lo, n); - } - } -} - -#undef fpush -#undef fpop -#undef FALLBACK_QSORT_SMALL_THRESH -#undef FALLBACK_QSORT_STACK_SIZE - - -/*---------------------------------------------*/ -/* Pre: - * nblock > 0 - * eclass exists for [0 .. nblock-1] - * ((uint8_t*)eclass) [0 .. nblock-1] holds block - * ptr exists for [0 .. nblock-1] - * - * Post: - * ((uint8_t*)eclass) [0 .. nblock-1] holds block - * All other areas of eclass destroyed - * fmap [0 .. nblock-1] holds sorted order - * bhtab[0 .. 2+(nblock/32)] destroyed -*/ - -#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) -#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) -#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) -#define WORD_BH(zz) bhtab[(zz) >> 5] -#define UNALIGNED_BH(zz) ((zz) & 0x01f) - -static -void fallbackSort(uint32_t* fmap, - uint32_t* eclass, - uint32_t* bhtab, - int32_t nblock) -{ - int32_t ftab[257]; - int32_t ftabCopy[256]; - int32_t H, i, j, k, l, r, cc, cc1; - int32_t nNotDone; - int32_t nBhtab; - uint8_t* eclass8 = (uint8_t*)eclass; - - /* - * Initial 1-char radix sort to generate - * initial fmap and initial BH bits. - */ - for (i = 0; i < 257; i++) ftab[i] = 0; - for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; - for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; - - j = ftab[0]; /* bbox: optimized */ - for (i = 1; i < 257; i++) { - j += ftab[i]; - ftab[i] = j; - } - - for (i = 0; i < nblock; i++) { - j = eclass8[i]; - k = ftab[j] - 1; - ftab[j] = k; - fmap[k] = i; - } - - nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */ - for (i = 0; i < nBhtab; i++) bhtab[i] = 0; - for (i = 0; i < 256; i++) SET_BH(ftab[i]); - - /* - * Inductively refine the buckets. Kind-of an - * "exponential radix sort" (!), inspired by the - * Manber-Myers suffix array construction algorithm. - */ - - /*-- set sentinel bits for block-end detection --*/ - for (i = 0; i < 32; i++) { - SET_BH(nblock + 2*i); - CLEAR_BH(nblock + 2*i + 1); - } - - /*-- the log(N) loop --*/ - H = 1; - while (1) { - j = 0; - for (i = 0; i < nblock; i++) { - if (ISSET_BH(i)) - j = i; - k = fmap[i] - H; - if (k < 0) - k += nblock; - eclass[k] = j; - } - - nNotDone = 0; - r = -1; - while (1) { - - /*-- find the next non-singleton bucket --*/ - k = r + 1; - while (ISSET_BH(k) && UNALIGNED_BH(k)) - k++; - if (ISSET_BH(k)) { - while (WORD_BH(k) == 0xffffffff) k += 32; - while (ISSET_BH(k)) k++; - } - l = k - 1; - if (l >= nblock) - break; - while (!ISSET_BH(k) && UNALIGNED_BH(k)) - k++; - if (!ISSET_BH(k)) { - while (WORD_BH(k) == 0x00000000) k += 32; - while (!ISSET_BH(k)) k++; - } - r = k - 1; - if (r >= nblock) - break; - - /*-- now [l, r] bracket current bucket --*/ - if (r > l) { - nNotDone += (r - l + 1); - fallbackQSort3(fmap, eclass, l, r); - - /*-- scan bucket and generate header bits-- */ - cc = -1; - for (i = l; i <= r; i++) { - cc1 = eclass[fmap[i]]; - if (cc != cc1) { - SET_BH(i); - cc = cc1; - }; - } - } - } - - H *= 2; - if (H > nblock || nNotDone == 0) - break; - } - - /* - * Reconstruct the original block in - * eclass8 [0 .. nblock-1], since the - * previous phase destroyed it. - */ - j = 0; - for (i = 0; i < nblock; i++) { - while (ftabCopy[j] == 0) - j++; - ftabCopy[j]--; - eclass8[fmap[i]] = (uint8_t)j; - } - AssertH(j < 256, 1005); -} - -#undef SET_BH -#undef CLEAR_BH -#undef ISSET_BH -#undef WORD_BH -#undef UNALIGNED_BH - - -/*---------------------------------------------*/ -/*--- The main, O(N^2 log(N)) sorting ---*/ -/*--- algorithm. Faster for "normal" ---*/ -/*--- non-repetitive blocks. ---*/ -/*---------------------------------------------*/ - -/*---------------------------------------------*/ -static -NOINLINE -int mainGtU( - uint32_t i1, - uint32_t i2, - uint8_t* block, - uint16_t* quadrant, - uint32_t nblock, - int32_t* budget) -{ - int32_t k; - uint8_t c1, c2; - uint16_t s1, s2; - -/* Loop unrolling here is actually very useful - * (generated code is much simpler), - * code size increase is only 270 bytes (i386) - * but speeds up compression 10% overall - */ - -#if CONFIG_BZIP2_FEATURE_SPEED >= 1 - -#define TIMES_8(code) \ - code; code; code; code; \ - code; code; code; code; -#define TIMES_12(code) \ - code; code; code; code; \ - code; code; code; code; \ - code; code; code; code; - -#else - -#define TIMES_8(code) \ -{ \ - int nn = 8; \ - do { \ - code; \ - } while (--nn); \ -} -#define TIMES_12(code) \ -{ \ - int nn = 12; \ - do { \ - code; \ - } while (--nn); \ -} - -#endif - - AssertD(i1 != i2, "mainGtU"); - TIMES_12( - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - i1++; i2++; - ) - - k = nblock + 8; - - do { - TIMES_8( - c1 = block[i1]; c2 = block[i2]; - if (c1 != c2) return (c1 > c2); - s1 = quadrant[i1]; s2 = quadrant[i2]; - if (s1 != s2) return (s1 > s2); - i1++; i2++; - ) - - if (i1 >= nblock) i1 -= nblock; - if (i2 >= nblock) i2 -= nblock; - - (*budget)--; - k -= 8; - } while (k >= 0); - - return False; -} -#undef TIMES_8 -#undef TIMES_12 - -/*---------------------------------------------*/ -/* - * Knuth's increments seem to work better - * than Incerpi-Sedgewick here. Possibly - * because the number of elems to sort is - * usually small, typically <= 20. - */ -static -const int32_t incs[14] = { - 1, 4, 13, 40, 121, 364, 1093, 3280, - 9841, 29524, 88573, 265720, - 797161, 2391484 -}; - -static -void mainSimpleSort(uint32_t* ptr, - uint8_t* block, - uint16_t* quadrant, - int32_t nblock, - int32_t lo, - int32_t hi, - int32_t d, - int32_t* budget) -{ - int32_t i, j, h, bigN, hp; - uint32_t v; - - bigN = hi - lo + 1; - if (bigN < 2) return; - - hp = 0; - while (incs[hp] < bigN) hp++; - hp--; - - for (; hp >= 0; hp--) { - h = incs[hp]; - - i = lo + h; - while (1) { - /*-- copy 1 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; - -/* 1.5% overall speedup, +290 bytes */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 3 - /*-- copy 2 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; - - /*-- copy 3 --*/ - if (i > hi) break; - v = ptr[i]; - j = i; - while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { - ptr[j] = ptr[j-h]; - j = j - h; - if (j <= (lo + h - 1)) break; - } - ptr[j] = v; - i++; -#endif - if (*budget < 0) return; - } - } -} - - -/*---------------------------------------------*/ -/* - * The following is an implementation of - * an elegant 3-way quicksort for strings, - * described in a paper "Fast Algorithms for - * Sorting and Searching Strings", by Robert - * Sedgewick and Jon L. Bentley. - */ - -static -ALWAYS_INLINE -uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c) -{ - uint8_t t; - if (a > b) { - t = a; - a = b; - b = t; - }; - /* here b >= a */ - if (b > c) { - b = c; - if (a > b) - b = a; - } - return b; -} - -#define mpush(lz,hz,dz) \ -{ \ - stackLo[sp] = lz; \ - stackHi[sp] = hz; \ - stackD [sp] = dz; \ - sp++; \ -} - -#define mpop(lz,hz,dz) \ -{ \ - sp--; \ - lz = stackLo[sp]; \ - hz = stackHi[sp]; \ - dz = stackD [sp]; \ -} - -#define mnextsize(az) (nextHi[az] - nextLo[az]) - -#define mnextswap(az,bz) \ -{ \ - int32_t tz; \ - tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ - tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ - tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \ -} - -#define MAIN_QSORT_SMALL_THRESH 20 -#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) -#define MAIN_QSORT_STACK_SIZE 100 - -static NOINLINE -void mainQSort3(uint32_t* ptr, - uint8_t* block, - uint16_t* quadrant, - int32_t nblock, - int32_t loSt, - int32_t hiSt, - int32_t dSt, - int32_t* budget) -{ - int32_t unLo, unHi, ltLo, gtHi, n, m, med; - int32_t sp, lo, hi, d; - - int32_t stackLo[MAIN_QSORT_STACK_SIZE]; - int32_t stackHi[MAIN_QSORT_STACK_SIZE]; - int32_t stackD [MAIN_QSORT_STACK_SIZE]; - - int32_t nextLo[3]; - int32_t nextHi[3]; - int32_t nextD [3]; - - sp = 0; - mpush(loSt, hiSt, dSt); - - while (sp > 0) { - AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001); - - mpop(lo, hi, d); - if (hi - lo < MAIN_QSORT_SMALL_THRESH - || d > MAIN_QSORT_DEPTH_THRESH - ) { - mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget); - if (*budget < 0) - return; - continue; - } - med = (int32_t) mmed3(block[ptr[lo ] + d], - block[ptr[hi ] + d], - block[ptr[(lo+hi) >> 1] + d]); - - unLo = ltLo = lo; - unHi = gtHi = hi; - - while (1) { - while (1) { - if (unLo > unHi) - break; - n = ((int32_t)block[ptr[unLo]+d]) - med; - if (n == 0) { - mswap(ptr[unLo], ptr[ltLo]); - ltLo++; - unLo++; - continue; - }; - if (n > 0) break; - unLo++; - } - while (1) { - if (unLo > unHi) - break; - n = ((int32_t)block[ptr[unHi]+d]) - med; - if (n == 0) { - mswap(ptr[unHi], ptr[gtHi]); - gtHi--; - unHi--; - continue; - }; - if (n < 0) break; - unHi--; - } - if (unLo > unHi) - break; - mswap(ptr[unLo], ptr[unHi]); - unLo++; - unHi--; - } - - AssertD(unHi == unLo-1, "mainQSort3(2)"); - - if (gtHi < ltLo) { - mpush(lo, hi, d + 1); - continue; - } - - n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n); - m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m); - - n = lo + unLo - ltLo - 1; - m = hi - (gtHi - unHi) + 1; - - nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; - nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; - nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; - - if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1); - if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2); - if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1); - - AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)"); - AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)"); - - mpush(nextLo[0], nextHi[0], nextD[0]); - mpush(nextLo[1], nextHi[1], nextD[1]); - mpush(nextLo[2], nextHi[2], nextD[2]); - } -} - -#undef mpush -#undef mpop -#undef mnextsize -#undef mnextswap -#undef MAIN_QSORT_SMALL_THRESH -#undef MAIN_QSORT_DEPTH_THRESH -#undef MAIN_QSORT_STACK_SIZE - - -/*---------------------------------------------*/ -/* Pre: - * nblock > N_OVERSHOOT - * block32 exists for [0 .. nblock-1 +N_OVERSHOOT] - * ((uint8_t*)block32) [0 .. nblock-1] holds block - * ptr exists for [0 .. nblock-1] - * - * Post: - * ((uint8_t*)block32) [0 .. nblock-1] holds block - * All other areas of block32 destroyed - * ftab[0 .. 65536] destroyed - * ptr [0 .. nblock-1] holds sorted order - * if (*budget < 0), sorting was abandoned - */ - -#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) -#define SETMASK (1 << 21) -#define CLEARMASK (~(SETMASK)) - -static NOINLINE -void mainSort(EState* state, - uint32_t* ptr, - uint8_t* block, - uint16_t* quadrant, - uint32_t* ftab, - int32_t nblock, - int32_t* budget) -{ - int32_t i, j, k, ss, sb; - uint8_t c1; - int32_t numQSorted; - uint16_t s; - Bool bigDone[256]; - /* bbox: moved to EState to save stack - int32_t runningOrder[256]; - int32_t copyStart[256]; - int32_t copyEnd [256]; - */ -#define runningOrder (state->mainSort__runningOrder) -#define copyStart (state->mainSort__copyStart) -#define copyEnd (state->mainSort__copyEnd) - - /*-- set up the 2-byte frequency table --*/ - /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */ - memset(ftab, 0, 65537 * sizeof(ftab[0])); - - j = block[0] << 8; - i = nblock - 1; -/* 3%, +300 bytes */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 2 - for (; i >= 3; i -= 4) { - quadrant[i] = 0; - j = (j >> 8) | (((uint16_t)block[i]) << 8); - ftab[j]++; - quadrant[i-1] = 0; - j = (j >> 8) | (((uint16_t)block[i-1]) << 8); - ftab[j]++; - quadrant[i-2] = 0; - j = (j >> 8) | (((uint16_t)block[i-2]) << 8); - ftab[j]++; - quadrant[i-3] = 0; - j = (j >> 8) | (((uint16_t)block[i-3]) << 8); - ftab[j]++; - } -#endif - for (; i >= 0; i--) { - quadrant[i] = 0; - j = (j >> 8) | (((uint16_t)block[i]) << 8); - ftab[j]++; - } - - /*-- (emphasises close relationship of block & quadrant) --*/ - for (i = 0; i < BZ_N_OVERSHOOT; i++) { - block [nblock+i] = block[i]; - quadrant[nblock+i] = 0; - } - - /*-- Complete the initial radix sort --*/ - j = ftab[0]; /* bbox: optimized */ - for (i = 1; i <= 65536; i++) { - j += ftab[i]; - ftab[i] = j; - } - - s = block[0] << 8; - i = nblock - 1; -#if CONFIG_BZIP2_FEATURE_SPEED >= 2 - for (; i >= 3; i -= 4) { - s = (s >> 8) | (block[i] << 8); - j = ftab[s] - 1; - ftab[s] = j; - ptr[j] = i; - s = (s >> 8) | (block[i-1] << 8); - j = ftab[s] - 1; - ftab[s] = j; - ptr[j] = i-1; - s = (s >> 8) | (block[i-2] << 8); - j = ftab[s] - 1; - ftab[s] = j; - ptr[j] = i-2; - s = (s >> 8) | (block[i-3] << 8); - j = ftab[s] - 1; - ftab[s] = j; - ptr[j] = i-3; - } -#endif - for (; i >= 0; i--) { - s = (s >> 8) | (block[i] << 8); - j = ftab[s] - 1; - ftab[s] = j; - ptr[j] = i; - } - - /* - * Now ftab contains the first loc of every small bucket. - * Calculate the running order, from smallest to largest - * big bucket. - */ - for (i = 0; i <= 255; i++) { - bigDone [i] = False; - runningOrder[i] = i; - } - - { - int32_t vv; - /* bbox: was: int32_t h = 1; */ - /* do h = 3 * h + 1; while (h <= 256); */ - uint32_t h = 364; - - do { - /*h = h / 3;*/ - h = (h * 171) >> 9; /* bbox: fast h/3 */ - for (i = h; i <= 255; i++) { - vv = runningOrder[i]; - j = i; - while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) { - runningOrder[j] = runningOrder[j-h]; - j = j - h; - if (j <= (h - 1)) - goto zero; - } - zero: - runningOrder[j] = vv; - } - } while (h != 1); - } - - /* - * The main sorting loop. - */ - - numQSorted = 0; - - for (i = 0; i <= 255; i++) { - - /* - * Process big buckets, starting with the least full. - * Basically this is a 3-step process in which we call - * mainQSort3 to sort the small buckets [ss, j], but - * also make a big effort to avoid the calls if we can. - */ - ss = runningOrder[i]; - - /* - * Step 1: - * Complete the big bucket [ss] by quicksorting - * any unsorted small buckets [ss, j], for j != ss. - * Hopefully previous pointer-scanning phases have already - * completed many of the small buckets [ss, j], so - * we don't have to sort them at all. - */ - for (j = 0; j <= 255; j++) { - if (j != ss) { - sb = (ss << 8) + j; - if (!(ftab[sb] & SETMASK)) { - int32_t lo = ftab[sb] & CLEARMASK; - int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; - if (hi > lo) { - mainQSort3( - ptr, block, quadrant, nblock, - lo, hi, BZ_N_RADIX, budget - ); - if (*budget < 0) return; - numQSorted += (hi - lo + 1); - } - } - ftab[sb] |= SETMASK; - } - } - - AssertH(!bigDone[ss], 1006); - - /* - * Step 2: - * Now scan this big bucket [ss] so as to synthesise the - * sorted order for small buckets [t, ss] for all t, - * including, magically, the bucket [ss,ss] too. - * This will avoid doing Real Work in subsequent Step 1's. - */ - { - for (j = 0; j <= 255; j++) { - copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; - copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; - } - for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { - k = ptr[j] - 1; - if (k < 0) - k += nblock; - c1 = block[k]; - if (!bigDone[c1]) - ptr[copyStart[c1]++] = k; - } - for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { - k = ptr[j]-1; - if (k < 0) - k += nblock; - c1 = block[k]; - if (!bigDone[c1]) - ptr[copyEnd[c1]--] = k; - } - } - - /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. - * Necessity for this case is demonstrated by compressing - * a sequence of approximately 48.5 million of character - * 251; 1.0.0/1.0.1 will then die here. */ - AssertH((copyStart[ss]-1 == copyEnd[ss]) \ - || (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007); - - for (j = 0; j <= 255; j++) - ftab[(j << 8) + ss] |= SETMASK; - - /* - * Step 3: - * The [ss] big bucket is now done. Record this fact, - * and update the quadrant descriptors. Remember to - * update quadrants in the overshoot area too, if - * necessary. The "if (i < 255)" test merely skips - * this updating for the last bucket processed, since - * updating for the last bucket is pointless. - * - * The quadrant array provides a way to incrementally - * cache sort orderings, as they appear, so as to - * make subsequent comparisons in fullGtU() complete - * faster. For repetitive blocks this makes a big - * difference (but not big enough to be able to avoid - * the fallback sorting mechanism, exponential radix sort). - * - * The precise meaning is: at all times: - * - * for 0 <= i < nblock and 0 <= j <= nblock - * - * if block[i] != block[j], - * - * then the relative values of quadrant[i] and - * quadrant[j] are meaningless. - * - * else { - * if quadrant[i] < quadrant[j] - * then the string starting at i lexicographically - * precedes the string starting at j - * - * else if quadrant[i] > quadrant[j] - * then the string starting at j lexicographically - * precedes the string starting at i - * - * else - * the relative ordering of the strings starting - * at i and j has not yet been determined. - * } - */ - bigDone[ss] = True; - - if (i < 255) { - int32_t bbStart = ftab[ss << 8] & CLEARMASK; - int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; - int32_t shifts = 0; - - while ((bbSize >> shifts) > 65534) shifts++; - - for (j = bbSize-1; j >= 0; j--) { - int32_t a2update = ptr[bbStart + j]; - uint16_t qVal = (uint16_t)(j >> shifts); - quadrant[a2update] = qVal; - if (a2update < BZ_N_OVERSHOOT) - quadrant[a2update + nblock] = qVal; - } - AssertH(((bbSize-1) >> shifts) <= 65535, 1002); - } - } -#undef runningOrder -#undef copyStart -#undef copyEnd -} - -#undef BIGFREQ -#undef SETMASK -#undef CLEARMASK - - -/*---------------------------------------------*/ -/* Pre: - * nblock > 0 - * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] - * ((uint8_t*)arr2)[0 .. nblock-1] holds block - * arr1 exists for [0 .. nblock-1] - * - * Post: - * ((uint8_t*)arr2) [0 .. nblock-1] holds block - * All other areas of block destroyed - * ftab[0 .. 65536] destroyed - * arr1[0 .. nblock-1] holds sorted order - */ -static NOINLINE -void BZ2_blockSort(EState* s) -{ - /* In original bzip2 1.0.4, it's a parameter, but 30 - * (which was the default) should work ok. */ - enum { wfact = 30 }; - - uint32_t* ptr = s->ptr; - uint8_t* block = s->block; - uint32_t* ftab = s->ftab; - int32_t nblock = s->nblock; - uint16_t* quadrant; - int32_t budget; - int32_t i; - - if (nblock < 10000) { - fallbackSort(s->arr1, s->arr2, ftab, nblock); - } else { - /* Calculate the location for quadrant, remembering to get - * the alignment right. Assumes that &(block[0]) is at least - * 2-byte aligned -- this should be ok since block is really - * the first section of arr2. - */ - i = nblock + BZ_N_OVERSHOOT; - if (i & 1) i++; - quadrant = (uint16_t*)(&(block[i])); - - /* (wfact-1) / 3 puts the default-factor-30 - * transition point at very roughly the same place as - * with v0.1 and v0.9.0. - * Not that it particularly matters any more, since the - * resulting compressed stream is now the same regardless - * of whether or not we use the main sort or fallback sort. - */ - budget = nblock * ((wfact-1) / 3); - - mainSort(s, ptr, block, quadrant, ftab, nblock, &budget); - if (budget < 0) { - fallbackSort(s->arr1, s->arr2, ftab, nblock); - } - } - - s->origPtr = -1; - for (i = 0; i < s->nblock; i++) - if (ptr[i] == 0) { - s->origPtr = i; - break; - }; - - AssertH(s->origPtr != -1, 1003); -} - - -/*-------------------------------------------------------------*/ -/*--- end blocksort.c ---*/ -/*-------------------------------------------------------------*/ diff --git a/archival/bz/bzlib.c b/archival/bz/bzlib.c deleted file mode 100644 index b3beeabed..000000000 --- a/archival/bz/bzlib.c +++ /dev/null @@ -1,431 +0,0 @@ -/* - * bzip2 is written by Julian Seward . - * Adapted for busybox by Denys Vlasenko . - * See README and LICENSE files in this directory for more information. - */ - -/*-------------------------------------------------------------*/ -/*--- Library top-level functions. ---*/ -/*--- bzlib.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ -This file is part of bzip2/libbzip2, a program and library for -lossless, block-sorting data compression. - -bzip2/libbzip2 version 1.0.4 of 20 December 2006 -Copyright (C) 1996-2006 Julian Seward - -Please read the WARNING, DISCLAIMER and PATENTS sections in the -README file. - -This program is released under the terms of the license contained -in the file LICENSE. ------------------------------------------------------------------- */ - -/* CHANGES - * 0.9.0 -- original version. - * 0.9.0a/b -- no changes in this file. - * 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress(). - * fixed bzWrite/bzRead to ignore zero-length requests. - * fixed bzread to correctly handle read requests after EOF. - * wrong parameter order in call to bzDecompressInit in - * bzBuffToBuffDecompress. Fixed. - */ - -/* #include "bzlib_private.h" */ - -/*---------------------------------------------------*/ -/*--- Compression stuff ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------------*/ -#if BZ_LIGHT_DEBUG -static -void bz_assert_fail(int errcode) -{ - /* if (errcode == 1007) bb_error_msg_and_die("probably bad RAM"); */ - bb_error_msg_and_die("internal error %d", errcode); -} -#endif - -/*---------------------------------------------------*/ -static -void prepare_new_block(EState* s) -{ - int i; - s->nblock = 0; - s->numZ = 0; - s->state_out_pos = 0; - BZ_INITIALISE_CRC(s->blockCRC); - /* inlined memset would be nice to have here */ - for (i = 0; i < 256; i++) - s->inUse[i] = 0; - s->blockNo++; -} - - -/*---------------------------------------------------*/ -static -ALWAYS_INLINE -void init_RL(EState* s) -{ - s->state_in_ch = 256; - s->state_in_len = 0; -} - - -static -int isempty_RL(EState* s) -{ - return (s->state_in_ch >= 256 || s->state_in_len <= 0); -} - - -/*---------------------------------------------------*/ -static -void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k) -{ - int32_t n; - EState* s; - - s = xzalloc(sizeof(EState)); - s->strm = strm; - - n = 100000 * blockSize100k; - s->arr1 = xmalloc(n * sizeof(uint32_t)); - s->mtfv = (uint16_t*)s->arr1; - s->ptr = (uint32_t*)s->arr1; - s->arr2 = xmalloc((n + BZ_N_OVERSHOOT) * sizeof(uint32_t)); - s->block = (uint8_t*)s->arr2; - s->ftab = xmalloc(65537 * sizeof(uint32_t)); - - s->crc32table = crc32_filltable(NULL, 1); - - s->state = BZ_S_INPUT; - s->mode = BZ_M_RUNNING; - s->blockSize100k = blockSize100k; - s->nblockMAX = n - 19; - - strm->state = s; - /*strm->total_in = 0;*/ - strm->total_out = 0; - init_RL(s); - prepare_new_block(s); -} - - -/*---------------------------------------------------*/ -static -void add_pair_to_block(EState* s) -{ - int32_t i; - uint8_t ch = (uint8_t)(s->state_in_ch); - for (i = 0; i < s->state_in_len; i++) { - BZ_UPDATE_CRC(s, s->blockCRC, ch); - } - s->inUse[s->state_in_ch] = 1; - switch (s->state_in_len) { - case 3: - s->block[s->nblock] = (uint8_t)ch; s->nblock++; - /* fall through */ - case 2: - s->block[s->nblock] = (uint8_t)ch; s->nblock++; - /* fall through */ - case 1: - s->block[s->nblock] = (uint8_t)ch; s->nblock++; - break; - default: - s->inUse[s->state_in_len - 4] = 1; - s->block[s->nblock] = (uint8_t)ch; s->nblock++; - s->block[s->nblock] = (uint8_t)ch; s->nblock++; - s->block[s->nblock] = (uint8_t)ch; s->nblock++; - s->block[s->nblock] = (uint8_t)ch; s->nblock++; - s->block[s->nblock] = (uint8_t)(s->state_in_len - 4); - s->nblock++; - break; - } -} - - -/*---------------------------------------------------*/ -static -void flush_RL(EState* s) -{ - if (s->state_in_ch < 256) add_pair_to_block(s); - init_RL(s); -} - - -/*---------------------------------------------------*/ -#define ADD_CHAR_TO_BLOCK(zs, zchh0) \ -{ \ - uint32_t zchh = (uint32_t)(zchh0); \ - /*-- fast track the common case --*/ \ - if (zchh != zs->state_in_ch && zs->state_in_len == 1) { \ - uint8_t ch = (uint8_t)(zs->state_in_ch); \ - BZ_UPDATE_CRC(zs, zs->blockCRC, ch); \ - zs->inUse[zs->state_in_ch] = 1; \ - zs->block[zs->nblock] = (uint8_t)ch; \ - zs->nblock++; \ - zs->state_in_ch = zchh; \ - } \ - else \ - /*-- general, uncommon cases --*/ \ - if (zchh != zs->state_in_ch || zs->state_in_len == 255) { \ - if (zs->state_in_ch < 256) \ - add_pair_to_block(zs); \ - zs->state_in_ch = zchh; \ - zs->state_in_len = 1; \ - } else { \ - zs->state_in_len++; \ - } \ -} - - -/*---------------------------------------------------*/ -static -void /*Bool*/ copy_input_until_stop(EState* s) -{ - /*Bool progress_in = False;*/ - -#ifdef SAME_CODE_AS_BELOW - if (s->mode == BZ_M_RUNNING) { - /*-- fast track the common case --*/ - while (1) { - /*-- no input? --*/ - if (s->strm->avail_in == 0) break; - /*-- block full? --*/ - if (s->nblock >= s->nblockMAX) break; - /*progress_in = True;*/ - ADD_CHAR_TO_BLOCK(s, (uint32_t)(*(uint8_t*)(s->strm->next_in))); - s->strm->next_in++; - s->strm->avail_in--; - /*s->strm->total_in++;*/ - } - } else -#endif - { - /*-- general, uncommon case --*/ - while (1) { - /*-- no input? --*/ - if (s->strm->avail_in == 0) break; - /*-- block full? --*/ - if (s->nblock >= s->nblockMAX) break; - //# /*-- flush/finish end? --*/ - //# if (s->avail_in_expect == 0) break; - /*progress_in = True;*/ - ADD_CHAR_TO_BLOCK(s, *(uint8_t*)(s->strm->next_in)); - s->strm->next_in++; - s->strm->avail_in--; - /*s->strm->total_in++;*/ - //# s->avail_in_expect--; - } - } - /*return progress_in;*/ -} - - -/*---------------------------------------------------*/ -static -void /*Bool*/ copy_output_until_stop(EState* s) -{ - /*Bool progress_out = False;*/ - - while (1) { - /*-- no output space? --*/ - if (s->strm->avail_out == 0) break; - - /*-- block done? --*/ - if (s->state_out_pos >= s->numZ) break; - - /*progress_out = True;*/ - *(s->strm->next_out) = s->zbits[s->state_out_pos]; - s->state_out_pos++; - s->strm->avail_out--; - s->strm->next_out++; - s->strm->total_out++; - } - /*return progress_out;*/ -} - - -/*---------------------------------------------------*/ -static -void /*Bool*/ handle_compress(bz_stream *strm) -{ - /*Bool progress_in = False;*/ - /*Bool progress_out = False;*/ - EState* s = strm->state; - - while (1) { - if (s->state == BZ_S_OUTPUT) { - /*progress_out |=*/ copy_output_until_stop(s); - if (s->state_out_pos < s->numZ) break; - if (s->mode == BZ_M_FINISHING - //# && s->avail_in_expect == 0 - && s->strm->avail_in == 0 - && isempty_RL(s)) - break; - prepare_new_block(s); - s->state = BZ_S_INPUT; -#ifdef FLUSH_IS_UNUSED - if (s->mode == BZ_M_FLUSHING - && s->avail_in_expect == 0 - && isempty_RL(s)) - break; -#endif - } - - if (s->state == BZ_S_INPUT) { - /*progress_in |=*/ copy_input_until_stop(s); - //#if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { - if (s->mode != BZ_M_RUNNING && s->strm->avail_in == 0) { - flush_RL(s); - BZ2_compressBlock(s, (s->mode == BZ_M_FINISHING)); - s->state = BZ_S_OUTPUT; - } else - if (s->nblock >= s->nblockMAX) { - BZ2_compressBlock(s, 0); - s->state = BZ_S_OUTPUT; - } else - if (s->strm->avail_in == 0) { - break; - } - } - } - - /*return progress_in || progress_out;*/ -} - - -/*---------------------------------------------------*/ -static -int BZ2_bzCompress(bz_stream *strm, int action) -{ - /*Bool progress;*/ - EState* s; - - s = strm->state; - - switch (s->mode) { - case BZ_M_RUNNING: - if (action == BZ_RUN) { - /*progress =*/ handle_compress(strm); - /*return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;*/ - return BZ_RUN_OK; - } -#ifdef FLUSH_IS_UNUSED - else - if (action == BZ_FLUSH) { - //#s->avail_in_expect = strm->avail_in; - s->mode = BZ_M_FLUSHING; - goto case_BZ_M_FLUSHING; - } -#endif - else - /*if (action == BZ_FINISH)*/ { - //#s->avail_in_expect = strm->avail_in; - s->mode = BZ_M_FINISHING; - goto case_BZ_M_FINISHING; - } - -#ifdef FLUSH_IS_UNUSED - case_BZ_M_FLUSHING: - case BZ_M_FLUSHING: - /*if (s->avail_in_expect != s->strm->avail_in) - return BZ_SEQUENCE_ERROR;*/ - /*progress =*/ handle_compress(strm); - if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) - return BZ_FLUSH_OK; - s->mode = BZ_M_RUNNING; - return BZ_RUN_OK; -#endif - - case_BZ_M_FINISHING: - /*case BZ_M_FINISHING:*/ - default: - /*if (s->avail_in_expect != s->strm->avail_in) - return BZ_SEQUENCE_ERROR;*/ - /*progress =*/ handle_compress(strm); - /*if (!progress) return BZ_SEQUENCE_ERROR;*/ - //#if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) - //# return BZ_FINISH_OK; - if (s->strm->avail_in > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) - return BZ_FINISH_OK; - /*s->mode = BZ_M_IDLE;*/ - return BZ_STREAM_END; - } - /* return BZ_OK; --not reached--*/ -} - - -/*---------------------------------------------------*/ -#if ENABLE_FEATURE_CLEAN_UP -static -void BZ2_bzCompressEnd(bz_stream *strm) -{ - EState* s; - - s = strm->state; - free(s->arr1); - free(s->arr2); - free(s->ftab); - free(s->crc32table); - free(strm->state); -} -#endif - - -/*---------------------------------------------------*/ -/*--- Misc convenience stuff ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------------*/ -#ifdef EXAMPLE_CODE_FOR_MEM_TO_MEM_COMPRESSION -static -int BZ2_bzBuffToBuffCompress(char* dest, - unsigned int* destLen, - char* source, - unsigned int sourceLen, - int blockSize100k) -{ - bz_stream strm; - int ret; - - if (dest == NULL || destLen == NULL - || source == NULL - || blockSize100k < 1 || blockSize100k > 9 - ) { - return BZ_PARAM_ERROR; - } - - BZ2_bzCompressInit(&strm, blockSize100k); - - strm.next_in = source; - strm.next_out = dest; - strm.avail_in = sourceLen; - strm.avail_out = *destLen; - - ret = BZ2_bzCompress(&strm, BZ_FINISH); - if (ret == BZ_FINISH_OK) goto output_overflow; - if (ret != BZ_STREAM_END) goto errhandler; - - /* normal termination */ - *destLen -= strm.avail_out; - BZ2_bzCompressEnd(&strm); - return BZ_OK; - - output_overflow: - BZ2_bzCompressEnd(&strm); - return BZ_OUTBUFF_FULL; - - errhandler: - BZ2_bzCompressEnd(&strm); - return ret; -} -#endif - -/*-------------------------------------------------------------*/ -/*--- end bzlib.c ---*/ -/*-------------------------------------------------------------*/ diff --git a/archival/bz/bzlib.h b/archival/bz/bzlib.h deleted file mode 100644 index 1bb811c4a..000000000 --- a/archival/bz/bzlib.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * bzip2 is written by Julian Seward . - * Adapted for busybox by Denys Vlasenko . - * See README and LICENSE files in this directory for more information. - */ - -/*-------------------------------------------------------------*/ -/*--- Public header file for the library. ---*/ -/*--- bzlib.h ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ -This file is part of bzip2/libbzip2, a program and library for -lossless, block-sorting data compression. - -bzip2/libbzip2 version 1.0.4 of 20 December 2006 -Copyright (C) 1996-2006 Julian Seward - -Please read the WARNING, DISCLAIMER and PATENTS sections in the -README file. - -This program is released under the terms of the license contained -in the file LICENSE. ------------------------------------------------------------------- */ - -#define BZ_RUN 0 -#define BZ_FLUSH 1 -#define BZ_FINISH 2 - -#define BZ_OK 0 -#define BZ_RUN_OK 1 -#define BZ_FLUSH_OK 2 -#define BZ_FINISH_OK 3 -#define BZ_STREAM_END 4 -#define BZ_SEQUENCE_ERROR (-1) -#define BZ_PARAM_ERROR (-2) -#define BZ_MEM_ERROR (-3) -#define BZ_DATA_ERROR (-4) -#define BZ_DATA_ERROR_MAGIC (-5) -#define BZ_IO_ERROR (-6) -#define BZ_UNEXPECTED_EOF (-7) -#define BZ_OUTBUFF_FULL (-8) -#define BZ_CONFIG_ERROR (-9) - -typedef struct bz_stream { - void *state; - char *next_in; - char *next_out; - unsigned avail_in; - unsigned avail_out; - /*unsigned long long total_in;*/ - unsigned long long total_out; -} bz_stream; - -/*-- Core (low-level) library functions --*/ - -static void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k); -static int BZ2_bzCompress(bz_stream *strm, int action); -#if ENABLE_FEATURE_CLEAN_UP -static void BZ2_bzCompressEnd(bz_stream *strm); -#endif - -/*-------------------------------------------------------------*/ -/*--- end bzlib.h ---*/ -/*-------------------------------------------------------------*/ diff --git a/archival/bz/bzlib_private.h b/archival/bz/bzlib_private.h deleted file mode 100644 index 6430ce407..000000000 --- a/archival/bz/bzlib_private.h +++ /dev/null @@ -1,219 +0,0 @@ -/* - * bzip2 is written by Julian Seward . - * Adapted for busybox by Denys Vlasenko . - * See README and LICENSE files in this directory for more information. - */ - -/*-------------------------------------------------------------*/ -/*--- Private header file for the library. ---*/ -/*--- bzlib_private.h ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ -This file is part of bzip2/libbzip2, a program and library for -lossless, block-sorting data compression. - -bzip2/libbzip2 version 1.0.4 of 20 December 2006 -Copyright (C) 1996-2006 Julian Seward - -Please read the WARNING, DISCLAIMER and PATENTS sections in the -README file. - -This program is released under the terms of the license contained -in the file LICENSE. ------------------------------------------------------------------- */ - -/* #include "bzlib.h" */ - -/*-- General stuff. --*/ - -typedef unsigned char Bool; - -#define True ((Bool)1) -#define False ((Bool)0) - -#if BZ_LIGHT_DEBUG -static void bz_assert_fail(int errcode) NORETURN; -#define AssertH(cond, errcode) \ -do { \ - if (!(cond)) \ - bz_assert_fail(errcode); \ -} while (0) -#else -#define AssertH(cond, msg) do { } while (0) -#endif - -#if BZ_DEBUG -#define AssertD(cond, msg) \ -do { \ - if (!(cond)) \ - bb_error_msg_and_die("(debug build): internal error %s", msg); \ -} while (0) -#else -#define AssertD(cond, msg) do { } while (0) -#endif - - -/*-- Header bytes. --*/ - -#define BZ_HDR_B 0x42 /* 'B' */ -#define BZ_HDR_Z 0x5a /* 'Z' */ -#define BZ_HDR_h 0x68 /* 'h' */ -#define BZ_HDR_0 0x30 /* '0' */ - -#define BZ_HDR_BZh0 0x425a6830 - -/*-- Constants for the back end. --*/ - -#define BZ_MAX_ALPHA_SIZE 258 -#define BZ_MAX_CODE_LEN 23 - -#define BZ_RUNA 0 -#define BZ_RUNB 1 - -#define BZ_N_GROUPS 6 -#define BZ_G_SIZE 50 -#define BZ_N_ITERS 4 - -#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) - - -/*-- Stuff for doing CRCs. --*/ - -#define BZ_INITIALISE_CRC(crcVar) \ -{ \ - crcVar = 0xffffffffL; \ -} - -#define BZ_FINALISE_CRC(crcVar) \ -{ \ - crcVar = ~(crcVar); \ -} - -#define BZ_UPDATE_CRC(s, crcVar, cha) \ -{ \ - crcVar = (crcVar << 8) ^ s->crc32table[(crcVar >> 24) ^ ((uint8_t)cha)]; \ -} - - -/*-- States and modes for compression. --*/ - -#define BZ_M_IDLE 1 -#define BZ_M_RUNNING 2 -#define BZ_M_FLUSHING 3 -#define BZ_M_FINISHING 4 - -#define BZ_S_OUTPUT 1 -#define BZ_S_INPUT 2 - -#define BZ_N_RADIX 2 -#define BZ_N_QSORT 12 -#define BZ_N_SHELL 18 -#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2) - - -/*-- Structure holding all the compression-side stuff. --*/ - -typedef struct EState { - /* pointer back to the struct bz_stream */ - bz_stream *strm; - - /* mode this stream is in, and whether inputting */ - /* or outputting data */ - int32_t mode; - int32_t state; - - /* remembers avail_in when flush/finish requested */ -/* bbox: not needed, strm->avail_in always has the same value */ -/* commented out with '//#' throughout the code */ - /* uint32_t avail_in_expect; */ - - /* for doing the block sorting */ - int32_t origPtr; - uint32_t *arr1; - uint32_t *arr2; - uint32_t *ftab; - - /* aliases for arr1 and arr2 */ - uint32_t *ptr; - uint8_t *block; - uint16_t *mtfv; - uint8_t *zbits; - - /* guess what */ - uint32_t *crc32table; - - /* run-length-encoding of the input */ - uint32_t state_in_ch; - int32_t state_in_len; - - /* input and output limits and current posns */ - int32_t nblock; - int32_t nblockMAX; - int32_t numZ; - int32_t state_out_pos; - - /* the buffer for bit stream creation */ - uint32_t bsBuff; - int32_t bsLive; - - /* block and combined CRCs */ - uint32_t blockCRC; - uint32_t combinedCRC; - - /* misc administratium */ - int32_t blockNo; - int32_t blockSize100k; - - /* stuff for coding the MTF values */ - int32_t nMTF; - - /* map of bytes used in block */ - int32_t nInUse; - Bool inUse[256] ALIGNED(sizeof(long)); - uint8_t unseqToSeq[256]; - - /* stuff for coding the MTF values */ - int32_t mtfFreq [BZ_MAX_ALPHA_SIZE]; - uint8_t selector [BZ_MAX_SELECTORS]; - uint8_t selectorMtf[BZ_MAX_SELECTORS]; - - uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - - /* stack-saving measures: these can be local, but they are too big */ - int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; -#if CONFIG_BZIP2_FEATURE_SPEED >= 5 - /* second dimension: only 3 needed; 4 makes index calculations faster */ - uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4]; -#endif - int32_t BZ2_hbMakeCodeLengths__heap [BZ_MAX_ALPHA_SIZE + 2]; - int32_t BZ2_hbMakeCodeLengths__weight[BZ_MAX_ALPHA_SIZE * 2]; - int32_t BZ2_hbMakeCodeLengths__parent[BZ_MAX_ALPHA_SIZE * 2]; - - int32_t mainSort__runningOrder[256]; - int32_t mainSort__copyStart[256]; - int32_t mainSort__copyEnd[256]; -} EState; - - -/*-- compression. --*/ - -static void -BZ2_blockSort(EState*); - -static void -BZ2_compressBlock(EState*, int); - -static void -BZ2_bsInitWrite(EState*); - -static void -BZ2_hbAssignCodes(int32_t*, uint8_t*, int32_t, int32_t, int32_t); - -static void -BZ2_hbMakeCodeLengths(EState*, uint8_t*, int32_t*, int32_t, int32_t); - -/*-------------------------------------------------------------*/ -/*--- end bzlib_private.h ---*/ -/*-------------------------------------------------------------*/ diff --git a/archival/bz/compress.c b/archival/bz/compress.c deleted file mode 100644 index 6f1c70a08..000000000 --- a/archival/bz/compress.c +++ /dev/null @@ -1,685 +0,0 @@ -/* - * bzip2 is written by Julian Seward . - * Adapted for busybox by Denys Vlasenko . - * See README and LICENSE files in this directory for more information. - */ - -/*-------------------------------------------------------------*/ -/*--- Compression machinery (not incl block sorting) ---*/ -/*--- compress.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ -This file is part of bzip2/libbzip2, a program and library for -lossless, block-sorting data compression. - -bzip2/libbzip2 version 1.0.4 of 20 December 2006 -Copyright (C) 1996-2006 Julian Seward - -Please read the WARNING, DISCLAIMER and PATENTS sections in the -README file. - -This program is released under the terms of the license contained -in the file LICENSE. ------------------------------------------------------------------- */ - -/* CHANGES - * 0.9.0 -- original version. - * 0.9.0a/b -- no changes in this file. - * 0.9.0c -- changed setting of nGroups in sendMTFValues() - * so as to do a bit better on small files -*/ - -/* #include "bzlib_private.h" */ - -/*---------------------------------------------------*/ -/*--- Bit stream I/O ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------------*/ -static -void BZ2_bsInitWrite(EState* s) -{ - s->bsLive = 0; - s->bsBuff = 0; -} - - -/*---------------------------------------------------*/ -static NOINLINE -void bsFinishWrite(EState* s) -{ - while (s->bsLive > 0) { - s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24); - s->numZ++; - s->bsBuff <<= 8; - s->bsLive -= 8; - } -} - - -/*---------------------------------------------------*/ -static -/* Helps only on level 5, on other levels hurts. ? */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 5 -ALWAYS_INLINE -#endif -void bsW(EState* s, int32_t n, uint32_t v) -{ - while (s->bsLive >= 8) { - s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24); - s->numZ++; - s->bsBuff <<= 8; - s->bsLive -= 8; - } - s->bsBuff |= (v << (32 - s->bsLive - n)); - s->bsLive += n; -} - - -/*---------------------------------------------------*/ -static -void bsPutU32(EState* s, unsigned u) -{ - bsW(s, 8, (u >> 24) & 0xff); - bsW(s, 8, (u >> 16) & 0xff); - bsW(s, 8, (u >> 8) & 0xff); - bsW(s, 8, u & 0xff); -} - - -/*---------------------------------------------------*/ -static -void bsPutU16(EState* s, unsigned u) -{ - bsW(s, 8, (u >> 8) & 0xff); - bsW(s, 8, u & 0xff); -} - - -/*---------------------------------------------------*/ -/*--- The back end proper ---*/ -/*---------------------------------------------------*/ - -/*---------------------------------------------------*/ -static -void makeMaps_e(EState* s) -{ - int i; - s->nInUse = 0; - for (i = 0; i < 256; i++) { - if (s->inUse[i]) { - s->unseqToSeq[i] = s->nInUse; - s->nInUse++; - } - } -} - - -/*---------------------------------------------------*/ -static NOINLINE -void generateMTFValues(EState* s) -{ - uint8_t yy[256]; - int32_t i, j; - int32_t zPend; - int32_t wr; - int32_t EOB; - - /* - * After sorting (eg, here), - * s->arr1[0 .. s->nblock-1] holds sorted order, - * and - * ((uint8_t*)s->arr2)[0 .. s->nblock-1] - * holds the original block data. - * - * The first thing to do is generate the MTF values, - * and put them in ((uint16_t*)s->arr1)[0 .. s->nblock-1]. - * - * Because there are strictly fewer or equal MTF values - * than block values, ptr values in this area are overwritten - * with MTF values only when they are no longer needed. - * - * The final compressed bitstream is generated into the - * area starting at &((uint8_t*)s->arr2)[s->nblock] - * - * These storage aliases are set up in bzCompressInit(), - * except for the last one, which is arranged in - * compressBlock(). - */ - uint32_t* ptr = s->ptr; - uint8_t* block = s->block; - uint16_t* mtfv = s->mtfv; - - makeMaps_e(s); - EOB = s->nInUse+1; - - for (i = 0; i <= EOB; i++) - s->mtfFreq[i] = 0; - - wr = 0; - zPend = 0; - for (i = 0; i < s->nInUse; i++) - yy[i] = (uint8_t) i; - - for (i = 0; i < s->nblock; i++) { - uint8_t ll_i; - AssertD(wr <= i, "generateMTFValues(1)"); - j = ptr[i] - 1; - if (j < 0) - j += s->nblock; - ll_i = s->unseqToSeq[block[j]]; - AssertD(ll_i < s->nInUse, "generateMTFValues(2a)"); - - if (yy[0] == ll_i) { - zPend++; - } else { - if (zPend > 0) { - zPend--; - while (1) { - if (zPend & 1) { - mtfv[wr] = BZ_RUNB; wr++; - s->mtfFreq[BZ_RUNB]++; - } else { - mtfv[wr] = BZ_RUNA; wr++; - s->mtfFreq[BZ_RUNA]++; - } - if (zPend < 2) break; - zPend = (uint32_t)(zPend - 2) / 2; - /* bbox: unsigned div is easier */ - }; - zPend = 0; - } - { - register uint8_t rtmp; - register uint8_t* ryy_j; - register uint8_t rll_i; - rtmp = yy[1]; - yy[1] = yy[0]; - ryy_j = &(yy[1]); - rll_i = ll_i; - while (rll_i != rtmp) { - register uint8_t rtmp2; - ryy_j++; - rtmp2 = rtmp; - rtmp = *ryy_j; - *ryy_j = rtmp2; - }; - yy[0] = rtmp; - j = ryy_j - &(yy[0]); - mtfv[wr] = j+1; - wr++; - s->mtfFreq[j+1]++; - } - } - } - - if (zPend > 0) { - zPend--; - while (1) { - if (zPend & 1) { - mtfv[wr] = BZ_RUNB; - wr++; - s->mtfFreq[BZ_RUNB]++; - } else { - mtfv[wr] = BZ_RUNA; - wr++; - s->mtfFreq[BZ_RUNA]++; - } - if (zPend < 2) - break; - zPend = (uint32_t)(zPend - 2) / 2; - /* bbox: unsigned div is easier */ - }; - zPend = 0; - } - - mtfv[wr] = EOB; - wr++; - s->mtfFreq[EOB]++; - - s->nMTF = wr; -} - - -/*---------------------------------------------------*/ -#define BZ_LESSER_ICOST 0 -#define BZ_GREATER_ICOST 15 - -static NOINLINE -void sendMTFValues(EState* s) -{ - int32_t v, t, i, j, gs, ge, totc, bt, bc, iter; - int32_t nSelectors, alphaSize, minLen, maxLen, selCtr; - int32_t nGroups, nBytes; - - /* - * uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - * is a global since the decoder also needs it. - * - * int32_t code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - * int32_t rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; - * are also globals only used in this proc. - * Made global to keep stack frame size small. - */ -#define code sendMTFValues__code -#define rfreq sendMTFValues__rfreq -#define len_pack sendMTFValues__len_pack - - uint16_t cost[BZ_N_GROUPS]; - int32_t fave[BZ_N_GROUPS]; - - uint16_t* mtfv = s->mtfv; - - alphaSize = s->nInUse + 2; - for (t = 0; t < BZ_N_GROUPS; t++) - for (v = 0; v < alphaSize; v++) - s->len[t][v] = BZ_GREATER_ICOST; - - /*--- Decide how many coding tables to use ---*/ - AssertH(s->nMTF > 0, 3001); - if (s->nMTF < 200) nGroups = 2; else - if (s->nMTF < 600) nGroups = 3; else - if (s->nMTF < 1200) nGroups = 4; else - if (s->nMTF < 2400) nGroups = 5; else - nGroups = 6; - - /*--- Generate an initial set of coding tables ---*/ - { - int32_t nPart, remF, tFreq, aFreq; - - nPart = nGroups; - remF = s->nMTF; - gs = 0; - while (nPart > 0) { - tFreq = remF / nPart; - ge = gs - 1; - aFreq = 0; - while (aFreq < tFreq && ge < alphaSize-1) { - ge++; - aFreq += s->mtfFreq[ge]; - } - - if (ge > gs - && nPart != nGroups && nPart != 1 - && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */ - ) { - aFreq -= s->mtfFreq[ge]; - ge--; - } - - for (v = 0; v < alphaSize; v++) - if (v >= gs && v <= ge) - s->len[nPart-1][v] = BZ_LESSER_ICOST; - else - s->len[nPart-1][v] = BZ_GREATER_ICOST; - - nPart--; - gs = ge + 1; - remF -= aFreq; - } - } - - /* - * Iterate up to BZ_N_ITERS times to improve the tables. - */ - for (iter = 0; iter < BZ_N_ITERS; iter++) { - for (t = 0; t < nGroups; t++) - fave[t] = 0; - - for (t = 0; t < nGroups; t++) - for (v = 0; v < alphaSize; v++) - s->rfreq[t][v] = 0; - -#if CONFIG_BZIP2_FEATURE_SPEED >= 5 - /* - * Set up an auxiliary length table which is used to fast-track - * the common case (nGroups == 6). - */ - if (nGroups == 6) { - for (v = 0; v < alphaSize; v++) { - s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; - s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; - s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; - } - } -#endif - nSelectors = 0; - totc = 0; - gs = 0; - while (1) { - /*--- Set group start & end marks. --*/ - if (gs >= s->nMTF) - break; - ge = gs + BZ_G_SIZE - 1; - if (ge >= s->nMTF) - ge = s->nMTF-1; - - /* - * Calculate the cost of this group as coded - * by each of the coding tables. - */ - for (t = 0; t < nGroups; t++) - cost[t] = 0; -#if CONFIG_BZIP2_FEATURE_SPEED >= 5 - if (nGroups == 6 && 50 == ge-gs+1) { - /*--- fast track the common case ---*/ - register uint32_t cost01, cost23, cost45; - register uint16_t icv; - cost01 = cost23 = cost45 = 0; -#define BZ_ITER(nn) \ - icv = mtfv[gs+(nn)]; \ - cost01 += s->len_pack[icv][0]; \ - cost23 += s->len_pack[icv][1]; \ - cost45 += s->len_pack[icv][2]; - BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); - BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); - BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); - BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); - BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); - BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); - BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); - BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); - BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); - BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); -#undef BZ_ITER - cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; - cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; - cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; - - } else -#endif - { - /*--- slow version which correctly handles all situations ---*/ - for (i = gs; i <= ge; i++) { - uint16_t icv = mtfv[i]; - for (t = 0; t < nGroups; t++) - cost[t] += s->len[t][icv]; - } - } - /* - * Find the coding table which is best for this group, - * and record its identity in the selector table. - */ - /*bc = 999999999;*/ - /*bt = -1;*/ - bc = cost[0]; - bt = 0; - for (t = 1 /*0*/; t < nGroups; t++) { - if (cost[t] < bc) { - bc = cost[t]; - bt = t; - } - } - totc += bc; - fave[bt]++; - s->selector[nSelectors] = bt; - nSelectors++; - - /* - * Increment the symbol frequencies for the selected table. - */ -/* 1% faster compress. +800 bytes */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 4 - if (nGroups == 6 && 50 == ge-gs+1) { - /*--- fast track the common case ---*/ -#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++ - BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); - BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); - BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); - BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); - BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); - BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); - BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); - BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); - BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); - BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); -#undef BZ_ITUR - gs = ge + 1; - } else -#endif - { - /*--- slow version which correctly handles all situations ---*/ - while (gs <= ge) { - s->rfreq[bt][mtfv[gs]]++; - gs++; - } - /* already is: gs = ge + 1; */ - } - } - - /* - * Recompute the tables based on the accumulated frequencies. - */ - /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See - * comment in huffman.c for details. */ - for (t = 0; t < nGroups; t++) - BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/); - } - - AssertH(nGroups < 8, 3002); - AssertH(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3003); - - /*--- Compute MTF values for the selectors. ---*/ - { - uint8_t pos[BZ_N_GROUPS], ll_i, tmp2, tmp; - - for (i = 0; i < nGroups; i++) - pos[i] = i; - for (i = 0; i < nSelectors; i++) { - ll_i = s->selector[i]; - j = 0; - tmp = pos[j]; - while (ll_i != tmp) { - j++; - tmp2 = tmp; - tmp = pos[j]; - pos[j] = tmp2; - }; - pos[0] = tmp; - s->selectorMtf[i] = j; - } - }; - - /*--- Assign actual codes for the tables. --*/ - for (t = 0; t < nGroups; t++) { - minLen = 32; - maxLen = 0; - for (i = 0; i < alphaSize; i++) { - if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; - if (s->len[t][i] < minLen) minLen = s->len[t][i]; - } - AssertH(!(maxLen > 17 /*20*/), 3004); - AssertH(!(minLen < 1), 3005); - BZ2_hbAssignCodes(&(s->code[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize); - } - - /*--- Transmit the mapping table. ---*/ - { - /* bbox: optimized a bit more than in bzip2 */ - int inUse16 = 0; - for (i = 0; i < 16; i++) { - if (sizeof(long) <= 4) { - inUse16 = inUse16*2 + - ((*(uint32_t*)&(s->inUse[i * 16 + 0]) - | *(uint32_t*)&(s->inUse[i * 16 + 4]) - | *(uint32_t*)&(s->inUse[i * 16 + 8]) - | *(uint32_t*)&(s->inUse[i * 16 + 12])) != 0); - } else { /* Our CPU can do better */ - inUse16 = inUse16*2 + - ((*(uint64_t*)&(s->inUse[i * 16 + 0]) - | *(uint64_t*)&(s->inUse[i * 16 + 8])) != 0); - } - } - - nBytes = s->numZ; - bsW(s, 16, inUse16); - - inUse16 <<= (sizeof(int)*8 - 16); /* move 15th bit into sign bit */ - for (i = 0; i < 16; i++) { - if (inUse16 < 0) { - unsigned v16 = 0; - for (j = 0; j < 16; j++) - v16 = v16*2 + s->inUse[i * 16 + j]; - bsW(s, 16, v16); - } - inUse16 <<= 1; - } - } - - /*--- Now the selectors. ---*/ - nBytes = s->numZ; - bsW(s, 3, nGroups); - bsW(s, 15, nSelectors); - for (i = 0; i < nSelectors; i++) { - for (j = 0; j < s->selectorMtf[i]; j++) - bsW(s, 1, 1); - bsW(s, 1, 0); - } - - /*--- Now the coding tables. ---*/ - nBytes = s->numZ; - - for (t = 0; t < nGroups; t++) { - int32_t curr = s->len[t][0]; - bsW(s, 5, curr); - for (i = 0; i < alphaSize; i++) { - while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ }; - while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ }; - bsW(s, 1, 0); - } - } - - /*--- And finally, the block data proper ---*/ - nBytes = s->numZ; - selCtr = 0; - gs = 0; - while (1) { - if (gs >= s->nMTF) - break; - ge = gs + BZ_G_SIZE - 1; - if (ge >= s->nMTF) - ge = s->nMTF-1; - AssertH(s->selector[selCtr] < nGroups, 3006); - -/* Costs 1300 bytes and is _slower_ (on Intel Core 2) */ -#if 0 - if (nGroups == 6 && 50 == ge-gs+1) { - /*--- fast track the common case ---*/ - uint16_t mtfv_i; - uint8_t* s_len_sel_selCtr = &(s->len[s->selector[selCtr]][0]); - int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]); -#define BZ_ITAH(nn) \ - mtfv_i = mtfv[gs+(nn)]; \ - bsW(s, s_len_sel_selCtr[mtfv_i], s_code_sel_selCtr[mtfv_i]) - BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); - BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); - BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); - BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); - BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); - BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); - BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); - BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); - BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); - BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); -#undef BZ_ITAH - gs = ge+1; - } else -#endif - { - /*--- slow version which correctly handles all situations ---*/ - /* code is bit bigger, but moves multiply out of the loop */ - uint8_t* s_len_sel_selCtr = &(s->len [s->selector[selCtr]][0]); - int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]); - while (gs <= ge) { - bsW(s, - s_len_sel_selCtr[mtfv[gs]], - s_code_sel_selCtr[mtfv[gs]] - ); - gs++; - } - /* already is: gs = ge+1; */ - } - selCtr++; - } - AssertH(selCtr == nSelectors, 3007); -#undef code -#undef rfreq -#undef len_pack -} - - -/*---------------------------------------------------*/ -static -void BZ2_compressBlock(EState* s, int is_last_block) -{ - if (s->nblock > 0) { - BZ_FINALISE_CRC(s->blockCRC); - s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); - s->combinedCRC ^= s->blockCRC; - if (s->blockNo > 1) - s->numZ = 0; - - BZ2_blockSort(s); - } - - s->zbits = &((uint8_t*)s->arr2)[s->nblock]; - - /*-- If this is the first block, create the stream header. --*/ - if (s->blockNo == 1) { - BZ2_bsInitWrite(s); - /*bsPutU8(s, BZ_HDR_B);*/ - /*bsPutU8(s, BZ_HDR_Z);*/ - /*bsPutU8(s, BZ_HDR_h);*/ - /*bsPutU8(s, BZ_HDR_0 + s->blockSize100k);*/ - bsPutU32(s, BZ_HDR_BZh0 + s->blockSize100k); - } - - if (s->nblock > 0) { - /*bsPutU8(s, 0x31);*/ - /*bsPutU8(s, 0x41);*/ - /*bsPutU8(s, 0x59);*/ - /*bsPutU8(s, 0x26);*/ - bsPutU32(s, 0x31415926); - /*bsPutU8(s, 0x53);*/ - /*bsPutU8(s, 0x59);*/ - bsPutU16(s, 0x5359); - - /*-- Now the block's CRC, so it is in a known place. --*/ - bsPutU32(s, s->blockCRC); - - /* - * Now a single bit indicating (non-)randomisation. - * As of version 0.9.5, we use a better sorting algorithm - * which makes randomisation unnecessary. So always set - * the randomised bit to 'no'. Of course, the decoder - * still needs to be able to handle randomised blocks - * so as to maintain backwards compatibility with - * older versions of bzip2. - */ - bsW(s, 1, 0); - - bsW(s, 24, s->origPtr); - generateMTFValues(s); - sendMTFValues(s); - } - - /*-- If this is the last block, add the stream trailer. --*/ - if (is_last_block) { - /*bsPutU8(s, 0x17);*/ - /*bsPutU8(s, 0x72);*/ - /*bsPutU8(s, 0x45);*/ - /*bsPutU8(s, 0x38);*/ - bsPutU32(s, 0x17724538); - /*bsPutU8(s, 0x50);*/ - /*bsPutU8(s, 0x90);*/ - bsPutU16(s, 0x5090); - bsPutU32(s, s->combinedCRC); - bsFinishWrite(s); - } -} - - -/*-------------------------------------------------------------*/ -/*--- end compress.c ---*/ -/*-------------------------------------------------------------*/ diff --git a/archival/bz/huffman.c b/archival/bz/huffman.c deleted file mode 100644 index 676b1af66..000000000 --- a/archival/bz/huffman.c +++ /dev/null @@ -1,229 +0,0 @@ -/* - * bzip2 is written by Julian Seward . - * Adapted for busybox by Denys Vlasenko . - * See README and LICENSE files in this directory for more information. - */ - -/*-------------------------------------------------------------*/ -/*--- Huffman coding low-level stuff ---*/ -/*--- huffman.c ---*/ -/*-------------------------------------------------------------*/ - -/* ------------------------------------------------------------------ -This file is part of bzip2/libbzip2, a program and library for -lossless, block-sorting data compression. - -bzip2/libbzip2 version 1.0.4 of 20 December 2006 -Copyright (C) 1996-2006 Julian Seward - -Please read the WARNING, DISCLAIMER and PATENTS sections in the -README file. - -This program is released under the terms of the license contained -in the file LICENSE. ------------------------------------------------------------------- */ - -/* #include "bzlib_private.h" */ - -/*---------------------------------------------------*/ -#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) -#define DEPTHOF(zz1) ((zz1) & 0x000000ff) -#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) - -#define ADDWEIGHTS(zw1,zw2) \ - (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ - (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) - -#define UPHEAP(z) \ -{ \ - int32_t zz, tmp; \ - zz = z; \ - tmp = heap[zz]; \ - while (weight[tmp] < weight[heap[zz >> 1]]) { \ - heap[zz] = heap[zz >> 1]; \ - zz >>= 1; \ - } \ - heap[zz] = tmp; \ -} - - -/* 90 bytes, 0.3% of overall compress speed */ -#if CONFIG_BZIP2_FEATURE_SPEED >= 1 - -/* macro works better than inline (gcc 4.2.1) */ -#define DOWNHEAP1(heap, weight, Heap) \ -{ \ - int32_t zz, yy, tmp; \ - zz = 1; \ - tmp = heap[zz]; \ - while (1) { \ - yy = zz << 1; \ - if (yy > nHeap) \ - break; \ - if (yy < nHeap \ - && weight[heap[yy+1]] < weight[heap[yy]]) \ - yy++; \ - if (weight[tmp] < weight[heap[yy]]) \ - break; \ - heap[zz] = heap[yy]; \ - zz = yy; \ - } \ - heap[zz] = tmp; \ -} - -#else - -static -void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap) -{ - int32_t zz, yy, tmp; - zz = 1; - tmp = heap[zz]; - while (1) { - yy = zz << 1; - if (yy > nHeap) - break; - if (yy < nHeap - && weight[heap[yy + 1]] < weight[heap[yy]]) - yy++; - if (weight[tmp] < weight[heap[yy]]) - break; - heap[zz] = heap[yy]; - zz = yy; - } - heap[zz] = tmp; -} - -#endif - -/*---------------------------------------------------*/ -static -void BZ2_hbMakeCodeLengths(EState *s, - uint8_t *len, - int32_t *freq, - int32_t alphaSize, - int32_t maxLen) -{ - /* - * Nodes and heap entries run from 1. Entry 0 - * for both the heap and nodes is a sentinel. - */ - int32_t nNodes, nHeap, n1, n2, i, j, k; - Bool tooLong; - - /* bbox: moved to EState to save stack - int32_t heap [BZ_MAX_ALPHA_SIZE + 2]; - int32_t weight[BZ_MAX_ALPHA_SIZE * 2]; - int32_t parent[BZ_MAX_ALPHA_SIZE * 2]; - */ -#define heap (s->BZ2_hbMakeCodeLengths__heap) -#define weight (s->BZ2_hbMakeCodeLengths__weight) -#define parent (s->BZ2_hbMakeCodeLengths__parent) - - for (i = 0; i < alphaSize; i++) - weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; - - while (1) { - nNodes = alphaSize; - nHeap = 0; - - heap[0] = 0; - weight[0] = 0; - parent[0] = -2; - - for (i = 1; i <= alphaSize; i++) { - parent[i] = -1; - nHeap++; - heap[nHeap] = i; - UPHEAP(nHeap); - } - - AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001); - - while (nHeap > 1) { - n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap); - n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap); - nNodes++; - parent[n1] = parent[n2] = nNodes; - weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); - parent[nNodes] = -1; - nHeap++; - heap[nHeap] = nNodes; - UPHEAP(nHeap); - } - - AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002); - - tooLong = False; - for (i = 1; i <= alphaSize; i++) { - j = 0; - k = i; - while (parent[k] >= 0) { - k = parent[k]; - j++; - } - len[i-1] = j; - if (j > maxLen) - tooLong = True; - } - - if (!tooLong) - break; - - /* 17 Oct 04: keep-going condition for the following loop used - to be 'i < alphaSize', which missed the last element, - theoretically leading to the possibility of the compressor - looping. However, this count-scaling step is only needed if - one of the generated Huffman code words is longer than - maxLen, which up to and including version 1.0.2 was 20 bits, - which is extremely unlikely. In version 1.0.3 maxLen was - changed to 17 bits, which has minimal effect on compression - ratio, but does mean this scaling step is used from time to - time, enough to verify that it works. - - This means that bzip2-1.0.3 and later will only produce - Huffman codes with a maximum length of 17 bits. However, in - order to preserve backwards compatibility with bitstreams - produced by versions pre-1.0.3, the decompressor must still - handle lengths of up to 20. */ - - for (i = 1; i <= alphaSize; i++) { - j = weight[i] >> 8; - /* bbox: yes, it is a signed division. - * don't replace with shift! */ - j = 1 + (j / 2); - weight[i] = j << 8; - } - } -#undef heap -#undef weight -#undef parent -} - - -/*---------------------------------------------------*/ -static -void BZ2_hbAssignCodes(int32_t *code, - uint8_t *length, - int32_t minLen, - int32_t maxLen, - int32_t alphaSize) -{ - int32_t n, vec, i; - - vec = 0; - for (n = minLen; n <= maxLen; n++) { - for (i = 0; i < alphaSize; i++) { - if (length[i] == n) { - code[i] = vec; - vec++; - }; - } - vec <<= 1; - } -} - - -/*-------------------------------------------------------------*/ -/*--- end huffman.c ---*/ -/*-------------------------------------------------------------*/ diff --git a/archival/bzip2.c b/archival/bzip2.c index fdb8b9306..a6abc931c 100644 --- a/archival/bzip2.c +++ b/archival/bzip2.c @@ -8,7 +8,7 @@ */ #include "libbb.h" -#include "unarchive.h" +#include "archive.h" #define CONFIG_BZIP2_FEATURE_SPEED 1 @@ -33,14 +33,14 @@ /* Takes ~300 bytes, detects corruption caused by bad RAM etc */ #define BZ_LIGHT_DEBUG 0 -#include "bz/bzlib.h" +#include "libarchive/bz/bzlib.h" -#include "bz/bzlib_private.h" +#include "libarchive/bz/bzlib_private.h" -#include "bz/blocksort.c" -#include "bz/bzlib.c" -#include "bz/compress.c" -#include "bz/huffman.c" +#include "libarchive/bz/blocksort.c" +#include "libarchive/bz/bzlib.c" +#include "libarchive/bz/compress.c" +#include "libarchive/bz/huffman.c" /* No point in being shy and having very small buffer here. * bzip2 internal buffers are much bigger anyway, hundreds of kbytes. diff --git a/archival/cpio.c b/archival/cpio.c index a2d74dc79..6ab268821 100644 --- a/archival/cpio.c +++ b/archival/cpio.c @@ -12,7 +12,7 @@ * */ #include "libbb.h" -#include "unarchive.h" +#include "archive.h" /* GNU cpio 2.9 --help (abridged): diff --git a/archival/dpkg.c b/archival/dpkg.c index 53e30d541..c37ae3349 100644 --- a/archival/dpkg.c +++ b/archival/dpkg.c @@ -30,7 +30,7 @@ #include "libbb.h" #include -#include "unarchive.h" +#include "archive.h" /* note: if you vary hash_prime sizes be aware, * 1) tweaking these will have a big effect on how much memory this program uses. diff --git a/archival/dpkg_deb.c b/archival/dpkg_deb.c index 0ce7c02d8..aee7b4cf5 100644 --- a/archival/dpkg_deb.c +++ b/archival/dpkg_deb.c @@ -5,7 +5,7 @@ * Licensed under GPLv2 or later, see file LICENSE in this source tree. */ #include "libbb.h" -#include "unarchive.h" +#include "archive.h" #define DPKG_DEB_OPT_CONTENTS 1 #define DPKG_DEB_OPT_CONTROL 2 diff --git a/archival/gzip.c b/archival/gzip.c index 80141f380..38c5ae7fd 100644 --- a/archival/gzip.c +++ b/archival/gzip.c @@ -40,7 +40,7 @@ aa: 85.1% -- replaced with aa.gz */ #include "libbb.h" -#include "unarchive.h" +#include "archive.h" /* =========================================================================== diff --git a/archival/libarchive/Kbuild.src b/archival/libarchive/Kbuild.src new file mode 100644 index 000000000..b0bc4e5aa --- /dev/null +++ b/archival/libarchive/Kbuild.src @@ -0,0 +1,64 @@ +# Makefile for busybox +# +# Copyright (C) 1999-2004 by Erik Andersen +# +# Licensed under GPLv2 or later, see file LICENSE in this source tree. + +lib-y:= + +COMMON_FILES:= \ +\ + data_skip.o \ + data_extract_all.o \ + data_extract_to_stdout.o \ +\ + filter_accept_all.o \ + filter_accept_list.o \ + filter_accept_reject_list.o \ +\ + header_skip.o \ + header_list.o \ + header_verbose_list.o \ +\ + seek_by_read.o \ + seek_by_jump.o \ +\ + data_align.o \ + find_list_entry.o \ + init_handle.o + +DPKG_FILES:= \ + get_header_ar.o \ + unpack_ar_archive.o \ + get_header_tar.o \ + filter_accept_list_reassign.o + +INSERT + +lib-$(CONFIG_AR) += get_header_ar.o unpack_ar_archive.o +lib-$(CONFIG_BUNZIP2) += decompress_bunzip2.o +lib-$(CONFIG_UNLZMA) += decompress_unlzma.o +lib-$(CONFIG_UNXZ) += decompress_unxz.o +lib-$(CONFIG_CPIO) += get_header_cpio.o +lib-$(CONFIG_DPKG) += $(DPKG_FILES) +lib-$(CONFIG_DPKG_DEB) += $(DPKG_FILES) +lib-$(CONFIG_GUNZIP) += decompress_unzip.o +lib-$(CONFIG_RPM2CPIO) += decompress_unzip.o get_header_cpio.o +lib-$(CONFIG_RPM) += open_transformer.o decompress_unzip.o get_header_cpio.o +lib-$(CONFIG_TAR) += get_header_tar.o +lib-$(CONFIG_UNCOMPRESS) += decompress_uncompress.o +lib-$(CONFIG_UNZIP) += decompress_unzip.o +lib-$(CONFIG_LZOP) += lzo1x_1.o lzo1x_1o.o lzo1x_d.o +lib-$(CONFIG_LZOP_COMPR_HIGH) += lzo1x_9x.o +lib-$(CONFIG_FEATURE_SEAMLESS_Z) += open_transformer.o decompress_uncompress.o +lib-$(CONFIG_FEATURE_SEAMLESS_GZ) += open_transformer.o decompress_unzip.o get_header_tar_gz.o +lib-$(CONFIG_FEATURE_SEAMLESS_BZ2) += open_transformer.o decompress_bunzip2.o get_header_tar_bz2.o +lib-$(CONFIG_FEATURE_SEAMLESS_LZMA) += open_transformer.o decompress_unlzma.o get_header_tar_lzma.o +lib-$(CONFIG_FEATURE_SEAMLESS_XZ) += open_transformer.o decompress_unxz.o +lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += decompress_bunzip2.o +lib-$(CONFIG_FEATURE_COMPRESS_BBCONFIG) += decompress_bunzip2.o +lib-$(CONFIG_FEATURE_TAR_TO_COMMAND) += data_extract_to_command.o + +ifneq ($(lib-y),) +lib-y += $(COMMON_FILES) +endif diff --git a/archival/libarchive/bz/LICENSE b/archival/libarchive/bz/LICENSE new file mode 100644 index 000000000..da4346520 --- /dev/null +++ b/archival/libarchive/bz/LICENSE @@ -0,0 +1,44 @@ +bzip2 applet in busybox is based on lightly-modified source +of bzip2 version 1.0.4. bzip2 source is distributed +under the following conditions (copied verbatim from LICENSE file) +=========================================================== + + +This program, "bzip2", the associated library "libbzip2", and all +documentation, are copyright (C) 1996-2006 Julian R Seward. All +rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions +are met: + +1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +2. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + +3. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + +4. The name of the author may not be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS +OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE +GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Julian Seward, Cambridge, UK. +jseward@bzip.org +bzip2/libbzip2 version 1.0.4 of 20 December 2006 diff --git a/archival/libarchive/bz/README b/archival/libarchive/bz/README new file mode 100644 index 000000000..fffd47b8a --- /dev/null +++ b/archival/libarchive/bz/README @@ -0,0 +1,90 @@ +This file is an abridged version of README from bzip2 1.0.4 +Build instructions (which are not relevant to busyboxed bzip2) +are removed. +=========================================================== + + +This is the README for bzip2/libzip2. +This version is fully compatible with the previous public releases. + +------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in this file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ + +Please read and be aware of the following: + + +WARNING: + + This program and library (attempts to) compress data by + performing several non-trivial transformations on it. + Unless you are 100% familiar with *all* the algorithms + contained herein, and with the consequences of modifying them, + you should NOT meddle with the compression or decompression + machinery. Incorrect changes can and very likely *will* + lead to disastrous loss of data. + + +DISCLAIMER: + + I TAKE NO RESPONSIBILITY FOR ANY LOSS OF DATA ARISING FROM THE + USE OF THIS PROGRAM/LIBRARY, HOWSOEVER CAUSED. + + Every compression of a file implies an assumption that the + compressed file can be decompressed to reproduce the original. + Great efforts in design, coding and testing have been made to + ensure that this program works correctly. However, the complexity + of the algorithms, and, in particular, the presence of various + special cases in the code which occur with very low but non-zero + probability make it impossible to rule out the possibility of bugs + remaining in the program. DO NOT COMPRESS ANY DATA WITH THIS + PROGRAM UNLESS YOU ARE PREPARED TO ACCEPT THE POSSIBILITY, HOWEVER + SMALL, THAT THE DATA WILL NOT BE RECOVERABLE. + + That is not to say this program is inherently unreliable. + Indeed, I very much hope the opposite is true. bzip2/libbzip2 + has been carefully constructed and extensively tested. + + +PATENTS: + + To the best of my knowledge, bzip2/libbzip2 does not use any + patented algorithms. However, I do not have the resources + to carry out a patent search. Therefore I cannot give any + guarantee of the above statement. + + +I hope you find bzip2 useful. Feel free to contact me at + jseward@bzip.org +if you have any suggestions or queries. Many people mailed me with +comments, suggestions and patches after the releases of bzip-0.15, +bzip-0.21, and bzip2 versions 0.1pl2, 0.9.0, 0.9.5, 1.0.0, 1.0.1, +1.0.2 and 1.0.3, and the changes in bzip2 are largely a result of this +feedback. I thank you for your comments. + +bzip2's "home" is http://www.bzip.org/ + +Julian Seward +jseward@bzip.org +Cambridge, UK. + +18 July 1996 (version 0.15) +25 August 1996 (version 0.21) + 7 August 1997 (bzip2, version 0.1) +29 August 1997 (bzip2, version 0.1pl2) +23 August 1998 (bzip2, version 0.9.0) + 8 June 1999 (bzip2, version 0.9.5) + 4 Sept 1999 (bzip2, version 0.9.5d) + 5 May 2000 (bzip2, version 1.0pre8) +30 December 2001 (bzip2, version 1.0.2pre1) +15 February 2005 (bzip2, version 1.0.3) +20 December 2006 (bzip2, version 1.0.4) diff --git a/archival/libarchive/bz/blocksort.c b/archival/libarchive/bz/blocksort.c new file mode 100644 index 000000000..f70c3701d --- /dev/null +++ b/archival/libarchive/bz/blocksort.c @@ -0,0 +1,1072 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Block sorting machinery ---*/ +/*--- blocksort.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib_private.h" */ + +#define mswap(zz1, zz2) \ +{ \ + int32_t zztmp = zz1; \ + zz1 = zz2; \ + zz2 = zztmp; \ +} + +static +/* No measurable speed gain with inlining */ +/* ALWAYS_INLINE */ +void mvswap(uint32_t* ptr, int32_t zzp1, int32_t zzp2, int32_t zzn) +{ + while (zzn > 0) { + mswap(ptr[zzp1], ptr[zzp2]); + zzp1++; + zzp2++; + zzn--; + } +} + +static +ALWAYS_INLINE +int32_t mmin(int32_t a, int32_t b) +{ + return (a < b) ? a : b; +} + + +/*---------------------------------------------*/ +/*--- Fallback O(N log(N)^2) sorting ---*/ +/*--- algorithm, for repetitive blocks ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +inline +void fallbackSimpleSort(uint32_t* fmap, + uint32_t* eclass, + int32_t lo, + int32_t hi) +{ + int32_t i, j, tmp; + uint32_t ec_tmp; + + if (lo == hi) return; + + if (hi - lo > 3) { + for (i = hi-4; i >= lo; i--) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for (j = i+4; j <= hi && ec_tmp > eclass[fmap[j]]; j += 4) + fmap[j-4] = fmap[j]; + fmap[j-4] = tmp; + } + } + + for (i = hi-1; i >= lo; i--) { + tmp = fmap[i]; + ec_tmp = eclass[tmp]; + for (j = i+1; j <= hi && ec_tmp > eclass[fmap[j]]; j++) + fmap[j-1] = fmap[j]; + fmap[j-1] = tmp; + } +} + + +/*---------------------------------------------*/ +#define fpush(lz,hz) { \ + stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + sp++; \ +} + +#define fpop(lz,hz) { \ + sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; \ +} + +#define FALLBACK_QSORT_SMALL_THRESH 10 +#define FALLBACK_QSORT_STACK_SIZE 100 + +static +void fallbackQSort3(uint32_t* fmap, + uint32_t* eclass, + int32_t loSt, + int32_t hiSt) +{ + int32_t unLo, unHi, ltLo, gtHi, n, m; + int32_t sp, lo, hi; + uint32_t med, r, r3; + int32_t stackLo[FALLBACK_QSORT_STACK_SIZE]; + int32_t stackHi[FALLBACK_QSORT_STACK_SIZE]; + + r = 0; + + sp = 0; + fpush(loSt, hiSt); + + while (sp > 0) { + AssertH(sp < FALLBACK_QSORT_STACK_SIZE - 1, 1004); + + fpop(lo, hi); + if (hi - lo < FALLBACK_QSORT_SMALL_THRESH) { + fallbackSimpleSort(fmap, eclass, lo, hi); + continue; + } + + /* Random partitioning. Median of 3 sometimes fails to + * avoid bad cases. Median of 9 seems to help but + * looks rather expensive. This too seems to work but + * is cheaper. Guidance for the magic constants + * 7621 and 32768 is taken from Sedgewick's algorithms + * book, chapter 35. + */ + r = ((r * 7621) + 1) % 32768; + r3 = r % 3; + if (r3 == 0) + med = eclass[fmap[lo]]; + else if (r3 == 1) + med = eclass[fmap[(lo+hi)>>1]]; + else + med = eclass[fmap[hi]]; + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (1) { + while (1) { + if (unLo > unHi) break; + n = (int32_t)eclass[fmap[unLo]] - (int32_t)med; + if (n == 0) { + mswap(fmap[unLo], fmap[ltLo]); + ltLo++; + unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (1) { + if (unLo > unHi) break; + n = (int32_t)eclass[fmap[unHi]] - (int32_t)med; + if (n == 0) { + mswap(fmap[unHi], fmap[gtHi]); + gtHi--; unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) break; + mswap(fmap[unLo], fmap[unHi]); unLo++; unHi--; + } + + AssertD(unHi == unLo-1, "fallbackQSort3(2)"); + + if (gtHi < ltLo) continue; + + n = mmin(ltLo-lo, unLo-ltLo); mvswap(fmap, lo, unLo-n, n); + m = mmin(hi-gtHi, gtHi-unHi); mvswap(fmap, unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + if (n - lo > hi - m) { + fpush(lo, n); + fpush(m, hi); + } else { + fpush(m, hi); + fpush(lo, n); + } + } +} + +#undef fpush +#undef fpop +#undef FALLBACK_QSORT_SMALL_THRESH +#undef FALLBACK_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + * nblock > 0 + * eclass exists for [0 .. nblock-1] + * ((uint8_t*)eclass) [0 .. nblock-1] holds block + * ptr exists for [0 .. nblock-1] + * + * Post: + * ((uint8_t*)eclass) [0 .. nblock-1] holds block + * All other areas of eclass destroyed + * fmap [0 .. nblock-1] holds sorted order + * bhtab[0 .. 2+(nblock/32)] destroyed +*/ + +#define SET_BH(zz) bhtab[(zz) >> 5] |= (1 << ((zz) & 31)) +#define CLEAR_BH(zz) bhtab[(zz) >> 5] &= ~(1 << ((zz) & 31)) +#define ISSET_BH(zz) (bhtab[(zz) >> 5] & (1 << ((zz) & 31))) +#define WORD_BH(zz) bhtab[(zz) >> 5] +#define UNALIGNED_BH(zz) ((zz) & 0x01f) + +static +void fallbackSort(uint32_t* fmap, + uint32_t* eclass, + uint32_t* bhtab, + int32_t nblock) +{ + int32_t ftab[257]; + int32_t ftabCopy[256]; + int32_t H, i, j, k, l, r, cc, cc1; + int32_t nNotDone; + int32_t nBhtab; + uint8_t* eclass8 = (uint8_t*)eclass; + + /* + * Initial 1-char radix sort to generate + * initial fmap and initial BH bits. + */ + for (i = 0; i < 257; i++) ftab[i] = 0; + for (i = 0; i < nblock; i++) ftab[eclass8[i]]++; + for (i = 0; i < 256; i++) ftabCopy[i] = ftab[i]; + + j = ftab[0]; /* bbox: optimized */ + for (i = 1; i < 257; i++) { + j += ftab[i]; + ftab[i] = j; + } + + for (i = 0; i < nblock; i++) { + j = eclass8[i]; + k = ftab[j] - 1; + ftab[j] = k; + fmap[k] = i; + } + + nBhtab = 2 + ((uint32_t)nblock / 32); /* bbox: unsigned div is easier */ + for (i = 0; i < nBhtab; i++) bhtab[i] = 0; + for (i = 0; i < 256; i++) SET_BH(ftab[i]); + + /* + * Inductively refine the buckets. Kind-of an + * "exponential radix sort" (!), inspired by the + * Manber-Myers suffix array construction algorithm. + */ + + /*-- set sentinel bits for block-end detection --*/ + for (i = 0; i < 32; i++) { + SET_BH(nblock + 2*i); + CLEAR_BH(nblock + 2*i + 1); + } + + /*-- the log(N) loop --*/ + H = 1; + while (1) { + j = 0; + for (i = 0; i < nblock; i++) { + if (ISSET_BH(i)) + j = i; + k = fmap[i] - H; + if (k < 0) + k += nblock; + eclass[k] = j; + } + + nNotDone = 0; + r = -1; + while (1) { + + /*-- find the next non-singleton bucket --*/ + k = r + 1; + while (ISSET_BH(k) && UNALIGNED_BH(k)) + k++; + if (ISSET_BH(k)) { + while (WORD_BH(k) == 0xffffffff) k += 32; + while (ISSET_BH(k)) k++; + } + l = k - 1; + if (l >= nblock) + break; + while (!ISSET_BH(k) && UNALIGNED_BH(k)) + k++; + if (!ISSET_BH(k)) { + while (WORD_BH(k) == 0x00000000) k += 32; + while (!ISSET_BH(k)) k++; + } + r = k - 1; + if (r >= nblock) + break; + + /*-- now [l, r] bracket current bucket --*/ + if (r > l) { + nNotDone += (r - l + 1); + fallbackQSort3(fmap, eclass, l, r); + + /*-- scan bucket and generate header bits-- */ + cc = -1; + for (i = l; i <= r; i++) { + cc1 = eclass[fmap[i]]; + if (cc != cc1) { + SET_BH(i); + cc = cc1; + }; + } + } + } + + H *= 2; + if (H > nblock || nNotDone == 0) + break; + } + + /* + * Reconstruct the original block in + * eclass8 [0 .. nblock-1], since the + * previous phase destroyed it. + */ + j = 0; + for (i = 0; i < nblock; i++) { + while (ftabCopy[j] == 0) + j++; + ftabCopy[j]--; + eclass8[fmap[i]] = (uint8_t)j; + } + AssertH(j < 256, 1005); +} + +#undef SET_BH +#undef CLEAR_BH +#undef ISSET_BH +#undef WORD_BH +#undef UNALIGNED_BH + + +/*---------------------------------------------*/ +/*--- The main, O(N^2 log(N)) sorting ---*/ +/*--- algorithm. Faster for "normal" ---*/ +/*--- non-repetitive blocks. ---*/ +/*---------------------------------------------*/ + +/*---------------------------------------------*/ +static +NOINLINE +int mainGtU( + uint32_t i1, + uint32_t i2, + uint8_t* block, + uint16_t* quadrant, + uint32_t nblock, + int32_t* budget) +{ + int32_t k; + uint8_t c1, c2; + uint16_t s1, s2; + +/* Loop unrolling here is actually very useful + * (generated code is much simpler), + * code size increase is only 270 bytes (i386) + * but speeds up compression 10% overall + */ + +#if CONFIG_BZIP2_FEATURE_SPEED >= 1 + +#define TIMES_8(code) \ + code; code; code; code; \ + code; code; code; code; +#define TIMES_12(code) \ + code; code; code; code; \ + code; code; code; code; \ + code; code; code; code; + +#else + +#define TIMES_8(code) \ +{ \ + int nn = 8; \ + do { \ + code; \ + } while (--nn); \ +} +#define TIMES_12(code) \ +{ \ + int nn = 12; \ + do { \ + code; \ + } while (--nn); \ +} + +#endif + + AssertD(i1 != i2, "mainGtU"); + TIMES_12( + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + i1++; i2++; + ) + + k = nblock + 8; + + do { + TIMES_8( + c1 = block[i1]; c2 = block[i2]; + if (c1 != c2) return (c1 > c2); + s1 = quadrant[i1]; s2 = quadrant[i2]; + if (s1 != s2) return (s1 > s2); + i1++; i2++; + ) + + if (i1 >= nblock) i1 -= nblock; + if (i2 >= nblock) i2 -= nblock; + + (*budget)--; + k -= 8; + } while (k >= 0); + + return False; +} +#undef TIMES_8 +#undef TIMES_12 + +/*---------------------------------------------*/ +/* + * Knuth's increments seem to work better + * than Incerpi-Sedgewick here. Possibly + * because the number of elems to sort is + * usually small, typically <= 20. + */ +static +const int32_t incs[14] = { + 1, 4, 13, 40, 121, 364, 1093, 3280, + 9841, 29524, 88573, 265720, + 797161, 2391484 +}; + +static +void mainSimpleSort(uint32_t* ptr, + uint8_t* block, + uint16_t* quadrant, + int32_t nblock, + int32_t lo, + int32_t hi, + int32_t d, + int32_t* budget) +{ + int32_t i, j, h, bigN, hp; + uint32_t v; + + bigN = hi - lo + 1; + if (bigN < 2) return; + + hp = 0; + while (incs[hp] < bigN) hp++; + hp--; + + for (; hp >= 0; hp--) { + h = incs[hp]; + + i = lo + h; + while (1) { + /*-- copy 1 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + +/* 1.5% overall speedup, +290 bytes */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 3 + /*-- copy 2 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; + + /*-- copy 3 --*/ + if (i > hi) break; + v = ptr[i]; + j = i; + while (mainGtU(ptr[j-h]+d, v+d, block, quadrant, nblock, budget)) { + ptr[j] = ptr[j-h]; + j = j - h; + if (j <= (lo + h - 1)) break; + } + ptr[j] = v; + i++; +#endif + if (*budget < 0) return; + } + } +} + + +/*---------------------------------------------*/ +/* + * The following is an implementation of + * an elegant 3-way quicksort for strings, + * described in a paper "Fast Algorithms for + * Sorting and Searching Strings", by Robert + * Sedgewick and Jon L. Bentley. + */ + +static +ALWAYS_INLINE +uint8_t mmed3(uint8_t a, uint8_t b, uint8_t c) +{ + uint8_t t; + if (a > b) { + t = a; + a = b; + b = t; + }; + /* here b >= a */ + if (b > c) { + b = c; + if (a > b) + b = a; + } + return b; +} + +#define mpush(lz,hz,dz) \ +{ \ + stackLo[sp] = lz; \ + stackHi[sp] = hz; \ + stackD [sp] = dz; \ + sp++; \ +} + +#define mpop(lz,hz,dz) \ +{ \ + sp--; \ + lz = stackLo[sp]; \ + hz = stackHi[sp]; \ + dz = stackD [sp]; \ +} + +#define mnextsize(az) (nextHi[az] - nextLo[az]) + +#define mnextswap(az,bz) \ +{ \ + int32_t tz; \ + tz = nextLo[az]; nextLo[az] = nextLo[bz]; nextLo[bz] = tz; \ + tz = nextHi[az]; nextHi[az] = nextHi[bz]; nextHi[bz] = tz; \ + tz = nextD [az]; nextD [az] = nextD [bz]; nextD [bz] = tz; \ +} + +#define MAIN_QSORT_SMALL_THRESH 20 +#define MAIN_QSORT_DEPTH_THRESH (BZ_N_RADIX + BZ_N_QSORT) +#define MAIN_QSORT_STACK_SIZE 100 + +static NOINLINE +void mainQSort3(uint32_t* ptr, + uint8_t* block, + uint16_t* quadrant, + int32_t nblock, + int32_t loSt, + int32_t hiSt, + int32_t dSt, + int32_t* budget) +{ + int32_t unLo, unHi, ltLo, gtHi, n, m, med; + int32_t sp, lo, hi, d; + + int32_t stackLo[MAIN_QSORT_STACK_SIZE]; + int32_t stackHi[MAIN_QSORT_STACK_SIZE]; + int32_t stackD [MAIN_QSORT_STACK_SIZE]; + + int32_t nextLo[3]; + int32_t nextHi[3]; + int32_t nextD [3]; + + sp = 0; + mpush(loSt, hiSt, dSt); + + while (sp > 0) { + AssertH(sp < MAIN_QSORT_STACK_SIZE - 2, 1001); + + mpop(lo, hi, d); + if (hi - lo < MAIN_QSORT_SMALL_THRESH + || d > MAIN_QSORT_DEPTH_THRESH + ) { + mainSimpleSort(ptr, block, quadrant, nblock, lo, hi, d, budget); + if (*budget < 0) + return; + continue; + } + med = (int32_t) mmed3(block[ptr[lo ] + d], + block[ptr[hi ] + d], + block[ptr[(lo+hi) >> 1] + d]); + + unLo = ltLo = lo; + unHi = gtHi = hi; + + while (1) { + while (1) { + if (unLo > unHi) + break; + n = ((int32_t)block[ptr[unLo]+d]) - med; + if (n == 0) { + mswap(ptr[unLo], ptr[ltLo]); + ltLo++; + unLo++; + continue; + }; + if (n > 0) break; + unLo++; + } + while (1) { + if (unLo > unHi) + break; + n = ((int32_t)block[ptr[unHi]+d]) - med; + if (n == 0) { + mswap(ptr[unHi], ptr[gtHi]); + gtHi--; + unHi--; + continue; + }; + if (n < 0) break; + unHi--; + } + if (unLo > unHi) + break; + mswap(ptr[unLo], ptr[unHi]); + unLo++; + unHi--; + } + + AssertD(unHi == unLo-1, "mainQSort3(2)"); + + if (gtHi < ltLo) { + mpush(lo, hi, d + 1); + continue; + } + + n = mmin(ltLo-lo, unLo-ltLo); mvswap(ptr, lo, unLo-n, n); + m = mmin(hi-gtHi, gtHi-unHi); mvswap(ptr, unLo, hi-m+1, m); + + n = lo + unLo - ltLo - 1; + m = hi - (gtHi - unHi) + 1; + + nextLo[0] = lo; nextHi[0] = n; nextD[0] = d; + nextLo[1] = m; nextHi[1] = hi; nextD[1] = d; + nextLo[2] = n+1; nextHi[2] = m-1; nextD[2] = d+1; + + if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1); + if (mnextsize(1) < mnextsize(2)) mnextswap(1, 2); + if (mnextsize(0) < mnextsize(1)) mnextswap(0, 1); + + AssertD (mnextsize(0) >= mnextsize(1), "mainQSort3(8)"); + AssertD (mnextsize(1) >= mnextsize(2), "mainQSort3(9)"); + + mpush(nextLo[0], nextHi[0], nextD[0]); + mpush(nextLo[1], nextHi[1], nextD[1]); + mpush(nextLo[2], nextHi[2], nextD[2]); + } +} + +#undef mpush +#undef mpop +#undef mnextsize +#undef mnextswap +#undef MAIN_QSORT_SMALL_THRESH +#undef MAIN_QSORT_DEPTH_THRESH +#undef MAIN_QSORT_STACK_SIZE + + +/*---------------------------------------------*/ +/* Pre: + * nblock > N_OVERSHOOT + * block32 exists for [0 .. nblock-1 +N_OVERSHOOT] + * ((uint8_t*)block32) [0 .. nblock-1] holds block + * ptr exists for [0 .. nblock-1] + * + * Post: + * ((uint8_t*)block32) [0 .. nblock-1] holds block + * All other areas of block32 destroyed + * ftab[0 .. 65536] destroyed + * ptr [0 .. nblock-1] holds sorted order + * if (*budget < 0), sorting was abandoned + */ + +#define BIGFREQ(b) (ftab[((b)+1) << 8] - ftab[(b) << 8]) +#define SETMASK (1 << 21) +#define CLEARMASK (~(SETMASK)) + +static NOINLINE +void mainSort(EState* state, + uint32_t* ptr, + uint8_t* block, + uint16_t* quadrant, + uint32_t* ftab, + int32_t nblock, + int32_t* budget) +{ + int32_t i, j, k, ss, sb; + uint8_t c1; + int32_t numQSorted; + uint16_t s; + Bool bigDone[256]; + /* bbox: moved to EState to save stack + int32_t runningOrder[256]; + int32_t copyStart[256]; + int32_t copyEnd [256]; + */ +#define runningOrder (state->mainSort__runningOrder) +#define copyStart (state->mainSort__copyStart) +#define copyEnd (state->mainSort__copyEnd) + + /*-- set up the 2-byte frequency table --*/ + /* was: for (i = 65536; i >= 0; i--) ftab[i] = 0; */ + memset(ftab, 0, 65537 * sizeof(ftab[0])); + + j = block[0] << 8; + i = nblock - 1; +/* 3%, +300 bytes */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 2 + for (; i >= 3; i -= 4) { + quadrant[i] = 0; + j = (j >> 8) | (((uint16_t)block[i]) << 8); + ftab[j]++; + quadrant[i-1] = 0; + j = (j >> 8) | (((uint16_t)block[i-1]) << 8); + ftab[j]++; + quadrant[i-2] = 0; + j = (j >> 8) | (((uint16_t)block[i-2]) << 8); + ftab[j]++; + quadrant[i-3] = 0; + j = (j >> 8) | (((uint16_t)block[i-3]) << 8); + ftab[j]++; + } +#endif + for (; i >= 0; i--) { + quadrant[i] = 0; + j = (j >> 8) | (((uint16_t)block[i]) << 8); + ftab[j]++; + } + + /*-- (emphasises close relationship of block & quadrant) --*/ + for (i = 0; i < BZ_N_OVERSHOOT; i++) { + block [nblock+i] = block[i]; + quadrant[nblock+i] = 0; + } + + /*-- Complete the initial radix sort --*/ + j = ftab[0]; /* bbox: optimized */ + for (i = 1; i <= 65536; i++) { + j += ftab[i]; + ftab[i] = j; + } + + s = block[0] << 8; + i = nblock - 1; +#if CONFIG_BZIP2_FEATURE_SPEED >= 2 + for (; i >= 3; i -= 4) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i; + s = (s >> 8) | (block[i-1] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i-1; + s = (s >> 8) | (block[i-2] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i-2; + s = (s >> 8) | (block[i-3] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i-3; + } +#endif + for (; i >= 0; i--) { + s = (s >> 8) | (block[i] << 8); + j = ftab[s] - 1; + ftab[s] = j; + ptr[j] = i; + } + + /* + * Now ftab contains the first loc of every small bucket. + * Calculate the running order, from smallest to largest + * big bucket. + */ + for (i = 0; i <= 255; i++) { + bigDone [i] = False; + runningOrder[i] = i; + } + + { + int32_t vv; + /* bbox: was: int32_t h = 1; */ + /* do h = 3 * h + 1; while (h <= 256); */ + uint32_t h = 364; + + do { + /*h = h / 3;*/ + h = (h * 171) >> 9; /* bbox: fast h/3 */ + for (i = h; i <= 255; i++) { + vv = runningOrder[i]; + j = i; + while (BIGFREQ(runningOrder[j-h]) > BIGFREQ(vv)) { + runningOrder[j] = runningOrder[j-h]; + j = j - h; + if (j <= (h - 1)) + goto zero; + } + zero: + runningOrder[j] = vv; + } + } while (h != 1); + } + + /* + * The main sorting loop. + */ + + numQSorted = 0; + + for (i = 0; i <= 255; i++) { + + /* + * Process big buckets, starting with the least full. + * Basically this is a 3-step process in which we call + * mainQSort3 to sort the small buckets [ss, j], but + * also make a big effort to avoid the calls if we can. + */ + ss = runningOrder[i]; + + /* + * Step 1: + * Complete the big bucket [ss] by quicksorting + * any unsorted small buckets [ss, j], for j != ss. + * Hopefully previous pointer-scanning phases have already + * completed many of the small buckets [ss, j], so + * we don't have to sort them at all. + */ + for (j = 0; j <= 255; j++) { + if (j != ss) { + sb = (ss << 8) + j; + if (!(ftab[sb] & SETMASK)) { + int32_t lo = ftab[sb] & CLEARMASK; + int32_t hi = (ftab[sb+1] & CLEARMASK) - 1; + if (hi > lo) { + mainQSort3( + ptr, block, quadrant, nblock, + lo, hi, BZ_N_RADIX, budget + ); + if (*budget < 0) return; + numQSorted += (hi - lo + 1); + } + } + ftab[sb] |= SETMASK; + } + } + + AssertH(!bigDone[ss], 1006); + + /* + * Step 2: + * Now scan this big bucket [ss] so as to synthesise the + * sorted order for small buckets [t, ss] for all t, + * including, magically, the bucket [ss,ss] too. + * This will avoid doing Real Work in subsequent Step 1's. + */ + { + for (j = 0; j <= 255; j++) { + copyStart[j] = ftab[(j << 8) + ss] & CLEARMASK; + copyEnd [j] = (ftab[(j << 8) + ss + 1] & CLEARMASK) - 1; + } + for (j = ftab[ss << 8] & CLEARMASK; j < copyStart[ss]; j++) { + k = ptr[j] - 1; + if (k < 0) + k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[copyStart[c1]++] = k; + } + for (j = (ftab[(ss+1) << 8] & CLEARMASK) - 1; j > copyEnd[ss]; j--) { + k = ptr[j]-1; + if (k < 0) + k += nblock; + c1 = block[k]; + if (!bigDone[c1]) + ptr[copyEnd[c1]--] = k; + } + } + + /* Extremely rare case missing in bzip2-1.0.0 and 1.0.1. + * Necessity for this case is demonstrated by compressing + * a sequence of approximately 48.5 million of character + * 251; 1.0.0/1.0.1 will then die here. */ + AssertH((copyStart[ss]-1 == copyEnd[ss]) \ + || (copyStart[ss] == 0 && copyEnd[ss] == nblock-1), 1007); + + for (j = 0; j <= 255; j++) + ftab[(j << 8) + ss] |= SETMASK; + + /* + * Step 3: + * The [ss] big bucket is now done. Record this fact, + * and update the quadrant descriptors. Remember to + * update quadrants in the overshoot area too, if + * necessary. The "if (i < 255)" test merely skips + * this updating for the last bucket processed, since + * updating for the last bucket is pointless. + * + * The quadrant array provides a way to incrementally + * cache sort orderings, as they appear, so as to + * make subsequent comparisons in fullGtU() complete + * faster. For repetitive blocks this makes a big + * difference (but not big enough to be able to avoid + * the fallback sorting mechanism, exponential radix sort). + * + * The precise meaning is: at all times: + * + * for 0 <= i < nblock and 0 <= j <= nblock + * + * if block[i] != block[j], + * + * then the relative values of quadrant[i] and + * quadrant[j] are meaningless. + * + * else { + * if quadrant[i] < quadrant[j] + * then the string starting at i lexicographically + * precedes the string starting at j + * + * else if quadrant[i] > quadrant[j] + * then the string starting at j lexicographically + * precedes the string starting at i + * + * else + * the relative ordering of the strings starting + * at i and j has not yet been determined. + * } + */ + bigDone[ss] = True; + + if (i < 255) { + int32_t bbStart = ftab[ss << 8] & CLEARMASK; + int32_t bbSize = (ftab[(ss+1) << 8] & CLEARMASK) - bbStart; + int32_t shifts = 0; + + while ((bbSize >> shifts) > 65534) shifts++; + + for (j = bbSize-1; j >= 0; j--) { + int32_t a2update = ptr[bbStart + j]; + uint16_t qVal = (uint16_t)(j >> shifts); + quadrant[a2update] = qVal; + if (a2update < BZ_N_OVERSHOOT) + quadrant[a2update + nblock] = qVal; + } + AssertH(((bbSize-1) >> shifts) <= 65535, 1002); + } + } +#undef runningOrder +#undef copyStart +#undef copyEnd +} + +#undef BIGFREQ +#undef SETMASK +#undef CLEARMASK + + +/*---------------------------------------------*/ +/* Pre: + * nblock > 0 + * arr2 exists for [0 .. nblock-1 +N_OVERSHOOT] + * ((uint8_t*)arr2)[0 .. nblock-1] holds block + * arr1 exists for [0 .. nblock-1] + * + * Post: + * ((uint8_t*)arr2) [0 .. nblock-1] holds block + * All other areas of block destroyed + * ftab[0 .. 65536] destroyed + * arr1[0 .. nblock-1] holds sorted order + */ +static NOINLINE +void BZ2_blockSort(EState* s) +{ + /* In original bzip2 1.0.4, it's a parameter, but 30 + * (which was the default) should work ok. */ + enum { wfact = 30 }; + + uint32_t* ptr = s->ptr; + uint8_t* block = s->block; + uint32_t* ftab = s->ftab; + int32_t nblock = s->nblock; + uint16_t* quadrant; + int32_t budget; + int32_t i; + + if (nblock < 10000) { + fallbackSort(s->arr1, s->arr2, ftab, nblock); + } else { + /* Calculate the location for quadrant, remembering to get + * the alignment right. Assumes that &(block[0]) is at least + * 2-byte aligned -- this should be ok since block is really + * the first section of arr2. + */ + i = nblock + BZ_N_OVERSHOOT; + if (i & 1) i++; + quadrant = (uint16_t*)(&(block[i])); + + /* (wfact-1) / 3 puts the default-factor-30 + * transition point at very roughly the same place as + * with v0.1 and v0.9.0. + * Not that it particularly matters any more, since the + * resulting compressed stream is now the same regardless + * of whether or not we use the main sort or fallback sort. + */ + budget = nblock * ((wfact-1) / 3); + + mainSort(s, ptr, block, quadrant, ftab, nblock, &budget); + if (budget < 0) { + fallbackSort(s->arr1, s->arr2, ftab, nblock); + } + } + + s->origPtr = -1; + for (i = 0; i < s->nblock; i++) + if (ptr[i] == 0) { + s->origPtr = i; + break; + }; + + AssertH(s->origPtr != -1, 1003); +} + + +/*-------------------------------------------------------------*/ +/*--- end blocksort.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/bz/bzlib.c b/archival/libarchive/bz/bzlib.c new file mode 100644 index 000000000..b3beeabed --- /dev/null +++ b/archival/libarchive/bz/bzlib.c @@ -0,0 +1,431 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Library top-level functions. ---*/ +/*--- bzlib.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* CHANGES + * 0.9.0 -- original version. + * 0.9.0a/b -- no changes in this file. + * 0.9.0c -- made zero-length BZ_FLUSH work correctly in bzCompress(). + * fixed bzWrite/bzRead to ignore zero-length requests. + * fixed bzread to correctly handle read requests after EOF. + * wrong parameter order in call to bzDecompressInit in + * bzBuffToBuffDecompress. Fixed. + */ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +/*--- Compression stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +#if BZ_LIGHT_DEBUG +static +void bz_assert_fail(int errcode) +{ + /* if (errcode == 1007) bb_error_msg_and_die("probably bad RAM"); */ + bb_error_msg_and_die("internal error %d", errcode); +} +#endif + +/*---------------------------------------------------*/ +static +void prepare_new_block(EState* s) +{ + int i; + s->nblock = 0; + s->numZ = 0; + s->state_out_pos = 0; + BZ_INITIALISE_CRC(s->blockCRC); + /* inlined memset would be nice to have here */ + for (i = 0; i < 256; i++) + s->inUse[i] = 0; + s->blockNo++; +} + + +/*---------------------------------------------------*/ +static +ALWAYS_INLINE +void init_RL(EState* s) +{ + s->state_in_ch = 256; + s->state_in_len = 0; +} + + +static +int isempty_RL(EState* s) +{ + return (s->state_in_ch >= 256 || s->state_in_len <= 0); +} + + +/*---------------------------------------------------*/ +static +void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k) +{ + int32_t n; + EState* s; + + s = xzalloc(sizeof(EState)); + s->strm = strm; + + n = 100000 * blockSize100k; + s->arr1 = xmalloc(n * sizeof(uint32_t)); + s->mtfv = (uint16_t*)s->arr1; + s->ptr = (uint32_t*)s->arr1; + s->arr2 = xmalloc((n + BZ_N_OVERSHOOT) * sizeof(uint32_t)); + s->block = (uint8_t*)s->arr2; + s->ftab = xmalloc(65537 * sizeof(uint32_t)); + + s->crc32table = crc32_filltable(NULL, 1); + + s->state = BZ_S_INPUT; + s->mode = BZ_M_RUNNING; + s->blockSize100k = blockSize100k; + s->nblockMAX = n - 19; + + strm->state = s; + /*strm->total_in = 0;*/ + strm->total_out = 0; + init_RL(s); + prepare_new_block(s); +} + + +/*---------------------------------------------------*/ +static +void add_pair_to_block(EState* s) +{ + int32_t i; + uint8_t ch = (uint8_t)(s->state_in_ch); + for (i = 0; i < s->state_in_len; i++) { + BZ_UPDATE_CRC(s, s->blockCRC, ch); + } + s->inUse[s->state_in_ch] = 1; + switch (s->state_in_len) { + case 3: + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + /* fall through */ + case 2: + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + /* fall through */ + case 1: + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + break; + default: + s->inUse[s->state_in_len - 4] = 1; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)ch; s->nblock++; + s->block[s->nblock] = (uint8_t)(s->state_in_len - 4); + s->nblock++; + break; + } +} + + +/*---------------------------------------------------*/ +static +void flush_RL(EState* s) +{ + if (s->state_in_ch < 256) add_pair_to_block(s); + init_RL(s); +} + + +/*---------------------------------------------------*/ +#define ADD_CHAR_TO_BLOCK(zs, zchh0) \ +{ \ + uint32_t zchh = (uint32_t)(zchh0); \ + /*-- fast track the common case --*/ \ + if (zchh != zs->state_in_ch && zs->state_in_len == 1) { \ + uint8_t ch = (uint8_t)(zs->state_in_ch); \ + BZ_UPDATE_CRC(zs, zs->blockCRC, ch); \ + zs->inUse[zs->state_in_ch] = 1; \ + zs->block[zs->nblock] = (uint8_t)ch; \ + zs->nblock++; \ + zs->state_in_ch = zchh; \ + } \ + else \ + /*-- general, uncommon cases --*/ \ + if (zchh != zs->state_in_ch || zs->state_in_len == 255) { \ + if (zs->state_in_ch < 256) \ + add_pair_to_block(zs); \ + zs->state_in_ch = zchh; \ + zs->state_in_len = 1; \ + } else { \ + zs->state_in_len++; \ + } \ +} + + +/*---------------------------------------------------*/ +static +void /*Bool*/ copy_input_until_stop(EState* s) +{ + /*Bool progress_in = False;*/ + +#ifdef SAME_CODE_AS_BELOW + if (s->mode == BZ_M_RUNNING) { + /*-- fast track the common case --*/ + while (1) { + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + /*progress_in = True;*/ + ADD_CHAR_TO_BLOCK(s, (uint32_t)(*(uint8_t*)(s->strm->next_in))); + s->strm->next_in++; + s->strm->avail_in--; + /*s->strm->total_in++;*/ + } + } else +#endif + { + /*-- general, uncommon case --*/ + while (1) { + /*-- no input? --*/ + if (s->strm->avail_in == 0) break; + /*-- block full? --*/ + if (s->nblock >= s->nblockMAX) break; + //# /*-- flush/finish end? --*/ + //# if (s->avail_in_expect == 0) break; + /*progress_in = True;*/ + ADD_CHAR_TO_BLOCK(s, *(uint8_t*)(s->strm->next_in)); + s->strm->next_in++; + s->strm->avail_in--; + /*s->strm->total_in++;*/ + //# s->avail_in_expect--; + } + } + /*return progress_in;*/ +} + + +/*---------------------------------------------------*/ +static +void /*Bool*/ copy_output_until_stop(EState* s) +{ + /*Bool progress_out = False;*/ + + while (1) { + /*-- no output space? --*/ + if (s->strm->avail_out == 0) break; + + /*-- block done? --*/ + if (s->state_out_pos >= s->numZ) break; + + /*progress_out = True;*/ + *(s->strm->next_out) = s->zbits[s->state_out_pos]; + s->state_out_pos++; + s->strm->avail_out--; + s->strm->next_out++; + s->strm->total_out++; + } + /*return progress_out;*/ +} + + +/*---------------------------------------------------*/ +static +void /*Bool*/ handle_compress(bz_stream *strm) +{ + /*Bool progress_in = False;*/ + /*Bool progress_out = False;*/ + EState* s = strm->state; + + while (1) { + if (s->state == BZ_S_OUTPUT) { + /*progress_out |=*/ copy_output_until_stop(s); + if (s->state_out_pos < s->numZ) break; + if (s->mode == BZ_M_FINISHING + //# && s->avail_in_expect == 0 + && s->strm->avail_in == 0 + && isempty_RL(s)) + break; + prepare_new_block(s); + s->state = BZ_S_INPUT; +#ifdef FLUSH_IS_UNUSED + if (s->mode == BZ_M_FLUSHING + && s->avail_in_expect == 0 + && isempty_RL(s)) + break; +#endif + } + + if (s->state == BZ_S_INPUT) { + /*progress_in |=*/ copy_input_until_stop(s); + //#if (s->mode != BZ_M_RUNNING && s->avail_in_expect == 0) { + if (s->mode != BZ_M_RUNNING && s->strm->avail_in == 0) { + flush_RL(s); + BZ2_compressBlock(s, (s->mode == BZ_M_FINISHING)); + s->state = BZ_S_OUTPUT; + } else + if (s->nblock >= s->nblockMAX) { + BZ2_compressBlock(s, 0); + s->state = BZ_S_OUTPUT; + } else + if (s->strm->avail_in == 0) { + break; + } + } + } + + /*return progress_in || progress_out;*/ +} + + +/*---------------------------------------------------*/ +static +int BZ2_bzCompress(bz_stream *strm, int action) +{ + /*Bool progress;*/ + EState* s; + + s = strm->state; + + switch (s->mode) { + case BZ_M_RUNNING: + if (action == BZ_RUN) { + /*progress =*/ handle_compress(strm); + /*return progress ? BZ_RUN_OK : BZ_PARAM_ERROR;*/ + return BZ_RUN_OK; + } +#ifdef FLUSH_IS_UNUSED + else + if (action == BZ_FLUSH) { + //#s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FLUSHING; + goto case_BZ_M_FLUSHING; + } +#endif + else + /*if (action == BZ_FINISH)*/ { + //#s->avail_in_expect = strm->avail_in; + s->mode = BZ_M_FINISHING; + goto case_BZ_M_FINISHING; + } + +#ifdef FLUSH_IS_UNUSED + case_BZ_M_FLUSHING: + case BZ_M_FLUSHING: + /*if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR;*/ + /*progress =*/ handle_compress(strm); + if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) + return BZ_FLUSH_OK; + s->mode = BZ_M_RUNNING; + return BZ_RUN_OK; +#endif + + case_BZ_M_FINISHING: + /*case BZ_M_FINISHING:*/ + default: + /*if (s->avail_in_expect != s->strm->avail_in) + return BZ_SEQUENCE_ERROR;*/ + /*progress =*/ handle_compress(strm); + /*if (!progress) return BZ_SEQUENCE_ERROR;*/ + //#if (s->avail_in_expect > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) + //# return BZ_FINISH_OK; + if (s->strm->avail_in > 0 || !isempty_RL(s) || s->state_out_pos < s->numZ) + return BZ_FINISH_OK; + /*s->mode = BZ_M_IDLE;*/ + return BZ_STREAM_END; + } + /* return BZ_OK; --not reached--*/ +} + + +/*---------------------------------------------------*/ +#if ENABLE_FEATURE_CLEAN_UP +static +void BZ2_bzCompressEnd(bz_stream *strm) +{ + EState* s; + + s = strm->state; + free(s->arr1); + free(s->arr2); + free(s->ftab); + free(s->crc32table); + free(strm->state); +} +#endif + + +/*---------------------------------------------------*/ +/*--- Misc convenience stuff ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +#ifdef EXAMPLE_CODE_FOR_MEM_TO_MEM_COMPRESSION +static +int BZ2_bzBuffToBuffCompress(char* dest, + unsigned int* destLen, + char* source, + unsigned int sourceLen, + int blockSize100k) +{ + bz_stream strm; + int ret; + + if (dest == NULL || destLen == NULL + || source == NULL + || blockSize100k < 1 || blockSize100k > 9 + ) { + return BZ_PARAM_ERROR; + } + + BZ2_bzCompressInit(&strm, blockSize100k); + + strm.next_in = source; + strm.next_out = dest; + strm.avail_in = sourceLen; + strm.avail_out = *destLen; + + ret = BZ2_bzCompress(&strm, BZ_FINISH); + if (ret == BZ_FINISH_OK) goto output_overflow; + if (ret != BZ_STREAM_END) goto errhandler; + + /* normal termination */ + *destLen -= strm.avail_out; + BZ2_bzCompressEnd(&strm); + return BZ_OK; + + output_overflow: + BZ2_bzCompressEnd(&strm); + return BZ_OUTBUFF_FULL; + + errhandler: + BZ2_bzCompressEnd(&strm); + return ret; +} +#endif + +/*-------------------------------------------------------------*/ +/*--- end bzlib.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/bz/bzlib.h b/archival/libarchive/bz/bzlib.h new file mode 100644 index 000000000..1bb811c4a --- /dev/null +++ b/archival/libarchive/bz/bzlib.h @@ -0,0 +1,65 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Public header file for the library. ---*/ +/*--- bzlib.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +#define BZ_RUN 0 +#define BZ_FLUSH 1 +#define BZ_FINISH 2 + +#define BZ_OK 0 +#define BZ_RUN_OK 1 +#define BZ_FLUSH_OK 2 +#define BZ_FINISH_OK 3 +#define BZ_STREAM_END 4 +#define BZ_SEQUENCE_ERROR (-1) +#define BZ_PARAM_ERROR (-2) +#define BZ_MEM_ERROR (-3) +#define BZ_DATA_ERROR (-4) +#define BZ_DATA_ERROR_MAGIC (-5) +#define BZ_IO_ERROR (-6) +#define BZ_UNEXPECTED_EOF (-7) +#define BZ_OUTBUFF_FULL (-8) +#define BZ_CONFIG_ERROR (-9) + +typedef struct bz_stream { + void *state; + char *next_in; + char *next_out; + unsigned avail_in; + unsigned avail_out; + /*unsigned long long total_in;*/ + unsigned long long total_out; +} bz_stream; + +/*-- Core (low-level) library functions --*/ + +static void BZ2_bzCompressInit(bz_stream *strm, int blockSize100k); +static int BZ2_bzCompress(bz_stream *strm, int action); +#if ENABLE_FEATURE_CLEAN_UP +static void BZ2_bzCompressEnd(bz_stream *strm); +#endif + +/*-------------------------------------------------------------*/ +/*--- end bzlib.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/bz/bzlib_private.h b/archival/libarchive/bz/bzlib_private.h new file mode 100644 index 000000000..6430ce407 --- /dev/null +++ b/archival/libarchive/bz/bzlib_private.h @@ -0,0 +1,219 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Private header file for the library. ---*/ +/*--- bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib.h" */ + +/*-- General stuff. --*/ + +typedef unsigned char Bool; + +#define True ((Bool)1) +#define False ((Bool)0) + +#if BZ_LIGHT_DEBUG +static void bz_assert_fail(int errcode) NORETURN; +#define AssertH(cond, errcode) \ +do { \ + if (!(cond)) \ + bz_assert_fail(errcode); \ +} while (0) +#else +#define AssertH(cond, msg) do { } while (0) +#endif + +#if BZ_DEBUG +#define AssertD(cond, msg) \ +do { \ + if (!(cond)) \ + bb_error_msg_and_die("(debug build): internal error %s", msg); \ +} while (0) +#else +#define AssertD(cond, msg) do { } while (0) +#endif + + +/*-- Header bytes. --*/ + +#define BZ_HDR_B 0x42 /* 'B' */ +#define BZ_HDR_Z 0x5a /* 'Z' */ +#define BZ_HDR_h 0x68 /* 'h' */ +#define BZ_HDR_0 0x30 /* '0' */ + +#define BZ_HDR_BZh0 0x425a6830 + +/*-- Constants for the back end. --*/ + +#define BZ_MAX_ALPHA_SIZE 258 +#define BZ_MAX_CODE_LEN 23 + +#define BZ_RUNA 0 +#define BZ_RUNB 1 + +#define BZ_N_GROUPS 6 +#define BZ_G_SIZE 50 +#define BZ_N_ITERS 4 + +#define BZ_MAX_SELECTORS (2 + (900000 / BZ_G_SIZE)) + + +/*-- Stuff for doing CRCs. --*/ + +#define BZ_INITIALISE_CRC(crcVar) \ +{ \ + crcVar = 0xffffffffL; \ +} + +#define BZ_FINALISE_CRC(crcVar) \ +{ \ + crcVar = ~(crcVar); \ +} + +#define BZ_UPDATE_CRC(s, crcVar, cha) \ +{ \ + crcVar = (crcVar << 8) ^ s->crc32table[(crcVar >> 24) ^ ((uint8_t)cha)]; \ +} + + +/*-- States and modes for compression. --*/ + +#define BZ_M_IDLE 1 +#define BZ_M_RUNNING 2 +#define BZ_M_FLUSHING 3 +#define BZ_M_FINISHING 4 + +#define BZ_S_OUTPUT 1 +#define BZ_S_INPUT 2 + +#define BZ_N_RADIX 2 +#define BZ_N_QSORT 12 +#define BZ_N_SHELL 18 +#define BZ_N_OVERSHOOT (BZ_N_RADIX + BZ_N_QSORT + BZ_N_SHELL + 2) + + +/*-- Structure holding all the compression-side stuff. --*/ + +typedef struct EState { + /* pointer back to the struct bz_stream */ + bz_stream *strm; + + /* mode this stream is in, and whether inputting */ + /* or outputting data */ + int32_t mode; + int32_t state; + + /* remembers avail_in when flush/finish requested */ +/* bbox: not needed, strm->avail_in always has the same value */ +/* commented out with '//#' throughout the code */ + /* uint32_t avail_in_expect; */ + + /* for doing the block sorting */ + int32_t origPtr; + uint32_t *arr1; + uint32_t *arr2; + uint32_t *ftab; + + /* aliases for arr1 and arr2 */ + uint32_t *ptr; + uint8_t *block; + uint16_t *mtfv; + uint8_t *zbits; + + /* guess what */ + uint32_t *crc32table; + + /* run-length-encoding of the input */ + uint32_t state_in_ch; + int32_t state_in_len; + + /* input and output limits and current posns */ + int32_t nblock; + int32_t nblockMAX; + int32_t numZ; + int32_t state_out_pos; + + /* the buffer for bit stream creation */ + uint32_t bsBuff; + int32_t bsLive; + + /* block and combined CRCs */ + uint32_t blockCRC; + uint32_t combinedCRC; + + /* misc administratium */ + int32_t blockNo; + int32_t blockSize100k; + + /* stuff for coding the MTF values */ + int32_t nMTF; + + /* map of bytes used in block */ + int32_t nInUse; + Bool inUse[256] ALIGNED(sizeof(long)); + uint8_t unseqToSeq[256]; + + /* stuff for coding the MTF values */ + int32_t mtfFreq [BZ_MAX_ALPHA_SIZE]; + uint8_t selector [BZ_MAX_SELECTORS]; + uint8_t selectorMtf[BZ_MAX_SELECTORS]; + + uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + + /* stack-saving measures: these can be local, but they are too big */ + int32_t sendMTFValues__code [BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + int32_t sendMTFValues__rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 + /* second dimension: only 3 needed; 4 makes index calculations faster */ + uint32_t sendMTFValues__len_pack[BZ_MAX_ALPHA_SIZE][4]; +#endif + int32_t BZ2_hbMakeCodeLengths__heap [BZ_MAX_ALPHA_SIZE + 2]; + int32_t BZ2_hbMakeCodeLengths__weight[BZ_MAX_ALPHA_SIZE * 2]; + int32_t BZ2_hbMakeCodeLengths__parent[BZ_MAX_ALPHA_SIZE * 2]; + + int32_t mainSort__runningOrder[256]; + int32_t mainSort__copyStart[256]; + int32_t mainSort__copyEnd[256]; +} EState; + + +/*-- compression. --*/ + +static void +BZ2_blockSort(EState*); + +static void +BZ2_compressBlock(EState*, int); + +static void +BZ2_bsInitWrite(EState*); + +static void +BZ2_hbAssignCodes(int32_t*, uint8_t*, int32_t, int32_t, int32_t); + +static void +BZ2_hbMakeCodeLengths(EState*, uint8_t*, int32_t*, int32_t, int32_t); + +/*-------------------------------------------------------------*/ +/*--- end bzlib_private.h ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/bz/compress.c b/archival/libarchive/bz/compress.c new file mode 100644 index 000000000..6f1c70a08 --- /dev/null +++ b/archival/libarchive/bz/compress.c @@ -0,0 +1,685 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Compression machinery (not incl block sorting) ---*/ +/*--- compress.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* CHANGES + * 0.9.0 -- original version. + * 0.9.0a/b -- no changes in this file. + * 0.9.0c -- changed setting of nGroups in sendMTFValues() + * so as to do a bit better on small files +*/ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +/*--- Bit stream I/O ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void BZ2_bsInitWrite(EState* s) +{ + s->bsLive = 0; + s->bsBuff = 0; +} + + +/*---------------------------------------------------*/ +static NOINLINE +void bsFinishWrite(EState* s) +{ + while (s->bsLive > 0) { + s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } +} + + +/*---------------------------------------------------*/ +static +/* Helps only on level 5, on other levels hurts. ? */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 +ALWAYS_INLINE +#endif +void bsW(EState* s, int32_t n, uint32_t v) +{ + while (s->bsLive >= 8) { + s->zbits[s->numZ] = (uint8_t)(s->bsBuff >> 24); + s->numZ++; + s->bsBuff <<= 8; + s->bsLive -= 8; + } + s->bsBuff |= (v << (32 - s->bsLive - n)); + s->bsLive += n; +} + + +/*---------------------------------------------------*/ +static +void bsPutU32(EState* s, unsigned u) +{ + bsW(s, 8, (u >> 24) & 0xff); + bsW(s, 8, (u >> 16) & 0xff); + bsW(s, 8, (u >> 8) & 0xff); + bsW(s, 8, u & 0xff); +} + + +/*---------------------------------------------------*/ +static +void bsPutU16(EState* s, unsigned u) +{ + bsW(s, 8, (u >> 8) & 0xff); + bsW(s, 8, u & 0xff); +} + + +/*---------------------------------------------------*/ +/*--- The back end proper ---*/ +/*---------------------------------------------------*/ + +/*---------------------------------------------------*/ +static +void makeMaps_e(EState* s) +{ + int i; + s->nInUse = 0; + for (i = 0; i < 256; i++) { + if (s->inUse[i]) { + s->unseqToSeq[i] = s->nInUse; + s->nInUse++; + } + } +} + + +/*---------------------------------------------------*/ +static NOINLINE +void generateMTFValues(EState* s) +{ + uint8_t yy[256]; + int32_t i, j; + int32_t zPend; + int32_t wr; + int32_t EOB; + + /* + * After sorting (eg, here), + * s->arr1[0 .. s->nblock-1] holds sorted order, + * and + * ((uint8_t*)s->arr2)[0 .. s->nblock-1] + * holds the original block data. + * + * The first thing to do is generate the MTF values, + * and put them in ((uint16_t*)s->arr1)[0 .. s->nblock-1]. + * + * Because there are strictly fewer or equal MTF values + * than block values, ptr values in this area are overwritten + * with MTF values only when they are no longer needed. + * + * The final compressed bitstream is generated into the + * area starting at &((uint8_t*)s->arr2)[s->nblock] + * + * These storage aliases are set up in bzCompressInit(), + * except for the last one, which is arranged in + * compressBlock(). + */ + uint32_t* ptr = s->ptr; + uint8_t* block = s->block; + uint16_t* mtfv = s->mtfv; + + makeMaps_e(s); + EOB = s->nInUse+1; + + for (i = 0; i <= EOB; i++) + s->mtfFreq[i] = 0; + + wr = 0; + zPend = 0; + for (i = 0; i < s->nInUse; i++) + yy[i] = (uint8_t) i; + + for (i = 0; i < s->nblock; i++) { + uint8_t ll_i; + AssertD(wr <= i, "generateMTFValues(1)"); + j = ptr[i] - 1; + if (j < 0) + j += s->nblock; + ll_i = s->unseqToSeq[block[j]]; + AssertD(ll_i < s->nInUse, "generateMTFValues(2a)"); + + if (yy[0] == ll_i) { + zPend++; + } else { + if (zPend > 0) { + zPend--; + while (1) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) break; + zPend = (uint32_t)(zPend - 2) / 2; + /* bbox: unsigned div is easier */ + }; + zPend = 0; + } + { + register uint8_t rtmp; + register uint8_t* ryy_j; + register uint8_t rll_i; + rtmp = yy[1]; + yy[1] = yy[0]; + ryy_j = &(yy[1]); + rll_i = ll_i; + while (rll_i != rtmp) { + register uint8_t rtmp2; + ryy_j++; + rtmp2 = rtmp; + rtmp = *ryy_j; + *ryy_j = rtmp2; + }; + yy[0] = rtmp; + j = ryy_j - &(yy[0]); + mtfv[wr] = j+1; + wr++; + s->mtfFreq[j+1]++; + } + } + } + + if (zPend > 0) { + zPend--; + while (1) { + if (zPend & 1) { + mtfv[wr] = BZ_RUNB; + wr++; + s->mtfFreq[BZ_RUNB]++; + } else { + mtfv[wr] = BZ_RUNA; + wr++; + s->mtfFreq[BZ_RUNA]++; + } + if (zPend < 2) + break; + zPend = (uint32_t)(zPend - 2) / 2; + /* bbox: unsigned div is easier */ + }; + zPend = 0; + } + + mtfv[wr] = EOB; + wr++; + s->mtfFreq[EOB]++; + + s->nMTF = wr; +} + + +/*---------------------------------------------------*/ +#define BZ_LESSER_ICOST 0 +#define BZ_GREATER_ICOST 15 + +static NOINLINE +void sendMTFValues(EState* s) +{ + int32_t v, t, i, j, gs, ge, totc, bt, bc, iter; + int32_t nSelectors, alphaSize, minLen, maxLen, selCtr; + int32_t nGroups, nBytes; + + /* + * uint8_t len[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * is a global since the decoder also needs it. + * + * int32_t code[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * int32_t rfreq[BZ_N_GROUPS][BZ_MAX_ALPHA_SIZE]; + * are also globals only used in this proc. + * Made global to keep stack frame size small. + */ +#define code sendMTFValues__code +#define rfreq sendMTFValues__rfreq +#define len_pack sendMTFValues__len_pack + + uint16_t cost[BZ_N_GROUPS]; + int32_t fave[BZ_N_GROUPS]; + + uint16_t* mtfv = s->mtfv; + + alphaSize = s->nInUse + 2; + for (t = 0; t < BZ_N_GROUPS; t++) + for (v = 0; v < alphaSize; v++) + s->len[t][v] = BZ_GREATER_ICOST; + + /*--- Decide how many coding tables to use ---*/ + AssertH(s->nMTF > 0, 3001); + if (s->nMTF < 200) nGroups = 2; else + if (s->nMTF < 600) nGroups = 3; else + if (s->nMTF < 1200) nGroups = 4; else + if (s->nMTF < 2400) nGroups = 5; else + nGroups = 6; + + /*--- Generate an initial set of coding tables ---*/ + { + int32_t nPart, remF, tFreq, aFreq; + + nPart = nGroups; + remF = s->nMTF; + gs = 0; + while (nPart > 0) { + tFreq = remF / nPart; + ge = gs - 1; + aFreq = 0; + while (aFreq < tFreq && ge < alphaSize-1) { + ge++; + aFreq += s->mtfFreq[ge]; + } + + if (ge > gs + && nPart != nGroups && nPart != 1 + && ((nGroups - nPart) % 2 == 1) /* bbox: can this be replaced by x & 1? */ + ) { + aFreq -= s->mtfFreq[ge]; + ge--; + } + + for (v = 0; v < alphaSize; v++) + if (v >= gs && v <= ge) + s->len[nPart-1][v] = BZ_LESSER_ICOST; + else + s->len[nPart-1][v] = BZ_GREATER_ICOST; + + nPart--; + gs = ge + 1; + remF -= aFreq; + } + } + + /* + * Iterate up to BZ_N_ITERS times to improve the tables. + */ + for (iter = 0; iter < BZ_N_ITERS; iter++) { + for (t = 0; t < nGroups; t++) + fave[t] = 0; + + for (t = 0; t < nGroups; t++) + for (v = 0; v < alphaSize; v++) + s->rfreq[t][v] = 0; + +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 + /* + * Set up an auxiliary length table which is used to fast-track + * the common case (nGroups == 6). + */ + if (nGroups == 6) { + for (v = 0; v < alphaSize; v++) { + s->len_pack[v][0] = (s->len[1][v] << 16) | s->len[0][v]; + s->len_pack[v][1] = (s->len[3][v] << 16) | s->len[2][v]; + s->len_pack[v][2] = (s->len[5][v] << 16) | s->len[4][v]; + } + } +#endif + nSelectors = 0; + totc = 0; + gs = 0; + while (1) { + /*--- Set group start & end marks. --*/ + if (gs >= s->nMTF) + break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) + ge = s->nMTF-1; + + /* + * Calculate the cost of this group as coded + * by each of the coding tables. + */ + for (t = 0; t < nGroups; t++) + cost[t] = 0; +#if CONFIG_BZIP2_FEATURE_SPEED >= 5 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + register uint32_t cost01, cost23, cost45; + register uint16_t icv; + cost01 = cost23 = cost45 = 0; +#define BZ_ITER(nn) \ + icv = mtfv[gs+(nn)]; \ + cost01 += s->len_pack[icv][0]; \ + cost23 += s->len_pack[icv][1]; \ + cost45 += s->len_pack[icv][2]; + BZ_ITER(0); BZ_ITER(1); BZ_ITER(2); BZ_ITER(3); BZ_ITER(4); + BZ_ITER(5); BZ_ITER(6); BZ_ITER(7); BZ_ITER(8); BZ_ITER(9); + BZ_ITER(10); BZ_ITER(11); BZ_ITER(12); BZ_ITER(13); BZ_ITER(14); + BZ_ITER(15); BZ_ITER(16); BZ_ITER(17); BZ_ITER(18); BZ_ITER(19); + BZ_ITER(20); BZ_ITER(21); BZ_ITER(22); BZ_ITER(23); BZ_ITER(24); + BZ_ITER(25); BZ_ITER(26); BZ_ITER(27); BZ_ITER(28); BZ_ITER(29); + BZ_ITER(30); BZ_ITER(31); BZ_ITER(32); BZ_ITER(33); BZ_ITER(34); + BZ_ITER(35); BZ_ITER(36); BZ_ITER(37); BZ_ITER(38); BZ_ITER(39); + BZ_ITER(40); BZ_ITER(41); BZ_ITER(42); BZ_ITER(43); BZ_ITER(44); + BZ_ITER(45); BZ_ITER(46); BZ_ITER(47); BZ_ITER(48); BZ_ITER(49); +#undef BZ_ITER + cost[0] = cost01 & 0xffff; cost[1] = cost01 >> 16; + cost[2] = cost23 & 0xffff; cost[3] = cost23 >> 16; + cost[4] = cost45 & 0xffff; cost[5] = cost45 >> 16; + + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + for (i = gs; i <= ge; i++) { + uint16_t icv = mtfv[i]; + for (t = 0; t < nGroups; t++) + cost[t] += s->len[t][icv]; + } + } + /* + * Find the coding table which is best for this group, + * and record its identity in the selector table. + */ + /*bc = 999999999;*/ + /*bt = -1;*/ + bc = cost[0]; + bt = 0; + for (t = 1 /*0*/; t < nGroups; t++) { + if (cost[t] < bc) { + bc = cost[t]; + bt = t; + } + } + totc += bc; + fave[bt]++; + s->selector[nSelectors] = bt; + nSelectors++; + + /* + * Increment the symbol frequencies for the selected table. + */ +/* 1% faster compress. +800 bytes */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 4 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ +#define BZ_ITUR(nn) s->rfreq[bt][mtfv[gs + (nn)]]++ + BZ_ITUR(0); BZ_ITUR(1); BZ_ITUR(2); BZ_ITUR(3); BZ_ITUR(4); + BZ_ITUR(5); BZ_ITUR(6); BZ_ITUR(7); BZ_ITUR(8); BZ_ITUR(9); + BZ_ITUR(10); BZ_ITUR(11); BZ_ITUR(12); BZ_ITUR(13); BZ_ITUR(14); + BZ_ITUR(15); BZ_ITUR(16); BZ_ITUR(17); BZ_ITUR(18); BZ_ITUR(19); + BZ_ITUR(20); BZ_ITUR(21); BZ_ITUR(22); BZ_ITUR(23); BZ_ITUR(24); + BZ_ITUR(25); BZ_ITUR(26); BZ_ITUR(27); BZ_ITUR(28); BZ_ITUR(29); + BZ_ITUR(30); BZ_ITUR(31); BZ_ITUR(32); BZ_ITUR(33); BZ_ITUR(34); + BZ_ITUR(35); BZ_ITUR(36); BZ_ITUR(37); BZ_ITUR(38); BZ_ITUR(39); + BZ_ITUR(40); BZ_ITUR(41); BZ_ITUR(42); BZ_ITUR(43); BZ_ITUR(44); + BZ_ITUR(45); BZ_ITUR(46); BZ_ITUR(47); BZ_ITUR(48); BZ_ITUR(49); +#undef BZ_ITUR + gs = ge + 1; + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + while (gs <= ge) { + s->rfreq[bt][mtfv[gs]]++; + gs++; + } + /* already is: gs = ge + 1; */ + } + } + + /* + * Recompute the tables based on the accumulated frequencies. + */ + /* maxLen was changed from 20 to 17 in bzip2-1.0.3. See + * comment in huffman.c for details. */ + for (t = 0; t < nGroups; t++) + BZ2_hbMakeCodeLengths(s, &(s->len[t][0]), &(s->rfreq[t][0]), alphaSize, 17 /*20*/); + } + + AssertH(nGroups < 8, 3002); + AssertH(nSelectors < 32768 && nSelectors <= (2 + (900000 / BZ_G_SIZE)), 3003); + + /*--- Compute MTF values for the selectors. ---*/ + { + uint8_t pos[BZ_N_GROUPS], ll_i, tmp2, tmp; + + for (i = 0; i < nGroups; i++) + pos[i] = i; + for (i = 0; i < nSelectors; i++) { + ll_i = s->selector[i]; + j = 0; + tmp = pos[j]; + while (ll_i != tmp) { + j++; + tmp2 = tmp; + tmp = pos[j]; + pos[j] = tmp2; + }; + pos[0] = tmp; + s->selectorMtf[i] = j; + } + }; + + /*--- Assign actual codes for the tables. --*/ + for (t = 0; t < nGroups; t++) { + minLen = 32; + maxLen = 0; + for (i = 0; i < alphaSize; i++) { + if (s->len[t][i] > maxLen) maxLen = s->len[t][i]; + if (s->len[t][i] < minLen) minLen = s->len[t][i]; + } + AssertH(!(maxLen > 17 /*20*/), 3004); + AssertH(!(minLen < 1), 3005); + BZ2_hbAssignCodes(&(s->code[t][0]), &(s->len[t][0]), minLen, maxLen, alphaSize); + } + + /*--- Transmit the mapping table. ---*/ + { + /* bbox: optimized a bit more than in bzip2 */ + int inUse16 = 0; + for (i = 0; i < 16; i++) { + if (sizeof(long) <= 4) { + inUse16 = inUse16*2 + + ((*(uint32_t*)&(s->inUse[i * 16 + 0]) + | *(uint32_t*)&(s->inUse[i * 16 + 4]) + | *(uint32_t*)&(s->inUse[i * 16 + 8]) + | *(uint32_t*)&(s->inUse[i * 16 + 12])) != 0); + } else { /* Our CPU can do better */ + inUse16 = inUse16*2 + + ((*(uint64_t*)&(s->inUse[i * 16 + 0]) + | *(uint64_t*)&(s->inUse[i * 16 + 8])) != 0); + } + } + + nBytes = s->numZ; + bsW(s, 16, inUse16); + + inUse16 <<= (sizeof(int)*8 - 16); /* move 15th bit into sign bit */ + for (i = 0; i < 16; i++) { + if (inUse16 < 0) { + unsigned v16 = 0; + for (j = 0; j < 16; j++) + v16 = v16*2 + s->inUse[i * 16 + j]; + bsW(s, 16, v16); + } + inUse16 <<= 1; + } + } + + /*--- Now the selectors. ---*/ + nBytes = s->numZ; + bsW(s, 3, nGroups); + bsW(s, 15, nSelectors); + for (i = 0; i < nSelectors; i++) { + for (j = 0; j < s->selectorMtf[i]; j++) + bsW(s, 1, 1); + bsW(s, 1, 0); + } + + /*--- Now the coding tables. ---*/ + nBytes = s->numZ; + + for (t = 0; t < nGroups; t++) { + int32_t curr = s->len[t][0]; + bsW(s, 5, curr); + for (i = 0; i < alphaSize; i++) { + while (curr < s->len[t][i]) { bsW(s, 2, 2); curr++; /* 10 */ }; + while (curr > s->len[t][i]) { bsW(s, 2, 3); curr--; /* 11 */ }; + bsW(s, 1, 0); + } + } + + /*--- And finally, the block data proper ---*/ + nBytes = s->numZ; + selCtr = 0; + gs = 0; + while (1) { + if (gs >= s->nMTF) + break; + ge = gs + BZ_G_SIZE - 1; + if (ge >= s->nMTF) + ge = s->nMTF-1; + AssertH(s->selector[selCtr] < nGroups, 3006); + +/* Costs 1300 bytes and is _slower_ (on Intel Core 2) */ +#if 0 + if (nGroups == 6 && 50 == ge-gs+1) { + /*--- fast track the common case ---*/ + uint16_t mtfv_i; + uint8_t* s_len_sel_selCtr = &(s->len[s->selector[selCtr]][0]); + int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]); +#define BZ_ITAH(nn) \ + mtfv_i = mtfv[gs+(nn)]; \ + bsW(s, s_len_sel_selCtr[mtfv_i], s_code_sel_selCtr[mtfv_i]) + BZ_ITAH(0); BZ_ITAH(1); BZ_ITAH(2); BZ_ITAH(3); BZ_ITAH(4); + BZ_ITAH(5); BZ_ITAH(6); BZ_ITAH(7); BZ_ITAH(8); BZ_ITAH(9); + BZ_ITAH(10); BZ_ITAH(11); BZ_ITAH(12); BZ_ITAH(13); BZ_ITAH(14); + BZ_ITAH(15); BZ_ITAH(16); BZ_ITAH(17); BZ_ITAH(18); BZ_ITAH(19); + BZ_ITAH(20); BZ_ITAH(21); BZ_ITAH(22); BZ_ITAH(23); BZ_ITAH(24); + BZ_ITAH(25); BZ_ITAH(26); BZ_ITAH(27); BZ_ITAH(28); BZ_ITAH(29); + BZ_ITAH(30); BZ_ITAH(31); BZ_ITAH(32); BZ_ITAH(33); BZ_ITAH(34); + BZ_ITAH(35); BZ_ITAH(36); BZ_ITAH(37); BZ_ITAH(38); BZ_ITAH(39); + BZ_ITAH(40); BZ_ITAH(41); BZ_ITAH(42); BZ_ITAH(43); BZ_ITAH(44); + BZ_ITAH(45); BZ_ITAH(46); BZ_ITAH(47); BZ_ITAH(48); BZ_ITAH(49); +#undef BZ_ITAH + gs = ge+1; + } else +#endif + { + /*--- slow version which correctly handles all situations ---*/ + /* code is bit bigger, but moves multiply out of the loop */ + uint8_t* s_len_sel_selCtr = &(s->len [s->selector[selCtr]][0]); + int32_t* s_code_sel_selCtr = &(s->code[s->selector[selCtr]][0]); + while (gs <= ge) { + bsW(s, + s_len_sel_selCtr[mtfv[gs]], + s_code_sel_selCtr[mtfv[gs]] + ); + gs++; + } + /* already is: gs = ge+1; */ + } + selCtr++; + } + AssertH(selCtr == nSelectors, 3007); +#undef code +#undef rfreq +#undef len_pack +} + + +/*---------------------------------------------------*/ +static +void BZ2_compressBlock(EState* s, int is_last_block) +{ + if (s->nblock > 0) { + BZ_FINALISE_CRC(s->blockCRC); + s->combinedCRC = (s->combinedCRC << 1) | (s->combinedCRC >> 31); + s->combinedCRC ^= s->blockCRC; + if (s->blockNo > 1) + s->numZ = 0; + + BZ2_blockSort(s); + } + + s->zbits = &((uint8_t*)s->arr2)[s->nblock]; + + /*-- If this is the first block, create the stream header. --*/ + if (s->blockNo == 1) { + BZ2_bsInitWrite(s); + /*bsPutU8(s, BZ_HDR_B);*/ + /*bsPutU8(s, BZ_HDR_Z);*/ + /*bsPutU8(s, BZ_HDR_h);*/ + /*bsPutU8(s, BZ_HDR_0 + s->blockSize100k);*/ + bsPutU32(s, BZ_HDR_BZh0 + s->blockSize100k); + } + + if (s->nblock > 0) { + /*bsPutU8(s, 0x31);*/ + /*bsPutU8(s, 0x41);*/ + /*bsPutU8(s, 0x59);*/ + /*bsPutU8(s, 0x26);*/ + bsPutU32(s, 0x31415926); + /*bsPutU8(s, 0x53);*/ + /*bsPutU8(s, 0x59);*/ + bsPutU16(s, 0x5359); + + /*-- Now the block's CRC, so it is in a known place. --*/ + bsPutU32(s, s->blockCRC); + + /* + * Now a single bit indicating (non-)randomisation. + * As of version 0.9.5, we use a better sorting algorithm + * which makes randomisation unnecessary. So always set + * the randomised bit to 'no'. Of course, the decoder + * still needs to be able to handle randomised blocks + * so as to maintain backwards compatibility with + * older versions of bzip2. + */ + bsW(s, 1, 0); + + bsW(s, 24, s->origPtr); + generateMTFValues(s); + sendMTFValues(s); + } + + /*-- If this is the last block, add the stream trailer. --*/ + if (is_last_block) { + /*bsPutU8(s, 0x17);*/ + /*bsPutU8(s, 0x72);*/ + /*bsPutU8(s, 0x45);*/ + /*bsPutU8(s, 0x38);*/ + bsPutU32(s, 0x17724538); + /*bsPutU8(s, 0x50);*/ + /*bsPutU8(s, 0x90);*/ + bsPutU16(s, 0x5090); + bsPutU32(s, s->combinedCRC); + bsFinishWrite(s); + } +} + + +/*-------------------------------------------------------------*/ +/*--- end compress.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/bz/huffman.c b/archival/libarchive/bz/huffman.c new file mode 100644 index 000000000..676b1af66 --- /dev/null +++ b/archival/libarchive/bz/huffman.c @@ -0,0 +1,229 @@ +/* + * bzip2 is written by Julian Seward . + * Adapted for busybox by Denys Vlasenko . + * See README and LICENSE files in this directory for more information. + */ + +/*-------------------------------------------------------------*/ +/*--- Huffman coding low-level stuff ---*/ +/*--- huffman.c ---*/ +/*-------------------------------------------------------------*/ + +/* ------------------------------------------------------------------ +This file is part of bzip2/libbzip2, a program and library for +lossless, block-sorting data compression. + +bzip2/libbzip2 version 1.0.4 of 20 December 2006 +Copyright (C) 1996-2006 Julian Seward + +Please read the WARNING, DISCLAIMER and PATENTS sections in the +README file. + +This program is released under the terms of the license contained +in the file LICENSE. +------------------------------------------------------------------ */ + +/* #include "bzlib_private.h" */ + +/*---------------------------------------------------*/ +#define WEIGHTOF(zz0) ((zz0) & 0xffffff00) +#define DEPTHOF(zz1) ((zz1) & 0x000000ff) +#define MYMAX(zz2,zz3) ((zz2) > (zz3) ? (zz2) : (zz3)) + +#define ADDWEIGHTS(zw1,zw2) \ + (WEIGHTOF(zw1)+WEIGHTOF(zw2)) | \ + (1 + MYMAX(DEPTHOF(zw1),DEPTHOF(zw2))) + +#define UPHEAP(z) \ +{ \ + int32_t zz, tmp; \ + zz = z; \ + tmp = heap[zz]; \ + while (weight[tmp] < weight[heap[zz >> 1]]) { \ + heap[zz] = heap[zz >> 1]; \ + zz >>= 1; \ + } \ + heap[zz] = tmp; \ +} + + +/* 90 bytes, 0.3% of overall compress speed */ +#if CONFIG_BZIP2_FEATURE_SPEED >= 1 + +/* macro works better than inline (gcc 4.2.1) */ +#define DOWNHEAP1(heap, weight, Heap) \ +{ \ + int32_t zz, yy, tmp; \ + zz = 1; \ + tmp = heap[zz]; \ + while (1) { \ + yy = zz << 1; \ + if (yy > nHeap) \ + break; \ + if (yy < nHeap \ + && weight[heap[yy+1]] < weight[heap[yy]]) \ + yy++; \ + if (weight[tmp] < weight[heap[yy]]) \ + break; \ + heap[zz] = heap[yy]; \ + zz = yy; \ + } \ + heap[zz] = tmp; \ +} + +#else + +static +void DOWNHEAP1(int32_t *heap, int32_t *weight, int32_t nHeap) +{ + int32_t zz, yy, tmp; + zz = 1; + tmp = heap[zz]; + while (1) { + yy = zz << 1; + if (yy > nHeap) + break; + if (yy < nHeap + && weight[heap[yy + 1]] < weight[heap[yy]]) + yy++; + if (weight[tmp] < weight[heap[yy]]) + break; + heap[zz] = heap[yy]; + zz = yy; + } + heap[zz] = tmp; +} + +#endif + +/*---------------------------------------------------*/ +static +void BZ2_hbMakeCodeLengths(EState *s, + uint8_t *len, + int32_t *freq, + int32_t alphaSize, + int32_t maxLen) +{ + /* + * Nodes and heap entries run from 1. Entry 0 + * for both the heap and nodes is a sentinel. + */ + int32_t nNodes, nHeap, n1, n2, i, j, k; + Bool tooLong; + + /* bbox: moved to EState to save stack + int32_t heap [BZ_MAX_ALPHA_SIZE + 2]; + int32_t weight[BZ_MAX_ALPHA_SIZE * 2]; + int32_t parent[BZ_MAX_ALPHA_SIZE * 2]; + */ +#define heap (s->BZ2_hbMakeCodeLengths__heap) +#define weight (s->BZ2_hbMakeCodeLengths__weight) +#define parent (s->BZ2_hbMakeCodeLengths__parent) + + for (i = 0; i < alphaSize; i++) + weight[i+1] = (freq[i] == 0 ? 1 : freq[i]) << 8; + + while (1) { + nNodes = alphaSize; + nHeap = 0; + + heap[0] = 0; + weight[0] = 0; + parent[0] = -2; + + for (i = 1; i <= alphaSize; i++) { + parent[i] = -1; + nHeap++; + heap[nHeap] = i; + UPHEAP(nHeap); + } + + AssertH(nHeap < (BZ_MAX_ALPHA_SIZE+2), 2001); + + while (nHeap > 1) { + n1 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap); + n2 = heap[1]; heap[1] = heap[nHeap]; nHeap--; DOWNHEAP1(heap, weight, nHeap); + nNodes++; + parent[n1] = parent[n2] = nNodes; + weight[nNodes] = ADDWEIGHTS(weight[n1], weight[n2]); + parent[nNodes] = -1; + nHeap++; + heap[nHeap] = nNodes; + UPHEAP(nHeap); + } + + AssertH(nNodes < (BZ_MAX_ALPHA_SIZE * 2), 2002); + + tooLong = False; + for (i = 1; i <= alphaSize; i++) { + j = 0; + k = i; + while (parent[k] >= 0) { + k = parent[k]; + j++; + } + len[i-1] = j; + if (j > maxLen) + tooLong = True; + } + + if (!tooLong) + break; + + /* 17 Oct 04: keep-going condition for the following loop used + to be 'i < alphaSize', which missed the last element, + theoretically leading to the possibility of the compressor + looping. However, this count-scaling step is only needed if + one of the generated Huffman code words is longer than + maxLen, which up to and including version 1.0.2 was 20 bits, + which is extremely unlikely. In version 1.0.3 maxLen was + changed to 17 bits, which has minimal effect on compression + ratio, but does mean this scaling step is used from time to + time, enough to verify that it works. + + This means that bzip2-1.0.3 and later will only produce + Huffman codes with a maximum length of 17 bits. However, in + order to preserve backwards compatibility with bitstreams + produced by versions pre-1.0.3, the decompressor must still + handle lengths of up to 20. */ + + for (i = 1; i <= alphaSize; i++) { + j = weight[i] >> 8; + /* bbox: yes, it is a signed division. + * don't replace with shift! */ + j = 1 + (j / 2); + weight[i] = j << 8; + } + } +#undef heap +#undef weight +#undef parent +} + + +/*---------------------------------------------------*/ +static +void BZ2_hbAssignCodes(int32_t *code, + uint8_t *length, + int32_t minLen, + int32_t maxLen, + int32_t alphaSize) +{ + int32_t n, vec, i; + + vec = 0; + for (n = minLen; n <= maxLen; n++) { + for (i = 0; i < alphaSize; i++) { + if (length[i] == n) { + code[i] = vec; + vec++; + }; + } + vec <<= 1; + } +} + + +/*-------------------------------------------------------------*/ +/*--- end huffman.c ---*/ +/*-------------------------------------------------------------*/ diff --git a/archival/libarchive/data_align.c b/archival/libarchive/data_align.c new file mode 100644 index 000000000..2e56fa8ff --- /dev/null +++ b/archival/libarchive/data_align.c @@ -0,0 +1,15 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC data_align(archive_handle_t *archive_handle, unsigned boundary) +{ + unsigned skip_amount = (boundary - (archive_handle->offset % boundary)) % boundary; + + archive_handle->seek(archive_handle->src_fd, skip_amount); + archive_handle->offset += skip_amount; +} diff --git a/archival/libarchive/data_extract_all.c b/archival/libarchive/data_extract_all.c new file mode 100644 index 000000000..1b25c8bd6 --- /dev/null +++ b/archival/libarchive/data_extract_all.c @@ -0,0 +1,200 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) +{ + file_header_t *file_header = archive_handle->file_header; + int dst_fd; + int res; + +#if ENABLE_FEATURE_TAR_SELINUX + char *sctx = archive_handle->tar__next_file_sctx; + if (!sctx) + sctx = archive_handle->tar__global_sctx; + if (sctx) { /* setfscreatecon is 4 syscalls, avoid if possible */ + setfscreatecon(sctx); + free(archive_handle->tar__next_file_sctx); + archive_handle->tar__next_file_sctx = NULL; + } +#endif + + if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) { + char *slash = strrchr(file_header->name, '/'); + if (slash) { + *slash = '\0'; + bb_make_directory(file_header->name, -1, FILEUTILS_RECUR); + *slash = '/'; + } + } + + if (archive_handle->ah_flags & ARCHIVE_UNLINK_OLD) { + /* Remove the entry if it exists */ + if (!S_ISDIR(file_header->mode)) { + /* Is it hardlink? + * We encode hard links as regular files of size 0 with a symlink */ + if (S_ISREG(file_header->mode) + && file_header->link_target + && file_header->size == 0 + ) { + /* Ugly special case: + * tar cf t.tar hardlink1 hardlink2 hardlink1 + * results in this tarball structure: + * hardlink1 + * hardlink2 -> hardlink1 + * hardlink1 -> hardlink1 <== !!! + */ + if (strcmp(file_header->link_target, file_header->name) == 0) + goto ret; + } + /* Proceed with deleting */ + if (unlink(file_header->name) == -1 + && errno != ENOENT + ) { + bb_perror_msg_and_die("can't remove old file %s", + file_header->name); + } + } + } + else if (archive_handle->ah_flags & ARCHIVE_EXTRACT_NEWER) { + /* Remove the existing entry if its older than the extracted entry */ + struct stat existing_sb; + if (lstat(file_header->name, &existing_sb) == -1) { + if (errno != ENOENT) { + bb_perror_msg_and_die("can't stat old file"); + } + } + else if (existing_sb.st_mtime >= file_header->mtime) { + if (!(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) + && !S_ISDIR(file_header->mode) + ) { + bb_error_msg("%s not created: newer or " + "same age file exists", file_header->name); + } + data_skip(archive_handle); + goto ret; + } + else if ((unlink(file_header->name) == -1) && (errno != EISDIR)) { + bb_perror_msg_and_die("can't remove old file %s", + file_header->name); + } + } + + /* Handle hard links separately + * We encode hard links as regular files of size 0 with a symlink */ + if (S_ISREG(file_header->mode) + && file_header->link_target + && file_header->size == 0 + ) { + /* hard link */ + res = link(file_header->link_target, file_header->name); + if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) { + bb_perror_msg("can't create %slink " + "from %s to %s", "hard", + file_header->name, + file_header->link_target); + } + /* Hardlinks have no separate mode/ownership, skip chown/chmod */ + goto ret; + } + + /* Create the filesystem entry */ + switch (file_header->mode & S_IFMT) { + case S_IFREG: { + /* Regular file */ + int flags = O_WRONLY | O_CREAT | O_EXCL; + if (archive_handle->ah_flags & ARCHIVE_O_TRUNC) + flags = O_WRONLY | O_CREAT | O_TRUNC; + dst_fd = xopen3(file_header->name, + flags, + file_header->mode + ); + bb_copyfd_exact_size(archive_handle->src_fd, dst_fd, file_header->size); + close(dst_fd); + break; + } + case S_IFDIR: + res = mkdir(file_header->name, file_header->mode); + if ((res == -1) + && (errno != EISDIR) /* btw, Linux doesn't return this */ + && (errno != EEXIST) + && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) + ) { + bb_perror_msg("can't make dir %s", file_header->name); + } + break; + case S_IFLNK: + /* Symlink */ +//TODO: what if file_header->link_target == NULL (say, corrupted tarball?) + res = symlink(file_header->link_target, file_header->name); + if ((res == -1) + && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) + ) { + bb_perror_msg("can't create %slink " + "from %s to %s", "sym", + file_header->name, + file_header->link_target); + } + break; + case S_IFSOCK: + case S_IFBLK: + case S_IFCHR: + case S_IFIFO: + res = mknod(file_header->name, file_header->mode, file_header->device); + if ((res == -1) + && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) + ) { + bb_perror_msg("can't create node %s", file_header->name); + } + break; + default: + bb_error_msg_and_die("unrecognized file type"); + } + + if (!S_ISLNK(file_header->mode)) { + if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_OWNER)) { + uid_t uid = file_header->uid; + gid_t gid = file_header->gid; +#if ENABLE_FEATURE_TAR_UNAME_GNAME + if (!(archive_handle->ah_flags & ARCHIVE_NUMERIC_OWNER)) { + if (file_header->tar__uname) { +//TODO: cache last name/id pair? + struct passwd *pwd = getpwnam(file_header->tar__uname); + if (pwd) uid = pwd->pw_uid; + } + if (file_header->tar__gname) { + struct group *grp = getgrnam(file_header->tar__gname); + if (grp) gid = grp->gr_gid; + } + } +#endif + /* GNU tar 1.15.1 uses chown, not lchown */ + chown(file_header->name, uid, gid); + } + /* uclibc has no lchmod, glibc is even stranger - + * it has lchmod which seems to do nothing! + * so we use chmod... */ + if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_PERM)) { + chmod(file_header->name, file_header->mode); + } + if (archive_handle->ah_flags & ARCHIVE_RESTORE_DATE) { + struct timeval t[2]; + + t[1].tv_sec = t[0].tv_sec = file_header->mtime; + t[1].tv_usec = t[0].tv_usec = 0; + utimes(file_header->name, t); + } + } + + ret: ; +#if ENABLE_FEATURE_TAR_SELINUX + if (sctx) { + /* reset the context after creating an entry */ + setfscreatecon(NULL); + } +#endif +} diff --git a/archival/libarchive/data_extract_to_command.c b/archival/libarchive/data_extract_to_command.c new file mode 100644 index 000000000..2bbab7641 --- /dev/null +++ b/archival/libarchive/data_extract_to_command.c @@ -0,0 +1,134 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +enum { + //TAR_FILETYPE, + TAR_MODE, + TAR_FILENAME, + TAR_REALNAME, +#if ENABLE_FEATURE_TAR_UNAME_GNAME + TAR_UNAME, + TAR_GNAME, +#endif + TAR_SIZE, + TAR_UID, + TAR_GID, + TAR_MAX, +}; + +static const char *const tar_var[] = { + // "FILETYPE", + "MODE", + "FILENAME", + "REALNAME", +#if ENABLE_FEATURE_TAR_UNAME_GNAME + "UNAME", + "GNAME", +#endif + "SIZE", + "UID", + "GID", +}; + +static void xputenv(char *str) +{ + if (putenv(str)) + bb_error_msg_and_die(bb_msg_memory_exhausted); +} + +static void str2env(char *env[], int idx, const char *str) +{ + env[idx] = xasprintf("TAR_%s=%s", tar_var[idx], str); + xputenv(env[idx]); +} + +static void dec2env(char *env[], int idx, unsigned long long val) +{ + env[idx] = xasprintf("TAR_%s=%llu", tar_var[idx], val); + xputenv(env[idx]); +} + +static void oct2env(char *env[], int idx, unsigned long val) +{ + env[idx] = xasprintf("TAR_%s=%lo", tar_var[idx], val); + xputenv(env[idx]); +} + +void FAST_FUNC data_extract_to_command(archive_handle_t *archive_handle) +{ + file_header_t *file_header = archive_handle->file_header; + +#if 0 /* do we need this? ENABLE_FEATURE_TAR_SELINUX */ + char *sctx = archive_handle->tar__next_file_sctx; + if (!sctx) + sctx = archive_handle->tar__global_sctx; + if (sctx) { /* setfscreatecon is 4 syscalls, avoid if possible */ + setfscreatecon(sctx); + free(archive_handle->tar__next_file_sctx); + archive_handle->tar__next_file_sctx = NULL; + } +#endif + + if ((file_header->mode & S_IFMT) == S_IFREG) { + pid_t pid; + int p[2], status; + char *tar_env[TAR_MAX]; + + memset(tar_env, 0, sizeof(tar_env)); + + xpipe(p); + pid = BB_MMU ? xfork() : xvfork(); + if (pid == 0) { + /* Child */ + /* str2env(tar_env, TAR_FILETYPE, "f"); - parent should do it once */ + oct2env(tar_env, TAR_MODE, file_header->mode); + str2env(tar_env, TAR_FILENAME, file_header->name); + str2env(tar_env, TAR_REALNAME, file_header->name); +#if ENABLE_FEATURE_TAR_UNAME_GNAME + str2env(tar_env, TAR_UNAME, file_header->tar__uname); + str2env(tar_env, TAR_GNAME, file_header->tar__gname); +#endif + dec2env(tar_env, TAR_SIZE, file_header->size); + dec2env(tar_env, TAR_UID, file_header->uid); + dec2env(tar_env, TAR_GID, file_header->gid); + close(p[1]); + xdup2(p[0], STDIN_FILENO); + signal(SIGPIPE, SIG_DFL); + execl(DEFAULT_SHELL, DEFAULT_SHELL_SHORT_NAME, "-c", archive_handle->tar__to_command, NULL); + bb_perror_msg_and_die("can't execute '%s'", DEFAULT_SHELL); + } + close(p[0]); + /* Our caller is expected to do signal(SIGPIPE, SIG_IGN) + * so that we don't die if child don't read all the input: */ + bb_copyfd_exact_size(archive_handle->src_fd, p[1], -file_header->size); + close(p[1]); + + if (safe_waitpid(pid, &status, 0) == -1) + bb_perror_msg_and_die("waitpid"); + if (WIFEXITED(status) && WEXITSTATUS(status)) + bb_error_msg_and_die("'%s' returned status %d", + archive_handle->tar__to_command, WEXITSTATUS(status)); + if (WIFSIGNALED(status)) + bb_error_msg_and_die("'%s' terminated on signal %d", + archive_handle->tar__to_command, WTERMSIG(status)); + + if (!BB_MMU) { + int i; + for (i = 0; i < TAR_MAX; i++) { + if (tar_env[i]) + bb_unsetenv_and_free(tar_env[i]); + } + } + } + +#if 0 /* ENABLE_FEATURE_TAR_SELINUX */ + if (sctx) + /* reset the context after creating an entry */ + setfscreatecon(NULL); +#endif +} diff --git a/archival/libarchive/data_extract_to_stdout.c b/archival/libarchive/data_extract_to_stdout.c new file mode 100644 index 000000000..91f3f3539 --- /dev/null +++ b/archival/libarchive/data_extract_to_stdout.c @@ -0,0 +1,14 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC data_extract_to_stdout(archive_handle_t *archive_handle) +{ + bb_copyfd_exact_size(archive_handle->src_fd, + STDOUT_FILENO, + archive_handle->file_header->size); +} diff --git a/archival/libarchive/data_skip.c b/archival/libarchive/data_skip.c new file mode 100644 index 000000000..a055424e2 --- /dev/null +++ b/archival/libarchive/data_skip.c @@ -0,0 +1,12 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC data_skip(archive_handle_t *archive_handle) +{ + archive_handle->seek(archive_handle->src_fd, archive_handle->file_header->size); +} diff --git a/archival/libarchive/decompress_bunzip2.c b/archival/libarchive/decompress_bunzip2.c new file mode 100644 index 000000000..4e46e6849 --- /dev/null +++ b/archival/libarchive/decompress_bunzip2.c @@ -0,0 +1,822 @@ +/* vi: set sw=4 ts=4: */ +/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). + + Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), + which also acknowledges contributions by Mike Burrows, David Wheeler, + Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, + Robert Sedgewick, and Jon L. Bentley. + + Licensed under GPLv2 or later, see file LICENSE in this source tree. +*/ + +/* + Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). + + More efficient reading of Huffman codes, a streamlined read_bunzip() + function, and various other tweaks. In (limited) tests, approximately + 20% faster than bzcat on x86 and about 10% faster on arm. + + Note that about 2/3 of the time is spent in read_bunzip() reversing + the Burrows-Wheeler transformation. Much of that time is delay + resulting from cache misses. + + (2010 update by vda: profiled "bzcat <84mbyte.bz2 >/dev/null" + on x86-64 CPU with L2 > 1M: get_next_block is hotter than read_bunzip: + %time seconds calls function + 71.01 12.69 444 get_next_block + 28.65 5.12 93065 read_bunzip + 00.22 0.04 7736490 get_bits + 00.11 0.02 47 dealloc_bunzip + 00.00 0.00 93018 full_write + ...) + + + I would ask that anyone benefiting from this work, especially those + using it in commercial products, consider making a donation to my local + non-profit hospice organization (www.hospiceacadiana.com) in the name of + the woman I loved, Toni W. Hagan, who passed away Feb. 12, 2003. + + Manuel + */ + +#include "libbb.h" +#include "archive.h" + +/* Constants for Huffman coding */ +#define MAX_GROUPS 6 +#define GROUP_SIZE 50 /* 64 would have been more efficient */ +#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ +#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ +#define SYMBOL_RUNA 0 +#define SYMBOL_RUNB 1 + +/* Status return values */ +#define RETVAL_OK 0 +#define RETVAL_LAST_BLOCK (-1) +#define RETVAL_NOT_BZIP_DATA (-2) +#define RETVAL_UNEXPECTED_INPUT_EOF (-3) +#define RETVAL_SHORT_WRITE (-4) +#define RETVAL_DATA_ERROR (-5) +#define RETVAL_OUT_OF_MEMORY (-6) +#define RETVAL_OBSOLETE_INPUT (-7) + +/* Other housekeeping constants */ +#define IOBUF_SIZE 4096 + +/* This is what we know about each Huffman coding group */ +struct group_data { + /* We have an extra slot at the end of limit[] for a sentinel value. */ + int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS]; + int minLen, maxLen; +}; + +/* Structure holding all the housekeeping data, including IO buffers and + * memory that persists between calls to bunzip + * Found the most used member: + * cat this_file.c | sed -e 's/"/ /g' -e "s/'/ /g" | xargs -n1 \ + * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER + * and moved it (inbufBitCount) to offset 0. + */ +struct bunzip_data { + /* I/O tracking data (file handles, buffers, positions, etc.) */ + unsigned inbufBitCount, inbufBits; + int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/; + uint8_t *inbuf /*,*outbuf*/; + + /* State for interrupting output loop */ + int writeCopies, writePos, writeRunCountdown, writeCount; + int writeCurrent; /* actually a uint8_t */ + + /* The CRC values stored in the block header and calculated from the data */ + uint32_t headerCRC, totalCRC, writeCRC; + + /* Intermediate buffer and its size (in bytes) */ + uint32_t *dbuf; + unsigned dbufSize; + + /* For I/O error handling */ + jmp_buf jmpbuf; + + /* Big things go last (register-relative addressing can be larger for big offsets) */ + uint32_t crc32Table[256]; + uint8_t selectors[32768]; /* nSelectors=15 bits */ + struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ +}; +/* typedef struct bunzip_data bunzip_data; -- done in .h file */ + + +/* Return the next nnn bits of input. All reads from the compressed input + are done through this function. All reads are big endian */ +static unsigned get_bits(bunzip_data *bd, int bits_wanted) +{ + unsigned bits = 0; + /* Cache bd->inbufBitCount in a CPU register (hopefully): */ + int bit_count = bd->inbufBitCount; + + /* If we need to get more data from the byte buffer, do so. (Loop getting + one byte at a time to enforce endianness and avoid unaligned access.) */ + while (bit_count < bits_wanted) { + + /* If we need to read more data from file into byte buffer, do so */ + if (bd->inbufPos == bd->inbufCount) { + /* if "no input fd" case: in_fd == -1, read fails, we jump */ + bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE); + if (bd->inbufCount <= 0) + longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF); + bd->inbufPos = 0; + } + + /* Avoid 32-bit overflow (dump bit buffer to top of output) */ + if (bit_count >= 24) { + bits = bd->inbufBits & ((1 << bit_count) - 1); + bits_wanted -= bit_count; + bits <<= bits_wanted; + bit_count = 0; + } + + /* Grab next 8 bits of input from buffer. */ + bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; + bit_count += 8; + } + + /* Calculate result */ + bit_count -= bits_wanted; + bd->inbufBitCount = bit_count; + bits |= (bd->inbufBits >> bit_count) & ((1 << bits_wanted) - 1); + + return bits; +} + +/* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */ +static int get_next_block(bunzip_data *bd) +{ + struct group_data *hufGroup; + int dbufCount, dbufSize, groupCount, *base, *limit, selector, + i, j, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; + int runCnt = runCnt; /* for compiler */ + uint8_t uc, symToByte[256], mtfSymbol[256], *selectors; + uint32_t *dbuf; + unsigned origPtr; + + dbuf = bd->dbuf; + dbufSize = bd->dbufSize; + selectors = bd->selectors; + +/* In bbox, we are ok with aborting through setjmp which is set up in start_bunzip */ +#if 0 + /* Reset longjmp I/O error handling */ + i = setjmp(bd->jmpbuf); + if (i) return i; +#endif + + /* Read in header signature and CRC, then validate signature. + (last block signature means CRC is for whole file, return now) */ + i = get_bits(bd, 24); + j = get_bits(bd, 24); + bd->headerCRC = get_bits(bd, 32); + if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK; + if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA; + + /* We can add support for blockRandomised if anybody complains. There was + some code for this in busybox 1.0.0-pre3, but nobody ever noticed that + it didn't actually work. */ + if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; + origPtr = get_bits(bd, 24); + if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR; + + /* mapping table: if some byte values are never used (encoding things + like ascii text), the compression code removes the gaps to have fewer + symbols to deal with, and writes a sparse bitfield indicating which + values were present. We make a translation table to convert the symbols + back to the corresponding bytes. */ + symTotal = 0; + i = 0; + t = get_bits(bd, 16); + do { + if (t & (1 << 15)) { + unsigned inner_map = get_bits(bd, 16); + do { + if (inner_map & (1 << 15)) + symToByte[symTotal++] = i; + inner_map <<= 1; + i++; + } while (i & 15); + i -= 16; + } + t <<= 1; + i += 16; + } while (i < 256); + + /* How many different Huffman coding groups does this block use? */ + groupCount = get_bits(bd, 3); + if (groupCount < 2 || groupCount > MAX_GROUPS) + return RETVAL_DATA_ERROR; + + /* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding + group. Read in the group selector list, which is stored as MTF encoded + bit runs. (MTF=Move To Front, as each value is used it's moved to the + start of the list.) */ + for (i = 0; i < groupCount; i++) + mtfSymbol[i] = i; + nSelectors = get_bits(bd, 15); + if (!nSelectors) + return RETVAL_DATA_ERROR; + for (i = 0; i < nSelectors; i++) { + uint8_t tmp_byte; + /* Get next value */ + int n = 0; + while (get_bits(bd, 1)) { + if (n >= groupCount) return RETVAL_DATA_ERROR; + n++; + } + /* Decode MTF to get the next selector */ + tmp_byte = mtfSymbol[n]; + while (--n >= 0) + mtfSymbol[n + 1] = mtfSymbol[n]; + mtfSymbol[0] = selectors[i] = tmp_byte; + } + + /* Read the Huffman coding tables for each group, which code for symTotal + literal symbols, plus two run symbols (RUNA, RUNB) */ + symCount = symTotal + 2; + for (j = 0; j < groupCount; j++) { + uint8_t length[MAX_SYMBOLS]; + /* 8 bits is ALMOST enough for temp[], see below */ + unsigned temp[MAX_HUFCODE_BITS+1]; + int minLen, maxLen, pp, len_m1; + + /* Read Huffman code lengths for each symbol. They're stored in + a way similar to mtf; record a starting value for the first symbol, + and an offset from the previous value for every symbol after that. + (Subtracting 1 before the loop and then adding it back at the end is + an optimization that makes the test inside the loop simpler: symbol + length 0 becomes negative, so an unsigned inequality catches it.) */ + len_m1 = get_bits(bd, 5) - 1; + for (i = 0; i < symCount; i++) { + for (;;) { + int two_bits; + if ((unsigned)len_m1 > (MAX_HUFCODE_BITS-1)) + return RETVAL_DATA_ERROR; + + /* If first bit is 0, stop. Else second bit indicates whether + to increment or decrement the value. Optimization: grab 2 + bits and unget the second if the first was 0. */ + two_bits = get_bits(bd, 2); + if (two_bits < 2) { + bd->inbufBitCount++; + break; + } + + /* Add one if second bit 1, else subtract 1. Avoids if/else */ + len_m1 += (((two_bits+1) & 2) - 1); + } + + /* Correct for the initial -1, to get the final symbol length */ + length[i] = len_m1 + 1; + } + + /* Find largest and smallest lengths in this group */ + minLen = maxLen = length[0]; + for (i = 1; i < symCount; i++) { + if (length[i] > maxLen) maxLen = length[i]; + else if (length[i] < minLen) minLen = length[i]; + } + + /* Calculate permute[], base[], and limit[] tables from length[]. + * + * permute[] is the lookup table for converting Huffman coded symbols + * into decoded symbols. base[] is the amount to subtract from the + * value of a Huffman symbol of a given length when using permute[]. + * + * limit[] indicates the largest numerical value a symbol with a given + * number of bits can have. This is how the Huffman codes can vary in + * length: each code with a value>limit[length] needs another bit. + */ + hufGroup = bd->groups + j; + hufGroup->minLen = minLen; + hufGroup->maxLen = maxLen; + + /* Note that minLen can't be smaller than 1, so we adjust the base + and limit array pointers so we're not always wasting the first + entry. We do this again when using them (during symbol decoding). */ + base = hufGroup->base - 1; + limit = hufGroup->limit - 1; + + /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ + pp = 0; + for (i = minLen; i <= maxLen; i++) { + int k; + temp[i] = limit[i] = 0; + for (k = 0; k < symCount; k++) + if (length[k] == i) + hufGroup->permute[pp++] = k; + } + + /* Count symbols coded for at each bit length */ + /* NB: in pathological cases, temp[8] can end ip being 256. + * That's why uint8_t is too small for temp[]. */ + for (i = 0; i < symCount; i++) temp[length[i]]++; + + /* Calculate limit[] (the largest symbol-coding value at each bit + * length, which is (previous limit<<1)+symbols at this level), and + * base[] (number of symbols to ignore at each bit length, which is + * limit minus the cumulative count of symbols coded for already). */ + pp = t = 0; + for (i = minLen; i < maxLen;) { + unsigned temp_i = temp[i]; + + pp += temp_i; + + /* We read the largest possible symbol size and then unget bits + after determining how many we need, and those extra bits could + be set to anything. (They're noise from future symbols.) At + each level we're really only interested in the first few bits, + so here we set all the trailing to-be-ignored bits to 1 so they + don't affect the value>limit[length] comparison. */ + limit[i] = (pp << (maxLen - i)) - 1; + pp <<= 1; + t += temp_i; + base[++i] = pp - t; + } + limit[maxLen] = pp + temp[maxLen] - 1; + limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */ + base[minLen] = 0; + } + + /* We've finished reading and digesting the block header. Now read this + block's Huffman coded symbols from the file and undo the Huffman coding + and run length encoding, saving the result into dbuf[dbufCount++] = uc */ + + /* Initialize symbol occurrence counters and symbol Move To Front table */ + /*memset(byteCount, 0, sizeof(byteCount)); - smaller, but slower */ + for (i = 0; i < 256; i++) { + byteCount[i] = 0; + mtfSymbol[i] = (uint8_t)i; + } + + /* Loop through compressed symbols. */ + + runPos = dbufCount = selector = 0; + for (;;) { + int nextSym; + + /* Fetch next Huffman coding group from list. */ + symCount = GROUP_SIZE - 1; + if (selector >= nSelectors) return RETVAL_DATA_ERROR; + hufGroup = bd->groups + selectors[selector++]; + base = hufGroup->base - 1; + limit = hufGroup->limit - 1; + + continue_this_group: + /* Read next Huffman-coded symbol. */ + + /* Note: It is far cheaper to read maxLen bits and back up than it is + to read minLen bits and then add additional bit at a time, testing + as we go. Because there is a trailing last block (with file CRC), + there is no danger of the overread causing an unexpected EOF for a + valid compressed file. + */ + if (1) { + /* As a further optimization, we do the read inline + (falling back to a call to get_bits if the buffer runs dry). + */ + int new_cnt; + while ((new_cnt = bd->inbufBitCount - hufGroup->maxLen) < 0) { + /* bd->inbufBitCount < hufGroup->maxLen */ + if (bd->inbufPos == bd->inbufCount) { + nextSym = get_bits(bd, hufGroup->maxLen); + goto got_huff_bits; + } + bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; + bd->inbufBitCount += 8; + }; + bd->inbufBitCount = new_cnt; /* "bd->inbufBitCount -= hufGroup->maxLen;" */ + nextSym = (bd->inbufBits >> new_cnt) & ((1 << hufGroup->maxLen) - 1); + got_huff_bits: ; + } else { /* unoptimized equivalent */ + nextSym = get_bits(bd, hufGroup->maxLen); + } + /* Figure how many bits are in next symbol and unget extras */ + i = hufGroup->minLen; + while (nextSym > limit[i]) ++i; + j = hufGroup->maxLen - i; + if (j < 0) + return RETVAL_DATA_ERROR; + bd->inbufBitCount += j; + + /* Huffman decode value to get nextSym (with bounds checking) */ + nextSym = (nextSym >> j) - base[i]; + if ((unsigned)nextSym >= MAX_SYMBOLS) + return RETVAL_DATA_ERROR; + nextSym = hufGroup->permute[nextSym]; + + /* We have now decoded the symbol, which indicates either a new literal + byte, or a repeated run of the most recent literal byte. First, + check if nextSym indicates a repeated run, and if so loop collecting + how many times to repeat the last literal. */ + if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ + + /* If this is the start of a new run, zero out counter */ + if (runPos == 0) { + runPos = 1; + runCnt = 0; + } + + /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at + each bit position, add 1 or 2 instead. For example, + 1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2. + You can make any bit pattern that way using 1 less symbol than + the basic or 0/1 method (except all bits 0, which would use no + symbols, but a run of length 0 doesn't mean anything in this + context). Thus space is saved. */ + runCnt += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */ + if (runPos < dbufSize) runPos <<= 1; + goto end_of_huffman_loop; + } + + /* When we hit the first non-run symbol after a run, we now know + how many times to repeat the last literal, so append that many + copies to our buffer of decoded symbols (dbuf) now. (The last + literal used is the one at the head of the mtfSymbol array.) */ + if (runPos != 0) { + uint8_t tmp_byte; + if (dbufCount + runCnt >= dbufSize) return RETVAL_DATA_ERROR; + tmp_byte = symToByte[mtfSymbol[0]]; + byteCount[tmp_byte] += runCnt; + while (--runCnt >= 0) dbuf[dbufCount++] = (uint32_t)tmp_byte; + runPos = 0; + } + + /* Is this the terminating symbol? */ + if (nextSym > symTotal) break; + + /* At this point, nextSym indicates a new literal character. Subtract + one to get the position in the MTF array at which this literal is + currently to be found. (Note that the result can't be -1 or 0, + because 0 and 1 are RUNA and RUNB. But another instance of the + first symbol in the mtf array, position 0, would have been handled + as part of a run above. Therefore 1 unused mtf position minus + 2 non-literal nextSym values equals -1.) */ + if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR; + i = nextSym - 1; + uc = mtfSymbol[i]; + + /* Adjust the MTF array. Since we typically expect to move only a + * small number of symbols, and are bound by 256 in any case, using + * memmove here would typically be bigger and slower due to function + * call overhead and other assorted setup costs. */ + do { + mtfSymbol[i] = mtfSymbol[i-1]; + } while (--i); + mtfSymbol[0] = uc; + uc = symToByte[uc]; + + /* We have our literal byte. Save it into dbuf. */ + byteCount[uc]++; + dbuf[dbufCount++] = (uint32_t)uc; + + /* Skip group initialization if we're not done with this group. Done + * this way to avoid compiler warning. */ + end_of_huffman_loop: + if (--symCount >= 0) goto continue_this_group; + } + + /* At this point, we've read all the Huffman-coded symbols (and repeated + runs) for this block from the input stream, and decoded them into the + intermediate buffer. There are dbufCount many decoded bytes in dbuf[]. + Now undo the Burrows-Wheeler transform on dbuf. + See http://dogma.net/markn/articles/bwt/bwt.htm + */ + + /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ + j = 0; + for (i = 0; i < 256; i++) { + int tmp_count = j + byteCount[i]; + byteCount[i] = j; + j = tmp_count; + } + + /* Figure out what order dbuf would be in if we sorted it. */ + for (i = 0; i < dbufCount; i++) { + uint8_t tmp_byte = (uint8_t)dbuf[i]; + int tmp_count = byteCount[tmp_byte]; + dbuf[tmp_count] |= (i << 8); + byteCount[tmp_byte] = tmp_count + 1; + } + + /* Decode first byte by hand to initialize "previous" byte. Note that it + doesn't get output, and if the first three characters are identical + it doesn't qualify as a run (hence writeRunCountdown=5). */ + if (dbufCount) { + uint32_t tmp; + if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; + tmp = dbuf[origPtr]; + bd->writeCurrent = (uint8_t)tmp; + bd->writePos = (tmp >> 8); + bd->writeRunCountdown = 5; + } + bd->writeCount = dbufCount; + + return RETVAL_OK; +} + +/* Undo Burrows-Wheeler transform on intermediate buffer to produce output. + If start_bunzip was initialized with out_fd=-1, then up to len bytes of + data are written to outbuf. Return value is number of bytes written or + error (all errors are negative numbers). If out_fd!=-1, outbuf and len + are ignored, data is written to out_fd and return is RETVAL_OK or error. + + NB: read_bunzip returns < 0 on error, or the number of *unfilled* bytes + in outbuf. IOW: on EOF returns len ("all bytes are not filled"), not 0. + (Why? This allows to get rid of one local variable) +*/ +int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) +{ + const uint32_t *dbuf; + int pos, current, previous; + uint32_t CRC; + + /* If we already have error/end indicator, return it */ + if (bd->writeCount < 0) + return bd->writeCount; + + dbuf = bd->dbuf; + + /* Register-cached state (hopefully): */ + pos = bd->writePos; + current = bd->writeCurrent; + CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */ + + /* We will always have pending decoded data to write into the output + buffer unless this is the very first call (in which case we haven't + Huffman-decoded a block into the intermediate buffer yet). */ + if (bd->writeCopies) { + + dec_writeCopies: + /* Inside the loop, writeCopies means extra copies (beyond 1) */ + --bd->writeCopies; + + /* Loop outputting bytes */ + for (;;) { + + /* If the output buffer is full, save cached state and return */ + if (--len < 0) { + /* Unlikely branch. + * Use of "goto" instead of keeping code here + * helps compiler to realize this. */ + goto outbuf_full; + } + + /* Write next byte into output buffer, updating CRC */ + *outbuf++ = current; + CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current]; + + /* Loop now if we're outputting multiple copies of this byte */ + if (bd->writeCopies) { + /* Unlikely branch */ + /*--bd->writeCopies;*/ + /*continue;*/ + /* Same, but (ab)using other existing --writeCopies operation + * (and this if() compiles into just test+branch pair): */ + goto dec_writeCopies; + } + decode_next_byte: + if (--bd->writeCount < 0) + break; /* input block is fully consumed, need next one */ + + /* Follow sequence vector to undo Burrows-Wheeler transform */ + previous = current; + pos = dbuf[pos]; + current = (uint8_t)pos; + pos >>= 8; + + /* After 3 consecutive copies of the same byte, the 4th + * is a repeat count. We count down from 4 instead + * of counting up because testing for non-zero is faster */ + if (--bd->writeRunCountdown != 0) { + if (current != previous) + bd->writeRunCountdown = 4; + } else { + /* Unlikely branch */ + /* We have a repeated run, this byte indicates the count */ + bd->writeCopies = current; + current = previous; + bd->writeRunCountdown = 5; + + /* Sometimes there are just 3 bytes (run length 0) */ + if (!bd->writeCopies) goto decode_next_byte; + + /* Subtract the 1 copy we'd output anyway to get extras */ + --bd->writeCopies; + } + } /* for(;;) */ + + /* Decompression of this input block completed successfully */ + bd->writeCRC = CRC = ~CRC; + bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC; + + /* If this block had a CRC error, force file level CRC error */ + if (CRC != bd->headerCRC) { + bd->totalCRC = bd->headerCRC + 1; + return RETVAL_LAST_BLOCK; + } + } + + /* Refill the intermediate buffer by Huffman-decoding next block of input */ + { + int r = get_next_block(bd); + if (r) { /* error/end */ + bd->writeCount = r; + return (r != RETVAL_LAST_BLOCK) ? r : len; + } + } + + CRC = ~0; + pos = bd->writePos; + current = bd->writeCurrent; + goto decode_next_byte; + + outbuf_full: + /* Output buffer is full, save cached state and return */ + bd->writePos = pos; + bd->writeCurrent = current; + bd->writeCRC = CRC; + + bd->writeCopies++; + + return 0; +} + +/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain + a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are + ignored, and data is read from file handle into temporary buffer. */ + +/* Because bunzip2 is used for help text unpacking, and because bb_show_usage() + should work for NOFORK applets too, we must be extremely careful to not leak + any allocations! */ +int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, + const void *inbuf, int len) +{ + bunzip_data *bd; + unsigned i; + enum { + BZh0 = ('B' << 24) + ('Z' << 16) + ('h' << 8) + '0', + h0 = ('h' << 8) + '0', + }; + + /* Figure out how much data to allocate */ + i = sizeof(bunzip_data); + if (in_fd != -1) i += IOBUF_SIZE; + + /* Allocate bunzip_data. Most fields initialize to zero. */ + bd = *bdp = xzalloc(i); + + /* Setup input buffer */ + bd->in_fd = in_fd; + if (-1 == in_fd) { + /* in this case, bd->inbuf is read-only */ + bd->inbuf = (void*)inbuf; /* cast away const-ness */ + } else { + bd->inbuf = (uint8_t*)(bd + 1); + memcpy(bd->inbuf, inbuf, len); + } + bd->inbufCount = len; + + /* Init the CRC32 table (big endian) */ + crc32_filltable(bd->crc32Table, 1); + + /* Setup for I/O error handling via longjmp */ + i = setjmp(bd->jmpbuf); + if (i) return i; + + /* Ensure that file starts with "BZh['1'-'9']." */ + /* Update: now caller verifies 1st two bytes, makes .gz/.bz2 + * integration easier */ + /* was: */ + /* i = get_bits(bd, 32); */ + /* if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; */ + i = get_bits(bd, 16); + if ((unsigned)(i - h0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; + + /* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of + uncompressed data. Allocate intermediate buffer for block. */ + /* bd->dbufSize = 100000 * (i - BZh0); */ + bd->dbufSize = 100000 * (i - h0); + + /* Cannot use xmalloc - may leak bd in NOFORK case! */ + bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(bd->dbuf[0])); + if (!bd->dbuf) { + free(bd); + xfunc_die(); + } + return RETVAL_OK; +} + +void FAST_FUNC dealloc_bunzip(bunzip_data *bd) +{ + free(bd->dbuf); + free(bd); +} + + +/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */ +IF_DESKTOP(long long) int FAST_FUNC +unpack_bz2_stream(int src_fd, int dst_fd) +{ + IF_DESKTOP(long long total_written = 0;) + bunzip_data *bd; + char *outbuf; + int i; + unsigned len; + + outbuf = xmalloc(IOBUF_SIZE); + len = 0; + while (1) { /* "Process one BZ... stream" loop */ + + i = start_bunzip(&bd, src_fd, outbuf + 2, len); + + if (i == 0) { + while (1) { /* "Produce some output bytes" loop */ + i = read_bunzip(bd, outbuf, IOBUF_SIZE); + if (i < 0) /* error? */ + break; + i = IOBUF_SIZE - i; /* number of bytes produced */ + if (i == 0) /* EOF? */ + break; + if (i != full_write(dst_fd, outbuf, i)) { + bb_error_msg("short write"); + i = RETVAL_SHORT_WRITE; + goto release_mem; + } + IF_DESKTOP(total_written += i;) + } + } + + if (i != RETVAL_LAST_BLOCK) { + bb_error_msg("bunzip error %d", i); + break; + } + if (bd->headerCRC != bd->totalCRC) { + bb_error_msg("CRC error"); + break; + } + + /* Successfully unpacked one BZ stream */ + i = RETVAL_OK; + + /* Do we have "BZ..." after last processed byte? + * pbzip2 (parallelized bzip2) produces such files. + */ + len = bd->inbufCount - bd->inbufPos; + memcpy(outbuf, &bd->inbuf[bd->inbufPos], len); + if (len < 2) { + if (safe_read(src_fd, outbuf + len, 2 - len) != 2 - len) + break; + len = 2; + } + if (*(uint16_t*)outbuf != BZIP2_MAGIC) /* "BZ"? */ + break; + dealloc_bunzip(bd); + len -= 2; + } + + release_mem: + dealloc_bunzip(bd); + free(outbuf); + + return i ? i : IF_DESKTOP(total_written) + 0; +} + +IF_DESKTOP(long long) int FAST_FUNC +unpack_bz2_stream_prime(int src_fd, int dst_fd) +{ + uint16_t magic2; + xread(src_fd, &magic2, 2); + if (magic2 != BZIP2_MAGIC) { + bb_error_msg_and_die("invalid magic"); + } + return unpack_bz2_stream(src_fd, dst_fd); +} + +#ifdef TESTING + +static char *const bunzip_errors[] = { + NULL, "Bad file checksum", "Not bzip data", + "Unexpected input EOF", "Unexpected output EOF", "Data error", + "Out of memory", "Obsolete (pre 0.9.5) bzip format not supported" +}; + +/* Dumb little test thing, decompress stdin to stdout */ +int main(int argc, char **argv) +{ + int i; + char c; + + int i = unpack_bz2_stream_prime(0, 1); + if (i < 0) + fprintf(stderr, "%s\n", bunzip_errors[-i]); + else if (read(STDIN_FILENO, &c, 1)) + fprintf(stderr, "Trailing garbage ignored\n"); + return -i; +} +#endif diff --git a/archival/libarchive/decompress_uncompress.c b/archival/libarchive/decompress_uncompress.c new file mode 100644 index 000000000..44d894244 --- /dev/null +++ b/archival/libarchive/decompress_uncompress.c @@ -0,0 +1,307 @@ +/* vi: set sw=4 ts=4: */ +/* uncompress for busybox -- (c) 2002 Robert Griebl + * + * based on the original compress42.c source + * (see disclaimer below) + */ + +/* (N)compress42.c - File compression ala IEEE Computer, Mar 1992. + * + * Authors: + * Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) + * Jim McKie (decvax!mcvax!jim) + * Steve Davies (decvax!vax135!petsd!peora!srd) + * Ken Turkowski (decvax!decwrl!turtlevax!ken) + * James A. Woods (decvax!ihnp4!ames!jaw) + * Joe Orost (decvax!vax135!petsd!joe) + * Dave Mack (csu@alembic.acs.com) + * Peter Jannesen, Network Communication Systems + * (peter@ncs.nl) + * + * marc@suse.de : a small security fix for a buffer overflow + * + * [... History snipped ...] + * + */ + +#include "libbb.h" +#include "archive.h" + + +/* Default input buffer size */ +#define IBUFSIZ 2048 + +/* Default output buffer size */ +#define OBUFSIZ 2048 + +/* Defines for third byte of header */ +#define BIT_MASK 0x1f /* Mask for 'number of compresssion bits' */ + /* Masks 0x20 and 0x40 are free. */ + /* I think 0x20 should mean that there is */ + /* a fourth header byte (for expansion). */ +#define BLOCK_MODE 0x80 /* Block compression if table is full and */ + /* compression rate is dropping flush tables */ + /* the next two codes should not be changed lightly, as they must not */ + /* lie within the contiguous general code space. */ +#define FIRST 257 /* first free entry */ +#define CLEAR 256 /* table clear output code */ + +#define INIT_BITS 9 /* initial number of bits/code */ + + +/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */ +#define HBITS 17 /* 50% occupancy */ +#define HSIZE (1< BITS) { + bb_error_msg("compressed with %d bits, can only handle " + BITS_STR" bits", maxbits); + goto err; + } + + n_bits = INIT_BITS; + maxcode = MAXCODE(INIT_BITS) - 1; + bitmask = (1 << INIT_BITS) - 1; + oldcode = -1; + finchar = 0; + outpos = 0; + posbits = 0 << 3; + + free_ent = ((block_mode) ? FIRST : 256); + + /* As above, initialize the first 256 entries in the table. */ + /*clear_tab_prefixof(); - done by xzalloc */ + + for (code = 255; code >= 0; --code) { + tab_suffixof(code) = (unsigned char) code; + } + + do { + resetbuf: + { + int i; + int e; + int o; + + o = posbits >> 3; + e = insize - o; + + for (i = 0; i < e; ++i) + inbuf[i] = inbuf[i + o]; + + insize = e; + posbits = 0; + } + + if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) { + rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ); +//error check?? + insize += rsize; + } + + inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 : + (insize << 3) - (n_bits - 1)); + + while (inbits > posbits) { + if (free_ent > maxcode) { + posbits = + ((posbits - 1) + + ((n_bits << 3) - + (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); + ++n_bits; + if (n_bits == maxbits) { + maxcode = maxmaxcode; + } else { + maxcode = MAXCODE(n_bits) - 1; + } + bitmask = (1 << n_bits) - 1; + goto resetbuf; + } + { + unsigned char *p = &inbuf[posbits >> 3]; + + code = ((((long) (p[0])) | ((long) (p[1]) << 8) | + ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask; + } + posbits += n_bits; + + + if (oldcode == -1) { + oldcode = code; + finchar = (int) oldcode; + outbuf[outpos++] = (unsigned char) finchar; + continue; + } + + if (code == CLEAR && block_mode) { + clear_tab_prefixof(); + free_ent = FIRST - 1; + posbits = + ((posbits - 1) + + ((n_bits << 3) - + (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); + n_bits = INIT_BITS; + maxcode = MAXCODE(INIT_BITS) - 1; + bitmask = (1 << INIT_BITS) - 1; + goto resetbuf; + } + + incode = code; + stackp = de_stack; + + /* Special case for KwKwK string. */ + if (code >= free_ent) { + if (code > free_ent) { + unsigned char *p; + + posbits -= n_bits; + p = &inbuf[posbits >> 3]; + + bb_error_msg + ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)", + insize, posbits, p[-1], p[0], p[1], p[2], p[3], + (posbits & 07)); + bb_error_msg("corrupted data"); + goto err; + } + + *--stackp = (unsigned char) finchar; + code = oldcode; + } + + /* Generate output characters in reverse order */ + while ((long) code >= (long) 256) { + *--stackp = tab_suffixof(code); + code = tab_prefixof(code); + } + + finchar = tab_suffixof(code); + *--stackp = (unsigned char) finchar; + + /* And put them out in forward order */ + { + int i; + + i = de_stack - stackp; + if (outpos + i >= OBUFSIZ) { + do { + if (i > OBUFSIZ - outpos) { + i = OBUFSIZ - outpos; + } + + if (i > 0) { + memcpy(outbuf + outpos, stackp, i); + outpos += i; + } + + if (outpos >= OBUFSIZ) { + full_write(fd_out, outbuf, outpos); +//error check?? + IF_DESKTOP(total_written += outpos;) + outpos = 0; + } + stackp += i; + i = de_stack - stackp; + } while (i > 0); + } else { + memcpy(outbuf + outpos, stackp, i); + outpos += i; + } + } + + /* Generate the new entry. */ + code = free_ent; + if (code < maxmaxcode) { + tab_prefixof(code) = (unsigned short) oldcode; + tab_suffixof(code) = (unsigned char) finchar; + free_ent = code + 1; + } + + /* Remember previous code. */ + oldcode = incode; + } + + } while (rsize > 0); + + if (outpos > 0) { + full_write(fd_out, outbuf, outpos); +//error check?? + IF_DESKTOP(total_written += outpos;) + } + + retval = IF_DESKTOP(total_written) + 0; + err: + free(inbuf); + free(outbuf); + free(htab); + free(codetab); + return retval; +} diff --git a/archival/libarchive/decompress_unlzma.c b/archival/libarchive/decompress_unlzma.c new file mode 100644 index 000000000..a04714341 --- /dev/null +++ b/archival/libarchive/decompress_unlzma.c @@ -0,0 +1,465 @@ +/* vi: set sw=4 ts=4: */ +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs + * + * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) + * Copyright (C) 1999-2005 Igor Pavlov + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ +#include "libbb.h" +#include "archive.h" + +#if ENABLE_FEATURE_LZMA_FAST +# define speed_inline ALWAYS_INLINE +# define size_inline +#else +# define speed_inline +# define size_inline ALWAYS_INLINE +#endif + + +typedef struct { + int fd; + uint8_t *ptr; + +/* Was keeping rc on stack in unlzma and separately allocating buffer, + * but with "buffer 'attached to' allocated rc" code is smaller: */ + /* uint8_t *buffer; */ +#define RC_BUFFER ((uint8_t*)(rc+1)) + + uint8_t *buffer_end; + +/* Had provisions for variable buffer, but we don't need it here */ + /* int buffer_size; */ +#define RC_BUFFER_SIZE 0x10000 + + uint32_t code; + uint32_t range; + uint32_t bound; +} rc_t; + +#define RC_TOP_BITS 24 +#define RC_MOVE_BITS 5 +#define RC_MODEL_TOTAL_BITS 11 + + +/* Called twice: once at startup (LZMA_FAST only) and once in rc_normalize() */ +static size_inline void rc_read(rc_t *rc) +{ + int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); +//TODO: return -1 instead +//This will make unlzma delete broken unpacked file on unpack errors + if (buffer_size <= 0) + bb_error_msg_and_die("unexpected EOF"); + rc->ptr = RC_BUFFER; + rc->buffer_end = RC_BUFFER + buffer_size; +} + +/* Called twice, but one callsite is in speed_inline'd rc_is_bit_1() */ +static void rc_do_normalize(rc_t *rc) +{ + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->range <<= 8; + rc->code = (rc->code << 8) | *rc->ptr++; +} + +/* Called once */ +static ALWAYS_INLINE rc_t* rc_init(int fd) /*, int buffer_size) */ +{ + int i; + rc_t *rc; + + rc = xzalloc(sizeof(*rc) + RC_BUFFER_SIZE); + + rc->fd = fd; + /* rc->ptr = rc->buffer_end; */ + + for (i = 0; i < 5; i++) { +#if ENABLE_FEATURE_LZMA_FAST + if (rc->ptr >= rc->buffer_end) + rc_read(rc); + rc->code = (rc->code << 8) | *rc->ptr++; +#else + rc_do_normalize(rc); +#endif + } + rc->range = 0xFFFFFFFF; + return rc; +} + +/* Called once */ +static ALWAYS_INLINE void rc_free(rc_t *rc) +{ + free(rc); +} + +static ALWAYS_INLINE void rc_normalize(rc_t *rc) +{ + if (rc->range < (1 << RC_TOP_BITS)) { + rc_do_normalize(rc); + } +} + +/* rc_is_bit_1 is called 9 times */ +static speed_inline int rc_is_bit_1(rc_t *rc, uint16_t *p) +{ + rc_normalize(rc); + rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); + if (rc->code < rc->bound) { + rc->range = rc->bound; + *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; + return 0; + } + rc->range -= rc->bound; + rc->code -= rc->bound; + *p -= *p >> RC_MOVE_BITS; + return 1; +} + +/* Called 4 times in unlzma loop */ +static speed_inline int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) +{ + int ret = rc_is_bit_1(rc, p); + *symbol = *symbol * 2 + ret; + return ret; +} + +/* Called once */ +static ALWAYS_INLINE int rc_direct_bit(rc_t *rc) +{ + rc_normalize(rc); + rc->range >>= 1; + if (rc->code >= rc->range) { + rc->code -= rc->range; + return 1; + } + return 0; +} + +/* Called twice */ +static speed_inline void +rc_bit_tree_decode(rc_t *rc, uint16_t *p, int num_levels, int *symbol) +{ + int i = num_levels; + + *symbol = 1; + while (i--) + rc_get_bit(rc, p + *symbol, symbol); + *symbol -= 1 << num_levels; +} + + +typedef struct { + uint8_t pos; + uint32_t dict_size; + uint64_t dst_size; +} PACKED lzma_header_t; + + +/* #defines will force compiler to compute/optimize each one with each usage. + * Have heart and use enum instead. */ +enum { + LZMA_BASE_SIZE = 1846, + LZMA_LIT_SIZE = 768, + + LZMA_NUM_POS_BITS_MAX = 4, + + LZMA_LEN_NUM_LOW_BITS = 3, + LZMA_LEN_NUM_MID_BITS = 3, + LZMA_LEN_NUM_HIGH_BITS = 8, + + LZMA_LEN_CHOICE = 0, + LZMA_LEN_CHOICE_2 = (LZMA_LEN_CHOICE + 1), + LZMA_LEN_LOW = (LZMA_LEN_CHOICE_2 + 1), + LZMA_LEN_MID = (LZMA_LEN_LOW \ + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))), + LZMA_LEN_HIGH = (LZMA_LEN_MID \ + + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))), + LZMA_NUM_LEN_PROBS = (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)), + + LZMA_NUM_STATES = 12, + LZMA_NUM_LIT_STATES = 7, + + LZMA_START_POS_MODEL_INDEX = 4, + LZMA_END_POS_MODEL_INDEX = 14, + LZMA_NUM_FULL_DISTANCES = (1 << (LZMA_END_POS_MODEL_INDEX >> 1)), + + LZMA_NUM_POS_SLOT_BITS = 6, + LZMA_NUM_LEN_TO_POS_STATES = 4, + + LZMA_NUM_ALIGN_BITS = 4, + + LZMA_MATCH_MIN_LEN = 2, + + LZMA_IS_MATCH = 0, + LZMA_IS_REP = (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)), + LZMA_IS_REP_G0 = (LZMA_IS_REP + LZMA_NUM_STATES), + LZMA_IS_REP_G1 = (LZMA_IS_REP_G0 + LZMA_NUM_STATES), + LZMA_IS_REP_G2 = (LZMA_IS_REP_G1 + LZMA_NUM_STATES), + LZMA_IS_REP_0_LONG = (LZMA_IS_REP_G2 + LZMA_NUM_STATES), + LZMA_POS_SLOT = (LZMA_IS_REP_0_LONG \ + + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)), + LZMA_SPEC_POS = (LZMA_POS_SLOT \ + + (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)), + LZMA_ALIGN = (LZMA_SPEC_POS \ + + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX), + LZMA_LEN_CODER = (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)), + LZMA_REP_LEN_CODER = (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS), + LZMA_LITERAL = (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS), +}; + + +IF_DESKTOP(long long) int FAST_FUNC +unpack_lzma_stream(int src_fd, int dst_fd) +{ + IF_DESKTOP(long long total_written = 0;) + lzma_header_t header; + int lc, pb, lp; + uint32_t pos_state_mask; + uint32_t literal_pos_mask; + uint16_t *p; + int num_bits; + int num_probs; + rc_t *rc; + int i; + uint8_t *buffer; + uint8_t previous_byte = 0; + size_t buffer_pos = 0, global_pos = 0; + int len = 0; + int state = 0; + uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; + + if (full_read(src_fd, &header, sizeof(header)) != sizeof(header) + || header.pos >= (9 * 5 * 5) + ) { + bb_error_msg("bad lzma header"); + return -1; + } + + i = header.pos / 9; + lc = header.pos % 9; + pb = i / 5; + lp = i % 5; + pos_state_mask = (1 << pb) - 1; + literal_pos_mask = (1 << lp) - 1; + + header.dict_size = SWAP_LE32(header.dict_size); + header.dst_size = SWAP_LE64(header.dst_size); + + if (header.dict_size == 0) + header.dict_size++; + + buffer = xmalloc(MIN(header.dst_size, header.dict_size)); + + num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); + p = xmalloc(num_probs * sizeof(*p)); + num_probs += LZMA_LITERAL - LZMA_BASE_SIZE; + for (i = 0; i < num_probs; i++) + p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; + + rc = rc_init(src_fd); /*, RC_BUFFER_SIZE); */ + + while (global_pos + buffer_pos < header.dst_size) { + int pos_state = (buffer_pos + global_pos) & pos_state_mask; + uint16_t *prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; + + if (!rc_is_bit_1(rc, prob)) { + static const char next_state[LZMA_NUM_STATES] = + { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; + int mi = 1; + + prob = (p + LZMA_LITERAL + + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) + + (previous_byte >> (8 - lc)) + ) + ) + ); + + if (state >= LZMA_NUM_LIT_STATES) { + int match_byte; + uint32_t pos = buffer_pos - rep0; + + while (pos >= header.dict_size) + pos += header.dict_size; + match_byte = buffer[pos]; + do { + int bit; + + match_byte <<= 1; + bit = match_byte & 0x100; + bit ^= (rc_get_bit(rc, prob + 0x100 + bit + mi, &mi) << 8); /* 0x100 or 0 */ + if (bit) + break; + } while (mi < 0x100); + } + while (mi < 0x100) { + rc_get_bit(rc, prob + mi, &mi); + } + + state = next_state[state]; + + previous_byte = (uint8_t) mi; +#if ENABLE_FEATURE_LZMA_FAST + one_byte1: + buffer[buffer_pos++] = previous_byte; + if (buffer_pos == header.dict_size) { + buffer_pos = 0; + global_pos += header.dict_size; + if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) + goto bad; + IF_DESKTOP(total_written += header.dict_size;) + } +#else + len = 1; + goto one_byte2; +#endif + } else { + int offset; + uint16_t *prob2; +#define prob_len prob2 + + prob2 = p + LZMA_IS_REP + state; + if (!rc_is_bit_1(rc, prob2)) { + rep3 = rep2; + rep2 = rep1; + rep1 = rep0; + state = state < LZMA_NUM_LIT_STATES ? 0 : 3; + prob2 = p + LZMA_LEN_CODER; + } else { + prob2 += LZMA_IS_REP_G0 - LZMA_IS_REP; + if (!rc_is_bit_1(rc, prob2)) { + prob2 = (p + LZMA_IS_REP_0_LONG + + (state << LZMA_NUM_POS_BITS_MAX) + + pos_state + ); + if (!rc_is_bit_1(rc, prob2)) { +#if ENABLE_FEATURE_LZMA_FAST + uint32_t pos = buffer_pos - rep0; + state = state < LZMA_NUM_LIT_STATES ? 9 : 11; + while (pos >= header.dict_size) + pos += header.dict_size; + previous_byte = buffer[pos]; + goto one_byte1; +#else + state = state < LZMA_NUM_LIT_STATES ? 9 : 11; + len = 1; + goto string; +#endif + } + } else { + uint32_t distance; + + prob2 += LZMA_IS_REP_G1 - LZMA_IS_REP_G0; + distance = rep1; + if (rc_is_bit_1(rc, prob2)) { + prob2 += LZMA_IS_REP_G2 - LZMA_IS_REP_G1; + distance = rep2; + if (rc_is_bit_1(rc, prob2)) { + distance = rep3; + rep3 = rep2; + } + rep2 = rep1; + } + rep1 = rep0; + rep0 = distance; + } + state = state < LZMA_NUM_LIT_STATES ? 8 : 11; + prob2 = p + LZMA_REP_LEN_CODER; + } + + prob_len = prob2 + LZMA_LEN_CHOICE; + num_bits = LZMA_LEN_NUM_LOW_BITS; + if (!rc_is_bit_1(rc, prob_len)) { + prob_len += LZMA_LEN_LOW - LZMA_LEN_CHOICE + + (pos_state << LZMA_LEN_NUM_LOW_BITS); + offset = 0; + } else { + prob_len += LZMA_LEN_CHOICE_2 - LZMA_LEN_CHOICE; + if (!rc_is_bit_1(rc, prob_len)) { + prob_len += LZMA_LEN_MID - LZMA_LEN_CHOICE_2 + + (pos_state << LZMA_LEN_NUM_MID_BITS); + offset = 1 << LZMA_LEN_NUM_LOW_BITS; + num_bits += LZMA_LEN_NUM_MID_BITS - LZMA_LEN_NUM_LOW_BITS; + } else { + prob_len += LZMA_LEN_HIGH - LZMA_LEN_CHOICE_2; + offset = ((1 << LZMA_LEN_NUM_LOW_BITS) + + (1 << LZMA_LEN_NUM_MID_BITS)); + num_bits += LZMA_LEN_NUM_HIGH_BITS - LZMA_LEN_NUM_LOW_BITS; + } + } + rc_bit_tree_decode(rc, prob_len, num_bits, &len); + len += offset; + + if (state < 4) { + int pos_slot; + uint16_t *prob3; + + state += LZMA_NUM_LIT_STATES; + prob3 = p + LZMA_POS_SLOT + + ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : + LZMA_NUM_LEN_TO_POS_STATES - 1) + << LZMA_NUM_POS_SLOT_BITS); + rc_bit_tree_decode(rc, prob3, + LZMA_NUM_POS_SLOT_BITS, &pos_slot); + rep0 = pos_slot; + if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { + int i2, mi2, num_bits2 = (pos_slot >> 1) - 1; + rep0 = 2 | (pos_slot & 1); + if (pos_slot < LZMA_END_POS_MODEL_INDEX) { + rep0 <<= num_bits2; + prob3 = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; + } else { + for (; num_bits2 != LZMA_NUM_ALIGN_BITS; num_bits2--) + rep0 = (rep0 << 1) | rc_direct_bit(rc); + rep0 <<= LZMA_NUM_ALIGN_BITS; + prob3 = p + LZMA_ALIGN; + } + i2 = 1; + mi2 = 1; + while (num_bits2--) { + if (rc_get_bit(rc, prob3 + mi2, &mi2)) + rep0 |= i2; + i2 <<= 1; + } + } + if (++rep0 == 0) + break; + } + + len += LZMA_MATCH_MIN_LEN; + IF_NOT_FEATURE_LZMA_FAST(string:) + do { + uint32_t pos = buffer_pos - rep0; + while (pos >= header.dict_size) + pos += header.dict_size; + previous_byte = buffer[pos]; + IF_NOT_FEATURE_LZMA_FAST(one_byte2:) + buffer[buffer_pos++] = previous_byte; + if (buffer_pos == header.dict_size) { + buffer_pos = 0; + global_pos += header.dict_size; + if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) + goto bad; + IF_DESKTOP(total_written += header.dict_size;) + } + len--; + } while (len != 0 && buffer_pos < header.dst_size); + } + } + + { + IF_NOT_DESKTOP(int total_written = 0; /* success */) + IF_DESKTOP(total_written += buffer_pos;) + if (full_write(dst_fd, buffer, buffer_pos) != (ssize_t)buffer_pos) { + bad: + total_written = -1; /* failure */ + } + rc_free(rc); + free(p); + free(buffer); + return total_written; + } +} diff --git a/archival/libarchive/decompress_unxz.c b/archival/libarchive/decompress_unxz.c new file mode 100644 index 000000000..e90dfb06f --- /dev/null +++ b/archival/libarchive/decompress_unxz.c @@ -0,0 +1,98 @@ +/* + * This file uses XZ Embedded library code which is written + * by Lasse Collin + * and Igor Pavlov + * + * See README file in unxz/ directory for more information. + * + * This file is: + * Copyright (C) 2010 Denys Vlasenko + * Licensed under GPLv2, see file LICENSE in this source tree. + */ +#include "libbb.h" +#include "archive.h" + +#define XZ_FUNC FAST_FUNC +#define XZ_EXTERN static + +#define XZ_DEC_DYNALLOC + +/* Skip check (rather than fail) of unsupported hash functions */ +#define XZ_DEC_ANY_CHECK 1 + +/* We use our own crc32 function */ +#define XZ_INTERNAL_CRC32 0 +static uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) +{ + return ~crc32_block_endian0(~crc, buf, size, global_crc32_table); +} + +/* We use arch-optimized unaligned accessors */ +#define get_unaligned_le32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_LE32(v); }) +#define get_unaligned_be32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_BE32(v); }) +#define put_unaligned_le32(val, buf) move_to_unaligned16(buf, SWAP_LE32(val)) +#define put_unaligned_be32(val, buf) move_to_unaligned16(buf, SWAP_BE32(val)) + +#include "unxz/xz_dec_bcj.c" +#include "unxz/xz_dec_lzma2.c" +#include "unxz/xz_dec_stream.c" + +IF_DESKTOP(long long) int FAST_FUNC +unpack_xz_stream(int src_fd, int dst_fd) +{ + struct xz_buf iobuf; + struct xz_dec *state; + unsigned char *membuf; + IF_DESKTOP(long long) int total = 0; + + if (!global_crc32_table) + global_crc32_table = crc32_filltable(NULL, /*endian:*/ 0); + + memset(&iobuf, 0, sizeof(iobuf)); + /* Preload XZ file signature */ + membuf = (void*) strcpy(xmalloc(2 * BUFSIZ), HEADER_MAGIC); + iobuf.in = membuf; + iobuf.in_size = HEADER_MAGIC_SIZE; + iobuf.out = membuf + BUFSIZ; + iobuf.out_size = BUFSIZ; + + /* Limit memory usage to about 64 MiB. */ + state = xz_dec_init(XZ_DYNALLOC, 64*1024*1024); + + while (1) { + enum xz_ret r; + + if (iobuf.in_pos == iobuf.in_size) { + int rd = safe_read(src_fd, membuf, BUFSIZ); + if (rd < 0) { + bb_error_msg(bb_msg_read_error); + total = -1; + break; + } + iobuf.in_size = rd; + iobuf.in_pos = 0; + } +// bb_error_msg(">in pos:%d size:%d out pos:%d size:%d", +// iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size); + r = xz_dec_run(state, &iobuf); +// bb_error_msg(" + * based on gzip sources + * + * Adjusted further by Erik Andersen to support + * files as well as stdin/stdout, and to generally behave itself wrt + * command line handling. + * + * General cleanup to better adhere to the style guide and make use of standard + * busybox functions by Glenn McGrath + * + * read_gz interface + associated hacking by Laurence Anderson + * + * Fixed huft_build() so decoding end-of-block code does not grab more bits + * than necessary (this is required by unzip applet), added inflate_cleanup() + * to free leaked bytebuffer memory (used in unzip.c), and some minor style + * guide cleanups by Ed Clark + * + * gzip (GNU zip) -- compress files with zip algorithm and 'compress' interface + * Copyright (C) 1992-1993 Jean-loup Gailly + * The unzip code was written and put in the public domain by Mark Adler. + * Portions of the lzw code are derived from the public domain 'compress' + * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies, + * Ken Turkowski, Dave Mack and Peter Jannesen. + * + * See the file algorithm.doc for the compression algorithms and file formats. + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include +#include "libbb.h" +#include "archive.h" + +typedef struct huft_t { + unsigned char e; /* number of extra bits or operation */ + unsigned char b; /* number of bits in this code or subcode */ + union { + unsigned short n; /* literal, length base, or distance base */ + struct huft_t *t; /* pointer to next level of table */ + } v; +} huft_t; + +enum { + /* gunzip_window size--must be a power of two, and + * at least 32K for zip's deflate method */ + GUNZIP_WSIZE = 0x8000, + /* If BMAX needs to be larger than 16, then h and x[] should be ulg. */ + BMAX = 16, /* maximum bit length of any code (16 for explode) */ + N_MAX = 288, /* maximum number of codes in any set */ +}; + + +/* This is somewhat complex-looking arrangement, but it allows + * to place decompressor state either in bss or in + * malloc'ed space simply by changing #defines below. + * Sizes on i386: + * text data bss dec hex + * 5256 0 108 5364 14f4 - bss + * 4915 0 0 4915 1333 - malloc + */ +#define STATE_IN_BSS 0 +#define STATE_IN_MALLOC 1 + + +typedef struct state_t { + off_t gunzip_bytes_out; /* number of output bytes */ + uint32_t gunzip_crc; + + int gunzip_src_fd; + unsigned gunzip_outbuf_count; /* bytes in output buffer */ + + unsigned char *gunzip_window; + + uint32_t *gunzip_crc_table; + + /* bitbuffer */ + unsigned gunzip_bb; /* bit buffer */ + unsigned char gunzip_bk; /* bits in bit buffer */ + + /* input (compressed) data */ + unsigned char *bytebuffer; /* buffer itself */ + off_t to_read; /* compressed bytes to read (unzip only, -1 for gunzip) */ +// unsigned bytebuffer_max; /* buffer size */ + unsigned bytebuffer_offset; /* buffer position */ + unsigned bytebuffer_size; /* how much data is there (size <= max) */ + + /* private data of inflate_codes() */ + unsigned inflate_codes_ml; /* masks for bl and bd bits */ + unsigned inflate_codes_md; /* masks for bl and bd bits */ + unsigned inflate_codes_bb; /* bit buffer */ + unsigned inflate_codes_k; /* number of bits in bit buffer */ + unsigned inflate_codes_w; /* current gunzip_window position */ + huft_t *inflate_codes_tl; + huft_t *inflate_codes_td; + unsigned inflate_codes_bl; + unsigned inflate_codes_bd; + unsigned inflate_codes_nn; /* length and index for copy */ + unsigned inflate_codes_dd; + + smallint resume_copy; + + /* private data of inflate_get_next_window() */ + smallint method; /* method == -1 for stored, -2 for codes */ + smallint need_another_block; + smallint end_reached; + + /* private data of inflate_stored() */ + unsigned inflate_stored_n; + unsigned inflate_stored_b; + unsigned inflate_stored_k; + unsigned inflate_stored_w; + + const char *error_msg; + jmp_buf error_jmp; +} state_t; +#define gunzip_bytes_out (S()gunzip_bytes_out ) +#define gunzip_crc (S()gunzip_crc ) +#define gunzip_src_fd (S()gunzip_src_fd ) +#define gunzip_outbuf_count (S()gunzip_outbuf_count) +#define gunzip_window (S()gunzip_window ) +#define gunzip_crc_table (S()gunzip_crc_table ) +#define gunzip_bb (S()gunzip_bb ) +#define gunzip_bk (S()gunzip_bk ) +#define to_read (S()to_read ) +// #define bytebuffer_max (S()bytebuffer_max ) +// Both gunzip and unzip can use constant buffer size now (16k): +#define bytebuffer_max 0x4000 +#define bytebuffer (S()bytebuffer ) +#define bytebuffer_offset (S()bytebuffer_offset ) +#define bytebuffer_size (S()bytebuffer_size ) +#define inflate_codes_ml (S()inflate_codes_ml ) +#define inflate_codes_md (S()inflate_codes_md ) +#define inflate_codes_bb (S()inflate_codes_bb ) +#define inflate_codes_k (S()inflate_codes_k ) +#define inflate_codes_w (S()inflate_codes_w ) +#define inflate_codes_tl (S()inflate_codes_tl ) +#define inflate_codes_td (S()inflate_codes_td ) +#define inflate_codes_bl (S()inflate_codes_bl ) +#define inflate_codes_bd (S()inflate_codes_bd ) +#define inflate_codes_nn (S()inflate_codes_nn ) +#define inflate_codes_dd (S()inflate_codes_dd ) +#define resume_copy (S()resume_copy ) +#define method (S()method ) +#define need_another_block (S()need_another_block ) +#define end_reached (S()end_reached ) +#define inflate_stored_n (S()inflate_stored_n ) +#define inflate_stored_b (S()inflate_stored_b ) +#define inflate_stored_k (S()inflate_stored_k ) +#define inflate_stored_w (S()inflate_stored_w ) +#define error_msg (S()error_msg ) +#define error_jmp (S()error_jmp ) + +/* This is a generic part */ +#if STATE_IN_BSS /* Use global data segment */ +#define DECLARE_STATE /*nothing*/ +#define ALLOC_STATE /*nothing*/ +#define DEALLOC_STATE ((void)0) +#define S() state. +#define PASS_STATE /*nothing*/ +#define PASS_STATE_ONLY /*nothing*/ +#define STATE_PARAM /*nothing*/ +#define STATE_PARAM_ONLY void +static state_t state; +#endif + +#if STATE_IN_MALLOC /* Use malloc space */ +#define DECLARE_STATE state_t *state +#define ALLOC_STATE (state = xzalloc(sizeof(*state))) +#define DEALLOC_STATE free(state) +#define S() state-> +#define PASS_STATE state, +#define PASS_STATE_ONLY state +#define STATE_PARAM state_t *state, +#define STATE_PARAM_ONLY state_t *state +#endif + + +static const uint16_t mask_bits[] ALIGN2 = { + 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, + 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff +}; + +/* Copy lengths for literal codes 257..285 */ +static const uint16_t cplens[] ALIGN2 = { + 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, + 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 +}; + +/* note: see note #13 above about the 258 in this list. */ +/* Extra bits for literal codes 257..285 */ +static const uint8_t cplext[] ALIGN1 = { + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, + 5, 5, 5, 0, 99, 99 +}; /* 99 == invalid */ + +/* Copy offsets for distance codes 0..29 */ +static const uint16_t cpdist[] ALIGN2 = { + 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, + 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 +}; + +/* Extra bits for distance codes */ +static const uint8_t cpdext[] ALIGN1 = { + 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, + 11, 11, 12, 12, 13, 13 +}; + +/* Tables for deflate from PKZIP's appnote.txt. */ +/* Order of the bit length code lengths */ +static const uint8_t border[] ALIGN1 = { + 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 +}; + + +/* + * Free the malloc'ed tables built by huft_build(), which makes a linked + * list of the tables it made, with the links in a dummy first entry of + * each table. + * t: table to free + */ +static void huft_free(huft_t *p) +{ + huft_t *q; + + /* Go through linked list, freeing from the malloced (t[-1]) address. */ + while (p) { + q = (--p)->v.t; + free(p); + p = q; + } +} + +static void huft_free_all(STATE_PARAM_ONLY) +{ + huft_free(inflate_codes_tl); + huft_free(inflate_codes_td); + inflate_codes_tl = NULL; + inflate_codes_td = NULL; +} + +static void abort_unzip(STATE_PARAM_ONLY) NORETURN; +static void abort_unzip(STATE_PARAM_ONLY) +{ + huft_free_all(PASS_STATE_ONLY); + longjmp(error_jmp, 1); +} + +static unsigned fill_bitbuffer(STATE_PARAM unsigned bitbuffer, unsigned *current, const unsigned required) +{ + while (*current < required) { + if (bytebuffer_offset >= bytebuffer_size) { + unsigned sz = bytebuffer_max - 4; + if (to_read >= 0 && to_read < sz) /* unzip only */ + sz = to_read; + /* Leave the first 4 bytes empty so we can always unwind the bitbuffer + * to the front of the bytebuffer */ + bytebuffer_size = safe_read(gunzip_src_fd, &bytebuffer[4], sz); + if ((int)bytebuffer_size < 1) { + error_msg = "unexpected end of file"; + abort_unzip(PASS_STATE_ONLY); + } + if (to_read >= 0) /* unzip only */ + to_read -= bytebuffer_size; + bytebuffer_size += 4; + bytebuffer_offset = 4; + } + bitbuffer |= ((unsigned) bytebuffer[bytebuffer_offset]) << *current; + bytebuffer_offset++; + *current += 8; + } + return bitbuffer; +} + + +/* Given a list of code lengths and a maximum table size, make a set of + * tables to decode that set of codes. Return zero on success, one if + * the given code set is incomplete (the tables are still built in this + * case), two if the input is invalid (all zero length codes or an + * oversubscribed set of lengths) - in this case stores NULL in *t. + * + * b: code lengths in bits (all assumed <= BMAX) + * n: number of codes (assumed <= N_MAX) + * s: number of simple-valued codes (0..s-1) + * d: list of base values for non-simple codes + * e: list of extra bits for non-simple codes + * t: result: starting table + * m: maximum lookup bits, returns actual + */ +static int huft_build(const unsigned *b, const unsigned n, + const unsigned s, const unsigned short *d, + const unsigned char *e, huft_t **t, unsigned *m) +{ + unsigned a; /* counter for codes of length k */ + unsigned c[BMAX + 1]; /* bit length count table */ + unsigned eob_len; /* length of end-of-block code (value 256) */ + unsigned f; /* i repeats in table every f entries */ + int g; /* maximum code length */ + int htl; /* table level */ + unsigned i; /* counter, current code */ + unsigned j; /* counter */ + int k; /* number of bits in current code */ + unsigned *p; /* pointer into c[], b[], or v[] */ + huft_t *q; /* points to current table */ + huft_t r; /* table entry for structure assignment */ + huft_t *u[BMAX]; /* table stack */ + unsigned v[N_MAX]; /* values in order of bit length */ + int ws[BMAX + 1]; /* bits decoded stack */ + int w; /* bits decoded */ + unsigned x[BMAX + 1]; /* bit offsets, then code stack */ + unsigned *xp; /* pointer into x */ + int y; /* number of dummy codes added */ + unsigned z; /* number of entries in current table */ + + /* Length of EOB code, if any */ + eob_len = n > 256 ? b[256] : BMAX; + + *t = NULL; + + /* Generate counts for each bit length */ + memset(c, 0, sizeof(c)); + p = (unsigned *) b; /* cast allows us to reuse p for pointing to b */ + i = n; + do { + c[*p]++; /* assume all entries <= BMAX */ + p++; /* can't combine with above line (Solaris bug) */ + } while (--i); + if (c[0] == n) { /* null input - all zero length codes */ + *m = 0; + return 2; + } + + /* Find minimum and maximum length, bound *m by those */ + for (j = 1; (c[j] == 0) && (j <= BMAX); j++) + continue; + k = j; /* minimum code length */ + for (i = BMAX; (c[i] == 0) && i; i--) + continue; + g = i; /* maximum code length */ + *m = (*m < j) ? j : ((*m > i) ? i : *m); + + /* Adjust last length count to fill out codes, if needed */ + for (y = 1 << j; j < i; j++, y <<= 1) { + y -= c[j]; + if (y < 0) + return 2; /* bad input: more codes than bits */ + } + y -= c[i]; + if (y < 0) + return 2; + c[i] += y; + + /* Generate starting offsets into the value table for each length */ + x[1] = j = 0; + p = c + 1; + xp = x + 2; + while (--i) { /* note that i == g from above */ + j += *p++; + *xp++ = j; + } + + /* Make a table of values in order of bit lengths */ + p = (unsigned *) b; + i = 0; + do { + j = *p++; + if (j != 0) { + v[x[j]++] = i; + } + } while (++i < n); + + /* Generate the Huffman codes and for each, make the table entries */ + x[0] = i = 0; /* first Huffman code is zero */ + p = v; /* grab values in bit order */ + htl = -1; /* no tables yet--level -1 */ + w = ws[0] = 0; /* bits decoded */ + u[0] = NULL; /* just to keep compilers happy */ + q = NULL; /* ditto */ + z = 0; /* ditto */ + + /* go through the bit lengths (k already is bits in shortest code) */ + for (; k <= g; k++) { + a = c[k]; + while (a--) { + /* here i is the Huffman code of length k bits for value *p */ + /* make tables up to required level */ + while (k > ws[htl + 1]) { + w = ws[++htl]; + + /* compute minimum size table less than or equal to *m bits */ + z = g - w; + z = z > *m ? *m : z; /* upper limit on table size */ + j = k - w; + f = 1 << j; + if (f > a + 1) { /* try a k-w bit table */ + /* too few codes for k-w bit table */ + f -= a + 1; /* deduct codes from patterns left */ + xp = c + k; + while (++j < z) { /* try smaller tables up to z bits */ + f <<= 1; + if (f <= *++xp) { + break; /* enough codes to use up j bits */ + } + f -= *xp; /* else deduct codes from patterns */ + } + } + j = (w + j > eob_len && w < eob_len) ? eob_len - w : j; /* make EOB code end at table */ + z = 1 << j; /* table entries for j-bit table */ + ws[htl+1] = w + j; /* set bits decoded in stack */ + + /* allocate and link in new table */ + q = xzalloc((z + 1) * sizeof(huft_t)); + *t = q + 1; /* link to list for huft_free() */ + t = &(q->v.t); + u[htl] = ++q; /* table starts after link */ + + /* connect to last table, if there is one */ + if (htl) { + x[htl] = i; /* save pattern for backing up */ + r.b = (unsigned char) (w - ws[htl - 1]); /* bits to dump before this table */ + r.e = (unsigned char) (16 + j); /* bits in this table */ + r.v.t = q; /* pointer to this table */ + j = (i & ((1 << w) - 1)) >> ws[htl - 1]; + u[htl - 1][j] = r; /* connect to last table */ + } + } + + /* set up table entry in r */ + r.b = (unsigned char) (k - w); + if (p >= v + n) { + r.e = 99; /* out of values--invalid code */ + } else if (*p < s) { + r.e = (unsigned char) (*p < 256 ? 16 : 15); /* 256 is EOB code */ + r.v.n = (unsigned short) (*p++); /* simple code is just the value */ + } else { + r.e = (unsigned char) e[*p - s]; /* non-simple--look up in lists */ + r.v.n = d[*p++ - s]; + } + + /* fill code-like entries with r */ + f = 1 << (k - w); + for (j = i >> w; j < z; j += f) { + q[j] = r; + } + + /* backwards increment the k-bit code i */ + for (j = 1 << (k - 1); i & j; j >>= 1) { + i ^= j; + } + i ^= j; + + /* backup over finished tables */ + while ((i & ((1 << w) - 1)) != x[htl]) { + w = ws[--htl]; + } + } + } + + /* return actual size of base table */ + *m = ws[1]; + + /* Return 1 if we were given an incomplete table */ + return y != 0 && g != 1; +} + + +/* + * inflate (decompress) the codes in a deflated (compressed) block. + * Return an error code or zero if it all goes ok. + * + * tl, td: literal/length and distance decoder tables + * bl, bd: number of bits decoded by tl[] and td[] + */ +/* called once from inflate_block */ + +/* map formerly local static variables to globals */ +#define ml inflate_codes_ml +#define md inflate_codes_md +#define bb inflate_codes_bb +#define k inflate_codes_k +#define w inflate_codes_w +#define tl inflate_codes_tl +#define td inflate_codes_td +#define bl inflate_codes_bl +#define bd inflate_codes_bd +#define nn inflate_codes_nn +#define dd inflate_codes_dd +static void inflate_codes_setup(STATE_PARAM unsigned my_bl, unsigned my_bd) +{ + bl = my_bl; + bd = my_bd; + /* make local copies of globals */ + bb = gunzip_bb; /* initialize bit buffer */ + k = gunzip_bk; + w = gunzip_outbuf_count; /* initialize gunzip_window position */ + /* inflate the coded data */ + ml = mask_bits[bl]; /* precompute masks for speed */ + md = mask_bits[bd]; +} +/* called once from inflate_get_next_window */ +static NOINLINE int inflate_codes(STATE_PARAM_ONLY) +{ + unsigned e; /* table entry flag/number of extra bits */ + huft_t *t; /* pointer to table entry */ + + if (resume_copy) + goto do_copy; + + while (1) { /* do until end of block */ + bb = fill_bitbuffer(PASS_STATE bb, &k, bl); + t = tl + ((unsigned) bb & ml); + e = t->e; + if (e > 16) + do { + if (e == 99) + abort_unzip(PASS_STATE_ONLY);; + bb >>= t->b; + k -= t->b; + e -= 16; + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + t = t->v.t + ((unsigned) bb & mask_bits[e]); + e = t->e; + } while (e > 16); + bb >>= t->b; + k -= t->b; + if (e == 16) { /* then it's a literal */ + gunzip_window[w++] = (unsigned char) t->v.n; + if (w == GUNZIP_WSIZE) { + gunzip_outbuf_count = w; + //flush_gunzip_window(); + w = 0; + return 1; // We have a block to read + } + } else { /* it's an EOB or a length */ + /* exit if end of block */ + if (e == 15) { + break; + } + + /* get length of block to copy */ + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + nn = t->v.n + ((unsigned) bb & mask_bits[e]); + bb >>= e; + k -= e; + + /* decode distance of block to copy */ + bb = fill_bitbuffer(PASS_STATE bb, &k, bd); + t = td + ((unsigned) bb & md); + e = t->e; + if (e > 16) + do { + if (e == 99) + abort_unzip(PASS_STATE_ONLY); + bb >>= t->b; + k -= t->b; + e -= 16; + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + t = t->v.t + ((unsigned) bb & mask_bits[e]); + e = t->e; + } while (e > 16); + bb >>= t->b; + k -= t->b; + bb = fill_bitbuffer(PASS_STATE bb, &k, e); + dd = w - t->v.n - ((unsigned) bb & mask_bits[e]); + bb >>= e; + k -= e; + + /* do the copy */ + do_copy: + do { + /* Was: nn -= (e = (e = GUNZIP_WSIZE - ((dd &= GUNZIP_WSIZE - 1) > w ? dd : w)) > nn ? nn : e); */ + /* Who wrote THAT?? rewritten as: */ + unsigned delta; + + dd &= GUNZIP_WSIZE - 1; + e = GUNZIP_WSIZE - (dd > w ? dd : w); + delta = w > dd ? w - dd : dd - w; + if (e > nn) e = nn; + nn -= e; + + /* copy to new buffer to prevent possible overwrite */ + if (delta >= e) { + memcpy(gunzip_window + w, gunzip_window + dd, e); + w += e; + dd += e; + } else { + /* do it slow to avoid memcpy() overlap */ + /* !NOMEMCPY */ + do { + gunzip_window[w++] = gunzip_window[dd++]; + } while (--e); + } + if (w == GUNZIP_WSIZE) { + gunzip_outbuf_count = w; + resume_copy = (nn != 0); + //flush_gunzip_window(); + w = 0; + return 1; + } + } while (nn); + resume_copy = 0; + } + } + + /* restore the globals from the locals */ + gunzip_outbuf_count = w; /* restore global gunzip_window pointer */ + gunzip_bb = bb; /* restore global bit buffer */ + gunzip_bk = k; + + /* normally just after call to inflate_codes, but save code by putting it here */ + /* free the decoding tables (tl and td), return */ + huft_free_all(PASS_STATE_ONLY); + + /* done */ + return 0; +} +#undef ml +#undef md +#undef bb +#undef k +#undef w +#undef tl +#undef td +#undef bl +#undef bd +#undef nn +#undef dd + + +/* called once from inflate_block */ +static void inflate_stored_setup(STATE_PARAM int my_n, int my_b, int my_k) +{ + inflate_stored_n = my_n; + inflate_stored_b = my_b; + inflate_stored_k = my_k; + /* initialize gunzip_window position */ + inflate_stored_w = gunzip_outbuf_count; +} +/* called once from inflate_get_next_window */ +static int inflate_stored(STATE_PARAM_ONLY) +{ + /* read and output the compressed data */ + while (inflate_stored_n--) { + inflate_stored_b = fill_bitbuffer(PASS_STATE inflate_stored_b, &inflate_stored_k, 8); + gunzip_window[inflate_stored_w++] = (unsigned char) inflate_stored_b; + if (inflate_stored_w == GUNZIP_WSIZE) { + gunzip_outbuf_count = inflate_stored_w; + //flush_gunzip_window(); + inflate_stored_w = 0; + inflate_stored_b >>= 8; + inflate_stored_k -= 8; + return 1; /* We have a block */ + } + inflate_stored_b >>= 8; + inflate_stored_k -= 8; + } + + /* restore the globals from the locals */ + gunzip_outbuf_count = inflate_stored_w; /* restore global gunzip_window pointer */ + gunzip_bb = inflate_stored_b; /* restore global bit buffer */ + gunzip_bk = inflate_stored_k; + return 0; /* Finished */ +} + + +/* + * decompress an inflated block + * e: last block flag + * + * GLOBAL VARIABLES: bb, kk, + */ +/* Return values: -1 = inflate_stored, -2 = inflate_codes */ +/* One callsite in inflate_get_next_window */ +static int inflate_block(STATE_PARAM smallint *e) +{ + unsigned ll[286 + 30]; /* literal/length and distance code lengths */ + unsigned t; /* block type */ + unsigned b; /* bit buffer */ + unsigned k; /* number of bits in bit buffer */ + + /* make local bit buffer */ + + b = gunzip_bb; + k = gunzip_bk; + + /* read in last block bit */ + b = fill_bitbuffer(PASS_STATE b, &k, 1); + *e = b & 1; + b >>= 1; + k -= 1; + + /* read in block type */ + b = fill_bitbuffer(PASS_STATE b, &k, 2); + t = (unsigned) b & 3; + b >>= 2; + k -= 2; + + /* restore the global bit buffer */ + gunzip_bb = b; + gunzip_bk = k; + + /* Do we see block type 1 often? Yes! + * TODO: fix performance problem (see below) */ + //bb_error_msg("blktype %d", t); + + /* inflate that block type */ + switch (t) { + case 0: /* Inflate stored */ + { + unsigned n; /* number of bytes in block */ + unsigned b_stored; /* bit buffer */ + unsigned k_stored; /* number of bits in bit buffer */ + + /* make local copies of globals */ + b_stored = gunzip_bb; /* initialize bit buffer */ + k_stored = gunzip_bk; + + /* go to byte boundary */ + n = k_stored & 7; + b_stored >>= n; + k_stored -= n; + + /* get the length and its complement */ + b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16); + n = ((unsigned) b_stored & 0xffff); + b_stored >>= 16; + k_stored -= 16; + + b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16); + if (n != (unsigned) ((~b_stored) & 0xffff)) { + abort_unzip(PASS_STATE_ONLY); /* error in compressed data */ + } + b_stored >>= 16; + k_stored -= 16; + + inflate_stored_setup(PASS_STATE n, b_stored, k_stored); + + return -1; + } + case 1: + /* Inflate fixed + * decompress an inflated type 1 (fixed Huffman codes) block. We should + * either replace this with a custom decoder, or at least precompute the + * Huffman tables. TODO */ + { + int i; /* temporary variable */ + unsigned bl; /* lookup bits for tl */ + unsigned bd; /* lookup bits for td */ + /* gcc 4.2.1 is too dumb to reuse stackspace. Moved up... */ + //unsigned ll[288]; /* length list for huft_build */ + + /* set up literal table */ + for (i = 0; i < 144; i++) + ll[i] = 8; + for (; i < 256; i++) + ll[i] = 9; + for (; i < 280; i++) + ll[i] = 7; + for (; i < 288; i++) /* make a complete, but wrong code set */ + ll[i] = 8; + bl = 7; + huft_build(ll, 288, 257, cplens, cplext, &inflate_codes_tl, &bl); + /* huft_build() never return nonzero - we use known data */ + + /* set up distance table */ + for (i = 0; i < 30; i++) /* make an incomplete code set */ + ll[i] = 5; + bd = 5; + huft_build(ll, 30, 0, cpdist, cpdext, &inflate_codes_td, &bd); + + /* set up data for inflate_codes() */ + inflate_codes_setup(PASS_STATE bl, bd); + + /* huft_free code moved into inflate_codes */ + + return -2; + } + case 2: /* Inflate dynamic */ + { + enum { dbits = 6 }; /* bits in base distance lookup table */ + enum { lbits = 9 }; /* bits in base literal/length lookup table */ + + huft_t *td; /* distance code table */ + unsigned i; /* temporary variables */ + unsigned j; + unsigned l; /* last length */ + unsigned m; /* mask for bit lengths table */ + unsigned n; /* number of lengths to get */ + unsigned bl; /* lookup bits for tl */ + unsigned bd; /* lookup bits for td */ + unsigned nb; /* number of bit length codes */ + unsigned nl; /* number of literal/length codes */ + unsigned nd; /* number of distance codes */ + + //unsigned ll[286 + 30];/* literal/length and distance code lengths */ + unsigned b_dynamic; /* bit buffer */ + unsigned k_dynamic; /* number of bits in bit buffer */ + + /* make local bit buffer */ + b_dynamic = gunzip_bb; + k_dynamic = gunzip_bk; + + /* read in table lengths */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5); + nl = 257 + ((unsigned) b_dynamic & 0x1f); /* number of literal/length codes */ + + b_dynamic >>= 5; + k_dynamic -= 5; + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5); + nd = 1 + ((unsigned) b_dynamic & 0x1f); /* number of distance codes */ + + b_dynamic >>= 5; + k_dynamic -= 5; + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 4); + nb = 4 + ((unsigned) b_dynamic & 0xf); /* number of bit length codes */ + + b_dynamic >>= 4; + k_dynamic -= 4; + if (nl > 286 || nd > 30) + abort_unzip(PASS_STATE_ONLY); /* bad lengths */ + + /* read in bit-length-code lengths */ + for (j = 0; j < nb; j++) { + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3); + ll[border[j]] = (unsigned) b_dynamic & 7; + b_dynamic >>= 3; + k_dynamic -= 3; + } + for (; j < 19; j++) + ll[border[j]] = 0; + + /* build decoding table for trees - single level, 7 bit lookup */ + bl = 7; + i = huft_build(ll, 19, 19, NULL, NULL, &inflate_codes_tl, &bl); + if (i != 0) { + abort_unzip(PASS_STATE_ONLY); //return i; /* incomplete code set */ + } + + /* read in literal and distance code lengths */ + n = nl + nd; + m = mask_bits[bl]; + i = l = 0; + while ((unsigned) i < n) { + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, (unsigned)bl); + td = inflate_codes_tl + ((unsigned) b_dynamic & m); + j = td->b; + b_dynamic >>= j; + k_dynamic -= j; + j = td->v.n; + if (j < 16) { /* length of code in bits (0..15) */ + ll[i++] = l = j; /* save last length in l */ + } else if (j == 16) { /* repeat last length 3 to 6 times */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 2); + j = 3 + ((unsigned) b_dynamic & 3); + b_dynamic >>= 2; + k_dynamic -= 2; + if ((unsigned) i + j > n) { + abort_unzip(PASS_STATE_ONLY); //return 1; + } + while (j--) { + ll[i++] = l; + } + } else if (j == 17) { /* 3 to 10 zero length codes */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3); + j = 3 + ((unsigned) b_dynamic & 7); + b_dynamic >>= 3; + k_dynamic -= 3; + if ((unsigned) i + j > n) { + abort_unzip(PASS_STATE_ONLY); //return 1; + } + while (j--) { + ll[i++] = 0; + } + l = 0; + } else { /* j == 18: 11 to 138 zero length codes */ + b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 7); + j = 11 + ((unsigned) b_dynamic & 0x7f); + b_dynamic >>= 7; + k_dynamic -= 7; + if ((unsigned) i + j > n) { + abort_unzip(PASS_STATE_ONLY); //return 1; + } + while (j--) { + ll[i++] = 0; + } + l = 0; + } + } + + /* free decoding table for trees */ + huft_free(inflate_codes_tl); + + /* restore the global bit buffer */ + gunzip_bb = b_dynamic; + gunzip_bk = k_dynamic; + + /* build the decoding tables for literal/length and distance codes */ + bl = lbits; + + i = huft_build(ll, nl, 257, cplens, cplext, &inflate_codes_tl, &bl); + if (i != 0) + abort_unzip(PASS_STATE_ONLY); + bd = dbits; + i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &inflate_codes_td, &bd); + if (i != 0) + abort_unzip(PASS_STATE_ONLY); + + /* set up data for inflate_codes() */ + inflate_codes_setup(PASS_STATE bl, bd); + + /* huft_free code moved into inflate_codes */ + + return -2; + } + default: + abort_unzip(PASS_STATE_ONLY); + } +} + +/* Two callsites, both in inflate_get_next_window */ +static void calculate_gunzip_crc(STATE_PARAM_ONLY) +{ + gunzip_crc = crc32_block_endian0(gunzip_crc, gunzip_window, gunzip_outbuf_count, gunzip_crc_table); + gunzip_bytes_out += gunzip_outbuf_count; +} + +/* One callsite in inflate_unzip_internal */ +static int inflate_get_next_window(STATE_PARAM_ONLY) +{ + gunzip_outbuf_count = 0; + + while (1) { + int ret; + + if (need_another_block) { + if (end_reached) { + calculate_gunzip_crc(PASS_STATE_ONLY); + end_reached = 0; + /* NB: need_another_block is still set */ + return 0; /* Last block */ + } + method = inflate_block(PASS_STATE &end_reached); + need_another_block = 0; + } + + switch (method) { + case -1: + ret = inflate_stored(PASS_STATE_ONLY); + break; + case -2: + ret = inflate_codes(PASS_STATE_ONLY); + break; + default: /* cannot happen */ + abort_unzip(PASS_STATE_ONLY); + } + + if (ret == 1) { + calculate_gunzip_crc(PASS_STATE_ONLY); + return 1; /* more data left */ + } + need_another_block = 1; /* end of that block */ + } + /* Doesnt get here */ +} + + +/* Called from unpack_gz_stream() and inflate_unzip() */ +static IF_DESKTOP(long long) int +inflate_unzip_internal(STATE_PARAM int in, int out) +{ + IF_DESKTOP(long long) int n = 0; + ssize_t nwrote; + + /* Allocate all global buffers (for DYN_ALLOC option) */ + gunzip_window = xmalloc(GUNZIP_WSIZE); + gunzip_outbuf_count = 0; + gunzip_bytes_out = 0; + gunzip_src_fd = in; + + /* (re) initialize state */ + method = -1; + need_another_block = 1; + resume_copy = 0; + gunzip_bk = 0; + gunzip_bb = 0; + + /* Create the crc table */ + gunzip_crc_table = crc32_filltable(NULL, 0); + gunzip_crc = ~0; + + error_msg = "corrupted data"; + if (setjmp(error_jmp)) { + /* Error from deep inside zip machinery */ + n = -1; + goto ret; + } + + while (1) { + int r = inflate_get_next_window(PASS_STATE_ONLY); + nwrote = full_write(out, gunzip_window, gunzip_outbuf_count); + if (nwrote != (ssize_t)gunzip_outbuf_count) { + bb_perror_msg("write"); + n = -1; + goto ret; + } + IF_DESKTOP(n += nwrote;) + if (r == 0) break; + } + + /* Store unused bytes in a global buffer so calling applets can access it */ + if (gunzip_bk >= 8) { + /* Undo too much lookahead. The next read will be byte aligned + * so we can discard unused bits in the last meaningful byte. */ + bytebuffer_offset--; + bytebuffer[bytebuffer_offset] = gunzip_bb & 0xff; + gunzip_bb >>= 8; + gunzip_bk -= 8; + } + ret: + /* Cleanup */ + free(gunzip_window); + free(gunzip_crc_table); + return n; +} + + +/* External entry points */ + +/* For unzip */ + +IF_DESKTOP(long long) int FAST_FUNC +inflate_unzip(inflate_unzip_result *res, off_t compr_size, int in, int out) +{ + IF_DESKTOP(long long) int n; + DECLARE_STATE; + + ALLOC_STATE; + + to_read = compr_size; +// bytebuffer_max = 0x8000; + bytebuffer_offset = 4; + bytebuffer = xmalloc(bytebuffer_max); + n = inflate_unzip_internal(PASS_STATE in, out); + free(bytebuffer); + + res->crc = gunzip_crc; + res->bytes_out = gunzip_bytes_out; + DEALLOC_STATE; + return n; +} + + +/* For gunzip */ + +/* helpers first */ + +/* Top up the input buffer with at least n bytes. */ +static int top_up(STATE_PARAM unsigned n) +{ + int count = bytebuffer_size - bytebuffer_offset; + + if (count < (int)n) { + memmove(bytebuffer, &bytebuffer[bytebuffer_offset], count); + bytebuffer_offset = 0; + bytebuffer_size = full_read(gunzip_src_fd, &bytebuffer[count], bytebuffer_max - count); + if ((int)bytebuffer_size < 0) { + bb_error_msg(bb_msg_read_error); + return 0; + } + bytebuffer_size += count; + if (bytebuffer_size < n) + return 0; + } + return 1; +} + +static uint16_t buffer_read_le_u16(STATE_PARAM_ONLY) +{ + uint16_t res; +#if BB_LITTLE_ENDIAN + move_from_unaligned16(res, &bytebuffer[bytebuffer_offset]); +#else + res = bytebuffer[bytebuffer_offset]; + res |= bytebuffer[bytebuffer_offset + 1] << 8; +#endif + bytebuffer_offset += 2; + return res; +} + +static uint32_t buffer_read_le_u32(STATE_PARAM_ONLY) +{ + uint32_t res; +#if BB_LITTLE_ENDIAN + move_from_unaligned32(res, &bytebuffer[bytebuffer_offset]); +#else + res = bytebuffer[bytebuffer_offset]; + res |= bytebuffer[bytebuffer_offset + 1] << 8; + res |= bytebuffer[bytebuffer_offset + 2] << 16; + res |= bytebuffer[bytebuffer_offset + 3] << 24; +#endif + bytebuffer_offset += 4; + return res; +} + +static int check_header_gzip(STATE_PARAM unpack_info_t *info) +{ + union { + unsigned char raw[8]; + struct { + uint8_t gz_method; + uint8_t flags; + uint32_t mtime; + uint8_t xtra_flags_UNUSED; + uint8_t os_flags_UNUSED; + } PACKED formatted; + } header; + struct BUG_header { + char BUG_header[sizeof(header) == 8 ? 1 : -1]; + }; + + /* + * Rewind bytebuffer. We use the beginning because the header has 8 + * bytes, leaving enough for unwinding afterwards. + */ + bytebuffer_size -= bytebuffer_offset; + memmove(bytebuffer, &bytebuffer[bytebuffer_offset], bytebuffer_size); + bytebuffer_offset = 0; + + if (!top_up(PASS_STATE 8)) + return 0; + memcpy(header.raw, &bytebuffer[bytebuffer_offset], 8); + bytebuffer_offset += 8; + + /* Check the compression method */ + if (header.formatted.gz_method != 8) { + return 0; + } + + if (header.formatted.flags & 0x04) { + /* bit 2 set: extra field present */ + unsigned extra_short; + + if (!top_up(PASS_STATE 2)) + return 0; + extra_short = buffer_read_le_u16(PASS_STATE_ONLY); + if (!top_up(PASS_STATE extra_short)) + return 0; + /* Ignore extra field */ + bytebuffer_offset += extra_short; + } + + /* Discard original name and file comment if any */ + /* bit 3 set: original file name present */ + /* bit 4 set: file comment present */ + if (header.formatted.flags & 0x18) { + while (1) { + do { + if (!top_up(PASS_STATE 1)) + return 0; + } while (bytebuffer[bytebuffer_offset++] != 0); + if ((header.formatted.flags & 0x18) != 0x18) + break; + header.formatted.flags &= ~0x18; + } + } + + if (info) + info->mtime = SWAP_LE32(header.formatted.mtime); + + /* Read the header checksum */ + if (header.formatted.flags & 0x02) { + if (!top_up(PASS_STATE 2)) + return 0; + bytebuffer_offset += 2; + } + return 1; +} + +IF_DESKTOP(long long) int FAST_FUNC +unpack_gz_stream_with_info(int in, int out, unpack_info_t *info) +{ + uint32_t v32; + IF_DESKTOP(long long) int n; + DECLARE_STATE; + + n = 0; + + ALLOC_STATE; + to_read = -1; +// bytebuffer_max = 0x8000; + bytebuffer = xmalloc(bytebuffer_max); + gunzip_src_fd = in; + + again: + if (!check_header_gzip(PASS_STATE info)) { + bb_error_msg("corrupted data"); + n = -1; + goto ret; + } + n += inflate_unzip_internal(PASS_STATE in, out); + if (n < 0) + goto ret; + + if (!top_up(PASS_STATE 8)) { + bb_error_msg("corrupted data"); + n = -1; + goto ret; + } + + /* Validate decompression - crc */ + v32 = buffer_read_le_u32(PASS_STATE_ONLY); + if ((~gunzip_crc) != v32) { + bb_error_msg("crc error"); + n = -1; + goto ret; + } + + /* Validate decompression - size */ + v32 = buffer_read_le_u32(PASS_STATE_ONLY); + if ((uint32_t)gunzip_bytes_out != v32) { + bb_error_msg("incorrect length"); + n = -1; + } + + if (!top_up(PASS_STATE 2)) + goto ret; /* EOF */ + + if (bytebuffer[bytebuffer_offset] == 0x1f + && bytebuffer[bytebuffer_offset + 1] == 0x8b + ) { + bytebuffer_offset += 2; + goto again; + } + /* GNU gzip says: */ + /*bb_error_msg("decompression OK, trailing garbage ignored");*/ + + ret: + free(bytebuffer); + DEALLOC_STATE; + return n; +} + +IF_DESKTOP(long long) int FAST_FUNC +unpack_gz_stream(int in, int out) +{ + return unpack_gz_stream_with_info(in, out, NULL); +} diff --git a/archival/libarchive/filter_accept_all.c b/archival/libarchive/filter_accept_all.c new file mode 100644 index 000000000..e69deb679 --- /dev/null +++ b/archival/libarchive/filter_accept_all.c @@ -0,0 +1,17 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* Accept any non-null name, its not really a filter at all */ +char FAST_FUNC filter_accept_all(archive_handle_t *archive_handle) +{ + if (archive_handle->file_header->name) + return EXIT_SUCCESS; + return EXIT_FAILURE; +} diff --git a/archival/libarchive/filter_accept_list.c b/archival/libarchive/filter_accept_list.c new file mode 100644 index 000000000..a7640af79 --- /dev/null +++ b/archival/libarchive/filter_accept_list.c @@ -0,0 +1,19 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* + * Accept names that are in the accept list, ignoring reject list. + */ +char FAST_FUNC filter_accept_list(archive_handle_t *archive_handle) +{ + if (find_list_entry(archive_handle->accept, archive_handle->file_header->name)) + return EXIT_SUCCESS; + return EXIT_FAILURE; +} diff --git a/archival/libarchive/filter_accept_list_reassign.c b/archival/libarchive/filter_accept_list_reassign.c new file mode 100644 index 000000000..d80f71668 --- /dev/null +++ b/archival/libarchive/filter_accept_list_reassign.c @@ -0,0 +1,51 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* Built and used only if ENABLE_DPKG || ENABLE_DPKG_DEB */ + +/* + * Reassign the subarchive metadata parser based on the filename extension + * e.g. if its a .tar.gz modify archive_handle->sub_archive to process a .tar.gz + * or if its a .tar.bz2 make archive_handle->sub_archive handle that + */ +char FAST_FUNC filter_accept_list_reassign(archive_handle_t *archive_handle) +{ + /* Check the file entry is in the accept list */ + if (find_list_entry(archive_handle->accept, archive_handle->file_header->name)) { + const char *name_ptr; + + /* Find extension */ + name_ptr = strrchr(archive_handle->file_header->name, '.'); + if (!name_ptr) + return EXIT_FAILURE; + name_ptr++; + + /* Modify the subarchive handler based on the extension */ + if (ENABLE_FEATURE_SEAMLESS_GZ + && strcmp(name_ptr, "gz") == 0 + ) { + archive_handle->dpkg__action_data_subarchive = get_header_tar_gz; + return EXIT_SUCCESS; + } + if (ENABLE_FEATURE_SEAMLESS_BZ2 + && strcmp(name_ptr, "bz2") == 0 + ) { + archive_handle->dpkg__action_data_subarchive = get_header_tar_bz2; + return EXIT_SUCCESS; + } + if (ENABLE_FEATURE_SEAMLESS_LZMA + && strcmp(name_ptr, "lzma") == 0 + ) { + archive_handle->dpkg__action_data_subarchive = get_header_tar_lzma; + return EXIT_SUCCESS; + } + } + return EXIT_FAILURE; +} diff --git a/archival/libarchive/filter_accept_reject_list.c b/archival/libarchive/filter_accept_reject_list.c new file mode 100644 index 000000000..3e86cca65 --- /dev/null +++ b/archival/libarchive/filter_accept_reject_list.c @@ -0,0 +1,36 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* + * Accept names that are in the accept list and not in the reject list + */ +char FAST_FUNC filter_accept_reject_list(archive_handle_t *archive_handle) +{ + const char *key; + const llist_t *reject_entry; + const llist_t *accept_entry; + + key = archive_handle->file_header->name; + + /* If the key is in a reject list fail */ + reject_entry = find_list_entry2(archive_handle->reject, key); + if (reject_entry) { + return EXIT_FAILURE; + } + accept_entry = find_list_entry2(archive_handle->accept, key); + + /* Fail if an accept list was specified and the key wasnt in there */ + if ((accept_entry == NULL) && archive_handle->accept) { + return EXIT_FAILURE; + } + + /* Accepted */ + return EXIT_SUCCESS; +} diff --git a/archival/libarchive/find_list_entry.c b/archival/libarchive/find_list_entry.c new file mode 100644 index 000000000..5efd1af2e --- /dev/null +++ b/archival/libarchive/find_list_entry.c @@ -0,0 +1,54 @@ +/* vi: set sw=4 ts=4: */ +/* + * Copyright (C) 2002 by Glenn McGrath + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include +#include "libbb.h" +#include "archive.h" + +/* Find a string in a shell pattern list */ +const llist_t* FAST_FUNC find_list_entry(const llist_t *list, const char *filename) +{ + while (list) { + if (fnmatch(list->data, filename, 0) == 0) { + return list; + } + list = list->link; + } + return NULL; +} + +/* Same, but compares only path components present in pattern + * (extra trailing path components in filename are assumed to match) + */ +const llist_t* FAST_FUNC find_list_entry2(const llist_t *list, const char *filename) +{ + char buf[PATH_MAX]; + int pattern_slash_cnt; + const char *c; + char *d; + + while (list) { + c = list->data; + pattern_slash_cnt = 0; + while (*c) + if (*c++ == '/') pattern_slash_cnt++; + c = filename; + d = buf; + /* paranoia is better than buffer overflows */ + while (*c && d != buf + sizeof(buf)-1) { + if (*c == '/' && --pattern_slash_cnt < 0) + break; + *d++ = *c++; + } + *d = '\0'; + if (fnmatch(list->data, buf, 0) == 0) { + return list; + } + list = list->link; + } + return NULL; +} diff --git a/archival/libarchive/get_header_ar.c b/archival/libarchive/get_header_ar.c new file mode 100644 index 000000000..df603b111 --- /dev/null +++ b/archival/libarchive/get_header_ar.c @@ -0,0 +1,133 @@ +/* vi: set sw=4 ts=4: */ +/* Copyright 2001 Glenn McGrath. + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" +#include "ar.h" + +static unsigned read_num(const char *str, int base) +{ + /* This code works because + * on misformatted numbers bb_strtou returns all-ones */ + int err = bb_strtou(str, NULL, base); + if (err == -1) + bb_error_msg_and_die("invalid ar header"); + return err; +} + +char FAST_FUNC get_header_ar(archive_handle_t *archive_handle) +{ + file_header_t *typed = archive_handle->file_header; + unsigned size; + union { + char raw[60]; + struct ar_header formatted; + } ar; +#if ENABLE_FEATURE_AR_LONG_FILENAMES + static char *ar_long_names; + static unsigned ar_long_name_size; +#endif + + /* dont use xread as we want to handle the error ourself */ + if (read(archive_handle->src_fd, ar.raw, 60) != 60) { + /* End Of File */ + return EXIT_FAILURE; + } + + /* ar header starts on an even byte (2 byte aligned) + * '\n' is used for padding + */ + if (ar.raw[0] == '\n') { + /* fix up the header, we started reading 1 byte too early */ + memmove(ar.raw, &ar.raw[1], 59); + ar.raw[59] = xread_char(archive_handle->src_fd); + archive_handle->offset++; + } + archive_handle->offset += 60; + + if (ar.formatted.magic[0] != '`' || ar.formatted.magic[1] != '\n') + bb_error_msg_and_die("invalid ar header"); + + /* FIXME: more thorough routine would be in order here + * (we have something like that in tar) + * but for now we are lax. */ + ar.formatted.magic[0] = '\0'; /* else 4G-2 file will have size="4294967294`\n..." */ + typed->size = size = read_num(ar.formatted.size, 10); + + /* special filenames have '/' as the first character */ + if (ar.formatted.name[0] == '/') { + if (ar.formatted.name[1] == ' ') { + /* This is the index of symbols in the file for compilers */ + data_skip(archive_handle); + archive_handle->offset += size; + return get_header_ar(archive_handle); /* Return next header */ + } +#if ENABLE_FEATURE_AR_LONG_FILENAMES + if (ar.formatted.name[1] == '/') { + /* If the second char is a '/' then this entries data section + * stores long filename for multiple entries, they are stored + * in static variable long_names for use in future entries + */ + ar_long_name_size = size; + free(ar_long_names); + ar_long_names = xmalloc(size); + xread(archive_handle->src_fd, ar_long_names, size); + archive_handle->offset += size; + /* Return next header */ + return get_header_ar(archive_handle); + } +#else + bb_error_msg_and_die("long filenames not supported"); +#endif + } + /* Only size is always present, the rest may be missing in + * long filename pseudo file. Thus we decode the rest + * after dealing with long filename pseudo file. + */ + typed->mode = read_num(ar.formatted.mode, 8); + typed->mtime = read_num(ar.formatted.date, 10); + typed->uid = read_num(ar.formatted.uid, 10); + typed->gid = read_num(ar.formatted.gid, 10); + +#if ENABLE_FEATURE_AR_LONG_FILENAMES + if (ar.formatted.name[0] == '/') { + unsigned long_offset; + + /* The number after the '/' indicates the offset in the ar data section + * (saved in ar_long_names) that conatains the real filename */ + long_offset = read_num(&ar.formatted.name[1], 10); + if (long_offset >= ar_long_name_size) { + bb_error_msg_and_die("can't resolve long filename"); + } + typed->name = xstrdup(ar_long_names + long_offset); + } else +#endif + { + /* short filenames */ + typed->name = xstrndup(ar.formatted.name, 16); + } + + typed->name[strcspn(typed->name, " /")] = '\0'; + + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { + archive_handle->action_header(typed); +#if ENABLE_DPKG || ENABLE_DPKG_DEB + if (archive_handle->dpkg__sub_archive) { + while (archive_handle->dpkg__action_data_subarchive(archive_handle->dpkg__sub_archive) == EXIT_SUCCESS) + continue; + } else +#endif + archive_handle->action_data(archive_handle); + } else { + data_skip(archive_handle); + } + + archive_handle->offset += typed->size; + /* Set the file pointer to the correct spot, we may have been reading a compressed file */ + lseek(archive_handle->src_fd, archive_handle->offset, SEEK_SET); + + return EXIT_SUCCESS; +} diff --git a/archival/libarchive/get_header_cpio.c b/archival/libarchive/get_header_cpio.c new file mode 100644 index 000000000..3d99b492a --- /dev/null +++ b/archival/libarchive/get_header_cpio.c @@ -0,0 +1,186 @@ +/* vi: set sw=4 ts=4: */ +/* Copyright 2002 Laurence Anderson + * + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +typedef struct hardlinks_t { + struct hardlinks_t *next; + int inode; /* TODO: must match maj/min too! */ + int mode ; + int mtime; /* These three are useful only in corner case */ + int uid ; /* of hardlinks with zero size body */ + int gid ; + char name[1]; +} hardlinks_t; + +char FAST_FUNC get_header_cpio(archive_handle_t *archive_handle) +{ + file_header_t *file_header = archive_handle->file_header; + char cpio_header[110]; + int namesize; + int major, minor, nlink, mode, inode; + unsigned size, uid, gid, mtime; + + /* There can be padding before archive header */ + data_align(archive_handle, 4); + + size = full_read(archive_handle->src_fd, cpio_header, 110); + if (size == 0) { + goto create_hardlinks; + } + if (size != 110) { + bb_error_msg_and_die("short read"); + } + archive_handle->offset += 110; + + if (strncmp(&cpio_header[0], "07070", 5) != 0 + || (cpio_header[5] != '1' && cpio_header[5] != '2') + ) { + bb_error_msg_and_die("unsupported cpio format, use newc or crc"); + } + + if (sscanf(cpio_header + 6, + "%8x" "%8x" "%8x" "%8x" + "%8x" "%8x" "%8x" /*maj,min:*/ "%*16c" + /*rmaj,rmin:*/"%8x" "%8x" "%8x" /*chksum: "%*8c"*/, + &inode, &mode, &uid, &gid, + &nlink, &mtime, &size, + &major, &minor, &namesize) != 10) + bb_error_msg_and_die("damaged cpio file"); + file_header->mode = mode; + file_header->uid = uid; + file_header->gid = gid; + file_header->mtime = mtime; + file_header->size = size; + + namesize &= 0x1fff; /* paranoia: limit names to 8k chars */ + file_header->name = xzalloc(namesize + 1); + /* Read in filename */ + xread(archive_handle->src_fd, file_header->name, namesize); + if (file_header->name[0] == '/') { + /* Testcase: echo /etc/hosts | cpio -pvd /tmp + * Without this code, it tries to unpack /etc/hosts + * into "/etc/hosts", not "etc/hosts". + */ + char *p = file_header->name; + do p++; while (*p == '/'); + overlapping_strcpy(file_header->name, p); + } + archive_handle->offset += namesize; + + /* Update offset amount and skip padding before file contents */ + data_align(archive_handle, 4); + + if (strcmp(file_header->name, "TRAILER!!!") == 0) { + /* Always round up. ">> 9" divides by 512 */ + archive_handle->cpio__blocks = (uoff_t)(archive_handle->offset + 511) >> 9; + goto create_hardlinks; + } + + file_header->link_target = NULL; + if (S_ISLNK(file_header->mode)) { + file_header->size &= 0x1fff; /* paranoia: limit names to 8k chars */ + file_header->link_target = xzalloc(file_header->size + 1); + xread(archive_handle->src_fd, file_header->link_target, file_header->size); + archive_handle->offset += file_header->size; + file_header->size = 0; /* Stop possible seeks in future */ + } + +// TODO: data_extract_all can't deal with hardlinks to non-files... +// when fixed, change S_ISREG to !S_ISDIR here + + if (nlink > 1 && S_ISREG(file_header->mode)) { + hardlinks_t *new = xmalloc(sizeof(*new) + namesize); + new->inode = inode; + new->mode = mode ; + new->mtime = mtime; + new->uid = uid ; + new->gid = gid ; + strcpy(new->name, file_header->name); + /* Put file on a linked list for later */ + if (size == 0) { + new->next = archive_handle->cpio__hardlinks_to_create; + archive_handle->cpio__hardlinks_to_create = new; + return EXIT_SUCCESS; /* Skip this one */ + /* TODO: this breaks cpio -t (it does not show hardlinks) */ + } + new->next = archive_handle->cpio__created_hardlinks; + archive_handle->cpio__created_hardlinks = new; + } + file_header->device = makedev(major, minor); + + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { + archive_handle->action_data(archive_handle); +//TODO: run "echo /etc/hosts | cpio -pv /tmp" twice. On 2nd run: +//cpio: etc/hosts not created: newer or same age file exists +//etc/hosts <-- should NOT show it +//2 blocks <-- should say "0 blocks" + archive_handle->action_header(file_header); + } else { + data_skip(archive_handle); + } + + archive_handle->offset += file_header->size; + + free(file_header->link_target); + free(file_header->name); + file_header->link_target = NULL; + file_header->name = NULL; + + return EXIT_SUCCESS; + + create_hardlinks: + free(file_header->link_target); + free(file_header->name); + + while (archive_handle->cpio__hardlinks_to_create) { + hardlinks_t *cur; + hardlinks_t *make_me = archive_handle->cpio__hardlinks_to_create; + + archive_handle->cpio__hardlinks_to_create = make_me->next; + + memset(file_header, 0, sizeof(*file_header)); + file_header->mtime = make_me->mtime; + file_header->name = make_me->name; + file_header->mode = make_me->mode; + file_header->uid = make_me->uid; + file_header->gid = make_me->gid; + /*file_header->size = 0;*/ + /*file_header->link_target = NULL;*/ + + /* Try to find a file we are hardlinked to */ + cur = archive_handle->cpio__created_hardlinks; + while (cur) { + /* TODO: must match maj/min too! */ + if (cur->inode == make_me->inode) { + file_header->link_target = cur->name; + /* link_target != NULL, size = 0: "I am a hardlink" */ + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) + archive_handle->action_data(archive_handle); + free(make_me); + goto next_link; + } + cur = cur->next; + } + /* Oops... no file with such inode was created... do it now + * (happens when hardlinked files are empty (zero length)) */ + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) + archive_handle->action_data(archive_handle); + /* Move to the list of created hardlinked files */ + make_me->next = archive_handle->cpio__created_hardlinks; + archive_handle->cpio__created_hardlinks = make_me; + next_link: ; + } + + while (archive_handle->cpio__created_hardlinks) { + hardlinks_t *p = archive_handle->cpio__created_hardlinks; + archive_handle->cpio__created_hardlinks = p->next; + free(p); + } + + return EXIT_FAILURE; /* "No more files to process" */ +} diff --git a/archival/libarchive/get_header_tar.c b/archival/libarchive/get_header_tar.c new file mode 100644 index 000000000..78b0ae25f --- /dev/null +++ b/archival/libarchive/get_header_tar.c @@ -0,0 +1,461 @@ +/* vi: set sw=4 ts=4: */ +/* Licensed under GPLv2 or later, see file LICENSE in this source tree. + * + * FIXME: + * In privileged mode if uname and gname map to a uid and gid then use the + * mapped value instead of the uid/gid values in tar header + * + * References: + * GNU tar and star man pages, + * Opengroup's ustar interchange format, + * http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html + */ + +#include "libbb.h" +#include "archive.h" + +typedef uint32_t aliased_uint32_t FIX_ALIASING; +typedef off_t aliased_off_t FIX_ALIASING; + + +/* NB: _DESTROYS_ str[len] character! */ +static unsigned long long getOctal(char *str, int len) +{ + unsigned long long v; + char *end; + /* NB: leading spaces are allowed. Using strtoull to handle that. + * The downside is that we accept e.g. "-123" too :( + */ + str[len] = '\0'; + v = strtoull(str, &end, 8); + /* std: "Each numeric field is terminated by one or more + * or NUL characters". We must support ' '! */ + if (*end != '\0' && *end != ' ') { + int8_t first = str[0]; + if (!(first & 0x80)) + bb_error_msg_and_die("corrupted octal value in tar header"); + /* + * GNU tar uses "base-256 encoding" for very large numbers. + * Encoding is binary, with highest bit always set as a marker + * and sign in next-highest bit: + * 80 00 .. 00 - zero + * bf ff .. ff - largest positive number + * ff ff .. ff - minus 1 + * c0 00 .. 00 - smallest negative number + * + * Example of tar file with 8914993153 (0x213600001) byte file. + * Field starts at offset 7c: + * 00070 30 30 30 00 30 30 30 30 30 30 30 00 80 00 00 00 |000.0000000.....| + * 00080 00 00 00 02 13 60 00 01 31 31 31 32 30 33 33 36 |.....`..11120336| + * + * NB: tarballs with NEGATIVE unix times encoded that way were seen! + */ + v = first; + /* Sign-extend using 6th bit: */ + v <<= sizeof(unsigned long long)*8 - 7; + v = (long long)v >> (sizeof(unsigned long long)*8 - 7); + while (--len != 0) + v = (v << 8) + (unsigned char) *str++; + } + return v; +} +#define GET_OCTAL(a) getOctal((a), sizeof(a)) + +#if ENABLE_FEATURE_TAR_SELINUX +/* Scan a PAX header for SELinux contexts, via "RHT.security.selinux" keyword. + * This is what Red Hat's patched version of tar uses. + */ +# define SELINUX_CONTEXT_KEYWORD "RHT.security.selinux" +static char *get_selinux_sctx_from_pax_hdr(archive_handle_t *archive_handle, unsigned sz) +{ + char *buf, *p; + char *result; + + p = buf = xmalloc(sz + 1); + /* prevent bb_strtou from running off the buffer */ + buf[sz] = '\0'; + xread(archive_handle->src_fd, buf, sz); + archive_handle->offset += sz; + + result = NULL; + while (sz != 0) { + char *end, *value; + unsigned len; + + /* Every record has this format: "LEN NAME=VALUE\n" */ + len = bb_strtou(p, &end, 10); + /* expect errno to be EINVAL, because the character + * following the digits should be a space + */ + p += len; + sz -= len; + if ((int)sz < 0 + || len == 0 + || errno != EINVAL + || *end != ' ' + ) { + bb_error_msg("malformed extended header, skipped"); + // More verbose version: + //bb_error_msg("malformed extended header at %"OFF_FMT"d, skipped", + // archive_handle->offset - (sz + len)); + break; + } + /* overwrite the terminating newline with NUL + * (we do not bother to check that it *was* a newline) + */ + p[-1] = '\0'; + /* Is it selinux security context? */ + value = end + 1; + if (strncmp(value, SELINUX_CONTEXT_KEYWORD"=", sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1) == 0) { + value += sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1; + result = xstrdup(value); + break; + } + } + + free(buf); + return result; +} +#endif + +char FAST_FUNC get_header_tar(archive_handle_t *archive_handle) +{ + file_header_t *file_header = archive_handle->file_header; + struct tar_header_t tar; + char *cp; + int i, sum_u, sum; +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + int sum_s; +#endif + int parse_names; + + /* Our "private data" */ +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS +# define p_longname (archive_handle->tar__longname) +# define p_linkname (archive_handle->tar__linkname) +#else +# define p_longname 0 +# define p_linkname 0 +#endif + +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS || ENABLE_FEATURE_TAR_SELINUX + again: +#endif + /* Align header */ + data_align(archive_handle, 512); + + again_after_align: + +#if ENABLE_DESKTOP || ENABLE_FEATURE_TAR_AUTODETECT + /* to prevent misdetection of bz2 sig */ + *(aliased_uint32_t*)&tar = 0; + i = full_read(archive_handle->src_fd, &tar, 512); + /* If GNU tar sees EOF in above read, it says: + * "tar: A lone zero block at N", where N = kilobyte + * where EOF was met (not EOF block, actual EOF!), + * and exits with EXIT_SUCCESS. + * We will mimic exit(EXIT_SUCCESS), although we will not mimic + * the message and we don't check whether we indeed + * saw zero block directly before this. */ + if (i == 0) { + xfunc_error_retval = 0; + short_read: + bb_error_msg_and_die("short read"); + } + if (i != 512) { + IF_FEATURE_TAR_AUTODETECT(goto autodetect;) + goto short_read; + } + +#else + i = 512; + xread(archive_handle->src_fd, &tar, i); +#endif + archive_handle->offset += i; + + /* If there is no filename its an empty header */ + if (tar.name[0] == 0 && tar.prefix[0] == 0) { + if (archive_handle->tar__end) { + /* Second consecutive empty header - end of archive. + * Read until the end to empty the pipe from gz or bz2 + */ + while (full_read(archive_handle->src_fd, &tar, 512) == 512) + continue; + return EXIT_FAILURE; + } + archive_handle->tar__end = 1; + return EXIT_SUCCESS; + } + archive_handle->tar__end = 0; + + /* Check header has valid magic, "ustar" is for the proper tar, + * five NULs are for the old tar format */ + if (strncmp(tar.magic, "ustar", 5) != 0 + && (!ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY + || memcmp(tar.magic, "\0\0\0\0", 5) != 0) + ) { +#if ENABLE_FEATURE_TAR_AUTODETECT + char FAST_FUNC (*get_header_ptr)(archive_handle_t *); + uint16_t magic2; + + autodetect: + magic2 = *(uint16_t*)tar.name; + /* tar gz/bz autodetect: check for gz/bz2 magic. + * If we see the magic, and it is the very first block, + * we can switch to get_header_tar_gz/bz2/lzma(). + * Needs seekable fd. I wish recv(MSG_PEEK) works + * on any fd... */ +# if ENABLE_FEATURE_SEAMLESS_GZ + if (magic2 == GZIP_MAGIC) { + get_header_ptr = get_header_tar_gz; + } else +# endif +# if ENABLE_FEATURE_SEAMLESS_BZ2 + if (magic2 == BZIP2_MAGIC + && tar.name[2] == 'h' && isdigit(tar.name[3]) + ) { /* bzip2 */ + get_header_ptr = get_header_tar_bz2; + } else +# endif +# if ENABLE_FEATURE_SEAMLESS_XZ + //TODO: if (magic2 == XZ_MAGIC1)... + //else +# endif + goto err; + /* Two different causes for lseek() != 0: + * unseekable fd (would like to support that too, but...), + * or not first block (false positive, it's not .gz/.bz2!) */ + if (lseek(archive_handle->src_fd, -i, SEEK_CUR) != 0) + goto err; + while (get_header_ptr(archive_handle) == EXIT_SUCCESS) + continue; + return EXIT_FAILURE; + err: +#endif /* FEATURE_TAR_AUTODETECT */ + bb_error_msg_and_die("invalid tar magic"); + } + + /* Do checksum on headers. + * POSIX says that checksum is done on unsigned bytes, but + * Sun and HP-UX gets it wrong... more details in + * GNU tar source. */ +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + sum_s = ' ' * sizeof(tar.chksum); +#endif + sum_u = ' ' * sizeof(tar.chksum); + for (i = 0; i < 148; i++) { + sum_u += ((unsigned char*)&tar)[i]; +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + sum_s += ((signed char*)&tar)[i]; +#endif + } + for (i = 156; i < 512; i++) { + sum_u += ((unsigned char*)&tar)[i]; +#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY + sum_s += ((signed char*)&tar)[i]; +#endif + } + /* This field does not need special treatment (getOctal) */ + { + char *endp; /* gcc likes temp var for &endp */ + sum = strtoul(tar.chksum, &endp, 8); + if ((*endp != '\0' && *endp != ' ') + || (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) + ) { + bb_error_msg_and_die("invalid tar header checksum"); + } + } + /* don't use xstrtoul, tar.chksum may have leading spaces */ + sum = strtoul(tar.chksum, NULL, 8); + if (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) { + bb_error_msg_and_die("invalid tar header checksum"); + } + + /* 0 is reserved for high perf file, treat as normal file */ + if (!tar.typeflag) tar.typeflag = '0'; + parse_names = (tar.typeflag >= '0' && tar.typeflag <= '7'); + + /* getOctal trashes subsequent field, therefore we call it + * on fields in reverse order */ + if (tar.devmajor[0]) { + char t = tar.prefix[0]; + /* we trash prefix[0] here, but we DO need it later! */ + unsigned minor = GET_OCTAL(tar.devminor); + unsigned major = GET_OCTAL(tar.devmajor); + file_header->device = makedev(major, minor); + tar.prefix[0] = t; + } + file_header->link_target = NULL; + if (!p_linkname && parse_names && tar.linkname[0]) { + file_header->link_target = xstrndup(tar.linkname, sizeof(tar.linkname)); + /* FIXME: what if we have non-link object with link_target? */ + /* Will link_target be free()ed? */ + } +#if ENABLE_FEATURE_TAR_UNAME_GNAME + file_header->tar__uname = tar.uname[0] ? xstrndup(tar.uname, sizeof(tar.uname)) : NULL; + file_header->tar__gname = tar.gname[0] ? xstrndup(tar.gname, sizeof(tar.gname)) : NULL; +#endif + file_header->mtime = GET_OCTAL(tar.mtime); + file_header->size = GET_OCTAL(tar.size); + file_header->gid = GET_OCTAL(tar.gid); + file_header->uid = GET_OCTAL(tar.uid); + /* Set bits 0-11 of the files mode */ + file_header->mode = 07777 & GET_OCTAL(tar.mode); + + file_header->name = NULL; + if (!p_longname && parse_names) { + /* we trash mode[0] here, it's ok */ + //tar.name[sizeof(tar.name)] = '\0'; - gcc 4.3.0 would complain + tar.mode[0] = '\0'; + if (tar.prefix[0]) { + /* and padding[0] */ + //tar.prefix[sizeof(tar.prefix)] = '\0'; - gcc 4.3.0 would complain + tar.padding[0] = '\0'; + file_header->name = concat_path_file(tar.prefix, tar.name); + } else + file_header->name = xstrdup(tar.name); + } + + /* Set bits 12-15 of the files mode */ + /* (typeflag was not trashed because chksum does not use getOctal) */ + switch (tar.typeflag) { + /* busybox identifies hard links as being regular files with 0 size and a link name */ + case '1': + file_header->mode |= S_IFREG; + break; + case '7': + /* case 0: */ + case '0': +#if ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY + if (last_char_is(file_header->name, '/')) { + goto set_dir; + } +#endif + file_header->mode |= S_IFREG; + break; + case '2': + file_header->mode |= S_IFLNK; + /* have seen tarballs with size field containing + * the size of the link target's name */ + size0: + file_header->size = 0; + break; + case '3': + file_header->mode |= S_IFCHR; + goto size0; /* paranoia */ + case '4': + file_header->mode |= S_IFBLK; + goto size0; + case '5': + IF_FEATURE_TAR_OLDGNU_COMPATIBILITY(set_dir:) + file_header->mode |= S_IFDIR; + goto size0; + case '6': + file_header->mode |= S_IFIFO; + goto size0; +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS + case 'L': + /* free: paranoia: tar with several consecutive longnames */ + free(p_longname); + /* For paranoia reasons we allocate extra NUL char */ + p_longname = xzalloc(file_header->size + 1); + /* We read ASCIZ string, including NUL */ + xread(archive_handle->src_fd, p_longname, file_header->size); + archive_handle->offset += file_header->size; + /* return get_header_tar(archive_handle); */ + /* gcc 4.1.1 didn't optimize it into jump */ + /* so we will do it ourself, this also saves stack */ + goto again; + case 'K': + free(p_linkname); + p_linkname = xzalloc(file_header->size + 1); + xread(archive_handle->src_fd, p_linkname, file_header->size); + archive_handle->offset += file_header->size; + /* return get_header_tar(archive_handle); */ + goto again; + case 'D': /* GNU dump dir */ + case 'M': /* Continuation of multi volume archive */ + case 'N': /* Old GNU for names > 100 characters */ + case 'S': /* Sparse file */ + case 'V': /* Volume header */ +#endif +#if !ENABLE_FEATURE_TAR_SELINUX + case 'g': /* pax global header */ + case 'x': /* pax extended header */ +#else + skip_ext_hdr: +#endif + { + off_t sz; + bb_error_msg("warning: skipping header '%c'", tar.typeflag); + sz = (file_header->size + 511) & ~(off_t)511; + archive_handle->offset += sz; + sz >>= 9; /* sz /= 512 but w/o contortions for signed div */ + while (sz--) + xread(archive_handle->src_fd, &tar, 512); + /* return get_header_tar(archive_handle); */ + goto again_after_align; + } +#if ENABLE_FEATURE_TAR_SELINUX + case 'g': /* pax global header */ + case 'x': { /* pax extended header */ + char **pp; + if ((uoff_t)file_header->size > 0xfffff) /* paranoia */ + goto skip_ext_hdr; + pp = (tar.typeflag == 'g') ? &archive_handle->tar__global_sctx : &archive_handle->tar__next_file_sctx; + free(*pp); + *pp = get_selinux_sctx_from_pax_hdr(archive_handle, file_header->size); + goto again; + } +#endif + default: + bb_error_msg_and_die("unknown typeflag: 0x%x", tar.typeflag); + } + +#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS + if (p_longname) { + file_header->name = p_longname; + p_longname = NULL; + } + if (p_linkname) { + file_header->link_target = p_linkname; + p_linkname = NULL; + } +#endif + if (strncmp(file_header->name, "/../"+1, 3) == 0 + || strstr(file_header->name, "/../") + ) { + bb_error_msg_and_die("name with '..' encountered: '%s'", + file_header->name); + } + + /* Strip trailing '/' in directories */ + /* Must be done after mode is set as '/' is used to check if it's a directory */ + cp = last_char_is(file_header->name, '/'); + + if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { + archive_handle->action_header(/*archive_handle->*/ file_header); + /* Note that we kill the '/' only after action_header() */ + /* (like GNU tar 1.15.1: verbose mode outputs "dir/dir/") */ + if (cp) + *cp = '\0'; + archive_handle->action_data(archive_handle); + if (archive_handle->accept || archive_handle->reject) + llist_add_to(&archive_handle->passed, file_header->name); + else /* Caller isn't interested in list of unpacked files */ + free(file_header->name); + } else { + data_skip(archive_handle); + free(file_header->name); + } + archive_handle->offset += file_header->size; + + free(file_header->link_target); + /* Do not free(file_header->name)! + * It might be inserted in archive_handle->passed - see above */ +#if ENABLE_FEATURE_TAR_UNAME_GNAME + free(file_header->tar__uname); + free(file_header->tar__gname); +#endif + return EXIT_SUCCESS; +} diff --git a/archival/libarchive/get_header_tar_bz2.c b/archival/libarchive/get_header_tar_bz2.c new file mode 100644 index 000000000..60d32069f --- /dev/null +++ b/archival/libarchive/get_header_tar_bz2.c @@ -0,0 +1,21 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +char FAST_FUNC get_header_tar_bz2(archive_handle_t *archive_handle) +{ + /* Can't lseek over pipes */ + archive_handle->seek = seek_by_read; + + open_transformer(archive_handle->src_fd, unpack_bz2_stream_prime, "bunzip2"); + archive_handle->offset = 0; + while (get_header_tar(archive_handle) == EXIT_SUCCESS) + continue; + + /* Can only do one file at a time */ + return EXIT_FAILURE; +} diff --git a/archival/libarchive/get_header_tar_gz.c b/archival/libarchive/get_header_tar_gz.c new file mode 100644 index 000000000..b09f8691c --- /dev/null +++ b/archival/libarchive/get_header_tar_gz.c @@ -0,0 +1,36 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +char FAST_FUNC get_header_tar_gz(archive_handle_t *archive_handle) +{ +#if BB_MMU + unsigned char magic[2]; +#endif + + /* Can't lseek over pipes */ + archive_handle->seek = seek_by_read; + + /* Check gzip magic only if open_transformer will invoke unpack_gz_stream (MMU case). + * Otherwise, it will invoke an external helper "gunzip -cf" (NOMMU case) which will + * need the header. */ +#if BB_MMU + xread(archive_handle->src_fd, &magic, 2); + /* Can skip this check, but error message will be less clear */ + if ((magic[0] != 0x1f) || (magic[1] != 0x8b)) { + bb_error_msg_and_die("invalid gzip magic"); + } +#endif + + open_transformer(archive_handle->src_fd, unpack_gz_stream, "gunzip"); + archive_handle->offset = 0; + while (get_header_tar(archive_handle) == EXIT_SUCCESS) + continue; + + /* Can only do one file at a time */ + return EXIT_FAILURE; +} diff --git a/archival/libarchive/get_header_tar_lzma.c b/archival/libarchive/get_header_tar_lzma.c new file mode 100644 index 000000000..da08e0c72 --- /dev/null +++ b/archival/libarchive/get_header_tar_lzma.c @@ -0,0 +1,24 @@ +/* vi: set sw=4 ts=4: */ +/* + * Small lzma deflate implementation. + * Copyright (C) 2006 Aurelien Jacobs + * + * Licensed under GPLv2, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +char FAST_FUNC get_header_tar_lzma(archive_handle_t *archive_handle) +{ + /* Can't lseek over pipes */ + archive_handle->seek = seek_by_read; + + open_transformer(archive_handle->src_fd, unpack_lzma_stream, "unlzma"); + archive_handle->offset = 0; + while (get_header_tar(archive_handle) == EXIT_SUCCESS) + continue; + + /* Can only do one file at a time */ + return EXIT_FAILURE; +} diff --git a/archival/libarchive/header_list.c b/archival/libarchive/header_list.c new file mode 100644 index 000000000..c4fc75f38 --- /dev/null +++ b/archival/libarchive/header_list.c @@ -0,0 +1,12 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC header_list(const file_header_t *file_header) +{ +//TODO: cpio -vp DIR should output "DIR/NAME", not just "NAME" */ + puts(file_header->name); +} diff --git a/archival/libarchive/header_skip.c b/archival/libarchive/header_skip.c new file mode 100644 index 000000000..2bfc5253c --- /dev/null +++ b/archival/libarchive/header_skip.c @@ -0,0 +1,10 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC header_skip(const file_header_t *file_header UNUSED_PARAM) +{ +} diff --git a/archival/libarchive/header_verbose_list.c b/archival/libarchive/header_verbose_list.c new file mode 100644 index 000000000..bc4e4154b --- /dev/null +++ b/archival/libarchive/header_verbose_list.c @@ -0,0 +1,69 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC header_verbose_list(const file_header_t *file_header) +{ + struct tm tm_time; + struct tm *ptm = &tm_time; //localtime(&file_header->mtime); + +#if ENABLE_FEATURE_TAR_UNAME_GNAME + char uid[sizeof(int)*3 + 2]; + /*char gid[sizeof(int)*3 + 2];*/ + char *user; + char *group; + + localtime_r(&file_header->mtime, ptm); + + user = file_header->tar__uname; + if (user == NULL) { + sprintf(uid, "%u", (unsigned)file_header->uid); + user = uid; + } + group = file_header->tar__gname; + if (group == NULL) { + /*sprintf(gid, "%u", (unsigned)file_header->gid);*/ + group = utoa(file_header->gid); + } + printf("%s %s/%s %9"OFF_FMT"u %4u-%02u-%02u %02u:%02u:%02u %s", + bb_mode_string(file_header->mode), + user, + group, + file_header->size, + 1900 + ptm->tm_year, + 1 + ptm->tm_mon, + ptm->tm_mday, + ptm->tm_hour, + ptm->tm_min, + ptm->tm_sec, + file_header->name); + +#else /* !FEATURE_TAR_UNAME_GNAME */ + + localtime_r(&file_header->mtime, ptm); + + printf("%s %u/%u %9"OFF_FMT"u %4u-%02u-%02u %02u:%02u:%02u %s", + bb_mode_string(file_header->mode), + (unsigned)file_header->uid, + (unsigned)file_header->gid, + file_header->size, + 1900 + ptm->tm_year, + 1 + ptm->tm_mon, + ptm->tm_mday, + ptm->tm_hour, + ptm->tm_min, + ptm->tm_sec, + file_header->name); + +#endif /* FEATURE_TAR_UNAME_GNAME */ + + /* NB: GNU tar shows "->" for symlinks and "link to" for hardlinks */ + if (file_header->link_target) { + printf(" -> %s", file_header->link_target); + } + bb_putchar('\n'); +} diff --git a/archival/libarchive/init_handle.c b/archival/libarchive/init_handle.c new file mode 100644 index 000000000..6644ea13b --- /dev/null +++ b/archival/libarchive/init_handle.c @@ -0,0 +1,22 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +archive_handle_t* FAST_FUNC init_handle(void) +{ + archive_handle_t *archive_handle; + + /* Initialize default values */ + archive_handle = xzalloc(sizeof(archive_handle_t)); + archive_handle->file_header = xzalloc(sizeof(file_header_t)); + archive_handle->action_header = header_skip; + archive_handle->action_data = data_skip; + archive_handle->filter = filter_accept_all; + archive_handle->seek = seek_by_jump; + + return archive_handle; +} diff --git a/archival/libarchive/liblzo.h b/archival/libarchive/liblzo.h new file mode 100644 index 000000000..843997cb9 --- /dev/null +++ b/archival/libarchive/liblzo.h @@ -0,0 +1,93 @@ +/* + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + Markus F.X.J. Oberhumer + http://www.oberhumer.com/opensource/lzo/ + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +#include "liblzo_interface.h" + +/* lzo-2.03/src/config1x.h */ +#define M2_MIN_LEN 3 +#define M2_MAX_LEN 8 +#define M3_MAX_LEN 33 +#define M4_MAX_LEN 9 +#define M1_MAX_OFFSET 0x0400 +#define M2_MAX_OFFSET 0x0800 +#define M3_MAX_OFFSET 0x4000 +#define M4_MAX_OFFSET 0xbfff +#define M1_MARKER 0 +#define M3_MARKER 32 +#define M4_MARKER 16 + +#define MX_MAX_OFFSET (M1_MAX_OFFSET + M2_MAX_OFFSET) +#define MIN_LOOKAHEAD (M2_MAX_LEN + 1) + +#define LZO_EOF_CODE + +/* lzo-2.03/src/lzo_dict.h */ +#define GINDEX(m_pos,m_off,dict,dindex,in) m_pos = dict[dindex] +#define DX2(p,s1,s2) \ + (((((unsigned)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0]) +//#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0]) +//#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0]) +#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0]) + +#define D_SIZE (1U << D_BITS) +#define D_MASK ((1U << D_BITS) - 1) +#define D_HIGH ((D_MASK >> 1) + 1) + +#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \ + ( \ + m_pos = ip - (unsigned)(ip - m_pos), \ + ((uintptr_t)m_pos < (uintptr_t)in \ + || (m_off = (unsigned)(ip - m_pos)) <= 0 \ + || m_off > max_offset) \ + ) + +#define DENTRY(p,in) (p) +#define UPDATE_I(dict,drun,index,p,in) dict[index] = DENTRY(p,in) + +#define DMS(v,s) ((unsigned) (((v) & (D_MASK >> (s))) << (s))) +#define DM(v) ((unsigned) ((v) & D_MASK)) +#define DMUL(a,b) ((unsigned) ((a) * (b))) + +/* lzo-2.03/src/lzo_ptr.h */ +#define pd(a,b) ((unsigned)((a)-(b))) + +# define TEST_IP (ip < ip_end) +# define NEED_IP(x) \ + if ((unsigned)(ip_end - ip) < (unsigned)(x)) goto input_overrun + +# undef TEST_OP /* don't need both of the tests here */ +# define TEST_OP 1 +# define NEED_OP(x) \ + if ((unsigned)(op_end - op) < (unsigned)(x)) goto output_overrun + +#define HAVE_ANY_OP 1 + +//#if defined(LZO_TEST_OVERRUN_LOOKBEHIND) +# define TEST_LB(m_pos) if (m_pos < out || m_pos >= op) goto lookbehind_overrun +//# define TEST_LBO(m_pos,o) if (m_pos < out || m_pos >= op - (o)) goto lookbehind_overrun +//#else +//# define TEST_LB(m_pos) ((void) 0) +//# define TEST_LBO(m_pos,o) ((void) 0) +//#endif diff --git a/archival/libarchive/lzo1x_1.c b/archival/libarchive/lzo1x_1.c new file mode 100644 index 000000000..a88839846 --- /dev/null +++ b/archival/libarchive/lzo1x_1.c @@ -0,0 +1,35 @@ +/* LZO1X-1 compression + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + Markus F.X.J. Oberhumer + http://www.oberhumer.com/opensource/lzo/ + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "libbb.h" +#include "liblzo.h" + +#define D_BITS 14 +#define D_INDEX1(d,p) d = DM(DMUL(0x21,DX3(p,5,5,6)) >> 5) +#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) + +#define DO_COMPRESS lzo1x_1_compress + +#include "lzo1x_c.c" diff --git a/archival/libarchive/lzo1x_1o.c b/archival/libarchive/lzo1x_1o.c new file mode 100644 index 000000000..3c61253e0 --- /dev/null +++ b/archival/libarchive/lzo1x_1o.c @@ -0,0 +1,35 @@ +/* LZO1X-1(15) compression + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + Markus F.X.J. Oberhumer + http://www.oberhumer.com/opensource/lzo/ + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "libbb.h" +#include "liblzo.h" + +#define D_BITS 15 +#define D_INDEX1(d,p) d = DM(DMUL(0x21,DX3(p,5,5,6)) >> 5) +#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) + +#define DO_COMPRESS lzo1x_1_15_compress + +#include "lzo1x_c.c" diff --git a/archival/libarchive/lzo1x_9x.c b/archival/libarchive/lzo1x_9x.c new file mode 100644 index 000000000..483205155 --- /dev/null +++ b/archival/libarchive/lzo1x_9x.c @@ -0,0 +1,921 @@ +/* lzo1x_9x.c -- implementation of the LZO1X-999 compression algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 2008 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2007 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2006 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2005 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2004 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2003 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer + Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + + Markus F.X.J. Oberhumer + + http://www.oberhumer.com/opensource/lzo/ +*/ +#include "libbb.h" + +/* The following is probably only safe on Intel-compatible processors ... */ +#define LZO_UNALIGNED_OK_2 +#define LZO_UNALIGNED_OK_4 + +#include "liblzo.h" + +#define LZO_MAX(a,b) ((a) >= (b) ? (a) : (b)) +#define LZO_MIN(a,b) ((a) <= (b) ? (a) : (b)) +#define LZO_MAX3(a,b,c) ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c)) + +/*********************************************************************** +// +************************************************************************/ +#define SWD_N M4_MAX_OFFSET /* size of ring buffer */ +#define SWD_F 2048 /* upper limit for match length */ + +#define SWD_BEST_OFF (LZO_MAX3(M2_MAX_LEN, M3_MAX_LEN, M4_MAX_LEN) + 1) + +typedef struct { + int init; + + unsigned look; /* bytes in lookahead buffer */ + + unsigned m_len; + unsigned m_off; + + const uint8_t *bp; + const uint8_t *ip; + const uint8_t *in; + const uint8_t *in_end; + uint8_t *out; + + unsigned r1_lit; + +} lzo1x_999_t; + +#define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1)) + +/* lzo_swd.c -- sliding window dictionary */ + +/*********************************************************************** +// +************************************************************************/ +#define SWD_UINT_MAX USHRT_MAX + +#ifndef SWD_HSIZE +# define SWD_HSIZE 16384 +#endif +#ifndef SWD_MAX_CHAIN +# define SWD_MAX_CHAIN 2048 +#endif + +#define HEAD3(b, p) \ + ( ((0x9f5f * ((((b[p]<<5)^b[p+1])<<5) ^ b[p+2])) >> 5) & (SWD_HSIZE-1) ) + +#if defined(LZO_UNALIGNED_OK_2) +# define HEAD2(b,p) (* (uint16_t *) &(b[p])) +#else +# define HEAD2(b,p) (b[p] ^ ((unsigned)b[p+1]<<8)) +#endif +#define NIL2 SWD_UINT_MAX + +typedef struct lzo_swd { + /* public - "built-in" */ + + /* public - configuration */ + unsigned max_chain; + int use_best_off; + + /* public - output */ + unsigned m_len; + unsigned m_off; + unsigned look; + int b_char; +#if defined(SWD_BEST_OFF) + unsigned best_off[SWD_BEST_OFF]; +#endif + + /* semi public */ + lzo1x_999_t *c; + unsigned m_pos; +#if defined(SWD_BEST_OFF) + unsigned best_pos[SWD_BEST_OFF]; +#endif + + /* private */ + unsigned ip; /* input pointer (lookahead) */ + unsigned bp; /* buffer pointer */ + unsigned rp; /* remove pointer */ + + unsigned node_count; + unsigned first_rp; + + uint8_t b[SWD_N + SWD_F]; + uint8_t b_wrap[SWD_F]; /* must follow b */ + uint16_t head3[SWD_HSIZE]; + uint16_t succ3[SWD_N + SWD_F]; + uint16_t best3[SWD_N + SWD_F]; + uint16_t llen3[SWD_HSIZE]; +#ifdef HEAD2 + uint16_t head2[65536L]; +#endif +} lzo_swd_t, *lzo_swd_p; + +#define SIZEOF_LZO_SWD_T (sizeof(lzo_swd_t)) + + +/* Access macro for head3. + * head3[key] may be uninitialized, but then its value will never be used. + */ +#define s_get_head3(s,key) s->head3[key] + + +/*********************************************************************** +// +************************************************************************/ +#define B_SIZE (SWD_N + SWD_F) + +static int swd_init(lzo_swd_p s) +{ + /* defaults */ + s->node_count = SWD_N; + + memset(s->llen3, 0, sizeof(s->llen3[0]) * (unsigned)SWD_HSIZE); +#ifdef HEAD2 + memset(s->head2, 0xff, sizeof(s->head2[0]) * 65536L); + assert(s->head2[0] == NIL2); +#endif + + s->ip = 0; + s->bp = s->ip; + s->first_rp = s->ip; + + assert(s->ip + SWD_F <= B_SIZE); + s->look = (unsigned) (s->c->in_end - s->c->ip); + if (s->look > 0) { + if (s->look > SWD_F) + s->look = SWD_F; + memcpy(&s->b[s->ip], s->c->ip, s->look); + s->c->ip += s->look; + s->ip += s->look; + } + if (s->ip == B_SIZE) + s->ip = 0; + + s->rp = s->first_rp; + if (s->rp >= s->node_count) + s->rp -= s->node_count; + else + s->rp += B_SIZE - s->node_count; + + return LZO_E_OK; +} + +#define swd_pos2off(s,pos) \ + (s->bp > (pos) ? s->bp - (pos) : B_SIZE - ((pos) - s->bp)) + + +/*********************************************************************** +// +************************************************************************/ +static void swd_getbyte(lzo_swd_p s) +{ + int c; + + if ((c = getbyte(*(s->c))) < 0) { + if (s->look > 0) + --s->look; + } else { + s->b[s->ip] = c; + if (s->ip < SWD_F) + s->b_wrap[s->ip] = c; + } + if (++s->ip == B_SIZE) + s->ip = 0; + if (++s->bp == B_SIZE) + s->bp = 0; + if (++s->rp == B_SIZE) + s->rp = 0; +} + + +/*********************************************************************** +// remove node from lists +************************************************************************/ +static void swd_remove_node(lzo_swd_p s, unsigned node) +{ + if (s->node_count == 0) { + unsigned key; + + key = HEAD3(s->b,node); + assert(s->llen3[key] > 0); + --s->llen3[key]; + +#ifdef HEAD2 + key = HEAD2(s->b,node); + assert(s->head2[key] != NIL2); + if ((unsigned) s->head2[key] == node) + s->head2[key] = NIL2; +#endif + } else + --s->node_count; +} + + +/*********************************************************************** +// +************************************************************************/ +static void swd_accept(lzo_swd_p s, unsigned n) +{ + assert(n <= s->look); + + while (n--) { + unsigned key; + + swd_remove_node(s,s->rp); + + /* add bp into HEAD3 */ + key = HEAD3(s->b, s->bp); + s->succ3[s->bp] = s_get_head3(s, key); + s->head3[key] = s->bp; + s->best3[s->bp] = SWD_F + 1; + s->llen3[key]++; + assert(s->llen3[key] <= SWD_N); + +#ifdef HEAD2 + /* add bp into HEAD2 */ + key = HEAD2(s->b, s->bp); + s->head2[key] = s->bp; +#endif + + swd_getbyte(s); + } +} + + +/*********************************************************************** +// +************************************************************************/ +static void swd_search(lzo_swd_p s, unsigned node, unsigned cnt) +{ + const uint8_t *p1; + const uint8_t *p2; + const uint8_t *px; + unsigned m_len = s->m_len; + const uint8_t *b = s->b; + const uint8_t *bp = s->b + s->bp; + const uint8_t *bx = s->b + s->bp + s->look; + unsigned char scan_end1; + + assert(s->m_len > 0); + + scan_end1 = bp[m_len - 1]; + for ( ; cnt-- > 0; node = s->succ3[node]) { + p1 = bp; + p2 = b + node; + px = bx; + + assert(m_len < s->look); + + if (p2[m_len - 1] == scan_end1 + && p2[m_len] == p1[m_len] + && p2[0] == p1[0] + && p2[1] == p1[1] + ) { + unsigned i; + assert(lzo_memcmp(bp, &b[node], 3) == 0); + + p1 += 2; p2 += 2; + do {} while (++p1 < px && *p1 == *++p2); + i = p1-bp; + + assert(lzo_memcmp(bp, &b[node], i) == 0); + +#if defined(SWD_BEST_OFF) + if (i < SWD_BEST_OFF) { + if (s->best_pos[i] == 0) + s->best_pos[i] = node + 1; + } +#endif + if (i > m_len) { + s->m_len = m_len = i; + s->m_pos = node; + if (m_len == s->look) + return; + if (m_len >= SWD_F) + return; + if (m_len > (unsigned) s->best3[node]) + return; + scan_end1 = bp[m_len - 1]; + } + } + } +} + + +/*********************************************************************** +// +************************************************************************/ +#ifdef HEAD2 + +static int swd_search2(lzo_swd_p s) +{ + unsigned key; + + assert(s->look >= 2); + assert(s->m_len > 0); + + key = s->head2[HEAD2(s->b, s->bp)]; + if (key == NIL2) + return 0; + assert(lzo_memcmp(&s->b[s->bp], &s->b[key], 2) == 0); +#if defined(SWD_BEST_OFF) + if (s->best_pos[2] == 0) + s->best_pos[2] = key + 1; +#endif + + if (s->m_len < 2) { + s->m_len = 2; + s->m_pos = key; + } + return 1; +} + +#endif + + +/*********************************************************************** +// +************************************************************************/ +static void swd_findbest(lzo_swd_p s) +{ + unsigned key; + unsigned cnt, node; + unsigned len; + + assert(s->m_len > 0); + + /* get current head, add bp into HEAD3 */ + key = HEAD3(s->b,s->bp); + node = s->succ3[s->bp] = s_get_head3(s, key); + cnt = s->llen3[key]++; + assert(s->llen3[key] <= SWD_N + SWD_F); + if (cnt > s->max_chain) + cnt = s->max_chain; + s->head3[key] = s->bp; + + s->b_char = s->b[s->bp]; + len = s->m_len; + if (s->m_len >= s->look) { + if (s->look == 0) + s->b_char = -1; + s->m_off = 0; + s->best3[s->bp] = SWD_F + 1; + } else { +#ifdef HEAD2 + if (swd_search2(s)) +#endif + if (s->look >= 3) + swd_search(s, node, cnt); + if (s->m_len > len) + s->m_off = swd_pos2off(s,s->m_pos); + s->best3[s->bp] = s->m_len; + +#if defined(SWD_BEST_OFF) + if (s->use_best_off) { + int i; + for (i = 2; i < SWD_BEST_OFF; i++) { + if (s->best_pos[i] > 0) + s->best_off[i] = swd_pos2off(s, s->best_pos[i]-1); + else + s->best_off[i] = 0; + } + } +#endif + } + + swd_remove_node(s,s->rp); + +#ifdef HEAD2 + /* add bp into HEAD2 */ + key = HEAD2(s->b, s->bp); + s->head2[key] = s->bp; +#endif +} + +#undef HEAD3 +#undef HEAD2 +#undef s_get_head3 + + +/*********************************************************************** +// +************************************************************************/ +static int init_match(lzo1x_999_t *c, lzo_swd_p s, uint32_t use_best_off) +{ + int r; + + assert(!c->init); + c->init = 1; + + s->c = c; + + r = swd_init(s); + if (r != 0) + return r; + + s->use_best_off = use_best_off; + return r; +} + + +/*********************************************************************** +// +************************************************************************/ +static int find_match(lzo1x_999_t *c, lzo_swd_p s, + unsigned this_len, unsigned skip) +{ + assert(c->init); + + if (skip > 0) { + assert(this_len >= skip); + swd_accept(s, this_len - skip); + } else { + assert(this_len <= 1); + } + + s->m_len = 1; + s->m_len = 1; +#ifdef SWD_BEST_OFF + if (s->use_best_off) + memset(s->best_pos, 0, sizeof(s->best_pos)); +#endif + swd_findbest(s); + c->m_len = s->m_len; + c->m_off = s->m_off; + + swd_getbyte(s); + + if (s->b_char < 0) { + c->look = 0; + c->m_len = 0; + } else { + c->look = s->look + 1; + } + c->bp = c->ip - c->look; + + return LZO_E_OK; +} + +/* this is a public functions, but there is no prototype in a header file */ +static int lzo1x_999_compress_internal(const uint8_t *in , unsigned in_len, + uint8_t *out, unsigned *out_len, + void *wrkmem, + unsigned good_length, + unsigned max_lazy, + unsigned max_chain, + uint32_t use_best_off); + + +/*********************************************************************** +// +************************************************************************/ +static uint8_t *code_match(lzo1x_999_t *c, + uint8_t *op, unsigned m_len, unsigned m_off) +{ + assert(op > c->out); + if (m_len == 2) { + assert(m_off <= M1_MAX_OFFSET); + assert(c->r1_lit > 0); + assert(c->r1_lit < 4); + m_off -= 1; + *op++ = M1_MARKER | ((m_off & 3) << 2); + *op++ = m_off >> 2; + } else if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { + assert(m_len >= 3); + m_off -= 1; + *op++ = ((m_len - 1) << 5) | ((m_off & 7) << 2); + *op++ = m_off >> 3; + assert(op[-2] >= M2_MARKER); + } else if (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && c->r1_lit >= 4) { + assert(m_len == 3); + assert(m_off > M2_MAX_OFFSET); + m_off -= 1 + M2_MAX_OFFSET; + *op++ = M1_MARKER | ((m_off & 3) << 2); + *op++ = m_off >> 2; + } else if (m_off <= M3_MAX_OFFSET) { + assert(m_len >= 3); + m_off -= 1; + if (m_len <= M3_MAX_LEN) + *op++ = M3_MARKER | (m_len - 2); + else { + m_len -= M3_MAX_LEN; + *op++ = M3_MARKER | 0; + while (m_len > 255) { + m_len -= 255; + *op++ = 0; + } + assert(m_len > 0); + *op++ = m_len; + } + *op++ = m_off << 2; + *op++ = m_off >> 6; + } else { + unsigned k; + + assert(m_len >= 3); + assert(m_off > 0x4000); + assert(m_off <= 0xbfff); + m_off -= 0x4000; + k = (m_off & 0x4000) >> 11; + if (m_len <= M4_MAX_LEN) + *op++ = M4_MARKER | k | (m_len - 2); + else { + m_len -= M4_MAX_LEN; + *op++ = M4_MARKER | k | 0; + while (m_len > 255) { + m_len -= 255; + *op++ = 0; + } + assert(m_len > 0); + *op++ = m_len; + } + *op++ = m_off << 2; + *op++ = m_off >> 6; + } + + return op; +} + + +static uint8_t *STORE_RUN(lzo1x_999_t *c, uint8_t *op, + const uint8_t *ii, unsigned t) +{ + if (op == c->out && t <= 238) { + *op++ = 17 + t; + } else if (t <= 3) { + op[-2] |= t; + } else if (t <= 18) { + *op++ = t - 3; + } else { + unsigned tt = t - 18; + + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + assert(tt > 0); + *op++ = tt; + } + do *op++ = *ii++; while (--t > 0); + + return op; +} + + +static uint8_t *code_run(lzo1x_999_t *c, uint8_t *op, const uint8_t *ii, + unsigned lit) +{ + if (lit > 0) { + assert(m_len >= 2); + op = STORE_RUN(c, op, ii, lit); + } else { + assert(m_len >= 3); + } + c->r1_lit = lit; + + return op; +} + + +/*********************************************************************** +// +************************************************************************/ +static int len_of_coded_match(unsigned m_len, unsigned m_off, unsigned lit) +{ + int n = 4; + + if (m_len < 2) + return -1; + if (m_len == 2) + return (m_off <= M1_MAX_OFFSET && lit > 0 && lit < 4) ? 2 : -1; + if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) + return 2; + if (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && lit >= 4) + return 2; + if (m_off <= M3_MAX_OFFSET) { + if (m_len <= M3_MAX_LEN) + return 3; + m_len -= M3_MAX_LEN; + } else if (m_off <= M4_MAX_OFFSET) { + if (m_len <= M4_MAX_LEN) + return 3; + m_len -= M4_MAX_LEN; + } else + return -1; + while (m_len > 255) { + m_len -= 255; + n++; + } + return n; +} + + +static int min_gain(unsigned ahead, unsigned lit1, + unsigned lit2, int l1, int l2, int l3) +{ + int lazy_match_min_gain = 0; + + assert (ahead >= 1); + lazy_match_min_gain += ahead; + + if (lit1 <= 3) + lazy_match_min_gain += (lit2 <= 3) ? 0 : 2; + else if (lit1 <= 18) + lazy_match_min_gain += (lit2 <= 18) ? 0 : 1; + + lazy_match_min_gain += (l2 - l1) * 2; + if (l3 > 0) + lazy_match_min_gain -= (ahead - l3) * 2; + + if (lazy_match_min_gain < 0) + lazy_match_min_gain = 0; + + return lazy_match_min_gain; +} + + +/*********************************************************************** +// +************************************************************************/ +#if defined(SWD_BEST_OFF) + +static void better_match(const lzo_swd_p swd, + unsigned *m_len, unsigned *m_off) +{ + if (*m_len <= M2_MIN_LEN) + return; + + if (*m_off <= M2_MAX_OFFSET) + return; + + /* M3/M4 -> M2 */ + if (*m_off > M2_MAX_OFFSET + && *m_len >= M2_MIN_LEN + 1 && *m_len <= M2_MAX_LEN + 1 + && swd->best_off[*m_len-1] && swd->best_off[*m_len-1] <= M2_MAX_OFFSET + ) { + *m_len = *m_len - 1; + *m_off = swd->best_off[*m_len]; + return; + } + + /* M4 -> M2 */ + if (*m_off > M3_MAX_OFFSET + && *m_len >= M4_MAX_LEN + 1 && *m_len <= M2_MAX_LEN + 2 + && swd->best_off[*m_len-2] && swd->best_off[*m_len-2] <= M2_MAX_OFFSET + ) { + *m_len = *m_len - 2; + *m_off = swd->best_off[*m_len]; + return; + } + /* M4 -> M3 */ + if (*m_off > M3_MAX_OFFSET + && *m_len >= M4_MAX_LEN + 1 && *m_len <= M3_MAX_LEN + 1 + && swd->best_off[*m_len-1] && swd->best_off[*m_len-1] <= M3_MAX_OFFSET + ) { + *m_len = *m_len - 1; + *m_off = swd->best_off[*m_len]; + } +} + +#endif + + +/*********************************************************************** +// +************************************************************************/ +static int lzo1x_999_compress_internal(const uint8_t *in, unsigned in_len, + uint8_t *out, unsigned *out_len, + void *wrkmem, + unsigned good_length, + unsigned max_lazy, + unsigned max_chain, + uint32_t use_best_off) +{ + uint8_t *op; + const uint8_t *ii; + unsigned lit; + unsigned m_len, m_off; + lzo1x_999_t cc; + lzo1x_999_t *const c = &cc; + const lzo_swd_p swd = (lzo_swd_p) wrkmem; + int r; + + c->init = 0; + c->ip = c->in = in; + c->in_end = in + in_len; + c->out = out; + + op = out; + ii = c->ip; /* point to start of literal run */ + lit = 0; + c->r1_lit = 0; + + r = init_match(c, swd, use_best_off); + if (r != 0) + return r; + swd->max_chain = max_chain; + + r = find_match(c, swd, 0, 0); + if (r != 0) + return r; + + while (c->look > 0) { + unsigned ahead; + unsigned max_ahead; + int l1, l2, l3; + + m_len = c->m_len; + m_off = c->m_off; + + assert(c->bp == c->ip - c->look); + assert(c->bp >= in); + if (lit == 0) + ii = c->bp; + assert(ii + lit == c->bp); + assert(swd->b_char == *(c->bp)); + + if (m_len < 2 + || (m_len == 2 && (m_off > M1_MAX_OFFSET || lit == 0 || lit >= 4)) + /* Do not accept this match for compressed-data compatibility + * with LZO v1.01 and before + * [ might be a problem for decompress() and optimize() ] + */ + || (m_len == 2 && op == out) + || (op == out && lit == 0) + ) { + /* a literal */ + m_len = 0; + } + else if (m_len == M2_MIN_LEN) { + /* compression ratio improves if we code a literal in some cases */ + if (m_off > MX_MAX_OFFSET && lit >= 4) + m_len = 0; + } + + if (m_len == 0) { + /* a literal */ + lit++; + swd->max_chain = max_chain; + r = find_match(c, swd, 1, 0); + assert(r == 0); + continue; + } + + /* a match */ +#if defined(SWD_BEST_OFF) + if (swd->use_best_off) + better_match(swd, &m_len, &m_off); +#endif + + /* shall we try a lazy match ? */ + ahead = 0; + if (m_len >= max_lazy) { + /* no */ + l1 = 0; + max_ahead = 0; + } else { + /* yes, try a lazy match */ + l1 = len_of_coded_match(m_len, m_off, lit); + assert(l1 > 0); + max_ahead = LZO_MIN(2, (unsigned)l1 - 1); + } + + + while (ahead < max_ahead && c->look > m_len) { + int lazy_match_min_gain; + + if (m_len >= good_length) + swd->max_chain = max_chain >> 2; + else + swd->max_chain = max_chain; + r = find_match(c, swd, 1, 0); + ahead++; + + assert(r == 0); + assert(c->look > 0); + assert(ii + lit + ahead == c->bp); + + if (c->m_len < m_len) + continue; + if (c->m_len == m_len && c->m_off >= m_off) + continue; +#if defined(SWD_BEST_OFF) + if (swd->use_best_off) + better_match(swd, &c->m_len, &c->m_off); +#endif + l2 = len_of_coded_match(c->m_len, c->m_off, lit+ahead); + if (l2 < 0) + continue; + + /* compressed-data compatibility [see above] */ + l3 = (op == out) ? -1 : len_of_coded_match(ahead, m_off, lit); + + lazy_match_min_gain = min_gain(ahead, lit, lit+ahead, l1, l2, l3); + if (c->m_len >= m_len + lazy_match_min_gain) { + if (l3 > 0) { + /* code previous run */ + op = code_run(c, op, ii, lit); + lit = 0; + /* code shortened match */ + op = code_match(c, op, ahead, m_off); + } else { + lit += ahead; + assert(ii + lit == c->bp); + } + goto lazy_match_done; + } + } + + assert(ii + lit + ahead == c->bp); + + /* 1 - code run */ + op = code_run(c, op, ii, lit); + lit = 0; + + /* 2 - code match */ + op = code_match(c, op, m_len, m_off); + swd->max_chain = max_chain; + r = find_match(c, swd, m_len, 1+ahead); + assert(r == 0); + + lazy_match_done: ; + } + + /* store final run */ + if (lit > 0) + op = STORE_RUN(c, op, ii, lit); + +#if defined(LZO_EOF_CODE) + *op++ = M4_MARKER | 1; + *op++ = 0; + *op++ = 0; +#endif + + *out_len = op - out; + + return LZO_E_OK; +} + + +/*********************************************************************** +// +************************************************************************/ +int lzo1x_999_compress_level(const uint8_t *in, unsigned in_len, + uint8_t *out, unsigned *out_len, + void *wrkmem, + int compression_level) +{ + static const struct { + uint16_t good_length; + uint16_t max_lazy; + uint16_t max_chain; + uint16_t use_best_off; + } c[3] = { + { 8, 32, 256, 0 }, + { 32, 128, 2048, 1 }, + { SWD_F, SWD_F, 4096, 1 } /* max. compression */ + }; + + if (compression_level < 7 || compression_level > 9) + return LZO_E_ERROR; + + compression_level -= 7; + return lzo1x_999_compress_internal(in, in_len, out, out_len, wrkmem, + c[compression_level].good_length, + c[compression_level].max_lazy, + c[compression_level].max_chain, + c[compression_level].use_best_off); +} diff --git a/archival/libarchive/lzo1x_c.c b/archival/libarchive/lzo1x_c.c new file mode 100644 index 000000000..cc86f74b1 --- /dev/null +++ b/archival/libarchive/lzo1x_c.c @@ -0,0 +1,296 @@ +/* implementation of the LZO1[XY]-1 compression algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + Markus F.X.J. Oberhumer + http://www.oberhumer.com/opensource/lzo/ + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ + +/*********************************************************************** +// compress a block of data. +************************************************************************/ +static NOINLINE unsigned +do_compress(const uint8_t* in, unsigned in_len, + uint8_t* out, unsigned* out_len, + void* wrkmem) +{ + register const uint8_t* ip; + uint8_t* op; + const uint8_t* const in_end = in + in_len; + const uint8_t* const ip_end = in + in_len - M2_MAX_LEN - 5; + const uint8_t* ii; + const void* *const dict = (const void**) wrkmem; + + op = out; + ip = in; + ii = ip; + + ip += 4; + for (;;) { + register const uint8_t* m_pos; + unsigned m_off; + unsigned m_len; + unsigned dindex; + + D_INDEX1(dindex,ip); + GINDEX(m_pos,m_off,dict,dindex,in); + if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) + goto literal; +#if 1 + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + D_INDEX2(dindex,ip); +#endif + GINDEX(m_pos,m_off,dict,dindex,in); + if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) + goto literal; + if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) + goto try_match; + goto literal; + + try_match: +#if 1 && defined(LZO_UNALIGNED_OK_2) + if (* (const lzo_ushortp) m_pos != * (const lzo_ushortp) ip) +#else + if (m_pos[0] != ip[0] || m_pos[1] != ip[1]) +#endif + { + } else { + if (m_pos[2] == ip[2]) { +#if 0 + if (m_off <= M2_MAX_OFFSET) + goto match; + if (lit <= 3) + goto match; + if (lit == 3) { /* better compression, but slower */ + assert(op - 2 > out); op[-2] |= (uint8_t)(3); + *op++ = *ii++; *op++ = *ii++; *op++ = *ii++; + goto code_match; + } + if (m_pos[3] == ip[3]) +#endif + goto match; + } + else { + /* still need a better way for finding M1 matches */ +#if 0 + /* a M1 match */ +#if 0 + if (m_off <= M1_MAX_OFFSET && lit > 0 && lit <= 3) +#else + if (m_off <= M1_MAX_OFFSET && lit == 3) +#endif + { + register unsigned t; + + t = lit; + assert(op - 2 > out); op[-2] |= (uint8_t)(t); + do *op++ = *ii++; while (--t > 0); + assert(ii == ip); + m_off -= 1; + *op++ = (uint8_t)(M1_MARKER | ((m_off & 3) << 2)); + *op++ = (uint8_t)(m_off >> 2); + ip += 2; + goto match_done; + } +#endif + } + } + + /* a literal */ + literal: + UPDATE_I(dict, 0, dindex, ip, in); + ++ip; + if (ip >= ip_end) + break; + continue; + + /* a match */ +match: + UPDATE_I(dict, 0, dindex, ip, in); + /* store current literal run */ + if (pd(ip, ii) > 0) { + register unsigned t = pd(ip, ii); + + if (t <= 3) { + assert(op - 2 > out); + op[-2] |= (uint8_t)(t); + } + else if (t <= 18) + *op++ = (uint8_t)(t - 3); + else { + register unsigned tt = t - 18; + + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + assert(tt > 0); + *op++ = (uint8_t)(tt); + } + do *op++ = *ii++; while (--t > 0); + } + + /* code the match */ + assert(ii == ip); + ip += 3; + if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++ + || m_pos[6] != *ip++ || m_pos[7] != *ip++ || m_pos[8] != *ip++ +#ifdef LZO1Y + || m_pos[ 9] != *ip++ || m_pos[10] != *ip++ || m_pos[11] != *ip++ + || m_pos[12] != *ip++ || m_pos[13] != *ip++ || m_pos[14] != *ip++ +#endif + ) { + --ip; + m_len = pd(ip, ii); + assert(m_len >= 3); + assert(m_len <= M2_MAX_LEN); + + if (m_off <= M2_MAX_OFFSET) { + m_off -= 1; +#if defined(LZO1X) + *op++ = (uint8_t)(((m_len - 1) << 5) | ((m_off & 7) << 2)); + *op++ = (uint8_t)(m_off >> 3); +#elif defined(LZO1Y) + *op++ = (uint8_t)(((m_len + 1) << 4) | ((m_off & 3) << 2)); + *op++ = (uint8_t)(m_off >> 2); +#endif + } + else if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + *op++ = (uint8_t)(M3_MARKER | (m_len - 2)); + goto m3_m4_offset; + } else { +#if defined(LZO1X) + m_off -= 0x4000; + assert(m_off > 0); + assert(m_off <= 0x7fff); + *op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11) | (m_len - 2)); + goto m3_m4_offset; +#elif defined(LZO1Y) + goto m4_match; +#endif + } + } + else { + { + const uint8_t* end = in_end; + const uint8_t* m = m_pos + M2_MAX_LEN + 1; + while (ip < end && *m == *ip) + m++, ip++; + m_len = pd(ip, ii); + } + assert(m_len > M2_MAX_LEN); + + if (m_off <= M3_MAX_OFFSET) { + m_off -= 1; + if (m_len <= 33) + *op++ = (uint8_t)(M3_MARKER | (m_len - 2)); + else { + m_len -= 33; + *op++ = M3_MARKER | 0; + goto m3_m4_len; + } + } else { +#if defined(LZO1Y) + m4_match: +#endif + m_off -= 0x4000; + assert(m_off > 0); + assert(m_off <= 0x7fff); + if (m_len <= M4_MAX_LEN) + *op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11) | (m_len - 2)); + else { + m_len -= M4_MAX_LEN; + *op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11)); + m3_m4_len: + while (m_len > 255) { + m_len -= 255; + *op++ = 0; + } + assert(m_len > 0); + *op++ = (uint8_t)(m_len); + } + } + m3_m4_offset: + *op++ = (uint8_t)((m_off & 63) << 2); + *op++ = (uint8_t)(m_off >> 6); + } +#if 0 + match_done: +#endif + ii = ip; + if (ip >= ip_end) + break; + } + + *out_len = pd(op, out); + return pd(in_end, ii); +} + +/*********************************************************************** +// public entry point +************************************************************************/ +int DO_COMPRESS(const uint8_t* in, unsigned in_len, + uint8_t* out, unsigned* out_len, + void* wrkmem) +{ + uint8_t* op = out; + unsigned t; + + if (in_len <= M2_MAX_LEN + 5) + t = in_len; + else { + t = do_compress(in,in_len,op,out_len,wrkmem); + op += *out_len; + } + + if (t > 0) { + const uint8_t* ii = in + in_len - t; + + if (op == out && t <= 238) + *op++ = (uint8_t)(17 + t); + else if (t <= 3) + op[-2] |= (uint8_t)(t); + else if (t <= 18) + *op++ = (uint8_t)(t - 3); + else { + unsigned tt = t - 18; + + *op++ = 0; + while (tt > 255) { + tt -= 255; + *op++ = 0; + } + assert(tt > 0); + *op++ = (uint8_t)(tt); + } + do *op++ = *ii++; while (--t > 0); + } + + *op++ = M4_MARKER | 1; + *op++ = 0; + *op++ = 0; + + *out_len = pd(op, out); + return 0; /*LZO_E_OK*/ +} diff --git a/archival/libarchive/lzo1x_d.c b/archival/libarchive/lzo1x_d.c new file mode 100644 index 000000000..348a85510 --- /dev/null +++ b/archival/libarchive/lzo1x_d.c @@ -0,0 +1,420 @@ +/* implementation of the LZO1X decompression algorithm + + This file is part of the LZO real-time data compression library. + + Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer + All Rights Reserved. + + Markus F.X.J. Oberhumer + http://www.oberhumer.com/opensource/lzo/ + + The LZO library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public License as + published by the Free Software Foundation; either version 2 of + the License, or (at your option) any later version. + + The LZO library is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with the LZO library; see the file COPYING. + If not, write to the Free Software Foundation, Inc., + 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + */ +#include "libbb.h" +#include "liblzo.h" + +/*********************************************************************** +// decompress a block of data. +************************************************************************/ +/* safe decompression with overrun testing */ +int lzo1x_decompress_safe(const uint8_t* in, unsigned in_len, + uint8_t* out, unsigned* out_len, + void* wrkmem UNUSED_PARAM) +{ + register uint8_t* op; + register const uint8_t* ip; + register unsigned t; +#if defined(COPY_DICT) + unsigned m_off; + const uint8_t* dict_end; +#else + register const uint8_t* m_pos = NULL; /* possibly not needed */ +#endif + const uint8_t* const ip_end = in + in_len; +#if defined(HAVE_ANY_OP) + uint8_t* const op_end = out + *out_len; +#endif +#if defined(LZO1Z) + unsigned last_m_off = 0; +#endif + +// LZO_UNUSED(wrkmem); + +#if defined(COPY_DICT) + if (dict) { + if (dict_len > M4_MAX_OFFSET) { + dict += dict_len - M4_MAX_OFFSET; + dict_len = M4_MAX_OFFSET; + } + dict_end = dict + dict_len; + } else { + dict_len = 0; + dict_end = NULL; + } +#endif /* COPY_DICT */ + + *out_len = 0; + + op = out; + ip = in; + + if (*ip > 17) { + t = *ip++ - 17; + if (t < 4) + goto match_next; + assert(t > 0); NEED_OP(t); NEED_IP(t+1); + do *op++ = *ip++; while (--t > 0); + goto first_literal_run; + } + + while (TEST_IP && TEST_OP) { + t = *ip++; + if (t >= 16) + goto match; + /* a literal run */ + if (t == 0) { + NEED_IP(1); + while (*ip == 0) { + t += 255; + ip++; + NEED_IP(1); + } + t += 15 + *ip++; + } + /* copy literals */ + assert(t > 0); + NEED_OP(t+3); + NEED_IP(t+4); +#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) +# if !defined(LZO_UNALIGNED_OK_4) + if (PTR_ALIGNED2_4(op, ip)) +# endif + { + COPY4(op, ip); + op += 4; + ip += 4; + if (--t > 0) { + if (t >= 4) { + do { + COPY4(op, ip); + op += 4; + ip += 4; + t -= 4; + } while (t >= 4); + if (t > 0) + do *op++ = *ip++; while (--t > 0); + } else { + do *op++ = *ip++; while (--t > 0); + } + } + } +# if !defined(LZO_UNALIGNED_OK_4) + else +# endif +#endif +#if !defined(LZO_UNALIGNED_OK_4) + { + *op++ = *ip++; + *op++ = *ip++; + *op++ = *ip++; + do *op++ = *ip++; while (--t > 0); + } +#endif + + first_literal_run: + t = *ip++; + if (t >= 16) + goto match; +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); + last_m_off = m_off; +#else + m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2); +#endif + NEED_OP(3); + t = 3; COPY_DICT(t,m_off) +#else /* !COPY_DICT */ +#if defined(LZO1Z) + t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); + m_pos = op - t; + last_m_off = t; +#else + m_pos = op - (1 + M2_MAX_OFFSET); + m_pos -= t >> 2; + m_pos -= *ip++ << 2; +#endif + TEST_LB(m_pos); NEED_OP(3); + *op++ = *m_pos++; + *op++ = *m_pos++; + *op++ = *m_pos; +#endif /* COPY_DICT */ + goto match_done; + + /* handle matches */ + do { + match: + if (t >= 64) { /* a M2 match */ +#if defined(COPY_DICT) +#if defined(LZO1X) + m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3); + t = (t >> 5) - 1; +#elif defined(LZO1Y) + m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2); + t = (t >> 4) - 3; +#elif defined(LZO1Z) + m_off = t & 0x1f; + if (m_off >= 0x1c) + m_off = last_m_off; + else { + m_off = 1 + (m_off << 6) + (*ip++ >> 2); + last_m_off = m_off; + } + t = (t >> 5) - 1; +#endif +#else /* !COPY_DICT */ +#if defined(LZO1X) + m_pos = op - 1; + m_pos -= (t >> 2) & 7; + m_pos -= *ip++ << 3; + t = (t >> 5) - 1; +#elif defined(LZO1Y) + m_pos = op - 1; + m_pos -= (t >> 2) & 3; + m_pos -= *ip++ << 2; + t = (t >> 4) - 3; +#elif defined(LZO1Z) + { + unsigned off = t & 0x1f; + m_pos = op; + if (off >= 0x1c) { + assert(last_m_off > 0); + m_pos -= last_m_off; + } else { + off = 1 + (off << 6) + (*ip++ >> 2); + m_pos -= off; + last_m_off = off; + } + } + t = (t >> 5) - 1; +#endif + TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1); + goto copy_match; +#endif /* COPY_DICT */ + } + else if (t >= 32) { /* a M3 match */ + t &= 31; + if (t == 0) { + NEED_IP(1); + while (*ip == 0) { + t += 255; + ip++; + NEED_IP(1); + } + t += 31 + *ip++; + } +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = 1 + (ip[0] << 6) + (ip[1] >> 2); + last_m_off = m_off; +#else + m_off = 1 + (ip[0] >> 2) + (ip[1] << 6); +#endif +#else /* !COPY_DICT */ +#if defined(LZO1Z) + { + unsigned off = 1 + (ip[0] << 6) + (ip[1] >> 2); + m_pos = op - off; + last_m_off = off; + } +#elif defined(LZO_UNALIGNED_OK_2) && defined(LZO_ABI_LITTLE_ENDIAN) + m_pos = op - 1; + m_pos -= (* (const lzo_ushortp) ip) >> 2; +#else + m_pos = op - 1; + m_pos -= (ip[0] >> 2) + (ip[1] << 6); +#endif +#endif /* COPY_DICT */ + ip += 2; + } + else if (t >= 16) { /* a M4 match */ +#if defined(COPY_DICT) + m_off = (t & 8) << 11; +#else /* !COPY_DICT */ + m_pos = op; + m_pos -= (t & 8) << 11; +#endif /* COPY_DICT */ + t &= 7; + if (t == 0) { + NEED_IP(1); + while (*ip == 0) { + t += 255; + ip++; + NEED_IP(1); + } + t += 7 + *ip++; + } +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off += (ip[0] << 6) + (ip[1] >> 2); +#else + m_off += (ip[0] >> 2) + (ip[1] << 6); +#endif + ip += 2; + if (m_off == 0) + goto eof_found; + m_off += 0x4000; +#if defined(LZO1Z) + last_m_off = m_off; +#endif +#else /* !COPY_DICT */ +#if defined(LZO1Z) + m_pos -= (ip[0] << 6) + (ip[1] >> 2); +#elif defined(LZO_UNALIGNED_OK_2) && defined(LZO_ABI_LITTLE_ENDIAN) + m_pos -= (* (const lzo_ushortp) ip) >> 2; +#else + m_pos -= (ip[0] >> 2) + (ip[1] << 6); +#endif + ip += 2; + if (m_pos == op) + goto eof_found; + m_pos -= 0x4000; +#if defined(LZO1Z) + last_m_off = pd((const uint8_t*)op, m_pos); +#endif +#endif /* COPY_DICT */ + } + else { /* a M1 match */ +#if defined(COPY_DICT) +#if defined(LZO1Z) + m_off = 1 + (t << 6) + (*ip++ >> 2); + last_m_off = m_off; +#else + m_off = 1 + (t >> 2) + (*ip++ << 2); +#endif + NEED_OP(2); + t = 2; COPY_DICT(t,m_off) +#else /* !COPY_DICT */ +#if defined(LZO1Z) + t = 1 + (t << 6) + (*ip++ >> 2); + m_pos = op - t; + last_m_off = t; +#else + m_pos = op - 1; + m_pos -= t >> 2; + m_pos -= *ip++ << 2; +#endif + TEST_LB(m_pos); NEED_OP(2); + *op++ = *m_pos++; + *op++ = *m_pos; +#endif /* COPY_DICT */ + goto match_done; + } + + /* copy match */ +#if defined(COPY_DICT) + + NEED_OP(t+3-1); + t += 3-1; COPY_DICT(t,m_off) + +#else /* !COPY_DICT */ + + TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1); +#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) +# if !defined(LZO_UNALIGNED_OK_4) + if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos)) { + assert((op - m_pos) >= 4); /* both pointers are aligned */ +# else + if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { +# endif + COPY4(op,m_pos); + op += 4; m_pos += 4; t -= 4 - (3 - 1); + do { + COPY4(op,m_pos); + op += 4; m_pos += 4; t -= 4; + } while (t >= 4); + if (t > 0) + do *op++ = *m_pos++; while (--t > 0); + } + else +#endif + { + copy_match: + *op++ = *m_pos++; *op++ = *m_pos++; + do *op++ = *m_pos++; while (--t > 0); + } + +#endif /* COPY_DICT */ + + match_done: +#if defined(LZO1Z) + t = ip[-1] & 3; +#else + t = ip[-2] & 3; +#endif + if (t == 0) + break; + + /* copy literals */ + match_next: + assert(t > 0); + assert(t < 4); + NEED_OP(t); + NEED_IP(t+1); +#if 0 + do *op++ = *ip++; while (--t > 0); +#else + *op++ = *ip++; + if (t > 1) { + *op++ = *ip++; + if (t > 2) + *op++ = *ip++; + } +#endif + t = *ip++; + } while (TEST_IP && TEST_OP); + } + +//#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP) + /* no EOF code was found */ + *out_len = pd(op, out); + return LZO_E_EOF_NOT_FOUND; +//#endif + + eof_found: + assert(t == 1); + *out_len = pd(op, out); + return (ip == ip_end ? LZO_E_OK : + (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); + +//#if defined(HAVE_NEED_IP) + input_overrun: + *out_len = pd(op, out); + return LZO_E_INPUT_OVERRUN; +//#endif + +//#if defined(HAVE_NEED_OP) + output_overrun: + *out_len = pd(op, out); + return LZO_E_OUTPUT_OVERRUN; +//#endif + +//#if defined(LZO_TEST_OVERRUN_LOOKBEHIND) + lookbehind_overrun: + *out_len = pd(op, out); + return LZO_E_LOOKBEHIND_OVERRUN; +//#endif +} diff --git a/archival/libarchive/open_transformer.c b/archival/libarchive/open_transformer.c new file mode 100644 index 000000000..26ae565f5 --- /dev/null +++ b/archival/libarchive/open_transformer.c @@ -0,0 +1,54 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* transformer(), more than meets the eye */ +/* + * On MMU machine, the transform_prog is removed by macro magic + * in include/archive.h. On NOMMU, transformer is removed. + */ +void FAST_FUNC open_transformer(int fd, + IF_DESKTOP(long long) int FAST_FUNC (*transformer)(int src_fd, int dst_fd), + const char *transform_prog) +{ + struct fd_pair fd_pipe; + int pid; + + xpiped_pair(fd_pipe); + pid = BB_MMU ? xfork() : xvfork(); + if (pid == 0) { + /* Child */ + close(fd_pipe.rd); /* we don't want to read from the parent */ + // FIXME: error check? +#if BB_MMU + transformer(fd, fd_pipe.wr); + if (ENABLE_FEATURE_CLEAN_UP) { + close(fd_pipe.wr); /* send EOF */ + close(fd); + } + /* must be _exit! bug was actually seen here */ + _exit(EXIT_SUCCESS); +#else + { + char *argv[4]; + xmove_fd(fd, 0); + xmove_fd(fd_pipe.wr, 1); + argv[0] = (char*)transform_prog; + argv[1] = (char*)"-cf"; + argv[2] = (char*)"-"; + argv[3] = NULL; + BB_EXECVP(transform_prog, argv); + bb_perror_msg_and_die("can't execute '%s'", transform_prog); + } +#endif + /* notreached */ + } + + /* parent process */ + close(fd_pipe.wr); /* don't want to write to the child */ + xmove_fd(fd_pipe.rd, fd); +} diff --git a/archival/libarchive/seek_by_jump.c b/archival/libarchive/seek_by_jump.c new file mode 100644 index 000000000..7c2c52ae1 --- /dev/null +++ b/archival/libarchive/seek_by_jump.c @@ -0,0 +1,19 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +void FAST_FUNC seek_by_jump(int fd, off_t amount) +{ + if (amount + && lseek(fd, amount, SEEK_CUR) == (off_t) -1 + ) { + if (errno == ESPIPE) + seek_by_read(fd, amount); + else + bb_perror_msg_and_die("seek failure"); + } +} diff --git a/archival/libarchive/seek_by_read.c b/archival/libarchive/seek_by_read.c new file mode 100644 index 000000000..ad931a8de --- /dev/null +++ b/archival/libarchive/seek_by_read.c @@ -0,0 +1,16 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" + +/* If we are reading through a pipe, or from stdin then we can't lseek, + * we must read and discard the data to skip over it. + */ +void FAST_FUNC seek_by_read(int fd, off_t amount) +{ + if (amount) + bb_copyfd_exact_size(fd, -1, amount); +} diff --git a/archival/libarchive/unpack_ar_archive.c b/archival/libarchive/unpack_ar_archive.c new file mode 100644 index 000000000..18dbfd54d --- /dev/null +++ b/archival/libarchive/unpack_ar_archive.c @@ -0,0 +1,22 @@ +/* vi: set sw=4 ts=4: */ +/* + * Licensed under GPLv2 or later, see file LICENSE in this source tree. + */ + +#include "libbb.h" +#include "archive.h" +#include "ar.h" + +void FAST_FUNC unpack_ar_archive(archive_handle_t *ar_archive) +{ + char magic[7]; + + xread(ar_archive->src_fd, magic, AR_MAGIC_LEN); + if (strncmp(magic, AR_MAGIC, AR_MAGIC_LEN) != 0) { + bb_error_msg_and_die("invalid ar magic"); + } + ar_archive->offset += AR_MAGIC_LEN; + + while (get_header_ar(ar_archive) == EXIT_SUCCESS) + continue; +} diff --git a/archival/libarchive/unxz/README b/archival/libarchive/unxz/README new file mode 100644 index 000000000..c5972f6b8 --- /dev/null +++ b/archival/libarchive/unxz/README @@ -0,0 +1,135 @@ + +XZ Embedded +=========== + + XZ Embedded is a relatively small, limited implementation of the .xz + file format. Currently only decoding is implemented. + + XZ Embedded was written for use in the Linux kernel, but the code can + be easily used in other environments too, including regular userspace + applications. + + This README contains information that is useful only when the copy + of XZ Embedded isn't part of the Linux kernel tree. You should also + read linux/Documentation/xz.txt even if you aren't using XZ Embedded + as part of Linux; information in that file is not repeated in this + README. + +Compiling the Linux kernel module + + The xz_dec module depends on crc32 module, so make sure that you have + it enabled (CONFIG_CRC32). + + Building the xz_dec and xz_dec_test modules without support for BCJ + filters: + + cd linux/lib/xz + make -C /path/to/kernel/source \ + KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \ + CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m + + Building the xz_dec and xz_dec_test modules with support for BCJ + filters: + + cd linux/lib/xz + make -C /path/to/kernel/source \ + KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \ + CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m CONFIG_XZ_DEC_BCJ=y \ + CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y \ + CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y \ + CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y + + If you want only one or a few of the BCJ filters, omit the appropriate + variables. CONFIG_XZ_DEC_BCJ=y is always required to build the support + code shared between all BCJ filters. + + Most people don't need the xz_dec_test module. You can skip building + it by omitting CONFIG_XZ_DEC_TEST=m from the make command line. + +Compiler requirements + + XZ Embedded should compile as either GNU-C89 (used in the Linux + kernel) or with any C99 compiler. Getting the code to compile with + non-GNU C89 compiler or a C++ compiler should be quite easy as + long as there is a data type for unsigned 64-bit integer (or the + code is modified not to support large files, which needs some more + care than just using 32-bit integer instead of 64-bit). + + If you use GCC, try to use a recent version. For example, on x86, + xz_dec_lzma2.c compiled with GCC 3.3.6 is 15-25 % slower than when + compiled with GCC 4.3.3. + +Embedding into userspace applications + + To embed the XZ decoder, copy the following files into a single + directory in your source code tree: + + linux/include/linux/xz.h + linux/lib/xz/xz_crc32.c + linux/lib/xz/xz_dec_lzma2.c + linux/lib/xz/xz_dec_stream.c + linux/lib/xz/xz_lzma2.h + linux/lib/xz/xz_private.h + linux/lib/xz/xz_stream.h + userspace/xz_config.h + + Alternatively, xz.h may be placed into a different directory but then + that directory must be in the compiler include path when compiling + the .c files. + + Your code should use only the functions declared in xz.h. The rest of + the .h files are meant only for internal use in XZ Embedded. + + You may want to modify xz_config.h to be more suitable for your build + environment. Probably you should at least skim through it even if the + default file works as is. + +BCJ filter support + + If you want support for one or more BCJ filters, you need to copy also + linux/lib/xz/xz_dec_bcj.c into your application, and use appropriate + #defines in xz_config.h or in compiler flags. You don't need these + #defines in the code that just uses XZ Embedded via xz.h, but having + them always #defined doesn't hurt either. + + #define Instruction set BCJ filter endianness + XZ_DEC_X86 x86 or x86-64 Little endian only + XZ_DEC_POWERPC PowerPC Big endian only + XZ_DEC_IA64 Itanium (IA-64) Big or little endian + XZ_DEC_ARM ARM Little endian only + XZ_DEC_ARMTHUMB ARM-Thumb Little endian only + XZ_DEC_SPARC SPARC Big or little endian + + While some architectures are (partially) bi-endian, the endianness + setting doesn't change the endianness of the instructions on all + architectures. That's why Itanium and SPARC filters work for both big + and little endian executables (Itanium has little endian instructions + and SPARC has big endian instructions). + + There currently is no filter for little endian PowerPC or big endian + ARM or ARM-Thumb. Implementing filters for them can be considered if + there is a need for such filters in real-world applications. + +Notes about shared libraries + + If you are including XZ Embedded into a shared library, you very + probably should rename the xz_* functions to prevent symbol + conflicts in case your library is linked against some other library + or application that also has XZ Embedded in it (which may even be + a different version of XZ Embedded). TODO: Provide an easy way + to do this. + + Please don't create a shared library of XZ Embedded itself unless + it is fine to rebuild everything depending on that shared library + everytime you upgrade to a newer version of XZ Embedded. There are + no API or ABI stability guarantees between different versions of + XZ Embedded. + +Specifying the calling convention + + XZ_FUNC macro was included to support declaring functions with __init + in Linux. Outside Linux, it can be used to specify the calling + convention on systems that support multiple calling conventions. + For example, on Windows, you may make all functions use the stdcall + calling convention by defining XZ_FUNC=__stdcall when building and + using the functions from XZ Embedded. diff --git a/archival/libarchive/unxz/xz.h b/archival/libarchive/unxz/xz.h new file mode 100644 index 000000000..c6c071c4a --- /dev/null +++ b/archival/libarchive/unxz/xz.h @@ -0,0 +1,271 @@ +/* + * XZ decompressor + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_H +#define XZ_H + +#ifdef __KERNEL__ +# include +# include +#else +# include +# include +#endif + +/* In Linux, this is used to make extern functions static when needed. */ +#ifndef XZ_EXTERN +# define XZ_EXTERN extern +#endif + +/* In Linux, this is used to mark the functions with __init when needed. */ +#ifndef XZ_FUNC +# define XZ_FUNC +#endif + +/** + * enum xz_mode - Operation mode + * + * @XZ_SINGLE: Single-call mode. This uses less RAM than + * than multi-call modes, because the LZMA2 + * dictionary doesn't need to be allocated as + * part of the decoder state. All required data + * structures are allocated at initialization, + * so xz_dec_run() cannot return XZ_MEM_ERROR. + * @XZ_PREALLOC: Multi-call mode with preallocated LZMA2 + * dictionary buffer. All data structures are + * allocated at initialization, so xz_dec_run() + * cannot return XZ_MEM_ERROR. + * @XZ_DYNALLOC: Multi-call mode. The LZMA2 dictionary is + * allocated once the required size has been + * parsed from the stream headers. If the + * allocation fails, xz_dec_run() will return + * XZ_MEM_ERROR. + * + * It is possible to enable support only for a subset of the above + * modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC, + * or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled + * with support for all operation modes, but the preboot code may + * be built with fewer features to minimize code size. + */ +enum xz_mode { + XZ_SINGLE, + XZ_PREALLOC, + XZ_DYNALLOC +}; + +/** + * enum xz_ret - Return codes + * @XZ_OK: Everything is OK so far. More input or more + * output space is required to continue. This + * return code is possible only in multi-call mode + * (XZ_PREALLOC or XZ_DYNALLOC). + * @XZ_STREAM_END: Operation finished successfully. + * @XZ_UNSUPPORTED_CHECK: Integrity check type is not supported. Decoding + * is still possible in multi-call mode by simply + * calling xz_dec_run() again. + * NOTE: This return value is used only if + * XZ_DEC_ANY_CHECK was defined at build time, + * which is not used in the kernel. Unsupported + * check types return XZ_OPTIONS_ERROR if + * XZ_DEC_ANY_CHECK was not defined at build time. + * @XZ_MEM_ERROR: Allocating memory failed. This return code is + * possible only if the decoder was initialized + * with XZ_DYNALLOC. The amount of memory that was + * tried to be allocated was no more than the + * dict_max argument given to xz_dec_init(). + * @XZ_MEMLIMIT_ERROR: A bigger LZMA2 dictionary would be needed than + * allowed by the dict_max argument given to + * xz_dec_init(). This return value is possible + * only in multi-call mode (XZ_PREALLOC or + * XZ_DYNALLOC); the single-call mode (XZ_SINGLE) + * ignores the dict_max argument. + * @XZ_FORMAT_ERROR: File format was not recognized (wrong magic + * bytes). + * @XZ_OPTIONS_ERROR: This implementation doesn't support the requested + * compression options. In the decoder this means + * that the header CRC32 matches, but the header + * itself specifies something that we don't support. + * @XZ_DATA_ERROR: Compressed data is corrupt. + * @XZ_BUF_ERROR: Cannot make any progress. Details are slightly + * different between multi-call and single-call + * mode; more information below. + * + * In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls + * to XZ code cannot consume any input and cannot produce any new output. + * This happens when there is no new input available, or the output buffer + * is full while at least one output byte is still pending. Assuming your + * code is not buggy, you can get this error only when decoding a compressed + * stream that is truncated or otherwise corrupt. + * + * In single-call mode, XZ_BUF_ERROR is returned only when the output buffer + * is too small, or the compressed input is corrupt in a way that makes the + * decoder produce more output than the caller expected. When it is + * (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR + * is used instead of XZ_BUF_ERROR. + */ +enum xz_ret { + XZ_OK, + XZ_STREAM_END, + XZ_UNSUPPORTED_CHECK, + XZ_MEM_ERROR, + XZ_MEMLIMIT_ERROR, + XZ_FORMAT_ERROR, + XZ_OPTIONS_ERROR, + XZ_DATA_ERROR, + XZ_BUF_ERROR +}; + +/** + * struct xz_buf - Passing input and output buffers to XZ code + * @in: Beginning of the input buffer. This may be NULL if and only + * if in_pos is equal to in_size. + * @in_pos: Current position in the input buffer. This must not exceed + * in_size. + * @in_size: Size of the input buffer + * @out: Beginning of the output buffer. This may be NULL if and only + * if out_pos is equal to out_size. + * @out_pos: Current position in the output buffer. This must not exceed + * out_size. + * @out_size: Size of the output buffer + * + * Only the contents of the output buffer from out[out_pos] onward, and + * the variables in_pos and out_pos are modified by the XZ code. + */ +struct xz_buf { + const uint8_t *in; + size_t in_pos; + size_t in_size; + + uint8_t *out; + size_t out_pos; + size_t out_size; +}; + +/** + * struct xz_dec - Opaque type to hold the XZ decoder state + */ +struct xz_dec; + +/** + * xz_dec_init() - Allocate and initialize a XZ decoder state + * @mode: Operation mode + * @dict_max: Maximum size of the LZMA2 dictionary (history buffer) for + * multi-call decoding. This is ignored in single-call mode + * (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes + * or 2^n + 2^(n-1) bytes (the latter sizes are less common + * in practice), so other values for dict_max don't make sense. + * In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB, + * 512 KiB, and 1 MiB are probably the only reasonable values, + * except for kernel and initramfs images where a bigger + * dictionary can be fine and useful. + * + * Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at + * once. The caller must provide enough output space or the decoding will + * fail. The output space is used as the dictionary buffer, which is why + * there is no need to allocate the dictionary as part of the decoder's + * internal state. + * + * Because the output buffer is used as the workspace, streams encoded using + * a big dictionary are not a problem in single-call mode. It is enough that + * the output buffer is big enough to hold the actual uncompressed data; it + * can be smaller than the dictionary size stored in the stream headers. + * + * Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes + * of memory is preallocated for the LZMA2 dictionary. This way there is no + * risk that xz_dec_run() could run out of memory, since xz_dec_run() will + * never allocate any memory. Instead, if the preallocated dictionary is too + * small for decoding the given input stream, xz_dec_run() will return + * XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be + * decoded to avoid allocating excessive amount of memory for the dictionary. + * + * Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC): + * dict_max specifies the maximum allowed dictionary size that xz_dec_run() + * may allocate once it has parsed the dictionary size from the stream + * headers. This way excessive allocations can be avoided while still + * limiting the maximum memory usage to a sane value to prevent running the + * system out of memory when decompressing streams from untrusted sources. + * + * On success, xz_dec_init() returns a pointer to struct xz_dec, which is + * ready to be used with xz_dec_run(). If memory allocation fails, + * xz_dec_init() returns NULL. + */ +XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init( + enum xz_mode mode, uint32_t dict_max); + +/** + * xz_dec_run() - Run the XZ decoder + * @s: Decoder state allocated using xz_dec_init() + * @b: Input and output buffers + * + * The possible return values depend on build options and operation mode. + * See enum xz_ret for details. + * + * NOTE: If an error occurs in single-call mode (return value is not + * XZ_STREAM_END), b->in_pos and b->out_pos are not modified, and the + * contents of the output buffer from b->out[b->out_pos] onward are + * undefined. This is true even after XZ_BUF_ERROR, because with some filter + * chains, there may be a second pass over the output buffer, and this pass + * cannot be properly done if the output buffer is truncated. Thus, you + * cannot give the single-call decoder a too small buffer and then expect to + * get that amount valid data from the beginning of the stream. You must use + * the multi-call decoder if you don't want to uncompress the whole stream. + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b); + +/** + * xz_dec_reset() - Reset an already allocated decoder state + * @s: Decoder state allocated using xz_dec_init() + * + * This function can be used to reset the multi-call decoder state without + * freeing and reallocating memory with xz_dec_end() and xz_dec_init(). + * + * In single-call mode, xz_dec_reset() is always called in the beginning of + * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in + * multi-call mode. + */ +XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s); + +/** + * xz_dec_end() - Free the memory allocated for the decoder state + * @s: Decoder state allocated using xz_dec_init(). If s is NULL, + * this function does nothing. + */ +XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s); + +/* + * Standalone build (userspace build or in-kernel build for boot time use) + * needs a CRC32 implementation. For normal in-kernel use, kernel's own + * CRC32 module is used instead, and users of this module don't need to + * care about the functions below. + */ +#ifndef XZ_INTERNAL_CRC32 +# ifdef __KERNEL__ +# define XZ_INTERNAL_CRC32 0 +# else +# define XZ_INTERNAL_CRC32 1 +# endif +#endif + +#if XZ_INTERNAL_CRC32 +/* + * This must be called before any other xz_* function to initialize + * the CRC32 lookup table. + */ +XZ_EXTERN void XZ_FUNC xz_crc32_init(void); + +/* + * Update CRC32 value using the polynomial from IEEE-802.3. To start a new + * calculation, the third argument must be zero. To continue the calculation, + * the previously returned value is passed as the third argument. + */ +XZ_EXTERN uint32_t XZ_FUNC xz_crc32( + const uint8_t *buf, size_t size, uint32_t crc); +#endif +#endif diff --git a/archival/libarchive/unxz/xz_config.h b/archival/libarchive/unxz/xz_config.h new file mode 100644 index 000000000..187e1cbed --- /dev/null +++ b/archival/libarchive/unxz/xz_config.h @@ -0,0 +1,123 @@ +/* + * Private includes and definitions for userspace use of XZ Embedded + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_CONFIG_H +#define XZ_CONFIG_H + +/* Uncomment as needed to enable BCJ filter decoders. */ +/* #define XZ_DEC_X86 */ +/* #define XZ_DEC_POWERPC */ +/* #define XZ_DEC_IA64 */ +/* #define XZ_DEC_ARM */ +/* #define XZ_DEC_ARMTHUMB */ +/* #define XZ_DEC_SPARC */ + +#include +#include +#include + +#include "xz.h" + +#define kmalloc(size, flags) malloc(size) +#define kfree(ptr) free(ptr) +#define vmalloc(size) malloc(size) +#define vfree(ptr) free(ptr) + +#define memeq(a, b, size) (memcmp(a, b, size) == 0) +#define memzero(buf, size) memset(buf, 0, size) + +#undef min +#undef min_t +#define min(x, y) ((x) < (y) ? (x) : (y)) +#define min_t(type, x, y) min(x, y) + +/* + * Some functions have been marked with __always_inline to keep the + * performance reasonable even when the compiler is optimizing for + * small code size. You may be able to save a few bytes by #defining + * __always_inline to plain inline, but don't complain if the code + * becomes slow. + * + * NOTE: System headers on GNU/Linux may #define this macro already, + * so if you want to change it, you need to #undef it first. + */ +#ifndef __always_inline +# ifdef __GNUC__ +# define __always_inline \ + inline __attribute__((__always_inline__)) +# else +# define __always_inline inline +# endif +#endif + +/* + * Some functions are marked to never be inlined to reduce stack usage. + * If you don't care about stack usage, you may want to modify this so + * that noinline_for_stack is #defined to be empty even when using GCC. + * Doing so may save a few bytes in binary size. + */ +#ifndef noinline_for_stack +# ifdef __GNUC__ +# define noinline_for_stack __attribute__((__noinline__)) +# else +# define noinline_for_stack +# endif +#endif + +/* Inline functions to access unaligned unsigned 32-bit integers */ +#ifndef get_unaligned_le32 +static inline uint32_t XZ_FUNC get_unaligned_le32(const uint8_t *buf) +{ + return (uint32_t)buf[0] + | ((uint32_t)buf[1] << 8) + | ((uint32_t)buf[2] << 16) + | ((uint32_t)buf[3] << 24); +} +#endif + +#ifndef get_unaligned_be32 +static inline uint32_t XZ_FUNC get_unaligned_be32(const uint8_t *buf) +{ + return (uint32_t)(buf[0] << 24) + | ((uint32_t)buf[1] << 16) + | ((uint32_t)buf[2] << 8) + | (uint32_t)buf[3]; +} +#endif + +#ifndef put_unaligned_le32 +static inline void XZ_FUNC put_unaligned_le32(uint32_t val, uint8_t *buf) +{ + buf[0] = (uint8_t)val; + buf[1] = (uint8_t)(val >> 8); + buf[2] = (uint8_t)(val >> 16); + buf[3] = (uint8_t)(val >> 24); +} +#endif + +#ifndef put_unaligned_be32 +static inline void XZ_FUNC put_unaligned_be32(uint32_t val, uint8_t *buf) +{ + buf[0] = (uint8_t)(val >> 24); + buf[1] = (uint8_t)(val >> 16); + buf[2] = (uint8_t)(val >> 8); + buf[3] = (uint8_t)val; +} +#endif + +/* + * Use get_unaligned_le32() also for aligned access for simplicity. On + * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr)) + * could save a few bytes in code size. + */ +#ifndef get_le32 +# define get_le32 get_unaligned_le32 +#endif + +#endif diff --git a/archival/libarchive/unxz/xz_dec_bcj.c b/archival/libarchive/unxz/xz_dec_bcj.c new file mode 100644 index 000000000..09162b51f --- /dev/null +++ b/archival/libarchive/unxz/xz_dec_bcj.c @@ -0,0 +1,564 @@ +/* + * Branch/Call/Jump (BCJ) filter decoders + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" + +/* + * The rest of the file is inside this ifdef. It makes things a little more + * convenient when building without support for any BCJ filters. + */ +#ifdef XZ_DEC_BCJ + +struct xz_dec_bcj { + /* Type of the BCJ filter being used */ + enum { + BCJ_X86 = 4, /* x86 or x86-64 */ + BCJ_POWERPC = 5, /* Big endian only */ + BCJ_IA64 = 6, /* Big or little endian */ + BCJ_ARM = 7, /* Little endian only */ + BCJ_ARMTHUMB = 8, /* Little endian only */ + BCJ_SPARC = 9 /* Big or little endian */ + } type; + + /* + * Return value of the next filter in the chain. We need to preserve + * this information across calls, because we must not call the next + * filter anymore once it has returned XZ_STREAM_END. + */ + enum xz_ret ret; + + /* True if we are operating in single-call mode. */ + bool single_call; + + /* + * Absolute position relative to the beginning of the uncompressed + * data (in a single .xz Block). We care only about the lowest 32 + * bits so this doesn't need to be uint64_t even with big files. + */ + uint32_t pos; + + /* x86 filter state */ + uint32_t x86_prev_mask; + + /* Temporary space to hold the variables from struct xz_buf */ + uint8_t *out; + size_t out_pos; + size_t out_size; + + struct { + /* Amount of already filtered data in the beginning of buf */ + size_t filtered; + + /* Total amount of data currently stored in buf */ + size_t size; + + /* + * Buffer to hold a mix of filtered and unfiltered data. This + * needs to be big enough to hold Alignment + 2 * Look-ahead: + * + * Type Alignment Look-ahead + * x86 1 4 + * PowerPC 4 0 + * IA-64 16 0 + * ARM 4 0 + * ARM-Thumb 2 2 + * SPARC 4 0 + */ + uint8_t buf[16]; + } temp; +}; + +#ifdef XZ_DEC_X86 +/* + * This is macro used to test the most significant byte of a memory address + * in an x86 instruction. + */ +#define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF) + +static noinline_for_stack size_t XZ_FUNC bcj_x86( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + static const bool mask_to_allowed_status[8] + = { true, true, true, false, true, false, false, false }; + + static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 }; + + size_t i; + size_t prev_pos = (size_t)-1; + uint32_t prev_mask = s->x86_prev_mask; + uint32_t src; + uint32_t dest; + uint32_t j; + uint8_t b; + + if (size <= 4) + return 0; + + size -= 4; + for (i = 0; i < size; ++i) { + if ((buf[i] & 0xFE) != 0xE8) + continue; + + prev_pos = i - prev_pos; + if (prev_pos > 3) { + prev_mask = 0; + } else { + prev_mask = (prev_mask << (prev_pos - 1)) & 7; + if (prev_mask != 0) { + b = buf[i + 4 - mask_to_bit_num[prev_mask]]; + if (!mask_to_allowed_status[prev_mask] + || bcj_x86_test_msbyte(b)) { + prev_pos = i; + prev_mask = (prev_mask << 1) | 1; + continue; + } + } + } + + prev_pos = i; + + if (bcj_x86_test_msbyte(buf[i + 4])) { + src = get_unaligned_le32(buf + i + 1); + while (true) { + dest = src - (s->pos + (uint32_t)i + 5); + if (prev_mask == 0) + break; + + j = mask_to_bit_num[prev_mask] * 8; + b = (uint8_t)(dest >> (24 - j)); + if (!bcj_x86_test_msbyte(b)) + break; + + src = dest ^ (((uint32_t)1 << (32 - j)) - 1); + } + + dest &= 0x01FFFFFF; + dest |= (uint32_t)0 - (dest & 0x01000000); + put_unaligned_le32(dest, buf + i + 1); + i += 4; + } else { + prev_mask = (prev_mask << 1) | 1; + } + } + + prev_pos = i - prev_pos; + s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1); + return i; +} +#endif + +#ifdef XZ_DEC_POWERPC +static noinline_for_stack size_t XZ_FUNC bcj_powerpc( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t instr; + + for (i = 0; i + 4 <= size; i += 4) { + instr = get_unaligned_be32(buf + i); + if ((instr & 0xFC000003) == 0x48000001) { + instr &= 0x03FFFFFC; + instr -= s->pos + (uint32_t)i; + instr &= 0x03FFFFFC; + instr |= 0x48000001; + put_unaligned_be32(instr, buf + i); + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_IA64 +static noinline_for_stack size_t XZ_FUNC bcj_ia64( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + static const uint8_t branch_table[32] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 4, 4, 6, 6, 0, 0, 7, 7, + 4, 4, 0, 0, 4, 4, 0, 0 + }; + + /* + * The local variables take a little bit stack space, but it's less + * than what LZMA2 decoder takes, so it doesn't make sense to reduce + * stack usage here without doing that for the LZMA2 decoder too. + */ + + /* Loop counters */ + size_t i; + size_t j; + + /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ + uint32_t slot; + + /* Bitwise offset of the instruction indicated by slot */ + uint32_t bit_pos; + + /* bit_pos split into byte and bit parts */ + uint32_t byte_pos; + uint32_t bit_res; + + /* Address part of an instruction */ + uint32_t addr; + + /* Mask used to detect which instructions to convert */ + uint32_t mask; + + /* 41-bit instruction stored somewhere in the lowest 48 bits */ + uint64_t instr; + + /* Instruction normalized with bit_res for easier manipulation */ + uint64_t norm; + + for (i = 0; i + 16 <= size; i += 16) { + mask = branch_table[buf[i] & 0x1F]; + for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { + if (((mask >> slot) & 1) == 0) + continue; + + byte_pos = bit_pos >> 3; + bit_res = bit_pos & 7; + instr = 0; + for (j = 0; j < 6; ++j) + instr |= (uint64_t)(buf[i + j + byte_pos]) + << (8 * j); + + norm = instr >> bit_res; + + if (((norm >> 37) & 0x0F) == 0x05 + && ((norm >> 9) & 0x07) == 0) { + addr = (norm >> 13) & 0x0FFFFF; + addr |= ((uint32_t)(norm >> 36) & 1) << 20; + addr <<= 4; + addr -= s->pos + (uint32_t)i; + addr >>= 4; + + norm &= ~((uint64_t)0x8FFFFF << 13); + norm |= (uint64_t)(addr & 0x0FFFFF) << 13; + norm |= (uint64_t)(addr & 0x100000) + << (36 - 20); + + instr &= (1 << bit_res) - 1; + instr |= norm << bit_res; + + for (j = 0; j < 6; j++) + buf[i + j + byte_pos] + = (uint8_t)(instr >> (8 * j)); + } + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_ARM +static noinline_for_stack size_t XZ_FUNC bcj_arm( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t addr; + + for (i = 0; i + 4 <= size; i += 4) { + if (buf[i + 3] == 0xEB) { + addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8) + | ((uint32_t)buf[i + 2] << 16); + addr <<= 2; + addr -= s->pos + (uint32_t)i + 8; + addr >>= 2; + buf[i] = (uint8_t)addr; + buf[i + 1] = (uint8_t)(addr >> 8); + buf[i + 2] = (uint8_t)(addr >> 16); + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_ARMTHUMB +static noinline_for_stack size_t XZ_FUNC bcj_armthumb( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t addr; + + for (i = 0; i + 4 <= size; i += 2) { + if ((buf[i + 1] & 0xF8) == 0xF0 + && (buf[i + 3] & 0xF8) == 0xF8) { + addr = (((uint32_t)buf[i + 1] & 0x07) << 19) + | ((uint32_t)buf[i] << 11) + | (((uint32_t)buf[i + 3] & 0x07) << 8) + | (uint32_t)buf[i + 2]; + addr <<= 1; + addr -= s->pos + (uint32_t)i + 4; + addr >>= 1; + buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07)); + buf[i] = (uint8_t)(addr >> 11); + buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07)); + buf[i + 2] = (uint8_t)addr; + i += 2; + } + } + + return i; +} +#endif + +#ifdef XZ_DEC_SPARC +static noinline_for_stack size_t XZ_FUNC bcj_sparc( + struct xz_dec_bcj *s, uint8_t *buf, size_t size) +{ + size_t i; + uint32_t instr; + + for (i = 0; i + 4 <= size; i += 4) { + instr = get_unaligned_be32(buf + i); + if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) { + instr <<= 2; + instr -= s->pos + (uint32_t)i; + instr >>= 2; + instr = ((uint32_t)0x40000000 - (instr & 0x400000)) + | 0x40000000 | (instr & 0x3FFFFF); + put_unaligned_be32(instr, buf + i); + } + } + + return i; +} +#endif + +/* + * Apply the selected BCJ filter. Update *pos and s->pos to match the amount + * of data that got filtered. + * + * NOTE: This is implemented as a switch statement to avoid using function + * pointers, which could be problematic in the kernel boot code, which must + * avoid pointers to static data (at least on x86). + */ +static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s, + uint8_t *buf, size_t *pos, size_t size) +{ + size_t filtered; + + buf += *pos; + size -= *pos; + + switch (s->type) { +#ifdef XZ_DEC_X86 + case BCJ_X86: + filtered = bcj_x86(s, buf, size); + break; +#endif +#ifdef XZ_DEC_POWERPC + case BCJ_POWERPC: + filtered = bcj_powerpc(s, buf, size); + break; +#endif +#ifdef XZ_DEC_IA64 + case BCJ_IA64: + filtered = bcj_ia64(s, buf, size); + break; +#endif +#ifdef XZ_DEC_ARM + case BCJ_ARM: + filtered = bcj_arm(s, buf, size); + break; +#endif +#ifdef XZ_DEC_ARMTHUMB + case BCJ_ARMTHUMB: + filtered = bcj_armthumb(s, buf, size); + break; +#endif +#ifdef XZ_DEC_SPARC + case BCJ_SPARC: + filtered = bcj_sparc(s, buf, size); + break; +#endif + default: + /* Never reached but silence compiler warnings. */ + filtered = 0; + break; + } + + *pos += filtered; + s->pos += filtered; +} + +/* + * Flush pending filtered data from temp to the output buffer. + * Move the remaining mixture of possibly filtered and unfiltered + * data to the beginning of temp. + */ +static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b) +{ + size_t copy_size; + + copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos); + memcpy(b->out + b->out_pos, s->temp.buf, copy_size); + b->out_pos += copy_size; + + s->temp.filtered -= copy_size; + s->temp.size -= copy_size; + memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size); +} + +/* + * The BCJ filter functions are primitive in sense that they process the + * data in chunks of 1-16 bytes. To hide this issue, this function does + * some buffering. + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s, + struct xz_dec_lzma2 *lzma2, struct xz_buf *b) +{ + size_t out_start; + + /* + * Flush pending already filtered data to the output buffer. Return + * immediatelly if we couldn't flush everything, or if the next + * filter in the chain had already returned XZ_STREAM_END. + */ + if (s->temp.filtered > 0) { + bcj_flush(s, b); + if (s->temp.filtered > 0) + return XZ_OK; + + if (s->ret == XZ_STREAM_END) + return XZ_STREAM_END; + } + + /* + * If we have more output space than what is currently pending in + * temp, copy the unfiltered data from temp to the output buffer + * and try to fill the output buffer by decoding more data from the + * next filter in the chain. Apply the BCJ filter on the new data + * in the output buffer. If everything cannot be filtered, copy it + * to temp and rewind the output buffer position accordingly. + */ + if (s->temp.size < b->out_size - b->out_pos) { + out_start = b->out_pos; + memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size); + b->out_pos += s->temp.size; + + s->ret = xz_dec_lzma2_run(lzma2, b); + if (s->ret != XZ_STREAM_END + && (s->ret != XZ_OK || s->single_call)) + return s->ret; + + bcj_apply(s, b->out, &out_start, b->out_pos); + + /* + * As an exception, if the next filter returned XZ_STREAM_END, + * we can do that too, since the last few bytes that remain + * unfiltered are meant to remain unfiltered. + */ + if (s->ret == XZ_STREAM_END) + return XZ_STREAM_END; + + s->temp.size = b->out_pos - out_start; + b->out_pos -= s->temp.size; + memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size); + } + + /* + * If we have unfiltered data in temp, try to fill by decoding more + * data from the next filter. Apply the BCJ filter on temp. Then we + * hopefully can fill the actual output buffer by copying filtered + * data from temp. A mix of filtered and unfiltered data may be left + * in temp; it will be taken care on the next call to this function. + */ + if (s->temp.size > 0) { + /* Make b->out{,_pos,_size} temporarily point to s->temp. */ + s->out = b->out; + s->out_pos = b->out_pos; + s->out_size = b->out_size; + b->out = s->temp.buf; + b->out_pos = s->temp.size; + b->out_size = sizeof(s->temp.buf); + + s->ret = xz_dec_lzma2_run(lzma2, b); + + s->temp.size = b->out_pos; + b->out = s->out; + b->out_pos = s->out_pos; + b->out_size = s->out_size; + + if (s->ret != XZ_OK && s->ret != XZ_STREAM_END) + return s->ret; + + bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size); + + /* + * If the next filter returned XZ_STREAM_END, we mark that + * everything is filtered, since the last unfiltered bytes + * of the stream are meant to be left as is. + */ + if (s->ret == XZ_STREAM_END) + s->temp.filtered = s->temp.size; + + bcj_flush(s, b); + if (s->temp.filtered > 0) + return XZ_OK; + } + + return s->ret; +} + +XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call) +{ + struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s != NULL) + s->single_call = single_call; + + return s; +} + +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset( + struct xz_dec_bcj *s, uint8_t id) +{ + switch (id) { +#ifdef XZ_DEC_X86 + case BCJ_X86: +#endif +#ifdef XZ_DEC_POWERPC + case BCJ_POWERPC: +#endif +#ifdef XZ_DEC_IA64 + case BCJ_IA64: +#endif +#ifdef XZ_DEC_ARM + case BCJ_ARM: +#endif +#ifdef XZ_DEC_ARMTHUMB + case BCJ_ARMTHUMB: +#endif +#ifdef XZ_DEC_SPARC + case BCJ_SPARC: +#endif + break; + + default: + /* Unsupported Filter ID */ + return XZ_OPTIONS_ERROR; + } + + s->type = id; + s->ret = XZ_OK; + s->pos = 0; + s->x86_prev_mask = 0; + s->temp.filtered = 0; + s->temp.size = 0; + + return XZ_OK; +} + +#endif diff --git a/archival/libarchive/unxz/xz_dec_lzma2.c b/archival/libarchive/unxz/xz_dec_lzma2.c new file mode 100644 index 000000000..da71cb4d4 --- /dev/null +++ b/archival/libarchive/unxz/xz_dec_lzma2.c @@ -0,0 +1,1175 @@ +/* + * LZMA2 decoder + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" +#include "xz_lzma2.h" + +/* + * Range decoder initialization eats the first five bytes of each LZMA chunk. + */ +#define RC_INIT_BYTES 5 + +/* + * Minimum number of usable input buffer to safely decode one LZMA symbol. + * The worst case is that we decode 22 bits using probabilities and 26 + * direct bits. This may decode at maximum of 20 bytes of input. However, + * lzma_main() does an extra normalization before returning, thus we + * need to put 21 here. + */ +#define LZMA_IN_REQUIRED 21 + +/* + * Dictionary (history buffer) + * + * These are always true: + * start <= pos <= full <= end + * pos <= limit <= end + * + * In multi-call mode, also these are true: + * end == size + * size <= size_max + * allocated <= size + * + * Most of these variables are size_t to support single-call mode, + * in which the dictionary variables address the actual output + * buffer directly. + */ +struct dictionary { + /* Beginning of the history buffer */ + uint8_t *buf; + + /* Old position in buf (before decoding more data) */ + size_t start; + + /* Position in buf */ + size_t pos; + + /* + * How full dictionary is. This is used to detect corrupt input that + * would read beyond the beginning of the uncompressed stream. + */ + size_t full; + + /* Write limit; we don't write to buf[limit] or later bytes. */ + size_t limit; + + /* + * End of the dictionary buffer. In multi-call mode, this is + * the same as the dictionary size. In single-call mode, this + * indicates the size of the output buffer. + */ + size_t end; + + /* + * Size of the dictionary as specified in Block Header. This is used + * together with "full" to detect corrupt input that would make us + * read beyond the beginning of the uncompressed stream. + */ + uint32_t size; + + /* + * Maximum allowed dictionary size in multi-call mode. + * This is ignored in single-call mode. + */ + uint32_t size_max; + + /* + * Amount of memory currently allocated for the dictionary. + * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC, + * size_max is always the same as the allocated size.) + */ + uint32_t allocated; + + /* Operation mode */ + enum xz_mode mode; +}; + +/* Range decoder */ +struct rc_dec { + uint32_t range; + uint32_t code; + + /* + * Number of initializing bytes remaining to be read + * by rc_read_init(). + */ + uint32_t init_bytes_left; + + /* + * Buffer from which we read our input. It can be either + * temp.buf or the caller-provided input buffer. + */ + const uint8_t *in; + size_t in_pos; + size_t in_limit; +}; + +/* Probabilities for a length decoder. */ +struct lzma_len_dec { + /* Probability of match length being at least 10 */ + uint16_t choice; + + /* Probability of match length being at least 18 */ + uint16_t choice2; + + /* Probabilities for match lengths 2-9 */ + uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; + + /* Probabilities for match lengths 10-17 */ + uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; + + /* Probabilities for match lengths 18-273 */ + uint16_t high[LEN_HIGH_SYMBOLS]; +}; + +struct lzma_dec { + /* Distances of latest four matches */ + uint32_t rep0; + uint32_t rep1; + uint32_t rep2; + uint32_t rep3; + + /* Types of the most recently seen LZMA symbols */ + enum lzma_state state; + + /* + * Length of a match. This is updated so that dict_repeat can + * be called again to finish repeating the whole match. + */ + uint32_t len; + + /* + * LZMA properties or related bit masks (number of literal + * context bits, a mask dervied from the number of literal + * position bits, and a mask dervied from the number + * position bits) + */ + uint32_t lc; + uint32_t literal_pos_mask; /* (1 << lp) - 1 */ + uint32_t pos_mask; /* (1 << pb) - 1 */ + + /* If 1, it's a match. Otherwise it's a single 8-bit literal. */ + uint16_t is_match[STATES][POS_STATES_MAX]; + + /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */ + uint16_t is_rep[STATES]; + + /* + * If 0, distance of a repeated match is rep0. + * Otherwise check is_rep1. + */ + uint16_t is_rep0[STATES]; + + /* + * If 0, distance of a repeated match is rep1. + * Otherwise check is_rep2. + */ + uint16_t is_rep1[STATES]; + + /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */ + uint16_t is_rep2[STATES]; + + /* + * If 1, the repeated match has length of one byte. Otherwise + * the length is decoded from rep_len_decoder. + */ + uint16_t is_rep0_long[STATES][POS_STATES_MAX]; + + /* + * Probability tree for the highest two bits of the match + * distance. There is a separate probability tree for match + * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. + */ + uint16_t dist_slot[DIST_STATES][DIST_SLOTS]; + + /* + * Probility trees for additional bits for match distance + * when the distance is in the range [4, 127]. + */ + uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END]; + + /* + * Probability tree for the lowest four bits of a match + * distance that is equal to or greater than 128. + */ + uint16_t dist_align[ALIGN_SIZE]; + + /* Length of a normal match */ + struct lzma_len_dec match_len_dec; + + /* Length of a repeated match */ + struct lzma_len_dec rep_len_dec; + + /* Probabilities of literals */ + uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; +}; + +struct lzma2_dec { + /* Position in xz_dec_lzma2_run(). */ + enum lzma2_seq { + SEQ_CONTROL, + SEQ_UNCOMPRESSED_1, + SEQ_UNCOMPRESSED_2, + SEQ_COMPRESSED_0, + SEQ_COMPRESSED_1, + SEQ_PROPERTIES, + SEQ_LZMA_PREPARE, + SEQ_LZMA_RUN, + SEQ_COPY + } sequence; + + /* Next position after decoding the compressed size of the chunk. */ + enum lzma2_seq next_sequence; + + /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ + uint32_t uncompressed; + + /* + * Compressed size of LZMA chunk or compressed/uncompressed + * size of uncompressed chunk (64 KiB at maximum) + */ + uint32_t compressed; + + /* + * True if dictionary reset is needed. This is false before + * the first chunk (LZMA or uncompressed). + */ + bool need_dict_reset; + + /* + * True if new LZMA properties are needed. This is false + * before the first LZMA chunk. + */ + bool need_props; +}; + +struct xz_dec_lzma2 { + /* + * The order below is important on x86 to reduce code size and + * it shouldn't hurt on other platforms. Everything up to and + * including lzma.pos_mask are in the first 128 bytes on x86-32, + * which allows using smaller instructions to access those + * variables. On x86-64, fewer variables fit into the first 128 + * bytes, but this is still the best order without sacrificing + * the readability by splitting the structures. + */ + struct rc_dec rc; + struct dictionary dict; + struct lzma2_dec lzma2; + struct lzma_dec lzma; + + /* + * Temporary buffer which holds small number of input bytes between + * decoder calls. See lzma2_lzma() for details. + */ + struct { + uint32_t size; + uint8_t buf[3 * LZMA_IN_REQUIRED]; + } temp; +}; + +/************** + * Dictionary * + **************/ + +/* + * Reset the dictionary state. When in single-call mode, set up the beginning + * of the dictionary to point to the actual output buffer. + */ +static void XZ_FUNC dict_reset(struct dictionary *dict, struct xz_buf *b) +{ + if (DEC_IS_SINGLE(dict->mode)) { + dict->buf = b->out + b->out_pos; + dict->end = b->out_size - b->out_pos; + } + + dict->start = 0; + dict->pos = 0; + dict->limit = 0; + dict->full = 0; +} + +/* Set dictionary write limit */ +static void XZ_FUNC dict_limit(struct dictionary *dict, size_t out_max) +{ + if (dict->end - dict->pos <= out_max) + dict->limit = dict->end; + else + dict->limit = dict->pos + out_max; +} + +/* Return true if at least one byte can be written into the dictionary. */ +static __always_inline bool XZ_FUNC dict_has_space(const struct dictionary *dict) +{ + return dict->pos < dict->limit; +} + +/* + * Get a byte from the dictionary at the given distance. The distance is + * assumed to valid, or as a special case, zero when the dictionary is + * still empty. This special case is needed for single-call decoding to + * avoid writing a '\0' to the end of the destination buffer. + */ +static __always_inline uint32_t XZ_FUNC dict_get( + const struct dictionary *dict, uint32_t dist) +{ + size_t offset = dict->pos - dist - 1; + + if (dist >= dict->pos) + offset += dict->end; + + return dict->full > 0 ? dict->buf[offset] : 0; +} + +/* + * Put one byte into the dictionary. It is assumed that there is space for it. + */ +static inline void XZ_FUNC dict_put(struct dictionary *dict, uint8_t byte) +{ + dict->buf[dict->pos++] = byte; + + if (dict->full < dict->pos) + dict->full = dict->pos; +} + +/* + * Repeat given number of bytes from the given distance. If the distance is + * invalid, false is returned. On success, true is returned and *len is + * updated to indicate how many bytes were left to be repeated. + */ +static bool XZ_FUNC dict_repeat( + struct dictionary *dict, uint32_t *len, uint32_t dist) +{ + size_t back; + uint32_t left; + + if (dist >= dict->full || dist >= dict->size) + return false; + + left = min_t(size_t, dict->limit - dict->pos, *len); + *len -= left; + + back = dict->pos - dist - 1; + if (dist >= dict->pos) + back += dict->end; + + do { + dict->buf[dict->pos++] = dict->buf[back++]; + if (back == dict->end) + back = 0; + } while (--left > 0); + + if (dict->full < dict->pos) + dict->full = dict->pos; + + return true; +} + +/* Copy uncompressed data as is from input to dictionary and output buffers. */ +static void XZ_FUNC dict_uncompressed( + struct dictionary *dict, struct xz_buf *b, uint32_t *left) +{ + size_t copy_size; + + while (*left > 0 && b->in_pos < b->in_size + && b->out_pos < b->out_size) { + copy_size = min(b->in_size - b->in_pos, + b->out_size - b->out_pos); + if (copy_size > dict->end - dict->pos) + copy_size = dict->end - dict->pos; + if (copy_size > *left) + copy_size = *left; + + *left -= copy_size; + + memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size); + dict->pos += copy_size; + + if (dict->full < dict->pos) + dict->full = dict->pos; + + if (DEC_IS_MULTI(dict->mode)) { + if (dict->pos == dict->end) + dict->pos = 0; + + memcpy(b->out + b->out_pos, b->in + b->in_pos, + copy_size); + } + + dict->start = dict->pos; + + b->out_pos += copy_size; + b->in_pos += copy_size; + + } +} + +/* + * Flush pending data from dictionary to b->out. It is assumed that there is + * enough space in b->out. This is guaranteed because caller uses dict_limit() + * before decoding data into the dictionary. + */ +static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b) +{ + size_t copy_size = dict->pos - dict->start; + + if (DEC_IS_MULTI(dict->mode)) { + if (dict->pos == dict->end) + dict->pos = 0; + + memcpy(b->out + b->out_pos, dict->buf + dict->start, + copy_size); + } + + dict->start = dict->pos; + b->out_pos += copy_size; + return copy_size; +} + +/***************** + * Range decoder * + *****************/ + +/* Reset the range decoder. */ +static void XZ_FUNC rc_reset(struct rc_dec *rc) +{ + rc->range = (uint32_t)-1; + rc->code = 0; + rc->init_bytes_left = RC_INIT_BYTES; +} + +/* + * Read the first five initial bytes into rc->code if they haven't been + * read already. (Yes, the first byte gets completely ignored.) + */ +static bool XZ_FUNC rc_read_init(struct rc_dec *rc, struct xz_buf *b) +{ + while (rc->init_bytes_left > 0) { + if (b->in_pos == b->in_size) + return false; + + rc->code = (rc->code << 8) + b->in[b->in_pos++]; + --rc->init_bytes_left; + } + + return true; +} + +/* Return true if there may not be enough input for the next decoding loop. */ +static inline bool XZ_FUNC rc_limit_exceeded(const struct rc_dec *rc) +{ + return rc->in_pos > rc->in_limit; +} + +/* + * Return true if it is possible (from point of view of range decoder) that + * we have reached the end of the LZMA chunk. + */ +static inline bool XZ_FUNC rc_is_finished(const struct rc_dec *rc) +{ + return rc->code == 0; +} + +/* Read the next input byte if needed. */ +static __always_inline void XZ_FUNC rc_normalize(struct rc_dec *rc) +{ + if (rc->range < RC_TOP_VALUE) { + rc->range <<= RC_SHIFT_BITS; + rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++]; + } +} + +/* + * Decode one bit. In some versions, this function has been splitted in three + * functions so that the compiler is supposed to be able to more easily avoid + * an extra branch. In this particular version of the LZMA decoder, this + * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3 + * on x86). Using a non-splitted version results in nicer looking code too. + * + * NOTE: This must return an int. Do not make it return a bool or the speed + * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care, + * and it generates 10-20 % faster code than GCC 3.x from this file anyway.) + */ +static __always_inline int XZ_FUNC rc_bit(struct rc_dec *rc, uint16_t *prob) +{ + uint32_t bound; + int bit; + + rc_normalize(rc); + bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob; + if (rc->code < bound) { + rc->range = bound; + *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS; + bit = 0; + } else { + rc->range -= bound; + rc->code -= bound; + *prob -= *prob >> RC_MOVE_BITS; + bit = 1; + } + + return bit; +} + +/* Decode a bittree starting from the most significant bit. */ +static __always_inline uint32_t XZ_FUNC rc_bittree( + struct rc_dec *rc, uint16_t *probs, uint32_t limit) +{ + uint32_t symbol = 1; + + do { + if (rc_bit(rc, &probs[symbol])) + symbol = (symbol << 1) + 1; + else + symbol <<= 1; + } while (symbol < limit); + + return symbol; +} + +/* Decode a bittree starting from the least significant bit. */ +static __always_inline void XZ_FUNC rc_bittree_reverse(struct rc_dec *rc, + uint16_t *probs, uint32_t *dest, uint32_t limit) +{ + uint32_t symbol = 1; + uint32_t i = 0; + + do { + if (rc_bit(rc, &probs[symbol])) { + symbol = (symbol << 1) + 1; + *dest += 1 << i; + } else { + symbol <<= 1; + } + } while (++i < limit); +} + +/* Decode direct bits (fixed fifty-fifty probability) */ +static inline void XZ_FUNC rc_direct( + struct rc_dec *rc, uint32_t *dest, uint32_t limit) +{ + uint32_t mask; + + do { + rc_normalize(rc); + rc->range >>= 1; + rc->code -= rc->range; + mask = (uint32_t)0 - (rc->code >> 31); + rc->code += rc->range & mask; + *dest = (*dest << 1) + (mask + 1); + } while (--limit > 0); +} + +/******** + * LZMA * + ********/ + +/* Get pointer to literal coder probability array. */ +static uint16_t * XZ_FUNC lzma_literal_probs(struct xz_dec_lzma2 *s) +{ + uint32_t prev_byte = dict_get(&s->dict, 0); + uint32_t low = prev_byte >> (8 - s->lzma.lc); + uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc; + return s->lzma.literal[low + high]; +} + +/* Decode a literal (one 8-bit byte) */ +static void XZ_FUNC lzma_literal(struct xz_dec_lzma2 *s) +{ + uint16_t *probs; + uint32_t symbol; + uint32_t match_byte; + uint32_t match_bit; + uint32_t offset; + uint32_t i; + + probs = lzma_literal_probs(s); + + if (lzma_state_is_literal(s->lzma.state)) { + symbol = rc_bittree(&s->rc, probs, 0x100); + } else { + symbol = 1; + match_byte = dict_get(&s->dict, s->lzma.rep0) << 1; + offset = 0x100; + + do { + match_bit = match_byte & offset; + match_byte <<= 1; + i = offset + match_bit + symbol; + + if (rc_bit(&s->rc, &probs[i])) { + symbol = (symbol << 1) + 1; + offset &= match_bit; + } else { + symbol <<= 1; + offset &= ~match_bit; + } + } while (symbol < 0x100); + } + + dict_put(&s->dict, (uint8_t)symbol); + lzma_state_literal(&s->lzma.state); +} + +/* Decode the length of the match into s->lzma.len. */ +static void XZ_FUNC lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l, + uint32_t pos_state) +{ + uint16_t *probs; + uint32_t limit; + + if (!rc_bit(&s->rc, &l->choice)) { + probs = l->low[pos_state]; + limit = LEN_LOW_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN; + } else { + if (!rc_bit(&s->rc, &l->choice2)) { + probs = l->mid[pos_state]; + limit = LEN_MID_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; + } else { + probs = l->high; + limit = LEN_HIGH_SYMBOLS; + s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS + + LEN_MID_SYMBOLS; + } + } + + s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit; +} + +/* Decode a match. The distance will be stored in s->lzma.rep0. */ +static void XZ_FUNC lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state) +{ + uint16_t *probs; + uint32_t dist_slot; + uint32_t limit; + + lzma_state_match(&s->lzma.state); + + s->lzma.rep3 = s->lzma.rep2; + s->lzma.rep2 = s->lzma.rep1; + s->lzma.rep1 = s->lzma.rep0; + + lzma_len(s, &s->lzma.match_len_dec, pos_state); + + probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)]; + dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS; + + if (dist_slot < DIST_MODEL_START) { + s->lzma.rep0 = dist_slot; + } else { + limit = (dist_slot >> 1) - 1; + s->lzma.rep0 = 2 + (dist_slot & 1); + + if (dist_slot < DIST_MODEL_END) { + s->lzma.rep0 <<= limit; + probs = s->lzma.dist_special + s->lzma.rep0 + - dist_slot - 1; + rc_bittree_reverse(&s->rc, probs, + &s->lzma.rep0, limit); + } else { + rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS); + s->lzma.rep0 <<= ALIGN_BITS; + rc_bittree_reverse(&s->rc, s->lzma.dist_align, + &s->lzma.rep0, ALIGN_BITS); + } + } +} + +/* + * Decode a repeated match. The distance is one of the four most recently + * seen matches. The distance will be stored in s->lzma.rep0. + */ +static void XZ_FUNC lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state) +{ + uint32_t tmp; + + if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) { + if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[ + s->lzma.state][pos_state])) { + lzma_state_short_rep(&s->lzma.state); + s->lzma.len = 1; + return; + } + } else { + if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) { + tmp = s->lzma.rep1; + } else { + if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) { + tmp = s->lzma.rep2; + } else { + tmp = s->lzma.rep3; + s->lzma.rep3 = s->lzma.rep2; + } + + s->lzma.rep2 = s->lzma.rep1; + } + + s->lzma.rep1 = s->lzma.rep0; + s->lzma.rep0 = tmp; + } + + lzma_state_long_rep(&s->lzma.state); + lzma_len(s, &s->lzma.rep_len_dec, pos_state); +} + +/* LZMA decoder core */ +static bool XZ_FUNC lzma_main(struct xz_dec_lzma2 *s) +{ + uint32_t pos_state; + + /* + * If the dictionary was reached during the previous call, try to + * finish the possibly pending repeat in the dictionary. + */ + if (dict_has_space(&s->dict) && s->lzma.len > 0) + dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0); + + /* + * Decode more LZMA symbols. One iteration may consume up to + * LZMA_IN_REQUIRED - 1 bytes. + */ + while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) { + pos_state = s->dict.pos & s->lzma.pos_mask; + + if (!rc_bit(&s->rc, &s->lzma.is_match[ + s->lzma.state][pos_state])) { + lzma_literal(s); + } else { + if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state])) + lzma_rep_match(s, pos_state); + else + lzma_match(s, pos_state); + + if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0)) + return false; + } + } + + /* + * Having the range decoder always normalized when we are outside + * this function makes it easier to correctly handle end of the chunk. + */ + rc_normalize(&s->rc); + + return true; +} + +/* + * Reset the LZMA decoder and range decoder state. Dictionary is nore reset + * here, because LZMA state may be reset without resetting the dictionary. + */ +static void XZ_FUNC lzma_reset(struct xz_dec_lzma2 *s) +{ + uint16_t *probs; + size_t i; + + s->lzma.state = STATE_LIT_LIT; + s->lzma.rep0 = 0; + s->lzma.rep1 = 0; + s->lzma.rep2 = 0; + s->lzma.rep3 = 0; + + /* + * All probabilities are initialized to the same value. This hack + * makes the code smaller by avoiding a separate loop for each + * probability array. + * + * This could be optimized so that only that part of literal + * probabilities that are actually required. In the common case + * we would write 12 KiB less. + */ + probs = s->lzma.is_match[0]; + for (i = 0; i < PROBS_TOTAL; ++i) + probs[i] = RC_BIT_MODEL_TOTAL / 2; + + rc_reset(&s->rc); +} + +/* + * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks + * from the decoded lp and pb values. On success, the LZMA decoder state is + * reset and true is returned. + */ +static bool XZ_FUNC lzma_props(struct xz_dec_lzma2 *s, uint8_t props) +{ + if (props > (4 * 5 + 4) * 9 + 8) + return false; + + s->lzma.pos_mask = 0; + while (props >= 9 * 5) { + props -= 9 * 5; + ++s->lzma.pos_mask; + } + + s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1; + + s->lzma.literal_pos_mask = 0; + while (props >= 9) { + props -= 9; + ++s->lzma.literal_pos_mask; + } + + s->lzma.lc = props; + + if (s->lzma.lc + s->lzma.literal_pos_mask > 4) + return false; + + s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1; + + lzma_reset(s); + + return true; +} + +/********* + * LZMA2 * + *********/ + +/* + * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't + * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This + * wrapper function takes care of making the LZMA decoder's assumption safe. + * + * As long as there is plenty of input left to be decoded in the current LZMA + * chunk, we decode directly from the caller-supplied input buffer until + * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into + * s->temp.buf, which (hopefully) gets filled on the next call to this + * function. We decode a few bytes from the temporary buffer so that we can + * continue decoding from the caller-supplied input buffer again. + */ +static bool XZ_FUNC lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b) +{ + size_t in_avail; + uint32_t tmp; + + in_avail = b->in_size - b->in_pos; + if (s->temp.size > 0 || s->lzma2.compressed == 0) { + tmp = 2 * LZMA_IN_REQUIRED - s->temp.size; + if (tmp > s->lzma2.compressed - s->temp.size) + tmp = s->lzma2.compressed - s->temp.size; + if (tmp > in_avail) + tmp = in_avail; + + memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp); + + if (s->temp.size + tmp == s->lzma2.compressed) { + memzero(s->temp.buf + s->temp.size + tmp, + sizeof(s->temp.buf) + - s->temp.size - tmp); + s->rc.in_limit = s->temp.size + tmp; + } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) { + s->temp.size += tmp; + b->in_pos += tmp; + return true; + } else { + s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED; + } + + s->rc.in = s->temp.buf; + s->rc.in_pos = 0; + + if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp) + return false; + + s->lzma2.compressed -= s->rc.in_pos; + + if (s->rc.in_pos < s->temp.size) { + s->temp.size -= s->rc.in_pos; + memmove(s->temp.buf, s->temp.buf + s->rc.in_pos, + s->temp.size); + return true; + } + + b->in_pos += s->rc.in_pos - s->temp.size; + s->temp.size = 0; + } + + in_avail = b->in_size - b->in_pos; + if (in_avail >= LZMA_IN_REQUIRED) { + s->rc.in = b->in; + s->rc.in_pos = b->in_pos; + + if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED) + s->rc.in_limit = b->in_pos + s->lzma2.compressed; + else + s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED; + + if (!lzma_main(s)) + return false; + + in_avail = s->rc.in_pos - b->in_pos; + if (in_avail > s->lzma2.compressed) + return false; + + s->lzma2.compressed -= in_avail; + b->in_pos = s->rc.in_pos; + } + + in_avail = b->in_size - b->in_pos; + if (in_avail < LZMA_IN_REQUIRED) { + if (in_avail > s->lzma2.compressed) + in_avail = s->lzma2.compressed; + + memcpy(s->temp.buf, b->in + b->in_pos, in_avail); + s->temp.size = in_avail; + b->in_pos += in_avail; + } + + return true; +} + +/* + * Take care of the LZMA2 control layer, and forward the job of actual LZMA + * decoding or copying of uncompressed chunks to other functions. + */ +XZ_EXTERN NOINLINE enum xz_ret XZ_FUNC xz_dec_lzma2_run( + struct xz_dec_lzma2 *s, struct xz_buf *b) +{ + uint32_t tmp; + + while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) { + switch (s->lzma2.sequence) { + case SEQ_CONTROL: + /* + * LZMA2 control byte + * + * Exact values: + * 0x00 End marker + * 0x01 Dictionary reset followed by + * an uncompressed chunk + * 0x02 Uncompressed chunk (no dictionary reset) + * + * Highest three bits (s->control & 0xE0): + * 0xE0 Dictionary reset, new properties and state + * reset, followed by LZMA compressed chunk + * 0xC0 New properties and state reset, followed + * by LZMA compressed chunk (no dictionary + * reset) + * 0xA0 State reset using old properties, + * followed by LZMA compressed chunk (no + * dictionary reset) + * 0x80 LZMA chunk (no dictionary or state reset) + * + * For LZMA compressed chunks, the lowest five bits + * (s->control & 1F) are the highest bits of the + * uncompressed size (bits 16-20). + * + * A new LZMA2 stream must begin with a dictionary + * reset. The first LZMA chunk must set new + * properties and reset the LZMA state. + * + * Values that don't match anything described above + * are invalid and we return XZ_DATA_ERROR. + */ + tmp = b->in[b->in_pos++]; + + if (tmp >= 0xE0 || tmp == 0x01) { + s->lzma2.need_props = true; + s->lzma2.need_dict_reset = false; + dict_reset(&s->dict, b); + } else if (s->lzma2.need_dict_reset) { + return XZ_DATA_ERROR; + } + + if (tmp >= 0x80) { + s->lzma2.uncompressed = (tmp & 0x1F) << 16; + s->lzma2.sequence = SEQ_UNCOMPRESSED_1; + + if (tmp >= 0xC0) { + /* + * When there are new properties, + * state reset is done at + * SEQ_PROPERTIES. + */ + s->lzma2.need_props = false; + s->lzma2.next_sequence + = SEQ_PROPERTIES; + + } else if (s->lzma2.need_props) { + return XZ_DATA_ERROR; + + } else { + s->lzma2.next_sequence + = SEQ_LZMA_PREPARE; + if (tmp >= 0xA0) + lzma_reset(s); + } + } else { + if (tmp == 0x00) + return XZ_STREAM_END; + + if (tmp > 0x02) + return XZ_DATA_ERROR; + + s->lzma2.sequence = SEQ_COMPRESSED_0; + s->lzma2.next_sequence = SEQ_COPY; + } + + break; + + case SEQ_UNCOMPRESSED_1: + s->lzma2.uncompressed + += (uint32_t)b->in[b->in_pos++] << 8; + s->lzma2.sequence = SEQ_UNCOMPRESSED_2; + break; + + case SEQ_UNCOMPRESSED_2: + s->lzma2.uncompressed + += (uint32_t)b->in[b->in_pos++] + 1; + s->lzma2.sequence = SEQ_COMPRESSED_0; + break; + + case SEQ_COMPRESSED_0: + s->lzma2.compressed + = (uint32_t)b->in[b->in_pos++] << 8; + s->lzma2.sequence = SEQ_COMPRESSED_1; + break; + + case SEQ_COMPRESSED_1: + s->lzma2.compressed + += (uint32_t)b->in[b->in_pos++] + 1; + s->lzma2.sequence = s->lzma2.next_sequence; + break; + + case SEQ_PROPERTIES: + if (!lzma_props(s, b->in[b->in_pos++])) + return XZ_DATA_ERROR; + + s->lzma2.sequence = SEQ_LZMA_PREPARE; + + case SEQ_LZMA_PREPARE: + if (s->lzma2.compressed < RC_INIT_BYTES) + return XZ_DATA_ERROR; + + if (!rc_read_init(&s->rc, b)) + return XZ_OK; + + s->lzma2.compressed -= RC_INIT_BYTES; + s->lzma2.sequence = SEQ_LZMA_RUN; + + case SEQ_LZMA_RUN: + /* + * Set dictionary limit to indicate how much we want + * to be encoded at maximum. Decode new data into the + * dictionary. Flush the new data from dictionary to + * b->out. Check if we finished decoding this chunk. + * In case the dictionary got full but we didn't fill + * the output buffer yet, we may run this loop + * multiple times without changing s->lzma2.sequence. + */ + dict_limit(&s->dict, min_t(size_t, + b->out_size - b->out_pos, + s->lzma2.uncompressed)); + if (!lzma2_lzma(s, b)) + return XZ_DATA_ERROR; + + s->lzma2.uncompressed -= dict_flush(&s->dict, b); + + if (s->lzma2.uncompressed == 0) { + if (s->lzma2.compressed > 0 || s->lzma.len > 0 + || !rc_is_finished(&s->rc)) + return XZ_DATA_ERROR; + + rc_reset(&s->rc); + s->lzma2.sequence = SEQ_CONTROL; + + } else if (b->out_pos == b->out_size + || (b->in_pos == b->in_size + && s->temp.size + < s->lzma2.compressed)) { + return XZ_OK; + } + + break; + + case SEQ_COPY: + dict_uncompressed(&s->dict, b, &s->lzma2.compressed); + if (s->lzma2.compressed > 0) + return XZ_OK; + + s->lzma2.sequence = SEQ_CONTROL; + break; + } + } + + return XZ_OK; +} + +XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create( + enum xz_mode mode, uint32_t dict_max) +{ + struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->dict.mode = mode; + s->dict.size_max = dict_max; + + if (DEC_IS_PREALLOC(mode)) { + s->dict.buf = vmalloc(dict_max); + if (s->dict.buf == NULL) { + kfree(s); + return NULL; + } + } else if (DEC_IS_DYNALLOC(mode)) { + s->dict.buf = NULL; + s->dict.allocated = 0; + } + + return s; +} + +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset( + struct xz_dec_lzma2 *s, uint8_t props) +{ + /* This limits dictionary size to 3 GiB to keep parsing simpler. */ + if (props > 39) + return XZ_OPTIONS_ERROR; + + s->dict.size = 2 + (props & 1); + s->dict.size <<= (props >> 1) + 11; + + if (DEC_IS_MULTI(s->dict.mode)) { + if (s->dict.size > s->dict.size_max) + return XZ_MEMLIMIT_ERROR; + + s->dict.end = s->dict.size; + + if (DEC_IS_DYNALLOC(s->dict.mode)) { + if (s->dict.allocated < s->dict.size) { + vfree(s->dict.buf); + s->dict.buf = vmalloc(s->dict.size); + if (s->dict.buf == NULL) { + s->dict.allocated = 0; + return XZ_MEM_ERROR; + } + } + } + } + + s->lzma.len = 0; + + s->lzma2.sequence = SEQ_CONTROL; + s->lzma2.need_dict_reset = true; + + s->temp.size = 0; + + return XZ_OK; +} + +XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s) +{ + if (DEC_IS_MULTI(s->dict.mode)) + vfree(s->dict.buf); + + kfree(s); +} diff --git a/archival/libarchive/unxz/xz_dec_stream.c b/archival/libarchive/unxz/xz_dec_stream.c new file mode 100644 index 000000000..bdcbf1ba3 --- /dev/null +++ b/archival/libarchive/unxz/xz_dec_stream.c @@ -0,0 +1,822 @@ +/* + * .xz Stream decoder + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#include "xz_private.h" +#include "xz_stream.h" + +/* Hash used to validate the Index field */ +struct xz_dec_hash { + vli_type unpadded; + vli_type uncompressed; + uint32_t crc32; +}; + +struct xz_dec { + /* Position in dec_main() */ + enum { + SEQ_STREAM_HEADER, + SEQ_BLOCK_START, + SEQ_BLOCK_HEADER, + SEQ_BLOCK_UNCOMPRESS, + SEQ_BLOCK_PADDING, + SEQ_BLOCK_CHECK, + SEQ_INDEX, + SEQ_INDEX_PADDING, + SEQ_INDEX_CRC32, + SEQ_STREAM_FOOTER + } sequence; + + /* Position in variable-length integers and Check fields */ + uint32_t pos; + + /* Variable-length integer decoded by dec_vli() */ + vli_type vli; + + /* Saved in_pos and out_pos */ + size_t in_start; + size_t out_start; + + /* CRC32 value in Block or Index */ + uint32_t crc32; + + /* Type of the integrity check calculated from uncompressed data */ + enum xz_check check_type; + + /* Operation mode */ + enum xz_mode mode; + + /* + * True if the next call to xz_dec_run() is allowed to return + * XZ_BUF_ERROR. + */ + bool allow_buf_error; + + /* Information stored in Block Header */ + struct { + /* + * Value stored in the Compressed Size field, or + * VLI_UNKNOWN if Compressed Size is not present. + */ + vli_type compressed; + + /* + * Value stored in the Uncompressed Size field, or + * VLI_UNKNOWN if Uncompressed Size is not present. + */ + vli_type uncompressed; + + /* Size of the Block Header field */ + uint32_t size; + } block_header; + + /* Information collected when decoding Blocks */ + struct { + /* Observed compressed size of the current Block */ + vli_type compressed; + + /* Observed uncompressed size of the current Block */ + vli_type uncompressed; + + /* Number of Blocks decoded so far */ + vli_type count; + + /* + * Hash calculated from the Block sizes. This is used to + * validate the Index field. + */ + struct xz_dec_hash hash; + } block; + + /* Variables needed when verifying the Index field */ + struct { + /* Position in dec_index() */ + enum { + SEQ_INDEX_COUNT, + SEQ_INDEX_UNPADDED, + SEQ_INDEX_UNCOMPRESSED + } sequence; + + /* Size of the Index in bytes */ + vli_type size; + + /* Number of Records (matches block.count in valid files) */ + vli_type count; + + /* + * Hash calculated from the Records (matches block.hash in + * valid files). + */ + struct xz_dec_hash hash; + } index; + + /* + * Temporary buffer needed to hold Stream Header, Block Header, + * and Stream Footer. The Block Header is the biggest (1 KiB) + * so we reserve space according to that. buf[] has to be aligned + * to a multiple of four bytes; the size_t variables before it + * should guarantee this. + */ + struct { + size_t pos; + size_t size; + uint8_t buf[1024]; + } temp; + + struct xz_dec_lzma2 *lzma2; + +#ifdef XZ_DEC_BCJ + struct xz_dec_bcj *bcj; + bool bcj_active; +#endif +}; + +#ifdef XZ_DEC_ANY_CHECK +/* Sizes of the Check field with different Check IDs */ +static const uint8_t check_sizes[16] = { + 0, + 4, 4, 4, + 8, 8, 8, + 16, 16, 16, + 32, 32, 32, + 64, 64, 64 +}; +#endif + +/* + * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller + * must have set s->temp.pos to indicate how much data we are supposed + * to copy into s->temp.buf. Return true once s->temp.pos has reached + * s->temp.size. + */ +static bool XZ_FUNC fill_temp(struct xz_dec *s, struct xz_buf *b) +{ + size_t copy_size = min_t(size_t, + b->in_size - b->in_pos, s->temp.size - s->temp.pos); + + memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size); + b->in_pos += copy_size; + s->temp.pos += copy_size; + + if (s->temp.pos == s->temp.size) { + s->temp.pos = 0; + return true; + } + + return false; +} + +/* Decode a variable-length integer (little-endian base-128 encoding) */ +static enum xz_ret XZ_FUNC dec_vli(struct xz_dec *s, + const uint8_t *in, size_t *in_pos, size_t in_size) +{ + uint8_t byte; + + if (s->pos == 0) + s->vli = 0; + + while (*in_pos < in_size) { + byte = in[*in_pos]; + ++*in_pos; + + s->vli |= (vli_type)(byte & 0x7F) << s->pos; + + if ((byte & 0x80) == 0) { + /* Don't allow non-minimal encodings. */ + if (byte == 0 && s->pos != 0) + return XZ_DATA_ERROR; + + s->pos = 0; + return XZ_STREAM_END; + } + + s->pos += 7; + if (s->pos == 7 * VLI_BYTES_MAX) + return XZ_DATA_ERROR; + } + + return XZ_OK; +} + +/* + * Decode the Compressed Data field from a Block. Update and validate + * the observed compressed and uncompressed sizes of the Block so that + * they don't exceed the values possibly stored in the Block Header + * (validation assumes that no integer overflow occurs, since vli_type + * is normally uint64_t). Update the CRC32 if presence of the CRC32 + * field was indicated in Stream Header. + * + * Once the decoding is finished, validate that the observed sizes match + * the sizes possibly stored in the Block Header. Update the hash and + * Block count, which are later used to validate the Index field. + */ +static enum xz_ret XZ_FUNC dec_block(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + s->in_start = b->in_pos; + s->out_start = b->out_pos; + +#ifdef XZ_DEC_BCJ + if (s->bcj_active) + ret = xz_dec_bcj_run(s->bcj, s->lzma2, b); + else +#endif + ret = xz_dec_lzma2_run(s->lzma2, b); + + s->block.compressed += b->in_pos - s->in_start; + s->block.uncompressed += b->out_pos - s->out_start; + + /* + * There is no need to separately check for VLI_UNKNOWN, since + * the observed sizes are always smaller than VLI_UNKNOWN. + */ + if (s->block.compressed > s->block_header.compressed + || s->block.uncompressed + > s->block_header.uncompressed) + return XZ_DATA_ERROR; + + if (s->check_type == XZ_CHECK_CRC32) + s->crc32 = xz_crc32(b->out + s->out_start, + b->out_pos - s->out_start, s->crc32); + + if (ret == XZ_STREAM_END) { + if (s->block_header.compressed != VLI_UNKNOWN + && s->block_header.compressed + != s->block.compressed) + return XZ_DATA_ERROR; + + if (s->block_header.uncompressed != VLI_UNKNOWN + && s->block_header.uncompressed + != s->block.uncompressed) + return XZ_DATA_ERROR; + + s->block.hash.unpadded += s->block_header.size + + s->block.compressed; + +#ifdef XZ_DEC_ANY_CHECK + s->block.hash.unpadded += check_sizes[s->check_type]; +#else + if (s->check_type == XZ_CHECK_CRC32) + s->block.hash.unpadded += 4; +#endif + + s->block.hash.uncompressed += s->block.uncompressed; + s->block.hash.crc32 = xz_crc32( + (const uint8_t *)&s->block.hash, + sizeof(s->block.hash), s->block.hash.crc32); + + ++s->block.count; + } + + return ret; +} + +/* Update the Index size and the CRC32 value. */ +static void XZ_FUNC index_update(struct xz_dec *s, const struct xz_buf *b) +{ + size_t in_used = b->in_pos - s->in_start; + s->index.size += in_used; + s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32); +} + +/* + * Decode the Number of Records, Unpadded Size, and Uncompressed Size + * fields from the Index field. That is, Index Padding and CRC32 are not + * decoded by this function. + * + * This can return XZ_OK (more input needed), XZ_STREAM_END (everything + * successfully decoded), or XZ_DATA_ERROR (input is corrupt). + */ +static enum xz_ret XZ_FUNC dec_index(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + do { + ret = dec_vli(s, b->in, &b->in_pos, b->in_size); + if (ret != XZ_STREAM_END) { + index_update(s, b); + return ret; + } + + switch (s->index.sequence) { + case SEQ_INDEX_COUNT: + s->index.count = s->vli; + + /* + * Validate that the Number of Records field + * indicates the same number of Records as + * there were Blocks in the Stream. + */ + if (s->index.count != s->block.count) + return XZ_DATA_ERROR; + + s->index.sequence = SEQ_INDEX_UNPADDED; + break; + + case SEQ_INDEX_UNPADDED: + s->index.hash.unpadded += s->vli; + s->index.sequence = SEQ_INDEX_UNCOMPRESSED; + break; + + case SEQ_INDEX_UNCOMPRESSED: + s->index.hash.uncompressed += s->vli; + s->index.hash.crc32 = xz_crc32( + (const uint8_t *)&s->index.hash, + sizeof(s->index.hash), + s->index.hash.crc32); + --s->index.count; + s->index.sequence = SEQ_INDEX_UNPADDED; + break; + } + } while (s->index.count > 0); + + return XZ_STREAM_END; +} + +/* + * Validate that the next four input bytes match the value of s->crc32. + * s->pos must be zero when starting to validate the first byte. + */ +static enum xz_ret XZ_FUNC crc32_validate(struct xz_dec *s, struct xz_buf *b) +{ + do { + if (b->in_pos == b->in_size) + return XZ_OK; + + if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++]) + return XZ_DATA_ERROR; + + s->pos += 8; + + } while (s->pos < 32); + + s->crc32 = 0; + s->pos = 0; + + return XZ_STREAM_END; +} + +#ifdef XZ_DEC_ANY_CHECK +/* + * Skip over the Check field when the Check ID is not supported. + * Returns true once the whole Check field has been skipped over. + */ +static bool XZ_FUNC check_skip(struct xz_dec *s, struct xz_buf *b) +{ + while (s->pos < check_sizes[s->check_type]) { + if (b->in_pos == b->in_size) + return false; + + ++b->in_pos; + ++s->pos; + } + + s->pos = 0; + + return true; +} +#endif + +/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */ +static enum xz_ret XZ_FUNC dec_stream_header(struct xz_dec *s) +{ + if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE)) + return XZ_FORMAT_ERROR; + + if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0) + != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2)) + return XZ_DATA_ERROR; + + if (s->temp.buf[HEADER_MAGIC_SIZE] != 0) + return XZ_OPTIONS_ERROR; + + /* + * Of integrity checks, we support only none (Check ID = 0) and + * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined, + * we will accept other check types too, but then the check won't + * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given. + */ + s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; + +#ifdef XZ_DEC_ANY_CHECK + if (s->check_type > XZ_CHECK_MAX) + return XZ_OPTIONS_ERROR; + + if (s->check_type > XZ_CHECK_CRC32) + return XZ_UNSUPPORTED_CHECK; +#else + if (s->check_type > XZ_CHECK_CRC32) + return XZ_OPTIONS_ERROR; +#endif + + return XZ_OK; +} + +/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */ +static enum xz_ret XZ_FUNC dec_stream_footer(struct xz_dec *s) +{ + if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE)) + return XZ_DATA_ERROR; + + if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf)) + return XZ_DATA_ERROR; + + /* + * Validate Backward Size. Note that we never added the size of the + * Index CRC32 field to s->index.size, thus we use s->index.size / 4 + * instead of s->index.size / 4 - 1. + */ + if ((s->index.size >> 2) != get_le32(s->temp.buf + 4)) + return XZ_DATA_ERROR; + + if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type) + return XZ_DATA_ERROR; + + /* + * Use XZ_STREAM_END instead of XZ_OK to be more convenient + * for the caller. + */ + return XZ_STREAM_END; +} + +/* Decode the Block Header and initialize the filter chain. */ +static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s) +{ + enum xz_ret ret; + + /* + * Validate the CRC32. We know that the temp buffer is at least + * eight bytes so this is safe. + */ + s->temp.size -= 4; + if (xz_crc32(s->temp.buf, s->temp.size, 0) + != get_le32(s->temp.buf + s->temp.size)) + return XZ_DATA_ERROR; + + s->temp.pos = 2; + + /* + * Catch unsupported Block Flags. We support only one or two filters + * in the chain, so we catch that with the same test. + */ +#ifdef XZ_DEC_BCJ + if (s->temp.buf[1] & 0x3E) +#else + if (s->temp.buf[1] & 0x3F) +#endif + return XZ_OPTIONS_ERROR; + + /* Compressed Size */ + if (s->temp.buf[1] & 0x40) { + if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) + != XZ_STREAM_END) + return XZ_DATA_ERROR; + + s->block_header.compressed = s->vli; + } else { + s->block_header.compressed = VLI_UNKNOWN; + } + + /* Uncompressed Size */ + if (s->temp.buf[1] & 0x80) { + if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) + != XZ_STREAM_END) + return XZ_DATA_ERROR; + + s->block_header.uncompressed = s->vli; + } else { + s->block_header.uncompressed = VLI_UNKNOWN; + } + +#ifdef XZ_DEC_BCJ + /* If there are two filters, the first one must be a BCJ filter. */ + s->bcj_active = s->temp.buf[1] & 0x01; + if (s->bcj_active) { + if (s->temp.size - s->temp.pos < 2) + return XZ_OPTIONS_ERROR; + + ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]); + if (ret != XZ_OK) + return ret; + + /* + * We don't support custom start offset, + * so Size of Properties must be zero. + */ + if (s->temp.buf[s->temp.pos++] != 0x00) + return XZ_OPTIONS_ERROR; + } +#endif + + /* Valid Filter Flags always take at least two bytes. */ + if (s->temp.size - s->temp.pos < 2) + return XZ_DATA_ERROR; + + /* Filter ID = LZMA2 */ + if (s->temp.buf[s->temp.pos++] != 0x21) + return XZ_OPTIONS_ERROR; + + /* Size of Properties = 1-byte Filter Properties */ + if (s->temp.buf[s->temp.pos++] != 0x01) + return XZ_OPTIONS_ERROR; + + /* Filter Properties contains LZMA2 dictionary size. */ + if (s->temp.size - s->temp.pos < 1) + return XZ_DATA_ERROR; + + ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]); + if (ret != XZ_OK) + return ret; + + /* The rest must be Header Padding. */ + while (s->temp.pos < s->temp.size) + if (s->temp.buf[s->temp.pos++] != 0x00) + return XZ_OPTIONS_ERROR; + + s->temp.pos = 0; + s->block.compressed = 0; + s->block.uncompressed = 0; + + return XZ_OK; +} + +static enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b) +{ + enum xz_ret ret; + + /* + * Store the start position for the case when we are in the middle + * of the Index field. + */ + s->in_start = b->in_pos; + + while (true) { + switch (s->sequence) { + case SEQ_STREAM_HEADER: + /* + * Stream Header is copied to s->temp, and then + * decoded from there. This way if the caller + * gives us only little input at a time, we can + * still keep the Stream Header decoding code + * simple. Similar approach is used in many places + * in this file. + */ + if (!fill_temp(s, b)) + return XZ_OK; + + /* + * If dec_stream_header() returns + * XZ_UNSUPPORTED_CHECK, it is still possible + * to continue decoding if working in multi-call + * mode. Thus, update s->sequence before calling + * dec_stream_header(). + */ + s->sequence = SEQ_BLOCK_START; + + ret = dec_stream_header(s); + if (ret != XZ_OK) + return ret; + + case SEQ_BLOCK_START: + /* We need one byte of input to continue. */ + if (b->in_pos == b->in_size) + return XZ_OK; + + /* See if this is the beginning of the Index field. */ + if (b->in[b->in_pos] == 0) { + s->in_start = b->in_pos++; + s->sequence = SEQ_INDEX; + break; + } + + /* + * Calculate the size of the Block Header and + * prepare to decode it. + */ + s->block_header.size + = ((uint32_t)b->in[b->in_pos] + 1) * 4; + + s->temp.size = s->block_header.size; + s->temp.pos = 0; + s->sequence = SEQ_BLOCK_HEADER; + + case SEQ_BLOCK_HEADER: + if (!fill_temp(s, b)) + return XZ_OK; + + ret = dec_block_header(s); + if (ret != XZ_OK) + return ret; + + s->sequence = SEQ_BLOCK_UNCOMPRESS; + + case SEQ_BLOCK_UNCOMPRESS: + ret = dec_block(s, b); + if (ret != XZ_STREAM_END) + return ret; + + s->sequence = SEQ_BLOCK_PADDING; + + case SEQ_BLOCK_PADDING: + /* + * Size of Compressed Data + Block Padding + * must be a multiple of four. We don't need + * s->block.compressed for anything else + * anymore, so we use it here to test the size + * of the Block Padding field. + */ + while (s->block.compressed & 3) { + if (b->in_pos == b->in_size) + return XZ_OK; + + if (b->in[b->in_pos++] != 0) + return XZ_DATA_ERROR; + + ++s->block.compressed; + } + + s->sequence = SEQ_BLOCK_CHECK; + + case SEQ_BLOCK_CHECK: + if (s->check_type == XZ_CHECK_CRC32) { + ret = crc32_validate(s, b); + if (ret != XZ_STREAM_END) + return ret; + } +#ifdef XZ_DEC_ANY_CHECK + else if (!check_skip(s, b)) { + return XZ_OK; + } +#endif + + s->sequence = SEQ_BLOCK_START; + break; + + case SEQ_INDEX: + ret = dec_index(s, b); + if (ret != XZ_STREAM_END) + return ret; + + s->sequence = SEQ_INDEX_PADDING; + + case SEQ_INDEX_PADDING: + while ((s->index.size + (b->in_pos - s->in_start)) + & 3) { + if (b->in_pos == b->in_size) { + index_update(s, b); + return XZ_OK; + } + + if (b->in[b->in_pos++] != 0) + return XZ_DATA_ERROR; + } + + /* Finish the CRC32 value and Index size. */ + index_update(s, b); + + /* Compare the hashes to validate the Index field. */ + if (!memeq(&s->block.hash, &s->index.hash, + sizeof(s->block.hash))) + return XZ_DATA_ERROR; + + s->sequence = SEQ_INDEX_CRC32; + + case SEQ_INDEX_CRC32: + ret = crc32_validate(s, b); + if (ret != XZ_STREAM_END) + return ret; + + s->temp.size = STREAM_HEADER_SIZE; + s->sequence = SEQ_STREAM_FOOTER; + + case SEQ_STREAM_FOOTER: + if (!fill_temp(s, b)) + return XZ_OK; + + return dec_stream_footer(s); + } + } + + /* Never reached */ +} + +/* + * xz_dec_run() is a wrapper for dec_main() to handle some special cases in + * multi-call and single-call decoding. + * + * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we + * are not going to make any progress anymore. This is to prevent the caller + * from calling us infinitely when the input file is truncated or otherwise + * corrupt. Since zlib-style API allows that the caller fills the input buffer + * only when the decoder doesn't produce any new output, we have to be careful + * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only + * after the second consecutive call to xz_dec_run() that makes no progress. + * + * In single-call mode, if we couldn't decode everything and no error + * occurred, either the input is truncated or the output buffer is too small. + * Since we know that the last input byte never produces any output, we know + * that if all the input was consumed and decoding wasn't finished, the file + * must be corrupt. Otherwise the output buffer has to be too small or the + * file is corrupt in a way that decoding it produces too big output. + * + * If single-call decoding fails, we reset b->in_pos and b->out_pos back to + * their original values. This is because with some filter chains there won't + * be any valid uncompressed data in the output buffer unless the decoding + * actually succeeds (that's the price to pay of using the output buffer as + * the workspace). + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b) +{ + size_t in_start; + size_t out_start; + enum xz_ret ret; + + if (DEC_IS_SINGLE(s->mode)) + xz_dec_reset(s); + + in_start = b->in_pos; + out_start = b->out_pos; + ret = dec_main(s, b); + + if (DEC_IS_SINGLE(s->mode)) { + if (ret == XZ_OK) + ret = b->in_pos == b->in_size + ? XZ_DATA_ERROR : XZ_BUF_ERROR; + + if (ret != XZ_STREAM_END) { + b->in_pos = in_start; + b->out_pos = out_start; + } + + } else if (ret == XZ_OK && in_start == b->in_pos + && out_start == b->out_pos) { + if (s->allow_buf_error) + ret = XZ_BUF_ERROR; + + s->allow_buf_error = true; + } else { + s->allow_buf_error = false; + } + + return ret; +} + +XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init( + enum xz_mode mode, uint32_t dict_max) +{ + struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); + if (s == NULL) + return NULL; + + s->mode = mode; + +#ifdef XZ_DEC_BCJ + s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); + if (s->bcj == NULL) + goto error_bcj; +#endif + + s->lzma2 = xz_dec_lzma2_create(mode, dict_max); + if (s->lzma2 == NULL) + goto error_lzma2; + + xz_dec_reset(s); + return s; + +error_lzma2: +#ifdef XZ_DEC_BCJ + xz_dec_bcj_end(s->bcj); +error_bcj: +#endif + kfree(s); + return NULL; +} + +XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s) +{ + s->sequence = SEQ_STREAM_HEADER; + s->allow_buf_error = false; + s->pos = 0; + s->crc32 = 0; + memzero(&s->block, sizeof(s->block)); + memzero(&s->index, sizeof(s->index)); + s->temp.pos = 0; + s->temp.size = STREAM_HEADER_SIZE; +} + +XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s) +{ + if (s != NULL) { + xz_dec_lzma2_end(s->lzma2); +#ifdef XZ_DEC_BCJ + xz_dec_bcj_end(s->bcj); +#endif + kfree(s); + } +} diff --git a/archival/libarchive/unxz/xz_lzma2.h b/archival/libarchive/unxz/xz_lzma2.h new file mode 100644 index 000000000..47f21afbc --- /dev/null +++ b/archival/libarchive/unxz/xz_lzma2.h @@ -0,0 +1,204 @@ +/* + * LZMA2 definitions + * + * Authors: Lasse Collin + * Igor Pavlov + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_LZMA2_H +#define XZ_LZMA2_H + +/* Range coder constants */ +#define RC_SHIFT_BITS 8 +#define RC_TOP_BITS 24 +#define RC_TOP_VALUE (1 << RC_TOP_BITS) +#define RC_BIT_MODEL_TOTAL_BITS 11 +#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS) +#define RC_MOVE_BITS 5 + +/* + * Maximum number of position states. A position state is the lowest pb + * number of bits of the current uncompressed offset. In some places there + * are different sets of probabilities for different position states. + */ +#define POS_STATES_MAX (1 << 4) + +/* + * This enum is used to track which LZMA symbols have occurred most recently + * and in which order. This information is used to predict the next symbol. + * + * Symbols: + * - Literal: One 8-bit byte + * - Match: Repeat a chunk of data at some distance + * - Long repeat: Multi-byte match at a recently seen distance + * - Short repeat: One-byte repeat at a recently seen distance + * + * The symbol names are in from STATE_oldest_older_previous. REP means + * either short or long repeated match, and NONLIT means any non-literal. + */ +enum lzma_state { + STATE_LIT_LIT, + STATE_MATCH_LIT_LIT, + STATE_REP_LIT_LIT, + STATE_SHORTREP_LIT_LIT, + STATE_MATCH_LIT, + STATE_REP_LIT, + STATE_SHORTREP_LIT, + STATE_LIT_MATCH, + STATE_LIT_LONGREP, + STATE_LIT_SHORTREP, + STATE_NONLIT_MATCH, + STATE_NONLIT_REP +}; + +/* Total number of states */ +#define STATES 12 + +/* The lowest 7 states indicate that the previous state was a literal. */ +#define LIT_STATES 7 + +/* Indicate that the latest symbol was a literal. */ +static inline void XZ_FUNC lzma_state_literal(enum lzma_state *state) +{ + if (*state <= STATE_SHORTREP_LIT_LIT) + *state = STATE_LIT_LIT; + else if (*state <= STATE_LIT_SHORTREP) + *state -= 3; + else + *state -= 6; +} + +/* Indicate that the latest symbol was a match. */ +static inline void XZ_FUNC lzma_state_match(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH; +} + +/* Indicate that the latest state was a long repeated match. */ +static inline void XZ_FUNC lzma_state_long_rep(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP; +} + +/* Indicate that the latest symbol was a short match. */ +static inline void XZ_FUNC lzma_state_short_rep(enum lzma_state *state) +{ + *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP; +} + +/* Test if the previous symbol was a literal. */ +static inline bool XZ_FUNC lzma_state_is_literal(enum lzma_state state) +{ + return state < LIT_STATES; +} + +/* Each literal coder is divided in three sections: + * - 0x001-0x0FF: Without match byte + * - 0x101-0x1FF: With match byte; match bit is 0 + * - 0x201-0x2FF: With match byte; match bit is 1 + * + * Match byte is used when the previous LZMA symbol was something else than + * a literal (that is, it was some kind of match). + */ +#define LITERAL_CODER_SIZE 0x300 + +/* Maximum number of literal coders */ +#define LITERAL_CODERS_MAX (1 << 4) + +/* Minimum length of a match is two bytes. */ +#define MATCH_LEN_MIN 2 + +/* Match length is encoded with 4, 5, or 10 bits. + * + * Length Bits + * 2-9 4 = Choice=0 + 3 bits + * 10-17 5 = Choice=1 + Choice2=0 + 3 bits + * 18-273 10 = Choice=1 + Choice2=1 + 8 bits + */ +#define LEN_LOW_BITS 3 +#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) +#define LEN_MID_BITS 3 +#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) +#define LEN_HIGH_BITS 8 +#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) +#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) + +/* + * Maximum length of a match is 273 which is a result of the encoding + * described above. + */ +#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) + +/* + * Different sets of probabilities are used for match distances that have + * very short match length: Lengths of 2, 3, and 4 bytes have a separate + * set of probabilities for each length. The matches with longer length + * use a shared set of probabilities. + */ +#define DIST_STATES 4 + +/* + * Get the index of the appropriate probability array for decoding + * the distance slot. + */ +static inline uint32_t XZ_FUNC lzma_get_dist_state(uint32_t len) +{ + return len < DIST_STATES + MATCH_LEN_MIN + ? len - MATCH_LEN_MIN : DIST_STATES - 1; +} + +/* + * The highest two bits of a 32-bit match distance are encoded using six bits. + * This six-bit value is called a distance slot. This way encoding a 32-bit + * value takes 6-36 bits, larger values taking more bits. + */ +#define DIST_SLOT_BITS 6 +#define DIST_SLOTS (1 << DIST_SLOT_BITS) + +/* Match distances up to 127 are fully encoded using probabilities. Since + * the highest two bits (distance slot) are always encoded using six bits, + * the distances 0-3 don't need any additional bits to encode, since the + * distance slot itself is the same as the actual distance. DIST_MODEL_START + * indicates the first distance slot where at least one additional bit is + * needed. + */ +#define DIST_MODEL_START 4 + +/* + * Match distances greater than 127 are encoded in three pieces: + * - distance slot: the highest two bits + * - direct bits: 2-26 bits below the highest two bits + * - alignment bits: four lowest bits + * + * Direct bits don't use any probabilities. + * + * The distance slot value of 14 is for distances 128-191. + */ +#define DIST_MODEL_END 14 + +/* Distance slots that indicate a distance <= 127. */ +#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2) +#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) + +/* + * For match distances greater than 127, only the highest two bits and the + * lowest four bits (alignment) is encoded using probabilities. + */ +#define ALIGN_BITS 4 +#define ALIGN_SIZE (1 << ALIGN_BITS) +#define ALIGN_MASK (ALIGN_SIZE - 1) + +/* Total number of all probability variables */ +#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE) + +/* + * LZMA remembers the four most recent match distances. Reusing these + * distances tends to take less space than re-encoding the actual + * distance value. + */ +#define REPS 4 + +#endif diff --git a/archival/libarchive/unxz/xz_private.h b/archival/libarchive/unxz/xz_private.h new file mode 100644 index 000000000..145649a83 --- /dev/null +++ b/archival/libarchive/unxz/xz_private.h @@ -0,0 +1,159 @@ +/* + * Private includes and definitions + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_PRIVATE_H +#define XZ_PRIVATE_H + +#ifdef __KERNEL__ + /* XZ_PREBOOT may be defined only via decompress_unxz.c. */ +# ifndef XZ_PREBOOT +# include +# include +# include +# define memeq(a, b, size) (memcmp(a, b, size) == 0) +# define memzero(buf, size) memset(buf, 0, size) +# endif +# include +# include +# define get_le32(p) le32_to_cpup((const uint32_t *)(p)) + /* XZ_IGNORE_KCONFIG may be defined only via decompress_unxz.c. */ +# ifndef XZ_IGNORE_KCONFIG +# ifdef CONFIG_XZ_DEC_X86 +# define XZ_DEC_X86 +# endif +# ifdef CONFIG_XZ_DEC_POWERPC +# define XZ_DEC_POWERPC +# endif +# ifdef CONFIG_XZ_DEC_IA64 +# define XZ_DEC_IA64 +# endif +# ifdef CONFIG_XZ_DEC_ARM +# define XZ_DEC_ARM +# endif +# ifdef CONFIG_XZ_DEC_ARMTHUMB +# define XZ_DEC_ARMTHUMB +# endif +# ifdef CONFIG_XZ_DEC_SPARC +# define XZ_DEC_SPARC +# endif +# endif +# include +#else + /* + * For userspace builds, use a separate header to define the required + * macros and functions. This makes it easier to adapt the code into + * different environments and avoids clutter in the Linux kernel tree. + */ +# include "xz_config.h" +#endif + +/* If no specific decoding mode is requested, enable support for all modes. */ +#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \ + && !defined(XZ_DEC_DYNALLOC) +# define XZ_DEC_SINGLE +# define XZ_DEC_PREALLOC +# define XZ_DEC_DYNALLOC +#endif + +/* + * The DEC_IS_foo(mode) macros are used in "if" statements. If only some + * of the supported modes are enabled, these macros will evaluate to true or + * false at compile time and thus allow the compiler to omit unneeded code. + */ +#ifdef XZ_DEC_SINGLE +# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE) +#else +# define DEC_IS_SINGLE(mode) (false) +#endif + +#ifdef XZ_DEC_PREALLOC +# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC) +#else +# define DEC_IS_PREALLOC(mode) (false) +#endif + +#ifdef XZ_DEC_DYNALLOC +# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC) +#else +# define DEC_IS_DYNALLOC(mode) (false) +#endif + +#if !defined(XZ_DEC_SINGLE) +# define DEC_IS_MULTI(mode) (true) +#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC) +# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE) +#else +# define DEC_IS_MULTI(mode) (false) +#endif + +/* + * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ. + * XZ_DEC_BCJ is used to enable generic support for BCJ decoders. + */ +#ifndef XZ_DEC_BCJ +# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \ + || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \ + || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \ + || defined(XZ_DEC_SPARC) +# define XZ_DEC_BCJ +# endif +#endif + +/* + * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used + * before calling xz_dec_lzma2_run(). + */ +XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create( + enum xz_mode mode, uint32_t dict_max); + +/* + * Decode the LZMA2 properties (one byte) and reset the decoder. Return + * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not + * big enough, and XZ_OPTIONS_ERROR if props indicates something that this + * decoder doesn't support. + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset( + struct xz_dec_lzma2 *s, uint8_t props); + +/* Decode raw LZMA2 stream from b->in to b->out. */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_run( + struct xz_dec_lzma2 *s, struct xz_buf *b); + +/* Free the memory allocated for the LZMA2 decoder. */ +XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s); + +#ifdef XZ_DEC_BCJ +/* + * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before + * calling xz_dec_bcj_run(). + */ +XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call); + +/* + * Decode the Filter ID of a BCJ filter. This implementation doesn't + * support custom start offsets, so no decoding of Filter Properties + * is needed. Returns XZ_OK if the given Filter ID is supported. + * Otherwise XZ_OPTIONS_ERROR is returned. + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset( + struct xz_dec_bcj *s, uint8_t id); + +/* + * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is + * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run() + * must be called directly. + */ +XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s, + struct xz_dec_lzma2 *lzma2, struct xz_buf *b); + +/* Free the memory allocated for the BCJ filters. */ +#define xz_dec_bcj_end(s) kfree(s) +#endif + +#endif diff --git a/archival/libarchive/unxz/xz_stream.h b/archival/libarchive/unxz/xz_stream.h new file mode 100644 index 000000000..36f2a7cbf --- /dev/null +++ b/archival/libarchive/unxz/xz_stream.h @@ -0,0 +1,57 @@ +/* + * Definitions for handling the .xz file format + * + * Author: Lasse Collin + * + * This file has been put into the public domain. + * You can do whatever you want with this file. + */ + +#ifndef XZ_STREAM_H +#define XZ_STREAM_H + +#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32 +# include +# undef crc32 +# define xz_crc32(buf, size, crc) \ + (~crc32_le(~(uint32_t)(crc), buf, size)) +#endif + +/* + * See the .xz file format specification at + * http://tukaani.org/xz/xz-file-format.txt + * to understand the container format. + */ + +#define STREAM_HEADER_SIZE 12 + +#define HEADER_MAGIC "\3757zXZ\0" +#define HEADER_MAGIC_SIZE 6 + +#define FOOTER_MAGIC "YZ" +#define FOOTER_MAGIC_SIZE 2 + +/* + * Variable-length integer can hold a 63-bit unsigned integer, or a special + * value to indicate that the value is unknown. + */ +typedef uint64_t vli_type; + +#define VLI_MAX ((vli_type)-1 / 2) +#define VLI_UNKNOWN ((vli_type)-1) + +/* Maximum encoded size of a VLI */ +#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7) + +/* Integrity Check types */ +enum xz_check { + XZ_CHECK_NONE = 0, + XZ_CHECK_CRC32 = 1, + XZ_CHECK_CRC64 = 4, + XZ_CHECK_SHA256 = 10 +}; + +/* Maximum possible Check ID */ +#define XZ_CHECK_MAX 15 + +#endif diff --git a/archival/libunarchive/Kbuild.src b/archival/libunarchive/Kbuild.src deleted file mode 100644 index b0bc4e5aa..000000000 --- a/archival/libunarchive/Kbuild.src +++ /dev/null @@ -1,64 +0,0 @@ -# Makefile for busybox -# -# Copyright (C) 1999-2004 by Erik Andersen -# -# Licensed under GPLv2 or later, see file LICENSE in this source tree. - -lib-y:= - -COMMON_FILES:= \ -\ - data_skip.o \ - data_extract_all.o \ - data_extract_to_stdout.o \ -\ - filter_accept_all.o \ - filter_accept_list.o \ - filter_accept_reject_list.o \ -\ - header_skip.o \ - header_list.o \ - header_verbose_list.o \ -\ - seek_by_read.o \ - seek_by_jump.o \ -\ - data_align.o \ - find_list_entry.o \ - init_handle.o - -DPKG_FILES:= \ - get_header_ar.o \ - unpack_ar_archive.o \ - get_header_tar.o \ - filter_accept_list_reassign.o - -INSERT - -lib-$(CONFIG_AR) += get_header_ar.o unpack_ar_archive.o -lib-$(CONFIG_BUNZIP2) += decompress_bunzip2.o -lib-$(CONFIG_UNLZMA) += decompress_unlzma.o -lib-$(CONFIG_UNXZ) += decompress_unxz.o -lib-$(CONFIG_CPIO) += get_header_cpio.o -lib-$(CONFIG_DPKG) += $(DPKG_FILES) -lib-$(CONFIG_DPKG_DEB) += $(DPKG_FILES) -lib-$(CONFIG_GUNZIP) += decompress_unzip.o -lib-$(CONFIG_RPM2CPIO) += decompress_unzip.o get_header_cpio.o -lib-$(CONFIG_RPM) += open_transformer.o decompress_unzip.o get_header_cpio.o -lib-$(CONFIG_TAR) += get_header_tar.o -lib-$(CONFIG_UNCOMPRESS) += decompress_uncompress.o -lib-$(CONFIG_UNZIP) += decompress_unzip.o -lib-$(CONFIG_LZOP) += lzo1x_1.o lzo1x_1o.o lzo1x_d.o -lib-$(CONFIG_LZOP_COMPR_HIGH) += lzo1x_9x.o -lib-$(CONFIG_FEATURE_SEAMLESS_Z) += open_transformer.o decompress_uncompress.o -lib-$(CONFIG_FEATURE_SEAMLESS_GZ) += open_transformer.o decompress_unzip.o get_header_tar_gz.o -lib-$(CONFIG_FEATURE_SEAMLESS_BZ2) += open_transformer.o decompress_bunzip2.o get_header_tar_bz2.o -lib-$(CONFIG_FEATURE_SEAMLESS_LZMA) += open_transformer.o decompress_unlzma.o get_header_tar_lzma.o -lib-$(CONFIG_FEATURE_SEAMLESS_XZ) += open_transformer.o decompress_unxz.o -lib-$(CONFIG_FEATURE_COMPRESS_USAGE) += decompress_bunzip2.o -lib-$(CONFIG_FEATURE_COMPRESS_BBCONFIG) += decompress_bunzip2.o -lib-$(CONFIG_FEATURE_TAR_TO_COMMAND) += data_extract_to_command.o - -ifneq ($(lib-y),) -lib-y += $(COMMON_FILES) -endif diff --git a/archival/libunarchive/data_align.c b/archival/libunarchive/data_align.c deleted file mode 100644 index 4e21a36b3..000000000 --- a/archival/libunarchive/data_align.c +++ /dev/null @@ -1,15 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -void FAST_FUNC data_align(archive_handle_t *archive_handle, unsigned boundary) -{ - unsigned skip_amount = (boundary - (archive_handle->offset % boundary)) % boundary; - - archive_handle->seek(archive_handle->src_fd, skip_amount); - archive_handle->offset += skip_amount; -} diff --git a/archival/libunarchive/data_extract_all.c b/archival/libunarchive/data_extract_all.c deleted file mode 100644 index 5fb1ab2ae..000000000 --- a/archival/libunarchive/data_extract_all.c +++ /dev/null @@ -1,200 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -void FAST_FUNC data_extract_all(archive_handle_t *archive_handle) -{ - file_header_t *file_header = archive_handle->file_header; - int dst_fd; - int res; - -#if ENABLE_FEATURE_TAR_SELINUX - char *sctx = archive_handle->tar__next_file_sctx; - if (!sctx) - sctx = archive_handle->tar__global_sctx; - if (sctx) { /* setfscreatecon is 4 syscalls, avoid if possible */ - setfscreatecon(sctx); - free(archive_handle->tar__next_file_sctx); - archive_handle->tar__next_file_sctx = NULL; - } -#endif - - if (archive_handle->ah_flags & ARCHIVE_CREATE_LEADING_DIRS) { - char *slash = strrchr(file_header->name, '/'); - if (slash) { - *slash = '\0'; - bb_make_directory(file_header->name, -1, FILEUTILS_RECUR); - *slash = '/'; - } - } - - if (archive_handle->ah_flags & ARCHIVE_UNLINK_OLD) { - /* Remove the entry if it exists */ - if (!S_ISDIR(file_header->mode)) { - /* Is it hardlink? - * We encode hard links as regular files of size 0 with a symlink */ - if (S_ISREG(file_header->mode) - && file_header->link_target - && file_header->size == 0 - ) { - /* Ugly special case: - * tar cf t.tar hardlink1 hardlink2 hardlink1 - * results in this tarball structure: - * hardlink1 - * hardlink2 -> hardlink1 - * hardlink1 -> hardlink1 <== !!! - */ - if (strcmp(file_header->link_target, file_header->name) == 0) - goto ret; - } - /* Proceed with deleting */ - if (unlink(file_header->name) == -1 - && errno != ENOENT - ) { - bb_perror_msg_and_die("can't remove old file %s", - file_header->name); - } - } - } - else if (archive_handle->ah_flags & ARCHIVE_EXTRACT_NEWER) { - /* Remove the existing entry if its older than the extracted entry */ - struct stat existing_sb; - if (lstat(file_header->name, &existing_sb) == -1) { - if (errno != ENOENT) { - bb_perror_msg_and_die("can't stat old file"); - } - } - else if (existing_sb.st_mtime >= file_header->mtime) { - if (!(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) - && !S_ISDIR(file_header->mode) - ) { - bb_error_msg("%s not created: newer or " - "same age file exists", file_header->name); - } - data_skip(archive_handle); - goto ret; - } - else if ((unlink(file_header->name) == -1) && (errno != EISDIR)) { - bb_perror_msg_and_die("can't remove old file %s", - file_header->name); - } - } - - /* Handle hard links separately - * We encode hard links as regular files of size 0 with a symlink */ - if (S_ISREG(file_header->mode) - && file_header->link_target - && file_header->size == 0 - ) { - /* hard link */ - res = link(file_header->link_target, file_header->name); - if ((res == -1) && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET)) { - bb_perror_msg("can't create %slink " - "from %s to %s", "hard", - file_header->name, - file_header->link_target); - } - /* Hardlinks have no separate mode/ownership, skip chown/chmod */ - goto ret; - } - - /* Create the filesystem entry */ - switch (file_header->mode & S_IFMT) { - case S_IFREG: { - /* Regular file */ - int flags = O_WRONLY | O_CREAT | O_EXCL; - if (archive_handle->ah_flags & ARCHIVE_O_TRUNC) - flags = O_WRONLY | O_CREAT | O_TRUNC; - dst_fd = xopen3(file_header->name, - flags, - file_header->mode - ); - bb_copyfd_exact_size(archive_handle->src_fd, dst_fd, file_header->size); - close(dst_fd); - break; - } - case S_IFDIR: - res = mkdir(file_header->name, file_header->mode); - if ((res == -1) - && (errno != EISDIR) /* btw, Linux doesn't return this */ - && (errno != EEXIST) - && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) - ) { - bb_perror_msg("can't make dir %s", file_header->name); - } - break; - case S_IFLNK: - /* Symlink */ -//TODO: what if file_header->link_target == NULL (say, corrupted tarball?) - res = symlink(file_header->link_target, file_header->name); - if ((res == -1) - && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) - ) { - bb_perror_msg("can't create %slink " - "from %s to %s", "sym", - file_header->name, - file_header->link_target); - } - break; - case S_IFSOCK: - case S_IFBLK: - case S_IFCHR: - case S_IFIFO: - res = mknod(file_header->name, file_header->mode, file_header->device); - if ((res == -1) - && !(archive_handle->ah_flags & ARCHIVE_EXTRACT_QUIET) - ) { - bb_perror_msg("can't create node %s", file_header->name); - } - break; - default: - bb_error_msg_and_die("unrecognized file type"); - } - - if (!S_ISLNK(file_header->mode)) { - if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_OWNER)) { - uid_t uid = file_header->uid; - gid_t gid = file_header->gid; -#if ENABLE_FEATURE_TAR_UNAME_GNAME - if (!(archive_handle->ah_flags & ARCHIVE_NUMERIC_OWNER)) { - if (file_header->tar__uname) { -//TODO: cache last name/id pair? - struct passwd *pwd = getpwnam(file_header->tar__uname); - if (pwd) uid = pwd->pw_uid; - } - if (file_header->tar__gname) { - struct group *grp = getgrnam(file_header->tar__gname); - if (grp) gid = grp->gr_gid; - } - } -#endif - /* GNU tar 1.15.1 uses chown, not lchown */ - chown(file_header->name, uid, gid); - } - /* uclibc has no lchmod, glibc is even stranger - - * it has lchmod which seems to do nothing! - * so we use chmod... */ - if (!(archive_handle->ah_flags & ARCHIVE_DONT_RESTORE_PERM)) { - chmod(file_header->name, file_header->mode); - } - if (archive_handle->ah_flags & ARCHIVE_RESTORE_DATE) { - struct timeval t[2]; - - t[1].tv_sec = t[0].tv_sec = file_header->mtime; - t[1].tv_usec = t[0].tv_usec = 0; - utimes(file_header->name, t); - } - } - - ret: ; -#if ENABLE_FEATURE_TAR_SELINUX - if (sctx) { - /* reset the context after creating an entry */ - setfscreatecon(NULL); - } -#endif -} diff --git a/archival/libunarchive/data_extract_to_command.c b/archival/libunarchive/data_extract_to_command.c deleted file mode 100644 index b54f7f215..000000000 --- a/archival/libunarchive/data_extract_to_command.c +++ /dev/null @@ -1,134 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -enum { - //TAR_FILETYPE, - TAR_MODE, - TAR_FILENAME, - TAR_REALNAME, -#if ENABLE_FEATURE_TAR_UNAME_GNAME - TAR_UNAME, - TAR_GNAME, -#endif - TAR_SIZE, - TAR_UID, - TAR_GID, - TAR_MAX, -}; - -static const char *const tar_var[] = { - // "FILETYPE", - "MODE", - "FILENAME", - "REALNAME", -#if ENABLE_FEATURE_TAR_UNAME_GNAME - "UNAME", - "GNAME", -#endif - "SIZE", - "UID", - "GID", -}; - -static void xputenv(char *str) -{ - if (putenv(str)) - bb_error_msg_and_die(bb_msg_memory_exhausted); -} - -static void str2env(char *env[], int idx, const char *str) -{ - env[idx] = xasprintf("TAR_%s=%s", tar_var[idx], str); - xputenv(env[idx]); -} - -static void dec2env(char *env[], int idx, unsigned long long val) -{ - env[idx] = xasprintf("TAR_%s=%llu", tar_var[idx], val); - xputenv(env[idx]); -} - -static void oct2env(char *env[], int idx, unsigned long val) -{ - env[idx] = xasprintf("TAR_%s=%lo", tar_var[idx], val); - xputenv(env[idx]); -} - -void FAST_FUNC data_extract_to_command(archive_handle_t *archive_handle) -{ - file_header_t *file_header = archive_handle->file_header; - -#if 0 /* do we need this? ENABLE_FEATURE_TAR_SELINUX */ - char *sctx = archive_handle->tar__next_file_sctx; - if (!sctx) - sctx = archive_handle->tar__global_sctx; - if (sctx) { /* setfscreatecon is 4 syscalls, avoid if possible */ - setfscreatecon(sctx); - free(archive_handle->tar__next_file_sctx); - archive_handle->tar__next_file_sctx = NULL; - } -#endif - - if ((file_header->mode & S_IFMT) == S_IFREG) { - pid_t pid; - int p[2], status; - char *tar_env[TAR_MAX]; - - memset(tar_env, 0, sizeof(tar_env)); - - xpipe(p); - pid = BB_MMU ? xfork() : xvfork(); - if (pid == 0) { - /* Child */ - /* str2env(tar_env, TAR_FILETYPE, "f"); - parent should do it once */ - oct2env(tar_env, TAR_MODE, file_header->mode); - str2env(tar_env, TAR_FILENAME, file_header->name); - str2env(tar_env, TAR_REALNAME, file_header->name); -#if ENABLE_FEATURE_TAR_UNAME_GNAME - str2env(tar_env, TAR_UNAME, file_header->tar__uname); - str2env(tar_env, TAR_GNAME, file_header->tar__gname); -#endif - dec2env(tar_env, TAR_SIZE, file_header->size); - dec2env(tar_env, TAR_UID, file_header->uid); - dec2env(tar_env, TAR_GID, file_header->gid); - close(p[1]); - xdup2(p[0], STDIN_FILENO); - signal(SIGPIPE, SIG_DFL); - execl(DEFAULT_SHELL, DEFAULT_SHELL_SHORT_NAME, "-c", archive_handle->tar__to_command, NULL); - bb_perror_msg_and_die("can't execute '%s'", DEFAULT_SHELL); - } - close(p[0]); - /* Our caller is expected to do signal(SIGPIPE, SIG_IGN) - * so that we don't die if child don't read all the input: */ - bb_copyfd_exact_size(archive_handle->src_fd, p[1], -file_header->size); - close(p[1]); - - if (safe_waitpid(pid, &status, 0) == -1) - bb_perror_msg_and_die("waitpid"); - if (WIFEXITED(status) && WEXITSTATUS(status)) - bb_error_msg_and_die("'%s' returned status %d", - archive_handle->tar__to_command, WEXITSTATUS(status)); - if (WIFSIGNALED(status)) - bb_error_msg_and_die("'%s' terminated on signal %d", - archive_handle->tar__to_command, WTERMSIG(status)); - - if (!BB_MMU) { - int i; - for (i = 0; i < TAR_MAX; i++) { - if (tar_env[i]) - bb_unsetenv_and_free(tar_env[i]); - } - } - } - -#if 0 /* ENABLE_FEATURE_TAR_SELINUX */ - if (sctx) - /* reset the context after creating an entry */ - setfscreatecon(NULL); -#endif -} diff --git a/archival/libunarchive/data_extract_to_stdout.c b/archival/libunarchive/data_extract_to_stdout.c deleted file mode 100644 index ce0713ac4..000000000 --- a/archival/libunarchive/data_extract_to_stdout.c +++ /dev/null @@ -1,14 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -void FAST_FUNC data_extract_to_stdout(archive_handle_t *archive_handle) -{ - bb_copyfd_exact_size(archive_handle->src_fd, - STDOUT_FILENO, - archive_handle->file_header->size); -} diff --git a/archival/libunarchive/data_skip.c b/archival/libunarchive/data_skip.c deleted file mode 100644 index 06b74399d..000000000 --- a/archival/libunarchive/data_skip.c +++ /dev/null @@ -1,12 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -void FAST_FUNC data_skip(archive_handle_t *archive_handle) -{ - archive_handle->seek(archive_handle->src_fd, archive_handle->file_header->size); -} diff --git a/archival/libunarchive/decompress_bunzip2.c b/archival/libunarchive/decompress_bunzip2.c deleted file mode 100644 index 15f08a60e..000000000 --- a/archival/libunarchive/decompress_bunzip2.c +++ /dev/null @@ -1,822 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* Small bzip2 deflate implementation, by Rob Landley (rob@landley.net). - - Based on bzip2 decompression code by Julian R Seward (jseward@acm.org), - which also acknowledges contributions by Mike Burrows, David Wheeler, - Peter Fenwick, Alistair Moffat, Radford Neal, Ian H. Witten, - Robert Sedgewick, and Jon L. Bentley. - - Licensed under GPLv2 or later, see file LICENSE in this source tree. -*/ - -/* - Size and speed optimizations by Manuel Novoa III (mjn3@codepoet.org). - - More efficient reading of Huffman codes, a streamlined read_bunzip() - function, and various other tweaks. In (limited) tests, approximately - 20% faster than bzcat on x86 and about 10% faster on arm. - - Note that about 2/3 of the time is spent in read_bunzip() reversing - the Burrows-Wheeler transformation. Much of that time is delay - resulting from cache misses. - - (2010 update by vda: profiled "bzcat <84mbyte.bz2 >/dev/null" - on x86-64 CPU with L2 > 1M: get_next_block is hotter than read_bunzip: - %time seconds calls function - 71.01 12.69 444 get_next_block - 28.65 5.12 93065 read_bunzip - 00.22 0.04 7736490 get_bits - 00.11 0.02 47 dealloc_bunzip - 00.00 0.00 93018 full_write - ...) - - - I would ask that anyone benefiting from this work, especially those - using it in commercial products, consider making a donation to my local - non-profit hospice organization (www.hospiceacadiana.com) in the name of - the woman I loved, Toni W. Hagan, who passed away Feb. 12, 2003. - - Manuel - */ - -#include "libbb.h" -#include "unarchive.h" - -/* Constants for Huffman coding */ -#define MAX_GROUPS 6 -#define GROUP_SIZE 50 /* 64 would have been more efficient */ -#define MAX_HUFCODE_BITS 20 /* Longest Huffman code allowed */ -#define MAX_SYMBOLS 258 /* 256 literals + RUNA + RUNB */ -#define SYMBOL_RUNA 0 -#define SYMBOL_RUNB 1 - -/* Status return values */ -#define RETVAL_OK 0 -#define RETVAL_LAST_BLOCK (-1) -#define RETVAL_NOT_BZIP_DATA (-2) -#define RETVAL_UNEXPECTED_INPUT_EOF (-3) -#define RETVAL_SHORT_WRITE (-4) -#define RETVAL_DATA_ERROR (-5) -#define RETVAL_OUT_OF_MEMORY (-6) -#define RETVAL_OBSOLETE_INPUT (-7) - -/* Other housekeeping constants */ -#define IOBUF_SIZE 4096 - -/* This is what we know about each Huffman coding group */ -struct group_data { - /* We have an extra slot at the end of limit[] for a sentinel value. */ - int limit[MAX_HUFCODE_BITS+1], base[MAX_HUFCODE_BITS], permute[MAX_SYMBOLS]; - int minLen, maxLen; -}; - -/* Structure holding all the housekeeping data, including IO buffers and - * memory that persists between calls to bunzip - * Found the most used member: - * cat this_file.c | sed -e 's/"/ /g' -e "s/'/ /g" | xargs -n1 \ - * | grep 'bd->' | sed 's/^.*bd->/bd->/' | sort | $PAGER - * and moved it (inbufBitCount) to offset 0. - */ -struct bunzip_data { - /* I/O tracking data (file handles, buffers, positions, etc.) */ - unsigned inbufBitCount, inbufBits; - int in_fd, out_fd, inbufCount, inbufPos /*, outbufPos*/; - uint8_t *inbuf /*,*outbuf*/; - - /* State for interrupting output loop */ - int writeCopies, writePos, writeRunCountdown, writeCount; - int writeCurrent; /* actually a uint8_t */ - - /* The CRC values stored in the block header and calculated from the data */ - uint32_t headerCRC, totalCRC, writeCRC; - - /* Intermediate buffer and its size (in bytes) */ - uint32_t *dbuf; - unsigned dbufSize; - - /* For I/O error handling */ - jmp_buf jmpbuf; - - /* Big things go last (register-relative addressing can be larger for big offsets) */ - uint32_t crc32Table[256]; - uint8_t selectors[32768]; /* nSelectors=15 bits */ - struct group_data groups[MAX_GROUPS]; /* Huffman coding tables */ -}; -/* typedef struct bunzip_data bunzip_data; -- done in .h file */ - - -/* Return the next nnn bits of input. All reads from the compressed input - are done through this function. All reads are big endian */ -static unsigned get_bits(bunzip_data *bd, int bits_wanted) -{ - unsigned bits = 0; - /* Cache bd->inbufBitCount in a CPU register (hopefully): */ - int bit_count = bd->inbufBitCount; - - /* If we need to get more data from the byte buffer, do so. (Loop getting - one byte at a time to enforce endianness and avoid unaligned access.) */ - while (bit_count < bits_wanted) { - - /* If we need to read more data from file into byte buffer, do so */ - if (bd->inbufPos == bd->inbufCount) { - /* if "no input fd" case: in_fd == -1, read fails, we jump */ - bd->inbufCount = read(bd->in_fd, bd->inbuf, IOBUF_SIZE); - if (bd->inbufCount <= 0) - longjmp(bd->jmpbuf, RETVAL_UNEXPECTED_INPUT_EOF); - bd->inbufPos = 0; - } - - /* Avoid 32-bit overflow (dump bit buffer to top of output) */ - if (bit_count >= 24) { - bits = bd->inbufBits & ((1 << bit_count) - 1); - bits_wanted -= bit_count; - bits <<= bits_wanted; - bit_count = 0; - } - - /* Grab next 8 bits of input from buffer. */ - bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; - bit_count += 8; - } - - /* Calculate result */ - bit_count -= bits_wanted; - bd->inbufBitCount = bit_count; - bits |= (bd->inbufBits >> bit_count) & ((1 << bits_wanted) - 1); - - return bits; -} - -/* Unpacks the next block and sets up for the inverse Burrows-Wheeler step. */ -static int get_next_block(bunzip_data *bd) -{ - struct group_data *hufGroup; - int dbufCount, dbufSize, groupCount, *base, *limit, selector, - i, j, t, runPos, symCount, symTotal, nSelectors, byteCount[256]; - int runCnt = runCnt; /* for compiler */ - uint8_t uc, symToByte[256], mtfSymbol[256], *selectors; - uint32_t *dbuf; - unsigned origPtr; - - dbuf = bd->dbuf; - dbufSize = bd->dbufSize; - selectors = bd->selectors; - -/* In bbox, we are ok with aborting through setjmp which is set up in start_bunzip */ -#if 0 - /* Reset longjmp I/O error handling */ - i = setjmp(bd->jmpbuf); - if (i) return i; -#endif - - /* Read in header signature and CRC, then validate signature. - (last block signature means CRC is for whole file, return now) */ - i = get_bits(bd, 24); - j = get_bits(bd, 24); - bd->headerCRC = get_bits(bd, 32); - if ((i == 0x177245) && (j == 0x385090)) return RETVAL_LAST_BLOCK; - if ((i != 0x314159) || (j != 0x265359)) return RETVAL_NOT_BZIP_DATA; - - /* We can add support for blockRandomised if anybody complains. There was - some code for this in busybox 1.0.0-pre3, but nobody ever noticed that - it didn't actually work. */ - if (get_bits(bd, 1)) return RETVAL_OBSOLETE_INPUT; - origPtr = get_bits(bd, 24); - if ((int)origPtr > dbufSize) return RETVAL_DATA_ERROR; - - /* mapping table: if some byte values are never used (encoding things - like ascii text), the compression code removes the gaps to have fewer - symbols to deal with, and writes a sparse bitfield indicating which - values were present. We make a translation table to convert the symbols - back to the corresponding bytes. */ - symTotal = 0; - i = 0; - t = get_bits(bd, 16); - do { - if (t & (1 << 15)) { - unsigned inner_map = get_bits(bd, 16); - do { - if (inner_map & (1 << 15)) - symToByte[symTotal++] = i; - inner_map <<= 1; - i++; - } while (i & 15); - i -= 16; - } - t <<= 1; - i += 16; - } while (i < 256); - - /* How many different Huffman coding groups does this block use? */ - groupCount = get_bits(bd, 3); - if (groupCount < 2 || groupCount > MAX_GROUPS) - return RETVAL_DATA_ERROR; - - /* nSelectors: Every GROUP_SIZE many symbols we select a new Huffman coding - group. Read in the group selector list, which is stored as MTF encoded - bit runs. (MTF=Move To Front, as each value is used it's moved to the - start of the list.) */ - for (i = 0; i < groupCount; i++) - mtfSymbol[i] = i; - nSelectors = get_bits(bd, 15); - if (!nSelectors) - return RETVAL_DATA_ERROR; - for (i = 0; i < nSelectors; i++) { - uint8_t tmp_byte; - /* Get next value */ - int n = 0; - while (get_bits(bd, 1)) { - if (n >= groupCount) return RETVAL_DATA_ERROR; - n++; - } - /* Decode MTF to get the next selector */ - tmp_byte = mtfSymbol[n]; - while (--n >= 0) - mtfSymbol[n + 1] = mtfSymbol[n]; - mtfSymbol[0] = selectors[i] = tmp_byte; - } - - /* Read the Huffman coding tables for each group, which code for symTotal - literal symbols, plus two run symbols (RUNA, RUNB) */ - symCount = symTotal + 2; - for (j = 0; j < groupCount; j++) { - uint8_t length[MAX_SYMBOLS]; - /* 8 bits is ALMOST enough for temp[], see below */ - unsigned temp[MAX_HUFCODE_BITS+1]; - int minLen, maxLen, pp, len_m1; - - /* Read Huffman code lengths for each symbol. They're stored in - a way similar to mtf; record a starting value for the first symbol, - and an offset from the previous value for every symbol after that. - (Subtracting 1 before the loop and then adding it back at the end is - an optimization that makes the test inside the loop simpler: symbol - length 0 becomes negative, so an unsigned inequality catches it.) */ - len_m1 = get_bits(bd, 5) - 1; - for (i = 0; i < symCount; i++) { - for (;;) { - int two_bits; - if ((unsigned)len_m1 > (MAX_HUFCODE_BITS-1)) - return RETVAL_DATA_ERROR; - - /* If first bit is 0, stop. Else second bit indicates whether - to increment or decrement the value. Optimization: grab 2 - bits and unget the second if the first was 0. */ - two_bits = get_bits(bd, 2); - if (two_bits < 2) { - bd->inbufBitCount++; - break; - } - - /* Add one if second bit 1, else subtract 1. Avoids if/else */ - len_m1 += (((two_bits+1) & 2) - 1); - } - - /* Correct for the initial -1, to get the final symbol length */ - length[i] = len_m1 + 1; - } - - /* Find largest and smallest lengths in this group */ - minLen = maxLen = length[0]; - for (i = 1; i < symCount; i++) { - if (length[i] > maxLen) maxLen = length[i]; - else if (length[i] < minLen) minLen = length[i]; - } - - /* Calculate permute[], base[], and limit[] tables from length[]. - * - * permute[] is the lookup table for converting Huffman coded symbols - * into decoded symbols. base[] is the amount to subtract from the - * value of a Huffman symbol of a given length when using permute[]. - * - * limit[] indicates the largest numerical value a symbol with a given - * number of bits can have. This is how the Huffman codes can vary in - * length: each code with a value>limit[length] needs another bit. - */ - hufGroup = bd->groups + j; - hufGroup->minLen = minLen; - hufGroup->maxLen = maxLen; - - /* Note that minLen can't be smaller than 1, so we adjust the base - and limit array pointers so we're not always wasting the first - entry. We do this again when using them (during symbol decoding). */ - base = hufGroup->base - 1; - limit = hufGroup->limit - 1; - - /* Calculate permute[]. Concurently, initialize temp[] and limit[]. */ - pp = 0; - for (i = minLen; i <= maxLen; i++) { - int k; - temp[i] = limit[i] = 0; - for (k = 0; k < symCount; k++) - if (length[k] == i) - hufGroup->permute[pp++] = k; - } - - /* Count symbols coded for at each bit length */ - /* NB: in pathological cases, temp[8] can end ip being 256. - * That's why uint8_t is too small for temp[]. */ - for (i = 0; i < symCount; i++) temp[length[i]]++; - - /* Calculate limit[] (the largest symbol-coding value at each bit - * length, which is (previous limit<<1)+symbols at this level), and - * base[] (number of symbols to ignore at each bit length, which is - * limit minus the cumulative count of symbols coded for already). */ - pp = t = 0; - for (i = minLen; i < maxLen;) { - unsigned temp_i = temp[i]; - - pp += temp_i; - - /* We read the largest possible symbol size and then unget bits - after determining how many we need, and those extra bits could - be set to anything. (They're noise from future symbols.) At - each level we're really only interested in the first few bits, - so here we set all the trailing to-be-ignored bits to 1 so they - don't affect the value>limit[length] comparison. */ - limit[i] = (pp << (maxLen - i)) - 1; - pp <<= 1; - t += temp_i; - base[++i] = pp - t; - } - limit[maxLen] = pp + temp[maxLen] - 1; - limit[maxLen+1] = INT_MAX; /* Sentinel value for reading next sym. */ - base[minLen] = 0; - } - - /* We've finished reading and digesting the block header. Now read this - block's Huffman coded symbols from the file and undo the Huffman coding - and run length encoding, saving the result into dbuf[dbufCount++] = uc */ - - /* Initialize symbol occurrence counters and symbol Move To Front table */ - /*memset(byteCount, 0, sizeof(byteCount)); - smaller, but slower */ - for (i = 0; i < 256; i++) { - byteCount[i] = 0; - mtfSymbol[i] = (uint8_t)i; - } - - /* Loop through compressed symbols. */ - - runPos = dbufCount = selector = 0; - for (;;) { - int nextSym; - - /* Fetch next Huffman coding group from list. */ - symCount = GROUP_SIZE - 1; - if (selector >= nSelectors) return RETVAL_DATA_ERROR; - hufGroup = bd->groups + selectors[selector++]; - base = hufGroup->base - 1; - limit = hufGroup->limit - 1; - - continue_this_group: - /* Read next Huffman-coded symbol. */ - - /* Note: It is far cheaper to read maxLen bits and back up than it is - to read minLen bits and then add additional bit at a time, testing - as we go. Because there is a trailing last block (with file CRC), - there is no danger of the overread causing an unexpected EOF for a - valid compressed file. - */ - if (1) { - /* As a further optimization, we do the read inline - (falling back to a call to get_bits if the buffer runs dry). - */ - int new_cnt; - while ((new_cnt = bd->inbufBitCount - hufGroup->maxLen) < 0) { - /* bd->inbufBitCount < hufGroup->maxLen */ - if (bd->inbufPos == bd->inbufCount) { - nextSym = get_bits(bd, hufGroup->maxLen); - goto got_huff_bits; - } - bd->inbufBits = (bd->inbufBits << 8) | bd->inbuf[bd->inbufPos++]; - bd->inbufBitCount += 8; - }; - bd->inbufBitCount = new_cnt; /* "bd->inbufBitCount -= hufGroup->maxLen;" */ - nextSym = (bd->inbufBits >> new_cnt) & ((1 << hufGroup->maxLen) - 1); - got_huff_bits: ; - } else { /* unoptimized equivalent */ - nextSym = get_bits(bd, hufGroup->maxLen); - } - /* Figure how many bits are in next symbol and unget extras */ - i = hufGroup->minLen; - while (nextSym > limit[i]) ++i; - j = hufGroup->maxLen - i; - if (j < 0) - return RETVAL_DATA_ERROR; - bd->inbufBitCount += j; - - /* Huffman decode value to get nextSym (with bounds checking) */ - nextSym = (nextSym >> j) - base[i]; - if ((unsigned)nextSym >= MAX_SYMBOLS) - return RETVAL_DATA_ERROR; - nextSym = hufGroup->permute[nextSym]; - - /* We have now decoded the symbol, which indicates either a new literal - byte, or a repeated run of the most recent literal byte. First, - check if nextSym indicates a repeated run, and if so loop collecting - how many times to repeat the last literal. */ - if ((unsigned)nextSym <= SYMBOL_RUNB) { /* RUNA or RUNB */ - - /* If this is the start of a new run, zero out counter */ - if (runPos == 0) { - runPos = 1; - runCnt = 0; - } - - /* Neat trick that saves 1 symbol: instead of or-ing 0 or 1 at - each bit position, add 1 or 2 instead. For example, - 1011 is 1<<0 + 1<<1 + 2<<2. 1010 is 2<<0 + 2<<1 + 1<<2. - You can make any bit pattern that way using 1 less symbol than - the basic or 0/1 method (except all bits 0, which would use no - symbols, but a run of length 0 doesn't mean anything in this - context). Thus space is saved. */ - runCnt += (runPos << nextSym); /* +runPos if RUNA; +2*runPos if RUNB */ - if (runPos < dbufSize) runPos <<= 1; - goto end_of_huffman_loop; - } - - /* When we hit the first non-run symbol after a run, we now know - how many times to repeat the last literal, so append that many - copies to our buffer of decoded symbols (dbuf) now. (The last - literal used is the one at the head of the mtfSymbol array.) */ - if (runPos != 0) { - uint8_t tmp_byte; - if (dbufCount + runCnt >= dbufSize) return RETVAL_DATA_ERROR; - tmp_byte = symToByte[mtfSymbol[0]]; - byteCount[tmp_byte] += runCnt; - while (--runCnt >= 0) dbuf[dbufCount++] = (uint32_t)tmp_byte; - runPos = 0; - } - - /* Is this the terminating symbol? */ - if (nextSym > symTotal) break; - - /* At this point, nextSym indicates a new literal character. Subtract - one to get the position in the MTF array at which this literal is - currently to be found. (Note that the result can't be -1 or 0, - because 0 and 1 are RUNA and RUNB. But another instance of the - first symbol in the mtf array, position 0, would have been handled - as part of a run above. Therefore 1 unused mtf position minus - 2 non-literal nextSym values equals -1.) */ - if (dbufCount >= dbufSize) return RETVAL_DATA_ERROR; - i = nextSym - 1; - uc = mtfSymbol[i]; - - /* Adjust the MTF array. Since we typically expect to move only a - * small number of symbols, and are bound by 256 in any case, using - * memmove here would typically be bigger and slower due to function - * call overhead and other assorted setup costs. */ - do { - mtfSymbol[i] = mtfSymbol[i-1]; - } while (--i); - mtfSymbol[0] = uc; - uc = symToByte[uc]; - - /* We have our literal byte. Save it into dbuf. */ - byteCount[uc]++; - dbuf[dbufCount++] = (uint32_t)uc; - - /* Skip group initialization if we're not done with this group. Done - * this way to avoid compiler warning. */ - end_of_huffman_loop: - if (--symCount >= 0) goto continue_this_group; - } - - /* At this point, we've read all the Huffman-coded symbols (and repeated - runs) for this block from the input stream, and decoded them into the - intermediate buffer. There are dbufCount many decoded bytes in dbuf[]. - Now undo the Burrows-Wheeler transform on dbuf. - See http://dogma.net/markn/articles/bwt/bwt.htm - */ - - /* Turn byteCount into cumulative occurrence counts of 0 to n-1. */ - j = 0; - for (i = 0; i < 256; i++) { - int tmp_count = j + byteCount[i]; - byteCount[i] = j; - j = tmp_count; - } - - /* Figure out what order dbuf would be in if we sorted it. */ - for (i = 0; i < dbufCount; i++) { - uint8_t tmp_byte = (uint8_t)dbuf[i]; - int tmp_count = byteCount[tmp_byte]; - dbuf[tmp_count] |= (i << 8); - byteCount[tmp_byte] = tmp_count + 1; - } - - /* Decode first byte by hand to initialize "previous" byte. Note that it - doesn't get output, and if the first three characters are identical - it doesn't qualify as a run (hence writeRunCountdown=5). */ - if (dbufCount) { - uint32_t tmp; - if ((int)origPtr >= dbufCount) return RETVAL_DATA_ERROR; - tmp = dbuf[origPtr]; - bd->writeCurrent = (uint8_t)tmp; - bd->writePos = (tmp >> 8); - bd->writeRunCountdown = 5; - } - bd->writeCount = dbufCount; - - return RETVAL_OK; -} - -/* Undo Burrows-Wheeler transform on intermediate buffer to produce output. - If start_bunzip was initialized with out_fd=-1, then up to len bytes of - data are written to outbuf. Return value is number of bytes written or - error (all errors are negative numbers). If out_fd!=-1, outbuf and len - are ignored, data is written to out_fd and return is RETVAL_OK or error. - - NB: read_bunzip returns < 0 on error, or the number of *unfilled* bytes - in outbuf. IOW: on EOF returns len ("all bytes are not filled"), not 0. - (Why? This allows to get rid of one local variable) -*/ -int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) -{ - const uint32_t *dbuf; - int pos, current, previous; - uint32_t CRC; - - /* If we already have error/end indicator, return it */ - if (bd->writeCount < 0) - return bd->writeCount; - - dbuf = bd->dbuf; - - /* Register-cached state (hopefully): */ - pos = bd->writePos; - current = bd->writeCurrent; - CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */ - - /* We will always have pending decoded data to write into the output - buffer unless this is the very first call (in which case we haven't - Huffman-decoded a block into the intermediate buffer yet). */ - if (bd->writeCopies) { - - dec_writeCopies: - /* Inside the loop, writeCopies means extra copies (beyond 1) */ - --bd->writeCopies; - - /* Loop outputting bytes */ - for (;;) { - - /* If the output buffer is full, save cached state and return */ - if (--len < 0) { - /* Unlikely branch. - * Use of "goto" instead of keeping code here - * helps compiler to realize this. */ - goto outbuf_full; - } - - /* Write next byte into output buffer, updating CRC */ - *outbuf++ = current; - CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current]; - - /* Loop now if we're outputting multiple copies of this byte */ - if (bd->writeCopies) { - /* Unlikely branch */ - /*--bd->writeCopies;*/ - /*continue;*/ - /* Same, but (ab)using other existing --writeCopies operation - * (and this if() compiles into just test+branch pair): */ - goto dec_writeCopies; - } - decode_next_byte: - if (--bd->writeCount < 0) - break; /* input block is fully consumed, need next one */ - - /* Follow sequence vector to undo Burrows-Wheeler transform */ - previous = current; - pos = dbuf[pos]; - current = (uint8_t)pos; - pos >>= 8; - - /* After 3 consecutive copies of the same byte, the 4th - * is a repeat count. We count down from 4 instead - * of counting up because testing for non-zero is faster */ - if (--bd->writeRunCountdown != 0) { - if (current != previous) - bd->writeRunCountdown = 4; - } else { - /* Unlikely branch */ - /* We have a repeated run, this byte indicates the count */ - bd->writeCopies = current; - current = previous; - bd->writeRunCountdown = 5; - - /* Sometimes there are just 3 bytes (run length 0) */ - if (!bd->writeCopies) goto decode_next_byte; - - /* Subtract the 1 copy we'd output anyway to get extras */ - --bd->writeCopies; - } - } /* for(;;) */ - - /* Decompression of this input block completed successfully */ - bd->writeCRC = CRC = ~CRC; - bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC; - - /* If this block had a CRC error, force file level CRC error */ - if (CRC != bd->headerCRC) { - bd->totalCRC = bd->headerCRC + 1; - return RETVAL_LAST_BLOCK; - } - } - - /* Refill the intermediate buffer by Huffman-decoding next block of input */ - { - int r = get_next_block(bd); - if (r) { /* error/end */ - bd->writeCount = r; - return (r != RETVAL_LAST_BLOCK) ? r : len; - } - } - - CRC = ~0; - pos = bd->writePos; - current = bd->writeCurrent; - goto decode_next_byte; - - outbuf_full: - /* Output buffer is full, save cached state and return */ - bd->writePos = pos; - bd->writeCurrent = current; - bd->writeCRC = CRC; - - bd->writeCopies++; - - return 0; -} - -/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain - a complete bunzip file (len bytes long). If in_fd!=-1, inbuf and len are - ignored, and data is read from file handle into temporary buffer. */ - -/* Because bunzip2 is used for help text unpacking, and because bb_show_usage() - should work for NOFORK applets too, we must be extremely careful to not leak - any allocations! */ -int FAST_FUNC start_bunzip(bunzip_data **bdp, int in_fd, - const void *inbuf, int len) -{ - bunzip_data *bd; - unsigned i; - enum { - BZh0 = ('B' << 24) + ('Z' << 16) + ('h' << 8) + '0', - h0 = ('h' << 8) + '0', - }; - - /* Figure out how much data to allocate */ - i = sizeof(bunzip_data); - if (in_fd != -1) i += IOBUF_SIZE; - - /* Allocate bunzip_data. Most fields initialize to zero. */ - bd = *bdp = xzalloc(i); - - /* Setup input buffer */ - bd->in_fd = in_fd; - if (-1 == in_fd) { - /* in this case, bd->inbuf is read-only */ - bd->inbuf = (void*)inbuf; /* cast away const-ness */ - } else { - bd->inbuf = (uint8_t*)(bd + 1); - memcpy(bd->inbuf, inbuf, len); - } - bd->inbufCount = len; - - /* Init the CRC32 table (big endian) */ - crc32_filltable(bd->crc32Table, 1); - - /* Setup for I/O error handling via longjmp */ - i = setjmp(bd->jmpbuf); - if (i) return i; - - /* Ensure that file starts with "BZh['1'-'9']." */ - /* Update: now caller verifies 1st two bytes, makes .gz/.bz2 - * integration easier */ - /* was: */ - /* i = get_bits(bd, 32); */ - /* if ((unsigned)(i - BZh0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; */ - i = get_bits(bd, 16); - if ((unsigned)(i - h0 - 1) >= 9) return RETVAL_NOT_BZIP_DATA; - - /* Fourth byte (ascii '1'-'9') indicates block size in units of 100k of - uncompressed data. Allocate intermediate buffer for block. */ - /* bd->dbufSize = 100000 * (i - BZh0); */ - bd->dbufSize = 100000 * (i - h0); - - /* Cannot use xmalloc - may leak bd in NOFORK case! */ - bd->dbuf = malloc_or_warn(bd->dbufSize * sizeof(bd->dbuf[0])); - if (!bd->dbuf) { - free(bd); - xfunc_die(); - } - return RETVAL_OK; -} - -void FAST_FUNC dealloc_bunzip(bunzip_data *bd) -{ - free(bd->dbuf); - free(bd); -} - - -/* Decompress src_fd to dst_fd. Stops at end of bzip data, not end of file. */ -IF_DESKTOP(long long) int FAST_FUNC -unpack_bz2_stream(int src_fd, int dst_fd) -{ - IF_DESKTOP(long long total_written = 0;) - bunzip_data *bd; - char *outbuf; - int i; - unsigned len; - - outbuf = xmalloc(IOBUF_SIZE); - len = 0; - while (1) { /* "Process one BZ... stream" loop */ - - i = start_bunzip(&bd, src_fd, outbuf + 2, len); - - if (i == 0) { - while (1) { /* "Produce some output bytes" loop */ - i = read_bunzip(bd, outbuf, IOBUF_SIZE); - if (i < 0) /* error? */ - break; - i = IOBUF_SIZE - i; /* number of bytes produced */ - if (i == 0) /* EOF? */ - break; - if (i != full_write(dst_fd, outbuf, i)) { - bb_error_msg("short write"); - i = RETVAL_SHORT_WRITE; - goto release_mem; - } - IF_DESKTOP(total_written += i;) - } - } - - if (i != RETVAL_LAST_BLOCK) { - bb_error_msg("bunzip error %d", i); - break; - } - if (bd->headerCRC != bd->totalCRC) { - bb_error_msg("CRC error"); - break; - } - - /* Successfully unpacked one BZ stream */ - i = RETVAL_OK; - - /* Do we have "BZ..." after last processed byte? - * pbzip2 (parallelized bzip2) produces such files. - */ - len = bd->inbufCount - bd->inbufPos; - memcpy(outbuf, &bd->inbuf[bd->inbufPos], len); - if (len < 2) { - if (safe_read(src_fd, outbuf + len, 2 - len) != 2 - len) - break; - len = 2; - } - if (*(uint16_t*)outbuf != BZIP2_MAGIC) /* "BZ"? */ - break; - dealloc_bunzip(bd); - len -= 2; - } - - release_mem: - dealloc_bunzip(bd); - free(outbuf); - - return i ? i : IF_DESKTOP(total_written) + 0; -} - -IF_DESKTOP(long long) int FAST_FUNC -unpack_bz2_stream_prime(int src_fd, int dst_fd) -{ - uint16_t magic2; - xread(src_fd, &magic2, 2); - if (magic2 != BZIP2_MAGIC) { - bb_error_msg_and_die("invalid magic"); - } - return unpack_bz2_stream(src_fd, dst_fd); -} - -#ifdef TESTING - -static char *const bunzip_errors[] = { - NULL, "Bad file checksum", "Not bzip data", - "Unexpected input EOF", "Unexpected output EOF", "Data error", - "Out of memory", "Obsolete (pre 0.9.5) bzip format not supported" -}; - -/* Dumb little test thing, decompress stdin to stdout */ -int main(int argc, char **argv) -{ - int i; - char c; - - int i = unpack_bz2_stream_prime(0, 1); - if (i < 0) - fprintf(stderr, "%s\n", bunzip_errors[-i]); - else if (read(STDIN_FILENO, &c, 1)) - fprintf(stderr, "Trailing garbage ignored\n"); - return -i; -} -#endif diff --git a/archival/libunarchive/decompress_uncompress.c b/archival/libunarchive/decompress_uncompress.c deleted file mode 100644 index 285b4efeb..000000000 --- a/archival/libunarchive/decompress_uncompress.c +++ /dev/null @@ -1,307 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* uncompress for busybox -- (c) 2002 Robert Griebl - * - * based on the original compress42.c source - * (see disclaimer below) - */ - -/* (N)compress42.c - File compression ala IEEE Computer, Mar 1992. - * - * Authors: - * Spencer W. Thomas (decvax!harpo!utah-cs!utah-gr!thomas) - * Jim McKie (decvax!mcvax!jim) - * Steve Davies (decvax!vax135!petsd!peora!srd) - * Ken Turkowski (decvax!decwrl!turtlevax!ken) - * James A. Woods (decvax!ihnp4!ames!jaw) - * Joe Orost (decvax!vax135!petsd!joe) - * Dave Mack (csu@alembic.acs.com) - * Peter Jannesen, Network Communication Systems - * (peter@ncs.nl) - * - * marc@suse.de : a small security fix for a buffer overflow - * - * [... History snipped ...] - * - */ - -#include "libbb.h" -#include "unarchive.h" - - -/* Default input buffer size */ -#define IBUFSIZ 2048 - -/* Default output buffer size */ -#define OBUFSIZ 2048 - -/* Defines for third byte of header */ -#define BIT_MASK 0x1f /* Mask for 'number of compresssion bits' */ - /* Masks 0x20 and 0x40 are free. */ - /* I think 0x20 should mean that there is */ - /* a fourth header byte (for expansion). */ -#define BLOCK_MODE 0x80 /* Block compression if table is full and */ - /* compression rate is dropping flush tables */ - /* the next two codes should not be changed lightly, as they must not */ - /* lie within the contiguous general code space. */ -#define FIRST 257 /* first free entry */ -#define CLEAR 256 /* table clear output code */ - -#define INIT_BITS 9 /* initial number of bits/code */ - - -/* machine variants which require cc -Dmachine: pdp11, z8000, DOS */ -#define HBITS 17 /* 50% occupancy */ -#define HSIZE (1< BITS) { - bb_error_msg("compressed with %d bits, can only handle " - BITS_STR" bits", maxbits); - goto err; - } - - n_bits = INIT_BITS; - maxcode = MAXCODE(INIT_BITS) - 1; - bitmask = (1 << INIT_BITS) - 1; - oldcode = -1; - finchar = 0; - outpos = 0; - posbits = 0 << 3; - - free_ent = ((block_mode) ? FIRST : 256); - - /* As above, initialize the first 256 entries in the table. */ - /*clear_tab_prefixof(); - done by xzalloc */ - - for (code = 255; code >= 0; --code) { - tab_suffixof(code) = (unsigned char) code; - } - - do { - resetbuf: - { - int i; - int e; - int o; - - o = posbits >> 3; - e = insize - o; - - for (i = 0; i < e; ++i) - inbuf[i] = inbuf[i + o]; - - insize = e; - posbits = 0; - } - - if (insize < (int) (IBUFSIZ + 64) - IBUFSIZ) { - rsize = safe_read(fd_in, inbuf + insize, IBUFSIZ); -//error check?? - insize += rsize; - } - - inbits = ((rsize > 0) ? (insize - insize % n_bits) << 3 : - (insize << 3) - (n_bits - 1)); - - while (inbits > posbits) { - if (free_ent > maxcode) { - posbits = - ((posbits - 1) + - ((n_bits << 3) - - (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); - ++n_bits; - if (n_bits == maxbits) { - maxcode = maxmaxcode; - } else { - maxcode = MAXCODE(n_bits) - 1; - } - bitmask = (1 << n_bits) - 1; - goto resetbuf; - } - { - unsigned char *p = &inbuf[posbits >> 3]; - - code = ((((long) (p[0])) | ((long) (p[1]) << 8) | - ((long) (p[2]) << 16)) >> (posbits & 0x7)) & bitmask; - } - posbits += n_bits; - - - if (oldcode == -1) { - oldcode = code; - finchar = (int) oldcode; - outbuf[outpos++] = (unsigned char) finchar; - continue; - } - - if (code == CLEAR && block_mode) { - clear_tab_prefixof(); - free_ent = FIRST - 1; - posbits = - ((posbits - 1) + - ((n_bits << 3) - - (posbits - 1 + (n_bits << 3)) % (n_bits << 3))); - n_bits = INIT_BITS; - maxcode = MAXCODE(INIT_BITS) - 1; - bitmask = (1 << INIT_BITS) - 1; - goto resetbuf; - } - - incode = code; - stackp = de_stack; - - /* Special case for KwKwK string. */ - if (code >= free_ent) { - if (code > free_ent) { - unsigned char *p; - - posbits -= n_bits; - p = &inbuf[posbits >> 3]; - - bb_error_msg - ("insize:%d posbits:%d inbuf:%02X %02X %02X %02X %02X (%d)", - insize, posbits, p[-1], p[0], p[1], p[2], p[3], - (posbits & 07)); - bb_error_msg("corrupted data"); - goto err; - } - - *--stackp = (unsigned char) finchar; - code = oldcode; - } - - /* Generate output characters in reverse order */ - while ((long) code >= (long) 256) { - *--stackp = tab_suffixof(code); - code = tab_prefixof(code); - } - - finchar = tab_suffixof(code); - *--stackp = (unsigned char) finchar; - - /* And put them out in forward order */ - { - int i; - - i = de_stack - stackp; - if (outpos + i >= OBUFSIZ) { - do { - if (i > OBUFSIZ - outpos) { - i = OBUFSIZ - outpos; - } - - if (i > 0) { - memcpy(outbuf + outpos, stackp, i); - outpos += i; - } - - if (outpos >= OBUFSIZ) { - full_write(fd_out, outbuf, outpos); -//error check?? - IF_DESKTOP(total_written += outpos;) - outpos = 0; - } - stackp += i; - i = de_stack - stackp; - } while (i > 0); - } else { - memcpy(outbuf + outpos, stackp, i); - outpos += i; - } - } - - /* Generate the new entry. */ - code = free_ent; - if (code < maxmaxcode) { - tab_prefixof(code) = (unsigned short) oldcode; - tab_suffixof(code) = (unsigned char) finchar; - free_ent = code + 1; - } - - /* Remember previous code. */ - oldcode = incode; - } - - } while (rsize > 0); - - if (outpos > 0) { - full_write(fd_out, outbuf, outpos); -//error check?? - IF_DESKTOP(total_written += outpos;) - } - - retval = IF_DESKTOP(total_written) + 0; - err: - free(inbuf); - free(outbuf); - free(htab); - free(codetab); - return retval; -} diff --git a/archival/libunarchive/decompress_unlzma.c b/archival/libunarchive/decompress_unlzma.c deleted file mode 100644 index 1a3a8f86b..000000000 --- a/archival/libunarchive/decompress_unlzma.c +++ /dev/null @@ -1,465 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Small lzma deflate implementation. - * Copyright (C) 2006 Aurelien Jacobs - * - * Based on LzmaDecode.c from the LZMA SDK 4.22 (http://www.7-zip.org/) - * Copyright (C) 1999-2005 Igor Pavlov - * - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ -#include "libbb.h" -#include "unarchive.h" - -#if ENABLE_FEATURE_LZMA_FAST -# define speed_inline ALWAYS_INLINE -# define size_inline -#else -# define speed_inline -# define size_inline ALWAYS_INLINE -#endif - - -typedef struct { - int fd; - uint8_t *ptr; - -/* Was keeping rc on stack in unlzma and separately allocating buffer, - * but with "buffer 'attached to' allocated rc" code is smaller: */ - /* uint8_t *buffer; */ -#define RC_BUFFER ((uint8_t*)(rc+1)) - - uint8_t *buffer_end; - -/* Had provisions for variable buffer, but we don't need it here */ - /* int buffer_size; */ -#define RC_BUFFER_SIZE 0x10000 - - uint32_t code; - uint32_t range; - uint32_t bound; -} rc_t; - -#define RC_TOP_BITS 24 -#define RC_MOVE_BITS 5 -#define RC_MODEL_TOTAL_BITS 11 - - -/* Called twice: once at startup (LZMA_FAST only) and once in rc_normalize() */ -static size_inline void rc_read(rc_t *rc) -{ - int buffer_size = safe_read(rc->fd, RC_BUFFER, RC_BUFFER_SIZE); -//TODO: return -1 instead -//This will make unlzma delete broken unpacked file on unpack errors - if (buffer_size <= 0) - bb_error_msg_and_die("unexpected EOF"); - rc->ptr = RC_BUFFER; - rc->buffer_end = RC_BUFFER + buffer_size; -} - -/* Called twice, but one callsite is in speed_inline'd rc_is_bit_1() */ -static void rc_do_normalize(rc_t *rc) -{ - if (rc->ptr >= rc->buffer_end) - rc_read(rc); - rc->range <<= 8; - rc->code = (rc->code << 8) | *rc->ptr++; -} - -/* Called once */ -static ALWAYS_INLINE rc_t* rc_init(int fd) /*, int buffer_size) */ -{ - int i; - rc_t *rc; - - rc = xzalloc(sizeof(*rc) + RC_BUFFER_SIZE); - - rc->fd = fd; - /* rc->ptr = rc->buffer_end; */ - - for (i = 0; i < 5; i++) { -#if ENABLE_FEATURE_LZMA_FAST - if (rc->ptr >= rc->buffer_end) - rc_read(rc); - rc->code = (rc->code << 8) | *rc->ptr++; -#else - rc_do_normalize(rc); -#endif - } - rc->range = 0xFFFFFFFF; - return rc; -} - -/* Called once */ -static ALWAYS_INLINE void rc_free(rc_t *rc) -{ - free(rc); -} - -static ALWAYS_INLINE void rc_normalize(rc_t *rc) -{ - if (rc->range < (1 << RC_TOP_BITS)) { - rc_do_normalize(rc); - } -} - -/* rc_is_bit_1 is called 9 times */ -static speed_inline int rc_is_bit_1(rc_t *rc, uint16_t *p) -{ - rc_normalize(rc); - rc->bound = *p * (rc->range >> RC_MODEL_TOTAL_BITS); - if (rc->code < rc->bound) { - rc->range = rc->bound; - *p += ((1 << RC_MODEL_TOTAL_BITS) - *p) >> RC_MOVE_BITS; - return 0; - } - rc->range -= rc->bound; - rc->code -= rc->bound; - *p -= *p >> RC_MOVE_BITS; - return 1; -} - -/* Called 4 times in unlzma loop */ -static speed_inline int rc_get_bit(rc_t *rc, uint16_t *p, int *symbol) -{ - int ret = rc_is_bit_1(rc, p); - *symbol = *symbol * 2 + ret; - return ret; -} - -/* Called once */ -static ALWAYS_INLINE int rc_direct_bit(rc_t *rc) -{ - rc_normalize(rc); - rc->range >>= 1; - if (rc->code >= rc->range) { - rc->code -= rc->range; - return 1; - } - return 0; -} - -/* Called twice */ -static speed_inline void -rc_bit_tree_decode(rc_t *rc, uint16_t *p, int num_levels, int *symbol) -{ - int i = num_levels; - - *symbol = 1; - while (i--) - rc_get_bit(rc, p + *symbol, symbol); - *symbol -= 1 << num_levels; -} - - -typedef struct { - uint8_t pos; - uint32_t dict_size; - uint64_t dst_size; -} PACKED lzma_header_t; - - -/* #defines will force compiler to compute/optimize each one with each usage. - * Have heart and use enum instead. */ -enum { - LZMA_BASE_SIZE = 1846, - LZMA_LIT_SIZE = 768, - - LZMA_NUM_POS_BITS_MAX = 4, - - LZMA_LEN_NUM_LOW_BITS = 3, - LZMA_LEN_NUM_MID_BITS = 3, - LZMA_LEN_NUM_HIGH_BITS = 8, - - LZMA_LEN_CHOICE = 0, - LZMA_LEN_CHOICE_2 = (LZMA_LEN_CHOICE + 1), - LZMA_LEN_LOW = (LZMA_LEN_CHOICE_2 + 1), - LZMA_LEN_MID = (LZMA_LEN_LOW \ - + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_LOW_BITS))), - LZMA_LEN_HIGH = (LZMA_LEN_MID \ - + (1 << (LZMA_NUM_POS_BITS_MAX + LZMA_LEN_NUM_MID_BITS))), - LZMA_NUM_LEN_PROBS = (LZMA_LEN_HIGH + (1 << LZMA_LEN_NUM_HIGH_BITS)), - - LZMA_NUM_STATES = 12, - LZMA_NUM_LIT_STATES = 7, - - LZMA_START_POS_MODEL_INDEX = 4, - LZMA_END_POS_MODEL_INDEX = 14, - LZMA_NUM_FULL_DISTANCES = (1 << (LZMA_END_POS_MODEL_INDEX >> 1)), - - LZMA_NUM_POS_SLOT_BITS = 6, - LZMA_NUM_LEN_TO_POS_STATES = 4, - - LZMA_NUM_ALIGN_BITS = 4, - - LZMA_MATCH_MIN_LEN = 2, - - LZMA_IS_MATCH = 0, - LZMA_IS_REP = (LZMA_IS_MATCH + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)), - LZMA_IS_REP_G0 = (LZMA_IS_REP + LZMA_NUM_STATES), - LZMA_IS_REP_G1 = (LZMA_IS_REP_G0 + LZMA_NUM_STATES), - LZMA_IS_REP_G2 = (LZMA_IS_REP_G1 + LZMA_NUM_STATES), - LZMA_IS_REP_0_LONG = (LZMA_IS_REP_G2 + LZMA_NUM_STATES), - LZMA_POS_SLOT = (LZMA_IS_REP_0_LONG \ - + (LZMA_NUM_STATES << LZMA_NUM_POS_BITS_MAX)), - LZMA_SPEC_POS = (LZMA_POS_SLOT \ - + (LZMA_NUM_LEN_TO_POS_STATES << LZMA_NUM_POS_SLOT_BITS)), - LZMA_ALIGN = (LZMA_SPEC_POS \ - + LZMA_NUM_FULL_DISTANCES - LZMA_END_POS_MODEL_INDEX), - LZMA_LEN_CODER = (LZMA_ALIGN + (1 << LZMA_NUM_ALIGN_BITS)), - LZMA_REP_LEN_CODER = (LZMA_LEN_CODER + LZMA_NUM_LEN_PROBS), - LZMA_LITERAL = (LZMA_REP_LEN_CODER + LZMA_NUM_LEN_PROBS), -}; - - -IF_DESKTOP(long long) int FAST_FUNC -unpack_lzma_stream(int src_fd, int dst_fd) -{ - IF_DESKTOP(long long total_written = 0;) - lzma_header_t header; - int lc, pb, lp; - uint32_t pos_state_mask; - uint32_t literal_pos_mask; - uint16_t *p; - int num_bits; - int num_probs; - rc_t *rc; - int i; - uint8_t *buffer; - uint8_t previous_byte = 0; - size_t buffer_pos = 0, global_pos = 0; - int len = 0; - int state = 0; - uint32_t rep0 = 1, rep1 = 1, rep2 = 1, rep3 = 1; - - if (full_read(src_fd, &header, sizeof(header)) != sizeof(header) - || header.pos >= (9 * 5 * 5) - ) { - bb_error_msg("bad lzma header"); - return -1; - } - - i = header.pos / 9; - lc = header.pos % 9; - pb = i / 5; - lp = i % 5; - pos_state_mask = (1 << pb) - 1; - literal_pos_mask = (1 << lp) - 1; - - header.dict_size = SWAP_LE32(header.dict_size); - header.dst_size = SWAP_LE64(header.dst_size); - - if (header.dict_size == 0) - header.dict_size++; - - buffer = xmalloc(MIN(header.dst_size, header.dict_size)); - - num_probs = LZMA_BASE_SIZE + (LZMA_LIT_SIZE << (lc + lp)); - p = xmalloc(num_probs * sizeof(*p)); - num_probs += LZMA_LITERAL - LZMA_BASE_SIZE; - for (i = 0; i < num_probs; i++) - p[i] = (1 << RC_MODEL_TOTAL_BITS) >> 1; - - rc = rc_init(src_fd); /*, RC_BUFFER_SIZE); */ - - while (global_pos + buffer_pos < header.dst_size) { - int pos_state = (buffer_pos + global_pos) & pos_state_mask; - uint16_t *prob = p + LZMA_IS_MATCH + (state << LZMA_NUM_POS_BITS_MAX) + pos_state; - - if (!rc_is_bit_1(rc, prob)) { - static const char next_state[LZMA_NUM_STATES] = - { 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 4, 5 }; - int mi = 1; - - prob = (p + LZMA_LITERAL - + (LZMA_LIT_SIZE * ((((buffer_pos + global_pos) & literal_pos_mask) << lc) - + (previous_byte >> (8 - lc)) - ) - ) - ); - - if (state >= LZMA_NUM_LIT_STATES) { - int match_byte; - uint32_t pos = buffer_pos - rep0; - - while (pos >= header.dict_size) - pos += header.dict_size; - match_byte = buffer[pos]; - do { - int bit; - - match_byte <<= 1; - bit = match_byte & 0x100; - bit ^= (rc_get_bit(rc, prob + 0x100 + bit + mi, &mi) << 8); /* 0x100 or 0 */ - if (bit) - break; - } while (mi < 0x100); - } - while (mi < 0x100) { - rc_get_bit(rc, prob + mi, &mi); - } - - state = next_state[state]; - - previous_byte = (uint8_t) mi; -#if ENABLE_FEATURE_LZMA_FAST - one_byte1: - buffer[buffer_pos++] = previous_byte; - if (buffer_pos == header.dict_size) { - buffer_pos = 0; - global_pos += header.dict_size; - if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) - goto bad; - IF_DESKTOP(total_written += header.dict_size;) - } -#else - len = 1; - goto one_byte2; -#endif - } else { - int offset; - uint16_t *prob2; -#define prob_len prob2 - - prob2 = p + LZMA_IS_REP + state; - if (!rc_is_bit_1(rc, prob2)) { - rep3 = rep2; - rep2 = rep1; - rep1 = rep0; - state = state < LZMA_NUM_LIT_STATES ? 0 : 3; - prob2 = p + LZMA_LEN_CODER; - } else { - prob2 += LZMA_IS_REP_G0 - LZMA_IS_REP; - if (!rc_is_bit_1(rc, prob2)) { - prob2 = (p + LZMA_IS_REP_0_LONG - + (state << LZMA_NUM_POS_BITS_MAX) - + pos_state - ); - if (!rc_is_bit_1(rc, prob2)) { -#if ENABLE_FEATURE_LZMA_FAST - uint32_t pos = buffer_pos - rep0; - state = state < LZMA_NUM_LIT_STATES ? 9 : 11; - while (pos >= header.dict_size) - pos += header.dict_size; - previous_byte = buffer[pos]; - goto one_byte1; -#else - state = state < LZMA_NUM_LIT_STATES ? 9 : 11; - len = 1; - goto string; -#endif - } - } else { - uint32_t distance; - - prob2 += LZMA_IS_REP_G1 - LZMA_IS_REP_G0; - distance = rep1; - if (rc_is_bit_1(rc, prob2)) { - prob2 += LZMA_IS_REP_G2 - LZMA_IS_REP_G1; - distance = rep2; - if (rc_is_bit_1(rc, prob2)) { - distance = rep3; - rep3 = rep2; - } - rep2 = rep1; - } - rep1 = rep0; - rep0 = distance; - } - state = state < LZMA_NUM_LIT_STATES ? 8 : 11; - prob2 = p + LZMA_REP_LEN_CODER; - } - - prob_len = prob2 + LZMA_LEN_CHOICE; - num_bits = LZMA_LEN_NUM_LOW_BITS; - if (!rc_is_bit_1(rc, prob_len)) { - prob_len += LZMA_LEN_LOW - LZMA_LEN_CHOICE - + (pos_state << LZMA_LEN_NUM_LOW_BITS); - offset = 0; - } else { - prob_len += LZMA_LEN_CHOICE_2 - LZMA_LEN_CHOICE; - if (!rc_is_bit_1(rc, prob_len)) { - prob_len += LZMA_LEN_MID - LZMA_LEN_CHOICE_2 - + (pos_state << LZMA_LEN_NUM_MID_BITS); - offset = 1 << LZMA_LEN_NUM_LOW_BITS; - num_bits += LZMA_LEN_NUM_MID_BITS - LZMA_LEN_NUM_LOW_BITS; - } else { - prob_len += LZMA_LEN_HIGH - LZMA_LEN_CHOICE_2; - offset = ((1 << LZMA_LEN_NUM_LOW_BITS) - + (1 << LZMA_LEN_NUM_MID_BITS)); - num_bits += LZMA_LEN_NUM_HIGH_BITS - LZMA_LEN_NUM_LOW_BITS; - } - } - rc_bit_tree_decode(rc, prob_len, num_bits, &len); - len += offset; - - if (state < 4) { - int pos_slot; - uint16_t *prob3; - - state += LZMA_NUM_LIT_STATES; - prob3 = p + LZMA_POS_SLOT + - ((len < LZMA_NUM_LEN_TO_POS_STATES ? len : - LZMA_NUM_LEN_TO_POS_STATES - 1) - << LZMA_NUM_POS_SLOT_BITS); - rc_bit_tree_decode(rc, prob3, - LZMA_NUM_POS_SLOT_BITS, &pos_slot); - rep0 = pos_slot; - if (pos_slot >= LZMA_START_POS_MODEL_INDEX) { - int i2, mi2, num_bits2 = (pos_slot >> 1) - 1; - rep0 = 2 | (pos_slot & 1); - if (pos_slot < LZMA_END_POS_MODEL_INDEX) { - rep0 <<= num_bits2; - prob3 = p + LZMA_SPEC_POS + rep0 - pos_slot - 1; - } else { - for (; num_bits2 != LZMA_NUM_ALIGN_BITS; num_bits2--) - rep0 = (rep0 << 1) | rc_direct_bit(rc); - rep0 <<= LZMA_NUM_ALIGN_BITS; - prob3 = p + LZMA_ALIGN; - } - i2 = 1; - mi2 = 1; - while (num_bits2--) { - if (rc_get_bit(rc, prob3 + mi2, &mi2)) - rep0 |= i2; - i2 <<= 1; - } - } - if (++rep0 == 0) - break; - } - - len += LZMA_MATCH_MIN_LEN; - IF_NOT_FEATURE_LZMA_FAST(string:) - do { - uint32_t pos = buffer_pos - rep0; - while (pos >= header.dict_size) - pos += header.dict_size; - previous_byte = buffer[pos]; - IF_NOT_FEATURE_LZMA_FAST(one_byte2:) - buffer[buffer_pos++] = previous_byte; - if (buffer_pos == header.dict_size) { - buffer_pos = 0; - global_pos += header.dict_size; - if (full_write(dst_fd, buffer, header.dict_size) != (ssize_t)header.dict_size) - goto bad; - IF_DESKTOP(total_written += header.dict_size;) - } - len--; - } while (len != 0 && buffer_pos < header.dst_size); - } - } - - { - IF_NOT_DESKTOP(int total_written = 0; /* success */) - IF_DESKTOP(total_written += buffer_pos;) - if (full_write(dst_fd, buffer, buffer_pos) != (ssize_t)buffer_pos) { - bad: - total_written = -1; /* failure */ - } - rc_free(rc); - free(p); - free(buffer); - return total_written; - } -} diff --git a/archival/libunarchive/decompress_unxz.c b/archival/libunarchive/decompress_unxz.c deleted file mode 100644 index ca427231e..000000000 --- a/archival/libunarchive/decompress_unxz.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * This file uses XZ Embedded library code which is written - * by Lasse Collin - * and Igor Pavlov - * - * See README file in unxz/ directory for more information. - * - * This file is: - * Copyright (C) 2010 Denys Vlasenko - * Licensed under GPLv2, see file LICENSE in this source tree. - */ -#include "libbb.h" -#include "unarchive.h" - -#define XZ_FUNC FAST_FUNC -#define XZ_EXTERN static - -#define XZ_DEC_DYNALLOC - -/* Skip check (rather than fail) of unsupported hash functions */ -#define XZ_DEC_ANY_CHECK 1 - -/* We use our own crc32 function */ -#define XZ_INTERNAL_CRC32 0 -static uint32_t xz_crc32(const uint8_t *buf, size_t size, uint32_t crc) -{ - return ~crc32_block_endian0(~crc, buf, size, global_crc32_table); -} - -/* We use arch-optimized unaligned accessors */ -#define get_unaligned_le32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_LE32(v); }) -#define get_unaligned_be32(buf) ({ uint32_t v; move_from_unaligned32(v, buf); SWAP_BE32(v); }) -#define put_unaligned_le32(val, buf) move_to_unaligned16(buf, SWAP_LE32(val)) -#define put_unaligned_be32(val, buf) move_to_unaligned16(buf, SWAP_BE32(val)) - -#include "unxz/xz_dec_bcj.c" -#include "unxz/xz_dec_lzma2.c" -#include "unxz/xz_dec_stream.c" - -IF_DESKTOP(long long) int FAST_FUNC -unpack_xz_stream(int src_fd, int dst_fd) -{ - struct xz_buf iobuf; - struct xz_dec *state; - unsigned char *membuf; - IF_DESKTOP(long long) int total = 0; - - if (!global_crc32_table) - global_crc32_table = crc32_filltable(NULL, /*endian:*/ 0); - - memset(&iobuf, 0, sizeof(iobuf)); - /* Preload XZ file signature */ - membuf = (void*) strcpy(xmalloc(2 * BUFSIZ), HEADER_MAGIC); - iobuf.in = membuf; - iobuf.in_size = HEADER_MAGIC_SIZE; - iobuf.out = membuf + BUFSIZ; - iobuf.out_size = BUFSIZ; - - /* Limit memory usage to about 64 MiB. */ - state = xz_dec_init(XZ_DYNALLOC, 64*1024*1024); - - while (1) { - enum xz_ret r; - - if (iobuf.in_pos == iobuf.in_size) { - int rd = safe_read(src_fd, membuf, BUFSIZ); - if (rd < 0) { - bb_error_msg(bb_msg_read_error); - total = -1; - break; - } - iobuf.in_size = rd; - iobuf.in_pos = 0; - } -// bb_error_msg(">in pos:%d size:%d out pos:%d size:%d", -// iobuf.in_pos, iobuf.in_size, iobuf.out_pos, iobuf.out_size); - r = xz_dec_run(state, &iobuf); -// bb_error_msg(" - * based on gzip sources - * - * Adjusted further by Erik Andersen to support - * files as well as stdin/stdout, and to generally behave itself wrt - * command line handling. - * - * General cleanup to better adhere to the style guide and make use of standard - * busybox functions by Glenn McGrath - * - * read_gz interface + associated hacking by Laurence Anderson - * - * Fixed huft_build() so decoding end-of-block code does not grab more bits - * than necessary (this is required by unzip applet), added inflate_cleanup() - * to free leaked bytebuffer memory (used in unzip.c), and some minor style - * guide cleanups by Ed Clark - * - * gzip (GNU zip) -- compress files with zip algorithm and 'compress' interface - * Copyright (C) 1992-1993 Jean-loup Gailly - * The unzip code was written and put in the public domain by Mark Adler. - * Portions of the lzw code are derived from the public domain 'compress' - * written by Spencer Thomas, Joe Orost, James Woods, Jim McKie, Steve Davies, - * Ken Turkowski, Dave Mack and Peter Jannesen. - * - * See the file algorithm.doc for the compression algorithms and file formats. - * - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include -#include "libbb.h" -#include "unarchive.h" - -typedef struct huft_t { - unsigned char e; /* number of extra bits or operation */ - unsigned char b; /* number of bits in this code or subcode */ - union { - unsigned short n; /* literal, length base, or distance base */ - struct huft_t *t; /* pointer to next level of table */ - } v; -} huft_t; - -enum { - /* gunzip_window size--must be a power of two, and - * at least 32K for zip's deflate method */ - GUNZIP_WSIZE = 0x8000, - /* If BMAX needs to be larger than 16, then h and x[] should be ulg. */ - BMAX = 16, /* maximum bit length of any code (16 for explode) */ - N_MAX = 288, /* maximum number of codes in any set */ -}; - - -/* This is somewhat complex-looking arrangement, but it allows - * to place decompressor state either in bss or in - * malloc'ed space simply by changing #defines below. - * Sizes on i386: - * text data bss dec hex - * 5256 0 108 5364 14f4 - bss - * 4915 0 0 4915 1333 - malloc - */ -#define STATE_IN_BSS 0 -#define STATE_IN_MALLOC 1 - - -typedef struct state_t { - off_t gunzip_bytes_out; /* number of output bytes */ - uint32_t gunzip_crc; - - int gunzip_src_fd; - unsigned gunzip_outbuf_count; /* bytes in output buffer */ - - unsigned char *gunzip_window; - - uint32_t *gunzip_crc_table; - - /* bitbuffer */ - unsigned gunzip_bb; /* bit buffer */ - unsigned char gunzip_bk; /* bits in bit buffer */ - - /* input (compressed) data */ - unsigned char *bytebuffer; /* buffer itself */ - off_t to_read; /* compressed bytes to read (unzip only, -1 for gunzip) */ -// unsigned bytebuffer_max; /* buffer size */ - unsigned bytebuffer_offset; /* buffer position */ - unsigned bytebuffer_size; /* how much data is there (size <= max) */ - - /* private data of inflate_codes() */ - unsigned inflate_codes_ml; /* masks for bl and bd bits */ - unsigned inflate_codes_md; /* masks for bl and bd bits */ - unsigned inflate_codes_bb; /* bit buffer */ - unsigned inflate_codes_k; /* number of bits in bit buffer */ - unsigned inflate_codes_w; /* current gunzip_window position */ - huft_t *inflate_codes_tl; - huft_t *inflate_codes_td; - unsigned inflate_codes_bl; - unsigned inflate_codes_bd; - unsigned inflate_codes_nn; /* length and index for copy */ - unsigned inflate_codes_dd; - - smallint resume_copy; - - /* private data of inflate_get_next_window() */ - smallint method; /* method == -1 for stored, -2 for codes */ - smallint need_another_block; - smallint end_reached; - - /* private data of inflate_stored() */ - unsigned inflate_stored_n; - unsigned inflate_stored_b; - unsigned inflate_stored_k; - unsigned inflate_stored_w; - - const char *error_msg; - jmp_buf error_jmp; -} state_t; -#define gunzip_bytes_out (S()gunzip_bytes_out ) -#define gunzip_crc (S()gunzip_crc ) -#define gunzip_src_fd (S()gunzip_src_fd ) -#define gunzip_outbuf_count (S()gunzip_outbuf_count) -#define gunzip_window (S()gunzip_window ) -#define gunzip_crc_table (S()gunzip_crc_table ) -#define gunzip_bb (S()gunzip_bb ) -#define gunzip_bk (S()gunzip_bk ) -#define to_read (S()to_read ) -// #define bytebuffer_max (S()bytebuffer_max ) -// Both gunzip and unzip can use constant buffer size now (16k): -#define bytebuffer_max 0x4000 -#define bytebuffer (S()bytebuffer ) -#define bytebuffer_offset (S()bytebuffer_offset ) -#define bytebuffer_size (S()bytebuffer_size ) -#define inflate_codes_ml (S()inflate_codes_ml ) -#define inflate_codes_md (S()inflate_codes_md ) -#define inflate_codes_bb (S()inflate_codes_bb ) -#define inflate_codes_k (S()inflate_codes_k ) -#define inflate_codes_w (S()inflate_codes_w ) -#define inflate_codes_tl (S()inflate_codes_tl ) -#define inflate_codes_td (S()inflate_codes_td ) -#define inflate_codes_bl (S()inflate_codes_bl ) -#define inflate_codes_bd (S()inflate_codes_bd ) -#define inflate_codes_nn (S()inflate_codes_nn ) -#define inflate_codes_dd (S()inflate_codes_dd ) -#define resume_copy (S()resume_copy ) -#define method (S()method ) -#define need_another_block (S()need_another_block ) -#define end_reached (S()end_reached ) -#define inflate_stored_n (S()inflate_stored_n ) -#define inflate_stored_b (S()inflate_stored_b ) -#define inflate_stored_k (S()inflate_stored_k ) -#define inflate_stored_w (S()inflate_stored_w ) -#define error_msg (S()error_msg ) -#define error_jmp (S()error_jmp ) - -/* This is a generic part */ -#if STATE_IN_BSS /* Use global data segment */ -#define DECLARE_STATE /*nothing*/ -#define ALLOC_STATE /*nothing*/ -#define DEALLOC_STATE ((void)0) -#define S() state. -#define PASS_STATE /*nothing*/ -#define PASS_STATE_ONLY /*nothing*/ -#define STATE_PARAM /*nothing*/ -#define STATE_PARAM_ONLY void -static state_t state; -#endif - -#if STATE_IN_MALLOC /* Use malloc space */ -#define DECLARE_STATE state_t *state -#define ALLOC_STATE (state = xzalloc(sizeof(*state))) -#define DEALLOC_STATE free(state) -#define S() state-> -#define PASS_STATE state, -#define PASS_STATE_ONLY state -#define STATE_PARAM state_t *state, -#define STATE_PARAM_ONLY state_t *state -#endif - - -static const uint16_t mask_bits[] ALIGN2 = { - 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff, - 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff -}; - -/* Copy lengths for literal codes 257..285 */ -static const uint16_t cplens[] ALIGN2 = { - 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, - 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0 -}; - -/* note: see note #13 above about the 258 in this list. */ -/* Extra bits for literal codes 257..285 */ -static const uint8_t cplext[] ALIGN1 = { - 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, - 5, 5, 5, 0, 99, 99 -}; /* 99 == invalid */ - -/* Copy offsets for distance codes 0..29 */ -static const uint16_t cpdist[] ALIGN2 = { - 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193, 257, 385, 513, - 769, 1025, 1537, 2049, 3073, 4097, 6145, 8193, 12289, 16385, 24577 -}; - -/* Extra bits for distance codes */ -static const uint8_t cpdext[] ALIGN1 = { - 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, - 11, 11, 12, 12, 13, 13 -}; - -/* Tables for deflate from PKZIP's appnote.txt. */ -/* Order of the bit length code lengths */ -static const uint8_t border[] ALIGN1 = { - 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15 -}; - - -/* - * Free the malloc'ed tables built by huft_build(), which makes a linked - * list of the tables it made, with the links in a dummy first entry of - * each table. - * t: table to free - */ -static void huft_free(huft_t *p) -{ - huft_t *q; - - /* Go through linked list, freeing from the malloced (t[-1]) address. */ - while (p) { - q = (--p)->v.t; - free(p); - p = q; - } -} - -static void huft_free_all(STATE_PARAM_ONLY) -{ - huft_free(inflate_codes_tl); - huft_free(inflate_codes_td); - inflate_codes_tl = NULL; - inflate_codes_td = NULL; -} - -static void abort_unzip(STATE_PARAM_ONLY) NORETURN; -static void abort_unzip(STATE_PARAM_ONLY) -{ - huft_free_all(PASS_STATE_ONLY); - longjmp(error_jmp, 1); -} - -static unsigned fill_bitbuffer(STATE_PARAM unsigned bitbuffer, unsigned *current, const unsigned required) -{ - while (*current < required) { - if (bytebuffer_offset >= bytebuffer_size) { - unsigned sz = bytebuffer_max - 4; - if (to_read >= 0 && to_read < sz) /* unzip only */ - sz = to_read; - /* Leave the first 4 bytes empty so we can always unwind the bitbuffer - * to the front of the bytebuffer */ - bytebuffer_size = safe_read(gunzip_src_fd, &bytebuffer[4], sz); - if ((int)bytebuffer_size < 1) { - error_msg = "unexpected end of file"; - abort_unzip(PASS_STATE_ONLY); - } - if (to_read >= 0) /* unzip only */ - to_read -= bytebuffer_size; - bytebuffer_size += 4; - bytebuffer_offset = 4; - } - bitbuffer |= ((unsigned) bytebuffer[bytebuffer_offset]) << *current; - bytebuffer_offset++; - *current += 8; - } - return bitbuffer; -} - - -/* Given a list of code lengths and a maximum table size, make a set of - * tables to decode that set of codes. Return zero on success, one if - * the given code set is incomplete (the tables are still built in this - * case), two if the input is invalid (all zero length codes or an - * oversubscribed set of lengths) - in this case stores NULL in *t. - * - * b: code lengths in bits (all assumed <= BMAX) - * n: number of codes (assumed <= N_MAX) - * s: number of simple-valued codes (0..s-1) - * d: list of base values for non-simple codes - * e: list of extra bits for non-simple codes - * t: result: starting table - * m: maximum lookup bits, returns actual - */ -static int huft_build(const unsigned *b, const unsigned n, - const unsigned s, const unsigned short *d, - const unsigned char *e, huft_t **t, unsigned *m) -{ - unsigned a; /* counter for codes of length k */ - unsigned c[BMAX + 1]; /* bit length count table */ - unsigned eob_len; /* length of end-of-block code (value 256) */ - unsigned f; /* i repeats in table every f entries */ - int g; /* maximum code length */ - int htl; /* table level */ - unsigned i; /* counter, current code */ - unsigned j; /* counter */ - int k; /* number of bits in current code */ - unsigned *p; /* pointer into c[], b[], or v[] */ - huft_t *q; /* points to current table */ - huft_t r; /* table entry for structure assignment */ - huft_t *u[BMAX]; /* table stack */ - unsigned v[N_MAX]; /* values in order of bit length */ - int ws[BMAX + 1]; /* bits decoded stack */ - int w; /* bits decoded */ - unsigned x[BMAX + 1]; /* bit offsets, then code stack */ - unsigned *xp; /* pointer into x */ - int y; /* number of dummy codes added */ - unsigned z; /* number of entries in current table */ - - /* Length of EOB code, if any */ - eob_len = n > 256 ? b[256] : BMAX; - - *t = NULL; - - /* Generate counts for each bit length */ - memset(c, 0, sizeof(c)); - p = (unsigned *) b; /* cast allows us to reuse p for pointing to b */ - i = n; - do { - c[*p]++; /* assume all entries <= BMAX */ - p++; /* can't combine with above line (Solaris bug) */ - } while (--i); - if (c[0] == n) { /* null input - all zero length codes */ - *m = 0; - return 2; - } - - /* Find minimum and maximum length, bound *m by those */ - for (j = 1; (c[j] == 0) && (j <= BMAX); j++) - continue; - k = j; /* minimum code length */ - for (i = BMAX; (c[i] == 0) && i; i--) - continue; - g = i; /* maximum code length */ - *m = (*m < j) ? j : ((*m > i) ? i : *m); - - /* Adjust last length count to fill out codes, if needed */ - for (y = 1 << j; j < i; j++, y <<= 1) { - y -= c[j]; - if (y < 0) - return 2; /* bad input: more codes than bits */ - } - y -= c[i]; - if (y < 0) - return 2; - c[i] += y; - - /* Generate starting offsets into the value table for each length */ - x[1] = j = 0; - p = c + 1; - xp = x + 2; - while (--i) { /* note that i == g from above */ - j += *p++; - *xp++ = j; - } - - /* Make a table of values in order of bit lengths */ - p = (unsigned *) b; - i = 0; - do { - j = *p++; - if (j != 0) { - v[x[j]++] = i; - } - } while (++i < n); - - /* Generate the Huffman codes and for each, make the table entries */ - x[0] = i = 0; /* first Huffman code is zero */ - p = v; /* grab values in bit order */ - htl = -1; /* no tables yet--level -1 */ - w = ws[0] = 0; /* bits decoded */ - u[0] = NULL; /* just to keep compilers happy */ - q = NULL; /* ditto */ - z = 0; /* ditto */ - - /* go through the bit lengths (k already is bits in shortest code) */ - for (; k <= g; k++) { - a = c[k]; - while (a--) { - /* here i is the Huffman code of length k bits for value *p */ - /* make tables up to required level */ - while (k > ws[htl + 1]) { - w = ws[++htl]; - - /* compute minimum size table less than or equal to *m bits */ - z = g - w; - z = z > *m ? *m : z; /* upper limit on table size */ - j = k - w; - f = 1 << j; - if (f > a + 1) { /* try a k-w bit table */ - /* too few codes for k-w bit table */ - f -= a + 1; /* deduct codes from patterns left */ - xp = c + k; - while (++j < z) { /* try smaller tables up to z bits */ - f <<= 1; - if (f <= *++xp) { - break; /* enough codes to use up j bits */ - } - f -= *xp; /* else deduct codes from patterns */ - } - } - j = (w + j > eob_len && w < eob_len) ? eob_len - w : j; /* make EOB code end at table */ - z = 1 << j; /* table entries for j-bit table */ - ws[htl+1] = w + j; /* set bits decoded in stack */ - - /* allocate and link in new table */ - q = xzalloc((z + 1) * sizeof(huft_t)); - *t = q + 1; /* link to list for huft_free() */ - t = &(q->v.t); - u[htl] = ++q; /* table starts after link */ - - /* connect to last table, if there is one */ - if (htl) { - x[htl] = i; /* save pattern for backing up */ - r.b = (unsigned char) (w - ws[htl - 1]); /* bits to dump before this table */ - r.e = (unsigned char) (16 + j); /* bits in this table */ - r.v.t = q; /* pointer to this table */ - j = (i & ((1 << w) - 1)) >> ws[htl - 1]; - u[htl - 1][j] = r; /* connect to last table */ - } - } - - /* set up table entry in r */ - r.b = (unsigned char) (k - w); - if (p >= v + n) { - r.e = 99; /* out of values--invalid code */ - } else if (*p < s) { - r.e = (unsigned char) (*p < 256 ? 16 : 15); /* 256 is EOB code */ - r.v.n = (unsigned short) (*p++); /* simple code is just the value */ - } else { - r.e = (unsigned char) e[*p - s]; /* non-simple--look up in lists */ - r.v.n = d[*p++ - s]; - } - - /* fill code-like entries with r */ - f = 1 << (k - w); - for (j = i >> w; j < z; j += f) { - q[j] = r; - } - - /* backwards increment the k-bit code i */ - for (j = 1 << (k - 1); i & j; j >>= 1) { - i ^= j; - } - i ^= j; - - /* backup over finished tables */ - while ((i & ((1 << w) - 1)) != x[htl]) { - w = ws[--htl]; - } - } - } - - /* return actual size of base table */ - *m = ws[1]; - - /* Return 1 if we were given an incomplete table */ - return y != 0 && g != 1; -} - - -/* - * inflate (decompress) the codes in a deflated (compressed) block. - * Return an error code or zero if it all goes ok. - * - * tl, td: literal/length and distance decoder tables - * bl, bd: number of bits decoded by tl[] and td[] - */ -/* called once from inflate_block */ - -/* map formerly local static variables to globals */ -#define ml inflate_codes_ml -#define md inflate_codes_md -#define bb inflate_codes_bb -#define k inflate_codes_k -#define w inflate_codes_w -#define tl inflate_codes_tl -#define td inflate_codes_td -#define bl inflate_codes_bl -#define bd inflate_codes_bd -#define nn inflate_codes_nn -#define dd inflate_codes_dd -static void inflate_codes_setup(STATE_PARAM unsigned my_bl, unsigned my_bd) -{ - bl = my_bl; - bd = my_bd; - /* make local copies of globals */ - bb = gunzip_bb; /* initialize bit buffer */ - k = gunzip_bk; - w = gunzip_outbuf_count; /* initialize gunzip_window position */ - /* inflate the coded data */ - ml = mask_bits[bl]; /* precompute masks for speed */ - md = mask_bits[bd]; -} -/* called once from inflate_get_next_window */ -static NOINLINE int inflate_codes(STATE_PARAM_ONLY) -{ - unsigned e; /* table entry flag/number of extra bits */ - huft_t *t; /* pointer to table entry */ - - if (resume_copy) - goto do_copy; - - while (1) { /* do until end of block */ - bb = fill_bitbuffer(PASS_STATE bb, &k, bl); - t = tl + ((unsigned) bb & ml); - e = t->e; - if (e > 16) - do { - if (e == 99) - abort_unzip(PASS_STATE_ONLY);; - bb >>= t->b; - k -= t->b; - e -= 16; - bb = fill_bitbuffer(PASS_STATE bb, &k, e); - t = t->v.t + ((unsigned) bb & mask_bits[e]); - e = t->e; - } while (e > 16); - bb >>= t->b; - k -= t->b; - if (e == 16) { /* then it's a literal */ - gunzip_window[w++] = (unsigned char) t->v.n; - if (w == GUNZIP_WSIZE) { - gunzip_outbuf_count = w; - //flush_gunzip_window(); - w = 0; - return 1; // We have a block to read - } - } else { /* it's an EOB or a length */ - /* exit if end of block */ - if (e == 15) { - break; - } - - /* get length of block to copy */ - bb = fill_bitbuffer(PASS_STATE bb, &k, e); - nn = t->v.n + ((unsigned) bb & mask_bits[e]); - bb >>= e; - k -= e; - - /* decode distance of block to copy */ - bb = fill_bitbuffer(PASS_STATE bb, &k, bd); - t = td + ((unsigned) bb & md); - e = t->e; - if (e > 16) - do { - if (e == 99) - abort_unzip(PASS_STATE_ONLY); - bb >>= t->b; - k -= t->b; - e -= 16; - bb = fill_bitbuffer(PASS_STATE bb, &k, e); - t = t->v.t + ((unsigned) bb & mask_bits[e]); - e = t->e; - } while (e > 16); - bb >>= t->b; - k -= t->b; - bb = fill_bitbuffer(PASS_STATE bb, &k, e); - dd = w - t->v.n - ((unsigned) bb & mask_bits[e]); - bb >>= e; - k -= e; - - /* do the copy */ - do_copy: - do { - /* Was: nn -= (e = (e = GUNZIP_WSIZE - ((dd &= GUNZIP_WSIZE - 1) > w ? dd : w)) > nn ? nn : e); */ - /* Who wrote THAT?? rewritten as: */ - unsigned delta; - - dd &= GUNZIP_WSIZE - 1; - e = GUNZIP_WSIZE - (dd > w ? dd : w); - delta = w > dd ? w - dd : dd - w; - if (e > nn) e = nn; - nn -= e; - - /* copy to new buffer to prevent possible overwrite */ - if (delta >= e) { - memcpy(gunzip_window + w, gunzip_window + dd, e); - w += e; - dd += e; - } else { - /* do it slow to avoid memcpy() overlap */ - /* !NOMEMCPY */ - do { - gunzip_window[w++] = gunzip_window[dd++]; - } while (--e); - } - if (w == GUNZIP_WSIZE) { - gunzip_outbuf_count = w; - resume_copy = (nn != 0); - //flush_gunzip_window(); - w = 0; - return 1; - } - } while (nn); - resume_copy = 0; - } - } - - /* restore the globals from the locals */ - gunzip_outbuf_count = w; /* restore global gunzip_window pointer */ - gunzip_bb = bb; /* restore global bit buffer */ - gunzip_bk = k; - - /* normally just after call to inflate_codes, but save code by putting it here */ - /* free the decoding tables (tl and td), return */ - huft_free_all(PASS_STATE_ONLY); - - /* done */ - return 0; -} -#undef ml -#undef md -#undef bb -#undef k -#undef w -#undef tl -#undef td -#undef bl -#undef bd -#undef nn -#undef dd - - -/* called once from inflate_block */ -static void inflate_stored_setup(STATE_PARAM int my_n, int my_b, int my_k) -{ - inflate_stored_n = my_n; - inflate_stored_b = my_b; - inflate_stored_k = my_k; - /* initialize gunzip_window position */ - inflate_stored_w = gunzip_outbuf_count; -} -/* called once from inflate_get_next_window */ -static int inflate_stored(STATE_PARAM_ONLY) -{ - /* read and output the compressed data */ - while (inflate_stored_n--) { - inflate_stored_b = fill_bitbuffer(PASS_STATE inflate_stored_b, &inflate_stored_k, 8); - gunzip_window[inflate_stored_w++] = (unsigned char) inflate_stored_b; - if (inflate_stored_w == GUNZIP_WSIZE) { - gunzip_outbuf_count = inflate_stored_w; - //flush_gunzip_window(); - inflate_stored_w = 0; - inflate_stored_b >>= 8; - inflate_stored_k -= 8; - return 1; /* We have a block */ - } - inflate_stored_b >>= 8; - inflate_stored_k -= 8; - } - - /* restore the globals from the locals */ - gunzip_outbuf_count = inflate_stored_w; /* restore global gunzip_window pointer */ - gunzip_bb = inflate_stored_b; /* restore global bit buffer */ - gunzip_bk = inflate_stored_k; - return 0; /* Finished */ -} - - -/* - * decompress an inflated block - * e: last block flag - * - * GLOBAL VARIABLES: bb, kk, - */ -/* Return values: -1 = inflate_stored, -2 = inflate_codes */ -/* One callsite in inflate_get_next_window */ -static int inflate_block(STATE_PARAM smallint *e) -{ - unsigned ll[286 + 30]; /* literal/length and distance code lengths */ - unsigned t; /* block type */ - unsigned b; /* bit buffer */ - unsigned k; /* number of bits in bit buffer */ - - /* make local bit buffer */ - - b = gunzip_bb; - k = gunzip_bk; - - /* read in last block bit */ - b = fill_bitbuffer(PASS_STATE b, &k, 1); - *e = b & 1; - b >>= 1; - k -= 1; - - /* read in block type */ - b = fill_bitbuffer(PASS_STATE b, &k, 2); - t = (unsigned) b & 3; - b >>= 2; - k -= 2; - - /* restore the global bit buffer */ - gunzip_bb = b; - gunzip_bk = k; - - /* Do we see block type 1 often? Yes! - * TODO: fix performance problem (see below) */ - //bb_error_msg("blktype %d", t); - - /* inflate that block type */ - switch (t) { - case 0: /* Inflate stored */ - { - unsigned n; /* number of bytes in block */ - unsigned b_stored; /* bit buffer */ - unsigned k_stored; /* number of bits in bit buffer */ - - /* make local copies of globals */ - b_stored = gunzip_bb; /* initialize bit buffer */ - k_stored = gunzip_bk; - - /* go to byte boundary */ - n = k_stored & 7; - b_stored >>= n; - k_stored -= n; - - /* get the length and its complement */ - b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16); - n = ((unsigned) b_stored & 0xffff); - b_stored >>= 16; - k_stored -= 16; - - b_stored = fill_bitbuffer(PASS_STATE b_stored, &k_stored, 16); - if (n != (unsigned) ((~b_stored) & 0xffff)) { - abort_unzip(PASS_STATE_ONLY); /* error in compressed data */ - } - b_stored >>= 16; - k_stored -= 16; - - inflate_stored_setup(PASS_STATE n, b_stored, k_stored); - - return -1; - } - case 1: - /* Inflate fixed - * decompress an inflated type 1 (fixed Huffman codes) block. We should - * either replace this with a custom decoder, or at least precompute the - * Huffman tables. TODO */ - { - int i; /* temporary variable */ - unsigned bl; /* lookup bits for tl */ - unsigned bd; /* lookup bits for td */ - /* gcc 4.2.1 is too dumb to reuse stackspace. Moved up... */ - //unsigned ll[288]; /* length list for huft_build */ - - /* set up literal table */ - for (i = 0; i < 144; i++) - ll[i] = 8; - for (; i < 256; i++) - ll[i] = 9; - for (; i < 280; i++) - ll[i] = 7; - for (; i < 288; i++) /* make a complete, but wrong code set */ - ll[i] = 8; - bl = 7; - huft_build(ll, 288, 257, cplens, cplext, &inflate_codes_tl, &bl); - /* huft_build() never return nonzero - we use known data */ - - /* set up distance table */ - for (i = 0; i < 30; i++) /* make an incomplete code set */ - ll[i] = 5; - bd = 5; - huft_build(ll, 30, 0, cpdist, cpdext, &inflate_codes_td, &bd); - - /* set up data for inflate_codes() */ - inflate_codes_setup(PASS_STATE bl, bd); - - /* huft_free code moved into inflate_codes */ - - return -2; - } - case 2: /* Inflate dynamic */ - { - enum { dbits = 6 }; /* bits in base distance lookup table */ - enum { lbits = 9 }; /* bits in base literal/length lookup table */ - - huft_t *td; /* distance code table */ - unsigned i; /* temporary variables */ - unsigned j; - unsigned l; /* last length */ - unsigned m; /* mask for bit lengths table */ - unsigned n; /* number of lengths to get */ - unsigned bl; /* lookup bits for tl */ - unsigned bd; /* lookup bits for td */ - unsigned nb; /* number of bit length codes */ - unsigned nl; /* number of literal/length codes */ - unsigned nd; /* number of distance codes */ - - //unsigned ll[286 + 30];/* literal/length and distance code lengths */ - unsigned b_dynamic; /* bit buffer */ - unsigned k_dynamic; /* number of bits in bit buffer */ - - /* make local bit buffer */ - b_dynamic = gunzip_bb; - k_dynamic = gunzip_bk; - - /* read in table lengths */ - b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5); - nl = 257 + ((unsigned) b_dynamic & 0x1f); /* number of literal/length codes */ - - b_dynamic >>= 5; - k_dynamic -= 5; - b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 5); - nd = 1 + ((unsigned) b_dynamic & 0x1f); /* number of distance codes */ - - b_dynamic >>= 5; - k_dynamic -= 5; - b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 4); - nb = 4 + ((unsigned) b_dynamic & 0xf); /* number of bit length codes */ - - b_dynamic >>= 4; - k_dynamic -= 4; - if (nl > 286 || nd > 30) - abort_unzip(PASS_STATE_ONLY); /* bad lengths */ - - /* read in bit-length-code lengths */ - for (j = 0; j < nb; j++) { - b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3); - ll[border[j]] = (unsigned) b_dynamic & 7; - b_dynamic >>= 3; - k_dynamic -= 3; - } - for (; j < 19; j++) - ll[border[j]] = 0; - - /* build decoding table for trees - single level, 7 bit lookup */ - bl = 7; - i = huft_build(ll, 19, 19, NULL, NULL, &inflate_codes_tl, &bl); - if (i != 0) { - abort_unzip(PASS_STATE_ONLY); //return i; /* incomplete code set */ - } - - /* read in literal and distance code lengths */ - n = nl + nd; - m = mask_bits[bl]; - i = l = 0; - while ((unsigned) i < n) { - b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, (unsigned)bl); - td = inflate_codes_tl + ((unsigned) b_dynamic & m); - j = td->b; - b_dynamic >>= j; - k_dynamic -= j; - j = td->v.n; - if (j < 16) { /* length of code in bits (0..15) */ - ll[i++] = l = j; /* save last length in l */ - } else if (j == 16) { /* repeat last length 3 to 6 times */ - b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 2); - j = 3 + ((unsigned) b_dynamic & 3); - b_dynamic >>= 2; - k_dynamic -= 2; - if ((unsigned) i + j > n) { - abort_unzip(PASS_STATE_ONLY); //return 1; - } - while (j--) { - ll[i++] = l; - } - } else if (j == 17) { /* 3 to 10 zero length codes */ - b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 3); - j = 3 + ((unsigned) b_dynamic & 7); - b_dynamic >>= 3; - k_dynamic -= 3; - if ((unsigned) i + j > n) { - abort_unzip(PASS_STATE_ONLY); //return 1; - } - while (j--) { - ll[i++] = 0; - } - l = 0; - } else { /* j == 18: 11 to 138 zero length codes */ - b_dynamic = fill_bitbuffer(PASS_STATE b_dynamic, &k_dynamic, 7); - j = 11 + ((unsigned) b_dynamic & 0x7f); - b_dynamic >>= 7; - k_dynamic -= 7; - if ((unsigned) i + j > n) { - abort_unzip(PASS_STATE_ONLY); //return 1; - } - while (j--) { - ll[i++] = 0; - } - l = 0; - } - } - - /* free decoding table for trees */ - huft_free(inflate_codes_tl); - - /* restore the global bit buffer */ - gunzip_bb = b_dynamic; - gunzip_bk = k_dynamic; - - /* build the decoding tables for literal/length and distance codes */ - bl = lbits; - - i = huft_build(ll, nl, 257, cplens, cplext, &inflate_codes_tl, &bl); - if (i != 0) - abort_unzip(PASS_STATE_ONLY); - bd = dbits; - i = huft_build(ll + nl, nd, 0, cpdist, cpdext, &inflate_codes_td, &bd); - if (i != 0) - abort_unzip(PASS_STATE_ONLY); - - /* set up data for inflate_codes() */ - inflate_codes_setup(PASS_STATE bl, bd); - - /* huft_free code moved into inflate_codes */ - - return -2; - } - default: - abort_unzip(PASS_STATE_ONLY); - } -} - -/* Two callsites, both in inflate_get_next_window */ -static void calculate_gunzip_crc(STATE_PARAM_ONLY) -{ - gunzip_crc = crc32_block_endian0(gunzip_crc, gunzip_window, gunzip_outbuf_count, gunzip_crc_table); - gunzip_bytes_out += gunzip_outbuf_count; -} - -/* One callsite in inflate_unzip_internal */ -static int inflate_get_next_window(STATE_PARAM_ONLY) -{ - gunzip_outbuf_count = 0; - - while (1) { - int ret; - - if (need_another_block) { - if (end_reached) { - calculate_gunzip_crc(PASS_STATE_ONLY); - end_reached = 0; - /* NB: need_another_block is still set */ - return 0; /* Last block */ - } - method = inflate_block(PASS_STATE &end_reached); - need_another_block = 0; - } - - switch (method) { - case -1: - ret = inflate_stored(PASS_STATE_ONLY); - break; - case -2: - ret = inflate_codes(PASS_STATE_ONLY); - break; - default: /* cannot happen */ - abort_unzip(PASS_STATE_ONLY); - } - - if (ret == 1) { - calculate_gunzip_crc(PASS_STATE_ONLY); - return 1; /* more data left */ - } - need_another_block = 1; /* end of that block */ - } - /* Doesnt get here */ -} - - -/* Called from unpack_gz_stream() and inflate_unzip() */ -static IF_DESKTOP(long long) int -inflate_unzip_internal(STATE_PARAM int in, int out) -{ - IF_DESKTOP(long long) int n = 0; - ssize_t nwrote; - - /* Allocate all global buffers (for DYN_ALLOC option) */ - gunzip_window = xmalloc(GUNZIP_WSIZE); - gunzip_outbuf_count = 0; - gunzip_bytes_out = 0; - gunzip_src_fd = in; - - /* (re) initialize state */ - method = -1; - need_another_block = 1; - resume_copy = 0; - gunzip_bk = 0; - gunzip_bb = 0; - - /* Create the crc table */ - gunzip_crc_table = crc32_filltable(NULL, 0); - gunzip_crc = ~0; - - error_msg = "corrupted data"; - if (setjmp(error_jmp)) { - /* Error from deep inside zip machinery */ - n = -1; - goto ret; - } - - while (1) { - int r = inflate_get_next_window(PASS_STATE_ONLY); - nwrote = full_write(out, gunzip_window, gunzip_outbuf_count); - if (nwrote != (ssize_t)gunzip_outbuf_count) { - bb_perror_msg("write"); - n = -1; - goto ret; - } - IF_DESKTOP(n += nwrote;) - if (r == 0) break; - } - - /* Store unused bytes in a global buffer so calling applets can access it */ - if (gunzip_bk >= 8) { - /* Undo too much lookahead. The next read will be byte aligned - * so we can discard unused bits in the last meaningful byte. */ - bytebuffer_offset--; - bytebuffer[bytebuffer_offset] = gunzip_bb & 0xff; - gunzip_bb >>= 8; - gunzip_bk -= 8; - } - ret: - /* Cleanup */ - free(gunzip_window); - free(gunzip_crc_table); - return n; -} - - -/* External entry points */ - -/* For unzip */ - -IF_DESKTOP(long long) int FAST_FUNC -inflate_unzip(inflate_unzip_result *res, off_t compr_size, int in, int out) -{ - IF_DESKTOP(long long) int n; - DECLARE_STATE; - - ALLOC_STATE; - - to_read = compr_size; -// bytebuffer_max = 0x8000; - bytebuffer_offset = 4; - bytebuffer = xmalloc(bytebuffer_max); - n = inflate_unzip_internal(PASS_STATE in, out); - free(bytebuffer); - - res->crc = gunzip_crc; - res->bytes_out = gunzip_bytes_out; - DEALLOC_STATE; - return n; -} - - -/* For gunzip */ - -/* helpers first */ - -/* Top up the input buffer with at least n bytes. */ -static int top_up(STATE_PARAM unsigned n) -{ - int count = bytebuffer_size - bytebuffer_offset; - - if (count < (int)n) { - memmove(bytebuffer, &bytebuffer[bytebuffer_offset], count); - bytebuffer_offset = 0; - bytebuffer_size = full_read(gunzip_src_fd, &bytebuffer[count], bytebuffer_max - count); - if ((int)bytebuffer_size < 0) { - bb_error_msg(bb_msg_read_error); - return 0; - } - bytebuffer_size += count; - if (bytebuffer_size < n) - return 0; - } - return 1; -} - -static uint16_t buffer_read_le_u16(STATE_PARAM_ONLY) -{ - uint16_t res; -#if BB_LITTLE_ENDIAN - move_from_unaligned16(res, &bytebuffer[bytebuffer_offset]); -#else - res = bytebuffer[bytebuffer_offset]; - res |= bytebuffer[bytebuffer_offset + 1] << 8; -#endif - bytebuffer_offset += 2; - return res; -} - -static uint32_t buffer_read_le_u32(STATE_PARAM_ONLY) -{ - uint32_t res; -#if BB_LITTLE_ENDIAN - move_from_unaligned32(res, &bytebuffer[bytebuffer_offset]); -#else - res = bytebuffer[bytebuffer_offset]; - res |= bytebuffer[bytebuffer_offset + 1] << 8; - res |= bytebuffer[bytebuffer_offset + 2] << 16; - res |= bytebuffer[bytebuffer_offset + 3] << 24; -#endif - bytebuffer_offset += 4; - return res; -} - -static int check_header_gzip(STATE_PARAM unpack_info_t *info) -{ - union { - unsigned char raw[8]; - struct { - uint8_t gz_method; - uint8_t flags; - uint32_t mtime; - uint8_t xtra_flags_UNUSED; - uint8_t os_flags_UNUSED; - } PACKED formatted; - } header; - struct BUG_header { - char BUG_header[sizeof(header) == 8 ? 1 : -1]; - }; - - /* - * Rewind bytebuffer. We use the beginning because the header has 8 - * bytes, leaving enough for unwinding afterwards. - */ - bytebuffer_size -= bytebuffer_offset; - memmove(bytebuffer, &bytebuffer[bytebuffer_offset], bytebuffer_size); - bytebuffer_offset = 0; - - if (!top_up(PASS_STATE 8)) - return 0; - memcpy(header.raw, &bytebuffer[bytebuffer_offset], 8); - bytebuffer_offset += 8; - - /* Check the compression method */ - if (header.formatted.gz_method != 8) { - return 0; - } - - if (header.formatted.flags & 0x04) { - /* bit 2 set: extra field present */ - unsigned extra_short; - - if (!top_up(PASS_STATE 2)) - return 0; - extra_short = buffer_read_le_u16(PASS_STATE_ONLY); - if (!top_up(PASS_STATE extra_short)) - return 0; - /* Ignore extra field */ - bytebuffer_offset += extra_short; - } - - /* Discard original name and file comment if any */ - /* bit 3 set: original file name present */ - /* bit 4 set: file comment present */ - if (header.formatted.flags & 0x18) { - while (1) { - do { - if (!top_up(PASS_STATE 1)) - return 0; - } while (bytebuffer[bytebuffer_offset++] != 0); - if ((header.formatted.flags & 0x18) != 0x18) - break; - header.formatted.flags &= ~0x18; - } - } - - if (info) - info->mtime = SWAP_LE32(header.formatted.mtime); - - /* Read the header checksum */ - if (header.formatted.flags & 0x02) { - if (!top_up(PASS_STATE 2)) - return 0; - bytebuffer_offset += 2; - } - return 1; -} - -IF_DESKTOP(long long) int FAST_FUNC -unpack_gz_stream_with_info(int in, int out, unpack_info_t *info) -{ - uint32_t v32; - IF_DESKTOP(long long) int n; - DECLARE_STATE; - - n = 0; - - ALLOC_STATE; - to_read = -1; -// bytebuffer_max = 0x8000; - bytebuffer = xmalloc(bytebuffer_max); - gunzip_src_fd = in; - - again: - if (!check_header_gzip(PASS_STATE info)) { - bb_error_msg("corrupted data"); - n = -1; - goto ret; - } - n += inflate_unzip_internal(PASS_STATE in, out); - if (n < 0) - goto ret; - - if (!top_up(PASS_STATE 8)) { - bb_error_msg("corrupted data"); - n = -1; - goto ret; - } - - /* Validate decompression - crc */ - v32 = buffer_read_le_u32(PASS_STATE_ONLY); - if ((~gunzip_crc) != v32) { - bb_error_msg("crc error"); - n = -1; - goto ret; - } - - /* Validate decompression - size */ - v32 = buffer_read_le_u32(PASS_STATE_ONLY); - if ((uint32_t)gunzip_bytes_out != v32) { - bb_error_msg("incorrect length"); - n = -1; - } - - if (!top_up(PASS_STATE 2)) - goto ret; /* EOF */ - - if (bytebuffer[bytebuffer_offset] == 0x1f - && bytebuffer[bytebuffer_offset + 1] == 0x8b - ) { - bytebuffer_offset += 2; - goto again; - } - /* GNU gzip says: */ - /*bb_error_msg("decompression OK, trailing garbage ignored");*/ - - ret: - free(bytebuffer); - DEALLOC_STATE; - return n; -} - -IF_DESKTOP(long long) int FAST_FUNC -unpack_gz_stream(int in, int out) -{ - return unpack_gz_stream_with_info(in, out, NULL); -} diff --git a/archival/libunarchive/filter_accept_all.c b/archival/libunarchive/filter_accept_all.c deleted file mode 100644 index cb1f506c8..000000000 --- a/archival/libunarchive/filter_accept_all.c +++ /dev/null @@ -1,17 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Copyright (C) 2002 by Glenn McGrath - * - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -/* Accept any non-null name, its not really a filter at all */ -char FAST_FUNC filter_accept_all(archive_handle_t *archive_handle) -{ - if (archive_handle->file_header->name) - return EXIT_SUCCESS; - return EXIT_FAILURE; -} diff --git a/archival/libunarchive/filter_accept_list.c b/archival/libunarchive/filter_accept_list.c deleted file mode 100644 index fe4414c85..000000000 --- a/archival/libunarchive/filter_accept_list.c +++ /dev/null @@ -1,19 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Copyright (C) 2002 by Glenn McGrath - * - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -/* - * Accept names that are in the accept list, ignoring reject list. - */ -char FAST_FUNC filter_accept_list(archive_handle_t *archive_handle) -{ - if (find_list_entry(archive_handle->accept, archive_handle->file_header->name)) - return EXIT_SUCCESS; - return EXIT_FAILURE; -} diff --git a/archival/libunarchive/filter_accept_list_reassign.c b/archival/libunarchive/filter_accept_list_reassign.c deleted file mode 100644 index 891f58390..000000000 --- a/archival/libunarchive/filter_accept_list_reassign.c +++ /dev/null @@ -1,51 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Copyright (C) 2002 by Glenn McGrath - * - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -/* Built and used only if ENABLE_DPKG || ENABLE_DPKG_DEB */ - -/* - * Reassign the subarchive metadata parser based on the filename extension - * e.g. if its a .tar.gz modify archive_handle->sub_archive to process a .tar.gz - * or if its a .tar.bz2 make archive_handle->sub_archive handle that - */ -char FAST_FUNC filter_accept_list_reassign(archive_handle_t *archive_handle) -{ - /* Check the file entry is in the accept list */ - if (find_list_entry(archive_handle->accept, archive_handle->file_header->name)) { - const char *name_ptr; - - /* Find extension */ - name_ptr = strrchr(archive_handle->file_header->name, '.'); - if (!name_ptr) - return EXIT_FAILURE; - name_ptr++; - - /* Modify the subarchive handler based on the extension */ - if (ENABLE_FEATURE_SEAMLESS_GZ - && strcmp(name_ptr, "gz") == 0 - ) { - archive_handle->dpkg__action_data_subarchive = get_header_tar_gz; - return EXIT_SUCCESS; - } - if (ENABLE_FEATURE_SEAMLESS_BZ2 - && strcmp(name_ptr, "bz2") == 0 - ) { - archive_handle->dpkg__action_data_subarchive = get_header_tar_bz2; - return EXIT_SUCCESS; - } - if (ENABLE_FEATURE_SEAMLESS_LZMA - && strcmp(name_ptr, "lzma") == 0 - ) { - archive_handle->dpkg__action_data_subarchive = get_header_tar_lzma; - return EXIT_SUCCESS; - } - } - return EXIT_FAILURE; -} diff --git a/archival/libunarchive/filter_accept_reject_list.c b/archival/libunarchive/filter_accept_reject_list.c deleted file mode 100644 index 89a5502d5..000000000 --- a/archival/libunarchive/filter_accept_reject_list.c +++ /dev/null @@ -1,36 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Copyright (C) 2002 by Glenn McGrath - * - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -/* - * Accept names that are in the accept list and not in the reject list - */ -char FAST_FUNC filter_accept_reject_list(archive_handle_t *archive_handle) -{ - const char *key; - const llist_t *reject_entry; - const llist_t *accept_entry; - - key = archive_handle->file_header->name; - - /* If the key is in a reject list fail */ - reject_entry = find_list_entry2(archive_handle->reject, key); - if (reject_entry) { - return EXIT_FAILURE; - } - accept_entry = find_list_entry2(archive_handle->accept, key); - - /* Fail if an accept list was specified and the key wasnt in there */ - if ((accept_entry == NULL) && archive_handle->accept) { - return EXIT_FAILURE; - } - - /* Accepted */ - return EXIT_SUCCESS; -} diff --git a/archival/libunarchive/find_list_entry.c b/archival/libunarchive/find_list_entry.c deleted file mode 100644 index 5c0c85f09..000000000 --- a/archival/libunarchive/find_list_entry.c +++ /dev/null @@ -1,54 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Copyright (C) 2002 by Glenn McGrath - * - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include -#include "libbb.h" -#include "unarchive.h" - -/* Find a string in a shell pattern list */ -const llist_t* FAST_FUNC find_list_entry(const llist_t *list, const char *filename) -{ - while (list) { - if (fnmatch(list->data, filename, 0) == 0) { - return list; - } - list = list->link; - } - return NULL; -} - -/* Same, but compares only path components present in pattern - * (extra trailing path components in filename are assumed to match) - */ -const llist_t* FAST_FUNC find_list_entry2(const llist_t *list, const char *filename) -{ - char buf[PATH_MAX]; - int pattern_slash_cnt; - const char *c; - char *d; - - while (list) { - c = list->data; - pattern_slash_cnt = 0; - while (*c) - if (*c++ == '/') pattern_slash_cnt++; - c = filename; - d = buf; - /* paranoia is better than buffer overflows */ - while (*c && d != buf + sizeof(buf)-1) { - if (*c == '/' && --pattern_slash_cnt < 0) - break; - *d++ = *c++; - } - *d = '\0'; - if (fnmatch(list->data, buf, 0) == 0) { - return list; - } - list = list->link; - } - return NULL; -} diff --git a/archival/libunarchive/get_header_ar.c b/archival/libunarchive/get_header_ar.c deleted file mode 100644 index 6bfc6bc27..000000000 --- a/archival/libunarchive/get_header_ar.c +++ /dev/null @@ -1,133 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* Copyright 2001 Glenn McGrath. - * - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" -#include "ar.h" - -static unsigned read_num(const char *str, int base) -{ - /* This code works because - * on misformatted numbers bb_strtou returns all-ones */ - int err = bb_strtou(str, NULL, base); - if (err == -1) - bb_error_msg_and_die("invalid ar header"); - return err; -} - -char FAST_FUNC get_header_ar(archive_handle_t *archive_handle) -{ - file_header_t *typed = archive_handle->file_header; - unsigned size; - union { - char raw[60]; - struct ar_header formatted; - } ar; -#if ENABLE_FEATURE_AR_LONG_FILENAMES - static char *ar_long_names; - static unsigned ar_long_name_size; -#endif - - /* dont use xread as we want to handle the error ourself */ - if (read(archive_handle->src_fd, ar.raw, 60) != 60) { - /* End Of File */ - return EXIT_FAILURE; - } - - /* ar header starts on an even byte (2 byte aligned) - * '\n' is used for padding - */ - if (ar.raw[0] == '\n') { - /* fix up the header, we started reading 1 byte too early */ - memmove(ar.raw, &ar.raw[1], 59); - ar.raw[59] = xread_char(archive_handle->src_fd); - archive_handle->offset++; - } - archive_handle->offset += 60; - - if (ar.formatted.magic[0] != '`' || ar.formatted.magic[1] != '\n') - bb_error_msg_and_die("invalid ar header"); - - /* FIXME: more thorough routine would be in order here - * (we have something like that in tar) - * but for now we are lax. */ - ar.formatted.magic[0] = '\0'; /* else 4G-2 file will have size="4294967294`\n..." */ - typed->size = size = read_num(ar.formatted.size, 10); - - /* special filenames have '/' as the first character */ - if (ar.formatted.name[0] == '/') { - if (ar.formatted.name[1] == ' ') { - /* This is the index of symbols in the file for compilers */ - data_skip(archive_handle); - archive_handle->offset += size; - return get_header_ar(archive_handle); /* Return next header */ - } -#if ENABLE_FEATURE_AR_LONG_FILENAMES - if (ar.formatted.name[1] == '/') { - /* If the second char is a '/' then this entries data section - * stores long filename for multiple entries, they are stored - * in static variable long_names for use in future entries - */ - ar_long_name_size = size; - free(ar_long_names); - ar_long_names = xmalloc(size); - xread(archive_handle->src_fd, ar_long_names, size); - archive_handle->offset += size; - /* Return next header */ - return get_header_ar(archive_handle); - } -#else - bb_error_msg_and_die("long filenames not supported"); -#endif - } - /* Only size is always present, the rest may be missing in - * long filename pseudo file. Thus we decode the rest - * after dealing with long filename pseudo file. - */ - typed->mode = read_num(ar.formatted.mode, 8); - typed->mtime = read_num(ar.formatted.date, 10); - typed->uid = read_num(ar.formatted.uid, 10); - typed->gid = read_num(ar.formatted.gid, 10); - -#if ENABLE_FEATURE_AR_LONG_FILENAMES - if (ar.formatted.name[0] == '/') { - unsigned long_offset; - - /* The number after the '/' indicates the offset in the ar data section - * (saved in ar_long_names) that conatains the real filename */ - long_offset = read_num(&ar.formatted.name[1], 10); - if (long_offset >= ar_long_name_size) { - bb_error_msg_and_die("can't resolve long filename"); - } - typed->name = xstrdup(ar_long_names + long_offset); - } else -#endif - { - /* short filenames */ - typed->name = xstrndup(ar.formatted.name, 16); - } - - typed->name[strcspn(typed->name, " /")] = '\0'; - - if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { - archive_handle->action_header(typed); -#if ENABLE_DPKG || ENABLE_DPKG_DEB - if (archive_handle->dpkg__sub_archive) { - while (archive_handle->dpkg__action_data_subarchive(archive_handle->dpkg__sub_archive) == EXIT_SUCCESS) - continue; - } else -#endif - archive_handle->action_data(archive_handle); - } else { - data_skip(archive_handle); - } - - archive_handle->offset += typed->size; - /* Set the file pointer to the correct spot, we may have been reading a compressed file */ - lseek(archive_handle->src_fd, archive_handle->offset, SEEK_SET); - - return EXIT_SUCCESS; -} diff --git a/archival/libunarchive/get_header_cpio.c b/archival/libunarchive/get_header_cpio.c deleted file mode 100644 index 8cd1096ba..000000000 --- a/archival/libunarchive/get_header_cpio.c +++ /dev/null @@ -1,186 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* Copyright 2002 Laurence Anderson - * - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -typedef struct hardlinks_t { - struct hardlinks_t *next; - int inode; /* TODO: must match maj/min too! */ - int mode ; - int mtime; /* These three are useful only in corner case */ - int uid ; /* of hardlinks with zero size body */ - int gid ; - char name[1]; -} hardlinks_t; - -char FAST_FUNC get_header_cpio(archive_handle_t *archive_handle) -{ - file_header_t *file_header = archive_handle->file_header; - char cpio_header[110]; - int namesize; - int major, minor, nlink, mode, inode; - unsigned size, uid, gid, mtime; - - /* There can be padding before archive header */ - data_align(archive_handle, 4); - - size = full_read(archive_handle->src_fd, cpio_header, 110); - if (size == 0) { - goto create_hardlinks; - } - if (size != 110) { - bb_error_msg_and_die("short read"); - } - archive_handle->offset += 110; - - if (strncmp(&cpio_header[0], "07070", 5) != 0 - || (cpio_header[5] != '1' && cpio_header[5] != '2') - ) { - bb_error_msg_and_die("unsupported cpio format, use newc or crc"); - } - - if (sscanf(cpio_header + 6, - "%8x" "%8x" "%8x" "%8x" - "%8x" "%8x" "%8x" /*maj,min:*/ "%*16c" - /*rmaj,rmin:*/"%8x" "%8x" "%8x" /*chksum: "%*8c"*/, - &inode, &mode, &uid, &gid, - &nlink, &mtime, &size, - &major, &minor, &namesize) != 10) - bb_error_msg_and_die("damaged cpio file"); - file_header->mode = mode; - file_header->uid = uid; - file_header->gid = gid; - file_header->mtime = mtime; - file_header->size = size; - - namesize &= 0x1fff; /* paranoia: limit names to 8k chars */ - file_header->name = xzalloc(namesize + 1); - /* Read in filename */ - xread(archive_handle->src_fd, file_header->name, namesize); - if (file_header->name[0] == '/') { - /* Testcase: echo /etc/hosts | cpio -pvd /tmp - * Without this code, it tries to unpack /etc/hosts - * into "/etc/hosts", not "etc/hosts". - */ - char *p = file_header->name; - do p++; while (*p == '/'); - overlapping_strcpy(file_header->name, p); - } - archive_handle->offset += namesize; - - /* Update offset amount and skip padding before file contents */ - data_align(archive_handle, 4); - - if (strcmp(file_header->name, "TRAILER!!!") == 0) { - /* Always round up. ">> 9" divides by 512 */ - archive_handle->cpio__blocks = (uoff_t)(archive_handle->offset + 511) >> 9; - goto create_hardlinks; - } - - file_header->link_target = NULL; - if (S_ISLNK(file_header->mode)) { - file_header->size &= 0x1fff; /* paranoia: limit names to 8k chars */ - file_header->link_target = xzalloc(file_header->size + 1); - xread(archive_handle->src_fd, file_header->link_target, file_header->size); - archive_handle->offset += file_header->size; - file_header->size = 0; /* Stop possible seeks in future */ - } - -// TODO: data_extract_all can't deal with hardlinks to non-files... -// when fixed, change S_ISREG to !S_ISDIR here - - if (nlink > 1 && S_ISREG(file_header->mode)) { - hardlinks_t *new = xmalloc(sizeof(*new) + namesize); - new->inode = inode; - new->mode = mode ; - new->mtime = mtime; - new->uid = uid ; - new->gid = gid ; - strcpy(new->name, file_header->name); - /* Put file on a linked list for later */ - if (size == 0) { - new->next = archive_handle->cpio__hardlinks_to_create; - archive_handle->cpio__hardlinks_to_create = new; - return EXIT_SUCCESS; /* Skip this one */ - /* TODO: this breaks cpio -t (it does not show hardlinks) */ - } - new->next = archive_handle->cpio__created_hardlinks; - archive_handle->cpio__created_hardlinks = new; - } - file_header->device = makedev(major, minor); - - if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { - archive_handle->action_data(archive_handle); -//TODO: run "echo /etc/hosts | cpio -pv /tmp" twice. On 2nd run: -//cpio: etc/hosts not created: newer or same age file exists -//etc/hosts <-- should NOT show it -//2 blocks <-- should say "0 blocks" - archive_handle->action_header(file_header); - } else { - data_skip(archive_handle); - } - - archive_handle->offset += file_header->size; - - free(file_header->link_target); - free(file_header->name); - file_header->link_target = NULL; - file_header->name = NULL; - - return EXIT_SUCCESS; - - create_hardlinks: - free(file_header->link_target); - free(file_header->name); - - while (archive_handle->cpio__hardlinks_to_create) { - hardlinks_t *cur; - hardlinks_t *make_me = archive_handle->cpio__hardlinks_to_create; - - archive_handle->cpio__hardlinks_to_create = make_me->next; - - memset(file_header, 0, sizeof(*file_header)); - file_header->mtime = make_me->mtime; - file_header->name = make_me->name; - file_header->mode = make_me->mode; - file_header->uid = make_me->uid; - file_header->gid = make_me->gid; - /*file_header->size = 0;*/ - /*file_header->link_target = NULL;*/ - - /* Try to find a file we are hardlinked to */ - cur = archive_handle->cpio__created_hardlinks; - while (cur) { - /* TODO: must match maj/min too! */ - if (cur->inode == make_me->inode) { - file_header->link_target = cur->name; - /* link_target != NULL, size = 0: "I am a hardlink" */ - if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) - archive_handle->action_data(archive_handle); - free(make_me); - goto next_link; - } - cur = cur->next; - } - /* Oops... no file with such inode was created... do it now - * (happens when hardlinked files are empty (zero length)) */ - if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) - archive_handle->action_data(archive_handle); - /* Move to the list of created hardlinked files */ - make_me->next = archive_handle->cpio__created_hardlinks; - archive_handle->cpio__created_hardlinks = make_me; - next_link: ; - } - - while (archive_handle->cpio__created_hardlinks) { - hardlinks_t *p = archive_handle->cpio__created_hardlinks; - archive_handle->cpio__created_hardlinks = p->next; - free(p); - } - - return EXIT_FAILURE; /* "No more files to process" */ -} diff --git a/archival/libunarchive/get_header_tar.c b/archival/libunarchive/get_header_tar.c deleted file mode 100644 index cf6487207..000000000 --- a/archival/libunarchive/get_header_tar.c +++ /dev/null @@ -1,461 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* Licensed under GPLv2 or later, see file LICENSE in this source tree. - * - * FIXME: - * In privileged mode if uname and gname map to a uid and gid then use the - * mapped value instead of the uid/gid values in tar header - * - * References: - * GNU tar and star man pages, - * Opengroup's ustar interchange format, - * http://www.opengroup.org/onlinepubs/007904975/utilities/pax.html - */ - -#include "libbb.h" -#include "unarchive.h" - -typedef uint32_t aliased_uint32_t FIX_ALIASING; -typedef off_t aliased_off_t FIX_ALIASING; - - -/* NB: _DESTROYS_ str[len] character! */ -static unsigned long long getOctal(char *str, int len) -{ - unsigned long long v; - char *end; - /* NB: leading spaces are allowed. Using strtoull to handle that. - * The downside is that we accept e.g. "-123" too :( - */ - str[len] = '\0'; - v = strtoull(str, &end, 8); - /* std: "Each numeric field is terminated by one or more - * or NUL characters". We must support ' '! */ - if (*end != '\0' && *end != ' ') { - int8_t first = str[0]; - if (!(first & 0x80)) - bb_error_msg_and_die("corrupted octal value in tar header"); - /* - * GNU tar uses "base-256 encoding" for very large numbers. - * Encoding is binary, with highest bit always set as a marker - * and sign in next-highest bit: - * 80 00 .. 00 - zero - * bf ff .. ff - largest positive number - * ff ff .. ff - minus 1 - * c0 00 .. 00 - smallest negative number - * - * Example of tar file with 8914993153 (0x213600001) byte file. - * Field starts at offset 7c: - * 00070 30 30 30 00 30 30 30 30 30 30 30 00 80 00 00 00 |000.0000000.....| - * 00080 00 00 00 02 13 60 00 01 31 31 31 32 30 33 33 36 |.....`..11120336| - * - * NB: tarballs with NEGATIVE unix times encoded that way were seen! - */ - v = first; - /* Sign-extend using 6th bit: */ - v <<= sizeof(unsigned long long)*8 - 7; - v = (long long)v >> (sizeof(unsigned long long)*8 - 7); - while (--len != 0) - v = (v << 8) + (unsigned char) *str++; - } - return v; -} -#define GET_OCTAL(a) getOctal((a), sizeof(a)) - -#if ENABLE_FEATURE_TAR_SELINUX -/* Scan a PAX header for SELinux contexts, via "RHT.security.selinux" keyword. - * This is what Red Hat's patched version of tar uses. - */ -# define SELINUX_CONTEXT_KEYWORD "RHT.security.selinux" -static char *get_selinux_sctx_from_pax_hdr(archive_handle_t *archive_handle, unsigned sz) -{ - char *buf, *p; - char *result; - - p = buf = xmalloc(sz + 1); - /* prevent bb_strtou from running off the buffer */ - buf[sz] = '\0'; - xread(archive_handle->src_fd, buf, sz); - archive_handle->offset += sz; - - result = NULL; - while (sz != 0) { - char *end, *value; - unsigned len; - - /* Every record has this format: "LEN NAME=VALUE\n" */ - len = bb_strtou(p, &end, 10); - /* expect errno to be EINVAL, because the character - * following the digits should be a space - */ - p += len; - sz -= len; - if ((int)sz < 0 - || len == 0 - || errno != EINVAL - || *end != ' ' - ) { - bb_error_msg("malformed extended header, skipped"); - // More verbose version: - //bb_error_msg("malformed extended header at %"OFF_FMT"d, skipped", - // archive_handle->offset - (sz + len)); - break; - } - /* overwrite the terminating newline with NUL - * (we do not bother to check that it *was* a newline) - */ - p[-1] = '\0'; - /* Is it selinux security context? */ - value = end + 1; - if (strncmp(value, SELINUX_CONTEXT_KEYWORD"=", sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1) == 0) { - value += sizeof(SELINUX_CONTEXT_KEYWORD"=") - 1; - result = xstrdup(value); - break; - } - } - - free(buf); - return result; -} -#endif - -char FAST_FUNC get_header_tar(archive_handle_t *archive_handle) -{ - file_header_t *file_header = archive_handle->file_header; - struct tar_header_t tar; - char *cp; - int i, sum_u, sum; -#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY - int sum_s; -#endif - int parse_names; - - /* Our "private data" */ -#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS -# define p_longname (archive_handle->tar__longname) -# define p_linkname (archive_handle->tar__linkname) -#else -# define p_longname 0 -# define p_linkname 0 -#endif - -#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS || ENABLE_FEATURE_TAR_SELINUX - again: -#endif - /* Align header */ - data_align(archive_handle, 512); - - again_after_align: - -#if ENABLE_DESKTOP || ENABLE_FEATURE_TAR_AUTODETECT - /* to prevent misdetection of bz2 sig */ - *(aliased_uint32_t*)&tar = 0; - i = full_read(archive_handle->src_fd, &tar, 512); - /* If GNU tar sees EOF in above read, it says: - * "tar: A lone zero block at N", where N = kilobyte - * where EOF was met (not EOF block, actual EOF!), - * and exits with EXIT_SUCCESS. - * We will mimic exit(EXIT_SUCCESS), although we will not mimic - * the message and we don't check whether we indeed - * saw zero block directly before this. */ - if (i == 0) { - xfunc_error_retval = 0; - short_read: - bb_error_msg_and_die("short read"); - } - if (i != 512) { - IF_FEATURE_TAR_AUTODETECT(goto autodetect;) - goto short_read; - } - -#else - i = 512; - xread(archive_handle->src_fd, &tar, i); -#endif - archive_handle->offset += i; - - /* If there is no filename its an empty header */ - if (tar.name[0] == 0 && tar.prefix[0] == 0) { - if (archive_handle->tar__end) { - /* Second consecutive empty header - end of archive. - * Read until the end to empty the pipe from gz or bz2 - */ - while (full_read(archive_handle->src_fd, &tar, 512) == 512) - continue; - return EXIT_FAILURE; - } - archive_handle->tar__end = 1; - return EXIT_SUCCESS; - } - archive_handle->tar__end = 0; - - /* Check header has valid magic, "ustar" is for the proper tar, - * five NULs are for the old tar format */ - if (strncmp(tar.magic, "ustar", 5) != 0 - && (!ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY - || memcmp(tar.magic, "\0\0\0\0", 5) != 0) - ) { -#if ENABLE_FEATURE_TAR_AUTODETECT - char FAST_FUNC (*get_header_ptr)(archive_handle_t *); - uint16_t magic2; - - autodetect: - magic2 = *(uint16_t*)tar.name; - /* tar gz/bz autodetect: check for gz/bz2 magic. - * If we see the magic, and it is the very first block, - * we can switch to get_header_tar_gz/bz2/lzma(). - * Needs seekable fd. I wish recv(MSG_PEEK) works - * on any fd... */ -# if ENABLE_FEATURE_SEAMLESS_GZ - if (magic2 == GZIP_MAGIC) { - get_header_ptr = get_header_tar_gz; - } else -# endif -# if ENABLE_FEATURE_SEAMLESS_BZ2 - if (magic2 == BZIP2_MAGIC - && tar.name[2] == 'h' && isdigit(tar.name[3]) - ) { /* bzip2 */ - get_header_ptr = get_header_tar_bz2; - } else -# endif -# if ENABLE_FEATURE_SEAMLESS_XZ - //TODO: if (magic2 == XZ_MAGIC1)... - //else -# endif - goto err; - /* Two different causes for lseek() != 0: - * unseekable fd (would like to support that too, but...), - * or not first block (false positive, it's not .gz/.bz2!) */ - if (lseek(archive_handle->src_fd, -i, SEEK_CUR) != 0) - goto err; - while (get_header_ptr(archive_handle) == EXIT_SUCCESS) - continue; - return EXIT_FAILURE; - err: -#endif /* FEATURE_TAR_AUTODETECT */ - bb_error_msg_and_die("invalid tar magic"); - } - - /* Do checksum on headers. - * POSIX says that checksum is done on unsigned bytes, but - * Sun and HP-UX gets it wrong... more details in - * GNU tar source. */ -#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY - sum_s = ' ' * sizeof(tar.chksum); -#endif - sum_u = ' ' * sizeof(tar.chksum); - for (i = 0; i < 148; i++) { - sum_u += ((unsigned char*)&tar)[i]; -#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY - sum_s += ((signed char*)&tar)[i]; -#endif - } - for (i = 156; i < 512; i++) { - sum_u += ((unsigned char*)&tar)[i]; -#if ENABLE_FEATURE_TAR_OLDSUN_COMPATIBILITY - sum_s += ((signed char*)&tar)[i]; -#endif - } - /* This field does not need special treatment (getOctal) */ - { - char *endp; /* gcc likes temp var for &endp */ - sum = strtoul(tar.chksum, &endp, 8); - if ((*endp != '\0' && *endp != ' ') - || (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) - ) { - bb_error_msg_and_die("invalid tar header checksum"); - } - } - /* don't use xstrtoul, tar.chksum may have leading spaces */ - sum = strtoul(tar.chksum, NULL, 8); - if (sum_u != sum IF_FEATURE_TAR_OLDSUN_COMPATIBILITY(&& sum_s != sum)) { - bb_error_msg_and_die("invalid tar header checksum"); - } - - /* 0 is reserved for high perf file, treat as normal file */ - if (!tar.typeflag) tar.typeflag = '0'; - parse_names = (tar.typeflag >= '0' && tar.typeflag <= '7'); - - /* getOctal trashes subsequent field, therefore we call it - * on fields in reverse order */ - if (tar.devmajor[0]) { - char t = tar.prefix[0]; - /* we trash prefix[0] here, but we DO need it later! */ - unsigned minor = GET_OCTAL(tar.devminor); - unsigned major = GET_OCTAL(tar.devmajor); - file_header->device = makedev(major, minor); - tar.prefix[0] = t; - } - file_header->link_target = NULL; - if (!p_linkname && parse_names && tar.linkname[0]) { - file_header->link_target = xstrndup(tar.linkname, sizeof(tar.linkname)); - /* FIXME: what if we have non-link object with link_target? */ - /* Will link_target be free()ed? */ - } -#if ENABLE_FEATURE_TAR_UNAME_GNAME - file_header->tar__uname = tar.uname[0] ? xstrndup(tar.uname, sizeof(tar.uname)) : NULL; - file_header->tar__gname = tar.gname[0] ? xstrndup(tar.gname, sizeof(tar.gname)) : NULL; -#endif - file_header->mtime = GET_OCTAL(tar.mtime); - file_header->size = GET_OCTAL(tar.size); - file_header->gid = GET_OCTAL(tar.gid); - file_header->uid = GET_OCTAL(tar.uid); - /* Set bits 0-11 of the files mode */ - file_header->mode = 07777 & GET_OCTAL(tar.mode); - - file_header->name = NULL; - if (!p_longname && parse_names) { - /* we trash mode[0] here, it's ok */ - //tar.name[sizeof(tar.name)] = '\0'; - gcc 4.3.0 would complain - tar.mode[0] = '\0'; - if (tar.prefix[0]) { - /* and padding[0] */ - //tar.prefix[sizeof(tar.prefix)] = '\0'; - gcc 4.3.0 would complain - tar.padding[0] = '\0'; - file_header->name = concat_path_file(tar.prefix, tar.name); - } else - file_header->name = xstrdup(tar.name); - } - - /* Set bits 12-15 of the files mode */ - /* (typeflag was not trashed because chksum does not use getOctal) */ - switch (tar.typeflag) { - /* busybox identifies hard links as being regular files with 0 size and a link name */ - case '1': - file_header->mode |= S_IFREG; - break; - case '7': - /* case 0: */ - case '0': -#if ENABLE_FEATURE_TAR_OLDGNU_COMPATIBILITY - if (last_char_is(file_header->name, '/')) { - goto set_dir; - } -#endif - file_header->mode |= S_IFREG; - break; - case '2': - file_header->mode |= S_IFLNK; - /* have seen tarballs with size field containing - * the size of the link target's name */ - size0: - file_header->size = 0; - break; - case '3': - file_header->mode |= S_IFCHR; - goto size0; /* paranoia */ - case '4': - file_header->mode |= S_IFBLK; - goto size0; - case '5': - IF_FEATURE_TAR_OLDGNU_COMPATIBILITY(set_dir:) - file_header->mode |= S_IFDIR; - goto size0; - case '6': - file_header->mode |= S_IFIFO; - goto size0; -#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS - case 'L': - /* free: paranoia: tar with several consecutive longnames */ - free(p_longname); - /* For paranoia reasons we allocate extra NUL char */ - p_longname = xzalloc(file_header->size + 1); - /* We read ASCIZ string, including NUL */ - xread(archive_handle->src_fd, p_longname, file_header->size); - archive_handle->offset += file_header->size; - /* return get_header_tar(archive_handle); */ - /* gcc 4.1.1 didn't optimize it into jump */ - /* so we will do it ourself, this also saves stack */ - goto again; - case 'K': - free(p_linkname); - p_linkname = xzalloc(file_header->size + 1); - xread(archive_handle->src_fd, p_linkname, file_header->size); - archive_handle->offset += file_header->size; - /* return get_header_tar(archive_handle); */ - goto again; - case 'D': /* GNU dump dir */ - case 'M': /* Continuation of multi volume archive */ - case 'N': /* Old GNU for names > 100 characters */ - case 'S': /* Sparse file */ - case 'V': /* Volume header */ -#endif -#if !ENABLE_FEATURE_TAR_SELINUX - case 'g': /* pax global header */ - case 'x': /* pax extended header */ -#else - skip_ext_hdr: -#endif - { - off_t sz; - bb_error_msg("warning: skipping header '%c'", tar.typeflag); - sz = (file_header->size + 511) & ~(off_t)511; - archive_handle->offset += sz; - sz >>= 9; /* sz /= 512 but w/o contortions for signed div */ - while (sz--) - xread(archive_handle->src_fd, &tar, 512); - /* return get_header_tar(archive_handle); */ - goto again_after_align; - } -#if ENABLE_FEATURE_TAR_SELINUX - case 'g': /* pax global header */ - case 'x': { /* pax extended header */ - char **pp; - if ((uoff_t)file_header->size > 0xfffff) /* paranoia */ - goto skip_ext_hdr; - pp = (tar.typeflag == 'g') ? &archive_handle->tar__global_sctx : &archive_handle->tar__next_file_sctx; - free(*pp); - *pp = get_selinux_sctx_from_pax_hdr(archive_handle, file_header->size); - goto again; - } -#endif - default: - bb_error_msg_and_die("unknown typeflag: 0x%x", tar.typeflag); - } - -#if ENABLE_FEATURE_TAR_GNU_EXTENSIONS - if (p_longname) { - file_header->name = p_longname; - p_longname = NULL; - } - if (p_linkname) { - file_header->link_target = p_linkname; - p_linkname = NULL; - } -#endif - if (strncmp(file_header->name, "/../"+1, 3) == 0 - || strstr(file_header->name, "/../") - ) { - bb_error_msg_and_die("name with '..' encountered: '%s'", - file_header->name); - } - - /* Strip trailing '/' in directories */ - /* Must be done after mode is set as '/' is used to check if it's a directory */ - cp = last_char_is(file_header->name, '/'); - - if (archive_handle->filter(archive_handle) == EXIT_SUCCESS) { - archive_handle->action_header(/*archive_handle->*/ file_header); - /* Note that we kill the '/' only after action_header() */ - /* (like GNU tar 1.15.1: verbose mode outputs "dir/dir/") */ - if (cp) - *cp = '\0'; - archive_handle->action_data(archive_handle); - if (archive_handle->accept || archive_handle->reject) - llist_add_to(&archive_handle->passed, file_header->name); - else /* Caller isn't interested in list of unpacked files */ - free(file_header->name); - } else { - data_skip(archive_handle); - free(file_header->name); - } - archive_handle->offset += file_header->size; - - free(file_header->link_target); - /* Do not free(file_header->name)! - * It might be inserted in archive_handle->passed - see above */ -#if ENABLE_FEATURE_TAR_UNAME_GNAME - free(file_header->tar__uname); - free(file_header->tar__gname); -#endif - return EXIT_SUCCESS; -} diff --git a/archival/libunarchive/get_header_tar_bz2.c b/archival/libunarchive/get_header_tar_bz2.c deleted file mode 100644 index 4ffc17086..000000000 --- a/archival/libunarchive/get_header_tar_bz2.c +++ /dev/null @@ -1,21 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -char FAST_FUNC get_header_tar_bz2(archive_handle_t *archive_handle) -{ - /* Can't lseek over pipes */ - archive_handle->seek = seek_by_read; - - open_transformer(archive_handle->src_fd, unpack_bz2_stream_prime, "bunzip2"); - archive_handle->offset = 0; - while (get_header_tar(archive_handle) == EXIT_SUCCESS) - continue; - - /* Can only do one file at a time */ - return EXIT_FAILURE; -} diff --git a/archival/libunarchive/get_header_tar_gz.c b/archival/libunarchive/get_header_tar_gz.c deleted file mode 100644 index a9af22e0e..000000000 --- a/archival/libunarchive/get_header_tar_gz.c +++ /dev/null @@ -1,36 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -char FAST_FUNC get_header_tar_gz(archive_handle_t *archive_handle) -{ -#if BB_MMU - unsigned char magic[2]; -#endif - - /* Can't lseek over pipes */ - archive_handle->seek = seek_by_read; - - /* Check gzip magic only if open_transformer will invoke unpack_gz_stream (MMU case). - * Otherwise, it will invoke an external helper "gunzip -cf" (NOMMU case) which will - * need the header. */ -#if BB_MMU - xread(archive_handle->src_fd, &magic, 2); - /* Can skip this check, but error message will be less clear */ - if ((magic[0] != 0x1f) || (magic[1] != 0x8b)) { - bb_error_msg_and_die("invalid gzip magic"); - } -#endif - - open_transformer(archive_handle->src_fd, unpack_gz_stream, "gunzip"); - archive_handle->offset = 0; - while (get_header_tar(archive_handle) == EXIT_SUCCESS) - continue; - - /* Can only do one file at a time */ - return EXIT_FAILURE; -} diff --git a/archival/libunarchive/get_header_tar_lzma.c b/archival/libunarchive/get_header_tar_lzma.c deleted file mode 100644 index 9876b3827..000000000 --- a/archival/libunarchive/get_header_tar_lzma.c +++ /dev/null @@ -1,24 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Small lzma deflate implementation. - * Copyright (C) 2006 Aurelien Jacobs - * - * Licensed under GPLv2, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -char FAST_FUNC get_header_tar_lzma(archive_handle_t *archive_handle) -{ - /* Can't lseek over pipes */ - archive_handle->seek = seek_by_read; - - open_transformer(archive_handle->src_fd, unpack_lzma_stream, "unlzma"); - archive_handle->offset = 0; - while (get_header_tar(archive_handle) == EXIT_SUCCESS) - continue; - - /* Can only do one file at a time */ - return EXIT_FAILURE; -} diff --git a/archival/libunarchive/header_list.c b/archival/libunarchive/header_list.c deleted file mode 100644 index 902d6ebe0..000000000 --- a/archival/libunarchive/header_list.c +++ /dev/null @@ -1,12 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ -#include "libbb.h" -#include "unarchive.h" - -void FAST_FUNC header_list(const file_header_t *file_header) -{ -//TODO: cpio -vp DIR should output "DIR/NAME", not just "NAME" */ - puts(file_header->name); -} diff --git a/archival/libunarchive/header_skip.c b/archival/libunarchive/header_skip.c deleted file mode 100644 index 2af36ac9c..000000000 --- a/archival/libunarchive/header_skip.c +++ /dev/null @@ -1,10 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ -#include "libbb.h" -#include "unarchive.h" - -void FAST_FUNC header_skip(const file_header_t *file_header UNUSED_PARAM) -{ -} diff --git a/archival/libunarchive/header_verbose_list.c b/archival/libunarchive/header_verbose_list.c deleted file mode 100644 index d863e6a29..000000000 --- a/archival/libunarchive/header_verbose_list.c +++ /dev/null @@ -1,69 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -void FAST_FUNC header_verbose_list(const file_header_t *file_header) -{ - struct tm tm_time; - struct tm *ptm = &tm_time; //localtime(&file_header->mtime); - -#if ENABLE_FEATURE_TAR_UNAME_GNAME - char uid[sizeof(int)*3 + 2]; - /*char gid[sizeof(int)*3 + 2];*/ - char *user; - char *group; - - localtime_r(&file_header->mtime, ptm); - - user = file_header->tar__uname; - if (user == NULL) { - sprintf(uid, "%u", (unsigned)file_header->uid); - user = uid; - } - group = file_header->tar__gname; - if (group == NULL) { - /*sprintf(gid, "%u", (unsigned)file_header->gid);*/ - group = utoa(file_header->gid); - } - printf("%s %s/%s %9"OFF_FMT"u %4u-%02u-%02u %02u:%02u:%02u %s", - bb_mode_string(file_header->mode), - user, - group, - file_header->size, - 1900 + ptm->tm_year, - 1 + ptm->tm_mon, - ptm->tm_mday, - ptm->tm_hour, - ptm->tm_min, - ptm->tm_sec, - file_header->name); - -#else /* !FEATURE_TAR_UNAME_GNAME */ - - localtime_r(&file_header->mtime, ptm); - - printf("%s %u/%u %9"OFF_FMT"u %4u-%02u-%02u %02u:%02u:%02u %s", - bb_mode_string(file_header->mode), - (unsigned)file_header->uid, - (unsigned)file_header->gid, - file_header->size, - 1900 + ptm->tm_year, - 1 + ptm->tm_mon, - ptm->tm_mday, - ptm->tm_hour, - ptm->tm_min, - ptm->tm_sec, - file_header->name); - -#endif /* FEATURE_TAR_UNAME_GNAME */ - - /* NB: GNU tar shows "->" for symlinks and "link to" for hardlinks */ - if (file_header->link_target) { - printf(" -> %s", file_header->link_target); - } - bb_putchar('\n'); -} diff --git a/archival/libunarchive/init_handle.c b/archival/libunarchive/init_handle.c deleted file mode 100644 index de7021f78..000000000 --- a/archival/libunarchive/init_handle.c +++ /dev/null @@ -1,22 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -archive_handle_t* FAST_FUNC init_handle(void) -{ - archive_handle_t *archive_handle; - - /* Initialize default values */ - archive_handle = xzalloc(sizeof(archive_handle_t)); - archive_handle->file_header = xzalloc(sizeof(file_header_t)); - archive_handle->action_header = header_skip; - archive_handle->action_data = data_skip; - archive_handle->filter = filter_accept_all; - archive_handle->seek = seek_by_jump; - - return archive_handle; -} diff --git a/archival/libunarchive/liblzo.h b/archival/libunarchive/liblzo.h deleted file mode 100644 index 843997cb9..000000000 --- a/archival/libunarchive/liblzo.h +++ /dev/null @@ -1,93 +0,0 @@ -/* - This file is part of the LZO real-time data compression library. - - Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer - All Rights Reserved. - - Markus F.X.J. Oberhumer - http://www.oberhumer.com/opensource/lzo/ - - The LZO library is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - The LZO library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with the LZO library; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -#include "liblzo_interface.h" - -/* lzo-2.03/src/config1x.h */ -#define M2_MIN_LEN 3 -#define M2_MAX_LEN 8 -#define M3_MAX_LEN 33 -#define M4_MAX_LEN 9 -#define M1_MAX_OFFSET 0x0400 -#define M2_MAX_OFFSET 0x0800 -#define M3_MAX_OFFSET 0x4000 -#define M4_MAX_OFFSET 0xbfff -#define M1_MARKER 0 -#define M3_MARKER 32 -#define M4_MARKER 16 - -#define MX_MAX_OFFSET (M1_MAX_OFFSET + M2_MAX_OFFSET) -#define MIN_LOOKAHEAD (M2_MAX_LEN + 1) - -#define LZO_EOF_CODE - -/* lzo-2.03/src/lzo_dict.h */ -#define GINDEX(m_pos,m_off,dict,dindex,in) m_pos = dict[dindex] -#define DX2(p,s1,s2) \ - (((((unsigned)((p)[2]) << (s2)) ^ (p)[1]) << (s1)) ^ (p)[0]) -//#define DA3(p,s1,s2,s3) ((DA2((p)+1,s2,s3) << (s1)) + (p)[0]) -//#define DS3(p,s1,s2,s3) ((DS2((p)+1,s2,s3) << (s1)) - (p)[0]) -#define DX3(p,s1,s2,s3) ((DX2((p)+1,s2,s3) << (s1)) ^ (p)[0]) - -#define D_SIZE (1U << D_BITS) -#define D_MASK ((1U << D_BITS) - 1) -#define D_HIGH ((D_MASK >> 1) + 1) - -#define LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,max_offset) \ - ( \ - m_pos = ip - (unsigned)(ip - m_pos), \ - ((uintptr_t)m_pos < (uintptr_t)in \ - || (m_off = (unsigned)(ip - m_pos)) <= 0 \ - || m_off > max_offset) \ - ) - -#define DENTRY(p,in) (p) -#define UPDATE_I(dict,drun,index,p,in) dict[index] = DENTRY(p,in) - -#define DMS(v,s) ((unsigned) (((v) & (D_MASK >> (s))) << (s))) -#define DM(v) ((unsigned) ((v) & D_MASK)) -#define DMUL(a,b) ((unsigned) ((a) * (b))) - -/* lzo-2.03/src/lzo_ptr.h */ -#define pd(a,b) ((unsigned)((a)-(b))) - -# define TEST_IP (ip < ip_end) -# define NEED_IP(x) \ - if ((unsigned)(ip_end - ip) < (unsigned)(x)) goto input_overrun - -# undef TEST_OP /* don't need both of the tests here */ -# define TEST_OP 1 -# define NEED_OP(x) \ - if ((unsigned)(op_end - op) < (unsigned)(x)) goto output_overrun - -#define HAVE_ANY_OP 1 - -//#if defined(LZO_TEST_OVERRUN_LOOKBEHIND) -# define TEST_LB(m_pos) if (m_pos < out || m_pos >= op) goto lookbehind_overrun -//# define TEST_LBO(m_pos,o) if (m_pos < out || m_pos >= op - (o)) goto lookbehind_overrun -//#else -//# define TEST_LB(m_pos) ((void) 0) -//# define TEST_LBO(m_pos,o) ((void) 0) -//#endif diff --git a/archival/libunarchive/lzo1x_1.c b/archival/libunarchive/lzo1x_1.c deleted file mode 100644 index a88839846..000000000 --- a/archival/libunarchive/lzo1x_1.c +++ /dev/null @@ -1,35 +0,0 @@ -/* LZO1X-1 compression - - This file is part of the LZO real-time data compression library. - - Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer - All Rights Reserved. - - Markus F.X.J. Oberhumer - http://www.oberhumer.com/opensource/lzo/ - - The LZO library is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - The LZO library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with the LZO library; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ -#include "libbb.h" -#include "liblzo.h" - -#define D_BITS 14 -#define D_INDEX1(d,p) d = DM(DMUL(0x21,DX3(p,5,5,6)) >> 5) -#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) - -#define DO_COMPRESS lzo1x_1_compress - -#include "lzo1x_c.c" diff --git a/archival/libunarchive/lzo1x_1o.c b/archival/libunarchive/lzo1x_1o.c deleted file mode 100644 index 3c61253e0..000000000 --- a/archival/libunarchive/lzo1x_1o.c +++ /dev/null @@ -1,35 +0,0 @@ -/* LZO1X-1(15) compression - - This file is part of the LZO real-time data compression library. - - Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer - All Rights Reserved. - - Markus F.X.J. Oberhumer - http://www.oberhumer.com/opensource/lzo/ - - The LZO library is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - The LZO library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with the LZO library; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ -#include "libbb.h" -#include "liblzo.h" - -#define D_BITS 15 -#define D_INDEX1(d,p) d = DM(DMUL(0x21,DX3(p,5,5,6)) >> 5) -#define D_INDEX2(d,p) d = (d & (D_MASK & 0x7ff)) ^ (D_HIGH | 0x1f) - -#define DO_COMPRESS lzo1x_1_15_compress - -#include "lzo1x_c.c" diff --git a/archival/libunarchive/lzo1x_9x.c b/archival/libunarchive/lzo1x_9x.c deleted file mode 100644 index 483205155..000000000 --- a/archival/libunarchive/lzo1x_9x.c +++ /dev/null @@ -1,921 +0,0 @@ -/* lzo1x_9x.c -- implementation of the LZO1X-999 compression algorithm - - This file is part of the LZO real-time data compression library. - - Copyright (C) 2008 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2007 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2006 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2005 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2004 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2003 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2002 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2001 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 2000 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1999 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1998 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1997 Markus Franz Xaver Johannes Oberhumer - Copyright (C) 1996 Markus Franz Xaver Johannes Oberhumer - All Rights Reserved. - - The LZO library is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - The LZO library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with the LZO library; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - - Markus F.X.J. Oberhumer - - http://www.oberhumer.com/opensource/lzo/ -*/ -#include "libbb.h" - -/* The following is probably only safe on Intel-compatible processors ... */ -#define LZO_UNALIGNED_OK_2 -#define LZO_UNALIGNED_OK_4 - -#include "liblzo.h" - -#define LZO_MAX(a,b) ((a) >= (b) ? (a) : (b)) -#define LZO_MIN(a,b) ((a) <= (b) ? (a) : (b)) -#define LZO_MAX3(a,b,c) ((a) >= (b) ? LZO_MAX(a,c) : LZO_MAX(b,c)) - -/*********************************************************************** -// -************************************************************************/ -#define SWD_N M4_MAX_OFFSET /* size of ring buffer */ -#define SWD_F 2048 /* upper limit for match length */ - -#define SWD_BEST_OFF (LZO_MAX3(M2_MAX_LEN, M3_MAX_LEN, M4_MAX_LEN) + 1) - -typedef struct { - int init; - - unsigned look; /* bytes in lookahead buffer */ - - unsigned m_len; - unsigned m_off; - - const uint8_t *bp; - const uint8_t *ip; - const uint8_t *in; - const uint8_t *in_end; - uint8_t *out; - - unsigned r1_lit; - -} lzo1x_999_t; - -#define getbyte(c) ((c).ip < (c).in_end ? *((c).ip)++ : (-1)) - -/* lzo_swd.c -- sliding window dictionary */ - -/*********************************************************************** -// -************************************************************************/ -#define SWD_UINT_MAX USHRT_MAX - -#ifndef SWD_HSIZE -# define SWD_HSIZE 16384 -#endif -#ifndef SWD_MAX_CHAIN -# define SWD_MAX_CHAIN 2048 -#endif - -#define HEAD3(b, p) \ - ( ((0x9f5f * ((((b[p]<<5)^b[p+1])<<5) ^ b[p+2])) >> 5) & (SWD_HSIZE-1) ) - -#if defined(LZO_UNALIGNED_OK_2) -# define HEAD2(b,p) (* (uint16_t *) &(b[p])) -#else -# define HEAD2(b,p) (b[p] ^ ((unsigned)b[p+1]<<8)) -#endif -#define NIL2 SWD_UINT_MAX - -typedef struct lzo_swd { - /* public - "built-in" */ - - /* public - configuration */ - unsigned max_chain; - int use_best_off; - - /* public - output */ - unsigned m_len; - unsigned m_off; - unsigned look; - int b_char; -#if defined(SWD_BEST_OFF) - unsigned best_off[SWD_BEST_OFF]; -#endif - - /* semi public */ - lzo1x_999_t *c; - unsigned m_pos; -#if defined(SWD_BEST_OFF) - unsigned best_pos[SWD_BEST_OFF]; -#endif - - /* private */ - unsigned ip; /* input pointer (lookahead) */ - unsigned bp; /* buffer pointer */ - unsigned rp; /* remove pointer */ - - unsigned node_count; - unsigned first_rp; - - uint8_t b[SWD_N + SWD_F]; - uint8_t b_wrap[SWD_F]; /* must follow b */ - uint16_t head3[SWD_HSIZE]; - uint16_t succ3[SWD_N + SWD_F]; - uint16_t best3[SWD_N + SWD_F]; - uint16_t llen3[SWD_HSIZE]; -#ifdef HEAD2 - uint16_t head2[65536L]; -#endif -} lzo_swd_t, *lzo_swd_p; - -#define SIZEOF_LZO_SWD_T (sizeof(lzo_swd_t)) - - -/* Access macro for head3. - * head3[key] may be uninitialized, but then its value will never be used. - */ -#define s_get_head3(s,key) s->head3[key] - - -/*********************************************************************** -// -************************************************************************/ -#define B_SIZE (SWD_N + SWD_F) - -static int swd_init(lzo_swd_p s) -{ - /* defaults */ - s->node_count = SWD_N; - - memset(s->llen3, 0, sizeof(s->llen3[0]) * (unsigned)SWD_HSIZE); -#ifdef HEAD2 - memset(s->head2, 0xff, sizeof(s->head2[0]) * 65536L); - assert(s->head2[0] == NIL2); -#endif - - s->ip = 0; - s->bp = s->ip; - s->first_rp = s->ip; - - assert(s->ip + SWD_F <= B_SIZE); - s->look = (unsigned) (s->c->in_end - s->c->ip); - if (s->look > 0) { - if (s->look > SWD_F) - s->look = SWD_F; - memcpy(&s->b[s->ip], s->c->ip, s->look); - s->c->ip += s->look; - s->ip += s->look; - } - if (s->ip == B_SIZE) - s->ip = 0; - - s->rp = s->first_rp; - if (s->rp >= s->node_count) - s->rp -= s->node_count; - else - s->rp += B_SIZE - s->node_count; - - return LZO_E_OK; -} - -#define swd_pos2off(s,pos) \ - (s->bp > (pos) ? s->bp - (pos) : B_SIZE - ((pos) - s->bp)) - - -/*********************************************************************** -// -************************************************************************/ -static void swd_getbyte(lzo_swd_p s) -{ - int c; - - if ((c = getbyte(*(s->c))) < 0) { - if (s->look > 0) - --s->look; - } else { - s->b[s->ip] = c; - if (s->ip < SWD_F) - s->b_wrap[s->ip] = c; - } - if (++s->ip == B_SIZE) - s->ip = 0; - if (++s->bp == B_SIZE) - s->bp = 0; - if (++s->rp == B_SIZE) - s->rp = 0; -} - - -/*********************************************************************** -// remove node from lists -************************************************************************/ -static void swd_remove_node(lzo_swd_p s, unsigned node) -{ - if (s->node_count == 0) { - unsigned key; - - key = HEAD3(s->b,node); - assert(s->llen3[key] > 0); - --s->llen3[key]; - -#ifdef HEAD2 - key = HEAD2(s->b,node); - assert(s->head2[key] != NIL2); - if ((unsigned) s->head2[key] == node) - s->head2[key] = NIL2; -#endif - } else - --s->node_count; -} - - -/*********************************************************************** -// -************************************************************************/ -static void swd_accept(lzo_swd_p s, unsigned n) -{ - assert(n <= s->look); - - while (n--) { - unsigned key; - - swd_remove_node(s,s->rp); - - /* add bp into HEAD3 */ - key = HEAD3(s->b, s->bp); - s->succ3[s->bp] = s_get_head3(s, key); - s->head3[key] = s->bp; - s->best3[s->bp] = SWD_F + 1; - s->llen3[key]++; - assert(s->llen3[key] <= SWD_N); - -#ifdef HEAD2 - /* add bp into HEAD2 */ - key = HEAD2(s->b, s->bp); - s->head2[key] = s->bp; -#endif - - swd_getbyte(s); - } -} - - -/*********************************************************************** -// -************************************************************************/ -static void swd_search(lzo_swd_p s, unsigned node, unsigned cnt) -{ - const uint8_t *p1; - const uint8_t *p2; - const uint8_t *px; - unsigned m_len = s->m_len; - const uint8_t *b = s->b; - const uint8_t *bp = s->b + s->bp; - const uint8_t *bx = s->b + s->bp + s->look; - unsigned char scan_end1; - - assert(s->m_len > 0); - - scan_end1 = bp[m_len - 1]; - for ( ; cnt-- > 0; node = s->succ3[node]) { - p1 = bp; - p2 = b + node; - px = bx; - - assert(m_len < s->look); - - if (p2[m_len - 1] == scan_end1 - && p2[m_len] == p1[m_len] - && p2[0] == p1[0] - && p2[1] == p1[1] - ) { - unsigned i; - assert(lzo_memcmp(bp, &b[node], 3) == 0); - - p1 += 2; p2 += 2; - do {} while (++p1 < px && *p1 == *++p2); - i = p1-bp; - - assert(lzo_memcmp(bp, &b[node], i) == 0); - -#if defined(SWD_BEST_OFF) - if (i < SWD_BEST_OFF) { - if (s->best_pos[i] == 0) - s->best_pos[i] = node + 1; - } -#endif - if (i > m_len) { - s->m_len = m_len = i; - s->m_pos = node; - if (m_len == s->look) - return; - if (m_len >= SWD_F) - return; - if (m_len > (unsigned) s->best3[node]) - return; - scan_end1 = bp[m_len - 1]; - } - } - } -} - - -/*********************************************************************** -// -************************************************************************/ -#ifdef HEAD2 - -static int swd_search2(lzo_swd_p s) -{ - unsigned key; - - assert(s->look >= 2); - assert(s->m_len > 0); - - key = s->head2[HEAD2(s->b, s->bp)]; - if (key == NIL2) - return 0; - assert(lzo_memcmp(&s->b[s->bp], &s->b[key], 2) == 0); -#if defined(SWD_BEST_OFF) - if (s->best_pos[2] == 0) - s->best_pos[2] = key + 1; -#endif - - if (s->m_len < 2) { - s->m_len = 2; - s->m_pos = key; - } - return 1; -} - -#endif - - -/*********************************************************************** -// -************************************************************************/ -static void swd_findbest(lzo_swd_p s) -{ - unsigned key; - unsigned cnt, node; - unsigned len; - - assert(s->m_len > 0); - - /* get current head, add bp into HEAD3 */ - key = HEAD3(s->b,s->bp); - node = s->succ3[s->bp] = s_get_head3(s, key); - cnt = s->llen3[key]++; - assert(s->llen3[key] <= SWD_N + SWD_F); - if (cnt > s->max_chain) - cnt = s->max_chain; - s->head3[key] = s->bp; - - s->b_char = s->b[s->bp]; - len = s->m_len; - if (s->m_len >= s->look) { - if (s->look == 0) - s->b_char = -1; - s->m_off = 0; - s->best3[s->bp] = SWD_F + 1; - } else { -#ifdef HEAD2 - if (swd_search2(s)) -#endif - if (s->look >= 3) - swd_search(s, node, cnt); - if (s->m_len > len) - s->m_off = swd_pos2off(s,s->m_pos); - s->best3[s->bp] = s->m_len; - -#if defined(SWD_BEST_OFF) - if (s->use_best_off) { - int i; - for (i = 2; i < SWD_BEST_OFF; i++) { - if (s->best_pos[i] > 0) - s->best_off[i] = swd_pos2off(s, s->best_pos[i]-1); - else - s->best_off[i] = 0; - } - } -#endif - } - - swd_remove_node(s,s->rp); - -#ifdef HEAD2 - /* add bp into HEAD2 */ - key = HEAD2(s->b, s->bp); - s->head2[key] = s->bp; -#endif -} - -#undef HEAD3 -#undef HEAD2 -#undef s_get_head3 - - -/*********************************************************************** -// -************************************************************************/ -static int init_match(lzo1x_999_t *c, lzo_swd_p s, uint32_t use_best_off) -{ - int r; - - assert(!c->init); - c->init = 1; - - s->c = c; - - r = swd_init(s); - if (r != 0) - return r; - - s->use_best_off = use_best_off; - return r; -} - - -/*********************************************************************** -// -************************************************************************/ -static int find_match(lzo1x_999_t *c, lzo_swd_p s, - unsigned this_len, unsigned skip) -{ - assert(c->init); - - if (skip > 0) { - assert(this_len >= skip); - swd_accept(s, this_len - skip); - } else { - assert(this_len <= 1); - } - - s->m_len = 1; - s->m_len = 1; -#ifdef SWD_BEST_OFF - if (s->use_best_off) - memset(s->best_pos, 0, sizeof(s->best_pos)); -#endif - swd_findbest(s); - c->m_len = s->m_len; - c->m_off = s->m_off; - - swd_getbyte(s); - - if (s->b_char < 0) { - c->look = 0; - c->m_len = 0; - } else { - c->look = s->look + 1; - } - c->bp = c->ip - c->look; - - return LZO_E_OK; -} - -/* this is a public functions, but there is no prototype in a header file */ -static int lzo1x_999_compress_internal(const uint8_t *in , unsigned in_len, - uint8_t *out, unsigned *out_len, - void *wrkmem, - unsigned good_length, - unsigned max_lazy, - unsigned max_chain, - uint32_t use_best_off); - - -/*********************************************************************** -// -************************************************************************/ -static uint8_t *code_match(lzo1x_999_t *c, - uint8_t *op, unsigned m_len, unsigned m_off) -{ - assert(op > c->out); - if (m_len == 2) { - assert(m_off <= M1_MAX_OFFSET); - assert(c->r1_lit > 0); - assert(c->r1_lit < 4); - m_off -= 1; - *op++ = M1_MARKER | ((m_off & 3) << 2); - *op++ = m_off >> 2; - } else if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) { - assert(m_len >= 3); - m_off -= 1; - *op++ = ((m_len - 1) << 5) | ((m_off & 7) << 2); - *op++ = m_off >> 3; - assert(op[-2] >= M2_MARKER); - } else if (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && c->r1_lit >= 4) { - assert(m_len == 3); - assert(m_off > M2_MAX_OFFSET); - m_off -= 1 + M2_MAX_OFFSET; - *op++ = M1_MARKER | ((m_off & 3) << 2); - *op++ = m_off >> 2; - } else if (m_off <= M3_MAX_OFFSET) { - assert(m_len >= 3); - m_off -= 1; - if (m_len <= M3_MAX_LEN) - *op++ = M3_MARKER | (m_len - 2); - else { - m_len -= M3_MAX_LEN; - *op++ = M3_MARKER | 0; - while (m_len > 255) { - m_len -= 255; - *op++ = 0; - } - assert(m_len > 0); - *op++ = m_len; - } - *op++ = m_off << 2; - *op++ = m_off >> 6; - } else { - unsigned k; - - assert(m_len >= 3); - assert(m_off > 0x4000); - assert(m_off <= 0xbfff); - m_off -= 0x4000; - k = (m_off & 0x4000) >> 11; - if (m_len <= M4_MAX_LEN) - *op++ = M4_MARKER | k | (m_len - 2); - else { - m_len -= M4_MAX_LEN; - *op++ = M4_MARKER | k | 0; - while (m_len > 255) { - m_len -= 255; - *op++ = 0; - } - assert(m_len > 0); - *op++ = m_len; - } - *op++ = m_off << 2; - *op++ = m_off >> 6; - } - - return op; -} - - -static uint8_t *STORE_RUN(lzo1x_999_t *c, uint8_t *op, - const uint8_t *ii, unsigned t) -{ - if (op == c->out && t <= 238) { - *op++ = 17 + t; - } else if (t <= 3) { - op[-2] |= t; - } else if (t <= 18) { - *op++ = t - 3; - } else { - unsigned tt = t - 18; - - *op++ = 0; - while (tt > 255) { - tt -= 255; - *op++ = 0; - } - assert(tt > 0); - *op++ = tt; - } - do *op++ = *ii++; while (--t > 0); - - return op; -} - - -static uint8_t *code_run(lzo1x_999_t *c, uint8_t *op, const uint8_t *ii, - unsigned lit) -{ - if (lit > 0) { - assert(m_len >= 2); - op = STORE_RUN(c, op, ii, lit); - } else { - assert(m_len >= 3); - } - c->r1_lit = lit; - - return op; -} - - -/*********************************************************************** -// -************************************************************************/ -static int len_of_coded_match(unsigned m_len, unsigned m_off, unsigned lit) -{ - int n = 4; - - if (m_len < 2) - return -1; - if (m_len == 2) - return (m_off <= M1_MAX_OFFSET && lit > 0 && lit < 4) ? 2 : -1; - if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) - return 2; - if (m_len == M2_MIN_LEN && m_off <= MX_MAX_OFFSET && lit >= 4) - return 2; - if (m_off <= M3_MAX_OFFSET) { - if (m_len <= M3_MAX_LEN) - return 3; - m_len -= M3_MAX_LEN; - } else if (m_off <= M4_MAX_OFFSET) { - if (m_len <= M4_MAX_LEN) - return 3; - m_len -= M4_MAX_LEN; - } else - return -1; - while (m_len > 255) { - m_len -= 255; - n++; - } - return n; -} - - -static int min_gain(unsigned ahead, unsigned lit1, - unsigned lit2, int l1, int l2, int l3) -{ - int lazy_match_min_gain = 0; - - assert (ahead >= 1); - lazy_match_min_gain += ahead; - - if (lit1 <= 3) - lazy_match_min_gain += (lit2 <= 3) ? 0 : 2; - else if (lit1 <= 18) - lazy_match_min_gain += (lit2 <= 18) ? 0 : 1; - - lazy_match_min_gain += (l2 - l1) * 2; - if (l3 > 0) - lazy_match_min_gain -= (ahead - l3) * 2; - - if (lazy_match_min_gain < 0) - lazy_match_min_gain = 0; - - return lazy_match_min_gain; -} - - -/*********************************************************************** -// -************************************************************************/ -#if defined(SWD_BEST_OFF) - -static void better_match(const lzo_swd_p swd, - unsigned *m_len, unsigned *m_off) -{ - if (*m_len <= M2_MIN_LEN) - return; - - if (*m_off <= M2_MAX_OFFSET) - return; - - /* M3/M4 -> M2 */ - if (*m_off > M2_MAX_OFFSET - && *m_len >= M2_MIN_LEN + 1 && *m_len <= M2_MAX_LEN + 1 - && swd->best_off[*m_len-1] && swd->best_off[*m_len-1] <= M2_MAX_OFFSET - ) { - *m_len = *m_len - 1; - *m_off = swd->best_off[*m_len]; - return; - } - - /* M4 -> M2 */ - if (*m_off > M3_MAX_OFFSET - && *m_len >= M4_MAX_LEN + 1 && *m_len <= M2_MAX_LEN + 2 - && swd->best_off[*m_len-2] && swd->best_off[*m_len-2] <= M2_MAX_OFFSET - ) { - *m_len = *m_len - 2; - *m_off = swd->best_off[*m_len]; - return; - } - /* M4 -> M3 */ - if (*m_off > M3_MAX_OFFSET - && *m_len >= M4_MAX_LEN + 1 && *m_len <= M3_MAX_LEN + 1 - && swd->best_off[*m_len-1] && swd->best_off[*m_len-1] <= M3_MAX_OFFSET - ) { - *m_len = *m_len - 1; - *m_off = swd->best_off[*m_len]; - } -} - -#endif - - -/*********************************************************************** -// -************************************************************************/ -static int lzo1x_999_compress_internal(const uint8_t *in, unsigned in_len, - uint8_t *out, unsigned *out_len, - void *wrkmem, - unsigned good_length, - unsigned max_lazy, - unsigned max_chain, - uint32_t use_best_off) -{ - uint8_t *op; - const uint8_t *ii; - unsigned lit; - unsigned m_len, m_off; - lzo1x_999_t cc; - lzo1x_999_t *const c = &cc; - const lzo_swd_p swd = (lzo_swd_p) wrkmem; - int r; - - c->init = 0; - c->ip = c->in = in; - c->in_end = in + in_len; - c->out = out; - - op = out; - ii = c->ip; /* point to start of literal run */ - lit = 0; - c->r1_lit = 0; - - r = init_match(c, swd, use_best_off); - if (r != 0) - return r; - swd->max_chain = max_chain; - - r = find_match(c, swd, 0, 0); - if (r != 0) - return r; - - while (c->look > 0) { - unsigned ahead; - unsigned max_ahead; - int l1, l2, l3; - - m_len = c->m_len; - m_off = c->m_off; - - assert(c->bp == c->ip - c->look); - assert(c->bp >= in); - if (lit == 0) - ii = c->bp; - assert(ii + lit == c->bp); - assert(swd->b_char == *(c->bp)); - - if (m_len < 2 - || (m_len == 2 && (m_off > M1_MAX_OFFSET || lit == 0 || lit >= 4)) - /* Do not accept this match for compressed-data compatibility - * with LZO v1.01 and before - * [ might be a problem for decompress() and optimize() ] - */ - || (m_len == 2 && op == out) - || (op == out && lit == 0) - ) { - /* a literal */ - m_len = 0; - } - else if (m_len == M2_MIN_LEN) { - /* compression ratio improves if we code a literal in some cases */ - if (m_off > MX_MAX_OFFSET && lit >= 4) - m_len = 0; - } - - if (m_len == 0) { - /* a literal */ - lit++; - swd->max_chain = max_chain; - r = find_match(c, swd, 1, 0); - assert(r == 0); - continue; - } - - /* a match */ -#if defined(SWD_BEST_OFF) - if (swd->use_best_off) - better_match(swd, &m_len, &m_off); -#endif - - /* shall we try a lazy match ? */ - ahead = 0; - if (m_len >= max_lazy) { - /* no */ - l1 = 0; - max_ahead = 0; - } else { - /* yes, try a lazy match */ - l1 = len_of_coded_match(m_len, m_off, lit); - assert(l1 > 0); - max_ahead = LZO_MIN(2, (unsigned)l1 - 1); - } - - - while (ahead < max_ahead && c->look > m_len) { - int lazy_match_min_gain; - - if (m_len >= good_length) - swd->max_chain = max_chain >> 2; - else - swd->max_chain = max_chain; - r = find_match(c, swd, 1, 0); - ahead++; - - assert(r == 0); - assert(c->look > 0); - assert(ii + lit + ahead == c->bp); - - if (c->m_len < m_len) - continue; - if (c->m_len == m_len && c->m_off >= m_off) - continue; -#if defined(SWD_BEST_OFF) - if (swd->use_best_off) - better_match(swd, &c->m_len, &c->m_off); -#endif - l2 = len_of_coded_match(c->m_len, c->m_off, lit+ahead); - if (l2 < 0) - continue; - - /* compressed-data compatibility [see above] */ - l3 = (op == out) ? -1 : len_of_coded_match(ahead, m_off, lit); - - lazy_match_min_gain = min_gain(ahead, lit, lit+ahead, l1, l2, l3); - if (c->m_len >= m_len + lazy_match_min_gain) { - if (l3 > 0) { - /* code previous run */ - op = code_run(c, op, ii, lit); - lit = 0; - /* code shortened match */ - op = code_match(c, op, ahead, m_off); - } else { - lit += ahead; - assert(ii + lit == c->bp); - } - goto lazy_match_done; - } - } - - assert(ii + lit + ahead == c->bp); - - /* 1 - code run */ - op = code_run(c, op, ii, lit); - lit = 0; - - /* 2 - code match */ - op = code_match(c, op, m_len, m_off); - swd->max_chain = max_chain; - r = find_match(c, swd, m_len, 1+ahead); - assert(r == 0); - - lazy_match_done: ; - } - - /* store final run */ - if (lit > 0) - op = STORE_RUN(c, op, ii, lit); - -#if defined(LZO_EOF_CODE) - *op++ = M4_MARKER | 1; - *op++ = 0; - *op++ = 0; -#endif - - *out_len = op - out; - - return LZO_E_OK; -} - - -/*********************************************************************** -// -************************************************************************/ -int lzo1x_999_compress_level(const uint8_t *in, unsigned in_len, - uint8_t *out, unsigned *out_len, - void *wrkmem, - int compression_level) -{ - static const struct { - uint16_t good_length; - uint16_t max_lazy; - uint16_t max_chain; - uint16_t use_best_off; - } c[3] = { - { 8, 32, 256, 0 }, - { 32, 128, 2048, 1 }, - { SWD_F, SWD_F, 4096, 1 } /* max. compression */ - }; - - if (compression_level < 7 || compression_level > 9) - return LZO_E_ERROR; - - compression_level -= 7; - return lzo1x_999_compress_internal(in, in_len, out, out_len, wrkmem, - c[compression_level].good_length, - c[compression_level].max_lazy, - c[compression_level].max_chain, - c[compression_level].use_best_off); -} diff --git a/archival/libunarchive/lzo1x_c.c b/archival/libunarchive/lzo1x_c.c deleted file mode 100644 index cc86f74b1..000000000 --- a/archival/libunarchive/lzo1x_c.c +++ /dev/null @@ -1,296 +0,0 @@ -/* implementation of the LZO1[XY]-1 compression algorithm - - This file is part of the LZO real-time data compression library. - - Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer - All Rights Reserved. - - Markus F.X.J. Oberhumer - http://www.oberhumer.com/opensource/lzo/ - - The LZO library is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - The LZO library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with the LZO library; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ - -/*********************************************************************** -// compress a block of data. -************************************************************************/ -static NOINLINE unsigned -do_compress(const uint8_t* in, unsigned in_len, - uint8_t* out, unsigned* out_len, - void* wrkmem) -{ - register const uint8_t* ip; - uint8_t* op; - const uint8_t* const in_end = in + in_len; - const uint8_t* const ip_end = in + in_len - M2_MAX_LEN - 5; - const uint8_t* ii; - const void* *const dict = (const void**) wrkmem; - - op = out; - ip = in; - ii = ip; - - ip += 4; - for (;;) { - register const uint8_t* m_pos; - unsigned m_off; - unsigned m_len; - unsigned dindex; - - D_INDEX1(dindex,ip); - GINDEX(m_pos,m_off,dict,dindex,in); - if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) - goto literal; -#if 1 - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - D_INDEX2(dindex,ip); -#endif - GINDEX(m_pos,m_off,dict,dindex,in); - if (LZO_CHECK_MPOS_NON_DET(m_pos,m_off,in,ip,M4_MAX_OFFSET)) - goto literal; - if (m_off <= M2_MAX_OFFSET || m_pos[3] == ip[3]) - goto try_match; - goto literal; - - try_match: -#if 1 && defined(LZO_UNALIGNED_OK_2) - if (* (const lzo_ushortp) m_pos != * (const lzo_ushortp) ip) -#else - if (m_pos[0] != ip[0] || m_pos[1] != ip[1]) -#endif - { - } else { - if (m_pos[2] == ip[2]) { -#if 0 - if (m_off <= M2_MAX_OFFSET) - goto match; - if (lit <= 3) - goto match; - if (lit == 3) { /* better compression, but slower */ - assert(op - 2 > out); op[-2] |= (uint8_t)(3); - *op++ = *ii++; *op++ = *ii++; *op++ = *ii++; - goto code_match; - } - if (m_pos[3] == ip[3]) -#endif - goto match; - } - else { - /* still need a better way for finding M1 matches */ -#if 0 - /* a M1 match */ -#if 0 - if (m_off <= M1_MAX_OFFSET && lit > 0 && lit <= 3) -#else - if (m_off <= M1_MAX_OFFSET && lit == 3) -#endif - { - register unsigned t; - - t = lit; - assert(op - 2 > out); op[-2] |= (uint8_t)(t); - do *op++ = *ii++; while (--t > 0); - assert(ii == ip); - m_off -= 1; - *op++ = (uint8_t)(M1_MARKER | ((m_off & 3) << 2)); - *op++ = (uint8_t)(m_off >> 2); - ip += 2; - goto match_done; - } -#endif - } - } - - /* a literal */ - literal: - UPDATE_I(dict, 0, dindex, ip, in); - ++ip; - if (ip >= ip_end) - break; - continue; - - /* a match */ -match: - UPDATE_I(dict, 0, dindex, ip, in); - /* store current literal run */ - if (pd(ip, ii) > 0) { - register unsigned t = pd(ip, ii); - - if (t <= 3) { - assert(op - 2 > out); - op[-2] |= (uint8_t)(t); - } - else if (t <= 18) - *op++ = (uint8_t)(t - 3); - else { - register unsigned tt = t - 18; - - *op++ = 0; - while (tt > 255) { - tt -= 255; - *op++ = 0; - } - assert(tt > 0); - *op++ = (uint8_t)(tt); - } - do *op++ = *ii++; while (--t > 0); - } - - /* code the match */ - assert(ii == ip); - ip += 3; - if (m_pos[3] != *ip++ || m_pos[4] != *ip++ || m_pos[5] != *ip++ - || m_pos[6] != *ip++ || m_pos[7] != *ip++ || m_pos[8] != *ip++ -#ifdef LZO1Y - || m_pos[ 9] != *ip++ || m_pos[10] != *ip++ || m_pos[11] != *ip++ - || m_pos[12] != *ip++ || m_pos[13] != *ip++ || m_pos[14] != *ip++ -#endif - ) { - --ip; - m_len = pd(ip, ii); - assert(m_len >= 3); - assert(m_len <= M2_MAX_LEN); - - if (m_off <= M2_MAX_OFFSET) { - m_off -= 1; -#if defined(LZO1X) - *op++ = (uint8_t)(((m_len - 1) << 5) | ((m_off & 7) << 2)); - *op++ = (uint8_t)(m_off >> 3); -#elif defined(LZO1Y) - *op++ = (uint8_t)(((m_len + 1) << 4) | ((m_off & 3) << 2)); - *op++ = (uint8_t)(m_off >> 2); -#endif - } - else if (m_off <= M3_MAX_OFFSET) { - m_off -= 1; - *op++ = (uint8_t)(M3_MARKER | (m_len - 2)); - goto m3_m4_offset; - } else { -#if defined(LZO1X) - m_off -= 0x4000; - assert(m_off > 0); - assert(m_off <= 0x7fff); - *op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11) | (m_len - 2)); - goto m3_m4_offset; -#elif defined(LZO1Y) - goto m4_match; -#endif - } - } - else { - { - const uint8_t* end = in_end; - const uint8_t* m = m_pos + M2_MAX_LEN + 1; - while (ip < end && *m == *ip) - m++, ip++; - m_len = pd(ip, ii); - } - assert(m_len > M2_MAX_LEN); - - if (m_off <= M3_MAX_OFFSET) { - m_off -= 1; - if (m_len <= 33) - *op++ = (uint8_t)(M3_MARKER | (m_len - 2)); - else { - m_len -= 33; - *op++ = M3_MARKER | 0; - goto m3_m4_len; - } - } else { -#if defined(LZO1Y) - m4_match: -#endif - m_off -= 0x4000; - assert(m_off > 0); - assert(m_off <= 0x7fff); - if (m_len <= M4_MAX_LEN) - *op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11) | (m_len - 2)); - else { - m_len -= M4_MAX_LEN; - *op++ = (uint8_t)(M4_MARKER | ((m_off & 0x4000) >> 11)); - m3_m4_len: - while (m_len > 255) { - m_len -= 255; - *op++ = 0; - } - assert(m_len > 0); - *op++ = (uint8_t)(m_len); - } - } - m3_m4_offset: - *op++ = (uint8_t)((m_off & 63) << 2); - *op++ = (uint8_t)(m_off >> 6); - } -#if 0 - match_done: -#endif - ii = ip; - if (ip >= ip_end) - break; - } - - *out_len = pd(op, out); - return pd(in_end, ii); -} - -/*********************************************************************** -// public entry point -************************************************************************/ -int DO_COMPRESS(const uint8_t* in, unsigned in_len, - uint8_t* out, unsigned* out_len, - void* wrkmem) -{ - uint8_t* op = out; - unsigned t; - - if (in_len <= M2_MAX_LEN + 5) - t = in_len; - else { - t = do_compress(in,in_len,op,out_len,wrkmem); - op += *out_len; - } - - if (t > 0) { - const uint8_t* ii = in + in_len - t; - - if (op == out && t <= 238) - *op++ = (uint8_t)(17 + t); - else if (t <= 3) - op[-2] |= (uint8_t)(t); - else if (t <= 18) - *op++ = (uint8_t)(t - 3); - else { - unsigned tt = t - 18; - - *op++ = 0; - while (tt > 255) { - tt -= 255; - *op++ = 0; - } - assert(tt > 0); - *op++ = (uint8_t)(tt); - } - do *op++ = *ii++; while (--t > 0); - } - - *op++ = M4_MARKER | 1; - *op++ = 0; - *op++ = 0; - - *out_len = pd(op, out); - return 0; /*LZO_E_OK*/ -} diff --git a/archival/libunarchive/lzo1x_d.c b/archival/libunarchive/lzo1x_d.c deleted file mode 100644 index 348a85510..000000000 --- a/archival/libunarchive/lzo1x_d.c +++ /dev/null @@ -1,420 +0,0 @@ -/* implementation of the LZO1X decompression algorithm - - This file is part of the LZO real-time data compression library. - - Copyright (C) 1996..2008 Markus Franz Xaver Johannes Oberhumer - All Rights Reserved. - - Markus F.X.J. Oberhumer - http://www.oberhumer.com/opensource/lzo/ - - The LZO library is free software; you can redistribute it and/or - modify it under the terms of the GNU General Public License as - published by the Free Software Foundation; either version 2 of - the License, or (at your option) any later version. - - The LZO library is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with the LZO library; see the file COPYING. - If not, write to the Free Software Foundation, Inc., - 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. - */ -#include "libbb.h" -#include "liblzo.h" - -/*********************************************************************** -// decompress a block of data. -************************************************************************/ -/* safe decompression with overrun testing */ -int lzo1x_decompress_safe(const uint8_t* in, unsigned in_len, - uint8_t* out, unsigned* out_len, - void* wrkmem UNUSED_PARAM) -{ - register uint8_t* op; - register const uint8_t* ip; - register unsigned t; -#if defined(COPY_DICT) - unsigned m_off; - const uint8_t* dict_end; -#else - register const uint8_t* m_pos = NULL; /* possibly not needed */ -#endif - const uint8_t* const ip_end = in + in_len; -#if defined(HAVE_ANY_OP) - uint8_t* const op_end = out + *out_len; -#endif -#if defined(LZO1Z) - unsigned last_m_off = 0; -#endif - -// LZO_UNUSED(wrkmem); - -#if defined(COPY_DICT) - if (dict) { - if (dict_len > M4_MAX_OFFSET) { - dict += dict_len - M4_MAX_OFFSET; - dict_len = M4_MAX_OFFSET; - } - dict_end = dict + dict_len; - } else { - dict_len = 0; - dict_end = NULL; - } -#endif /* COPY_DICT */ - - *out_len = 0; - - op = out; - ip = in; - - if (*ip > 17) { - t = *ip++ - 17; - if (t < 4) - goto match_next; - assert(t > 0); NEED_OP(t); NEED_IP(t+1); - do *op++ = *ip++; while (--t > 0); - goto first_literal_run; - } - - while (TEST_IP && TEST_OP) { - t = *ip++; - if (t >= 16) - goto match; - /* a literal run */ - if (t == 0) { - NEED_IP(1); - while (*ip == 0) { - t += 255; - ip++; - NEED_IP(1); - } - t += 15 + *ip++; - } - /* copy literals */ - assert(t > 0); - NEED_OP(t+3); - NEED_IP(t+4); -#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) -# if !defined(LZO_UNALIGNED_OK_4) - if (PTR_ALIGNED2_4(op, ip)) -# endif - { - COPY4(op, ip); - op += 4; - ip += 4; - if (--t > 0) { - if (t >= 4) { - do { - COPY4(op, ip); - op += 4; - ip += 4; - t -= 4; - } while (t >= 4); - if (t > 0) - do *op++ = *ip++; while (--t > 0); - } else { - do *op++ = *ip++; while (--t > 0); - } - } - } -# if !defined(LZO_UNALIGNED_OK_4) - else -# endif -#endif -#if !defined(LZO_UNALIGNED_OK_4) - { - *op++ = *ip++; - *op++ = *ip++; - *op++ = *ip++; - do *op++ = *ip++; while (--t > 0); - } -#endif - - first_literal_run: - t = *ip++; - if (t >= 16) - goto match; -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); - last_m_off = m_off; -#else - m_off = (1 + M2_MAX_OFFSET) + (t >> 2) + (*ip++ << 2); -#endif - NEED_OP(3); - t = 3; COPY_DICT(t,m_off) -#else /* !COPY_DICT */ -#if defined(LZO1Z) - t = (1 + M2_MAX_OFFSET) + (t << 6) + (*ip++ >> 2); - m_pos = op - t; - last_m_off = t; -#else - m_pos = op - (1 + M2_MAX_OFFSET); - m_pos -= t >> 2; - m_pos -= *ip++ << 2; -#endif - TEST_LB(m_pos); NEED_OP(3); - *op++ = *m_pos++; - *op++ = *m_pos++; - *op++ = *m_pos; -#endif /* COPY_DICT */ - goto match_done; - - /* handle matches */ - do { - match: - if (t >= 64) { /* a M2 match */ -#if defined(COPY_DICT) -#if defined(LZO1X) - m_off = 1 + ((t >> 2) & 7) + (*ip++ << 3); - t = (t >> 5) - 1; -#elif defined(LZO1Y) - m_off = 1 + ((t >> 2) & 3) + (*ip++ << 2); - t = (t >> 4) - 3; -#elif defined(LZO1Z) - m_off = t & 0x1f; - if (m_off >= 0x1c) - m_off = last_m_off; - else { - m_off = 1 + (m_off << 6) + (*ip++ >> 2); - last_m_off = m_off; - } - t = (t >> 5) - 1; -#endif -#else /* !COPY_DICT */ -#if defined(LZO1X) - m_pos = op - 1; - m_pos -= (t >> 2) & 7; - m_pos -= *ip++ << 3; - t = (t >> 5) - 1; -#elif defined(LZO1Y) - m_pos = op - 1; - m_pos -= (t >> 2) & 3; - m_pos -= *ip++ << 2; - t = (t >> 4) - 3; -#elif defined(LZO1Z) - { - unsigned off = t & 0x1f; - m_pos = op; - if (off >= 0x1c) { - assert(last_m_off > 0); - m_pos -= last_m_off; - } else { - off = 1 + (off << 6) + (*ip++ >> 2); - m_pos -= off; - last_m_off = off; - } - } - t = (t >> 5) - 1; -#endif - TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1); - goto copy_match; -#endif /* COPY_DICT */ - } - else if (t >= 32) { /* a M3 match */ - t &= 31; - if (t == 0) { - NEED_IP(1); - while (*ip == 0) { - t += 255; - ip++; - NEED_IP(1); - } - t += 31 + *ip++; - } -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off = 1 + (ip[0] << 6) + (ip[1] >> 2); - last_m_off = m_off; -#else - m_off = 1 + (ip[0] >> 2) + (ip[1] << 6); -#endif -#else /* !COPY_DICT */ -#if defined(LZO1Z) - { - unsigned off = 1 + (ip[0] << 6) + (ip[1] >> 2); - m_pos = op - off; - last_m_off = off; - } -#elif defined(LZO_UNALIGNED_OK_2) && defined(LZO_ABI_LITTLE_ENDIAN) - m_pos = op - 1; - m_pos -= (* (const lzo_ushortp) ip) >> 2; -#else - m_pos = op - 1; - m_pos -= (ip[0] >> 2) + (ip[1] << 6); -#endif -#endif /* COPY_DICT */ - ip += 2; - } - else if (t >= 16) { /* a M4 match */ -#if defined(COPY_DICT) - m_off = (t & 8) << 11; -#else /* !COPY_DICT */ - m_pos = op; - m_pos -= (t & 8) << 11; -#endif /* COPY_DICT */ - t &= 7; - if (t == 0) { - NEED_IP(1); - while (*ip == 0) { - t += 255; - ip++; - NEED_IP(1); - } - t += 7 + *ip++; - } -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off += (ip[0] << 6) + (ip[1] >> 2); -#else - m_off += (ip[0] >> 2) + (ip[1] << 6); -#endif - ip += 2; - if (m_off == 0) - goto eof_found; - m_off += 0x4000; -#if defined(LZO1Z) - last_m_off = m_off; -#endif -#else /* !COPY_DICT */ -#if defined(LZO1Z) - m_pos -= (ip[0] << 6) + (ip[1] >> 2); -#elif defined(LZO_UNALIGNED_OK_2) && defined(LZO_ABI_LITTLE_ENDIAN) - m_pos -= (* (const lzo_ushortp) ip) >> 2; -#else - m_pos -= (ip[0] >> 2) + (ip[1] << 6); -#endif - ip += 2; - if (m_pos == op) - goto eof_found; - m_pos -= 0x4000; -#if defined(LZO1Z) - last_m_off = pd((const uint8_t*)op, m_pos); -#endif -#endif /* COPY_DICT */ - } - else { /* a M1 match */ -#if defined(COPY_DICT) -#if defined(LZO1Z) - m_off = 1 + (t << 6) + (*ip++ >> 2); - last_m_off = m_off; -#else - m_off = 1 + (t >> 2) + (*ip++ << 2); -#endif - NEED_OP(2); - t = 2; COPY_DICT(t,m_off) -#else /* !COPY_DICT */ -#if defined(LZO1Z) - t = 1 + (t << 6) + (*ip++ >> 2); - m_pos = op - t; - last_m_off = t; -#else - m_pos = op - 1; - m_pos -= t >> 2; - m_pos -= *ip++ << 2; -#endif - TEST_LB(m_pos); NEED_OP(2); - *op++ = *m_pos++; - *op++ = *m_pos; -#endif /* COPY_DICT */ - goto match_done; - } - - /* copy match */ -#if defined(COPY_DICT) - - NEED_OP(t+3-1); - t += 3-1; COPY_DICT(t,m_off) - -#else /* !COPY_DICT */ - - TEST_LB(m_pos); assert(t > 0); NEED_OP(t+3-1); -#if defined(LZO_UNALIGNED_OK_4) || defined(LZO_ALIGNED_OK_4) -# if !defined(LZO_UNALIGNED_OK_4) - if (t >= 2 * 4 - (3 - 1) && PTR_ALIGNED2_4(op,m_pos)) { - assert((op - m_pos) >= 4); /* both pointers are aligned */ -# else - if (t >= 2 * 4 - (3 - 1) && (op - m_pos) >= 4) { -# endif - COPY4(op,m_pos); - op += 4; m_pos += 4; t -= 4 - (3 - 1); - do { - COPY4(op,m_pos); - op += 4; m_pos += 4; t -= 4; - } while (t >= 4); - if (t > 0) - do *op++ = *m_pos++; while (--t > 0); - } - else -#endif - { - copy_match: - *op++ = *m_pos++; *op++ = *m_pos++; - do *op++ = *m_pos++; while (--t > 0); - } - -#endif /* COPY_DICT */ - - match_done: -#if defined(LZO1Z) - t = ip[-1] & 3; -#else - t = ip[-2] & 3; -#endif - if (t == 0) - break; - - /* copy literals */ - match_next: - assert(t > 0); - assert(t < 4); - NEED_OP(t); - NEED_IP(t+1); -#if 0 - do *op++ = *ip++; while (--t > 0); -#else - *op++ = *ip++; - if (t > 1) { - *op++ = *ip++; - if (t > 2) - *op++ = *ip++; - } -#endif - t = *ip++; - } while (TEST_IP && TEST_OP); - } - -//#if defined(HAVE_TEST_IP) || defined(HAVE_TEST_OP) - /* no EOF code was found */ - *out_len = pd(op, out); - return LZO_E_EOF_NOT_FOUND; -//#endif - - eof_found: - assert(t == 1); - *out_len = pd(op, out); - return (ip == ip_end ? LZO_E_OK : - (ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN)); - -//#if defined(HAVE_NEED_IP) - input_overrun: - *out_len = pd(op, out); - return LZO_E_INPUT_OVERRUN; -//#endif - -//#if defined(HAVE_NEED_OP) - output_overrun: - *out_len = pd(op, out); - return LZO_E_OUTPUT_OVERRUN; -//#endif - -//#if defined(LZO_TEST_OVERRUN_LOOKBEHIND) - lookbehind_overrun: - *out_len = pd(op, out); - return LZO_E_LOOKBEHIND_OVERRUN; -//#endif -} diff --git a/archival/libunarchive/open_transformer.c b/archival/libunarchive/open_transformer.c deleted file mode 100644 index ed6a556bb..000000000 --- a/archival/libunarchive/open_transformer.c +++ /dev/null @@ -1,54 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -/* transformer(), more than meets the eye */ -/* - * On MMU machine, the transform_prog is removed by macro magic - * in include/unarchive.h. On NOMMU, transformer is removed. - */ -void FAST_FUNC open_transformer(int fd, - IF_DESKTOP(long long) int FAST_FUNC (*transformer)(int src_fd, int dst_fd), - const char *transform_prog) -{ - struct fd_pair fd_pipe; - int pid; - - xpiped_pair(fd_pipe); - pid = BB_MMU ? xfork() : xvfork(); - if (pid == 0) { - /* Child */ - close(fd_pipe.rd); /* we don't want to read from the parent */ - // FIXME: error check? -#if BB_MMU - transformer(fd, fd_pipe.wr); - if (ENABLE_FEATURE_CLEAN_UP) { - close(fd_pipe.wr); /* send EOF */ - close(fd); - } - /* must be _exit! bug was actually seen here */ - _exit(EXIT_SUCCESS); -#else - { - char *argv[4]; - xmove_fd(fd, 0); - xmove_fd(fd_pipe.wr, 1); - argv[0] = (char*)transform_prog; - argv[1] = (char*)"-cf"; - argv[2] = (char*)"-"; - argv[3] = NULL; - BB_EXECVP(transform_prog, argv); - bb_perror_msg_and_die("can't execute '%s'", transform_prog); - } -#endif - /* notreached */ - } - - /* parent process */ - close(fd_pipe.wr); /* don't want to write to the child */ - xmove_fd(fd_pipe.rd, fd); -} diff --git a/archival/libunarchive/seek_by_jump.c b/archival/libunarchive/seek_by_jump.c deleted file mode 100644 index bda55e1b1..000000000 --- a/archival/libunarchive/seek_by_jump.c +++ /dev/null @@ -1,19 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -void FAST_FUNC seek_by_jump(int fd, off_t amount) -{ - if (amount - && lseek(fd, amount, SEEK_CUR) == (off_t) -1 - ) { - if (errno == ESPIPE) - seek_by_read(fd, amount); - else - bb_perror_msg_and_die("seek failure"); - } -} diff --git a/archival/libunarchive/seek_by_read.c b/archival/libunarchive/seek_by_read.c deleted file mode 100644 index 25b31365d..000000000 --- a/archival/libunarchive/seek_by_read.c +++ /dev/null @@ -1,16 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" - -/* If we are reading through a pipe, or from stdin then we can't lseek, - * we must read and discard the data to skip over it. - */ -void FAST_FUNC seek_by_read(int fd, off_t amount) -{ - if (amount) - bb_copyfd_exact_size(fd, -1, amount); -} diff --git a/archival/libunarchive/unpack_ar_archive.c b/archival/libunarchive/unpack_ar_archive.c deleted file mode 100644 index 4f68ba3d8..000000000 --- a/archival/libunarchive/unpack_ar_archive.c +++ /dev/null @@ -1,22 +0,0 @@ -/* vi: set sw=4 ts=4: */ -/* - * Licensed under GPLv2 or later, see file LICENSE in this source tree. - */ - -#include "libbb.h" -#include "unarchive.h" -#include "ar.h" - -void FAST_FUNC unpack_ar_archive(archive_handle_t *ar_archive) -{ - char magic[7]; - - xread(ar_archive->src_fd, magic, AR_MAGIC_LEN); - if (strncmp(magic, AR_MAGIC, AR_MAGIC_LEN) != 0) { - bb_error_msg_and_die("invalid ar magic"); - } - ar_archive->offset += AR_MAGIC_LEN; - - while (get_header_ar(ar_archive) == EXIT_SUCCESS) - continue; -} diff --git a/archival/libunarchive/unxz/README b/archival/libunarchive/unxz/README deleted file mode 100644 index c5972f6b8..000000000 --- a/archival/libunarchive/unxz/README +++ /dev/null @@ -1,135 +0,0 @@ - -XZ Embedded -=========== - - XZ Embedded is a relatively small, limited implementation of the .xz - file format. Currently only decoding is implemented. - - XZ Embedded was written for use in the Linux kernel, but the code can - be easily used in other environments too, including regular userspace - applications. - - This README contains information that is useful only when the copy - of XZ Embedded isn't part of the Linux kernel tree. You should also - read linux/Documentation/xz.txt even if you aren't using XZ Embedded - as part of Linux; information in that file is not repeated in this - README. - -Compiling the Linux kernel module - - The xz_dec module depends on crc32 module, so make sure that you have - it enabled (CONFIG_CRC32). - - Building the xz_dec and xz_dec_test modules without support for BCJ - filters: - - cd linux/lib/xz - make -C /path/to/kernel/source \ - KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \ - CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m - - Building the xz_dec and xz_dec_test modules with support for BCJ - filters: - - cd linux/lib/xz - make -C /path/to/kernel/source \ - KCPPFLAGS=-I"$(pwd)/../../include" M="$(pwd)" \ - CONFIG_XZ_DEC=m CONFIG_XZ_DEC_TEST=m CONFIG_XZ_DEC_BCJ=y \ - CONFIG_XZ_DEC_X86=y CONFIG_XZ_DEC_POWERPC=y \ - CONFIG_XZ_DEC_IA64=y CONFIG_XZ_DEC_ARM=y \ - CONFIG_XZ_DEC_ARMTHUMB=y CONFIG_XZ_DEC_SPARC=y - - If you want only one or a few of the BCJ filters, omit the appropriate - variables. CONFIG_XZ_DEC_BCJ=y is always required to build the support - code shared between all BCJ filters. - - Most people don't need the xz_dec_test module. You can skip building - it by omitting CONFIG_XZ_DEC_TEST=m from the make command line. - -Compiler requirements - - XZ Embedded should compile as either GNU-C89 (used in the Linux - kernel) or with any C99 compiler. Getting the code to compile with - non-GNU C89 compiler or a C++ compiler should be quite easy as - long as there is a data type for unsigned 64-bit integer (or the - code is modified not to support large files, which needs some more - care than just using 32-bit integer instead of 64-bit). - - If you use GCC, try to use a recent version. For example, on x86, - xz_dec_lzma2.c compiled with GCC 3.3.6 is 15-25 % slower than when - compiled with GCC 4.3.3. - -Embedding into userspace applications - - To embed the XZ decoder, copy the following files into a single - directory in your source code tree: - - linux/include/linux/xz.h - linux/lib/xz/xz_crc32.c - linux/lib/xz/xz_dec_lzma2.c - linux/lib/xz/xz_dec_stream.c - linux/lib/xz/xz_lzma2.h - linux/lib/xz/xz_private.h - linux/lib/xz/xz_stream.h - userspace/xz_config.h - - Alternatively, xz.h may be placed into a different directory but then - that directory must be in the compiler include path when compiling - the .c files. - - Your code should use only the functions declared in xz.h. The rest of - the .h files are meant only for internal use in XZ Embedded. - - You may want to modify xz_config.h to be more suitable for your build - environment. Probably you should at least skim through it even if the - default file works as is. - -BCJ filter support - - If you want support for one or more BCJ filters, you need to copy also - linux/lib/xz/xz_dec_bcj.c into your application, and use appropriate - #defines in xz_config.h or in compiler flags. You don't need these - #defines in the code that just uses XZ Embedded via xz.h, but having - them always #defined doesn't hurt either. - - #define Instruction set BCJ filter endianness - XZ_DEC_X86 x86 or x86-64 Little endian only - XZ_DEC_POWERPC PowerPC Big endian only - XZ_DEC_IA64 Itanium (IA-64) Big or little endian - XZ_DEC_ARM ARM Little endian only - XZ_DEC_ARMTHUMB ARM-Thumb Little endian only - XZ_DEC_SPARC SPARC Big or little endian - - While some architectures are (partially) bi-endian, the endianness - setting doesn't change the endianness of the instructions on all - architectures. That's why Itanium and SPARC filters work for both big - and little endian executables (Itanium has little endian instructions - and SPARC has big endian instructions). - - There currently is no filter for little endian PowerPC or big endian - ARM or ARM-Thumb. Implementing filters for them can be considered if - there is a need for such filters in real-world applications. - -Notes about shared libraries - - If you are including XZ Embedded into a shared library, you very - probably should rename the xz_* functions to prevent symbol - conflicts in case your library is linked against some other library - or application that also has XZ Embedded in it (which may even be - a different version of XZ Embedded). TODO: Provide an easy way - to do this. - - Please don't create a shared library of XZ Embedded itself unless - it is fine to rebuild everything depending on that shared library - everytime you upgrade to a newer version of XZ Embedded. There are - no API or ABI stability guarantees between different versions of - XZ Embedded. - -Specifying the calling convention - - XZ_FUNC macro was included to support declaring functions with __init - in Linux. Outside Linux, it can be used to specify the calling - convention on systems that support multiple calling conventions. - For example, on Windows, you may make all functions use the stdcall - calling convention by defining XZ_FUNC=__stdcall when building and - using the functions from XZ Embedded. diff --git a/archival/libunarchive/unxz/xz.h b/archival/libunarchive/unxz/xz.h deleted file mode 100644 index c6c071c4a..000000000 --- a/archival/libunarchive/unxz/xz.h +++ /dev/null @@ -1,271 +0,0 @@ -/* - * XZ decompressor - * - * Authors: Lasse Collin - * Igor Pavlov - * - * This file has been put into the public domain. - * You can do whatever you want with this file. - */ - -#ifndef XZ_H -#define XZ_H - -#ifdef __KERNEL__ -# include -# include -#else -# include -# include -#endif - -/* In Linux, this is used to make extern functions static when needed. */ -#ifndef XZ_EXTERN -# define XZ_EXTERN extern -#endif - -/* In Linux, this is used to mark the functions with __init when needed. */ -#ifndef XZ_FUNC -# define XZ_FUNC -#endif - -/** - * enum xz_mode - Operation mode - * - * @XZ_SINGLE: Single-call mode. This uses less RAM than - * than multi-call modes, because the LZMA2 - * dictionary doesn't need to be allocated as - * part of the decoder state. All required data - * structures are allocated at initialization, - * so xz_dec_run() cannot return XZ_MEM_ERROR. - * @XZ_PREALLOC: Multi-call mode with preallocated LZMA2 - * dictionary buffer. All data structures are - * allocated at initialization, so xz_dec_run() - * cannot return XZ_MEM_ERROR. - * @XZ_DYNALLOC: Multi-call mode. The LZMA2 dictionary is - * allocated once the required size has been - * parsed from the stream headers. If the - * allocation fails, xz_dec_run() will return - * XZ_MEM_ERROR. - * - * It is possible to enable support only for a subset of the above - * modes at compile time by defining XZ_DEC_SINGLE, XZ_DEC_PREALLOC, - * or XZ_DEC_DYNALLOC. The xz_dec kernel module is always compiled - * with support for all operation modes, but the preboot code may - * be built with fewer features to minimize code size. - */ -enum xz_mode { - XZ_SINGLE, - XZ_PREALLOC, - XZ_DYNALLOC -}; - -/** - * enum xz_ret - Return codes - * @XZ_OK: Everything is OK so far. More input or more - * output space is required to continue. This - * return code is possible only in multi-call mode - * (XZ_PREALLOC or XZ_DYNALLOC). - * @XZ_STREAM_END: Operation finished successfully. - * @XZ_UNSUPPORTED_CHECK: Integrity check type is not supported. Decoding - * is still possible in multi-call mode by simply - * calling xz_dec_run() again. - * NOTE: This return value is used only if - * XZ_DEC_ANY_CHECK was defined at build time, - * which is not used in the kernel. Unsupported - * check types return XZ_OPTIONS_ERROR if - * XZ_DEC_ANY_CHECK was not defined at build time. - * @XZ_MEM_ERROR: Allocating memory failed. This return code is - * possible only if the decoder was initialized - * with XZ_DYNALLOC. The amount of memory that was - * tried to be allocated was no more than the - * dict_max argument given to xz_dec_init(). - * @XZ_MEMLIMIT_ERROR: A bigger LZMA2 dictionary would be needed than - * allowed by the dict_max argument given to - * xz_dec_init(). This return value is possible - * only in multi-call mode (XZ_PREALLOC or - * XZ_DYNALLOC); the single-call mode (XZ_SINGLE) - * ignores the dict_max argument. - * @XZ_FORMAT_ERROR: File format was not recognized (wrong magic - * bytes). - * @XZ_OPTIONS_ERROR: This implementation doesn't support the requested - * compression options. In the decoder this means - * that the header CRC32 matches, but the header - * itself specifies something that we don't support. - * @XZ_DATA_ERROR: Compressed data is corrupt. - * @XZ_BUF_ERROR: Cannot make any progress. Details are slightly - * different between multi-call and single-call - * mode; more information below. - * - * In multi-call mode, XZ_BUF_ERROR is returned when two consecutive calls - * to XZ code cannot consume any input and cannot produce any new output. - * This happens when there is no new input available, or the output buffer - * is full while at least one output byte is still pending. Assuming your - * code is not buggy, you can get this error only when decoding a compressed - * stream that is truncated or otherwise corrupt. - * - * In single-call mode, XZ_BUF_ERROR is returned only when the output buffer - * is too small, or the compressed input is corrupt in a way that makes the - * decoder produce more output than the caller expected. When it is - * (relatively) clear that the compressed input is truncated, XZ_DATA_ERROR - * is used instead of XZ_BUF_ERROR. - */ -enum xz_ret { - XZ_OK, - XZ_STREAM_END, - XZ_UNSUPPORTED_CHECK, - XZ_MEM_ERROR, - XZ_MEMLIMIT_ERROR, - XZ_FORMAT_ERROR, - XZ_OPTIONS_ERROR, - XZ_DATA_ERROR, - XZ_BUF_ERROR -}; - -/** - * struct xz_buf - Passing input and output buffers to XZ code - * @in: Beginning of the input buffer. This may be NULL if and only - * if in_pos is equal to in_size. - * @in_pos: Current position in the input buffer. This must not exceed - * in_size. - * @in_size: Size of the input buffer - * @out: Beginning of the output buffer. This may be NULL if and only - * if out_pos is equal to out_size. - * @out_pos: Current position in the output buffer. This must not exceed - * out_size. - * @out_size: Size of the output buffer - * - * Only the contents of the output buffer from out[out_pos] onward, and - * the variables in_pos and out_pos are modified by the XZ code. - */ -struct xz_buf { - const uint8_t *in; - size_t in_pos; - size_t in_size; - - uint8_t *out; - size_t out_pos; - size_t out_size; -}; - -/** - * struct xz_dec - Opaque type to hold the XZ decoder state - */ -struct xz_dec; - -/** - * xz_dec_init() - Allocate and initialize a XZ decoder state - * @mode: Operation mode - * @dict_max: Maximum size of the LZMA2 dictionary (history buffer) for - * multi-call decoding. This is ignored in single-call mode - * (mode == XZ_SINGLE). LZMA2 dictionary is always 2^n bytes - * or 2^n + 2^(n-1) bytes (the latter sizes are less common - * in practice), so other values for dict_max don't make sense. - * In the kernel, dictionary sizes of 64 KiB, 128 KiB, 256 KiB, - * 512 KiB, and 1 MiB are probably the only reasonable values, - * except for kernel and initramfs images where a bigger - * dictionary can be fine and useful. - * - * Single-call mode (XZ_SINGLE): xz_dec_run() decodes the whole stream at - * once. The caller must provide enough output space or the decoding will - * fail. The output space is used as the dictionary buffer, which is why - * there is no need to allocate the dictionary as part of the decoder's - * internal state. - * - * Because the output buffer is used as the workspace, streams encoded using - * a big dictionary are not a problem in single-call mode. It is enough that - * the output buffer is big enough to hold the actual uncompressed data; it - * can be smaller than the dictionary size stored in the stream headers. - * - * Multi-call mode with preallocated dictionary (XZ_PREALLOC): dict_max bytes - * of memory is preallocated for the LZMA2 dictionary. This way there is no - * risk that xz_dec_run() could run out of memory, since xz_dec_run() will - * never allocate any memory. Instead, if the preallocated dictionary is too - * small for decoding the given input stream, xz_dec_run() will return - * XZ_MEMLIMIT_ERROR. Thus, it is important to know what kind of data will be - * decoded to avoid allocating excessive amount of memory for the dictionary. - * - * Multi-call mode with dynamically allocated dictionary (XZ_DYNALLOC): - * dict_max specifies the maximum allowed dictionary size that xz_dec_run() - * may allocate once it has parsed the dictionary size from the stream - * headers. This way excessive allocations can be avoided while still - * limiting the maximum memory usage to a sane value to prevent running the - * system out of memory when decompressing streams from untrusted sources. - * - * On success, xz_dec_init() returns a pointer to struct xz_dec, which is - * ready to be used with xz_dec_run(). If memory allocation fails, - * xz_dec_init() returns NULL. - */ -XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init( - enum xz_mode mode, uint32_t dict_max); - -/** - * xz_dec_run() - Run the XZ decoder - * @s: Decoder state allocated using xz_dec_init() - * @b: Input and output buffers - * - * The possible return values depend on build options and operation mode. - * See enum xz_ret for details. - * - * NOTE: If an error occurs in single-call mode (return value is not - * XZ_STREAM_END), b->in_pos and b->out_pos are not modified, and the - * contents of the output buffer from b->out[b->out_pos] onward are - * undefined. This is true even after XZ_BUF_ERROR, because with some filter - * chains, there may be a second pass over the output buffer, and this pass - * cannot be properly done if the output buffer is truncated. Thus, you - * cannot give the single-call decoder a too small buffer and then expect to - * get that amount valid data from the beginning of the stream. You must use - * the multi-call decoder if you don't want to uncompress the whole stream. - */ -XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b); - -/** - * xz_dec_reset() - Reset an already allocated decoder state - * @s: Decoder state allocated using xz_dec_init() - * - * This function can be used to reset the multi-call decoder state without - * freeing and reallocating memory with xz_dec_end() and xz_dec_init(). - * - * In single-call mode, xz_dec_reset() is always called in the beginning of - * xz_dec_run(). Thus, explicit call to xz_dec_reset() is useful only in - * multi-call mode. - */ -XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s); - -/** - * xz_dec_end() - Free the memory allocated for the decoder state - * @s: Decoder state allocated using xz_dec_init(). If s is NULL, - * this function does nothing. - */ -XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s); - -/* - * Standalone build (userspace build or in-kernel build for boot time use) - * needs a CRC32 implementation. For normal in-kernel use, kernel's own - * CRC32 module is used instead, and users of this module don't need to - * care about the functions below. - */ -#ifndef XZ_INTERNAL_CRC32 -# ifdef __KERNEL__ -# define XZ_INTERNAL_CRC32 0 -# else -# define XZ_INTERNAL_CRC32 1 -# endif -#endif - -#if XZ_INTERNAL_CRC32 -/* - * This must be called before any other xz_* function to initialize - * the CRC32 lookup table. - */ -XZ_EXTERN void XZ_FUNC xz_crc32_init(void); - -/* - * Update CRC32 value using the polynomial from IEEE-802.3. To start a new - * calculation, the third argument must be zero. To continue the calculation, - * the previously returned value is passed as the third argument. - */ -XZ_EXTERN uint32_t XZ_FUNC xz_crc32( - const uint8_t *buf, size_t size, uint32_t crc); -#endif -#endif diff --git a/archival/libunarchive/unxz/xz_config.h b/archival/libunarchive/unxz/xz_config.h deleted file mode 100644 index 187e1cbed..000000000 --- a/archival/libunarchive/unxz/xz_config.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Private includes and definitions for userspace use of XZ Embedded - * - * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. - */ - -#ifndef XZ_CONFIG_H -#define XZ_CONFIG_H - -/* Uncomment as needed to enable BCJ filter decoders. */ -/* #define XZ_DEC_X86 */ -/* #define XZ_DEC_POWERPC */ -/* #define XZ_DEC_IA64 */ -/* #define XZ_DEC_ARM */ -/* #define XZ_DEC_ARMTHUMB */ -/* #define XZ_DEC_SPARC */ - -#include -#include -#include - -#include "xz.h" - -#define kmalloc(size, flags) malloc(size) -#define kfree(ptr) free(ptr) -#define vmalloc(size) malloc(size) -#define vfree(ptr) free(ptr) - -#define memeq(a, b, size) (memcmp(a, b, size) == 0) -#define memzero(buf, size) memset(buf, 0, size) - -#undef min -#undef min_t -#define min(x, y) ((x) < (y) ? (x) : (y)) -#define min_t(type, x, y) min(x, y) - -/* - * Some functions have been marked with __always_inline to keep the - * performance reasonable even when the compiler is optimizing for - * small code size. You may be able to save a few bytes by #defining - * __always_inline to plain inline, but don't complain if the code - * becomes slow. - * - * NOTE: System headers on GNU/Linux may #define this macro already, - * so if you want to change it, you need to #undef it first. - */ -#ifndef __always_inline -# ifdef __GNUC__ -# define __always_inline \ - inline __attribute__((__always_inline__)) -# else -# define __always_inline inline -# endif -#endif - -/* - * Some functions are marked to never be inlined to reduce stack usage. - * If you don't care about stack usage, you may want to modify this so - * that noinline_for_stack is #defined to be empty even when using GCC. - * Doing so may save a few bytes in binary size. - */ -#ifndef noinline_for_stack -# ifdef __GNUC__ -# define noinline_for_stack __attribute__((__noinline__)) -# else -# define noinline_for_stack -# endif -#endif - -/* Inline functions to access unaligned unsigned 32-bit integers */ -#ifndef get_unaligned_le32 -static inline uint32_t XZ_FUNC get_unaligned_le32(const uint8_t *buf) -{ - return (uint32_t)buf[0] - | ((uint32_t)buf[1] << 8) - | ((uint32_t)buf[2] << 16) - | ((uint32_t)buf[3] << 24); -} -#endif - -#ifndef get_unaligned_be32 -static inline uint32_t XZ_FUNC get_unaligned_be32(const uint8_t *buf) -{ - return (uint32_t)(buf[0] << 24) - | ((uint32_t)buf[1] << 16) - | ((uint32_t)buf[2] << 8) - | (uint32_t)buf[3]; -} -#endif - -#ifndef put_unaligned_le32 -static inline void XZ_FUNC put_unaligned_le32(uint32_t val, uint8_t *buf) -{ - buf[0] = (uint8_t)val; - buf[1] = (uint8_t)(val >> 8); - buf[2] = (uint8_t)(val >> 16); - buf[3] = (uint8_t)(val >> 24); -} -#endif - -#ifndef put_unaligned_be32 -static inline void XZ_FUNC put_unaligned_be32(uint32_t val, uint8_t *buf) -{ - buf[0] = (uint8_t)(val >> 24); - buf[1] = (uint8_t)(val >> 16); - buf[2] = (uint8_t)(val >> 8); - buf[3] = (uint8_t)val; -} -#endif - -/* - * Use get_unaligned_le32() also for aligned access for simplicity. On - * little endian systems, #define get_le32(ptr) (*(const uint32_t *)(ptr)) - * could save a few bytes in code size. - */ -#ifndef get_le32 -# define get_le32 get_unaligned_le32 -#endif - -#endif diff --git a/archival/libunarchive/unxz/xz_dec_bcj.c b/archival/libunarchive/unxz/xz_dec_bcj.c deleted file mode 100644 index 09162b51f..000000000 --- a/archival/libunarchive/unxz/xz_dec_bcj.c +++ /dev/null @@ -1,564 +0,0 @@ -/* - * Branch/Call/Jump (BCJ) filter decoders - * - * Authors: Lasse Collin - * Igor Pavlov - * - * This file has been put into the public domain. - * You can do whatever you want with this file. - */ - -#include "xz_private.h" - -/* - * The rest of the file is inside this ifdef. It makes things a little more - * convenient when building without support for any BCJ filters. - */ -#ifdef XZ_DEC_BCJ - -struct xz_dec_bcj { - /* Type of the BCJ filter being used */ - enum { - BCJ_X86 = 4, /* x86 or x86-64 */ - BCJ_POWERPC = 5, /* Big endian only */ - BCJ_IA64 = 6, /* Big or little endian */ - BCJ_ARM = 7, /* Little endian only */ - BCJ_ARMTHUMB = 8, /* Little endian only */ - BCJ_SPARC = 9 /* Big or little endian */ - } type; - - /* - * Return value of the next filter in the chain. We need to preserve - * this information across calls, because we must not call the next - * filter anymore once it has returned XZ_STREAM_END. - */ - enum xz_ret ret; - - /* True if we are operating in single-call mode. */ - bool single_call; - - /* - * Absolute position relative to the beginning of the uncompressed - * data (in a single .xz Block). We care only about the lowest 32 - * bits so this doesn't need to be uint64_t even with big files. - */ - uint32_t pos; - - /* x86 filter state */ - uint32_t x86_prev_mask; - - /* Temporary space to hold the variables from struct xz_buf */ - uint8_t *out; - size_t out_pos; - size_t out_size; - - struct { - /* Amount of already filtered data in the beginning of buf */ - size_t filtered; - - /* Total amount of data currently stored in buf */ - size_t size; - - /* - * Buffer to hold a mix of filtered and unfiltered data. This - * needs to be big enough to hold Alignment + 2 * Look-ahead: - * - * Type Alignment Look-ahead - * x86 1 4 - * PowerPC 4 0 - * IA-64 16 0 - * ARM 4 0 - * ARM-Thumb 2 2 - * SPARC 4 0 - */ - uint8_t buf[16]; - } temp; -}; - -#ifdef XZ_DEC_X86 -/* - * This is macro used to test the most significant byte of a memory address - * in an x86 instruction. - */ -#define bcj_x86_test_msbyte(b) ((b) == 0x00 || (b) == 0xFF) - -static noinline_for_stack size_t XZ_FUNC bcj_x86( - struct xz_dec_bcj *s, uint8_t *buf, size_t size) -{ - static const bool mask_to_allowed_status[8] - = { true, true, true, false, true, false, false, false }; - - static const uint8_t mask_to_bit_num[8] = { 0, 1, 2, 2, 3, 3, 3, 3 }; - - size_t i; - size_t prev_pos = (size_t)-1; - uint32_t prev_mask = s->x86_prev_mask; - uint32_t src; - uint32_t dest; - uint32_t j; - uint8_t b; - - if (size <= 4) - return 0; - - size -= 4; - for (i = 0; i < size; ++i) { - if ((buf[i] & 0xFE) != 0xE8) - continue; - - prev_pos = i - prev_pos; - if (prev_pos > 3) { - prev_mask = 0; - } else { - prev_mask = (prev_mask << (prev_pos - 1)) & 7; - if (prev_mask != 0) { - b = buf[i + 4 - mask_to_bit_num[prev_mask]]; - if (!mask_to_allowed_status[prev_mask] - || bcj_x86_test_msbyte(b)) { - prev_pos = i; - prev_mask = (prev_mask << 1) | 1; - continue; - } - } - } - - prev_pos = i; - - if (bcj_x86_test_msbyte(buf[i + 4])) { - src = get_unaligned_le32(buf + i + 1); - while (true) { - dest = src - (s->pos + (uint32_t)i + 5); - if (prev_mask == 0) - break; - - j = mask_to_bit_num[prev_mask] * 8; - b = (uint8_t)(dest >> (24 - j)); - if (!bcj_x86_test_msbyte(b)) - break; - - src = dest ^ (((uint32_t)1 << (32 - j)) - 1); - } - - dest &= 0x01FFFFFF; - dest |= (uint32_t)0 - (dest & 0x01000000); - put_unaligned_le32(dest, buf + i + 1); - i += 4; - } else { - prev_mask = (prev_mask << 1) | 1; - } - } - - prev_pos = i - prev_pos; - s->x86_prev_mask = prev_pos > 3 ? 0 : prev_mask << (prev_pos - 1); - return i; -} -#endif - -#ifdef XZ_DEC_POWERPC -static noinline_for_stack size_t XZ_FUNC bcj_powerpc( - struct xz_dec_bcj *s, uint8_t *buf, size_t size) -{ - size_t i; - uint32_t instr; - - for (i = 0; i + 4 <= size; i += 4) { - instr = get_unaligned_be32(buf + i); - if ((instr & 0xFC000003) == 0x48000001) { - instr &= 0x03FFFFFC; - instr -= s->pos + (uint32_t)i; - instr &= 0x03FFFFFC; - instr |= 0x48000001; - put_unaligned_be32(instr, buf + i); - } - } - - return i; -} -#endif - -#ifdef XZ_DEC_IA64 -static noinline_for_stack size_t XZ_FUNC bcj_ia64( - struct xz_dec_bcj *s, uint8_t *buf, size_t size) -{ - static const uint8_t branch_table[32] = { - 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, - 4, 4, 6, 6, 0, 0, 7, 7, - 4, 4, 0, 0, 4, 4, 0, 0 - }; - - /* - * The local variables take a little bit stack space, but it's less - * than what LZMA2 decoder takes, so it doesn't make sense to reduce - * stack usage here without doing that for the LZMA2 decoder too. - */ - - /* Loop counters */ - size_t i; - size_t j; - - /* Instruction slot (0, 1, or 2) in the 128-bit instruction word */ - uint32_t slot; - - /* Bitwise offset of the instruction indicated by slot */ - uint32_t bit_pos; - - /* bit_pos split into byte and bit parts */ - uint32_t byte_pos; - uint32_t bit_res; - - /* Address part of an instruction */ - uint32_t addr; - - /* Mask used to detect which instructions to convert */ - uint32_t mask; - - /* 41-bit instruction stored somewhere in the lowest 48 bits */ - uint64_t instr; - - /* Instruction normalized with bit_res for easier manipulation */ - uint64_t norm; - - for (i = 0; i + 16 <= size; i += 16) { - mask = branch_table[buf[i] & 0x1F]; - for (slot = 0, bit_pos = 5; slot < 3; ++slot, bit_pos += 41) { - if (((mask >> slot) & 1) == 0) - continue; - - byte_pos = bit_pos >> 3; - bit_res = bit_pos & 7; - instr = 0; - for (j = 0; j < 6; ++j) - instr |= (uint64_t)(buf[i + j + byte_pos]) - << (8 * j); - - norm = instr >> bit_res; - - if (((norm >> 37) & 0x0F) == 0x05 - && ((norm >> 9) & 0x07) == 0) { - addr = (norm >> 13) & 0x0FFFFF; - addr |= ((uint32_t)(norm >> 36) & 1) << 20; - addr <<= 4; - addr -= s->pos + (uint32_t)i; - addr >>= 4; - - norm &= ~((uint64_t)0x8FFFFF << 13); - norm |= (uint64_t)(addr & 0x0FFFFF) << 13; - norm |= (uint64_t)(addr & 0x100000) - << (36 - 20); - - instr &= (1 << bit_res) - 1; - instr |= norm << bit_res; - - for (j = 0; j < 6; j++) - buf[i + j + byte_pos] - = (uint8_t)(instr >> (8 * j)); - } - } - } - - return i; -} -#endif - -#ifdef XZ_DEC_ARM -static noinline_for_stack size_t XZ_FUNC bcj_arm( - struct xz_dec_bcj *s, uint8_t *buf, size_t size) -{ - size_t i; - uint32_t addr; - - for (i = 0; i + 4 <= size; i += 4) { - if (buf[i + 3] == 0xEB) { - addr = (uint32_t)buf[i] | ((uint32_t)buf[i + 1] << 8) - | ((uint32_t)buf[i + 2] << 16); - addr <<= 2; - addr -= s->pos + (uint32_t)i + 8; - addr >>= 2; - buf[i] = (uint8_t)addr; - buf[i + 1] = (uint8_t)(addr >> 8); - buf[i + 2] = (uint8_t)(addr >> 16); - } - } - - return i; -} -#endif - -#ifdef XZ_DEC_ARMTHUMB -static noinline_for_stack size_t XZ_FUNC bcj_armthumb( - struct xz_dec_bcj *s, uint8_t *buf, size_t size) -{ - size_t i; - uint32_t addr; - - for (i = 0; i + 4 <= size; i += 2) { - if ((buf[i + 1] & 0xF8) == 0xF0 - && (buf[i + 3] & 0xF8) == 0xF8) { - addr = (((uint32_t)buf[i + 1] & 0x07) << 19) - | ((uint32_t)buf[i] << 11) - | (((uint32_t)buf[i + 3] & 0x07) << 8) - | (uint32_t)buf[i + 2]; - addr <<= 1; - addr -= s->pos + (uint32_t)i + 4; - addr >>= 1; - buf[i + 1] = (uint8_t)(0xF0 | ((addr >> 19) & 0x07)); - buf[i] = (uint8_t)(addr >> 11); - buf[i + 3] = (uint8_t)(0xF8 | ((addr >> 8) & 0x07)); - buf[i + 2] = (uint8_t)addr; - i += 2; - } - } - - return i; -} -#endif - -#ifdef XZ_DEC_SPARC -static noinline_for_stack size_t XZ_FUNC bcj_sparc( - struct xz_dec_bcj *s, uint8_t *buf, size_t size) -{ - size_t i; - uint32_t instr; - - for (i = 0; i + 4 <= size; i += 4) { - instr = get_unaligned_be32(buf + i); - if ((instr >> 22) == 0x100 || (instr >> 22) == 0x1FF) { - instr <<= 2; - instr -= s->pos + (uint32_t)i; - instr >>= 2; - instr = ((uint32_t)0x40000000 - (instr & 0x400000)) - | 0x40000000 | (instr & 0x3FFFFF); - put_unaligned_be32(instr, buf + i); - } - } - - return i; -} -#endif - -/* - * Apply the selected BCJ filter. Update *pos and s->pos to match the amount - * of data that got filtered. - * - * NOTE: This is implemented as a switch statement to avoid using function - * pointers, which could be problematic in the kernel boot code, which must - * avoid pointers to static data (at least on x86). - */ -static void XZ_FUNC bcj_apply(struct xz_dec_bcj *s, - uint8_t *buf, size_t *pos, size_t size) -{ - size_t filtered; - - buf += *pos; - size -= *pos; - - switch (s->type) { -#ifdef XZ_DEC_X86 - case BCJ_X86: - filtered = bcj_x86(s, buf, size); - break; -#endif -#ifdef XZ_DEC_POWERPC - case BCJ_POWERPC: - filtered = bcj_powerpc(s, buf, size); - break; -#endif -#ifdef XZ_DEC_IA64 - case BCJ_IA64: - filtered = bcj_ia64(s, buf, size); - break; -#endif -#ifdef XZ_DEC_ARM - case BCJ_ARM: - filtered = bcj_arm(s, buf, size); - break; -#endif -#ifdef XZ_DEC_ARMTHUMB - case BCJ_ARMTHUMB: - filtered = bcj_armthumb(s, buf, size); - break; -#endif -#ifdef XZ_DEC_SPARC - case BCJ_SPARC: - filtered = bcj_sparc(s, buf, size); - break; -#endif - default: - /* Never reached but silence compiler warnings. */ - filtered = 0; - break; - } - - *pos += filtered; - s->pos += filtered; -} - -/* - * Flush pending filtered data from temp to the output buffer. - * Move the remaining mixture of possibly filtered and unfiltered - * data to the beginning of temp. - */ -static void XZ_FUNC bcj_flush(struct xz_dec_bcj *s, struct xz_buf *b) -{ - size_t copy_size; - - copy_size = min_t(size_t, s->temp.filtered, b->out_size - b->out_pos); - memcpy(b->out + b->out_pos, s->temp.buf, copy_size); - b->out_pos += copy_size; - - s->temp.filtered -= copy_size; - s->temp.size -= copy_size; - memmove(s->temp.buf, s->temp.buf + copy_size, s->temp.size); -} - -/* - * The BCJ filter functions are primitive in sense that they process the - * data in chunks of 1-16 bytes. To hide this issue, this function does - * some buffering. - */ -XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s, - struct xz_dec_lzma2 *lzma2, struct xz_buf *b) -{ - size_t out_start; - - /* - * Flush pending already filtered data to the output buffer. Return - * immediatelly if we couldn't flush everything, or if the next - * filter in the chain had already returned XZ_STREAM_END. - */ - if (s->temp.filtered > 0) { - bcj_flush(s, b); - if (s->temp.filtered > 0) - return XZ_OK; - - if (s->ret == XZ_STREAM_END) - return XZ_STREAM_END; - } - - /* - * If we have more output space than what is currently pending in - * temp, copy the unfiltered data from temp to the output buffer - * and try to fill the output buffer by decoding more data from the - * next filter in the chain. Apply the BCJ filter on the new data - * in the output buffer. If everything cannot be filtered, copy it - * to temp and rewind the output buffer position accordingly. - */ - if (s->temp.size < b->out_size - b->out_pos) { - out_start = b->out_pos; - memcpy(b->out + b->out_pos, s->temp.buf, s->temp.size); - b->out_pos += s->temp.size; - - s->ret = xz_dec_lzma2_run(lzma2, b); - if (s->ret != XZ_STREAM_END - && (s->ret != XZ_OK || s->single_call)) - return s->ret; - - bcj_apply(s, b->out, &out_start, b->out_pos); - - /* - * As an exception, if the next filter returned XZ_STREAM_END, - * we can do that too, since the last few bytes that remain - * unfiltered are meant to remain unfiltered. - */ - if (s->ret == XZ_STREAM_END) - return XZ_STREAM_END; - - s->temp.size = b->out_pos - out_start; - b->out_pos -= s->temp.size; - memcpy(s->temp.buf, b->out + b->out_pos, s->temp.size); - } - - /* - * If we have unfiltered data in temp, try to fill by decoding more - * data from the next filter. Apply the BCJ filter on temp. Then we - * hopefully can fill the actual output buffer by copying filtered - * data from temp. A mix of filtered and unfiltered data may be left - * in temp; it will be taken care on the next call to this function. - */ - if (s->temp.size > 0) { - /* Make b->out{,_pos,_size} temporarily point to s->temp. */ - s->out = b->out; - s->out_pos = b->out_pos; - s->out_size = b->out_size; - b->out = s->temp.buf; - b->out_pos = s->temp.size; - b->out_size = sizeof(s->temp.buf); - - s->ret = xz_dec_lzma2_run(lzma2, b); - - s->temp.size = b->out_pos; - b->out = s->out; - b->out_pos = s->out_pos; - b->out_size = s->out_size; - - if (s->ret != XZ_OK && s->ret != XZ_STREAM_END) - return s->ret; - - bcj_apply(s, s->temp.buf, &s->temp.filtered, s->temp.size); - - /* - * If the next filter returned XZ_STREAM_END, we mark that - * everything is filtered, since the last unfiltered bytes - * of the stream are meant to be left as is. - */ - if (s->ret == XZ_STREAM_END) - s->temp.filtered = s->temp.size; - - bcj_flush(s, b); - if (s->temp.filtered > 0) - return XZ_OK; - } - - return s->ret; -} - -XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call) -{ - struct xz_dec_bcj *s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s != NULL) - s->single_call = single_call; - - return s; -} - -XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset( - struct xz_dec_bcj *s, uint8_t id) -{ - switch (id) { -#ifdef XZ_DEC_X86 - case BCJ_X86: -#endif -#ifdef XZ_DEC_POWERPC - case BCJ_POWERPC: -#endif -#ifdef XZ_DEC_IA64 - case BCJ_IA64: -#endif -#ifdef XZ_DEC_ARM - case BCJ_ARM: -#endif -#ifdef XZ_DEC_ARMTHUMB - case BCJ_ARMTHUMB: -#endif -#ifdef XZ_DEC_SPARC - case BCJ_SPARC: -#endif - break; - - default: - /* Unsupported Filter ID */ - return XZ_OPTIONS_ERROR; - } - - s->type = id; - s->ret = XZ_OK; - s->pos = 0; - s->x86_prev_mask = 0; - s->temp.filtered = 0; - s->temp.size = 0; - - return XZ_OK; -} - -#endif diff --git a/archival/libunarchive/unxz/xz_dec_lzma2.c b/archival/libunarchive/unxz/xz_dec_lzma2.c deleted file mode 100644 index da71cb4d4..000000000 --- a/archival/libunarchive/unxz/xz_dec_lzma2.c +++ /dev/null @@ -1,1175 +0,0 @@ -/* - * LZMA2 decoder - * - * Authors: Lasse Collin - * Igor Pavlov - * - * This file has been put into the public domain. - * You can do whatever you want with this file. - */ - -#include "xz_private.h" -#include "xz_lzma2.h" - -/* - * Range decoder initialization eats the first five bytes of each LZMA chunk. - */ -#define RC_INIT_BYTES 5 - -/* - * Minimum number of usable input buffer to safely decode one LZMA symbol. - * The worst case is that we decode 22 bits using probabilities and 26 - * direct bits. This may decode at maximum of 20 bytes of input. However, - * lzma_main() does an extra normalization before returning, thus we - * need to put 21 here. - */ -#define LZMA_IN_REQUIRED 21 - -/* - * Dictionary (history buffer) - * - * These are always true: - * start <= pos <= full <= end - * pos <= limit <= end - * - * In multi-call mode, also these are true: - * end == size - * size <= size_max - * allocated <= size - * - * Most of these variables are size_t to support single-call mode, - * in which the dictionary variables address the actual output - * buffer directly. - */ -struct dictionary { - /* Beginning of the history buffer */ - uint8_t *buf; - - /* Old position in buf (before decoding more data) */ - size_t start; - - /* Position in buf */ - size_t pos; - - /* - * How full dictionary is. This is used to detect corrupt input that - * would read beyond the beginning of the uncompressed stream. - */ - size_t full; - - /* Write limit; we don't write to buf[limit] or later bytes. */ - size_t limit; - - /* - * End of the dictionary buffer. In multi-call mode, this is - * the same as the dictionary size. In single-call mode, this - * indicates the size of the output buffer. - */ - size_t end; - - /* - * Size of the dictionary as specified in Block Header. This is used - * together with "full" to detect corrupt input that would make us - * read beyond the beginning of the uncompressed stream. - */ - uint32_t size; - - /* - * Maximum allowed dictionary size in multi-call mode. - * This is ignored in single-call mode. - */ - uint32_t size_max; - - /* - * Amount of memory currently allocated for the dictionary. - * This is used only with XZ_DYNALLOC. (With XZ_PREALLOC, - * size_max is always the same as the allocated size.) - */ - uint32_t allocated; - - /* Operation mode */ - enum xz_mode mode; -}; - -/* Range decoder */ -struct rc_dec { - uint32_t range; - uint32_t code; - - /* - * Number of initializing bytes remaining to be read - * by rc_read_init(). - */ - uint32_t init_bytes_left; - - /* - * Buffer from which we read our input. It can be either - * temp.buf or the caller-provided input buffer. - */ - const uint8_t *in; - size_t in_pos; - size_t in_limit; -}; - -/* Probabilities for a length decoder. */ -struct lzma_len_dec { - /* Probability of match length being at least 10 */ - uint16_t choice; - - /* Probability of match length being at least 18 */ - uint16_t choice2; - - /* Probabilities for match lengths 2-9 */ - uint16_t low[POS_STATES_MAX][LEN_LOW_SYMBOLS]; - - /* Probabilities for match lengths 10-17 */ - uint16_t mid[POS_STATES_MAX][LEN_MID_SYMBOLS]; - - /* Probabilities for match lengths 18-273 */ - uint16_t high[LEN_HIGH_SYMBOLS]; -}; - -struct lzma_dec { - /* Distances of latest four matches */ - uint32_t rep0; - uint32_t rep1; - uint32_t rep2; - uint32_t rep3; - - /* Types of the most recently seen LZMA symbols */ - enum lzma_state state; - - /* - * Length of a match. This is updated so that dict_repeat can - * be called again to finish repeating the whole match. - */ - uint32_t len; - - /* - * LZMA properties or related bit masks (number of literal - * context bits, a mask dervied from the number of literal - * position bits, and a mask dervied from the number - * position bits) - */ - uint32_t lc; - uint32_t literal_pos_mask; /* (1 << lp) - 1 */ - uint32_t pos_mask; /* (1 << pb) - 1 */ - - /* If 1, it's a match. Otherwise it's a single 8-bit literal. */ - uint16_t is_match[STATES][POS_STATES_MAX]; - - /* If 1, it's a repeated match. The distance is one of rep0 .. rep3. */ - uint16_t is_rep[STATES]; - - /* - * If 0, distance of a repeated match is rep0. - * Otherwise check is_rep1. - */ - uint16_t is_rep0[STATES]; - - /* - * If 0, distance of a repeated match is rep1. - * Otherwise check is_rep2. - */ - uint16_t is_rep1[STATES]; - - /* If 0, distance of a repeated match is rep2. Otherwise it is rep3. */ - uint16_t is_rep2[STATES]; - - /* - * If 1, the repeated match has length of one byte. Otherwise - * the length is decoded from rep_len_decoder. - */ - uint16_t is_rep0_long[STATES][POS_STATES_MAX]; - - /* - * Probability tree for the highest two bits of the match - * distance. There is a separate probability tree for match - * lengths of 2 (i.e. MATCH_LEN_MIN), 3, 4, and [5, 273]. - */ - uint16_t dist_slot[DIST_STATES][DIST_SLOTS]; - - /* - * Probility trees for additional bits for match distance - * when the distance is in the range [4, 127]. - */ - uint16_t dist_special[FULL_DISTANCES - DIST_MODEL_END]; - - /* - * Probability tree for the lowest four bits of a match - * distance that is equal to or greater than 128. - */ - uint16_t dist_align[ALIGN_SIZE]; - - /* Length of a normal match */ - struct lzma_len_dec match_len_dec; - - /* Length of a repeated match */ - struct lzma_len_dec rep_len_dec; - - /* Probabilities of literals */ - uint16_t literal[LITERAL_CODERS_MAX][LITERAL_CODER_SIZE]; -}; - -struct lzma2_dec { - /* Position in xz_dec_lzma2_run(). */ - enum lzma2_seq { - SEQ_CONTROL, - SEQ_UNCOMPRESSED_1, - SEQ_UNCOMPRESSED_2, - SEQ_COMPRESSED_0, - SEQ_COMPRESSED_1, - SEQ_PROPERTIES, - SEQ_LZMA_PREPARE, - SEQ_LZMA_RUN, - SEQ_COPY - } sequence; - - /* Next position after decoding the compressed size of the chunk. */ - enum lzma2_seq next_sequence; - - /* Uncompressed size of LZMA chunk (2 MiB at maximum) */ - uint32_t uncompressed; - - /* - * Compressed size of LZMA chunk or compressed/uncompressed - * size of uncompressed chunk (64 KiB at maximum) - */ - uint32_t compressed; - - /* - * True if dictionary reset is needed. This is false before - * the first chunk (LZMA or uncompressed). - */ - bool need_dict_reset; - - /* - * True if new LZMA properties are needed. This is false - * before the first LZMA chunk. - */ - bool need_props; -}; - -struct xz_dec_lzma2 { - /* - * The order below is important on x86 to reduce code size and - * it shouldn't hurt on other platforms. Everything up to and - * including lzma.pos_mask are in the first 128 bytes on x86-32, - * which allows using smaller instructions to access those - * variables. On x86-64, fewer variables fit into the first 128 - * bytes, but this is still the best order without sacrificing - * the readability by splitting the structures. - */ - struct rc_dec rc; - struct dictionary dict; - struct lzma2_dec lzma2; - struct lzma_dec lzma; - - /* - * Temporary buffer which holds small number of input bytes between - * decoder calls. See lzma2_lzma() for details. - */ - struct { - uint32_t size; - uint8_t buf[3 * LZMA_IN_REQUIRED]; - } temp; -}; - -/************** - * Dictionary * - **************/ - -/* - * Reset the dictionary state. When in single-call mode, set up the beginning - * of the dictionary to point to the actual output buffer. - */ -static void XZ_FUNC dict_reset(struct dictionary *dict, struct xz_buf *b) -{ - if (DEC_IS_SINGLE(dict->mode)) { - dict->buf = b->out + b->out_pos; - dict->end = b->out_size - b->out_pos; - } - - dict->start = 0; - dict->pos = 0; - dict->limit = 0; - dict->full = 0; -} - -/* Set dictionary write limit */ -static void XZ_FUNC dict_limit(struct dictionary *dict, size_t out_max) -{ - if (dict->end - dict->pos <= out_max) - dict->limit = dict->end; - else - dict->limit = dict->pos + out_max; -} - -/* Return true if at least one byte can be written into the dictionary. */ -static __always_inline bool XZ_FUNC dict_has_space(const struct dictionary *dict) -{ - return dict->pos < dict->limit; -} - -/* - * Get a byte from the dictionary at the given distance. The distance is - * assumed to valid, or as a special case, zero when the dictionary is - * still empty. This special case is needed for single-call decoding to - * avoid writing a '\0' to the end of the destination buffer. - */ -static __always_inline uint32_t XZ_FUNC dict_get( - const struct dictionary *dict, uint32_t dist) -{ - size_t offset = dict->pos - dist - 1; - - if (dist >= dict->pos) - offset += dict->end; - - return dict->full > 0 ? dict->buf[offset] : 0; -} - -/* - * Put one byte into the dictionary. It is assumed that there is space for it. - */ -static inline void XZ_FUNC dict_put(struct dictionary *dict, uint8_t byte) -{ - dict->buf[dict->pos++] = byte; - - if (dict->full < dict->pos) - dict->full = dict->pos; -} - -/* - * Repeat given number of bytes from the given distance. If the distance is - * invalid, false is returned. On success, true is returned and *len is - * updated to indicate how many bytes were left to be repeated. - */ -static bool XZ_FUNC dict_repeat( - struct dictionary *dict, uint32_t *len, uint32_t dist) -{ - size_t back; - uint32_t left; - - if (dist >= dict->full || dist >= dict->size) - return false; - - left = min_t(size_t, dict->limit - dict->pos, *len); - *len -= left; - - back = dict->pos - dist - 1; - if (dist >= dict->pos) - back += dict->end; - - do { - dict->buf[dict->pos++] = dict->buf[back++]; - if (back == dict->end) - back = 0; - } while (--left > 0); - - if (dict->full < dict->pos) - dict->full = dict->pos; - - return true; -} - -/* Copy uncompressed data as is from input to dictionary and output buffers. */ -static void XZ_FUNC dict_uncompressed( - struct dictionary *dict, struct xz_buf *b, uint32_t *left) -{ - size_t copy_size; - - while (*left > 0 && b->in_pos < b->in_size - && b->out_pos < b->out_size) { - copy_size = min(b->in_size - b->in_pos, - b->out_size - b->out_pos); - if (copy_size > dict->end - dict->pos) - copy_size = dict->end - dict->pos; - if (copy_size > *left) - copy_size = *left; - - *left -= copy_size; - - memcpy(dict->buf + dict->pos, b->in + b->in_pos, copy_size); - dict->pos += copy_size; - - if (dict->full < dict->pos) - dict->full = dict->pos; - - if (DEC_IS_MULTI(dict->mode)) { - if (dict->pos == dict->end) - dict->pos = 0; - - memcpy(b->out + b->out_pos, b->in + b->in_pos, - copy_size); - } - - dict->start = dict->pos; - - b->out_pos += copy_size; - b->in_pos += copy_size; - - } -} - -/* - * Flush pending data from dictionary to b->out. It is assumed that there is - * enough space in b->out. This is guaranteed because caller uses dict_limit() - * before decoding data into the dictionary. - */ -static uint32_t XZ_FUNC dict_flush(struct dictionary *dict, struct xz_buf *b) -{ - size_t copy_size = dict->pos - dict->start; - - if (DEC_IS_MULTI(dict->mode)) { - if (dict->pos == dict->end) - dict->pos = 0; - - memcpy(b->out + b->out_pos, dict->buf + dict->start, - copy_size); - } - - dict->start = dict->pos; - b->out_pos += copy_size; - return copy_size; -} - -/***************** - * Range decoder * - *****************/ - -/* Reset the range decoder. */ -static void XZ_FUNC rc_reset(struct rc_dec *rc) -{ - rc->range = (uint32_t)-1; - rc->code = 0; - rc->init_bytes_left = RC_INIT_BYTES; -} - -/* - * Read the first five initial bytes into rc->code if they haven't been - * read already. (Yes, the first byte gets completely ignored.) - */ -static bool XZ_FUNC rc_read_init(struct rc_dec *rc, struct xz_buf *b) -{ - while (rc->init_bytes_left > 0) { - if (b->in_pos == b->in_size) - return false; - - rc->code = (rc->code << 8) + b->in[b->in_pos++]; - --rc->init_bytes_left; - } - - return true; -} - -/* Return true if there may not be enough input for the next decoding loop. */ -static inline bool XZ_FUNC rc_limit_exceeded(const struct rc_dec *rc) -{ - return rc->in_pos > rc->in_limit; -} - -/* - * Return true if it is possible (from point of view of range decoder) that - * we have reached the end of the LZMA chunk. - */ -static inline bool XZ_FUNC rc_is_finished(const struct rc_dec *rc) -{ - return rc->code == 0; -} - -/* Read the next input byte if needed. */ -static __always_inline void XZ_FUNC rc_normalize(struct rc_dec *rc) -{ - if (rc->range < RC_TOP_VALUE) { - rc->range <<= RC_SHIFT_BITS; - rc->code = (rc->code << RC_SHIFT_BITS) + rc->in[rc->in_pos++]; - } -} - -/* - * Decode one bit. In some versions, this function has been splitted in three - * functions so that the compiler is supposed to be able to more easily avoid - * an extra branch. In this particular version of the LZMA decoder, this - * doesn't seem to be a good idea (tested with GCC 3.3.6, 3.4.6, and 4.3.3 - * on x86). Using a non-splitted version results in nicer looking code too. - * - * NOTE: This must return an int. Do not make it return a bool or the speed - * of the code generated by GCC 3.x decreases 10-15 %. (GCC 4.3 doesn't care, - * and it generates 10-20 % faster code than GCC 3.x from this file anyway.) - */ -static __always_inline int XZ_FUNC rc_bit(struct rc_dec *rc, uint16_t *prob) -{ - uint32_t bound; - int bit; - - rc_normalize(rc); - bound = (rc->range >> RC_BIT_MODEL_TOTAL_BITS) * *prob; - if (rc->code < bound) { - rc->range = bound; - *prob += (RC_BIT_MODEL_TOTAL - *prob) >> RC_MOVE_BITS; - bit = 0; - } else { - rc->range -= bound; - rc->code -= bound; - *prob -= *prob >> RC_MOVE_BITS; - bit = 1; - } - - return bit; -} - -/* Decode a bittree starting from the most significant bit. */ -static __always_inline uint32_t XZ_FUNC rc_bittree( - struct rc_dec *rc, uint16_t *probs, uint32_t limit) -{ - uint32_t symbol = 1; - - do { - if (rc_bit(rc, &probs[symbol])) - symbol = (symbol << 1) + 1; - else - symbol <<= 1; - } while (symbol < limit); - - return symbol; -} - -/* Decode a bittree starting from the least significant bit. */ -static __always_inline void XZ_FUNC rc_bittree_reverse(struct rc_dec *rc, - uint16_t *probs, uint32_t *dest, uint32_t limit) -{ - uint32_t symbol = 1; - uint32_t i = 0; - - do { - if (rc_bit(rc, &probs[symbol])) { - symbol = (symbol << 1) + 1; - *dest += 1 << i; - } else { - symbol <<= 1; - } - } while (++i < limit); -} - -/* Decode direct bits (fixed fifty-fifty probability) */ -static inline void XZ_FUNC rc_direct( - struct rc_dec *rc, uint32_t *dest, uint32_t limit) -{ - uint32_t mask; - - do { - rc_normalize(rc); - rc->range >>= 1; - rc->code -= rc->range; - mask = (uint32_t)0 - (rc->code >> 31); - rc->code += rc->range & mask; - *dest = (*dest << 1) + (mask + 1); - } while (--limit > 0); -} - -/******** - * LZMA * - ********/ - -/* Get pointer to literal coder probability array. */ -static uint16_t * XZ_FUNC lzma_literal_probs(struct xz_dec_lzma2 *s) -{ - uint32_t prev_byte = dict_get(&s->dict, 0); - uint32_t low = prev_byte >> (8 - s->lzma.lc); - uint32_t high = (s->dict.pos & s->lzma.literal_pos_mask) << s->lzma.lc; - return s->lzma.literal[low + high]; -} - -/* Decode a literal (one 8-bit byte) */ -static void XZ_FUNC lzma_literal(struct xz_dec_lzma2 *s) -{ - uint16_t *probs; - uint32_t symbol; - uint32_t match_byte; - uint32_t match_bit; - uint32_t offset; - uint32_t i; - - probs = lzma_literal_probs(s); - - if (lzma_state_is_literal(s->lzma.state)) { - symbol = rc_bittree(&s->rc, probs, 0x100); - } else { - symbol = 1; - match_byte = dict_get(&s->dict, s->lzma.rep0) << 1; - offset = 0x100; - - do { - match_bit = match_byte & offset; - match_byte <<= 1; - i = offset + match_bit + symbol; - - if (rc_bit(&s->rc, &probs[i])) { - symbol = (symbol << 1) + 1; - offset &= match_bit; - } else { - symbol <<= 1; - offset &= ~match_bit; - } - } while (symbol < 0x100); - } - - dict_put(&s->dict, (uint8_t)symbol); - lzma_state_literal(&s->lzma.state); -} - -/* Decode the length of the match into s->lzma.len. */ -static void XZ_FUNC lzma_len(struct xz_dec_lzma2 *s, struct lzma_len_dec *l, - uint32_t pos_state) -{ - uint16_t *probs; - uint32_t limit; - - if (!rc_bit(&s->rc, &l->choice)) { - probs = l->low[pos_state]; - limit = LEN_LOW_SYMBOLS; - s->lzma.len = MATCH_LEN_MIN; - } else { - if (!rc_bit(&s->rc, &l->choice2)) { - probs = l->mid[pos_state]; - limit = LEN_MID_SYMBOLS; - s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS; - } else { - probs = l->high; - limit = LEN_HIGH_SYMBOLS; - s->lzma.len = MATCH_LEN_MIN + LEN_LOW_SYMBOLS - + LEN_MID_SYMBOLS; - } - } - - s->lzma.len += rc_bittree(&s->rc, probs, limit) - limit; -} - -/* Decode a match. The distance will be stored in s->lzma.rep0. */ -static void XZ_FUNC lzma_match(struct xz_dec_lzma2 *s, uint32_t pos_state) -{ - uint16_t *probs; - uint32_t dist_slot; - uint32_t limit; - - lzma_state_match(&s->lzma.state); - - s->lzma.rep3 = s->lzma.rep2; - s->lzma.rep2 = s->lzma.rep1; - s->lzma.rep1 = s->lzma.rep0; - - lzma_len(s, &s->lzma.match_len_dec, pos_state); - - probs = s->lzma.dist_slot[lzma_get_dist_state(s->lzma.len)]; - dist_slot = rc_bittree(&s->rc, probs, DIST_SLOTS) - DIST_SLOTS; - - if (dist_slot < DIST_MODEL_START) { - s->lzma.rep0 = dist_slot; - } else { - limit = (dist_slot >> 1) - 1; - s->lzma.rep0 = 2 + (dist_slot & 1); - - if (dist_slot < DIST_MODEL_END) { - s->lzma.rep0 <<= limit; - probs = s->lzma.dist_special + s->lzma.rep0 - - dist_slot - 1; - rc_bittree_reverse(&s->rc, probs, - &s->lzma.rep0, limit); - } else { - rc_direct(&s->rc, &s->lzma.rep0, limit - ALIGN_BITS); - s->lzma.rep0 <<= ALIGN_BITS; - rc_bittree_reverse(&s->rc, s->lzma.dist_align, - &s->lzma.rep0, ALIGN_BITS); - } - } -} - -/* - * Decode a repeated match. The distance is one of the four most recently - * seen matches. The distance will be stored in s->lzma.rep0. - */ -static void XZ_FUNC lzma_rep_match(struct xz_dec_lzma2 *s, uint32_t pos_state) -{ - uint32_t tmp; - - if (!rc_bit(&s->rc, &s->lzma.is_rep0[s->lzma.state])) { - if (!rc_bit(&s->rc, &s->lzma.is_rep0_long[ - s->lzma.state][pos_state])) { - lzma_state_short_rep(&s->lzma.state); - s->lzma.len = 1; - return; - } - } else { - if (!rc_bit(&s->rc, &s->lzma.is_rep1[s->lzma.state])) { - tmp = s->lzma.rep1; - } else { - if (!rc_bit(&s->rc, &s->lzma.is_rep2[s->lzma.state])) { - tmp = s->lzma.rep2; - } else { - tmp = s->lzma.rep3; - s->lzma.rep3 = s->lzma.rep2; - } - - s->lzma.rep2 = s->lzma.rep1; - } - - s->lzma.rep1 = s->lzma.rep0; - s->lzma.rep0 = tmp; - } - - lzma_state_long_rep(&s->lzma.state); - lzma_len(s, &s->lzma.rep_len_dec, pos_state); -} - -/* LZMA decoder core */ -static bool XZ_FUNC lzma_main(struct xz_dec_lzma2 *s) -{ - uint32_t pos_state; - - /* - * If the dictionary was reached during the previous call, try to - * finish the possibly pending repeat in the dictionary. - */ - if (dict_has_space(&s->dict) && s->lzma.len > 0) - dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0); - - /* - * Decode more LZMA symbols. One iteration may consume up to - * LZMA_IN_REQUIRED - 1 bytes. - */ - while (dict_has_space(&s->dict) && !rc_limit_exceeded(&s->rc)) { - pos_state = s->dict.pos & s->lzma.pos_mask; - - if (!rc_bit(&s->rc, &s->lzma.is_match[ - s->lzma.state][pos_state])) { - lzma_literal(s); - } else { - if (rc_bit(&s->rc, &s->lzma.is_rep[s->lzma.state])) - lzma_rep_match(s, pos_state); - else - lzma_match(s, pos_state); - - if (!dict_repeat(&s->dict, &s->lzma.len, s->lzma.rep0)) - return false; - } - } - - /* - * Having the range decoder always normalized when we are outside - * this function makes it easier to correctly handle end of the chunk. - */ - rc_normalize(&s->rc); - - return true; -} - -/* - * Reset the LZMA decoder and range decoder state. Dictionary is nore reset - * here, because LZMA state may be reset without resetting the dictionary. - */ -static void XZ_FUNC lzma_reset(struct xz_dec_lzma2 *s) -{ - uint16_t *probs; - size_t i; - - s->lzma.state = STATE_LIT_LIT; - s->lzma.rep0 = 0; - s->lzma.rep1 = 0; - s->lzma.rep2 = 0; - s->lzma.rep3 = 0; - - /* - * All probabilities are initialized to the same value. This hack - * makes the code smaller by avoiding a separate loop for each - * probability array. - * - * This could be optimized so that only that part of literal - * probabilities that are actually required. In the common case - * we would write 12 KiB less. - */ - probs = s->lzma.is_match[0]; - for (i = 0; i < PROBS_TOTAL; ++i) - probs[i] = RC_BIT_MODEL_TOTAL / 2; - - rc_reset(&s->rc); -} - -/* - * Decode and validate LZMA properties (lc/lp/pb) and calculate the bit masks - * from the decoded lp and pb values. On success, the LZMA decoder state is - * reset and true is returned. - */ -static bool XZ_FUNC lzma_props(struct xz_dec_lzma2 *s, uint8_t props) -{ - if (props > (4 * 5 + 4) * 9 + 8) - return false; - - s->lzma.pos_mask = 0; - while (props >= 9 * 5) { - props -= 9 * 5; - ++s->lzma.pos_mask; - } - - s->lzma.pos_mask = (1 << s->lzma.pos_mask) - 1; - - s->lzma.literal_pos_mask = 0; - while (props >= 9) { - props -= 9; - ++s->lzma.literal_pos_mask; - } - - s->lzma.lc = props; - - if (s->lzma.lc + s->lzma.literal_pos_mask > 4) - return false; - - s->lzma.literal_pos_mask = (1 << s->lzma.literal_pos_mask) - 1; - - lzma_reset(s); - - return true; -} - -/********* - * LZMA2 * - *********/ - -/* - * The LZMA decoder assumes that if the input limit (s->rc.in_limit) hasn't - * been exceeded, it is safe to read up to LZMA_IN_REQUIRED bytes. This - * wrapper function takes care of making the LZMA decoder's assumption safe. - * - * As long as there is plenty of input left to be decoded in the current LZMA - * chunk, we decode directly from the caller-supplied input buffer until - * there's LZMA_IN_REQUIRED bytes left. Those remaining bytes are copied into - * s->temp.buf, which (hopefully) gets filled on the next call to this - * function. We decode a few bytes from the temporary buffer so that we can - * continue decoding from the caller-supplied input buffer again. - */ -static bool XZ_FUNC lzma2_lzma(struct xz_dec_lzma2 *s, struct xz_buf *b) -{ - size_t in_avail; - uint32_t tmp; - - in_avail = b->in_size - b->in_pos; - if (s->temp.size > 0 || s->lzma2.compressed == 0) { - tmp = 2 * LZMA_IN_REQUIRED - s->temp.size; - if (tmp > s->lzma2.compressed - s->temp.size) - tmp = s->lzma2.compressed - s->temp.size; - if (tmp > in_avail) - tmp = in_avail; - - memcpy(s->temp.buf + s->temp.size, b->in + b->in_pos, tmp); - - if (s->temp.size + tmp == s->lzma2.compressed) { - memzero(s->temp.buf + s->temp.size + tmp, - sizeof(s->temp.buf) - - s->temp.size - tmp); - s->rc.in_limit = s->temp.size + tmp; - } else if (s->temp.size + tmp < LZMA_IN_REQUIRED) { - s->temp.size += tmp; - b->in_pos += tmp; - return true; - } else { - s->rc.in_limit = s->temp.size + tmp - LZMA_IN_REQUIRED; - } - - s->rc.in = s->temp.buf; - s->rc.in_pos = 0; - - if (!lzma_main(s) || s->rc.in_pos > s->temp.size + tmp) - return false; - - s->lzma2.compressed -= s->rc.in_pos; - - if (s->rc.in_pos < s->temp.size) { - s->temp.size -= s->rc.in_pos; - memmove(s->temp.buf, s->temp.buf + s->rc.in_pos, - s->temp.size); - return true; - } - - b->in_pos += s->rc.in_pos - s->temp.size; - s->temp.size = 0; - } - - in_avail = b->in_size - b->in_pos; - if (in_avail >= LZMA_IN_REQUIRED) { - s->rc.in = b->in; - s->rc.in_pos = b->in_pos; - - if (in_avail >= s->lzma2.compressed + LZMA_IN_REQUIRED) - s->rc.in_limit = b->in_pos + s->lzma2.compressed; - else - s->rc.in_limit = b->in_size - LZMA_IN_REQUIRED; - - if (!lzma_main(s)) - return false; - - in_avail = s->rc.in_pos - b->in_pos; - if (in_avail > s->lzma2.compressed) - return false; - - s->lzma2.compressed -= in_avail; - b->in_pos = s->rc.in_pos; - } - - in_avail = b->in_size - b->in_pos; - if (in_avail < LZMA_IN_REQUIRED) { - if (in_avail > s->lzma2.compressed) - in_avail = s->lzma2.compressed; - - memcpy(s->temp.buf, b->in + b->in_pos, in_avail); - s->temp.size = in_avail; - b->in_pos += in_avail; - } - - return true; -} - -/* - * Take care of the LZMA2 control layer, and forward the job of actual LZMA - * decoding or copying of uncompressed chunks to other functions. - */ -XZ_EXTERN NOINLINE enum xz_ret XZ_FUNC xz_dec_lzma2_run( - struct xz_dec_lzma2 *s, struct xz_buf *b) -{ - uint32_t tmp; - - while (b->in_pos < b->in_size || s->lzma2.sequence == SEQ_LZMA_RUN) { - switch (s->lzma2.sequence) { - case SEQ_CONTROL: - /* - * LZMA2 control byte - * - * Exact values: - * 0x00 End marker - * 0x01 Dictionary reset followed by - * an uncompressed chunk - * 0x02 Uncompressed chunk (no dictionary reset) - * - * Highest three bits (s->control & 0xE0): - * 0xE0 Dictionary reset, new properties and state - * reset, followed by LZMA compressed chunk - * 0xC0 New properties and state reset, followed - * by LZMA compressed chunk (no dictionary - * reset) - * 0xA0 State reset using old properties, - * followed by LZMA compressed chunk (no - * dictionary reset) - * 0x80 LZMA chunk (no dictionary or state reset) - * - * For LZMA compressed chunks, the lowest five bits - * (s->control & 1F) are the highest bits of the - * uncompressed size (bits 16-20). - * - * A new LZMA2 stream must begin with a dictionary - * reset. The first LZMA chunk must set new - * properties and reset the LZMA state. - * - * Values that don't match anything described above - * are invalid and we return XZ_DATA_ERROR. - */ - tmp = b->in[b->in_pos++]; - - if (tmp >= 0xE0 || tmp == 0x01) { - s->lzma2.need_props = true; - s->lzma2.need_dict_reset = false; - dict_reset(&s->dict, b); - } else if (s->lzma2.need_dict_reset) { - return XZ_DATA_ERROR; - } - - if (tmp >= 0x80) { - s->lzma2.uncompressed = (tmp & 0x1F) << 16; - s->lzma2.sequence = SEQ_UNCOMPRESSED_1; - - if (tmp >= 0xC0) { - /* - * When there are new properties, - * state reset is done at - * SEQ_PROPERTIES. - */ - s->lzma2.need_props = false; - s->lzma2.next_sequence - = SEQ_PROPERTIES; - - } else if (s->lzma2.need_props) { - return XZ_DATA_ERROR; - - } else { - s->lzma2.next_sequence - = SEQ_LZMA_PREPARE; - if (tmp >= 0xA0) - lzma_reset(s); - } - } else { - if (tmp == 0x00) - return XZ_STREAM_END; - - if (tmp > 0x02) - return XZ_DATA_ERROR; - - s->lzma2.sequence = SEQ_COMPRESSED_0; - s->lzma2.next_sequence = SEQ_COPY; - } - - break; - - case SEQ_UNCOMPRESSED_1: - s->lzma2.uncompressed - += (uint32_t)b->in[b->in_pos++] << 8; - s->lzma2.sequence = SEQ_UNCOMPRESSED_2; - break; - - case SEQ_UNCOMPRESSED_2: - s->lzma2.uncompressed - += (uint32_t)b->in[b->in_pos++] + 1; - s->lzma2.sequence = SEQ_COMPRESSED_0; - break; - - case SEQ_COMPRESSED_0: - s->lzma2.compressed - = (uint32_t)b->in[b->in_pos++] << 8; - s->lzma2.sequence = SEQ_COMPRESSED_1; - break; - - case SEQ_COMPRESSED_1: - s->lzma2.compressed - += (uint32_t)b->in[b->in_pos++] + 1; - s->lzma2.sequence = s->lzma2.next_sequence; - break; - - case SEQ_PROPERTIES: - if (!lzma_props(s, b->in[b->in_pos++])) - return XZ_DATA_ERROR; - - s->lzma2.sequence = SEQ_LZMA_PREPARE; - - case SEQ_LZMA_PREPARE: - if (s->lzma2.compressed < RC_INIT_BYTES) - return XZ_DATA_ERROR; - - if (!rc_read_init(&s->rc, b)) - return XZ_OK; - - s->lzma2.compressed -= RC_INIT_BYTES; - s->lzma2.sequence = SEQ_LZMA_RUN; - - case SEQ_LZMA_RUN: - /* - * Set dictionary limit to indicate how much we want - * to be encoded at maximum. Decode new data into the - * dictionary. Flush the new data from dictionary to - * b->out. Check if we finished decoding this chunk. - * In case the dictionary got full but we didn't fill - * the output buffer yet, we may run this loop - * multiple times without changing s->lzma2.sequence. - */ - dict_limit(&s->dict, min_t(size_t, - b->out_size - b->out_pos, - s->lzma2.uncompressed)); - if (!lzma2_lzma(s, b)) - return XZ_DATA_ERROR; - - s->lzma2.uncompressed -= dict_flush(&s->dict, b); - - if (s->lzma2.uncompressed == 0) { - if (s->lzma2.compressed > 0 || s->lzma.len > 0 - || !rc_is_finished(&s->rc)) - return XZ_DATA_ERROR; - - rc_reset(&s->rc); - s->lzma2.sequence = SEQ_CONTROL; - - } else if (b->out_pos == b->out_size - || (b->in_pos == b->in_size - && s->temp.size - < s->lzma2.compressed)) { - return XZ_OK; - } - - break; - - case SEQ_COPY: - dict_uncompressed(&s->dict, b, &s->lzma2.compressed); - if (s->lzma2.compressed > 0) - return XZ_OK; - - s->lzma2.sequence = SEQ_CONTROL; - break; - } - } - - return XZ_OK; -} - -XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create( - enum xz_mode mode, uint32_t dict_max) -{ - struct xz_dec_lzma2 *s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) - return NULL; - - s->dict.mode = mode; - s->dict.size_max = dict_max; - - if (DEC_IS_PREALLOC(mode)) { - s->dict.buf = vmalloc(dict_max); - if (s->dict.buf == NULL) { - kfree(s); - return NULL; - } - } else if (DEC_IS_DYNALLOC(mode)) { - s->dict.buf = NULL; - s->dict.allocated = 0; - } - - return s; -} - -XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset( - struct xz_dec_lzma2 *s, uint8_t props) -{ - /* This limits dictionary size to 3 GiB to keep parsing simpler. */ - if (props > 39) - return XZ_OPTIONS_ERROR; - - s->dict.size = 2 + (props & 1); - s->dict.size <<= (props >> 1) + 11; - - if (DEC_IS_MULTI(s->dict.mode)) { - if (s->dict.size > s->dict.size_max) - return XZ_MEMLIMIT_ERROR; - - s->dict.end = s->dict.size; - - if (DEC_IS_DYNALLOC(s->dict.mode)) { - if (s->dict.allocated < s->dict.size) { - vfree(s->dict.buf); - s->dict.buf = vmalloc(s->dict.size); - if (s->dict.buf == NULL) { - s->dict.allocated = 0; - return XZ_MEM_ERROR; - } - } - } - } - - s->lzma.len = 0; - - s->lzma2.sequence = SEQ_CONTROL; - s->lzma2.need_dict_reset = true; - - s->temp.size = 0; - - return XZ_OK; -} - -XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s) -{ - if (DEC_IS_MULTI(s->dict.mode)) - vfree(s->dict.buf); - - kfree(s); -} diff --git a/archival/libunarchive/unxz/xz_dec_stream.c b/archival/libunarchive/unxz/xz_dec_stream.c deleted file mode 100644 index bdcbf1ba3..000000000 --- a/archival/libunarchive/unxz/xz_dec_stream.c +++ /dev/null @@ -1,822 +0,0 @@ -/* - * .xz Stream decoder - * - * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. - */ - -#include "xz_private.h" -#include "xz_stream.h" - -/* Hash used to validate the Index field */ -struct xz_dec_hash { - vli_type unpadded; - vli_type uncompressed; - uint32_t crc32; -}; - -struct xz_dec { - /* Position in dec_main() */ - enum { - SEQ_STREAM_HEADER, - SEQ_BLOCK_START, - SEQ_BLOCK_HEADER, - SEQ_BLOCK_UNCOMPRESS, - SEQ_BLOCK_PADDING, - SEQ_BLOCK_CHECK, - SEQ_INDEX, - SEQ_INDEX_PADDING, - SEQ_INDEX_CRC32, - SEQ_STREAM_FOOTER - } sequence; - - /* Position in variable-length integers and Check fields */ - uint32_t pos; - - /* Variable-length integer decoded by dec_vli() */ - vli_type vli; - - /* Saved in_pos and out_pos */ - size_t in_start; - size_t out_start; - - /* CRC32 value in Block or Index */ - uint32_t crc32; - - /* Type of the integrity check calculated from uncompressed data */ - enum xz_check check_type; - - /* Operation mode */ - enum xz_mode mode; - - /* - * True if the next call to xz_dec_run() is allowed to return - * XZ_BUF_ERROR. - */ - bool allow_buf_error; - - /* Information stored in Block Header */ - struct { - /* - * Value stored in the Compressed Size field, or - * VLI_UNKNOWN if Compressed Size is not present. - */ - vli_type compressed; - - /* - * Value stored in the Uncompressed Size field, or - * VLI_UNKNOWN if Uncompressed Size is not present. - */ - vli_type uncompressed; - - /* Size of the Block Header field */ - uint32_t size; - } block_header; - - /* Information collected when decoding Blocks */ - struct { - /* Observed compressed size of the current Block */ - vli_type compressed; - - /* Observed uncompressed size of the current Block */ - vli_type uncompressed; - - /* Number of Blocks decoded so far */ - vli_type count; - - /* - * Hash calculated from the Block sizes. This is used to - * validate the Index field. - */ - struct xz_dec_hash hash; - } block; - - /* Variables needed when verifying the Index field */ - struct { - /* Position in dec_index() */ - enum { - SEQ_INDEX_COUNT, - SEQ_INDEX_UNPADDED, - SEQ_INDEX_UNCOMPRESSED - } sequence; - - /* Size of the Index in bytes */ - vli_type size; - - /* Number of Records (matches block.count in valid files) */ - vli_type count; - - /* - * Hash calculated from the Records (matches block.hash in - * valid files). - */ - struct xz_dec_hash hash; - } index; - - /* - * Temporary buffer needed to hold Stream Header, Block Header, - * and Stream Footer. The Block Header is the biggest (1 KiB) - * so we reserve space according to that. buf[] has to be aligned - * to a multiple of four bytes; the size_t variables before it - * should guarantee this. - */ - struct { - size_t pos; - size_t size; - uint8_t buf[1024]; - } temp; - - struct xz_dec_lzma2 *lzma2; - -#ifdef XZ_DEC_BCJ - struct xz_dec_bcj *bcj; - bool bcj_active; -#endif -}; - -#ifdef XZ_DEC_ANY_CHECK -/* Sizes of the Check field with different Check IDs */ -static const uint8_t check_sizes[16] = { - 0, - 4, 4, 4, - 8, 8, 8, - 16, 16, 16, - 32, 32, 32, - 64, 64, 64 -}; -#endif - -/* - * Fill s->temp by copying data starting from b->in[b->in_pos]. Caller - * must have set s->temp.pos to indicate how much data we are supposed - * to copy into s->temp.buf. Return true once s->temp.pos has reached - * s->temp.size. - */ -static bool XZ_FUNC fill_temp(struct xz_dec *s, struct xz_buf *b) -{ - size_t copy_size = min_t(size_t, - b->in_size - b->in_pos, s->temp.size - s->temp.pos); - - memcpy(s->temp.buf + s->temp.pos, b->in + b->in_pos, copy_size); - b->in_pos += copy_size; - s->temp.pos += copy_size; - - if (s->temp.pos == s->temp.size) { - s->temp.pos = 0; - return true; - } - - return false; -} - -/* Decode a variable-length integer (little-endian base-128 encoding) */ -static enum xz_ret XZ_FUNC dec_vli(struct xz_dec *s, - const uint8_t *in, size_t *in_pos, size_t in_size) -{ - uint8_t byte; - - if (s->pos == 0) - s->vli = 0; - - while (*in_pos < in_size) { - byte = in[*in_pos]; - ++*in_pos; - - s->vli |= (vli_type)(byte & 0x7F) << s->pos; - - if ((byte & 0x80) == 0) { - /* Don't allow non-minimal encodings. */ - if (byte == 0 && s->pos != 0) - return XZ_DATA_ERROR; - - s->pos = 0; - return XZ_STREAM_END; - } - - s->pos += 7; - if (s->pos == 7 * VLI_BYTES_MAX) - return XZ_DATA_ERROR; - } - - return XZ_OK; -} - -/* - * Decode the Compressed Data field from a Block. Update and validate - * the observed compressed and uncompressed sizes of the Block so that - * they don't exceed the values possibly stored in the Block Header - * (validation assumes that no integer overflow occurs, since vli_type - * is normally uint64_t). Update the CRC32 if presence of the CRC32 - * field was indicated in Stream Header. - * - * Once the decoding is finished, validate that the observed sizes match - * the sizes possibly stored in the Block Header. Update the hash and - * Block count, which are later used to validate the Index field. - */ -static enum xz_ret XZ_FUNC dec_block(struct xz_dec *s, struct xz_buf *b) -{ - enum xz_ret ret; - - s->in_start = b->in_pos; - s->out_start = b->out_pos; - -#ifdef XZ_DEC_BCJ - if (s->bcj_active) - ret = xz_dec_bcj_run(s->bcj, s->lzma2, b); - else -#endif - ret = xz_dec_lzma2_run(s->lzma2, b); - - s->block.compressed += b->in_pos - s->in_start; - s->block.uncompressed += b->out_pos - s->out_start; - - /* - * There is no need to separately check for VLI_UNKNOWN, since - * the observed sizes are always smaller than VLI_UNKNOWN. - */ - if (s->block.compressed > s->block_header.compressed - || s->block.uncompressed - > s->block_header.uncompressed) - return XZ_DATA_ERROR; - - if (s->check_type == XZ_CHECK_CRC32) - s->crc32 = xz_crc32(b->out + s->out_start, - b->out_pos - s->out_start, s->crc32); - - if (ret == XZ_STREAM_END) { - if (s->block_header.compressed != VLI_UNKNOWN - && s->block_header.compressed - != s->block.compressed) - return XZ_DATA_ERROR; - - if (s->block_header.uncompressed != VLI_UNKNOWN - && s->block_header.uncompressed - != s->block.uncompressed) - return XZ_DATA_ERROR; - - s->block.hash.unpadded += s->block_header.size - + s->block.compressed; - -#ifdef XZ_DEC_ANY_CHECK - s->block.hash.unpadded += check_sizes[s->check_type]; -#else - if (s->check_type == XZ_CHECK_CRC32) - s->block.hash.unpadded += 4; -#endif - - s->block.hash.uncompressed += s->block.uncompressed; - s->block.hash.crc32 = xz_crc32( - (const uint8_t *)&s->block.hash, - sizeof(s->block.hash), s->block.hash.crc32); - - ++s->block.count; - } - - return ret; -} - -/* Update the Index size and the CRC32 value. */ -static void XZ_FUNC index_update(struct xz_dec *s, const struct xz_buf *b) -{ - size_t in_used = b->in_pos - s->in_start; - s->index.size += in_used; - s->crc32 = xz_crc32(b->in + s->in_start, in_used, s->crc32); -} - -/* - * Decode the Number of Records, Unpadded Size, and Uncompressed Size - * fields from the Index field. That is, Index Padding and CRC32 are not - * decoded by this function. - * - * This can return XZ_OK (more input needed), XZ_STREAM_END (everything - * successfully decoded), or XZ_DATA_ERROR (input is corrupt). - */ -static enum xz_ret XZ_FUNC dec_index(struct xz_dec *s, struct xz_buf *b) -{ - enum xz_ret ret; - - do { - ret = dec_vli(s, b->in, &b->in_pos, b->in_size); - if (ret != XZ_STREAM_END) { - index_update(s, b); - return ret; - } - - switch (s->index.sequence) { - case SEQ_INDEX_COUNT: - s->index.count = s->vli; - - /* - * Validate that the Number of Records field - * indicates the same number of Records as - * there were Blocks in the Stream. - */ - if (s->index.count != s->block.count) - return XZ_DATA_ERROR; - - s->index.sequence = SEQ_INDEX_UNPADDED; - break; - - case SEQ_INDEX_UNPADDED: - s->index.hash.unpadded += s->vli; - s->index.sequence = SEQ_INDEX_UNCOMPRESSED; - break; - - case SEQ_INDEX_UNCOMPRESSED: - s->index.hash.uncompressed += s->vli; - s->index.hash.crc32 = xz_crc32( - (const uint8_t *)&s->index.hash, - sizeof(s->index.hash), - s->index.hash.crc32); - --s->index.count; - s->index.sequence = SEQ_INDEX_UNPADDED; - break; - } - } while (s->index.count > 0); - - return XZ_STREAM_END; -} - -/* - * Validate that the next four input bytes match the value of s->crc32. - * s->pos must be zero when starting to validate the first byte. - */ -static enum xz_ret XZ_FUNC crc32_validate(struct xz_dec *s, struct xz_buf *b) -{ - do { - if (b->in_pos == b->in_size) - return XZ_OK; - - if (((s->crc32 >> s->pos) & 0xFF) != b->in[b->in_pos++]) - return XZ_DATA_ERROR; - - s->pos += 8; - - } while (s->pos < 32); - - s->crc32 = 0; - s->pos = 0; - - return XZ_STREAM_END; -} - -#ifdef XZ_DEC_ANY_CHECK -/* - * Skip over the Check field when the Check ID is not supported. - * Returns true once the whole Check field has been skipped over. - */ -static bool XZ_FUNC check_skip(struct xz_dec *s, struct xz_buf *b) -{ - while (s->pos < check_sizes[s->check_type]) { - if (b->in_pos == b->in_size) - return false; - - ++b->in_pos; - ++s->pos; - } - - s->pos = 0; - - return true; -} -#endif - -/* Decode the Stream Header field (the first 12 bytes of the .xz Stream). */ -static enum xz_ret XZ_FUNC dec_stream_header(struct xz_dec *s) -{ - if (!memeq(s->temp.buf, HEADER_MAGIC, HEADER_MAGIC_SIZE)) - return XZ_FORMAT_ERROR; - - if (xz_crc32(s->temp.buf + HEADER_MAGIC_SIZE, 2, 0) - != get_le32(s->temp.buf + HEADER_MAGIC_SIZE + 2)) - return XZ_DATA_ERROR; - - if (s->temp.buf[HEADER_MAGIC_SIZE] != 0) - return XZ_OPTIONS_ERROR; - - /* - * Of integrity checks, we support only none (Check ID = 0) and - * CRC32 (Check ID = 1). However, if XZ_DEC_ANY_CHECK is defined, - * we will accept other check types too, but then the check won't - * be verified and a warning (XZ_UNSUPPORTED_CHECK) will be given. - */ - s->check_type = s->temp.buf[HEADER_MAGIC_SIZE + 1]; - -#ifdef XZ_DEC_ANY_CHECK - if (s->check_type > XZ_CHECK_MAX) - return XZ_OPTIONS_ERROR; - - if (s->check_type > XZ_CHECK_CRC32) - return XZ_UNSUPPORTED_CHECK; -#else - if (s->check_type > XZ_CHECK_CRC32) - return XZ_OPTIONS_ERROR; -#endif - - return XZ_OK; -} - -/* Decode the Stream Footer field (the last 12 bytes of the .xz Stream) */ -static enum xz_ret XZ_FUNC dec_stream_footer(struct xz_dec *s) -{ - if (!memeq(s->temp.buf + 10, FOOTER_MAGIC, FOOTER_MAGIC_SIZE)) - return XZ_DATA_ERROR; - - if (xz_crc32(s->temp.buf + 4, 6, 0) != get_le32(s->temp.buf)) - return XZ_DATA_ERROR; - - /* - * Validate Backward Size. Note that we never added the size of the - * Index CRC32 field to s->index.size, thus we use s->index.size / 4 - * instead of s->index.size / 4 - 1. - */ - if ((s->index.size >> 2) != get_le32(s->temp.buf + 4)) - return XZ_DATA_ERROR; - - if (s->temp.buf[8] != 0 || s->temp.buf[9] != s->check_type) - return XZ_DATA_ERROR; - - /* - * Use XZ_STREAM_END instead of XZ_OK to be more convenient - * for the caller. - */ - return XZ_STREAM_END; -} - -/* Decode the Block Header and initialize the filter chain. */ -static enum xz_ret XZ_FUNC dec_block_header(struct xz_dec *s) -{ - enum xz_ret ret; - - /* - * Validate the CRC32. We know that the temp buffer is at least - * eight bytes so this is safe. - */ - s->temp.size -= 4; - if (xz_crc32(s->temp.buf, s->temp.size, 0) - != get_le32(s->temp.buf + s->temp.size)) - return XZ_DATA_ERROR; - - s->temp.pos = 2; - - /* - * Catch unsupported Block Flags. We support only one or two filters - * in the chain, so we catch that with the same test. - */ -#ifdef XZ_DEC_BCJ - if (s->temp.buf[1] & 0x3E) -#else - if (s->temp.buf[1] & 0x3F) -#endif - return XZ_OPTIONS_ERROR; - - /* Compressed Size */ - if (s->temp.buf[1] & 0x40) { - if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) - != XZ_STREAM_END) - return XZ_DATA_ERROR; - - s->block_header.compressed = s->vli; - } else { - s->block_header.compressed = VLI_UNKNOWN; - } - - /* Uncompressed Size */ - if (s->temp.buf[1] & 0x80) { - if (dec_vli(s, s->temp.buf, &s->temp.pos, s->temp.size) - != XZ_STREAM_END) - return XZ_DATA_ERROR; - - s->block_header.uncompressed = s->vli; - } else { - s->block_header.uncompressed = VLI_UNKNOWN; - } - -#ifdef XZ_DEC_BCJ - /* If there are two filters, the first one must be a BCJ filter. */ - s->bcj_active = s->temp.buf[1] & 0x01; - if (s->bcj_active) { - if (s->temp.size - s->temp.pos < 2) - return XZ_OPTIONS_ERROR; - - ret = xz_dec_bcj_reset(s->bcj, s->temp.buf[s->temp.pos++]); - if (ret != XZ_OK) - return ret; - - /* - * We don't support custom start offset, - * so Size of Properties must be zero. - */ - if (s->temp.buf[s->temp.pos++] != 0x00) - return XZ_OPTIONS_ERROR; - } -#endif - - /* Valid Filter Flags always take at least two bytes. */ - if (s->temp.size - s->temp.pos < 2) - return XZ_DATA_ERROR; - - /* Filter ID = LZMA2 */ - if (s->temp.buf[s->temp.pos++] != 0x21) - return XZ_OPTIONS_ERROR; - - /* Size of Properties = 1-byte Filter Properties */ - if (s->temp.buf[s->temp.pos++] != 0x01) - return XZ_OPTIONS_ERROR; - - /* Filter Properties contains LZMA2 dictionary size. */ - if (s->temp.size - s->temp.pos < 1) - return XZ_DATA_ERROR; - - ret = xz_dec_lzma2_reset(s->lzma2, s->temp.buf[s->temp.pos++]); - if (ret != XZ_OK) - return ret; - - /* The rest must be Header Padding. */ - while (s->temp.pos < s->temp.size) - if (s->temp.buf[s->temp.pos++] != 0x00) - return XZ_OPTIONS_ERROR; - - s->temp.pos = 0; - s->block.compressed = 0; - s->block.uncompressed = 0; - - return XZ_OK; -} - -static enum xz_ret XZ_FUNC dec_main(struct xz_dec *s, struct xz_buf *b) -{ - enum xz_ret ret; - - /* - * Store the start position for the case when we are in the middle - * of the Index field. - */ - s->in_start = b->in_pos; - - while (true) { - switch (s->sequence) { - case SEQ_STREAM_HEADER: - /* - * Stream Header is copied to s->temp, and then - * decoded from there. This way if the caller - * gives us only little input at a time, we can - * still keep the Stream Header decoding code - * simple. Similar approach is used in many places - * in this file. - */ - if (!fill_temp(s, b)) - return XZ_OK; - - /* - * If dec_stream_header() returns - * XZ_UNSUPPORTED_CHECK, it is still possible - * to continue decoding if working in multi-call - * mode. Thus, update s->sequence before calling - * dec_stream_header(). - */ - s->sequence = SEQ_BLOCK_START; - - ret = dec_stream_header(s); - if (ret != XZ_OK) - return ret; - - case SEQ_BLOCK_START: - /* We need one byte of input to continue. */ - if (b->in_pos == b->in_size) - return XZ_OK; - - /* See if this is the beginning of the Index field. */ - if (b->in[b->in_pos] == 0) { - s->in_start = b->in_pos++; - s->sequence = SEQ_INDEX; - break; - } - - /* - * Calculate the size of the Block Header and - * prepare to decode it. - */ - s->block_header.size - = ((uint32_t)b->in[b->in_pos] + 1) * 4; - - s->temp.size = s->block_header.size; - s->temp.pos = 0; - s->sequence = SEQ_BLOCK_HEADER; - - case SEQ_BLOCK_HEADER: - if (!fill_temp(s, b)) - return XZ_OK; - - ret = dec_block_header(s); - if (ret != XZ_OK) - return ret; - - s->sequence = SEQ_BLOCK_UNCOMPRESS; - - case SEQ_BLOCK_UNCOMPRESS: - ret = dec_block(s, b); - if (ret != XZ_STREAM_END) - return ret; - - s->sequence = SEQ_BLOCK_PADDING; - - case SEQ_BLOCK_PADDING: - /* - * Size of Compressed Data + Block Padding - * must be a multiple of four. We don't need - * s->block.compressed for anything else - * anymore, so we use it here to test the size - * of the Block Padding field. - */ - while (s->block.compressed & 3) { - if (b->in_pos == b->in_size) - return XZ_OK; - - if (b->in[b->in_pos++] != 0) - return XZ_DATA_ERROR; - - ++s->block.compressed; - } - - s->sequence = SEQ_BLOCK_CHECK; - - case SEQ_BLOCK_CHECK: - if (s->check_type == XZ_CHECK_CRC32) { - ret = crc32_validate(s, b); - if (ret != XZ_STREAM_END) - return ret; - } -#ifdef XZ_DEC_ANY_CHECK - else if (!check_skip(s, b)) { - return XZ_OK; - } -#endif - - s->sequence = SEQ_BLOCK_START; - break; - - case SEQ_INDEX: - ret = dec_index(s, b); - if (ret != XZ_STREAM_END) - return ret; - - s->sequence = SEQ_INDEX_PADDING; - - case SEQ_INDEX_PADDING: - while ((s->index.size + (b->in_pos - s->in_start)) - & 3) { - if (b->in_pos == b->in_size) { - index_update(s, b); - return XZ_OK; - } - - if (b->in[b->in_pos++] != 0) - return XZ_DATA_ERROR; - } - - /* Finish the CRC32 value and Index size. */ - index_update(s, b); - - /* Compare the hashes to validate the Index field. */ - if (!memeq(&s->block.hash, &s->index.hash, - sizeof(s->block.hash))) - return XZ_DATA_ERROR; - - s->sequence = SEQ_INDEX_CRC32; - - case SEQ_INDEX_CRC32: - ret = crc32_validate(s, b); - if (ret != XZ_STREAM_END) - return ret; - - s->temp.size = STREAM_HEADER_SIZE; - s->sequence = SEQ_STREAM_FOOTER; - - case SEQ_STREAM_FOOTER: - if (!fill_temp(s, b)) - return XZ_OK; - - return dec_stream_footer(s); - } - } - - /* Never reached */ -} - -/* - * xz_dec_run() is a wrapper for dec_main() to handle some special cases in - * multi-call and single-call decoding. - * - * In multi-call mode, we must return XZ_BUF_ERROR when it seems clear that we - * are not going to make any progress anymore. This is to prevent the caller - * from calling us infinitely when the input file is truncated or otherwise - * corrupt. Since zlib-style API allows that the caller fills the input buffer - * only when the decoder doesn't produce any new output, we have to be careful - * to avoid returning XZ_BUF_ERROR too easily: XZ_BUF_ERROR is returned only - * after the second consecutive call to xz_dec_run() that makes no progress. - * - * In single-call mode, if we couldn't decode everything and no error - * occurred, either the input is truncated or the output buffer is too small. - * Since we know that the last input byte never produces any output, we know - * that if all the input was consumed and decoding wasn't finished, the file - * must be corrupt. Otherwise the output buffer has to be too small or the - * file is corrupt in a way that decoding it produces too big output. - * - * If single-call decoding fails, we reset b->in_pos and b->out_pos back to - * their original values. This is because with some filter chains there won't - * be any valid uncompressed data in the output buffer unless the decoding - * actually succeeds (that's the price to pay of using the output buffer as - * the workspace). - */ -XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_run(struct xz_dec *s, struct xz_buf *b) -{ - size_t in_start; - size_t out_start; - enum xz_ret ret; - - if (DEC_IS_SINGLE(s->mode)) - xz_dec_reset(s); - - in_start = b->in_pos; - out_start = b->out_pos; - ret = dec_main(s, b); - - if (DEC_IS_SINGLE(s->mode)) { - if (ret == XZ_OK) - ret = b->in_pos == b->in_size - ? XZ_DATA_ERROR : XZ_BUF_ERROR; - - if (ret != XZ_STREAM_END) { - b->in_pos = in_start; - b->out_pos = out_start; - } - - } else if (ret == XZ_OK && in_start == b->in_pos - && out_start == b->out_pos) { - if (s->allow_buf_error) - ret = XZ_BUF_ERROR; - - s->allow_buf_error = true; - } else { - s->allow_buf_error = false; - } - - return ret; -} - -XZ_EXTERN struct xz_dec * XZ_FUNC xz_dec_init( - enum xz_mode mode, uint32_t dict_max) -{ - struct xz_dec *s = kmalloc(sizeof(*s), GFP_KERNEL); - if (s == NULL) - return NULL; - - s->mode = mode; - -#ifdef XZ_DEC_BCJ - s->bcj = xz_dec_bcj_create(DEC_IS_SINGLE(mode)); - if (s->bcj == NULL) - goto error_bcj; -#endif - - s->lzma2 = xz_dec_lzma2_create(mode, dict_max); - if (s->lzma2 == NULL) - goto error_lzma2; - - xz_dec_reset(s); - return s; - -error_lzma2: -#ifdef XZ_DEC_BCJ - xz_dec_bcj_end(s->bcj); -error_bcj: -#endif - kfree(s); - return NULL; -} - -XZ_EXTERN void XZ_FUNC xz_dec_reset(struct xz_dec *s) -{ - s->sequence = SEQ_STREAM_HEADER; - s->allow_buf_error = false; - s->pos = 0; - s->crc32 = 0; - memzero(&s->block, sizeof(s->block)); - memzero(&s->index, sizeof(s->index)); - s->temp.pos = 0; - s->temp.size = STREAM_HEADER_SIZE; -} - -XZ_EXTERN void XZ_FUNC xz_dec_end(struct xz_dec *s) -{ - if (s != NULL) { - xz_dec_lzma2_end(s->lzma2); -#ifdef XZ_DEC_BCJ - xz_dec_bcj_end(s->bcj); -#endif - kfree(s); - } -} diff --git a/archival/libunarchive/unxz/xz_lzma2.h b/archival/libunarchive/unxz/xz_lzma2.h deleted file mode 100644 index 47f21afbc..000000000 --- a/archival/libunarchive/unxz/xz_lzma2.h +++ /dev/null @@ -1,204 +0,0 @@ -/* - * LZMA2 definitions - * - * Authors: Lasse Collin - * Igor Pavlov - * - * This file has been put into the public domain. - * You can do whatever you want with this file. - */ - -#ifndef XZ_LZMA2_H -#define XZ_LZMA2_H - -/* Range coder constants */ -#define RC_SHIFT_BITS 8 -#define RC_TOP_BITS 24 -#define RC_TOP_VALUE (1 << RC_TOP_BITS) -#define RC_BIT_MODEL_TOTAL_BITS 11 -#define RC_BIT_MODEL_TOTAL (1 << RC_BIT_MODEL_TOTAL_BITS) -#define RC_MOVE_BITS 5 - -/* - * Maximum number of position states. A position state is the lowest pb - * number of bits of the current uncompressed offset. In some places there - * are different sets of probabilities for different position states. - */ -#define POS_STATES_MAX (1 << 4) - -/* - * This enum is used to track which LZMA symbols have occurred most recently - * and in which order. This information is used to predict the next symbol. - * - * Symbols: - * - Literal: One 8-bit byte - * - Match: Repeat a chunk of data at some distance - * - Long repeat: Multi-byte match at a recently seen distance - * - Short repeat: One-byte repeat at a recently seen distance - * - * The symbol names are in from STATE_oldest_older_previous. REP means - * either short or long repeated match, and NONLIT means any non-literal. - */ -enum lzma_state { - STATE_LIT_LIT, - STATE_MATCH_LIT_LIT, - STATE_REP_LIT_LIT, - STATE_SHORTREP_LIT_LIT, - STATE_MATCH_LIT, - STATE_REP_LIT, - STATE_SHORTREP_LIT, - STATE_LIT_MATCH, - STATE_LIT_LONGREP, - STATE_LIT_SHORTREP, - STATE_NONLIT_MATCH, - STATE_NONLIT_REP -}; - -/* Total number of states */ -#define STATES 12 - -/* The lowest 7 states indicate that the previous state was a literal. */ -#define LIT_STATES 7 - -/* Indicate that the latest symbol was a literal. */ -static inline void XZ_FUNC lzma_state_literal(enum lzma_state *state) -{ - if (*state <= STATE_SHORTREP_LIT_LIT) - *state = STATE_LIT_LIT; - else if (*state <= STATE_LIT_SHORTREP) - *state -= 3; - else - *state -= 6; -} - -/* Indicate that the latest symbol was a match. */ -static inline void XZ_FUNC lzma_state_match(enum lzma_state *state) -{ - *state = *state < LIT_STATES ? STATE_LIT_MATCH : STATE_NONLIT_MATCH; -} - -/* Indicate that the latest state was a long repeated match. */ -static inline void XZ_FUNC lzma_state_long_rep(enum lzma_state *state) -{ - *state = *state < LIT_STATES ? STATE_LIT_LONGREP : STATE_NONLIT_REP; -} - -/* Indicate that the latest symbol was a short match. */ -static inline void XZ_FUNC lzma_state_short_rep(enum lzma_state *state) -{ - *state = *state < LIT_STATES ? STATE_LIT_SHORTREP : STATE_NONLIT_REP; -} - -/* Test if the previous symbol was a literal. */ -static inline bool XZ_FUNC lzma_state_is_literal(enum lzma_state state) -{ - return state < LIT_STATES; -} - -/* Each literal coder is divided in three sections: - * - 0x001-0x0FF: Without match byte - * - 0x101-0x1FF: With match byte; match bit is 0 - * - 0x201-0x2FF: With match byte; match bit is 1 - * - * Match byte is used when the previous LZMA symbol was something else than - * a literal (that is, it was some kind of match). - */ -#define LITERAL_CODER_SIZE 0x300 - -/* Maximum number of literal coders */ -#define LITERAL_CODERS_MAX (1 << 4) - -/* Minimum length of a match is two bytes. */ -#define MATCH_LEN_MIN 2 - -/* Match length is encoded with 4, 5, or 10 bits. - * - * Length Bits - * 2-9 4 = Choice=0 + 3 bits - * 10-17 5 = Choice=1 + Choice2=0 + 3 bits - * 18-273 10 = Choice=1 + Choice2=1 + 8 bits - */ -#define LEN_LOW_BITS 3 -#define LEN_LOW_SYMBOLS (1 << LEN_LOW_BITS) -#define LEN_MID_BITS 3 -#define LEN_MID_SYMBOLS (1 << LEN_MID_BITS) -#define LEN_HIGH_BITS 8 -#define LEN_HIGH_SYMBOLS (1 << LEN_HIGH_BITS) -#define LEN_SYMBOLS (LEN_LOW_SYMBOLS + LEN_MID_SYMBOLS + LEN_HIGH_SYMBOLS) - -/* - * Maximum length of a match is 273 which is a result of the encoding - * described above. - */ -#define MATCH_LEN_MAX (MATCH_LEN_MIN + LEN_SYMBOLS - 1) - -/* - * Different sets of probabilities are used for match distances that have - * very short match length: Lengths of 2, 3, and 4 bytes have a separate - * set of probabilities for each length. The matches with longer length - * use a shared set of probabilities. - */ -#define DIST_STATES 4 - -/* - * Get the index of the appropriate probability array for decoding - * the distance slot. - */ -static inline uint32_t XZ_FUNC lzma_get_dist_state(uint32_t len) -{ - return len < DIST_STATES + MATCH_LEN_MIN - ? len - MATCH_LEN_MIN : DIST_STATES - 1; -} - -/* - * The highest two bits of a 32-bit match distance are encoded using six bits. - * This six-bit value is called a distance slot. This way encoding a 32-bit - * value takes 6-36 bits, larger values taking more bits. - */ -#define DIST_SLOT_BITS 6 -#define DIST_SLOTS (1 << DIST_SLOT_BITS) - -/* Match distances up to 127 are fully encoded using probabilities. Since - * the highest two bits (distance slot) are always encoded using six bits, - * the distances 0-3 don't need any additional bits to encode, since the - * distance slot itself is the same as the actual distance. DIST_MODEL_START - * indicates the first distance slot where at least one additional bit is - * needed. - */ -#define DIST_MODEL_START 4 - -/* - * Match distances greater than 127 are encoded in three pieces: - * - distance slot: the highest two bits - * - direct bits: 2-26 bits below the highest two bits - * - alignment bits: four lowest bits - * - * Direct bits don't use any probabilities. - * - * The distance slot value of 14 is for distances 128-191. - */ -#define DIST_MODEL_END 14 - -/* Distance slots that indicate a distance <= 127. */ -#define FULL_DISTANCES_BITS (DIST_MODEL_END / 2) -#define FULL_DISTANCES (1 << FULL_DISTANCES_BITS) - -/* - * For match distances greater than 127, only the highest two bits and the - * lowest four bits (alignment) is encoded using probabilities. - */ -#define ALIGN_BITS 4 -#define ALIGN_SIZE (1 << ALIGN_BITS) -#define ALIGN_MASK (ALIGN_SIZE - 1) - -/* Total number of all probability variables */ -#define PROBS_TOTAL (1846 + LITERAL_CODERS_MAX * LITERAL_CODER_SIZE) - -/* - * LZMA remembers the four most recent match distances. Reusing these - * distances tends to take less space than re-encoding the actual - * distance value. - */ -#define REPS 4 - -#endif diff --git a/archival/libunarchive/unxz/xz_private.h b/archival/libunarchive/unxz/xz_private.h deleted file mode 100644 index 145649a83..000000000 --- a/archival/libunarchive/unxz/xz_private.h +++ /dev/null @@ -1,159 +0,0 @@ -/* - * Private includes and definitions - * - * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. - */ - -#ifndef XZ_PRIVATE_H -#define XZ_PRIVATE_H - -#ifdef __KERNEL__ - /* XZ_PREBOOT may be defined only via decompress_unxz.c. */ -# ifndef XZ_PREBOOT -# include -# include -# include -# define memeq(a, b, size) (memcmp(a, b, size) == 0) -# define memzero(buf, size) memset(buf, 0, size) -# endif -# include -# include -# define get_le32(p) le32_to_cpup((const uint32_t *)(p)) - /* XZ_IGNORE_KCONFIG may be defined only via decompress_unxz.c. */ -# ifndef XZ_IGNORE_KCONFIG -# ifdef CONFIG_XZ_DEC_X86 -# define XZ_DEC_X86 -# endif -# ifdef CONFIG_XZ_DEC_POWERPC -# define XZ_DEC_POWERPC -# endif -# ifdef CONFIG_XZ_DEC_IA64 -# define XZ_DEC_IA64 -# endif -# ifdef CONFIG_XZ_DEC_ARM -# define XZ_DEC_ARM -# endif -# ifdef CONFIG_XZ_DEC_ARMTHUMB -# define XZ_DEC_ARMTHUMB -# endif -# ifdef CONFIG_XZ_DEC_SPARC -# define XZ_DEC_SPARC -# endif -# endif -# include -#else - /* - * For userspace builds, use a separate header to define the required - * macros and functions. This makes it easier to adapt the code into - * different environments and avoids clutter in the Linux kernel tree. - */ -# include "xz_config.h" -#endif - -/* If no specific decoding mode is requested, enable support for all modes. */ -#if !defined(XZ_DEC_SINGLE) && !defined(XZ_DEC_PREALLOC) \ - && !defined(XZ_DEC_DYNALLOC) -# define XZ_DEC_SINGLE -# define XZ_DEC_PREALLOC -# define XZ_DEC_DYNALLOC -#endif - -/* - * The DEC_IS_foo(mode) macros are used in "if" statements. If only some - * of the supported modes are enabled, these macros will evaluate to true or - * false at compile time and thus allow the compiler to omit unneeded code. - */ -#ifdef XZ_DEC_SINGLE -# define DEC_IS_SINGLE(mode) ((mode) == XZ_SINGLE) -#else -# define DEC_IS_SINGLE(mode) (false) -#endif - -#ifdef XZ_DEC_PREALLOC -# define DEC_IS_PREALLOC(mode) ((mode) == XZ_PREALLOC) -#else -# define DEC_IS_PREALLOC(mode) (false) -#endif - -#ifdef XZ_DEC_DYNALLOC -# define DEC_IS_DYNALLOC(mode) ((mode) == XZ_DYNALLOC) -#else -# define DEC_IS_DYNALLOC(mode) (false) -#endif - -#if !defined(XZ_DEC_SINGLE) -# define DEC_IS_MULTI(mode) (true) -#elif defined(XZ_DEC_PREALLOC) || defined(XZ_DEC_DYNALLOC) -# define DEC_IS_MULTI(mode) ((mode) != XZ_SINGLE) -#else -# define DEC_IS_MULTI(mode) (false) -#endif - -/* - * If any of the BCJ filter decoders are wanted, define XZ_DEC_BCJ. - * XZ_DEC_BCJ is used to enable generic support for BCJ decoders. - */ -#ifndef XZ_DEC_BCJ -# if defined(XZ_DEC_X86) || defined(XZ_DEC_POWERPC) \ - || defined(XZ_DEC_IA64) || defined(XZ_DEC_ARM) \ - || defined(XZ_DEC_ARM) || defined(XZ_DEC_ARMTHUMB) \ - || defined(XZ_DEC_SPARC) -# define XZ_DEC_BCJ -# endif -#endif - -/* - * Allocate memory for LZMA2 decoder. xz_dec_lzma2_reset() must be used - * before calling xz_dec_lzma2_run(). - */ -XZ_EXTERN struct xz_dec_lzma2 * XZ_FUNC xz_dec_lzma2_create( - enum xz_mode mode, uint32_t dict_max); - -/* - * Decode the LZMA2 properties (one byte) and reset the decoder. Return - * XZ_OK on success, XZ_MEMLIMIT_ERROR if the preallocated dictionary is not - * big enough, and XZ_OPTIONS_ERROR if props indicates something that this - * decoder doesn't support. - */ -XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_reset( - struct xz_dec_lzma2 *s, uint8_t props); - -/* Decode raw LZMA2 stream from b->in to b->out. */ -XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_lzma2_run( - struct xz_dec_lzma2 *s, struct xz_buf *b); - -/* Free the memory allocated for the LZMA2 decoder. */ -XZ_EXTERN void XZ_FUNC xz_dec_lzma2_end(struct xz_dec_lzma2 *s); - -#ifdef XZ_DEC_BCJ -/* - * Allocate memory for BCJ decoders. xz_dec_bcj_reset() must be used before - * calling xz_dec_bcj_run(). - */ -XZ_EXTERN struct xz_dec_bcj * XZ_FUNC xz_dec_bcj_create(bool single_call); - -/* - * Decode the Filter ID of a BCJ filter. This implementation doesn't - * support custom start offsets, so no decoding of Filter Properties - * is needed. Returns XZ_OK if the given Filter ID is supported. - * Otherwise XZ_OPTIONS_ERROR is returned. - */ -XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_reset( - struct xz_dec_bcj *s, uint8_t id); - -/* - * Decode raw BCJ + LZMA2 stream. This must be used only if there actually is - * a BCJ filter in the chain. If the chain has only LZMA2, xz_dec_lzma2_run() - * must be called directly. - */ -XZ_EXTERN enum xz_ret XZ_FUNC xz_dec_bcj_run(struct xz_dec_bcj *s, - struct xz_dec_lzma2 *lzma2, struct xz_buf *b); - -/* Free the memory allocated for the BCJ filters. */ -#define xz_dec_bcj_end(s) kfree(s) -#endif - -#endif diff --git a/archival/libunarchive/unxz/xz_stream.h b/archival/libunarchive/unxz/xz_stream.h deleted file mode 100644 index 36f2a7cbf..000000000 --- a/archival/libunarchive/unxz/xz_stream.h +++ /dev/null @@ -1,57 +0,0 @@ -/* - * Definitions for handling the .xz file format - * - * Author: Lasse Collin - * - * This file has been put into the public domain. - * You can do whatever you want with this file. - */ - -#ifndef XZ_STREAM_H -#define XZ_STREAM_H - -#if defined(__KERNEL__) && !XZ_INTERNAL_CRC32 -# include -# undef crc32 -# define xz_crc32(buf, size, crc) \ - (~crc32_le(~(uint32_t)(crc), buf, size)) -#endif - -/* - * See the .xz file format specification at - * http://tukaani.org/xz/xz-file-format.txt - * to understand the container format. - */ - -#define STREAM_HEADER_SIZE 12 - -#define HEADER_MAGIC "\3757zXZ\0" -#define HEADER_MAGIC_SIZE 6 - -#define FOOTER_MAGIC "YZ" -#define FOOTER_MAGIC_SIZE 2 - -/* - * Variable-length integer can hold a 63-bit unsigned integer, or a special - * value to indicate that the value is unknown. - */ -typedef uint64_t vli_type; - -#define VLI_MAX ((vli_type)-1 / 2) -#define VLI_UNKNOWN ((vli_type)-1) - -/* Maximum encoded size of a VLI */ -#define VLI_BYTES_MAX (sizeof(vli_type) * 8 / 7) - -/* Integrity Check types */ -enum xz_check { - XZ_CHECK_NONE = 0, - XZ_CHECK_CRC32 = 1, - XZ_CHECK_CRC64 = 4, - XZ_CHECK_SHA256 = 10 -}; - -/* Maximum possible Check ID */ -#define XZ_CHECK_MAX 15 - -#endif diff --git a/archival/lzop.c b/archival/lzop.c index acb34fe14..094e78cf9 100644 --- a/archival/lzop.c +++ b/archival/lzop.c @@ -26,7 +26,7 @@ */ #include "libbb.h" -#include "unarchive.h" +#include "archive.h" #include "liblzo_interface.h" /* lzo-2.03/src/lzo_ptr.h */ diff --git a/archival/rpm.c b/archival/rpm.c index 7b316a5b8..380226f9b 100644 --- a/archival/rpm.c +++ b/archival/rpm.c @@ -8,7 +8,7 @@ */ #include "libbb.h" -#include "unarchive.h" +#include "archive.h" #include "rpm.h" #define RPM_CHAR_TYPE 1 diff --git a/archival/rpm2cpio.c b/archival/rpm2cpio.c index 70021d539..ce8cd2c2c 100644 --- a/archival/rpm2cpio.c +++ b/archival/rpm2cpio.c @@ -7,7 +7,7 @@ * Licensed under GPLv2 or later, see file LICENSE in this source tree. */ #include "libbb.h" -#include "unarchive.h" +#include "archive.h" #include "rpm.h" enum { rpm_fd = STDIN_FILENO }; diff --git a/archival/tar.c b/archival/tar.c index 2176ad2ac..82caec770 100644 --- a/archival/tar.c +++ b/archival/tar.c @@ -25,7 +25,7 @@ #include #include "libbb.h" -#include "unarchive.h" +#include "archive.h" /* FIXME: Stop using this non-standard feature */ #ifndef FNM_LEADING_DIR # define FNM_LEADING_DIR 0 diff --git a/archival/unzip.c b/archival/unzip.c index 204e34952..5d62c08cb 100644 --- a/archival/unzip.c +++ b/archival/unzip.c @@ -20,7 +20,7 @@ */ #include "libbb.h" -#include "unarchive.h" +#include "archive.h" enum { #if BB_BIG_ENDIAN -- cgit v1.2.3