aboutsummaryrefslogtreecommitdiff
path: root/archival/gzip.c
diff options
context:
space:
mode:
authorDenis Vlasenko <vda.linux@googlemail.com>2007-01-07 19:39:02 +0000
committerDenis Vlasenko <vda.linux@googlemail.com>2007-01-07 19:39:02 +0000
commitda31fbc1b1dec08a18fc94d728b8d080f4a503d3 (patch)
tree4c1d83e0642db1a4d4241826901c440182adccd8 /archival/gzip.c
parentf824136f6b21558ac023bc53bcb69294e0676103 (diff)
downloadbusybox-da31fbc1b1dec08a18fc94d728b8d080f4a503d3.tar.gz
gzip cleanup part #5
Diffstat (limited to 'archival/gzip.c')
-rw-r--r--archival/gzip.c416
1 files changed, 212 insertions, 204 deletions
diff --git a/archival/gzip.c b/archival/gzip.c
index 4f47a2782..0ef25727d 100644
--- a/archival/gzip.c
+++ b/archival/gzip.c
@@ -23,11 +23,36 @@ gzip: bogus: No such file or directory
aa: 85.1% -- replaced with aa.gz
*/
-#define SMALL_MEM
//#include <dirent.h>
#include "busybox.h"
+
+/* ===========================================================================
+ */
+//#define DEBUG 1
+/* Diagnostic functions */
+#ifdef DEBUG
+# define Assert(cond,msg) {if(!(cond)) bb_error_msg(msg);}
+# define Trace(x) fprintf x
+# define Tracev(x) {if (verbose) fprintf x ;}
+# define Tracevv(x) {if (verbose > 1) fprintf x ;}
+# define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
+# define Tracecv(c,x) {if (verbose > 1 && (c)) fprintf x ;}
+#else
+# define Assert(cond,msg)
+# define Trace(x)
+# define Tracev(x)
+# define Tracevv(x)
+# define Tracec(c,x)
+# define Tracecv(c,x)
+#endif
+
+
+/* ===========================================================================
+ */
+#define SMALL_MEM
+
/* Compression methods (see algorithm.doc) */
/* Only STORED and DEFLATED are supported by this BusyBox module */
#define STORED 0
@@ -121,36 +146,170 @@ aa: 85.1% -- replaced with aa.gz
#endif
+/* ===========================================================================
+ * Compile with MEDIUM_MEM to reduce the memory requirements or
+ * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
+ * entire input file can be held in memory (not possible on 16 bit systems).
+ * Warning: defining these symbols affects HASH_BITS (see below) and thus
+ * affects the compression ratio. The compressed output
+ * is still correct, and might even be smaller in some cases.
+ */
+
+#ifdef SMALL_MEM
+# define HASH_BITS 13 /* Number of bits used to hash strings */
+#endif
+#ifdef MEDIUM_MEM
+# define HASH_BITS 14
+#endif
+#ifndef HASH_BITS
+# define HASH_BITS 15
+ /* For portability to 16 bit machines, do not use values above 15. */
+#endif
+
+/* To save space (see unlzw.c), we overlay prev+head with tab_prefix and
+ * window with tab_suffix. Check that we can do this:
+ */
+#if (WSIZE<<1) > (1<<BITS)
+# error cannot overlay window with tab_suffix and prev with tab_prefix0
+#endif
+#if HASH_BITS > BITS-1
+# error cannot overlay head with tab_prefix1
+#endif
+#define HASH_SIZE (unsigned)(1<<HASH_BITS)
+#define HASH_MASK (HASH_SIZE-1)
+#define WMASK (WSIZE-1)
+/* HASH_SIZE and WSIZE must be powers of two */
+#ifndef TOO_FAR
+# define TOO_FAR 4096
+#endif
+/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
+
+
+/* ===========================================================================
+ */
+typedef unsigned char uch;
+typedef unsigned short ush;
+typedef unsigned long ulg;
+
+
+/* ===========================================================================
+ * Local data used by the "longest match" routines.
+ */
+typedef ush Pos;
+typedef unsigned IPos;
+
+/* A Pos is an index in the character window. We use short instead of int to
+ * save space in the various tables. IPos is used only for parameter passing.
+ */
+
#define DECLARE(type, array, size)\
static type * array
#define ALLOC(type, array, size) { \
array = xzalloc((size_t)(((size)+1L)/2) * 2*sizeof(type)); \
}
+
#define FREE(array) { \
free(array); \
array = NULL; \
}
-/* Diagnostic functions */
+/* DECLARE(uch, window, 2L*WSIZE); */
+/* Sliding window. Input bytes are read into the second half of the window,
+ * and move to the first half later to keep a dictionary of at least WSIZE
+ * bytes. With this organization, matches are limited to a distance of
+ * WSIZE-MAX_MATCH bytes, but this ensures that IO is always
+ * performed with a length multiple of the block size. Also, it limits
+ * the window size to 64K, which is quite useful on MSDOS.
+ * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would
+ * be less efficient).
+ */
+
+/* DECLARE(Pos, prev, WSIZE); */
+/* Link to older string with same hash index. To limit the size of this
+ * array to 64K, this link is maintained only for the last 32K strings.
+ * An index in this array is thus a window index modulo 32K.
+ */
+
+/* DECLARE(Pos, head, 1<<HASH_BITS); */
+/* Heads of the hash chains or 0. */
+
+static long block_start;
+
+/* window position at the beginning of the current output block. Gets
+ * negative when the window is moved backwards.
+ */
+
+static unsigned ins_h; /* hash index of string to be inserted */
+
+#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
+/* Number of bits by which ins_h and del_h must be shifted at each
+ * input step. It must be such that after MIN_MATCH steps, the oldest
+ * byte no longer takes part in the hash key, that is:
+ * H_SHIFT * MIN_MATCH >= HASH_BITS
+ */
+
+static unsigned int prev_length;
+
+/* Length of the best match at previous step. Matches not greater than this
+ * are discarded. This is used in the lazy match evaluation.
+ */
+
+static unsigned strstart; /* start of string to insert */
+static unsigned match_start; /* start of matching string */
+static int eofile; /* flag set at end of input file */
+static unsigned lookahead; /* number of valid bytes ahead in window */
+
+enum {
+ WINDOW_SIZE = 2 * WSIZE,
+/* window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the
+ * input file length plus MIN_LOOKAHEAD.
+ */
+
+ max_chain_length = 4096,
+/* To speed up deflation, hash chains are never searched beyond this length.
+ * A higher limit improves compression ratio but degrades the speed.
+ */
+
+ max_lazy_match = 258,
+/* Attempt to find a better match only when the current match is strictly
+ * smaller than this value. This mechanism is used only for compression
+ * levels >= 4.
+ */
+
+ max_insert_length = max_lazy_match,
+/* Insert new strings in the hash table only if the match length
+ * is not greater than this length. This saves time but degrades compression.
+ * max_insert_length is used only for compression levels <= 3.
+ */
+
+ good_match = 32,
+/* Use a faster search when the previous match is longer than this */
+
+/* Values for max_lazy_match, good_match and max_chain_length, depending on
+ * the desired pack level (0..9). The values given below have been tuned to
+ * exclude worst case performance for pathological files. Better values may be
+ * found for specific files.
+ */
+
+ nice_match = 258 /* Stop searching when current match exceeds this */
+/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
+ * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
+ * meaning.
+ */
+};
+
+
+/* ===========================================================================
+ * Prototypes for local functions.
+ */
+static void fill_window(void);
+
+static int longest_match(IPos cur_match);
+
#ifdef DEBUG
-# define Assert(cond,msg) {if(!(cond)) bb_error_msg(msg);}
-# define Trace(x) fprintf x
-# define Tracev(x) {if (verbose) fprintf x ;}
-# define Tracevv(x) {if (verbose > 1) fprintf x ;}
-# define Tracec(c,x) {if (verbose && (c)) fprintf x ;}
-# define Tracecv(c,x) {if (verbose > 1 && (c)) fprintf x ;}
-#else
-# define Assert(cond,msg)
-# define Trace(x)
-# define Tracev(x)
-# define Tracevv(x)
-# define Tracec(c,x)
-# define Tracecv(c,x)
+static void check_match(IPos start, IPos match, int length);
#endif
-typedef unsigned char uch;
-typedef unsigned short ush;
-typedef unsigned long ulg;
/* from zip.c: */
@@ -183,7 +342,6 @@ static void copy_block(char *buf, unsigned len, int header);
* is done in window except for unlzw.
*/
-
#define tab_suffix window
#define tab_prefix prev /* hash link (see deflate.c) */
#define head (prev+WSIZE) /* hash head (see deflate.c) */
@@ -310,16 +468,10 @@ static void clear_bufs(void)
static uint32_t crc; /* shift register contents */
static uint32_t updcrc(uch * s, unsigned n)
{
- uint32_t c; /* temporary variable */
-
- if (s == NULL) {
- c = ~0;
- } else {
- c = crc;
- while (n) {
- c = crc_32_tab[(uch)(c ^ *s++)] ^ (c >> 8);
- n--;
- }
+ uint32_t c = crc;
+ while (n) {
+ c = crc_32_tab[(uch)(c ^ *s++)] ^ (c >> 8);
+ n--;
}
crc = c;
return c;
@@ -387,6 +539,7 @@ static void send_bits(int value, int length)
}
}
+
/* ===========================================================================
* Reverse the first len bits of a code, using straightforward code (a faster
* method would use a table)
@@ -403,6 +556,7 @@ static unsigned bi_reverse(unsigned code, int len)
return res >> 1;
}
+
/* ===========================================================================
* Write out any remaining bits in an incomplete byte.
*/
@@ -420,6 +574,7 @@ static void bi_windup(void)
#endif
}
+
/* ===========================================================================
* Copy a stored block to the zip file, storing first the length and its
* one's complement if requested.
@@ -443,164 +598,6 @@ static void copy_block(char *buf, unsigned len, int header)
}
}
-/* ===========================================================================
- * Configuration parameters
- */
-
-/* Compile with MEDIUM_MEM to reduce the memory requirements or
- * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the
- * entire input file can be held in memory (not possible on 16 bit systems).
- * Warning: defining these symbols affects HASH_BITS (see below) and thus
- * affects the compression ratio. The compressed output
- * is still correct, and might even be smaller in some cases.
- */
-
-#ifdef SMALL_MEM
-# define HASH_BITS 13 /* Number of bits used to hash strings */
-#endif
-#ifdef MEDIUM_MEM
-# define HASH_BITS 14
-#endif
-#ifndef HASH_BITS
-# define HASH_BITS 15
- /* For portability to 16 bit machines, do not use values above 15. */
-#endif
-
-/* To save space (see unlzw.c), we overlay prev+head with tab_prefix and
- * window with tab_suffix. Check that we can do this:
- */
-#if (WSIZE<<1) > (1<<BITS)
-# error cannot overlay window with tab_suffix and prev with tab_prefix0
-#endif
-#if HASH_BITS > BITS-1
-# error cannot overlay head with tab_prefix1
-#endif
-#define HASH_SIZE (unsigned)(1<<HASH_BITS)
-#define HASH_MASK (HASH_SIZE-1)
-#define WMASK (WSIZE-1)
-/* HASH_SIZE and WSIZE must be powers of two */
-#define NIL 0
-/* Tail of hash chains */
-#define FAST 4
-#define SLOW 2
-/* speed options for the general purpose bit flag */
-#ifndef TOO_FAR
-# define TOO_FAR 4096
-#endif
-/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */
-/* ===========================================================================
- * Local data used by the "longest match" routines.
- */
-typedef ush Pos;
-typedef unsigned IPos;
-
-/* A Pos is an index in the character window. We use short instead of int to
- * save space in the various tables. IPos is used only for parameter passing.
- */
-
-/* DECLARE(uch, window, 2L*WSIZE); */
-/* Sliding window. Input bytes are read into the second half of the window,
- * and move to the first half later to keep a dictionary of at least WSIZE
- * bytes. With this organization, matches are limited to a distance of
- * WSIZE-MAX_MATCH bytes, but this ensures that IO is always
- * performed with a length multiple of the block size. Also, it limits
- * the window size to 64K, which is quite useful on MSDOS.
- * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would
- * be less efficient).
- */
-
-/* DECLARE(Pos, prev, WSIZE); */
-/* Link to older string with same hash index. To limit the size of this
- * array to 64K, this link is maintained only for the last 32K strings.
- * An index in this array is thus a window index modulo 32K.
- */
-
-/* DECLARE(Pos, head, 1<<HASH_BITS); */
-/* Heads of the hash chains or NIL. */
-
-static const ulg window_size = (ulg) 2 * WSIZE;
-
-/* window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the
- * input file length plus MIN_LOOKAHEAD.
- */
-
-static long block_start;
-
-/* window position at the beginning of the current output block. Gets
- * negative when the window is moved backwards.
- */
-
-static unsigned ins_h; /* hash index of string to be inserted */
-
-#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH)
-/* Number of bits by which ins_h and del_h must be shifted at each
- * input step. It must be such that after MIN_MATCH steps, the oldest
- * byte no longer takes part in the hash key, that is:
- * H_SHIFT * MIN_MATCH >= HASH_BITS
- */
-
-static unsigned int prev_length;
-
-/* Length of the best match at previous step. Matches not greater than this
- * are discarded. This is used in the lazy match evaluation.
- */
-
-static unsigned strstart; /* start of string to insert */
-static unsigned match_start; /* start of matching string */
-static int eofile; /* flag set at end of input file */
-static unsigned lookahead; /* number of valid bytes ahead in window */
-
-enum {
- max_chain_length = 4096,
-
-/* To speed up deflation, hash chains are never searched beyond this length.
- * A higher limit improves compression ratio but degrades the speed.
- */
-
- max_lazy_match = 258,
-
-/* Attempt to find a better match only when the current match is strictly
- * smaller than this value. This mechanism is used only for compression
- * levels >= 4.
- */
- max_insert_length = max_lazy_match,
-/* Insert new strings in the hash table only if the match length
- * is not greater than this length. This saves time but degrades compression.
- * max_insert_length is used only for compression levels <= 3.
- */
-
- good_match = 32,
-
-/* Use a faster search when the previous match is longer than this */
-
-
-/* Values for max_lazy_match, good_match and max_chain_length, depending on
- * the desired pack level (0..9). The values given below have been tuned to
- * exclude worst case performance for pathological files. Better values may be
- * found for specific files.
- */
-
- nice_match = 258 /* Stop searching when current match exceeds this */
-
-/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4
- * For deflate_fast() (levels <= 3) good is ignored and lazy has a different
- * meaning.
- */
-};
-
-#define EQUAL 0
-/* result of memcmp for equal strings */
-
-/* ===========================================================================
- * Prototypes for local functions.
- */
-static void fill_window(void);
-
-static int longest_match(IPos cur_match);
-
-#ifdef DEBUG
-static void check_match(IPos start, IPos match, int length);
-#endif
/* ===========================================================================
* Update a hash value with the given input byte
@@ -634,7 +631,8 @@ static void lm_init(ush * flags)
memset(head, 0, HASH_SIZE * sizeof(*head));
/* prev will be initialized on the fly */
- *flags |= SLOW;
+ /*speed options for the general purpose bit flag */
+ *flags |= 2; /* FAST 4, SLOW 2 */
/* ??? reduce max_chain_length for binary files */
strstart = 0;
@@ -702,7 +700,7 @@ static int longest_match(IPos cur_match)
if (prev_length >= good_match) {
chain_length >>= 2;
}
- Assert(strstart <= window_size - MIN_LOOKAHEAD, "insufficient lookahead");
+ Assert(strstart <= WINDOW_SIZE - MIN_LOOKAHEAD, "insufficient lookahead");
do {
Assert(cur_match < strstart, "no future");
@@ -750,6 +748,7 @@ static int longest_match(IPos cur_match)
return best_len;
}
+
#ifdef DEBUG
/* ===========================================================================
* Check that the match at match_start is indeed a match.
@@ -757,7 +756,7 @@ static int longest_match(IPos cur_match)
static void check_match(IPos start, IPos match, int length)
{
/* check that the match is indeed a match */
- if (memcmp(window + match, window + start, length) != EQUAL) {
+ if (memcmp(window + match, window + start, length) != 0) {
bb_error_msg(" start %d, match %d, length %d", start, match, length);
bb_error_msg("invalid match");
}
@@ -769,9 +768,10 @@ static void check_match(IPos start, IPos match, int length)
}
}
#else
-# define check_match(start, match, length)
+# define check_match(start, match, length) ((void)0)
#endif
+
/* ===========================================================================
* Fill the window when the lookahead becomes insufficient.
* Updates strstart and lookahead, and sets eofile if end of input file.
@@ -783,7 +783,7 @@ static void check_match(IPos start, IPos match, int length)
static void fill_window(void)
{
unsigned n, m;
- unsigned more = window_size - lookahead - strstart;
+ unsigned more = WINDOW_SIZE - lookahead - strstart;
/* Amount of free space at the end of the window. */
/* If the window is almost full and there is insufficient lookahead,
@@ -798,21 +798,21 @@ static void fill_window(void)
/* By the IN assertion, the window is not empty so we can't confuse
* more == 0 with more == 64K on a 16 bit machine.
*/
- Assert(window_size == (ulg) 2 * WSIZE, "no sliding with BIG_MEM");
+ Assert(WINDOW_SIZE == 2 * WSIZE, "no sliding with BIG_MEM");
memcpy(window, window + WSIZE, WSIZE);
match_start -= WSIZE;
strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */
- block_start -= (long) WSIZE;
+ block_start -= WSIZE;
for (n = 0; n < HASH_SIZE; n++) {
m = head[n];
- head[n] = (Pos) (m >= WSIZE ? m - WSIZE : NIL);
+ head[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0);
}
for (n = 0; n < WSIZE; n++) {
m = prev[n];
- prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : NIL);
+ prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0);
/* If n is not on any hash chain, prev[n] is garbage but
* its value will never be used.
*/
@@ -830,15 +830,20 @@ static void fill_window(void)
}
}
+
/* ===========================================================================
* Flush the current block, with given end-of-file flag.
* IN assertion: strstart is set to the end of the current match.
*/
#define FLUSH_BLOCK(eof) \
- flush_block(block_start >= 0L \
- ? (char*)&window[(unsigned)block_start] \
- : (char*)NULL, \
- (long)strstart - block_start, (eof))
+ flush_block( \
+ block_start >= 0L \
+ ? (char*)&window[(unsigned)block_start] \
+ : (char*)NULL, \
+ (long)strstart - block_start, \
+ (eof) \
+ )
+
/* ===========================================================================
* Same as above, but achieves better compression. We use a lazy
@@ -915,8 +920,10 @@ static ulg deflate(void)
match_available = 0;
match_length = MIN_MATCH - 1;
strstart++;
- if (flush)
- FLUSH_BLOCK(0), block_start = strstart;
+ if (flush) {
+ FLUSH_BLOCK(0);
+ block_start = strstart;
+ }
} else if (match_available) {
/* If there was no match at the previous position, output a
* single literal. If there was a match but the current match
@@ -924,7 +931,8 @@ static ulg deflate(void)
*/
Tracevv((stderr, "%c", window[strstart - 1]));
if (ct_tally(0, window[strstart - 1])) {
- FLUSH_BLOCK(0), block_start = strstart;
+ FLUSH_BLOCK(0);
+ block_start = strstart;
}
strstart++;
lookahead--;
@@ -2246,7 +2254,7 @@ static int zip(int in, int out)
ct_init(&attr, &method);
lm_init(&deflate_flags);
- put_8bit((uch) deflate_flags); /* extra flags */
+ put_8bit(deflate_flags); /* extra flags */
put_8bit(3); /* OS identifier = 3 (Unix) */
deflate();