diff options
author | Denis Vlasenko <vda.linux@googlemail.com> | 2007-01-07 19:39:02 +0000 |
---|---|---|
committer | Denis Vlasenko <vda.linux@googlemail.com> | 2007-01-07 19:39:02 +0000 |
commit | da31fbc1b1dec08a18fc94d728b8d080f4a503d3 (patch) | |
tree | 4c1d83e0642db1a4d4241826901c440182adccd8 | |
parent | f824136f6b21558ac023bc53bcb69294e0676103 (diff) | |
download | busybox-da31fbc1b1dec08a18fc94d728b8d080f4a503d3.tar.gz |
gzip cleanup part #5
-rw-r--r-- | archival/gzip.c | 416 |
1 files changed, 212 insertions, 204 deletions
diff --git a/archival/gzip.c b/archival/gzip.c index 4f47a2782..0ef25727d 100644 --- a/archival/gzip.c +++ b/archival/gzip.c @@ -23,11 +23,36 @@ gzip: bogus: No such file or directory aa: 85.1% -- replaced with aa.gz */ -#define SMALL_MEM //#include <dirent.h> #include "busybox.h" + +/* =========================================================================== + */ +//#define DEBUG 1 +/* Diagnostic functions */ +#ifdef DEBUG +# define Assert(cond,msg) {if(!(cond)) bb_error_msg(msg);} +# define Trace(x) fprintf x +# define Tracev(x) {if (verbose) fprintf x ;} +# define Tracevv(x) {if (verbose > 1) fprintf x ;} +# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} +# define Tracecv(c,x) {if (verbose > 1 && (c)) fprintf x ;} +#else +# define Assert(cond,msg) +# define Trace(x) +# define Tracev(x) +# define Tracevv(x) +# define Tracec(c,x) +# define Tracecv(c,x) +#endif + + +/* =========================================================================== + */ +#define SMALL_MEM + /* Compression methods (see algorithm.doc) */ /* Only STORED and DEFLATED are supported by this BusyBox module */ #define STORED 0 @@ -121,36 +146,170 @@ aa: 85.1% -- replaced with aa.gz #endif +/* =========================================================================== + * Compile with MEDIUM_MEM to reduce the memory requirements or + * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the + * entire input file can be held in memory (not possible on 16 bit systems). + * Warning: defining these symbols affects HASH_BITS (see below) and thus + * affects the compression ratio. The compressed output + * is still correct, and might even be smaller in some cases. + */ + +#ifdef SMALL_MEM +# define HASH_BITS 13 /* Number of bits used to hash strings */ +#endif +#ifdef MEDIUM_MEM +# define HASH_BITS 14 +#endif +#ifndef HASH_BITS +# define HASH_BITS 15 + /* For portability to 16 bit machines, do not use values above 15. */ +#endif + +/* To save space (see unlzw.c), we overlay prev+head with tab_prefix and + * window with tab_suffix. Check that we can do this: + */ +#if (WSIZE<<1) > (1<<BITS) +# error cannot overlay window with tab_suffix and prev with tab_prefix0 +#endif +#if HASH_BITS > BITS-1 +# error cannot overlay head with tab_prefix1 +#endif +#define HASH_SIZE (unsigned)(1<<HASH_BITS) +#define HASH_MASK (HASH_SIZE-1) +#define WMASK (WSIZE-1) +/* HASH_SIZE and WSIZE must be powers of two */ +#ifndef TOO_FAR +# define TOO_FAR 4096 +#endif +/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ + + +/* =========================================================================== + */ +typedef unsigned char uch; +typedef unsigned short ush; +typedef unsigned long ulg; + + +/* =========================================================================== + * Local data used by the "longest match" routines. + */ +typedef ush Pos; +typedef unsigned IPos; + +/* A Pos is an index in the character window. We use short instead of int to + * save space in the various tables. IPos is used only for parameter passing. + */ + #define DECLARE(type, array, size)\ static type * array #define ALLOC(type, array, size) { \ array = xzalloc((size_t)(((size)+1L)/2) * 2*sizeof(type)); \ } + #define FREE(array) { \ free(array); \ array = NULL; \ } -/* Diagnostic functions */ +/* DECLARE(uch, window, 2L*WSIZE); */ +/* Sliding window. Input bytes are read into the second half of the window, + * and move to the first half later to keep a dictionary of at least WSIZE + * bytes. With this organization, matches are limited to a distance of + * WSIZE-MAX_MATCH bytes, but this ensures that IO is always + * performed with a length multiple of the block size. Also, it limits + * the window size to 64K, which is quite useful on MSDOS. + * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would + * be less efficient). + */ + +/* DECLARE(Pos, prev, WSIZE); */ +/* Link to older string with same hash index. To limit the size of this + * array to 64K, this link is maintained only for the last 32K strings. + * An index in this array is thus a window index modulo 32K. + */ + +/* DECLARE(Pos, head, 1<<HASH_BITS); */ +/* Heads of the hash chains or 0. */ + +static long block_start; + +/* window position at the beginning of the current output block. Gets + * negative when the window is moved backwards. + */ + +static unsigned ins_h; /* hash index of string to be inserted */ + +#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH) +/* Number of bits by which ins_h and del_h must be shifted at each + * input step. It must be such that after MIN_MATCH steps, the oldest + * byte no longer takes part in the hash key, that is: + * H_SHIFT * MIN_MATCH >= HASH_BITS + */ + +static unsigned int prev_length; + +/* Length of the best match at previous step. Matches not greater than this + * are discarded. This is used in the lazy match evaluation. + */ + +static unsigned strstart; /* start of string to insert */ +static unsigned match_start; /* start of matching string */ +static int eofile; /* flag set at end of input file */ +static unsigned lookahead; /* number of valid bytes ahead in window */ + +enum { + WINDOW_SIZE = 2 * WSIZE, +/* window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the + * input file length plus MIN_LOOKAHEAD. + */ + + max_chain_length = 4096, +/* To speed up deflation, hash chains are never searched beyond this length. + * A higher limit improves compression ratio but degrades the speed. + */ + + max_lazy_match = 258, +/* Attempt to find a better match only when the current match is strictly + * smaller than this value. This mechanism is used only for compression + * levels >= 4. + */ + + max_insert_length = max_lazy_match, +/* Insert new strings in the hash table only if the match length + * is not greater than this length. This saves time but degrades compression. + * max_insert_length is used only for compression levels <= 3. + */ + + good_match = 32, +/* Use a faster search when the previous match is longer than this */ + +/* Values for max_lazy_match, good_match and max_chain_length, depending on + * the desired pack level (0..9). The values given below have been tuned to + * exclude worst case performance for pathological files. Better values may be + * found for specific files. + */ + + nice_match = 258 /* Stop searching when current match exceeds this */ +/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 + * For deflate_fast() (levels <= 3) good is ignored and lazy has a different + * meaning. + */ +}; + + +/* =========================================================================== + * Prototypes for local functions. + */ +static void fill_window(void); + +static int longest_match(IPos cur_match); + #ifdef DEBUG -# define Assert(cond,msg) {if(!(cond)) bb_error_msg(msg);} -# define Trace(x) fprintf x -# define Tracev(x) {if (verbose) fprintf x ;} -# define Tracevv(x) {if (verbose > 1) fprintf x ;} -# define Tracec(c,x) {if (verbose && (c)) fprintf x ;} -# define Tracecv(c,x) {if (verbose > 1 && (c)) fprintf x ;} -#else -# define Assert(cond,msg) -# define Trace(x) -# define Tracev(x) -# define Tracevv(x) -# define Tracec(c,x) -# define Tracecv(c,x) +static void check_match(IPos start, IPos match, int length); #endif -typedef unsigned char uch; -typedef unsigned short ush; -typedef unsigned long ulg; /* from zip.c: */ @@ -183,7 +342,6 @@ static void copy_block(char *buf, unsigned len, int header); * is done in window except for unlzw. */ - #define tab_suffix window #define tab_prefix prev /* hash link (see deflate.c) */ #define head (prev+WSIZE) /* hash head (see deflate.c) */ @@ -310,16 +468,10 @@ static void clear_bufs(void) static uint32_t crc; /* shift register contents */ static uint32_t updcrc(uch * s, unsigned n) { - uint32_t c; /* temporary variable */ - - if (s == NULL) { - c = ~0; - } else { - c = crc; - while (n) { - c = crc_32_tab[(uch)(c ^ *s++)] ^ (c >> 8); - n--; - } + uint32_t c = crc; + while (n) { + c = crc_32_tab[(uch)(c ^ *s++)] ^ (c >> 8); + n--; } crc = c; return c; @@ -387,6 +539,7 @@ static void send_bits(int value, int length) } } + /* =========================================================================== * Reverse the first len bits of a code, using straightforward code (a faster * method would use a table) @@ -403,6 +556,7 @@ static unsigned bi_reverse(unsigned code, int len) return res >> 1; } + /* =========================================================================== * Write out any remaining bits in an incomplete byte. */ @@ -420,6 +574,7 @@ static void bi_windup(void) #endif } + /* =========================================================================== * Copy a stored block to the zip file, storing first the length and its * one's complement if requested. @@ -443,164 +598,6 @@ static void copy_block(char *buf, unsigned len, int header) } } -/* =========================================================================== - * Configuration parameters - */ - -/* Compile with MEDIUM_MEM to reduce the memory requirements or - * with SMALL_MEM to use as little memory as possible. Use BIG_MEM if the - * entire input file can be held in memory (not possible on 16 bit systems). - * Warning: defining these symbols affects HASH_BITS (see below) and thus - * affects the compression ratio. The compressed output - * is still correct, and might even be smaller in some cases. - */ - -#ifdef SMALL_MEM -# define HASH_BITS 13 /* Number of bits used to hash strings */ -#endif -#ifdef MEDIUM_MEM -# define HASH_BITS 14 -#endif -#ifndef HASH_BITS -# define HASH_BITS 15 - /* For portability to 16 bit machines, do not use values above 15. */ -#endif - -/* To save space (see unlzw.c), we overlay prev+head with tab_prefix and - * window with tab_suffix. Check that we can do this: - */ -#if (WSIZE<<1) > (1<<BITS) -# error cannot overlay window with tab_suffix and prev with tab_prefix0 -#endif -#if HASH_BITS > BITS-1 -# error cannot overlay head with tab_prefix1 -#endif -#define HASH_SIZE (unsigned)(1<<HASH_BITS) -#define HASH_MASK (HASH_SIZE-1) -#define WMASK (WSIZE-1) -/* HASH_SIZE and WSIZE must be powers of two */ -#define NIL 0 -/* Tail of hash chains */ -#define FAST 4 -#define SLOW 2 -/* speed options for the general purpose bit flag */ -#ifndef TOO_FAR -# define TOO_FAR 4096 -#endif -/* Matches of length 3 are discarded if their distance exceeds TOO_FAR */ -/* =========================================================================== - * Local data used by the "longest match" routines. - */ -typedef ush Pos; -typedef unsigned IPos; - -/* A Pos is an index in the character window. We use short instead of int to - * save space in the various tables. IPos is used only for parameter passing. - */ - -/* DECLARE(uch, window, 2L*WSIZE); */ -/* Sliding window. Input bytes are read into the second half of the window, - * and move to the first half later to keep a dictionary of at least WSIZE - * bytes. With this organization, matches are limited to a distance of - * WSIZE-MAX_MATCH bytes, but this ensures that IO is always - * performed with a length multiple of the block size. Also, it limits - * the window size to 64K, which is quite useful on MSDOS. - * To do: limit the window size to WSIZE+BSZ if SMALL_MEM (the code would - * be less efficient). - */ - -/* DECLARE(Pos, prev, WSIZE); */ -/* Link to older string with same hash index. To limit the size of this - * array to 64K, this link is maintained only for the last 32K strings. - * An index in this array is thus a window index modulo 32K. - */ - -/* DECLARE(Pos, head, 1<<HASH_BITS); */ -/* Heads of the hash chains or NIL. */ - -static const ulg window_size = (ulg) 2 * WSIZE; - -/* window size, 2*WSIZE except for MMAP or BIG_MEM, where it is the - * input file length plus MIN_LOOKAHEAD. - */ - -static long block_start; - -/* window position at the beginning of the current output block. Gets - * negative when the window is moved backwards. - */ - -static unsigned ins_h; /* hash index of string to be inserted */ - -#define H_SHIFT ((HASH_BITS+MIN_MATCH-1)/MIN_MATCH) -/* Number of bits by which ins_h and del_h must be shifted at each - * input step. It must be such that after MIN_MATCH steps, the oldest - * byte no longer takes part in the hash key, that is: - * H_SHIFT * MIN_MATCH >= HASH_BITS - */ - -static unsigned int prev_length; - -/* Length of the best match at previous step. Matches not greater than this - * are discarded. This is used in the lazy match evaluation. - */ - -static unsigned strstart; /* start of string to insert */ -static unsigned match_start; /* start of matching string */ -static int eofile; /* flag set at end of input file */ -static unsigned lookahead; /* number of valid bytes ahead in window */ - -enum { - max_chain_length = 4096, - -/* To speed up deflation, hash chains are never searched beyond this length. - * A higher limit improves compression ratio but degrades the speed. - */ - - max_lazy_match = 258, - -/* Attempt to find a better match only when the current match is strictly - * smaller than this value. This mechanism is used only for compression - * levels >= 4. - */ - max_insert_length = max_lazy_match, -/* Insert new strings in the hash table only if the match length - * is not greater than this length. This saves time but degrades compression. - * max_insert_length is used only for compression levels <= 3. - */ - - good_match = 32, - -/* Use a faster search when the previous match is longer than this */ - - -/* Values for max_lazy_match, good_match and max_chain_length, depending on - * the desired pack level (0..9). The values given below have been tuned to - * exclude worst case performance for pathological files. Better values may be - * found for specific files. - */ - - nice_match = 258 /* Stop searching when current match exceeds this */ - -/* Note: the deflate() code requires max_lazy >= MIN_MATCH and max_chain >= 4 - * For deflate_fast() (levels <= 3) good is ignored and lazy has a different - * meaning. - */ -}; - -#define EQUAL 0 -/* result of memcmp for equal strings */ - -/* =========================================================================== - * Prototypes for local functions. - */ -static void fill_window(void); - -static int longest_match(IPos cur_match); - -#ifdef DEBUG -static void check_match(IPos start, IPos match, int length); -#endif /* =========================================================================== * Update a hash value with the given input byte @@ -634,7 +631,8 @@ static void lm_init(ush * flags) memset(head, 0, HASH_SIZE * sizeof(*head)); /* prev will be initialized on the fly */ - *flags |= SLOW; + /*speed options for the general purpose bit flag */ + *flags |= 2; /* FAST 4, SLOW 2 */ /* ??? reduce max_chain_length for binary files */ strstart = 0; @@ -702,7 +700,7 @@ static int longest_match(IPos cur_match) if (prev_length >= good_match) { chain_length >>= 2; } - Assert(strstart <= window_size - MIN_LOOKAHEAD, "insufficient lookahead"); + Assert(strstart <= WINDOW_SIZE - MIN_LOOKAHEAD, "insufficient lookahead"); do { Assert(cur_match < strstart, "no future"); @@ -750,6 +748,7 @@ static int longest_match(IPos cur_match) return best_len; } + #ifdef DEBUG /* =========================================================================== * Check that the match at match_start is indeed a match. @@ -757,7 +756,7 @@ static int longest_match(IPos cur_match) static void check_match(IPos start, IPos match, int length) { /* check that the match is indeed a match */ - if (memcmp(window + match, window + start, length) != EQUAL) { + if (memcmp(window + match, window + start, length) != 0) { bb_error_msg(" start %d, match %d, length %d", start, match, length); bb_error_msg("invalid match"); } @@ -769,9 +768,10 @@ static void check_match(IPos start, IPos match, int length) } } #else -# define check_match(start, match, length) +# define check_match(start, match, length) ((void)0) #endif + /* =========================================================================== * Fill the window when the lookahead becomes insufficient. * Updates strstart and lookahead, and sets eofile if end of input file. @@ -783,7 +783,7 @@ static void check_match(IPos start, IPos match, int length) static void fill_window(void) { unsigned n, m; - unsigned more = window_size - lookahead - strstart; + unsigned more = WINDOW_SIZE - lookahead - strstart; /* Amount of free space at the end of the window. */ /* If the window is almost full and there is insufficient lookahead, @@ -798,21 +798,21 @@ static void fill_window(void) /* By the IN assertion, the window is not empty so we can't confuse * more == 0 with more == 64K on a 16 bit machine. */ - Assert(window_size == (ulg) 2 * WSIZE, "no sliding with BIG_MEM"); + Assert(WINDOW_SIZE == 2 * WSIZE, "no sliding with BIG_MEM"); memcpy(window, window + WSIZE, WSIZE); match_start -= WSIZE; strstart -= WSIZE; /* we now have strstart >= MAX_DIST: */ - block_start -= (long) WSIZE; + block_start -= WSIZE; for (n = 0; n < HASH_SIZE; n++) { m = head[n]; - head[n] = (Pos) (m >= WSIZE ? m - WSIZE : NIL); + head[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0); } for (n = 0; n < WSIZE; n++) { m = prev[n]; - prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : NIL); + prev[n] = (Pos) (m >= WSIZE ? m - WSIZE : 0); /* If n is not on any hash chain, prev[n] is garbage but * its value will never be used. */ @@ -830,15 +830,20 @@ static void fill_window(void) } } + /* =========================================================================== * Flush the current block, with given end-of-file flag. * IN assertion: strstart is set to the end of the current match. */ #define FLUSH_BLOCK(eof) \ - flush_block(block_start >= 0L \ - ? (char*)&window[(unsigned)block_start] \ - : (char*)NULL, \ - (long)strstart - block_start, (eof)) + flush_block( \ + block_start >= 0L \ + ? (char*)&window[(unsigned)block_start] \ + : (char*)NULL, \ + (long)strstart - block_start, \ + (eof) \ + ) + /* =========================================================================== * Same as above, but achieves better compression. We use a lazy @@ -915,8 +920,10 @@ static ulg deflate(void) match_available = 0; match_length = MIN_MATCH - 1; strstart++; - if (flush) - FLUSH_BLOCK(0), block_start = strstart; + if (flush) { + FLUSH_BLOCK(0); + block_start = strstart; + } } else if (match_available) { /* If there was no match at the previous position, output a * single literal. If there was a match but the current match @@ -924,7 +931,8 @@ static ulg deflate(void) */ Tracevv((stderr, "%c", window[strstart - 1])); if (ct_tally(0, window[strstart - 1])) { - FLUSH_BLOCK(0), block_start = strstart; + FLUSH_BLOCK(0); + block_start = strstart; } strstart++; lookahead--; @@ -2246,7 +2254,7 @@ static int zip(int in, int out) ct_init(&attr, &method); lm_init(&deflate_flags); - put_8bit((uch) deflate_flags); /* extra flags */ + put_8bit(deflate_flags); /* extra flags */ put_8bit(3); /* OS identifier = 3 (Unix) */ deflate(); |