From 3ae6f34135af68818aced918f675c525d5c5a4cf Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Sun, 7 Jan 2007 19:44:57 +0000 Subject: gzip cleanup part #12 --- archival/gzip.c | 403 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 200 insertions(+), 203 deletions(-) diff --git a/archival/gzip.c b/archival/gzip.c index 17010438a..632dd4a14 100644 --- a/archival/gzip.c +++ b/archival/gzip.c @@ -39,8 +39,6 @@ gzip: bogus: No such file or directory aa: 85.1% -- replaced with aa.gz */ - -//#include #include "busybox.h" @@ -195,10 +193,12 @@ aa: 85.1% -- replaced with aa.gz /* =========================================================================== + * These types are not really 'char', 'short' and 'long' */ -typedef unsigned char uch; -typedef unsigned short ush; -typedef unsigned long ulg; +typedef uint8_t uch; +typedef uint16_t ush; +typedef uint32_t ulg; +typedef uint32_t lng; /* =========================================================================== @@ -211,7 +211,7 @@ typedef unsigned IPos; * save space in the various tables. IPos is used only for parameter passing. */ -#define DECLARE(type, array, size)\ +#define DECLARE(type, array, size) \ static type * array #define ALLOC(type, array, size) { \ array = xzalloc((size_t)(((size)+1L)/2) * 2*sizeof(type)); \ @@ -223,7 +223,7 @@ typedef unsigned IPos; array = NULL; \ } -static long block_start; +static lng block_start; /* window position at the beginning of the current output block. Gets * negative when the window is moved backwards. @@ -339,12 +339,15 @@ DECLARE(ush, d_buf, DIST_BUFSIZE); DECLARE(uch, window, 2L * WSIZE); DECLARE(ush, prev, 1L << BITS); -static long isize; /* number of input bytes */ +/* number of input bytes */ +static ulg isize; /* only 32 bits stored in .gz file */ static int foreground; /* set if program run in foreground */ static int method = DEFLATED; /* compression method */ static int exit_code; /* program exit code */ -static long time_stamp; /* original time stamp (modification time) */ + +/* original time stamp (modification time) */ +static ulg time_stamp; /* only 32 bits stored in .gz file */ ////static char z_suffix[MAX_SUFFIX + 1]; /* default suffix (can be set with --suffix) */ static int ifd; /* input file descriptor */ @@ -425,15 +428,6 @@ static void put_32bit(ulg n) put_16bit(n >> 16); } -/* put_header_byte is used for the compressed output - * - for the initial 4 bytes that can't overflow the buffer. - */ -#define put_header_byte(c) \ -{ \ - outbuf[outcnt++] = (c); \ -} - - /* =========================================================================== * Clear input and output buffers */ @@ -443,7 +437,7 @@ static void clear_bufs(void) #ifdef DEBUG insize = 0; #endif - isize = 0L; + isize = 0; } @@ -486,20 +480,6 @@ static unsigned file_read(void *buf, unsigned size) } -/* =========================================================================== - * Initialize the bit string routines. - */ -static void bi_init(int zipfile) -{ - zfile = zipfile; - bi_buf = 0; - bi_valid = 0; -#ifdef DEBUG - bits_sent = 0L; -#endif -} - - /* =========================================================================== * Send a value on a given number of bits. * IN assertion: length <= 16 and value fits in length bits. @@ -646,56 +626,6 @@ static void fill_window(void) } -/* =========================================================================== - * Update a hash value with the given input byte - * IN assertion: all calls to to UPDATE_HASH are made with consecutive - * input characters, so that a running hash key can be computed from the - * previous key instead of complete recalculation each time. - */ -#define UPDATE_HASH(h, c) (h = (((h)< length code (0..28) */ - length = 0; - for (code = 0; code < LENGTH_CODES - 1; code++) { - base_length[code] = length; - for (n = 0; n < (1 << extra_lbits[code]); n++) { - length_code[length++] = code; - } - } - Assert(length == 256, "ct_init: length != 256"); - /* Note that the length 255 (match length 258) can be represented - * in two different ways: code 284 + 5 bits or code 285, so we - * overwrite length_code[255] to use the best encoding: - */ - length_code[length - 1] = code; - - /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ - dist = 0; - for (code = 0; code < 16; code++) { - base_dist[code] = dist; - for (n = 0; n < (1 << extra_dbits[code]); n++) { - dist_code[dist++] = code; - } - } - Assert(dist == 256, "ct_init: dist != 256"); - dist >>= 7; /* from now on, all distances are divided by 128 */ - for (; code < D_CODES; code++) { - base_dist[code] = dist << 7; - for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) { - dist_code[256 + dist++] = code; - } - } - Assert(dist == 256, "ct_init: 256+dist != 512"); - - /* Construct the codes of the static literal tree */ - /* already zeroed - it's in bss - for (bits = 0; bits <= MAX_BITS; bits++) - bl_count[bits] = 0; */ - - n = 0; - while (n <= 143) { - static_ltree[n++].Len = 8; - bl_count[8]++; - } - while (n <= 255) { - static_ltree[n++].Len = 9; - bl_count[9]++; - } - while (n <= 279) { - static_ltree[n++].Len = 7; - bl_count[7]++; - } - while (n <= 287) { - static_ltree[n++].Len = 8; - bl_count[8]++; - } - /* Codes 286 and 287 do not exist, but we must include them in the - * tree construction to get a canonical Huffman tree (longest code - * all ones) - */ - gen_codes((ct_data *) static_ltree, L_CODES + 1); - - /* The static distance tree is trivial: */ - for (n = 0; n < D_CODES; n++) { - static_dtree[n].Len = 5; - static_dtree[n].Code = bi_reverse(n, 5); - } - - /* Initialize the first block of the first file: */ - init_block(); -} - - /* =========================================================================== * Restore the heap property by moving down the tree starting at node k, * exchanging a node with the smallest of its two sons if necessary, stopping @@ -1364,8 +1198,8 @@ static void gen_bitlen(tree_desc * desc) continue; if (tree[m].Len != (unsigned) bits) { Trace((stderr, "code %d bits %d->%d\n", m, tree[m].Len, bits)); - opt_len += ((long) bits - (long) tree[m].Len) * (long) tree[m].Freq; - tree[m].Len = (ush) bits; + opt_len += ((int32_t) bits - tree[m].Len) * tree[m].Freq; + tree[m].Len = bits; } n--; } @@ -1840,10 +1674,10 @@ static void compress_block(ct_data * ltree, ct_data * dtree) */ static ulg flush_block(char *buf, ulg stored_len, int eof) { - ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ - int max_blindex; /* index of last bit length code of non zero freq */ + ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */ + int max_blindex; /* index of last bit length code of non zero freq */ - flag_buf[last_flags] = flags; /* Save the flags for the last 8 items */ + flag_buf[last_flags] = flags; /* Save the flags for the last 8 items */ /* Check if the file is ascii or binary */ if (*file_type == (ush) UNKNOWN) @@ -1889,7 +1723,7 @@ static ulg flush_block(char *buf, ulg stored_len, int eof) compressed_len = stored_len << 3; *file_method = STORED; - } else if (stored_len + 4 <= opt_lenb && buf != (char *) 0) { + } else if (stored_len + 4 <= opt_lenb && buf != NULL) { /* 4: two words for the lengths */ /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE. * Otherwise we can't have processed more than WSIZE input bytes since @@ -1928,6 +1762,15 @@ static ulg flush_block(char *buf, ulg stored_len, int eof) } +/* =========================================================================== + * Update a hash value with the given input byte + * IN assertion: all calls to to UPDATE_HASH are made with consecutive + * input characters, so that a running hash key can be computed from the + * previous key instead of complete recalculation each time. + */ +#define UPDATE_HASH(h, c) (h = (((h)<= 0L \ ? (char*)&window[(unsigned)block_start] \ : (char*)NULL, \ - (long)strstart - block_start, \ + (ulg)strstart - block_start, \ (eof) \ ) @@ -2067,11 +1910,167 @@ static ulg deflate(void) } +/* =========================================================================== + * Initialize the bit string routines. + */ +static void bi_init(int zipfile) +{ + zfile = zipfile; + bi_buf = 0; + bi_valid = 0; +#ifdef DEBUG + bits_sent = 0L; +#endif +} + + +/* =========================================================================== + * Initialize the "longest match" routines for a new file + */ +static void lm_init(ush * flags) +{ + unsigned j; + + /* Initialize the hash table. */ + memset(head, 0, HASH_SIZE * sizeof(*head)); + /* prev will be initialized on the fly */ + + /*speed options for the general purpose bit flag */ + *flags |= 2; /* FAST 4, SLOW 2 */ + /* ??? reduce max_chain_length for binary files */ + + strstart = 0; + block_start = 0L; + + lookahead = file_read(window, + sizeof(int) <= 2 ? (unsigned) WSIZE : 2 * WSIZE); + + if (lookahead == 0 || lookahead == (unsigned) -1) { + eofile = 1; + lookahead = 0; + return; + } + eofile = 0; + /* Make sure that we always have enough lookahead. This is important + * if input comes from a device such as a tty. + */ + while (lookahead < MIN_LOOKAHEAD && !eofile) + fill_window(); + + ins_h = 0; + for (j = 0; j < MIN_MATCH - 1; j++) + UPDATE_HASH(ins_h, window[j]); + /* If lookahead < MIN_MATCH, ins_h is garbage, but this is + * not important since only literal bytes will be emitted. + */ +} + + +/* =========================================================================== + * Allocate the match buffer, initialize the various tables and save the + * location of the internal file attribute (ascii/binary) and method + * (DEFLATE/STORE). + * One callsite in zip() + */ +static void ct_init(ush * attr, int *methodp) +{ + int n; /* iterates over tree elements */ + int bits; /* bit counter */ + int length; /* length value */ + int code; /* code value */ + int dist; /* distance index */ + + file_type = attr; + file_method = methodp; + compressed_len = 0L; + +#ifdef NOT_NEEDED + if (static_dtree[0].Len != 0) + return; /* ct_init already called */ +#endif + + /* Initialize the mapping length (0..255) -> length code (0..28) */ + length = 0; + for (code = 0; code < LENGTH_CODES - 1; code++) { + base_length[code] = length; + for (n = 0; n < (1 << extra_lbits[code]); n++) { + length_code[length++] = code; + } + } + Assert(length == 256, "ct_init: length != 256"); + /* Note that the length 255 (match length 258) can be represented + * in two different ways: code 284 + 5 bits or code 285, so we + * overwrite length_code[255] to use the best encoding: + */ + length_code[length - 1] = code; + + /* Initialize the mapping dist (0..32K) -> dist code (0..29) */ + dist = 0; + for (code = 0; code < 16; code++) { + base_dist[code] = dist; + for (n = 0; n < (1 << extra_dbits[code]); n++) { + dist_code[dist++] = code; + } + } + Assert(dist == 256, "ct_init: dist != 256"); + dist >>= 7; /* from now on, all distances are divided by 128 */ + for (; code < D_CODES; code++) { + base_dist[code] = dist << 7; + for (n = 0; n < (1 << (extra_dbits[code] - 7)); n++) { + dist_code[256 + dist++] = code; + } + } + Assert(dist == 256, "ct_init: 256+dist != 512"); + + /* Construct the codes of the static literal tree */ + /* already zeroed - it's in bss + for (bits = 0; bits <= MAX_BITS; bits++) + bl_count[bits] = 0; */ + + n = 0; + while (n <= 143) { + static_ltree[n++].Len = 8; + bl_count[8]++; + } + while (n <= 255) { + static_ltree[n++].Len = 9; + bl_count[9]++; + } + while (n <= 279) { + static_ltree[n++].Len = 7; + bl_count[7]++; + } + while (n <= 287) { + static_ltree[n++].Len = 8; + bl_count[8]++; + } + /* Codes 286 and 287 do not exist, but we must include them in the + * tree construction to get a canonical Huffman tree (longest code + * all ones) + */ + gen_codes((ct_data *) static_ltree, L_CODES + 1); + + /* The static distance tree is trivial: */ + for (n = 0; n < D_CODES; n++) { + static_dtree[n].Len = 5; + static_dtree[n].Code = bi_reverse(n, 5); + } + + /* Initialize the first block of the first file: */ + init_block(); +} + + /* =========================================================================== * Deflate in to out. * IN assertions: the input and output buffers are cleared. * The variables time_stamp and save_orig_name are initialized. */ + +/* put_header_byte is used for the compressed output + * - for the initial 4 bytes that can't overflow the buffer. */ +#define put_header_byte(c) outbuf[outcnt++] = (c) + static int zip(int in, int out) { uch my_flags = 0; /* general purpose bit flags */ @@ -2085,12 +2084,10 @@ static int zip(int in, int out) /* Write the header to the gzip file. See algorithm.doc for the format */ method = DEFLATED; - put_header_byte(0x1f); /* magic header for gzip files, 1F 8B */ + put_header_byte(0x1f); /* magic header for gzip files, 1F 8B */ put_header_byte(0x8b); - - put_header_byte(DEFLATED); /* compression method */ - - put_header_byte(my_flags); /* general flags */ + put_header_byte(DEFLATED); /* compression method */ + put_header_byte(my_flags); /* general flags */ put_32bit(time_stamp); /* Write deflated file to zip file */ -- cgit v1.2.3