From 917892ea7d676c0937f1921bb126b200b2dea329 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Thu, 2 Aug 2018 15:47:49 -0500 Subject: Move pending/compress.c to lib/deflate.c, first pass at genericizing it. --- lib/deflate.c | 485 +++++++++++++++++++++++++++++++++++++++++++++ toys/pending/compress.c | 513 ------------------------------------------------ 2 files changed, 485 insertions(+), 513 deletions(-) create mode 100644 lib/deflate.c delete mode 100644 toys/pending/compress.c diff --git a/lib/deflate.c b/lib/deflate.c new file mode 100644 index 00000000..2a6274e7 --- /dev/null +++ b/lib/deflate.c @@ -0,0 +1,485 @@ +/* deflate.c - deflate/inflate code for gzip and friends + * + * Copyright 2014 Rob Landley + * + * See RFCs 1950 (zlib), 1951 (deflate), and 1952 (gzip) + * LSB 4.1 has gzip, gunzip, and zcat + * + * TODO: zip -d DIR -x LIST -list -quiet -no overwrite -overwrite -p to stdout + */ + +#include "toys.h" + +struct deflate { + // Huffman codes: base offset and extra bits tables (length and distance) + char lenbits[29], distbits[30]; + unsigned short lenbase[29], distbase[30]; + void *fixdisthuff, *fixlithuff; + + // CRC + void (*crcfunc)(struct deflate *dd, char *data, int len); + unsigned crctable[256], crc; + + + // Tables only used for deflation + unsigned short *hashhead, *hashchain; + + // Compressed data buffer (extra space malloced at end) + unsigned pos, len; + int infd, outfd; + char data[]; +}; + +// little endian bit buffer +struct bitbuf { + int fd, bitpos, len, max; + char buf[]; +}; + +// malloc a struct bitbuf +struct bitbuf *bitbuf_init(int fd, int size) +{ + struct bitbuf *bb = xzalloc(sizeof(struct bitbuf)+size); + + bb->max = size; + bb->fd = fd; + + return bb; +} + +// Advance bitpos without the overhead of recording bits +// Loads more data when input buffer empty +void bitbuf_skip(struct bitbuf *bb, int bits) +{ + int pos = bb->bitpos + bits, len = bb->len << 3; + + while (pos >= len) { + pos -= len; + len = (bb->len = read(bb->fd, bb->buf, bb->max)) << 3; + if (bb->len < 1) perror_exit("inflate EOF"); + } + bb->bitpos = pos; +} + +// Optimized single bit inlined version +static inline int bitbuf_bit(struct bitbuf *bb) +{ + int bufpos = bb->bitpos>>3; + + if (bufpos == bb->len) { + bitbuf_skip(bb, 0); + bufpos = 0; + } + + return (bb->buf[bufpos]>>(bb->bitpos++&7))&1; +} + +// Fetch the next X bits from the bitbuf, little endian +unsigned bitbuf_get(struct bitbuf *bb, int bits) +{ + int result = 0, offset = 0; + + while (bits) { + int click = bb->bitpos >> 3, blow, blen; + + // Load more data if buffer empty + if (click == bb->len) bitbuf_skip(bb, click = 0); + + // grab bits from next byte + blow = bb->bitpos & 7; + blen = 8-blow; + if (blen > bits) blen = bits; + result |= ((bb->buf[click] >> blow) & ((1<bitpos += blen; + } + + return result; +} + +void bitbuf_flush(struct bitbuf *bb) +{ + if (!bb->bitpos) return; + + xwrite(bb->fd, bb->buf, (bb->bitpos+7)>>3); + memset(bb->buf, 0, bb->max); + bb->bitpos = 0; +} + +void bitbuf_put(struct bitbuf *bb, int data, int len) +{ + while (len) { + int click = bb->bitpos >> 3, blow, blen; + + // Flush buffer if necessary + if (click == bb->max) { + bitbuf_flush(bb); + click = 0; + } + blow = bb->bitpos & 7; + blen = 8-blow; + if (blen > len) blen = len; + bb->buf[click] |= data << blow; + bb->bitpos += blen; + data >>= blen; + len -= blen; + } +} + +static void output_byte(struct deflate *dd, char sym) +{ + int pos = dd->pos++ & 32767; + + dd->data[pos] = sym; + + if (pos == 32767) { + xwrite(dd->outfd, dd->data, 32768); + if (dd->crcfunc) dd->crcfunc(dd, dd->data, 32768); + } +} + +// Huffman coding uses bits to traverse a binary tree to a leaf node, +// By placing frequently occurring symbols at shorter paths, frequently +// used symbols may be represented in fewer bits than uncommon symbols. +// (length[0] isn't used but code's clearer if it's there.) + +struct huff { + unsigned short length[16]; // How many symbols have this bit length? + unsigned short symbol[288]; // sorted by bit length, then ascending order +}; + +// Create simple huffman tree from array of bit lengths. + +// The symbols in the huffman trees are sorted (first by bit length +// of the code to reach them, then by symbol number). This means that given +// the bit length of each symbol, we can construct a unique tree. +static void len2huff(struct huff *huff, char bitlen[], int len) +{ + int offset[16]; + int i; + + // Count number of codes at each bit length + memset(huff, 0, sizeof(struct huff)); + for (i = 0; ilength[bitlen[i]]++; + + // Sort symbols by bit length, then symbol. Get list of starting positions + // for each group, then write each symbol to next position within its group. + *huff->length = *offset = 0; + for (i = 1; i<16; i++) offset[i] = offset[i-1] + huff->length[i-1]; + for (i = 0; isymbol[offset[bitlen[i]]++] = i; +} + +// Fetch and decode next huffman coded symbol from bitbuf. +// This takes advantage of the sorting to navigate the tree as an array: +// each time we fetch a bit we have all the codes at that bit level in +// order with no gaps. +static unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff) +{ + unsigned short *length = huff->length; + int start = 0, offset = 0; + + // Traverse through the bit lengths until our code is in this range + for (;;) { + offset = (offset << 1) | bitbuf_bit(bb); + start += *++length; + if ((offset -= *length) < 0) break; + if ((length - huff->length) & 16) error_exit("bad symbol"); + } + + return huff->symbol[start + offset]; +} + +// Decompress deflated data from bitbuf to dd->outfd. +static void inflate(struct deflate *dd, struct bitbuf *bb) +{ + dd->crc = ~0; + // repeat until spanked + for (;;) { + int final, type; + + final = bitbuf_get(bb, 1); + type = bitbuf_get(bb, 2); + + if (type == 3) error_exit("bad type"); + + // Uncompressed block? + if (!type) { + int len, nlen; + + // Align to byte, read length + bitbuf_skip(bb, (8-bb->bitpos)&7); + len = bitbuf_get(bb, 16); + nlen = bitbuf_get(bb, 16); + if (len != (0xffff & ~nlen)) error_exit("bad len"); + + // Dump literal output data + while (len) { + int pos = bb->bitpos >> 3, bblen = bb->len - pos; + char *p = bb->buf+pos; + + // dump bytes until done or end of current bitbuf contents + if (bblen > len) bblen = len; + pos = bblen; + while (pos--) output_byte(dd, *(p++)); + bitbuf_skip(bb, bblen << 3); + len -= bblen; + } + + // Compressed block + } else { + struct huff *disthuff, *lithuff; + + // Dynamic huffman codes? + if (type == 2) { + struct huff *h2 = ((struct huff *)toybuf)+1; + int i, litlen, distlen, hufflen; + char *hufflen_order = "\x10\x11\x12\0\x08\x07\x09\x06\x0a\x05\x0b" + "\x04\x0c\x03\x0d\x02\x0e\x01\x0f", *bits; + + // The huffman trees are stored as a series of bit lengths + litlen = bitbuf_get(bb, 5)+257; // max 288 + distlen = bitbuf_get(bb, 5)+1; // max 32 + hufflen = bitbuf_get(bb, 4)+4; // max 19 + + // The literal and distance codes are themselves compressed, in + // a complicated way: an array of bit lengths (hufflen many + // entries, each 3 bits) is used to fill out an array of 19 entries + // in a magic order, leaving the rest 0. Then make a tree out of it: + memset(bits = toybuf+1, 0, 19); + for (i=0; i>1)) + 3 + (len<<2); + memset(bits+i, bits[i-1] * !(sym&3), len); + i += len; + } + } + if (i > litlen+distlen) error_exit("bad tree"); + + len2huff(lithuff = h2, bits, litlen); + len2huff(disthuff = ((struct huff *)toybuf)+2, bits+litlen, distlen); + + // Static huffman codes + } else { + lithuff = dd->fixlithuff; + disthuff = dd->fixdisthuff; + } + + // Use huffman tables to decode block of compressed symbols + for (;;) { + int sym = huff_and_puff(bb, lithuff); + + // Literal? + if (sym < 256) output_byte(dd, sym); + + // Copy range? + else if (sym > 256) { + int len, dist; + + sym -= 257; + len = dd->lenbase[sym] + bitbuf_get(bb, dd->lenbits[sym]); + sym = huff_and_puff(bb, disthuff); + dist = dd->distbase[sym] + bitbuf_get(bb, dd->distbits[sym]); + sym = dd->pos & 32767; + + while (len--) output_byte(dd, dd->data[(dd->pos-dist) & 32767]); + + // End of block + } else break; + } + } + + // Was that the last block? + if (final) break; + } + + if (dd->pos & 32767) { + xwrite(dd->outfd, dd->data, dd->pos&32767); + if (dd->crcfunc) dd->crcfunc(dd, dd->data, dd->pos&32767); + } +} + +// Deflate from dd->infd to bitbuf +// For deflate, dd->len = input read, dd->pos = input consumed +static void deflate(struct deflate *dd, struct bitbuf *bb) +{ + char *data = dd->data; + int len, final = 0; + + dd->crc = ~0; + + while (!final) { + // Read next half-window of data if we haven't hit EOF yet. + len = readall(dd->infd, data+(dd->len&32768), 32768); + if (len < 0) perror_exit("read"); // todo: add filename + if (len != 32768) final++; + if (dd->crcfunc) dd->crcfunc(dd, data+(dd->len&32768), len); + // dd->len += len; crcfunc advances len TODO + + // store block as literal + bitbuf_put(bb, final, 1); + bitbuf_put(bb, 0, 1); + + bitbuf_put(bb, 0, (8-bb->bitpos)&7); + bitbuf_put(bb, len, 16); + bitbuf_put(bb, 0xffff & ~len, 16); + + // repeat until spanked + while (dd->pos != dd->len) { + unsigned pos = dd->pos&65535; + + bitbuf_put(bb, data[pos], 8); + + // need to refill buffer? + if (!(32767 & ++dd->pos) && !final) break; + } + } + bitbuf_flush(bb); +} + +// Allocate memory for deflate/inflate. +static struct deflate *init_deflate(int compress) +{ + int i, n = 1; + struct deflate *dd = xmalloc(sizeof(struct deflate)+32768*(compress ? 4 : 1)); + +// TODO sizeof and order of these? + // decompress needs 32k history, compress adds 64k hashhead and 32k hashchain + if (compress) { + dd->hashhead = (unsigned short *)(dd->data+65536); + dd->hashchain = (unsigned short *)(dd->data+65536+32768); + } + + // Calculate lenbits, lenbase, distbits, distbase + *dd->lenbase = 3; + for (i = 0; ilenbits)-1; i++) { + if (i>4) { + if (!(i&3)) { + dd->lenbits[i]++; + n <<= 1; + } + if (i == 27) n--; + else dd->lenbits[i+1] = dd->lenbits[i]; + } + dd->lenbase[i+1] = n + dd->lenbase[i]; + } + n = 0; + for (i = 0; idistbits); i++) { + dd->distbase[i] = 1<distbase[i] += dd->distbase[i-1]; + if (i>3 && !(i&1)) n++; + dd->distbits[i] = n; + } + +// TODO layout and lifetime of this? + // Init fixed huffman tables + for (i=0; i<288; i++) toybuf[i] = 8 + (i>143) - ((i>255)<<1) + (i>279); + len2huff(dd->fixlithuff = ((struct huff *)toybuf)+3, toybuf, 288); + memset(toybuf, 5, 30); + len2huff(dd->fixdisthuff = ((struct huff *)toybuf)+4, toybuf, 30); + + return dd; +} + +// Return true/false whether we consumed a gzip header. +static int is_gzip(struct bitbuf *bb) +{ + int flags; + + // Confirm signature + if (bitbuf_get(bb, 24) != 0x088b1f || (flags = bitbuf_get(bb, 8)) > 31) + return 0; + bitbuf_skip(bb, 6*8); + + // Skip extra, name, comment, header CRC fields + if (flags & 4) bitbuf_skip(bb, 16); + if (flags & 8) while (bitbuf_get(bb, 8)); + if (flags & 16) while (bitbuf_get(bb, 8)); + if (flags & 2) bitbuf_skip(bb, 16); + + return 1; +} + +void gzip_crc(struct deflate *dd, char *data, int len) +{ + int i; + unsigned crc, *crc_table = dd->crctable; + + crc = dd->crc; + for (i=0; i>8); + dd->crc = crc; + dd->len += len; +} + +long long gzip_fd(int infd, int outfd) +{ + struct bitbuf *bb = bitbuf_init(outfd, 4096); + struct deflate *dd = init_deflate(1); + long long rc; + + // Header from RFC 1952 section 2.2: + // 2 ID bytes (1F, 8b), gzip method byte (8=deflate), FLAG byte (none), + // 4 byte MTIME (zeroed), Extra Flags (2=maximum compression), + // Operating System (FF=unknown) + + dd->infd = infd; + xwrite(bb->fd, "\x1f\x8b\x08\0\0\0\0\0\x02\xff", 10); + + // Little endian crc table + crc_init(dd->crctable, 1); + dd->crcfunc = gzip_crc; + + deflate(dd, bb); + + // tail: crc32, len32 + + bitbuf_put(bb, 0, (8-bb->bitpos)&7); + bitbuf_put(bb, ~dd->crc, 32); + bitbuf_put(bb, dd->len, 32); + rc = dd->len; + + bitbuf_flush(bb); + free(bb); + free(dd); + + return rc; +} + +long long gunzip_fd(int infd, int outfd) +{ + struct bitbuf *bb = bitbuf_init(infd, 4096); + struct deflate *dd = init_deflate(0); + long long rc; + + if (!is_gzip(bb)) error_exit("not gzip"); + dd->outfd = outfd; + + // Little endian crc table + crc_init(dd->crctable, 1); + dd->crcfunc = gzip_crc; + + inflate(dd, bb); + + // tail: crc32, len32 + + bitbuf_skip(bb, (8-bb->bitpos)&7); + if (~dd->crc != bitbuf_get(bb, 32) || dd->len != bitbuf_get(bb, 32)) + error_exit("bad crc"); + + rc = dd->len; + free(bb); + free(dd); + + return rc; +} diff --git a/toys/pending/compress.c b/toys/pending/compress.c deleted file mode 100644 index bc69a035..00000000 --- a/toys/pending/compress.c +++ /dev/null @@ -1,513 +0,0 @@ -/* compress.c - deflate/inflate code for zip, gzip, zlib, and raw - * - * Copyright 2014 Rob Landley - * - * The inflate/deflate code lives here, so the various things that use it - * either live here or call these commands to pipe data through them. - * - * Divergence from posix: replace obsolete/patented "compress" with mutiplexer. - * (gzip already replaces "uncompress".) - * - * See RFCs 1950 (zlib), 1951 (deflate), and 1952 (gzip) - * LSB 4.1 has gzip, gunzip, and zcat - * TODO: zip -d DIR -x LIST -list -quiet -no overwrite -overwrite -p to stdout - -// Accept many different kinds of command line argument. -// Leave Lrg at end so flag values line up. - -USE_COMPRESS(NEWTOY(compress, "zcd9lrg[-cd][!zgLr]", TOYFLAG_USR|TOYFLAG_BIN)) - -//zip unzip gzip gunzip zcat - -config COMPRESS - bool "compress" - default n - help - usage: compress [-zgLR19] [FILE] - - Compress or decompress file (or stdin) using "deflate" algorithm. - - -1 min compression - -9 max compression (default) - -g gzip (default) - -L zlib - -R raw - -z zip - -config DECOMPRESS - bool "decompress" - default n - help - usage: compress [-zglrcd9] [FILE] - - Compress or decompress file (or stdin) using "deflate" algorithm. - - -c compress with -g gzip (default) -l zlib -r raw -z zip - -d decompress (autodetects type) -*/ - -#define FOR_compress -#include "toys.h" - -GLOBALS( - // Huffman codes: base offset and extra bits tables (length and distance) - char lenbits[29], distbits[30]; - unsigned short lenbase[29], distbase[30]; - void *fixdisthuff, *fixlithuff; - - // CRC - void (*crcfunc)(char *data, int len); - unsigned crc; - - // Compressed data buffer - char *data; - unsigned pos, len; - int infd, outfd; - - // Tables only used for deflation - unsigned short *hashhead, *hashchain; -) - -// little endian bit buffer -struct bitbuf { - int fd, bitpos, len, max; - char buf[]; -}; - -// malloc a struct bitbuf -struct bitbuf *bitbuf_init(int fd, int size) -{ - struct bitbuf *bb = xzalloc(sizeof(struct bitbuf)+size); - - bb->max = size; - bb->fd = fd; - - return bb; -} - -// Advance bitpos without the overhead of recording bits -void bitbuf_skip(struct bitbuf *bb, int bits) -{ - int pos = bb->bitpos + bits, len = bb->len << 3; - - while (pos >= len) { - pos -= len; - len = (bb->len = read(bb->fd, bb->buf, bb->max)) << 3; - if (bb->len < 1) perror_exit("inflate EOF"); - } - bb->bitpos = pos; -} - -// Optimized single bit inlined version -static inline int bitbuf_bit(struct bitbuf *bb) -{ - int bufpos = bb->bitpos>>3; - - if (bufpos == bb->len) { - bitbuf_skip(bb, 0); - bufpos = 0; - } - - return (bb->buf[bufpos]>>(bb->bitpos++&7))&1; -} - -// Fetch the next X bits from the bitbuf, little endian -unsigned bitbuf_get(struct bitbuf *bb, int bits) -{ - int result = 0, offset = 0; - - while (bits) { - int click = bb->bitpos >> 3, blow, blen; - - // Load more data if buffer empty - if (click == bb->len) bitbuf_skip(bb, click = 0); - - // grab bits from next byte - blow = bb->bitpos & 7; - blen = 8-blow; - if (blen > bits) blen = bits; - result |= ((bb->buf[click] >> blow) & ((1<bitpos += blen; - } - - return result; -} - -void bitbuf_flush(struct bitbuf *bb) -{ - if (!bb->bitpos) return; - - xwrite(bb->fd, bb->buf, (bb->bitpos+7)/8); - memset(bb->buf, 0, bb->max); - bb->bitpos = 0; -} - -void bitbuf_put(struct bitbuf *bb, int data, int len) -{ - while (len) { - int click = bb->bitpos >> 3, blow, blen; - - // Flush buffer if necessary - if (click == bb->max) { - bitbuf_flush(bb); - click = 0; - } - blow = bb->bitpos & 7; - blen = 8-blow; - if (blen > len) blen = len; - bb->buf[click] |= data << blow; - bb->bitpos += blen; - data >>= blen; - len -= blen; - } -} - -static void output_byte(char sym) -{ - int pos = TT.pos++ & 32767; - - TT.data[pos] = sym; - - if (pos == 32767) { - xwrite(TT.outfd, TT.data, 32768); - if (TT.crcfunc) TT.crcfunc(TT.data, 32768); - } -} - -// Huffman coding uses bits to traverse a binary tree to a leaf node, -// By placing frequently occurring symbols at shorter paths, frequently -// used symbols may be represented in fewer bits than uncommon symbols. - -struct huff { - unsigned short length[16]; - unsigned short symbol[288]; -}; - -// Create simple huffman tree from array of bit lengths. - -// The symbols in the huffman trees are sorted (first by bit length -// of the code to reach them, then by symbol number). This means that given -// the bit length of each symbol, we can construct a unique tree. -static void len2huff(struct huff *huff, char bitlen[], int len) -{ - int offset[16]; - int i; - - // Count number of codes at each bit length - memset(huff, 0, sizeof(struct huff)); - for (i = 0; ilength[bitlen[i]]++; - - // Sort symbols by bit length. (They'll remain sorted by symbol within that.) - *huff->length = *offset = 0; - for (i = 1; i<16; i++) offset[i] = offset[i-1] + huff->length[i-1]; - - for (i = 0; isymbol[offset[bitlen[i]]++] = i; -} - -// Fetch and decode next huffman coded symbol from bitbuf. -// This takes advantage of the sorting to navigate the tree as an array: -// each time we fetch a bit we have all the codes at that bit level in -// order with no gaps. -static unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff) -{ - unsigned short *length = huff->length; - int start = 0, offset = 0; - - // Traverse through the bit lengths until our code is in this range - for (;;) { - offset = (offset << 1) | bitbuf_bit(bb); - start += *++length; - if ((offset -= *length) < 0) break; - if ((length - huff->length) & 16) error_exit("bad symbol"); - } - - return huff->symbol[start + offset]; -} - -// Decompress deflated data from bitbuf to TT.outfd. -static void inflate(struct bitbuf *bb) -{ - TT.crc = ~0; - // repeat until spanked - for (;;) { - int final, type; - - final = bitbuf_get(bb, 1); - type = bitbuf_get(bb, 2); - - if (type == 3) error_exit("bad type"); - - // Uncompressed block? - if (!type) { - int len, nlen; - - // Align to byte, read length - bitbuf_skip(bb, (8-bb->bitpos)&7); - len = bitbuf_get(bb, 16); - nlen = bitbuf_get(bb, 16); - if (len != (0xffff & ~nlen)) error_exit("bad len"); - - // Dump literal output data - while (len) { - int pos = bb->bitpos >> 3, bblen = bb->len - pos; - char *p = bb->buf+pos; - - // dump bytes until done or end of current bitbuf contents - if (bblen > len) bblen = len; - pos = bblen; - while (pos--) output_byte(*(p++)); - bitbuf_skip(bb, bblen << 3); - len -= bblen; - } - - // Compressed block - } else { - struct huff *disthuff, *lithuff; - - // Dynamic huffman codes? - if (type == 2) { - struct huff *h2 = ((struct huff *)toybuf)+1; - int i, litlen, distlen, hufflen; - char *hufflen_order = "\x10\x11\x12\0\x08\x07\x09\x06\x0a\x05\x0b" - "\x04\x0c\x03\x0d\x02\x0e\x01\x0f", *bits; - - // The huffman trees are stored as a series of bit lengths - litlen = bitbuf_get(bb, 5)+257; // max 288 - distlen = bitbuf_get(bb, 5)+1; // max 32 - hufflen = bitbuf_get(bb, 4)+4; // max 19 - - // The literal and distance codes are themselves compressed, in - // a complicated way: an array of bit lengths (hufflen many - // entries, each 3 bits) is used to fill out an array of 19 entries - // in a magic order, leaving the rest 0. Then make a tree out of it: - memset(bits = toybuf+1, 0, 19); - for (i=0; i>1)) + 3 + (len<<2); - memset(bits+i, bits[i-1] * !(sym&3), len); - i += len; - } - } - if (i > litlen+distlen) error_exit("bad tree"); - - len2huff(lithuff = h2, bits, litlen); - len2huff(disthuff = ((struct huff *)toybuf)+2, bits+litlen, distlen); - - // Static huffman codes - } else { - lithuff = TT.fixlithuff; - disthuff = TT.fixdisthuff; - } - - // Use huffman tables to decode block of compressed symbols - for (;;) { - int sym = huff_and_puff(bb, lithuff); - - // Literal? - if (sym < 256) output_byte(sym); - - // Copy range? - else if (sym > 256) { - int len, dist; - - sym -= 257; - len = TT.lenbase[sym] + bitbuf_get(bb, TT.lenbits[sym]); - sym = huff_and_puff(bb, disthuff); - dist = TT.distbase[sym] + bitbuf_get(bb, TT.distbits[sym]); - sym = TT.pos & 32767; - - while (len--) output_byte(TT.data[(TT.pos-dist) & 32767]); - - // End of block - } else break; - } - } - - // Was that the last block? - if (final) break; - } - - if (TT.pos & 32767) { - xwrite(TT.outfd, TT.data, TT.pos & 32767); - if (TT.crcfunc) TT.crcfunc(TT.data, TT.pos & 32767); - } -} - -// Deflate from TT.infd to bitbuf -// For deflate, TT.len = input read, TT.pos = input consumed -static void deflate(struct bitbuf *bb) -{ - char *data = TT.data; - int len, final = 0; - - TT.crc = ~0; - - while (!final) { - // Read next half-window of data if we haven't hit EOF yet. - len = readall(TT.infd, data+(TT.len&32768), 32768); - if (len < 0) perror_exit("read"); // todo: add filename - if (len != 32768) final++; - if (TT.crcfunc) TT.crcfunc(data+(TT.len&32768), len); - // TT.len += len; crcfunc advances len - - // store block as literal - bitbuf_put(bb, final, 1); - bitbuf_put(bb, 0, 1); - - bitbuf_put(bb, 0, (8-bb->bitpos)&7); - bitbuf_put(bb, len, 16); - bitbuf_put(bb, 0xffff & ~len, 16); - - // repeat until spanked - while (TT.pos != TT.len) { - unsigned pos = TT.pos & 65535; - - bitbuf_put(bb, data[pos], 8); - - // need to refill buffer? - if (!(32767 & ++TT.pos) && !final) break; - } - } - bitbuf_flush(bb); -} - -// Allocate memory for deflate/inflate. -static void init_deflate(int compress) -{ - int i, n = 1; - - // compress needs 64k data and 32k each for hashhead and hashchain. - // decompress just needs 32k data. - TT.data = xmalloc(32768*(compress ? 4 : 1)); - if (compress) { - TT.hashhead = (unsigned short *)(TT.data + 65536); - TT.hashchain = (unsigned short *)(TT.data + 65536 + 32768); - } - - // Calculate lenbits, lenbase, distbits, distbase - *TT.lenbase = 3; - for (i = 0; i4) { - if (!(i&3)) { - TT.lenbits[i]++; - n <<= 1; - } - if (i == 27) n--; - else TT.lenbits[i+1] = TT.lenbits[i]; - } - TT.lenbase[i+1] = n + TT.lenbase[i]; - } - n = 0; - for (i = 0; i3 && !(i&1)) n++; - TT.distbits[i] = n; - } - - // Init fixed huffman tables - for (i=0; i<288; i++) toybuf[i] = 8 + (i>143) - ((i>255)<<1) + (i>279); - len2huff(TT.fixlithuff = ((struct huff *)toybuf)+3, toybuf, 288); - memset(toybuf, 5, 30); - len2huff(TT.fixdisthuff = ((struct huff *)toybuf)+4, toybuf, 30); -} - -// Return true/false whether we consumed a gzip header. -static int is_gzip(struct bitbuf *bb) -{ - int flags; - - // Confirm signature - if (bitbuf_get(bb, 24) != 0x088b1f || (flags = bitbuf_get(bb, 8)) > 31) - return 0; - bitbuf_skip(bb, 6*8); - - // Skip extra, name, comment, header CRC fields - if (flags & 4) bitbuf_skip(bb, 16); - if (flags & 8) while (bitbuf_get(bb, 8)); - if (flags & 16) while (bitbuf_get(bb, 8)); - if (flags & 2) bitbuf_skip(bb, 16); - - return 1; -} - -void gzip_crc(char *data, int len) -{ - int i; - unsigned crc, *crc_table = (unsigned *)(toybuf+sizeof(toybuf)-1024); - - crc = TT.crc; - for (i=0; i>8); - TT.crc = crc; - TT.len += len; -} - -static void do_gzip(int fd, char *name) -{ - struct bitbuf *bb = bitbuf_init(1, sizeof(toybuf)); - - // Header from RFC 1952 section 2.2: - // 2 ID bytes (1F, 8b), gzip method byte (8=deflate), FLAG byte (none), - // 4 byte MTIME (zeroed), Extra Flags (2=maximum compression), - // Operating System (FF=unknown) - - TT.infd = fd; - xwrite(bb->fd, "\x1f\x8b\x08\0\0\0\0\0\x02\xff", 10); - - // Use last 1k of toybuf for little endian crc table - crc_init((unsigned *)(toybuf+sizeof(toybuf)-1024), 1); - TT.crcfunc = gzip_crc; - - deflate(bb); - - // tail: crc32, len32 - - bitbuf_put(bb, 0, (8-bb->bitpos)&7); - bitbuf_put(bb, ~TT.crc, 32); - bitbuf_put(bb, TT.len, 32); - - bitbuf_flush(bb); - free(bb); -} - -static void do_zcat(int fd, char *name) -{ - struct bitbuf *bb = bitbuf_init(fd, sizeof(toybuf)); - - if (!is_gzip(bb)) error_exit("not gzip"); - TT.outfd = 1; - - // Use last 1k of toybuf for little endian crc table - crc_init((unsigned *)(toybuf+sizeof(toybuf)-1024), 1); - TT.crcfunc = gzip_crc; - - inflate(bb); - - // tail: crc32, len32 - - bitbuf_skip(bb, (8-bb->bitpos)&7); - if (~TT.crc != bitbuf_get(bb, 32) || TT.len != bitbuf_get(bb, 32)) - error_exit("bad crc"); - free(bb); -} - -// Parse many different kinds of command line argument: - -void compress_main(void) -{ - // todo: this - printf("hello world"); -} -- cgit v1.2.3