aboutsummaryrefslogtreecommitdiff
path: root/toys/pending/compress.c
diff options
context:
space:
mode:
authorRob Landley <rob@landley.net>2018-08-02 15:47:49 -0500
committerRob Landley <rob@landley.net>2018-08-02 15:47:49 -0500
commit917892ea7d676c0937f1921bb126b200b2dea329 (patch)
tree1f665f8b36d146d61f218770e229351cc70ab098 /toys/pending/compress.c
parent72af8466ac0e86dc4a4999f289dd082369bcaedc (diff)
downloadtoybox-917892ea7d676c0937f1921bb126b200b2dea329.tar.gz
Move pending/compress.c to lib/deflate.c, first pass at genericizing it.
Diffstat (limited to 'toys/pending/compress.c')
-rw-r--r--toys/pending/compress.c513
1 files changed, 0 insertions, 513 deletions
diff --git a/toys/pending/compress.c b/toys/pending/compress.c
deleted file mode 100644
index bc69a035..00000000
--- a/toys/pending/compress.c
+++ /dev/null
@@ -1,513 +0,0 @@
-/* compress.c - deflate/inflate code for zip, gzip, zlib, and raw
- *
- * Copyright 2014 Rob Landley <rob@landley.net>
- *
- * The inflate/deflate code lives here, so the various things that use it
- * either live here or call these commands to pipe data through them.
- *
- * Divergence from posix: replace obsolete/patented "compress" with mutiplexer.
- * (gzip already replaces "uncompress".)
- *
- * See RFCs 1950 (zlib), 1951 (deflate), and 1952 (gzip)
- * LSB 4.1 has gzip, gunzip, and zcat
- * TODO: zip -d DIR -x LIST -list -quiet -no overwrite -overwrite -p to stdout
-
-// Accept many different kinds of command line argument.
-// Leave Lrg at end so flag values line up.
-
-USE_COMPRESS(NEWTOY(compress, "zcd9lrg[-cd][!zgLr]", TOYFLAG_USR|TOYFLAG_BIN))
-
-//zip unzip gzip gunzip zcat
-
-config COMPRESS
- bool "compress"
- default n
- help
- usage: compress [-zgLR19] [FILE]
-
- Compress or decompress file (or stdin) using "deflate" algorithm.
-
- -1 min compression
- -9 max compression (default)
- -g gzip (default)
- -L zlib
- -R raw
- -z zip
-
-config DECOMPRESS
- bool "decompress"
- default n
- help
- usage: compress [-zglrcd9] [FILE]
-
- Compress or decompress file (or stdin) using "deflate" algorithm.
-
- -c compress with -g gzip (default) -l zlib -r raw -z zip
- -d decompress (autodetects type)
-*/
-
-#define FOR_compress
-#include "toys.h"
-
-GLOBALS(
- // Huffman codes: base offset and extra bits tables (length and distance)
- char lenbits[29], distbits[30];
- unsigned short lenbase[29], distbase[30];
- void *fixdisthuff, *fixlithuff;
-
- // CRC
- void (*crcfunc)(char *data, int len);
- unsigned crc;
-
- // Compressed data buffer
- char *data;
- unsigned pos, len;
- int infd, outfd;
-
- // Tables only used for deflation
- unsigned short *hashhead, *hashchain;
-)
-
-// little endian bit buffer
-struct bitbuf {
- int fd, bitpos, len, max;
- char buf[];
-};
-
-// malloc a struct bitbuf
-struct bitbuf *bitbuf_init(int fd, int size)
-{
- struct bitbuf *bb = xzalloc(sizeof(struct bitbuf)+size);
-
- bb->max = size;
- bb->fd = fd;
-
- return bb;
-}
-
-// Advance bitpos without the overhead of recording bits
-void bitbuf_skip(struct bitbuf *bb, int bits)
-{
- int pos = bb->bitpos + bits, len = bb->len << 3;
-
- while (pos >= len) {
- pos -= len;
- len = (bb->len = read(bb->fd, bb->buf, bb->max)) << 3;
- if (bb->len < 1) perror_exit("inflate EOF");
- }
- bb->bitpos = pos;
-}
-
-// Optimized single bit inlined version
-static inline int bitbuf_bit(struct bitbuf *bb)
-{
- int bufpos = bb->bitpos>>3;
-
- if (bufpos == bb->len) {
- bitbuf_skip(bb, 0);
- bufpos = 0;
- }
-
- return (bb->buf[bufpos]>>(bb->bitpos++&7))&1;
-}
-
-// Fetch the next X bits from the bitbuf, little endian
-unsigned bitbuf_get(struct bitbuf *bb, int bits)
-{
- int result = 0, offset = 0;
-
- while (bits) {
- int click = bb->bitpos >> 3, blow, blen;
-
- // Load more data if buffer empty
- if (click == bb->len) bitbuf_skip(bb, click = 0);
-
- // grab bits from next byte
- blow = bb->bitpos & 7;
- blen = 8-blow;
- if (blen > bits) blen = bits;
- result |= ((bb->buf[click] >> blow) & ((1<<blen)-1)) << offset;
- offset += blen;
- bits -= blen;
- bb->bitpos += blen;
- }
-
- return result;
-}
-
-void bitbuf_flush(struct bitbuf *bb)
-{
- if (!bb->bitpos) return;
-
- xwrite(bb->fd, bb->buf, (bb->bitpos+7)/8);
- memset(bb->buf, 0, bb->max);
- bb->bitpos = 0;
-}
-
-void bitbuf_put(struct bitbuf *bb, int data, int len)
-{
- while (len) {
- int click = bb->bitpos >> 3, blow, blen;
-
- // Flush buffer if necessary
- if (click == bb->max) {
- bitbuf_flush(bb);
- click = 0;
- }
- blow = bb->bitpos & 7;
- blen = 8-blow;
- if (blen > len) blen = len;
- bb->buf[click] |= data << blow;
- bb->bitpos += blen;
- data >>= blen;
- len -= blen;
- }
-}
-
-static void output_byte(char sym)
-{
- int pos = TT.pos++ & 32767;
-
- TT.data[pos] = sym;
-
- if (pos == 32767) {
- xwrite(TT.outfd, TT.data, 32768);
- if (TT.crcfunc) TT.crcfunc(TT.data, 32768);
- }
-}
-
-// Huffman coding uses bits to traverse a binary tree to a leaf node,
-// By placing frequently occurring symbols at shorter paths, frequently
-// used symbols may be represented in fewer bits than uncommon symbols.
-
-struct huff {
- unsigned short length[16];
- unsigned short symbol[288];
-};
-
-// Create simple huffman tree from array of bit lengths.
-
-// The symbols in the huffman trees are sorted (first by bit length
-// of the code to reach them, then by symbol number). This means that given
-// the bit length of each symbol, we can construct a unique tree.
-static void len2huff(struct huff *huff, char bitlen[], int len)
-{
- int offset[16];
- int i;
-
- // Count number of codes at each bit length
- memset(huff, 0, sizeof(struct huff));
- for (i = 0; i<len; i++) huff->length[bitlen[i]]++;
-
- // Sort symbols by bit length. (They'll remain sorted by symbol within that.)
- *huff->length = *offset = 0;
- for (i = 1; i<16; i++) offset[i] = offset[i-1] + huff->length[i-1];
-
- for (i = 0; i<len; i++) if (bitlen[i]) huff->symbol[offset[bitlen[i]]++] = i;
-}
-
-// Fetch and decode next huffman coded symbol from bitbuf.
-// This takes advantage of the sorting to navigate the tree as an array:
-// each time we fetch a bit we have all the codes at that bit level in
-// order with no gaps.
-static unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff)
-{
- unsigned short *length = huff->length;
- int start = 0, offset = 0;
-
- // Traverse through the bit lengths until our code is in this range
- for (;;) {
- offset = (offset << 1) | bitbuf_bit(bb);
- start += *++length;
- if ((offset -= *length) < 0) break;
- if ((length - huff->length) & 16) error_exit("bad symbol");
- }
-
- return huff->symbol[start + offset];
-}
-
-// Decompress deflated data from bitbuf to TT.outfd.
-static void inflate(struct bitbuf *bb)
-{
- TT.crc = ~0;
- // repeat until spanked
- for (;;) {
- int final, type;
-
- final = bitbuf_get(bb, 1);
- type = bitbuf_get(bb, 2);
-
- if (type == 3) error_exit("bad type");
-
- // Uncompressed block?
- if (!type) {
- int len, nlen;
-
- // Align to byte, read length
- bitbuf_skip(bb, (8-bb->bitpos)&7);
- len = bitbuf_get(bb, 16);
- nlen = bitbuf_get(bb, 16);
- if (len != (0xffff & ~nlen)) error_exit("bad len");
-
- // Dump literal output data
- while (len) {
- int pos = bb->bitpos >> 3, bblen = bb->len - pos;
- char *p = bb->buf+pos;
-
- // dump bytes until done or end of current bitbuf contents
- if (bblen > len) bblen = len;
- pos = bblen;
- while (pos--) output_byte(*(p++));
- bitbuf_skip(bb, bblen << 3);
- len -= bblen;
- }
-
- // Compressed block
- } else {
- struct huff *disthuff, *lithuff;
-
- // Dynamic huffman codes?
- if (type == 2) {
- struct huff *h2 = ((struct huff *)toybuf)+1;
- int i, litlen, distlen, hufflen;
- char *hufflen_order = "\x10\x11\x12\0\x08\x07\x09\x06\x0a\x05\x0b"
- "\x04\x0c\x03\x0d\x02\x0e\x01\x0f", *bits;
-
- // The huffman trees are stored as a series of bit lengths
- litlen = bitbuf_get(bb, 5)+257; // max 288
- distlen = bitbuf_get(bb, 5)+1; // max 32
- hufflen = bitbuf_get(bb, 4)+4; // max 19
-
- // The literal and distance codes are themselves compressed, in
- // a complicated way: an array of bit lengths (hufflen many
- // entries, each 3 bits) is used to fill out an array of 19 entries
- // in a magic order, leaving the rest 0. Then make a tree out of it:
- memset(bits = toybuf+1, 0, 19);
- for (i=0; i<hufflen; i++) bits[hufflen_order[i]] = bitbuf_get(bb, 3);
- len2huff(h2, bits, 19);
-
- // Use that tree to read in the literal and distance bit lengths
- for (i = 0; i < litlen + distlen;) {
- int sym = huff_and_puff(bb, h2);
-
- // 0-15 are literals, 16 = repeat previous code 3-6 times,
- // 17 = 3-10 zeroes (3 bit), 18 = 11-138 zeroes (7 bit)
- if (sym < 16) bits[i++] = sym;
- else {
- int len = sym & 2;
-
- len = bitbuf_get(bb, sym-14+len+(len>>1)) + 3 + (len<<2);
- memset(bits+i, bits[i-1] * !(sym&3), len);
- i += len;
- }
- }
- if (i > litlen+distlen) error_exit("bad tree");
-
- len2huff(lithuff = h2, bits, litlen);
- len2huff(disthuff = ((struct huff *)toybuf)+2, bits+litlen, distlen);
-
- // Static huffman codes
- } else {
- lithuff = TT.fixlithuff;
- disthuff = TT.fixdisthuff;
- }
-
- // Use huffman tables to decode block of compressed symbols
- for (;;) {
- int sym = huff_and_puff(bb, lithuff);
-
- // Literal?
- if (sym < 256) output_byte(sym);
-
- // Copy range?
- else if (sym > 256) {
- int len, dist;
-
- sym -= 257;
- len = TT.lenbase[sym] + bitbuf_get(bb, TT.lenbits[sym]);
- sym = huff_and_puff(bb, disthuff);
- dist = TT.distbase[sym] + bitbuf_get(bb, TT.distbits[sym]);
- sym = TT.pos & 32767;
-
- while (len--) output_byte(TT.data[(TT.pos-dist) & 32767]);
-
- // End of block
- } else break;
- }
- }
-
- // Was that the last block?
- if (final) break;
- }
-
- if (TT.pos & 32767) {
- xwrite(TT.outfd, TT.data, TT.pos & 32767);
- if (TT.crcfunc) TT.crcfunc(TT.data, TT.pos & 32767);
- }
-}
-
-// Deflate from TT.infd to bitbuf
-// For deflate, TT.len = input read, TT.pos = input consumed
-static void deflate(struct bitbuf *bb)
-{
- char *data = TT.data;
- int len, final = 0;
-
- TT.crc = ~0;
-
- while (!final) {
- // Read next half-window of data if we haven't hit EOF yet.
- len = readall(TT.infd, data+(TT.len&32768), 32768);
- if (len < 0) perror_exit("read"); // todo: add filename
- if (len != 32768) final++;
- if (TT.crcfunc) TT.crcfunc(data+(TT.len&32768), len);
- // TT.len += len; crcfunc advances len
-
- // store block as literal
- bitbuf_put(bb, final, 1);
- bitbuf_put(bb, 0, 1);
-
- bitbuf_put(bb, 0, (8-bb->bitpos)&7);
- bitbuf_put(bb, len, 16);
- bitbuf_put(bb, 0xffff & ~len, 16);
-
- // repeat until spanked
- while (TT.pos != TT.len) {
- unsigned pos = TT.pos & 65535;
-
- bitbuf_put(bb, data[pos], 8);
-
- // need to refill buffer?
- if (!(32767 & ++TT.pos) && !final) break;
- }
- }
- bitbuf_flush(bb);
-}
-
-// Allocate memory for deflate/inflate.
-static void init_deflate(int compress)
-{
- int i, n = 1;
-
- // compress needs 64k data and 32k each for hashhead and hashchain.
- // decompress just needs 32k data.
- TT.data = xmalloc(32768*(compress ? 4 : 1));
- if (compress) {
- TT.hashhead = (unsigned short *)(TT.data + 65536);
- TT.hashchain = (unsigned short *)(TT.data + 65536 + 32768);
- }
-
- // Calculate lenbits, lenbase, distbits, distbase
- *TT.lenbase = 3;
- for (i = 0; i<sizeof(TT.lenbits)-1; i++) {
- if (i>4) {
- if (!(i&3)) {
- TT.lenbits[i]++;
- n <<= 1;
- }
- if (i == 27) n--;
- else TT.lenbits[i+1] = TT.lenbits[i];
- }
- TT.lenbase[i+1] = n + TT.lenbase[i];
- }
- n = 0;
- for (i = 0; i<sizeof(TT.distbits); i++) {
- TT.distbase[i] = 1<<n;
- if (i) TT.distbase[i] += TT.distbase[i-1];
- if (i>3 && !(i&1)) n++;
- TT.distbits[i] = n;
- }
-
- // Init fixed huffman tables
- for (i=0; i<288; i++) toybuf[i] = 8 + (i>143) - ((i>255)<<1) + (i>279);
- len2huff(TT.fixlithuff = ((struct huff *)toybuf)+3, toybuf, 288);
- memset(toybuf, 5, 30);
- len2huff(TT.fixdisthuff = ((struct huff *)toybuf)+4, toybuf, 30);
-}
-
-// Return true/false whether we consumed a gzip header.
-static int is_gzip(struct bitbuf *bb)
-{
- int flags;
-
- // Confirm signature
- if (bitbuf_get(bb, 24) != 0x088b1f || (flags = bitbuf_get(bb, 8)) > 31)
- return 0;
- bitbuf_skip(bb, 6*8);
-
- // Skip extra, name, comment, header CRC fields
- if (flags & 4) bitbuf_skip(bb, 16);
- if (flags & 8) while (bitbuf_get(bb, 8));
- if (flags & 16) while (bitbuf_get(bb, 8));
- if (flags & 2) bitbuf_skip(bb, 16);
-
- return 1;
-}
-
-void gzip_crc(char *data, int len)
-{
- int i;
- unsigned crc, *crc_table = (unsigned *)(toybuf+sizeof(toybuf)-1024);
-
- crc = TT.crc;
- for (i=0; i<len; i++) crc = crc_table[(crc^data[i])&0xff] ^ (crc>>8);
- TT.crc = crc;
- TT.len += len;
-}
-
-static void do_gzip(int fd, char *name)
-{
- struct bitbuf *bb = bitbuf_init(1, sizeof(toybuf));
-
- // Header from RFC 1952 section 2.2:
- // 2 ID bytes (1F, 8b), gzip method byte (8=deflate), FLAG byte (none),
- // 4 byte MTIME (zeroed), Extra Flags (2=maximum compression),
- // Operating System (FF=unknown)
-
- TT.infd = fd;
- xwrite(bb->fd, "\x1f\x8b\x08\0\0\0\0\0\x02\xff", 10);
-
- // Use last 1k of toybuf for little endian crc table
- crc_init((unsigned *)(toybuf+sizeof(toybuf)-1024), 1);
- TT.crcfunc = gzip_crc;
-
- deflate(bb);
-
- // tail: crc32, len32
-
- bitbuf_put(bb, 0, (8-bb->bitpos)&7);
- bitbuf_put(bb, ~TT.crc, 32);
- bitbuf_put(bb, TT.len, 32);
-
- bitbuf_flush(bb);
- free(bb);
-}
-
-static void do_zcat(int fd, char *name)
-{
- struct bitbuf *bb = bitbuf_init(fd, sizeof(toybuf));
-
- if (!is_gzip(bb)) error_exit("not gzip");
- TT.outfd = 1;
-
- // Use last 1k of toybuf for little endian crc table
- crc_init((unsigned *)(toybuf+sizeof(toybuf)-1024), 1);
- TT.crcfunc = gzip_crc;
-
- inflate(bb);
-
- // tail: crc32, len32
-
- bitbuf_skip(bb, (8-bb->bitpos)&7);
- if (~TT.crc != bitbuf_get(bb, 32) || TT.len != bitbuf_get(bb, 32))
- error_exit("bad crc");
- free(bb);
-}
-
-// Parse many different kinds of command line argument:
-
-void compress_main(void)
-{
- // todo: this
- printf("hello world");
-}