aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--lib/deflate.c (renamed from toys/pending/compress.c)234
1 files changed, 103 insertions, 131 deletions
diff --git a/toys/pending/compress.c b/lib/deflate.c
index bc69a035..2a6274e7 100644
--- a/toys/pending/compress.c
+++ b/lib/deflate.c
@@ -1,72 +1,34 @@
-/* compress.c - deflate/inflate code for zip, gzip, zlib, and raw
+/* deflate.c - deflate/inflate code for gzip and friends
*
* Copyright 2014 Rob Landley <rob@landley.net>
*
- * The inflate/deflate code lives here, so the various things that use it
- * either live here or call these commands to pipe data through them.
- *
- * Divergence from posix: replace obsolete/patented "compress" with mutiplexer.
- * (gzip already replaces "uncompress".)
- *
* See RFCs 1950 (zlib), 1951 (deflate), and 1952 (gzip)
* LSB 4.1 has gzip, gunzip, and zcat
+ *
* TODO: zip -d DIR -x LIST -list -quiet -no overwrite -overwrite -p to stdout
+ */
-// Accept many different kinds of command line argument.
-// Leave Lrg at end so flag values line up.
-
-USE_COMPRESS(NEWTOY(compress, "zcd9lrg[-cd][!zgLr]", TOYFLAG_USR|TOYFLAG_BIN))
-
-//zip unzip gzip gunzip zcat
-
-config COMPRESS
- bool "compress"
- default n
- help
- usage: compress [-zgLR19] [FILE]
-
- Compress or decompress file (or stdin) using "deflate" algorithm.
-
- -1 min compression
- -9 max compression (default)
- -g gzip (default)
- -L zlib
- -R raw
- -z zip
-
-config DECOMPRESS
- bool "decompress"
- default n
- help
- usage: compress [-zglrcd9] [FILE]
-
- Compress or decompress file (or stdin) using "deflate" algorithm.
-
- -c compress with -g gzip (default) -l zlib -r raw -z zip
- -d decompress (autodetects type)
-*/
-
-#define FOR_compress
#include "toys.h"
-GLOBALS(
+struct deflate {
// Huffman codes: base offset and extra bits tables (length and distance)
char lenbits[29], distbits[30];
unsigned short lenbase[29], distbase[30];
void *fixdisthuff, *fixlithuff;
// CRC
- void (*crcfunc)(char *data, int len);
- unsigned crc;
+ void (*crcfunc)(struct deflate *dd, char *data, int len);
+ unsigned crctable[256], crc;
- // Compressed data buffer
- char *data;
- unsigned pos, len;
- int infd, outfd;
// Tables only used for deflation
unsigned short *hashhead, *hashchain;
-)
+
+ // Compressed data buffer (extra space malloced at end)
+ unsigned pos, len;
+ int infd, outfd;
+ char data[];
+};
// little endian bit buffer
struct bitbuf {
@@ -86,6 +48,7 @@ struct bitbuf *bitbuf_init(int fd, int size)
}
// Advance bitpos without the overhead of recording bits
+// Loads more data when input buffer empty
void bitbuf_skip(struct bitbuf *bb, int bits)
{
int pos = bb->bitpos + bits, len = bb->len << 3;
@@ -139,7 +102,7 @@ void bitbuf_flush(struct bitbuf *bb)
{
if (!bb->bitpos) return;
- xwrite(bb->fd, bb->buf, (bb->bitpos+7)/8);
+ xwrite(bb->fd, bb->buf, (bb->bitpos+7)>>3);
memset(bb->buf, 0, bb->max);
bb->bitpos = 0;
}
@@ -164,25 +127,26 @@ void bitbuf_put(struct bitbuf *bb, int data, int len)
}
}
-static void output_byte(char sym)
+static void output_byte(struct deflate *dd, char sym)
{
- int pos = TT.pos++ & 32767;
+ int pos = dd->pos++ & 32767;
- TT.data[pos] = sym;
+ dd->data[pos] = sym;
if (pos == 32767) {
- xwrite(TT.outfd, TT.data, 32768);
- if (TT.crcfunc) TT.crcfunc(TT.data, 32768);
+ xwrite(dd->outfd, dd->data, 32768);
+ if (dd->crcfunc) dd->crcfunc(dd, dd->data, 32768);
}
}
// Huffman coding uses bits to traverse a binary tree to a leaf node,
// By placing frequently occurring symbols at shorter paths, frequently
// used symbols may be represented in fewer bits than uncommon symbols.
+// (length[0] isn't used but code's clearer if it's there.)
struct huff {
- unsigned short length[16];
- unsigned short symbol[288];
+ unsigned short length[16]; // How many symbols have this bit length?
+ unsigned short symbol[288]; // sorted by bit length, then ascending order
};
// Create simple huffman tree from array of bit lengths.
@@ -199,10 +163,10 @@ static void len2huff(struct huff *huff, char bitlen[], int len)
memset(huff, 0, sizeof(struct huff));
for (i = 0; i<len; i++) huff->length[bitlen[i]]++;
- // Sort symbols by bit length. (They'll remain sorted by symbol within that.)
+ // Sort symbols by bit length, then symbol. Get list of starting positions
+ // for each group, then write each symbol to next position within its group.
*huff->length = *offset = 0;
for (i = 1; i<16; i++) offset[i] = offset[i-1] + huff->length[i-1];
-
for (i = 0; i<len; i++) if (bitlen[i]) huff->symbol[offset[bitlen[i]]++] = i;
}
@@ -226,10 +190,10 @@ static unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff)
return huff->symbol[start + offset];
}
-// Decompress deflated data from bitbuf to TT.outfd.
-static void inflate(struct bitbuf *bb)
+// Decompress deflated data from bitbuf to dd->outfd.
+static void inflate(struct deflate *dd, struct bitbuf *bb)
{
- TT.crc = ~0;
+ dd->crc = ~0;
// repeat until spanked
for (;;) {
int final, type;
@@ -257,7 +221,7 @@ static void inflate(struct bitbuf *bb)
// dump bytes until done or end of current bitbuf contents
if (bblen > len) bblen = len;
pos = bblen;
- while (pos--) output_byte(*(p++));
+ while (pos--) output_byte(dd, *(p++));
bitbuf_skip(bb, bblen << 3);
len -= bblen;
}
@@ -308,8 +272,8 @@ static void inflate(struct bitbuf *bb)
// Static huffman codes
} else {
- lithuff = TT.fixlithuff;
- disthuff = TT.fixdisthuff;
+ lithuff = dd->fixlithuff;
+ disthuff = dd->fixdisthuff;
}
// Use huffman tables to decode block of compressed symbols
@@ -317,19 +281,19 @@ static void inflate(struct bitbuf *bb)
int sym = huff_and_puff(bb, lithuff);
// Literal?
- if (sym < 256) output_byte(sym);
+ if (sym < 256) output_byte(dd, sym);
// Copy range?
else if (sym > 256) {
int len, dist;
sym -= 257;
- len = TT.lenbase[sym] + bitbuf_get(bb, TT.lenbits[sym]);
+ len = dd->lenbase[sym] + bitbuf_get(bb, dd->lenbits[sym]);
sym = huff_and_puff(bb, disthuff);
- dist = TT.distbase[sym] + bitbuf_get(bb, TT.distbits[sym]);
- sym = TT.pos & 32767;
+ dist = dd->distbase[sym] + bitbuf_get(bb, dd->distbits[sym]);
+ sym = dd->pos & 32767;
- while (len--) output_byte(TT.data[(TT.pos-dist) & 32767]);
+ while (len--) output_byte(dd, dd->data[(dd->pos-dist) & 32767]);
// End of block
} else break;
@@ -340,28 +304,28 @@ static void inflate(struct bitbuf *bb)
if (final) break;
}
- if (TT.pos & 32767) {
- xwrite(TT.outfd, TT.data, TT.pos & 32767);
- if (TT.crcfunc) TT.crcfunc(TT.data, TT.pos & 32767);
+ if (dd->pos & 32767) {
+ xwrite(dd->outfd, dd->data, dd->pos&32767);
+ if (dd->crcfunc) dd->crcfunc(dd, dd->data, dd->pos&32767);
}
}
-// Deflate from TT.infd to bitbuf
-// For deflate, TT.len = input read, TT.pos = input consumed
-static void deflate(struct bitbuf *bb)
+// Deflate from dd->infd to bitbuf
+// For deflate, dd->len = input read, dd->pos = input consumed
+static void deflate(struct deflate *dd, struct bitbuf *bb)
{
- char *data = TT.data;
+ char *data = dd->data;
int len, final = 0;
- TT.crc = ~0;
+ dd->crc = ~0;
while (!final) {
// Read next half-window of data if we haven't hit EOF yet.
- len = readall(TT.infd, data+(TT.len&32768), 32768);
+ len = readall(dd->infd, data+(dd->len&32768), 32768);
if (len < 0) perror_exit("read"); // todo: add filename
if (len != 32768) final++;
- if (TT.crcfunc) TT.crcfunc(data+(TT.len&32768), len);
- // TT.len += len; crcfunc advances len
+ if (dd->crcfunc) dd->crcfunc(dd, data+(dd->len&32768), len);
+ // dd->len += len; crcfunc advances len TODO
// store block as literal
bitbuf_put(bb, final, 1);
@@ -372,57 +336,60 @@ static void deflate(struct bitbuf *bb)
bitbuf_put(bb, 0xffff & ~len, 16);
// repeat until spanked
- while (TT.pos != TT.len) {
- unsigned pos = TT.pos & 65535;
+ while (dd->pos != dd->len) {
+ unsigned pos = dd->pos&65535;
bitbuf_put(bb, data[pos], 8);
// need to refill buffer?
- if (!(32767 & ++TT.pos) && !final) break;
+ if (!(32767 & ++dd->pos) && !final) break;
}
}
bitbuf_flush(bb);
}
// Allocate memory for deflate/inflate.
-static void init_deflate(int compress)
+static struct deflate *init_deflate(int compress)
{
int i, n = 1;
+ struct deflate *dd = xmalloc(sizeof(struct deflate)+32768*(compress ? 4 : 1));
- // compress needs 64k data and 32k each for hashhead and hashchain.
- // decompress just needs 32k data.
- TT.data = xmalloc(32768*(compress ? 4 : 1));
+// TODO sizeof and order of these?
+ // decompress needs 32k history, compress adds 64k hashhead and 32k hashchain
if (compress) {
- TT.hashhead = (unsigned short *)(TT.data + 65536);
- TT.hashchain = (unsigned short *)(TT.data + 65536 + 32768);
+ dd->hashhead = (unsigned short *)(dd->data+65536);
+ dd->hashchain = (unsigned short *)(dd->data+65536+32768);
}
// Calculate lenbits, lenbase, distbits, distbase
- *TT.lenbase = 3;
- for (i = 0; i<sizeof(TT.lenbits)-1; i++) {
+ *dd->lenbase = 3;
+ for (i = 0; i<sizeof(dd->lenbits)-1; i++) {
if (i>4) {
if (!(i&3)) {
- TT.lenbits[i]++;
+ dd->lenbits[i]++;
n <<= 1;
}
if (i == 27) n--;
- else TT.lenbits[i+1] = TT.lenbits[i];
+ else dd->lenbits[i+1] = dd->lenbits[i];
}
- TT.lenbase[i+1] = n + TT.lenbase[i];
+ dd->lenbase[i+1] = n + dd->lenbase[i];
}
n = 0;
- for (i = 0; i<sizeof(TT.distbits); i++) {
- TT.distbase[i] = 1<<n;
- if (i) TT.distbase[i] += TT.distbase[i-1];
+ for (i = 0; i<sizeof(dd->distbits); i++) {
+ dd->distbase[i] = 1<<n;
+ if (i) dd->distbase[i] += dd->distbase[i-1];
if (i>3 && !(i&1)) n++;
- TT.distbits[i] = n;
+ dd->distbits[i] = n;
}
+// TODO layout and lifetime of this?
// Init fixed huffman tables
for (i=0; i<288; i++) toybuf[i] = 8 + (i>143) - ((i>255)<<1) + (i>279);
- len2huff(TT.fixlithuff = ((struct huff *)toybuf)+3, toybuf, 288);
+ len2huff(dd->fixlithuff = ((struct huff *)toybuf)+3, toybuf, 288);
memset(toybuf, 5, 30);
- len2huff(TT.fixdisthuff = ((struct huff *)toybuf)+4, toybuf, 30);
+ len2huff(dd->fixdisthuff = ((struct huff *)toybuf)+4, toybuf, 30);
+
+ return dd;
}
// Return true/false whether we consumed a gzip header.
@@ -444,70 +411,75 @@ static int is_gzip(struct bitbuf *bb)
return 1;
}
-void gzip_crc(char *data, int len)
+void gzip_crc(struct deflate *dd, char *data, int len)
{
int i;
- unsigned crc, *crc_table = (unsigned *)(toybuf+sizeof(toybuf)-1024);
+ unsigned crc, *crc_table = dd->crctable;
- crc = TT.crc;
+ crc = dd->crc;
for (i=0; i<len; i++) crc = crc_table[(crc^data[i])&0xff] ^ (crc>>8);
- TT.crc = crc;
- TT.len += len;
+ dd->crc = crc;
+ dd->len += len;
}
-static void do_gzip(int fd, char *name)
+long long gzip_fd(int infd, int outfd)
{
- struct bitbuf *bb = bitbuf_init(1, sizeof(toybuf));
+ struct bitbuf *bb = bitbuf_init(outfd, 4096);
+ struct deflate *dd = init_deflate(1);
+ long long rc;
// Header from RFC 1952 section 2.2:
// 2 ID bytes (1F, 8b), gzip method byte (8=deflate), FLAG byte (none),
// 4 byte MTIME (zeroed), Extra Flags (2=maximum compression),
// Operating System (FF=unknown)
- TT.infd = fd;
+ dd->infd = infd;
xwrite(bb->fd, "\x1f\x8b\x08\0\0\0\0\0\x02\xff", 10);
- // Use last 1k of toybuf for little endian crc table
- crc_init((unsigned *)(toybuf+sizeof(toybuf)-1024), 1);
- TT.crcfunc = gzip_crc;
+ // Little endian crc table
+ crc_init(dd->crctable, 1);
+ dd->crcfunc = gzip_crc;
- deflate(bb);
+ deflate(dd, bb);
// tail: crc32, len32
bitbuf_put(bb, 0, (8-bb->bitpos)&7);
- bitbuf_put(bb, ~TT.crc, 32);
- bitbuf_put(bb, TT.len, 32);
+ bitbuf_put(bb, ~dd->crc, 32);
+ bitbuf_put(bb, dd->len, 32);
+ rc = dd->len;
bitbuf_flush(bb);
free(bb);
+ free(dd);
+
+ return rc;
}
-static void do_zcat(int fd, char *name)
+long long gunzip_fd(int infd, int outfd)
{
- struct bitbuf *bb = bitbuf_init(fd, sizeof(toybuf));
+ struct bitbuf *bb = bitbuf_init(infd, 4096);
+ struct deflate *dd = init_deflate(0);
+ long long rc;
if (!is_gzip(bb)) error_exit("not gzip");
- TT.outfd = 1;
+ dd->outfd = outfd;
- // Use last 1k of toybuf for little endian crc table
- crc_init((unsigned *)(toybuf+sizeof(toybuf)-1024), 1);
- TT.crcfunc = gzip_crc;
+ // Little endian crc table
+ crc_init(dd->crctable, 1);
+ dd->crcfunc = gzip_crc;
- inflate(bb);
+ inflate(dd, bb);
// tail: crc32, len32
bitbuf_skip(bb, (8-bb->bitpos)&7);
- if (~TT.crc != bitbuf_get(bb, 32) || TT.len != bitbuf_get(bb, 32))
+ if (~dd->crc != bitbuf_get(bb, 32) || dd->len != bitbuf_get(bb, 32))
error_exit("bad crc");
- free(bb);
-}
-// Parse many different kinds of command line argument:
+ rc = dd->len;
+ free(bb);
+ free(dd);
-void compress_main(void)
-{
- // todo: this
- printf("hello world");
+ return rc;
}