diff options
-rw-r--r-- | toys/pending/compress.c | 92 |
1 files changed, 66 insertions, 26 deletions
diff --git a/toys/pending/compress.c b/toys/pending/compress.c index ca3af033..acc7d62b 100644 --- a/toys/pending/compress.c +++ b/toys/pending/compress.c @@ -41,7 +41,8 @@ GLOBALS( char lenbits[29], distbits[30]; unsigned short lenbase[29], distbase[30]; - unsigned (*crc)(char *data, int len); + unsigned (*crcfunc)(char *data, int len); + unsigned crc; char *outbuf; unsigned outlen; @@ -116,10 +117,14 @@ int bitbuf_get(struct bitbuf *bb, int bits) return result; } -static void outbuf_crc(char *buf, int len) +static void outbuf_crc(char sym) { - if (TT.crc) TT.crc(buf, len); - xwrite(TT.outfd, buf, len); + TT.outbuf[TT.outlen++ & 32767] = sym; + + if (!(TT.outlen & 32767)) { + xwrite(TT.outfd, TT.outbuf, 32768); + if (TT.crcfunc) TT.crcfunc(0, 32768); + } } // Huffman coding uses bits to traverse a binary tree to a leaf node, @@ -139,7 +144,7 @@ struct huff { static void len2huff(struct huff *huff, char bitlen[], int len) { int offset[16]; - int i, sum; + int i; // Count number of codes at each bit length memset(huff, 0, sizeof(struct huff)); @@ -147,7 +152,8 @@ static void len2huff(struct huff *huff, char bitlen[], int len) // Sort symbols by bit length. (They'll remain sorted by symbol within that.) *huff->length = *offset = 0; - for (i = sum = 0; i<16; i++) offset[i] = offset[i-1] + huff->length[i]; + for (i = 1; i<16; i++) offset[i] = offset[i-1] + huff->length[i-1]; + for (i = 0; i<len; i++) if (bitlen[i]) huff->symbol[offset[bitlen[i]]++] = i; } @@ -165,10 +171,7 @@ static unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff) offset = (offset << 1) | bitbuf_bit(bb); start += *++length; if ((offset -= *length) < 0) break; -// Note to self: what prevents overflow? -// If we ensure ranges add up to sizeof(symbol), does that ensure all codes -// terminate within table? Patholotical is 11111111... -// if (length - huff_length > 16) error_exit("bad"); + if ((length - huff->length) & 16) error_exit("bad symbol"); } return huff->symbol[start + offset]; @@ -177,13 +180,18 @@ static unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff) // Decompress deflated data from bitbuf to filehandle. static void inflate(struct bitbuf *bb) { + struct huff *disthuff, *lithuff, *fixdisthuff = (struct huff *)(toybuf+2048), + *fixlithuff = (struct huff *)(toybuf+2560); + +// len2huff( + + TT.crc = ~0; // repeat until spanked for (;;) { int final, type; final = bitbuf_get(bb, 1); type = bitbuf_get(bb, 2); -fprintf(stderr, "final=%d type=%d\n", final, type); if (type == 3) error_exit("bad type"); @@ -200,27 +208,29 @@ fprintf(stderr, "final=%d type=%d\n", final, type); // Dump output data while (len) { int pos = bb->bitpos >> 3, bblen = bb->len - pos; + char *p = bb->buf+pos; + // dump bytes until done or end of current bitbuf contents if (bblen > len) bblen = len; - if (bblen) outbuf_crc(bb->buf+pos, bblen); - len -= bblen; + pos = bblen; + while (pos--) outbuf_crc(*(p++)); bitbuf_skip(bb, bblen << 3); + len -= bblen; } // Compressed block } else { -// struct huff huff; // Dynamic huffman codes? if (type == 2) { - struct huff hlo; - int i, literal, distance, hufflen; + struct huff *h2 = (struct huff *)(toybuf+512); + int i, litlen, distlen, hufflen; char *hufflen_order = "\x10\x11\x12\0\x08\x07\x09\x06\x0a\x05\x0b" "\x04\x0c\x03\x0d\x02\x0e\x01\x0f", *bits; // The huffman trees are stored as a series of bit lengths - literal = bitbuf_get(bb, 5)+257; // max 288 - distance = bitbuf_get(bb, 5)+1; // max 32 + litlen = bitbuf_get(bb, 5)+257; // max 288 + distlen = bitbuf_get(bb, 5)+1; // max 32 hufflen = bitbuf_get(bb, 4)+4; // max 19 // The literal and distance codes are themselves compressed, in @@ -229,25 +239,55 @@ fprintf(stderr, "final=%d type=%d\n", final, type); // in a magic order, leaving the rest 0. Then make a tree out of it: memset(bits = toybuf+1, 0, 19); for (i=0; i<hufflen; i++) bits[hufflen_order[i]] = bitbuf_get(bb, 3); - len2huff(&hlo, bits, 19); + len2huff(h2, bits, 19); // Use that tree to read in the literal and distance bit lengths - for (i = 0; i < literal + distance; i++) { - int sym = huff_and_puff(bb, &hlo); + for (i = 0; i < litlen + distlen;) { + int sym = huff_and_puff(bb, h2); // 0-15 are literals, 16 = repeat previous code 3-6 times, - // 17 = 3-10 zeroes, 18 = 11-138 zeroes - if (sym < 16) bits[i] = sym; - else memset(bits+i, bits[i-1] * !(sym&3), - bitbuf_get(bb, (sym-14)+((sym&2)<<2))); + // 17 = 3-10 zeroes (3 bit), 18 = 11-138 zeroes (7 bit) + if (sym < 16) bits[i++] = sym; + else { + int len = sym & 2; + + len = bitbuf_get(bb, sym-14+len+(len>>1)) + 3 + (len<<2); + memset(bits+i, bits[i-1] * !(sym&3), len); + i += len; + } } + if (i > litlen+distlen) error_exit("bad tree"); + + len2huff(lithuff = (struct huff *)(toybuf+1024), bits, litlen); + len2huff(disthuff = (struct huff *)(toybuf+1536), bits+litlen, distlen); + // Static huffman codes } else { + lithuff = fixlithuff; + disthuff = fixdisthuff; +error_exit("todo static huffman init"); + } + for (;;) { + int sym = huff_and_puff(bb, lithuff); + + if (sym < 256) outbuf_crc(sym); + else if (sym > 256) { + int len, dist; + + sym -= 257; + len = TT.lenbase[sym] + bitbuf_get(bb, TT.lenbits[sym]); + sym = huff_and_puff(bb, disthuff); + dist = TT.distbase[sym] + bitbuf_get(bb, TT.distbits[sym]); + sym = TT.outlen & 32767; + + while (len--) outbuf_crc(TT.outbuf[(TT.outlen-dist) & 32767]); + } else break; } } - if (final) return; + if (final) break; } + if (TT.outlen & 32767) xwrite(TT.outfd, TT.outbuf, TT.outlen & 32767); } static void init_deflate(void) |