From 3b22ffe6026ccbdc6da6391a479470c750d907b4 Mon Sep 17 00:00:00 2001 From: Alan Cox Date: Mon, 29 Dec 2014 22:18:37 +0000 Subject: [PATCH] decomp16: decompress 16bit LZ This is going to give most Z80 boxes a bit of a battering if it works at all. Committing this more because it'll be an excellent way to test pipes and abuse the system for stress testing. --- Applications/util/Makefile | 1 + Applications/util/decomp16.c | 441 +++++++++++++++++++++++++++++++++++ 2 files changed, 442 insertions(+) create mode 100644 Applications/util/decomp16.c diff --git a/Applications/util/Makefile b/Applications/util/Makefile index 64d39e20..6f473333 100644 --- a/Applications/util/Makefile +++ b/Applications/util/Makefile @@ -29,6 +29,7 @@ SRCS = banner.c \ cut.c \ date.c \ dd.c \ + decomp16.c \ df.c \ dirname.c \ du.c \ diff --git a/Applications/util/decomp16.c b/Applications/util/decomp16.c new file mode 100644 index 00000000..cd9fb4a2 --- /dev/null +++ b/Applications/util/decomp16.c @@ -0,0 +1,441 @@ +/* decomp16: decompress 16bit compressed files on a 16bit Intel processor + * + * Version 1.3 of 25 Mar 92. + * + * This was written by John N. White on 6/30/91 and is Public Domain. + * Patched to run under news by Will Rose, Feb 92. + * J N White's (earlier) patches added by Will Rose, 20 Feb 92. + * Unsigned int increment/wrap bug fixed by Will Rose, 24 Mar 92. + * Argument bug fixed, stdio generalised by Will Rose, 25 Mar 92. + * + * decomp16 can use as as little as 512 bytes of stack; since it forks + * four additional copies, it's probably worth using minimum stack rather + * than the 8192 byte Minix default. To reduce memory still further, + * change BUFSZ below to 256; it is currently set to 1024 for speed. The + * minimal decomp16 needs about 280k to run in pipe mode (56k per copy). + * + * This program acts as a filter: + * decomp16 < compressed_file > decompressed_file + * The arguments -0 to -4 run only the corresponding pass. + * Thus: + * decomp16 -4 < compressed_file > 3; + * decomp16 -3 < 3 > 2; + * decomp16 -2 < 2 > 1; + * decomp16 -1 < 1 > 0; + * decomp16 -0 < 0 > decompressed_file + * will also work, as will connecting the passes by explicit pipes if + * there is enough memory to do so. File name arguments can also be + * given directly on the command line. + * + * Compress uses a modified LZW compression algorithm. A compressed file + * is a set of indices into a dictionary of strings. The number of bits + * used to store each index depends on the number of entries currently + * in the dictionary. If there are between 257 and 512 entries, 9 bits + * are used. With 513 entries, 10 bits are used, etc. The initial dictionary + * consists of 0-255 (which are the corresponding chars) and 256 (which + * is a special CLEAR code). As each index in the compressed file is read, + * a new entry is added to the dictionary consisting of the current string + * with the first char of the next string appended. When the dictionary + * is full, no further entries are added. If a CLEAR code is received, + * the dictionary will be completely reset. The first two bytes of the + * compressed file are a magic number, and the third byte indicates the + * maximum number of bits, and whether the CLEAR code is used (older versions + * of compress didn't have CLEAR). + * + * This program works by forking four more copies of itself. The five + * programs form a pipeline. Copy 0 writes to stdout, and forks copy 1 + * to supply its input, which in turn forks and reads from copy 2, etc. + * This sequence is used so that when the program exits, all writes + * are completed and a program that has exec'd uncompress (such as news) + * can immediately use the uncompressed data when the wait() call returns. + * + * If given a switch -#, where # is a digit from 0 to 4 (example: -2), the + * program will run as that copy, reading from stdin and writing to stdout. + * This allows decompressing with very limited RAM because only one of the + * five passes is in memory at a time. + * + * The compressed data is a series of string indices (and a header at + * the beginning and an occasional CLEAR code). As these indices flow + * through the pipes, each program decodes the ones it can. The result + * of each decoding will be indices that the following programs can handle. + * + * Each of the 65536 strings in the dictionary is an earlier string with + * some character added to the end (except for the the 256 predefined + * single char strings). When new entries are made to the dictionary, + * the string index part will just be the last index to pass through. + * But the char part is the first char of the next string, which isn't + * known yet. So the string can be stored as a pair of indices. When + * this string is specified, it is converted to this pair of indices, + * which are flagged so that the first will be decoded in full while + * the second will be decoded to its first char. The dictionary takes + * 256k to store (64k strings of 2 indices of 2 bytes each). This is + * too big for a 64k data segment, so it is divided into 5 equal parts. + * Copy 4 of the program maintains the high part and copy 0 holds the + * low part. + */ + +#include +#include +#include +#include +#include + +#define BUFSZ 1024 /* size of i/o buffers */ +#define BUFSZ_2 (BUFSZ/2) /* # of unsigned shorts in i/o bufs */ +#define DICTSZ (unsigned)13056 /* # of local dictionary entries */ +#define EOF_INDEX (unsigned short)0xFFFF /* EOF flag for pipeline */ +#define FALSE 0 +#define TRUE ~FALSE + +int fdin, fdout, fderr; /* input, output, and error file descriptors */ +int ibufstart, obufind, ibufend;/* i/o buffer indices */ +int ipbufind = BUFSZ_2; /* pipe buffer indices */ +int opbufind = 0; +int pnum = -1; /* ID of this copy */ +unsigned short ipbuf[BUFSZ_2]; /* for buffering input */ +unsigned short opbuf[BUFSZ_2]; /* for buffering output */ +unsigned char *ibuf = (unsigned char *) ipbuf; +unsigned char *obuf = (unsigned char *) opbuf; + +unsigned short dindex[DICTSZ]; /* dictionary: index to substring */ +unsigned short dchar[DICTSZ]; /* dictionary: last char of string */ +unsigned iindex, tindex, tindex2; /* holds index being processed */ +unsigned base; /* where in global dict local dict starts */ +unsigned tbase; +unsigned locend; /* where in global dict local dict ends */ +unsigned curend = 256; /* current end of global dict */ +unsigned maxend; /* max end of global dict */ +int dcharp; /* ptr to dchar that needs next index entry */ +int curbits; /* number of bits for getbits() to read */ +int maxbits; /* limit on number of bits */ +int clearflg; /* if set, allow CLEAR */ +int inmod; /* mod 8 for getbits() */ + +int main(int argc, char *argv[]); +void ffork(void); +void die(const char *s); +void myputc(unsigned c); +unsigned mygetc(void); +void getbits(void); +void getpipe(void); +void putpipe(unsigned u, int flag); + + + +/* P u t p i p e + * + * put an index into the pipeline. + */ +void putpipe(unsigned u, int flag) +{ + static unsigned short flags, *flagp; + static int n = 0; /* number of flags in flags */ + + if (pnum == 0) { /* if we should write to stdout */ + myputc(u); /* index will be the char value */ + return; + } + if (n == 0) { /* if we need to reserve a flag entry */ + flags = 0; + flagp = opbuf + opbufind; + opbufind++; + } + opbuf[opbufind++] = u; /* add index to buffer */ + flags = (flags << 1) | flag; /* add firstonly flag */ + if (++n >= 15) { /* if block of 15 indices */ + n = 0; + *flagp = flags; /* insert flags entry */ + if (opbufind >= BUFSZ_2) { /* if pipe out buffer full */ + opbufind = 0; + if (write(fdout, (char *) opbuf, BUFSZ) != BUFSZ) + die("bad pipe write\n"); + } + } +} + + + +/* D i e + * + * If s is a message, write it to stderr. Flush buffers if needed. Then exit. + */ +void die(const char *s) +{ + /* Flush stdout buffer if needed */ + if (obufind != 0) { + if (write(fdout, (char *) obuf, (unsigned) obufind) != obufind) + s = "bad stdout write\n"; + obufind = 0; + } + + /* Flush pipe if needed */ + do + putpipe(EOF_INDEX, 0); + while (opbufind); + /* Write any error message */ + if (s != (const char *) NULL) + write(fderr, s, strlen(s)); + exit((s == (char *) NULL) ? 0 : 1); +} + + +/* M p u t c + * + * Put a char to stdout. + */ +void myputc(unsigned c) +{ + obuf[obufind++] = c; + if (obufind >= BUFSZ) { /* if stdout buffer full */ + if (write(fdout, (char *) obuf, BUFSZ) != BUFSZ) /* flush to stdout */ + die("bad stdout write\n"); + obufind = 0; + } +} + + +/* M y g e t c + * + * Get a char from stdin. If EOF, then die() and exit. + */ +unsigned mygetc(void) +{ + if (ibufstart >= ibufend) { /* if stdin buffer empty */ + if ((ibufend = read(fdin, (char *) ibuf, BUFSZ)) <= 0) + die((char *) NULL); /* if EOF, do normal exit */ + ibufstart = 0; + } + return(ibuf[ibufstart++] & 0xff); +} + + +/* G e t b i t s + * + * Put curbits bits into index from stdin. Note: only copy 4 uses this. + * The bits within a byte are in the correct order. But when the bits + * cross a byte boundry, the lowest bits will be in the higher part of + * the current byte, and the higher bits will be in the lower part of + * the next byte. + */ +void getbits(void) +{ + int have; + static unsigned curbyte; /* byte having bits extracted from it */ + static int left; /* how many bits are left in curbyte */ + + inmod = (inmod + 1) & 7; /* count input mod 8 */ + iindex = curbyte; + have = left; + if (curbits - have > 8) { + iindex |= mygetc() << have; + have += 8; + } + iindex |= ((curbyte = mygetc()) << have) & ~((unsigned) 0xFFFF << curbits); + curbyte >>= curbits - have; + left = 8 - (curbits - have); +} + + +/* G e t p i p e + * + * Get an index from the pipeline. If flagged firstonly, handle it here. + */ +void getpipe(void) +{ + static short flags; + static int n = 0; /* number of flags in flags */ + + while (TRUE) { /* while index with firstonly flag set */ + if (n <= 0) { + if (ipbufind >= BUFSZ_2) { /* if pipe input buffer + * empty */ + if (read(fdin, (char *) ipbuf, BUFSZ) != BUFSZ) + die("bad pipe read\n"); + ipbufind = 0; + } + flags = ipbuf[ipbufind++]; + n = 15; + } + iindex = ipbuf[ipbufind++]; + if (iindex > curend) + die((iindex == EOF_INDEX) ? (char *) NULL : "invalid data\n"); + flags <<= 1; + n--; + /* Assume flags < 0 if highest remaining flag is set */ + if (flags < 0) { /* if firstonly flag for index is not set */ + while (iindex >= base) iindex = dindex[iindex - base]; + putpipe(iindex, 1); + } else + return; /* return with valid non-firstonly index */ + } +} + + +/* F f o r k + * + * Fork off the previous pass - the parent reads from the child. + */ +void ffork(void) +{ + int j, pfd[2]; + + if (pipe(pfd) == -1) die("pipe() error\n"); + if ((j = fork()) == -1) die("fork() error\n"); + if (j == 0) { /* this is the child */ + if (close(1) == -1) die("close(1) error\n"); + if (dup(pfd[1]) != 1) die("dup(1) error\n"); + (void) close(pfd[0]); + pnum++; + } else { /* this is the parent */ + if (close(0) == -1) die("close(0) error\n"); + if (dup(pfd[0]) != 0) die("dup(0) error\n"); + (void) close(pfd[1]); + } +} + + +int main(int argc, char *argv[]) +{ + char c, *cp; + int j, k, fdtmp; + unsigned int len; + + /* Find the program name */ + j = 0; + while (argv[0][j] != '\0') j++; + len = (unsigned int) j; + while (j--) + if (argv[0][j] == '/') break; + if (argv[0][j] == '/') j++; + cp = argv[0] + j; + len -= j; + + /* Sort out the flags */ + for (k = 1; k < argc; k++) { + if (argv[k][0] == '-') { + c = argv[k][1]; + switch (c) { + case '0': /* pass numbers */ + case '1': + case '2': + case '3': + case '4': pnum = c - '0'; break; + case 'd': /* used by news */ + break; + default: + (void) write(1, "Usage: ", 7); + (void) write(1, cp, len); + (void) write(1, " [-#] [in] [out]\n", 17); + exit(0); + break; + } + + /* Once it's checked, lose it anyway */ + for (j = k; j < argc; j++) argv[j] = argv[j + 1]; + argc--; + k--; + } + } + + /* Default i/o settings */ + fdin = 0; + fdout = 1; + fderr = 2; + + /* Try to open specific files and connect them to stdin/stdout */ + if (argc > 1) { + if ((fdtmp = open(argv[1], 0)) == -1) die("input open failed"); + (void) close(0); + if ((fdin = dup(fdtmp)) == -1) die("input dup failed\n"); + (void) close(fdtmp); + } + if (argc > 2) { + (void) unlink(argv[2]); + if ((fdtmp = creat(argv[2], 0666)) == -1) die("output creat failed"); + (void) close(1); + if ((fdout = dup(fdtmp)) == -1) die("output dup failed\n"); + (void) close(fdtmp); + } + + /* Sort out type of compression */ + if (pnum == -1 || pnum == 4) {/* if this is pass 4 */ + /* Check header of compressed file */ + if (mygetc() != 0x1F || mygetc() != 0x9D) /* check magic number */ + die("not a compressed file\n"); + iindex = mygetc(); /* get compression style */ + } else + getpipe(); /* get compression style */ + + maxbits = iindex & 0x1F; + clearflg = ((iindex & 0x80) != 0) ? TRUE : FALSE; + if (maxbits < 9 || maxbits > 16) /* check for valid maxbits */ + die("can't decompress\n"); + if (pnum != -1 && pnum != 0) + putpipe(iindex, 0); /* pass style to next copy */ + + /* Fork off an ancestor if necessary - ffork() increments pnum */ + if (pnum == -1) { + pnum = 0; + if (pnum == 0) ffork(); + if (pnum == 1) ffork(); + if (pnum == 2) ffork(); + if (pnum == 3) ffork(); + } + + /* Preliminary inits. Note: end/maxend/curend are highest, not + * highest + 1 */ + base = DICTSZ * pnum + 256; + locend = base + DICTSZ - 1; + maxend = (1 << maxbits) - 1; + if (maxend > locend) maxend = locend; + + while (TRUE) { + curend = 255 + (clearflg ? 1 : 0); /* init dictionary */ + dcharp = DICTSZ; /* flag for none needed */ + curbits = 9; /* init curbits (for copy 0) */ + while (TRUE) { /* for each index in input */ + if (pnum == 4) {/* get index using getbits() */ + if (curbits < maxbits && (1 << curbits) <= curend) { + /* Curbits needs to be increased */ + /* Due to uglyness in compress, these + * indices in the compressed file are + * wasted */ + while (inmod) getbits(); + curbits++; + } + getbits(); + } else + getpipe(); /* get next index */ + + if (iindex == 256 && clearflg) { + if (pnum > 0) putpipe(iindex, 0); + /* Due to uglyness in compress, these indices + * in the compressed file are wasted */ + while (inmod) getbits(); + break; + } + tindex = iindex; + /* Convert the index part, ignoring spawned chars */ + while (tindex >= base) tindex = dindex[tindex - base]; + /* Pass on the index */ + putpipe(tindex, 0); + /* Save the char of the last added entry, if any */ + if (dcharp < DICTSZ) dchar[dcharp++] = tindex; + if (curend < maxend && ++curend > (base - 1)) + dindex[dcharp = (curend - base)] = iindex; + + /* Do spawned chars. They are naturally produced in + * the wrong order. To get them in the right order + * without using memory, a series of passes, + * progressively less deep, are used */ + tbase = base; + while ((tindex = iindex) >= tbase) {/* for each char to spawn*/ + while ((tindex2 = dindex[tindex - base]) >= tbase) + tindex = tindex2; /* scan to desired char */ + putpipe(dchar[tindex-base], 1); /* put it to the pipe*/ + tbase = tindex + 1; + if (tbase == 0) break; /* it's a wrap */ + } + } + } +} -- 2.34.1