| /* |
| * Copyright (c) 1985, 1986 The Regents of the University of California. |
| * All rights reserved. |
| * |
| * This code is derived from software contributed to Berkeley by |
| * James A. Woods, derived from original work by Spencer Thomas |
| * and Joseph Orost. |
| * |
| * Redistribution and use in source and binary forms, with or without |
| * modification, are permitted provided that the following conditions |
| * are met: |
| * 1. Redistributions of source code must retain the above copyright |
| * notice, this list of conditions and the following disclaimer. |
| * 2. Redistributions in binary form must reproduce the above copyright |
| * notice, this list of conditions and the following disclaimer in the |
| * documentation and/or other materials provided with the distribution. |
| * 3. All advertising materials mentioning features or use of this software |
| * must display the following acknowledgement: |
| * This product includes software developed by the University of |
| * California, Berkeley and its contributors. |
| * 4. Neither the name of the University nor the names of its contributors |
| * may be used to endorse or promote products derived from this software |
| * without specific prior written permission. |
| * |
| * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND |
| * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE |
| * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL |
| * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS |
| * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) |
| * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT |
| * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY |
| * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF |
| * SUCH DAMAGE. |
| */ |
| |
| #ifndef lint |
| char copyright[] = |
| "@(#) Copyright (c) 1985, 1986 The Regents of the University of California.\n\ |
| All rights reserved.\n"; |
| #endif /* not lint */ |
| |
| #ifndef lint |
| static char sccsid[] = "@(#)compress.c 5.19 (Berkeley) 3/18/91"; |
| #endif /* not lint */ |
| |
| /* |
| * compress.c - File compression ala IEEE Computer, June 1984. |
| * |
| * Authors: Spencer W. Thomas (decvax!utah-cs!thomas) |
| * Jim McKie (decvax!mcvax!jim) |
| * Steve Davies (decvax!vax135!petsd!peora!srd) |
| * Ken Turkowski (decvax!decwrl!turtlevax!ken) |
| * James A. Woods (decvax!ihnp4!ames!jaw) |
| * Joe Orost (decvax!vax135!petsd!joe) |
| */ |
| |
| #include <sys/param.h> |
| #include <sys/stat.h> |
| #include <signal.h> |
| #include <utime.h> |
| #include <errno.h> |
| #include <unistd.h> |
| #include <stdio.h> |
| #include <ctype.h> |
| #include <stdlib.h> |
| #include <string.h> |
| |
| /* |
| * Set USERMEM to the maximum amount of physical user memory available |
| * in bytes. USERMEM is used to determine the maximum BITS that can be used |
| * for compression. |
| * |
| * SACREDMEM is the amount of physical memory saved for others; compress |
| * will hog the rest. |
| */ |
| #ifndef SACREDMEM |
| #define SACREDMEM 0 |
| #endif |
| |
| #ifndef USERMEM |
| # define USERMEM 450000 /* default user memory */ |
| #endif |
| |
| #ifdef pdp11 |
| # define BITS 12 /* max bits/code for 16-bit machine */ |
| # define NO_UCHAR /* also if "unsigned char" functions as signed char */ |
| # undef USERMEM |
| #endif /* pdp11 */ /* don't forget to compile with -i */ |
| |
| #ifdef USERMEM |
| # if USERMEM >= (433484+SACREDMEM) |
| # define PBITS 16 |
| # else |
| # if USERMEM >= (229600+SACREDMEM) |
| # define PBITS 15 |
| # else |
| # if USERMEM >= (127536+SACREDMEM) |
| # define PBITS 14 |
| # else |
| # if USERMEM >= (73464+SACREDMEM) |
| # define PBITS 13 |
| # else |
| # define PBITS 12 |
| # endif |
| # endif |
| # endif |
| # endif |
| # undef USERMEM |
| #endif /* USERMEM */ |
| |
| #ifdef PBITS /* Preferred BITS for this memory size */ |
| # ifndef BITS |
| # define BITS PBITS |
| # endif BITS |
| #endif /* PBITS */ |
| |
| #if BITS == 16 |
| # define HSIZE 69001 /* 95% occupancy */ |
| #endif |
| #if BITS == 15 |
| # define HSIZE 35023 /* 94% occupancy */ |
| #endif |
| #if BITS == 14 |
| # define HSIZE 18013 /* 91% occupancy */ |
| #endif |
| #if BITS == 13 |
| # define HSIZE 9001 /* 91% occupancy */ |
| #endif |
| #if BITS <= 12 |
| # define HSIZE 5003 /* 80% occupancy */ |
| #endif |
| |
| /* |
| * a code_int must be able to hold 2**BITS values of type int, and also -1 |
| */ |
| #if BITS > 15 |
| typedef long int code_int; |
| #else |
| typedef int code_int; |
| #endif |
| |
| #ifdef SIGNED_COMPARE_SLOW |
| typedef unsigned long int count_int; |
| typedef unsigned short int count_short; |
| #else |
| typedef long int count_int; |
| #endif |
| |
| #ifdef NO_UCHAR |
| typedef char char_type; |
| #else |
| typedef unsigned char char_type; |
| #endif /* UCHAR */ |
| char_type magic_header[] = { "\037\235" }; /* 1F 9D */ |
| |
| /* Defines for third byte of header */ |
| #define BIT_MASK 0x1f |
| #define BLOCK_MASK 0x80 |
| /* Masks 0x40 and 0x20 are free. I think 0x20 should mean that there is |
| a fourth header byte (for expansion). |
| */ |
| #define INIT_BITS 9 /* initial number of bits/code */ |
| |
| int n_bits; /* number of bits/code */ |
| int maxbits = BITS; /* user settable max # bits/code */ |
| code_int maxcode; /* maximum code, given n_bits */ |
| code_int maxmaxcode = 1 << BITS; /* should NEVER generate this code */ |
| #ifdef COMPATIBLE /* But wrong! */ |
| # define MAXCODE(n_bits) (1 << (n_bits) - 1) |
| #else |
| # define MAXCODE(n_bits) ((1 << (n_bits)) - 1) |
| #endif /* COMPATIBLE */ |
| |
| count_int htab [HSIZE]; |
| unsigned short codetab [HSIZE]; |
| |
| #define htabof(i) htab[i] |
| #define codetabof(i) codetab[i] |
| code_int hsize = HSIZE; /* for dynamic table sizing */ |
| count_int fsize; |
| |
| /* |
| * To save much memory, we overlay the table used by compress() with those |
| * used by decompress(). The tab_prefix table is the same size and type |
| * as the codetab. The tab_suffix table needs 2**BITS characters. We |
| * get this from the beginning of htab. The output stack uses the rest |
| * of htab, and contains characters. There is plenty of room for any |
| * possible stack (stack used to be 8000 characters). |
| */ |
| |
| #define tab_prefixof(i) codetabof(i) |
| # define tab_suffixof(i) ((char_type *)(htab))[i] |
| # define de_stack ((char_type *)&tab_suffixof(1<<BITS)) |
| |
| code_int free_ent = 0; /* first unused entry */ |
| int exit_stat = 0; /* per-file status */ |
| int perm_stat = 0; /* permanent status */ |
| |
| code_int getcode(); |
| |
| int nomagic = 0; /* Use a 3-byte magic number header, unless old file */ |
| int zcat_flg = 0; /* Write output on stdout, suppress messages */ |
| int precious = 1; /* Don't unlink output file on interrupt */ |
| int quiet = 1; /* don't tell me about compression */ |
| |
| /* |
| * block compression parameters -- after all codes are used up, |
| * and compression rate changes, start over. |
| */ |
| int block_compress = BLOCK_MASK; |
| int clear_flg = 0; |
| long int ratio = 0; |
| #define CHECK_GAP 10000 /* ratio check interval */ |
| count_int checkpoint = CHECK_GAP; |
| /* |
| * the next two codes should not be changed lightly, as they must not |
| * lie within the contiguous general code space. |
| */ |
| #define FIRST 257 /* first free entry */ |
| #define CLEAR 256 /* table clear output code */ |
| |
| int force = 0; |
| char ofname [100]; |
| #ifdef DEBUG |
| int debug, verbose; |
| #endif |
| sig_t oldint; |
| int bgnd_flag; |
| |
| int do_decomp = 0; |
| |
| /*- |
| * Algorithm from "A Technique for High Performance Data Compression", |
| * Terry A. Welch, IEEE Computer Vol 17, No 6 (June 1984), pp 8-19. |
| * |
| * Usage: compress [-dfvc] [-b bits] [file ...] |
| * Inputs: |
| * -d: If given, decompression is done instead. |
| * |
| * -c: Write output on stdout, don't remove original. |
| * |
| * -b: Parameter limits the max number of bits/code. |
| * |
| * -f: Forces output file to be generated, even if one already |
| * exists, and even if no space is saved by compressing. |
| * If -f is not used, the user will be prompted if stdin is |
| * a tty, otherwise, the output file will not be overwritten. |
| * |
| * -v: Write compression statistics |
| * |
| * file ...: Files to be compressed. If none specified, stdin |
| * is used. |
| * Outputs: |
| * file.Z: Compressed form of file with same mode, owner, and utimes |
| * or stdout (if stdin used as input) |
| * |
| * Assumptions: |
| * When filenames are given, replaces with the compressed version |
| * (.Z suffix) only if the file decreases in size. |
| * Algorithm: |
| * Modified Lempel-Ziv method (LZW). Basically finds common |
| * substrings and replaces them with a variable size code. This is |
| * deterministic, and can be done on the fly. Thus, the decompression |
| * procedure needs no input table, but tracks the way the table was built. |
| */ |
| |
| main(argc, argv) |
| int argc; |
| char **argv; |
| { |
| extern int optind; |
| extern char *optarg; |
| struct stat statbuf; |
| int ch, overwrite; |
| char **filelist, **fileptr, *cp, tempname[MAXPATHLEN]; |
| void onintr(), oops(); |
| |
| /* This bg check only works for sh. */ |
| if ((oldint = signal(SIGINT, SIG_IGN)) != SIG_IGN) { |
| (void)signal(SIGINT, onintr); |
| (void)signal(SIGSEGV, oops); /* XXX */ |
| } |
| bgnd_flag = oldint != SIG_DFL; |
| |
| #ifdef COMPATIBLE |
| nomagic = 1; /* Original didn't have a magic number */ |
| #endif |
| |
| if (cp = rindex(argv[0], '/')) |
| ++cp; |
| else |
| cp = argv[0]; |
| if (strcmp(cp, "uncompress") == 0) |
| do_decomp = 1; |
| else if(strcmp(cp, "zcat") == 0) { |
| do_decomp = 1; |
| zcat_flg = 1; |
| } |
| |
| /* |
| * -b maxbits => maxbits. |
| * -C => generate output compatible with compress 2.0. |
| * -c => cat all output to stdout |
| * -D => debug |
| * -d => do_decomp |
| * -f => force overwrite of output file |
| * -n => no header: useful to uncompress old files |
| * -V => print Version; debug verbose |
| * -v => unquiet |
| */ |
| |
| overwrite = 0; |
| #ifdef DEBUG |
| while ((ch = getopt(argc, argv, "b:CcDdfnVv")) != EOF) |
| #else |
| while ((ch = getopt(argc, argv, "b:Ccdfnv")) != EOF) |
| #endif |
| switch(ch) { |
| case 'b': |
| maxbits = atoi(optarg); |
| break; |
| case 'C': |
| block_compress = 0; |
| break; |
| case 'c': |
| zcat_flg = 1; |
| break; |
| #ifdef DEBUG |
| case 'D': |
| debug = 1; |
| break; |
| #endif |
| case 'd': |
| do_decomp = 1; |
| break; |
| case 'f': |
| overwrite = 1; |
| force = 1; |
| break; |
| case 'n': |
| nomagic = 1; |
| break; |
| case 'q': |
| quiet = 1; |
| break; |
| #ifdef DEBUG |
| case 'V': |
| verbose = 1; |
| break; |
| #endif |
| case 'v': |
| quiet = 0; |
| break; |
| case '?': |
| default: |
| usage(); |
| } |
| argc -= optind; |
| argv += optind; |
| |
| if (maxbits < INIT_BITS) |
| maxbits = INIT_BITS; |
| if (maxbits > BITS) |
| maxbits = BITS; |
| maxmaxcode = 1 << maxbits; |
| |
| /* Build useless input file list. */ |
| filelist = fileptr = (char **)(malloc(argc * sizeof(*argv))); |
| while (*argv) |
| *fileptr++ = *argv++; |
| *fileptr = NULL; |
| |
| if (*filelist != NULL) { |
| for (fileptr = filelist; *fileptr; fileptr++) { |
| exit_stat = 0; |
| if (do_decomp) { /* DECOMPRESSION */ |
| /* Check for .Z suffix */ |
| if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") != 0) { |
| /* No .Z: tack one on */ |
| strcpy(tempname, *fileptr); |
| strcat(tempname, ".Z"); |
| *fileptr = tempname; |
| } |
| /* Open input file */ |
| if ((freopen(*fileptr, "r", stdin)) == NULL) { |
| perror(*fileptr); |
| perm_stat = 1; |
| continue; |
| } |
| /* Check the magic number */ |
| if (nomagic == 0) { |
| if ((getchar() != (magic_header[0] & 0xFF)) |
| || (getchar() != (magic_header[1] & 0xFF))) { |
| fprintf(stderr, "%s: not in compressed format\n", |
| *fileptr); |
| continue; |
| } |
| maxbits = getchar(); /* set -b from file */ |
| block_compress = maxbits & BLOCK_MASK; |
| maxbits &= BIT_MASK; |
| maxmaxcode = 1 << maxbits; |
| if(maxbits > BITS) { |
| fprintf(stderr, |
| "%s: compressed with %d bits, can only handle %d bits\n", |
| *fileptr, maxbits, BITS); |
| continue; |
| } |
| } |
| /* Generate output filename */ |
| strcpy(ofname, *fileptr); |
| ofname[strlen(*fileptr) - 2] = '\0'; /* Strip off .Z */ |
| } else { /* COMPRESSION */ |
| if (strcmp(*fileptr + strlen(*fileptr) - 2, ".Z") == 0) { |
| fprintf(stderr, "%s: already has .Z suffix -- no change\n", |
| *fileptr); |
| continue; |
| } |
| /* Open input file */ |
| if ((freopen(*fileptr, "r", stdin)) == NULL) { |
| perror(*fileptr); |
| perm_stat = 1; |
| continue; |
| } |
| stat ( *fileptr, &statbuf ); |
| fsize = (long) statbuf.st_size; |
| /* |
| * tune hash table size for small files -- ad hoc, |
| * but the sizes match earlier #defines, which |
| * serve as upper bounds on the number of output codes. |
| */ |
| hsize = HSIZE; |
| if ( fsize < (1 << 12) ) |
| hsize = MIN ( 5003, HSIZE ); |
| else if ( fsize < (1 << 13) ) |
| hsize = MIN ( 9001, HSIZE ); |
| else if ( fsize < (1 << 14) ) |
| hsize = MIN ( 18013, HSIZE ); |
| else if ( fsize < (1 << 15) ) |
| hsize = MIN ( 35023, HSIZE ); |
| else if ( fsize < 47000 ) |
| hsize = MIN ( 50021, HSIZE ); |
| |
| /* Generate output filename */ |
| strcpy(ofname, *fileptr); |
| strcat(ofname, ".Z"); |
| } |
| /* Check for overwrite of existing file */ |
| if (overwrite == 0 && zcat_flg == 0) { |
| if (stat(ofname, &statbuf) == 0) { |
| char response[2]; |
| response[0] = 'n'; |
| fprintf(stderr, "%s already exists;", ofname); |
| if (bgnd_flag == 0 && isatty(2)) { |
| fprintf(stderr, " do you wish to overwrite %s (y or n)? ", |
| ofname); |
| fflush(stderr); |
| read(2, response, 2); |
| while (response[1] != '\n') { |
| if (read(2, response+1, 1) < 0) { /* Ack! */ |
| perror("stderr"); break; |
| } |
| } |
| } |
| if (response[0] != 'y') { |
| fprintf(stderr, "\tnot overwritten\n"); |
| continue; |
| } |
| } |
| } |
| if(zcat_flg == 0) { /* Open output file */ |
| if (freopen(ofname, "w", stdout) == NULL) { |
| perror(ofname); |
| perm_stat = 1; |
| continue; |
| } |
| precious = 0; |
| if(!quiet) |
| fprintf(stderr, "%s: ", *fileptr); |
| } |
| |
| /* Actually do the compression/decompression */ |
| if (do_decomp == 0) compress(); |
| #ifndef DEBUG |
| else decompress(); |
| #else |
| else if (debug == 0) decompress(); |
| else printcodes(); |
| if (verbose) dump_tab(); |
| #endif /* DEBUG */ |
| if(zcat_flg == 0) { |
| copystat(*fileptr, ofname); /* Copy stats */ |
| precious = 1; |
| if((exit_stat == 1) || (!quiet)) |
| putc('\n', stderr); |
| } |
| } |
| } else { /* Standard input */ |
| if (do_decomp == 0) { |
| compress(); |
| #ifdef DEBUG |
| if(verbose) dump_tab(); |
| #endif /* DEBUG */ |
| if(!quiet) |
| putc('\n', stderr); |
| } else { |
| /* Check the magic number */ |
| if (nomagic == 0) { |
| if ((getchar()!=(magic_header[0] & 0xFF)) |
| || (getchar()!=(magic_header[1] & 0xFF))) { |
| fprintf(stderr, "stdin: not in compressed format\n"); |
| exit(1); |
| } |
| maxbits = getchar(); /* set -b from file */ |
| block_compress = maxbits & BLOCK_MASK; |
| maxbits &= BIT_MASK; |
| maxmaxcode = 1 << maxbits; |
| fsize = 100000; /* assume stdin large for USERMEM */ |
| if(maxbits > BITS) { |
| fprintf(stderr, |
| "stdin: compressed with %d bits, can only handle %d bits\n", |
| maxbits, BITS); |
| exit(1); |
| } |
| } |
| #ifndef DEBUG |
| decompress(); |
| #else |
| if (debug == 0) decompress(); |
| else printcodes(); |
| if (verbose) dump_tab(); |
| #endif /* DEBUG */ |
| } |
| } |
| exit(perm_stat ? perm_stat : exit_stat); |
| } |
| |
| static int offset; |
| long int in_count = 1; /* length of input */ |
| long int bytes_out; /* length of compressed output */ |
| long int out_count = 0; /* # of codes output (for debugging) */ |
| |
| /* |
| * compress stdin to stdout |
| * |
| * Algorithm: use open addressing double hashing (no chaining) on the |
| * prefix code / next character combination. We do a variant of Knuth's |
| * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime |
| * secondary probe. Here, the modular division first probe is gives way |
| * to a faster exclusive-or manipulation. Also do block compression with |
| * an adaptive reset, whereby the code table is cleared when the compression |
| * ratio decreases, but after the table fills. The variable-length output |
| * codes are re-sized at this point, and a special CLEAR code is generated |
| * for the decompressor. Late addition: construct the table according to |
| * file size for noticeable speed improvement on small files. Please direct |
| * questions about this implementation to ames!jaw. |
| */ |
| |
| compress() |
| { |
| register long fcode; |
| register code_int i = 0; |
| register int c; |
| register code_int ent; |
| register int disp; |
| register code_int hsize_reg; |
| register int hshift; |
| |
| #ifndef COMPATIBLE |
| if (nomagic == 0) { |
| putchar(magic_header[0]); |
| putchar(magic_header[1]); |
| putchar((char)(maxbits | block_compress)); |
| if(ferror(stdout)) |
| writeerr(); |
| } |
| #endif /* COMPATIBLE */ |
| |
| offset = 0; |
| bytes_out = 3; /* includes 3-byte header mojo */ |
| out_count = 0; |
| clear_flg = 0; |
| ratio = 0; |
| in_count = 1; |
| checkpoint = CHECK_GAP; |
| maxcode = MAXCODE(n_bits = INIT_BITS); |
| free_ent = ((block_compress) ? FIRST : 256 ); |
| |
| ent = getchar (); |
| |
| hshift = 0; |
| for ( fcode = (long) hsize; fcode < 65536L; fcode *= 2L ) |
| hshift++; |
| hshift = 8 - hshift; /* set hash code range bound */ |
| |
| hsize_reg = hsize; |
| cl_hash( (count_int) hsize_reg); /* clear hash table */ |
| |
| #ifdef SIGNED_COMPARE_SLOW |
| while ( (c = getchar()) != (unsigned) EOF ) { |
| #else |
| while ( (c = getchar()) != EOF ) { |
| #endif |
| in_count++; |
| fcode = (long) (((long) c << maxbits) + ent); |
| i = ((c << hshift) ^ ent); /* xor hashing */ |
| |
| if ( htabof (i) == fcode ) { |
| ent = codetabof (i); |
| continue; |
| } else if ( (long)htabof (i) < 0 ) /* empty slot */ |
| goto nomatch; |
| disp = hsize_reg - i; /* secondary hash (after G. Knott) */ |
| if ( i == 0 ) |
| disp = 1; |
| probe: |
| if ( (i -= disp) < 0 ) |
| i += hsize_reg; |
| |
| if ( htabof (i) == fcode ) { |
| ent = codetabof (i); |
| continue; |
| } |
| if ( (long)htabof (i) > 0 ) |
| goto probe; |
| nomatch: |
| output ( (code_int) ent ); |
| out_count++; |
| ent = c; |
| #ifdef SIGNED_COMPARE_SLOW |
| if ( (unsigned) free_ent < (unsigned) maxmaxcode) { |
| #else |
| if ( free_ent < maxmaxcode ) { |
| #endif |
| codetabof (i) = free_ent++; /* code -> hashtable */ |
| htabof (i) = fcode; |
| } |
| else if ( (count_int)in_count >= checkpoint && block_compress ) |
| cl_block (); |
| } |
| /* |
| * Put out the final code. |
| */ |
| output( (code_int)ent ); |
| out_count++; |
| output( (code_int)-1 ); |
| |
| /* |
| * Print out stats on stderr |
| */ |
| if(zcat_flg == 0 && !quiet) { |
| #ifdef DEBUG |
| fprintf( stderr, |
| "%ld chars in, %ld codes (%ld bytes) out, compression factor: ", |
| in_count, out_count, bytes_out ); |
| prratio( stderr, in_count, bytes_out ); |
| fprintf( stderr, "\n"); |
| fprintf( stderr, "\tCompression as in compact: " ); |
| prratio( stderr, in_count-bytes_out, in_count ); |
| fprintf( stderr, "\n"); |
| fprintf( stderr, "\tLargest code (of last block) was %d (%d bits)\n", |
| free_ent - 1, n_bits ); |
| #else /* !DEBUG */ |
| fprintf( stderr, "Compression: " ); |
| prratio( stderr, in_count-bytes_out, in_count ); |
| #endif /* DEBUG */ |
| } |
| if(bytes_out > in_count) /* exit(2) if no savings */ |
| exit_stat = 2; |
| return; |
| } |
| |
| /*- |
| * Output the given code. |
| * Inputs: |
| * code: A n_bits-bit integer. If == -1, then EOF. This assumes |
| * that n_bits =< (long)wordsize - 1. |
| * Outputs: |
| * Outputs code to the file. |
| * Assumptions: |
| * Chars are 8 bits long. |
| * Algorithm: |
| * Maintain a BITS character long buffer (so that 8 codes will |
| * fit in it exactly). Use the VAX insv instruction to insert each |
| * code in turn. When the buffer fills up empty it and start over. |
| */ |
| |
| static char buf[BITS]; |
| |
| #ifndef vax |
| char_type lmask[9] = {0xff, 0xfe, 0xfc, 0xf8, 0xf0, 0xe0, 0xc0, 0x80, 0x00}; |
| char_type rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; |
| #endif /* vax */ |
| |
| output( code ) |
| code_int code; |
| { |
| #ifdef DEBUG |
| static int col = 0; |
| #endif /* DEBUG */ |
| |
| /* |
| * On the VAX, it is important to have the register declarations |
| * in exactly the order given, or the asm will break. |
| */ |
| register int r_off = offset, bits= n_bits; |
| register char * bp = buf; |
| |
| #ifdef DEBUG |
| if ( verbose ) |
| fprintf( stderr, "%5d%c", code, |
| (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); |
| #endif /* DEBUG */ |
| if ( code >= 0 ) { |
| #if defined(vax) && !defined(__GNUC__) |
| /* |
| * VAX and PCC DEPENDENT!! Implementation on other machines is |
| * below. |
| * |
| * Translation: Insert BITS bits from the argument starting at |
| * offset bits from the beginning of buf. |
| */ |
| 0; /* Work around for pcc -O bug with asm and if stmt */ |
| asm( "insv 4(ap),r11,r10,(r9)" ); |
| #else |
| /* |
| * byte/bit numbering on the VAX is simulated by the following code |
| */ |
| /* |
| * Get to the first byte. |
| */ |
| bp += (r_off >> 3); |
| r_off &= 7; |
| /* |
| * Since code is always >= 8 bits, only need to mask the first |
| * hunk on the left. |
| */ |
| *bp = (*bp & rmask[r_off]) | (code << r_off) & lmask[r_off]; |
| bp++; |
| bits -= (8 - r_off); |
| code >>= 8 - r_off; |
| /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ |
| if ( bits >= 8 ) { |
| *bp++ = code; |
| code >>= 8; |
| bits -= 8; |
| } |
| /* Last bits. */ |
| if(bits) |
| *bp = code; |
| #endif /* vax */ |
| offset += n_bits; |
| if ( offset == (n_bits << 3) ) { |
| bp = buf; |
| bits = n_bits; |
| bytes_out += bits; |
| do { |
| putchar(*bp++); |
| if (ferror(stdout)) |
| writeerr(); |
| } while(--bits); |
| offset = 0; |
| } |
| |
| /* |
| * If the next entry is going to be too big for the code size, |
| * then increase it, if possible. |
| */ |
| if ( free_ent > maxcode || (clear_flg > 0)) |
| { |
| /* |
| * Write the whole buffer, because the input side won't |
| * discover the size increase until after it has read it. |
| */ |
| if ( offset > 0 ) { |
| if( fwrite( buf, 1, n_bits, stdout ) != n_bits) |
| writeerr(); |
| bytes_out += n_bits; |
| } |
| offset = 0; |
| |
| if ( clear_flg ) { |
| maxcode = MAXCODE (n_bits = INIT_BITS); |
| clear_flg = 0; |
| } |
| else { |
| n_bits++; |
| if ( n_bits == maxbits ) |
| maxcode = maxmaxcode; |
| else |
| maxcode = MAXCODE(n_bits); |
| } |
| #ifdef DEBUG |
| if ( debug ) { |
| fprintf( stderr, "\nChange to %d bits\n", n_bits ); |
| col = 0; |
| } |
| #endif /* DEBUG */ |
| } |
| } else { |
| /* |
| * At EOF, write the rest of the buffer. |
| */ |
| if ( offset > 0 ) { |
| offset = (offset + 7) / 8; |
| if( fwrite( buf, 1, offset, stdout ) != offset ) |
| writeerr(); |
| bytes_out += offset; |
| } |
| offset = 0; |
| (void)fflush( stdout ); |
| if( ferror( stdout ) ) |
| writeerr(); |
| #ifdef DEBUG |
| if ( verbose ) |
| fprintf( stderr, "\n" ); |
| #endif |
| } |
| } |
| |
| /* |
| * Decompress stdin to stdout. This routine adapts to the codes in the |
| * file building the "string" table on-the-fly; requiring no table to |
| * be stored in the compressed file. The tables used herein are shared |
| * with those of the compress() routine. See the definitions above. |
| */ |
| |
| decompress() { |
| register char_type *stackp; |
| register int finchar; |
| register code_int code, oldcode, incode; |
| int n, nwritten, offset; /* Variables for buffered write */ |
| char buff[BUFSIZ]; /* Buffer for buffered write */ |
| |
| |
| /* |
| * As above, initialize the first 256 entries in the table. |
| */ |
| maxcode = MAXCODE(n_bits = INIT_BITS); |
| for ( code = 255; code >= 0; code-- ) { |
| tab_prefixof(code) = 0; |
| tab_suffixof(code) = (char_type)code; |
| } |
| free_ent = ((block_compress) ? FIRST : 256 ); |
| |
| finchar = oldcode = getcode(); |
| if(oldcode == -1) /* EOF already? */ |
| return; /* Get out of here */ |
| |
| /* first code must be 8 bits = char */ |
| n=0; |
| buff[n++] = (char)finchar; |
| |
| stackp = de_stack; |
| |
| while ( (code = getcode()) > -1 ) { |
| |
| if ( (code == CLEAR) && block_compress ) { |
| for ( code = 255; code >= 0; code-- ) |
| tab_prefixof(code) = 0; |
| clear_flg = 1; |
| free_ent = FIRST - 1; |
| if ( (code = getcode ()) == -1 ) /* O, untimely death! */ |
| break; |
| } |
| incode = code; |
| /* |
| * Special case for KwKwK string. |
| */ |
| if ( code >= free_ent ) { |
| *stackp++ = finchar; |
| code = oldcode; |
| } |
| |
| /* |
| * Generate output characters in reverse order |
| */ |
| #ifdef SIGNED_COMPARE_SLOW |
| while ( ((unsigned long)code) >= ((unsigned long)256) ) { |
| #else |
| while ( code >= 256 ) { |
| #endif |
| *stackp++ = tab_suffixof(code); |
| code = tab_prefixof(code); |
| } |
| *stackp++ = finchar = tab_suffixof(code); |
| |
| /* |
| * And put them out in forward order |
| */ |
| do { |
| /* |
| * About 60% of the time is spent in the putchar() call |
| * that appeared here. It was originally |
| * putchar ( *--stackp ); |
| * If we buffer the writes ourselves, we can go faster (about |
| * 30%). |
| * |
| * At this point, the next line is the next *big* time |
| * sink in the code. It takes up about 10% of the time. |
| */ |
| buff[n++] = *--stackp; |
| if (n == BUFSIZ) { |
| offset = 0; |
| do { |
| nwritten = write(fileno(stdout), &buff[offset], n); |
| if (nwritten < 0) |
| writeerr(); |
| offset += nwritten; |
| } while ((n -= nwritten) > 0); |
| } |
| } while ( stackp > de_stack ); |
| |
| /* |
| * Generate the new entry. |
| */ |
| if ( (code=free_ent) < maxmaxcode ) { |
| tab_prefixof(code) = (unsigned short)oldcode; |
| tab_suffixof(code) = finchar; |
| free_ent = code+1; |
| } |
| /* |
| * Remember previous code. |
| */ |
| oldcode = incode; |
| } |
| /* |
| * Flush the stuff remaining in our buffer... |
| */ |
| offset = 0; |
| while (n > 0) { |
| nwritten = write(fileno(stdout), &buff[offset], n); |
| if (nwritten < 0) |
| writeerr(); |
| offset += nwritten; |
| n -= nwritten; |
| } |
| } |
| |
| /*- |
| * Read one code from the standard input. If EOF, return -1. |
| * Inputs: |
| * stdin |
| * Outputs: |
| * code or -1 is returned. |
| */ |
| code_int |
| getcode() { |
| /* |
| * On the VAX, it is important to have the register declarations |
| * in exactly the order given, or the asm will break. |
| */ |
| register code_int code; |
| static int offset = 0, size = 0; |
| static char_type buf[BITS]; |
| register int r_off, bits; |
| register char_type *bp = buf; |
| |
| if ( clear_flg > 0 || offset >= size || free_ent > maxcode ) { |
| /* |
| * If the next entry will be too big for the current code |
| * size, then we must increase the size. This implies reading |
| * a new buffer full, too. |
| */ |
| if ( free_ent > maxcode ) { |
| n_bits++; |
| if ( n_bits == maxbits ) |
| maxcode = maxmaxcode; /* won't get any bigger now */ |
| else |
| maxcode = MAXCODE(n_bits); |
| } |
| if ( clear_flg > 0) { |
| maxcode = MAXCODE (n_bits = INIT_BITS); |
| clear_flg = 0; |
| } |
| size = fread( buf, 1, n_bits, stdin ); |
| if ( size <= 0 ) |
| return -1; /* end of file */ |
| offset = 0; |
| /* Round size down to integral number of codes */ |
| size = (size << 3) - (n_bits - 1); |
| } |
| r_off = offset; |
| bits = n_bits; |
| #ifdef vax |
| asm( "extzv r10,r9,(r8),r11" ); |
| #else /* not a vax */ |
| /* |
| * Get to the first byte. |
| */ |
| bp += (r_off >> 3); |
| r_off &= 7; |
| /* Get first part (low order bits) */ |
| #ifdef NO_UCHAR |
| code = ((*bp++ >> r_off) & rmask[8 - r_off]) & 0xff; |
| #else |
| code = (*bp++ >> r_off); |
| #endif /* NO_UCHAR */ |
| bits -= (8 - r_off); |
| r_off = 8 - r_off; /* now, offset into code word */ |
| /* Get any 8 bit parts in the middle (<=1 for up to 16 bits). */ |
| if ( bits >= 8 ) { |
| #ifdef NO_UCHAR |
| code |= (*bp++ & 0xff) << r_off; |
| #else |
| code |= *bp++ << r_off; |
| #endif /* NO_UCHAR */ |
| r_off += 8; |
| bits -= 8; |
| } |
| /* high order bits. */ |
| code |= (*bp & rmask[bits]) << r_off; |
| #endif /* vax */ |
| offset += n_bits; |
| |
| return code; |
| } |
| |
| #ifdef DEBUG |
| printcodes() |
| { |
| /* |
| * Just print out codes from input file. For debugging. |
| */ |
| code_int code; |
| int col = 0, bits; |
| |
| bits = n_bits = INIT_BITS; |
| maxcode = MAXCODE(n_bits); |
| free_ent = ((block_compress) ? FIRST : 256 ); |
| while ( ( code = getcode() ) >= 0 ) { |
| if ( (code == CLEAR) && block_compress ) { |
| free_ent = FIRST - 1; |
| clear_flg = 1; |
| } |
| else if ( free_ent < maxmaxcode ) |
| free_ent++; |
| if ( bits != n_bits ) { |
| fprintf(stderr, "\nChange to %d bits\n", n_bits ); |
| bits = n_bits; |
| col = 0; |
| } |
| fprintf(stderr, "%5d%c", code, (col+=6) >= 74 ? (col = 0, '\n') : ' ' ); |
| } |
| putc( '\n', stderr ); |
| exit( 0 ); |
| } |
| |
| code_int sorttab[1<<BITS]; /* sorted pointers into htab */ |
| |
| dump_tab() /* dump string table */ |
| { |
| register int i, first; |
| register ent; |
| #define STACK_SIZE 15000 |
| int stack_top = STACK_SIZE; |
| register c; |
| |
| if(do_decomp == 0) { /* compressing */ |
| register int flag = 1; |
| |
| for(i=0; i<hsize; i++) { /* build sort pointers */ |
| if((long)htabof(i) >= 0) { |
| sorttab[codetabof(i)] = i; |
| } |
| } |
| first = block_compress ? FIRST : 256; |
| for(i = first; i < free_ent; i++) { |
| fprintf(stderr, "%5d: \"", i); |
| de_stack[--stack_top] = '\n'; |
| de_stack[--stack_top] = '"'; |
| stack_top = in_stack((htabof(sorttab[i])>>maxbits)&0xff, |
| stack_top); |
| for(ent=htabof(sorttab[i]) & ((1<<maxbits)-1); |
| ent > 256; |
| ent=htabof(sorttab[ent]) & ((1<<maxbits)-1)) { |
| stack_top = in_stack(htabof(sorttab[ent]) >> maxbits, |
| stack_top); |
| } |
| stack_top = in_stack(ent, stack_top); |
| fwrite( &de_stack[stack_top], 1, STACK_SIZE-stack_top, stderr); |
| stack_top = STACK_SIZE; |
| } |
| } else if(!debug) { /* decompressing */ |
| |
| for ( i = 0; i < free_ent; i++ ) { |
| ent = i; |
| c = tab_suffixof(ent); |
| if ( isascii(c) && isprint(c) ) |
| fprintf( stderr, "%5d: %5d/'%c' \"", |
| ent, tab_prefixof(ent), c ); |
| else |
| fprintf( stderr, "%5d: %5d/\\%03o \"", |
| ent, tab_prefixof(ent), c ); |
| de_stack[--stack_top] = '\n'; |
| de_stack[--stack_top] = '"'; |
| for ( ; ent != NULL; |
| ent = (ent >= FIRST ? tab_prefixof(ent) : NULL) ) { |
| stack_top = in_stack(tab_suffixof(ent), stack_top); |
| } |
| fwrite( &de_stack[stack_top], 1, STACK_SIZE - stack_top, stderr ); |
| stack_top = STACK_SIZE; |
| } |
| } |
| } |
| |
| int |
| in_stack(c, stack_top) |
| register c, stack_top; |
| { |
| if ( (isascii(c) && isprint(c) && c != '\\') || c == ' ' ) { |
| de_stack[--stack_top] = c; |
| } else { |
| switch( c ) { |
| case '\n': de_stack[--stack_top] = 'n'; break; |
| case '\t': de_stack[--stack_top] = 't'; break; |
| case '\b': de_stack[--stack_top] = 'b'; break; |
| case '\f': de_stack[--stack_top] = 'f'; break; |
| case '\r': de_stack[--stack_top] = 'r'; break; |
| case '\\': de_stack[--stack_top] = '\\'; break; |
| default: |
| de_stack[--stack_top] = '0' + c % 8; |
| de_stack[--stack_top] = '0' + (c / 8) % 8; |
| de_stack[--stack_top] = '0' + c / 64; |
| break; |
| } |
| de_stack[--stack_top] = '\\'; |
| } |
| return stack_top; |
| } |
| #endif /* DEBUG */ |
| |
| writeerr() |
| { |
| (void)fprintf(stderr, "compress: %s: %s\n", |
| ofname[0] ? ofname : "stdout", strerror(errno)); |
| (void)unlink(ofname); |
| exit(1); |
| } |
| |
| copystat(ifname, ofname) |
| char *ifname, *ofname; |
| { |
| struct stat statbuf; |
| int mode; |
| struct utimbuf tp; |
| |
| fclose(stdout); |
| if (stat(ifname, &statbuf)) { /* Get stat on input file */ |
| perror(ifname); |
| return; |
| } |
| if ((statbuf.st_mode & S_IFMT/*0170000*/) != S_IFREG/*0100000*/) { |
| if(quiet) |
| fprintf(stderr, "%s: ", ifname); |
| fprintf(stderr, " -- not a regular file: unchanged"); |
| exit_stat = 1; |
| perm_stat = 1; |
| } else if (statbuf.st_nlink > 1) { |
| if(quiet) |
| fprintf(stderr, "%s: ", ifname); |
| fprintf(stderr, " -- has %d other links: unchanged", |
| statbuf.st_nlink - 1); |
| exit_stat = 1; |
| perm_stat = 1; |
| } else if (exit_stat == 2 && (!force)) { /* No compression: remove file.Z */ |
| if(!quiet) |
| fprintf(stderr, " -- file unchanged"); |
| } else { /* ***** Successful Compression ***** */ |
| exit_stat = 0; |
| mode = statbuf.st_mode & 07777; |
| if (chmod(ofname, mode)) /* Copy modes */ |
| perror(ofname); |
| chown(ofname, statbuf.st_uid, statbuf.st_gid); /* Copy ownership */ |
| tp.actime = statbuf.st_atime; |
| tp.modtime = statbuf.st_mtime; |
| utime(ofname, &tp); /* Update last accessed and modified times */ |
| if (unlink(ifname)) /* Remove input file */ |
| perror(ifname); |
| if(!quiet) |
| fprintf(stderr, " -- replaced with %s", ofname); |
| return; /* Successful return */ |
| } |
| |
| /* Unsuccessful return -- one of the tests failed */ |
| if (unlink(ofname)) |
| perror(ofname); |
| } |
| |
| void |
| onintr ( ) |
| { |
| if (!precious) |
| unlink ( ofname ); |
| exit ( 1 ); |
| } |
| |
| void |
| oops ( ) /* wild pointer -- assume bad input */ |
| { |
| if ( do_decomp ) |
| fprintf ( stderr, "uncompress: corrupt input\n" ); |
| unlink ( ofname ); |
| exit ( 1 ); |
| } |
| |
| cl_block () /* table clear for block compress */ |
| { |
| register long int rat; |
| |
| checkpoint = in_count + CHECK_GAP; |
| #ifdef DEBUG |
| if ( debug ) { |
| fprintf ( stderr, "count: %ld, ratio: ", in_count ); |
| prratio ( stderr, in_count, bytes_out ); |
| fprintf ( stderr, "\n"); |
| } |
| #endif /* DEBUG */ |
| |
| if(in_count > 0x007fffff) { /* shift will overflow */ |
| rat = bytes_out >> 8; |
| if(rat == 0) { /* Don't divide by zero */ |
| rat = 0x7fffffff; |
| } else { |
| rat = in_count / rat; |
| } |
| } else { |
| rat = (in_count << 8) / bytes_out; /* 8 fractional bits */ |
| } |
| if ( rat > ratio ) { |
| ratio = rat; |
| } else { |
| ratio = 0; |
| #ifdef DEBUG |
| if(verbose) |
| dump_tab(); /* dump string table */ |
| #endif |
| cl_hash ( (count_int) hsize ); |
| free_ent = FIRST; |
| clear_flg = 1; |
| output ( (code_int) CLEAR ); |
| #ifdef DEBUG |
| if(debug) |
| fprintf ( stderr, "clear\n" ); |
| #endif /* DEBUG */ |
| } |
| } |
| |
| cl_hash(hsize) /* reset code table */ |
| register count_int hsize; |
| { |
| register count_int *htab_p = htab+hsize; |
| register long i; |
| register long m1 = -1; |
| |
| i = hsize - 16; |
| do { /* might use Sys V memset(3) here */ |
| *(htab_p-16) = m1; |
| *(htab_p-15) = m1; |
| *(htab_p-14) = m1; |
| *(htab_p-13) = m1; |
| *(htab_p-12) = m1; |
| *(htab_p-11) = m1; |
| *(htab_p-10) = m1; |
| *(htab_p-9) = m1; |
| *(htab_p-8) = m1; |
| *(htab_p-7) = m1; |
| *(htab_p-6) = m1; |
| *(htab_p-5) = m1; |
| *(htab_p-4) = m1; |
| *(htab_p-3) = m1; |
| *(htab_p-2) = m1; |
| *(htab_p-1) = m1; |
| htab_p -= 16; |
| } while ((i -= 16) >= 0); |
| for ( i += 16; i > 0; i-- ) |
| *--htab_p = m1; |
| } |
| |
| prratio(stream, num, den) |
| FILE *stream; |
| long int num, den; |
| { |
| register int q; /* Doesn't need to be long */ |
| |
| if(num > 214748L) { /* 2147483647/10000 */ |
| q = num / (den / 10000L); |
| } else { |
| q = 10000L * num / den; /* Long calculations, though */ |
| } |
| if (q < 0) { |
| putc('-', stream); |
| q = -q; |
| } |
| fprintf(stream, "%d.%02d%%", q / 100, q % 100); |
| } |
| |
| usage() |
| { |
| (void)fprintf(stderr, |
| #ifdef DEBUG |
| "compress [-CDVcdfnv] [-b maxbits] [file ...]\n"); |
| #else |
| "compress [-Ccdfnv] [-b maxbits] [file ...]\n"); |
| #endif |
| exit(1); |
| } |