From 146a5086066d725c8cea3137cfcb9b26c19adf87 Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Sat, 28 Sep 2002 01:06:22 +0000 Subject: [PATCH] New file with LZC (UNIX compress) code. --- nufxlib-0/Lzc.c | 1101 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1101 insertions(+) create mode 100644 nufxlib-0/Lzc.c diff --git a/nufxlib-0/Lzc.c b/nufxlib-0/Lzc.c new file mode 100644 index 0000000..bbcbb66 --- /dev/null +++ b/nufxlib-0/Lzc.c @@ -0,0 +1,1101 @@ +/* + * NuFX archive manipulation library + * Copyright (C) 2000 by Andy McFadden, All Rights Reserved. + * This is free software; you can redistribute it and/or modify it under the + * terms of the GNU Library General Public License, see the file COPYING.LIB. + * + * This is the LZW implementation found in the UNIX "compress" command, + * sometimes referred to as "LZC". + * + * There's a lot of junk in here for the sake of smaller systems (e.g. MSDOS) + * and pre-ANSI compilers. For the most part it has been left unchanged. + * I have done some minor reformatting, and have undone the authors' + * penchant for assigning variables inside function call statements, but + * for the most part it is as it was. (A much cleaner implementation + * could probably be derived by adapting the NufxLib Lzw.c code...) + */ +#include "NufxLibPriv.h" + +#define DEBUG_LZC + +/* + * Selected definitions from compress.h. 
+ */ +typedef unsigned short CODE; +typedef unsigned char UCHAR; +typedef unsigned int INTCODE; +typedef unsigned int HASH; +typedef int FLAG; + +#ifndef FALSE /* let's get some sense to this */ +#define FALSE 0 +#define TRUE !FALSE +#endif + +#define CONST const +#define FAR +#define NULLPTR(type) ((type FAR *) NULL) +#define ALLOCTYPE void + +#define INITBITS 9 +#define MINBITS 12 +#define MAXMAXBITS 16 +#define MAXBITS MAXMAXBITS +#define DFLTBITS MAXBITS + +#define UNUSED ((CODE)0) /* Indicates hash table value unused */ +#define CLEAR ((CODE)256) /* Code requesting table to be cleared */ +#define FIRSTFREE ((CODE)257) /* First free code for token encoding */ +#define MAXTOKLEN 512 /* Max chars in token; size of buffer */ +#define OK kNuErrNone /* Result codes from functions: */ + +#define BIT_MASK 0x1f +#define BLOCK_MASK 0x80 + +#define CHECK_GAP 10000L /* ratio check interval, for COMP40 */ + +static UCHAR gNu_magic_header[] = { 0x1F,0x9D }; + +/* don't need these */ +/*#define SPLIT_HT 1*/ +/*#define SPLIT_PFX 1*/ +/*#define COMP40 1*/ + +#define NOMEM kNuErrMalloc /* Ran out of memory */ +#define TOKTOOBIG kNuErrBadData /* Token longer than MAXTOKLEN chars */ +#define READERR kNuErrFileRead /* I/O error on input */ +#define WRITEERR kNuErrFileWrite /* I/O error on output */ +#define CODEBAD kNuErrBadData /* Infile contained a bad token code */ +#define TABLEBAD kNuErrInternal /* The tables got corrupted (!) */ +#define NOSAVING kNuErrNone /* no saving in file size */ + + +/* + * Normally in COMPUSI.UNI. + */ +static inline ALLOCTYPE FAR * +Nu_LZC_emalloc(NuArchive* pArchive, unsigned int x, int y) +{ + return Nu_Malloc(pArchive, x*y); +} +static inline void +Nu_LZC_efree(NuArchive* pArchive, ALLOCTYPE FAR * ptr) +{ + return Nu_Free(pArchive, ptr); +} + +/*@H************************ < COMPRESS API > **************************** +* $@(#) compapi.c,v 4.3d 90/01/18 03:00:00 don Release ^ * +* * +* compress : compapi.c * +* * +* port by : Donald J. 
Gloistein * +* * +* Source, Documentation, Object Code: * +* released to Public Domain. This code is based on code as documented * +* below in release notes. * +* * +*--------------------------- Module Description --------------------------* +* Contains source code for modified Lempel-Ziv method (LZW) compression * +* and decompression. * +* * +* This code module can be maintained to keep current on releases on the * +* Unix system. The command shell and dos modules can remain the same. * +* * +*--------------------------- Implementation Notes --------------------------* +* * +* compiled with : compress.h compress.fns compress.c * +* linked with : compress.obj compusi.obj * +* * +* problems: * +* * +* * +* CAUTION: Uses a number of defines for access and speed. If you change * +* anything, make sure about side effects. * +* * +* Compression: * +* Algorithm: use open addressing double hashing (no chaining) on the * +* prefix code / next character combination. We do a variant of Knuth's * +* algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime * +* secondary probe. Here, the modular division first probe gives way * +* to a faster exclusive-or manipulation. * +* Also block compression with an adaptive reset was used in original code, * +* whereby the code table is cleared when the compression ratio decreases * +* but after the table fills. This was removed from this edition. The table * +* is re-sized at this point when it is filled, and a special CLEAR code is * +* generated for the decompressor. This results in some size difference from * +* straight version 4.0 joe Release. But it is fully compatible in both v4.0 * +* and v4.01 * +* * +* Decompression: * +* This routine adapts to the codes in the file building the "string" table * +* on-the-fly; requiring no table to be stored in the compressed file. The * +* tables used herein are shared with those of the compress() routine. 
* +* * +* Initials ---- Name --------------------------------- * +* DjG Donald J. Gloistein, current port to MsDos 16 bit * +* Plus many others, see rev.hst file for full list * +* LvR Lyle V. Rains, many thanks for improved implementation * +* of the compression and decompression routines. * +*************************************************************************@H*/ + +#include + +/* + * LZC state, largely variables with non-local scope. + */ +typedef struct LZCState { + NuArchive* pArchive; + int doCalcCRC; + ushort crc; + + /* compression */ + NuStraw* pStraw; + FILE* outfp; + long uncompRemaining; + + /* expansion */ + FILE* infp; + NuFunnel* pFunnel; + ushort* pCrc; + long compRemaining; + + + /* + * Globals from Compress sources. + */ + int offset; + long int in_count ; /* length of input */ + long int bytes_out; /* length of compressed output */ + + INTCODE prefxcode, nextfree; + INTCODE highcode; + INTCODE maxcode; + HASH hashsize; + int bits; + + char FAR *sfx; + + #if (SPLIT_PFX) + CODE FAR *pfx[2]; + #else + CODE FAR *pfx; + #endif + + #if (SPLIT_HT) + CODE FAR *ht[2]; + #else + CODE FAR *ht; + #endif + + #ifdef COMP40 + long int ratio; + long checkpoint; /* initialized to CHECK_GAP */ + #endif + + #ifdef DEBUG_LZC + int debug; /* initialized to FALSE */ + #endif + + NuError exit_stat; + + int maxbits; /* initialized to DFLTBITS */ + int block_compress; /* initialized to BLOCK_MASK */ + + /* + * Static local variables. Some of these were explicitly initialized + * to zero. + */ + INTCODE oldmaxcode; /* alloc_tables */ + HASH oldhashsize; /* alloc_tables */ + int oldbits; /* putcode */ + UCHAR outbuf[MAXBITS]; /* putcode */ + int prevbits; /* nextcode */ + int size; /* nextcode */ + UCHAR inbuf[MAXBITS]; /* nextcode */ +} LZCState; + + +/* + * The following two parameter tables are the hash table sizes and + * maximum code values for various code bit-lengths. 
The requirements + * are that Hashsize[n] must be a prime number and Maxcode[n] must be less + * than Maxhash[n]. Table occupancy factor is (Maxcode - 256)/Maxhash. + * Note: I am using a lower Maxcode for 16-bit codes in order to + * keep the hash table size less than 64k entries. + */ +static CONST HASH gNu_hs[] = { + 0x13FF, /* 12-bit codes, 75% occupancy */ + 0x26C3, /* 13-bit codes, 80% occupancy */ + 0x4A1D, /* 14-bit codes, 85% occupancy */ + 0x8D0D, /* 15-bit codes, 90% occupancy */ + 0xFFD9 /* 16-bit codes, 94% occupancy, 6% of code values unused */ +}; +#define Hashsize(maxb) (gNu_hs[(maxb) -MINBITS]) + +static CONST INTCODE gNu_mc[] = { + 0x0FFF, /* 12-bit codes */ + 0x1FFF, /* 13-bit codes */ + 0x3FFF, /* 14-bit codes */ + 0x7FFF, /* 15-bit codes */ + 0xEFFF /* 16-bit codes, 6% of code values unused */ +}; +#define Maxcode(maxb) (gNu_mc[(maxb) -MINBITS]) + +#ifdef __STDC__ +#ifdef DEBUG_LZC +#define allocx(type, ptr, size) \ + (((ptr) = (type FAR *) Nu_LZC_emalloc(pArchive, (unsigned int)(size),sizeof(type))) == NULLPTR(type) \ + ? (DBUG(("%s: "#ptr" -- ", "LZC")), NOMEM) : OK \ + ) +#else +#define allocx(type,ptr,size) \ + (((ptr) = (type FAR *) Nu_LZC_emalloc(pArchive, (unsigned int)(size),sizeof(type))) == NULLPTR(type) \ + ? NOMEM : OK \ + ) +#endif +#else +#define allocx(type,ptr,size) \ + (((ptr) = (type FAR *) Nu_LZC_emalloc(, pArchive, (unsigned int)(size),sizeof(type))) == NULLPTR(type) \ + ? NOMEM : OK \ + ) +#endif + +#define free_array(type,ptr,offset) \ + if (ptr != NULLPTR(type)) { \ + Nu_LZC_efree(pArchive, (ALLOCTYPE FAR *)((ptr) + (offset))); \ + (ptr) = NULLPTR(type); \ + } + + /* + * Macro to allocate new memory to a pointer with an offset value. + */ +#define alloc_array(type, ptr, size, offset) \ + ( allocx(type, ptr, (size) - (offset)) != OK \ + ? 
NOMEM \ + : (((ptr) -= (offset)), OK) \ + ) + +/*static char FAR *sfx = NULLPTR(char) ;*/ +#define suffix(code) pLzcState->sfx[code] + + +#if (SPLIT_PFX) + /*static CODE FAR *pfx[2] = {NULLPTR(CODE), NULLPTR(CODE)};*/ +#else + /*static CODE FAR *pfx = NULLPTR(CODE);*/ +#endif + + +#if (SPLIT_HT) + /*static CODE FAR *ht[2] = {NULLPTR(CODE),NULLPTR(CODE)};*/ +#else + /*static CODE FAR *ht = NULLPTR(CODE);*/ +#endif + + +static int +Nu_LZC_alloc_tables(LZCState* pLzcState, INTCODE newmaxcode, HASH newhashsize) +{ + NuArchive* pArchive = pLzcState->pArchive; + /*static INTCODE oldmaxcode = 0;*/ + /*static HASH oldhashsize = 0;*/ + + if (newhashsize > pLzcState->oldhashsize) { +#if (SPLIT_HT) + free_array(CODE,pLzcState->ht[1], 0); + free_array(CODE,pLzcState->ht[0], 0); +#else + free_array(CODE,pLzcState->ht, 0); +#endif + pLzcState->oldhashsize = 0; + } + + if (newmaxcode > pLzcState->oldmaxcode) { +#if (SPLIT_PFX) + free_array(CODE,pLzcState->pfx[1], 128); + free_array(CODE,pLzcState->pfx[0], 128); +#else + free_array(CODE,pLzcState->pfx, 256); +#endif + free_array(char,pLzcState->sfx, 256); + + if ( alloc_array(char, pLzcState->sfx, newmaxcode + 1, 256) +#if (SPLIT_PFX) + || alloc_array(CODE, pLzcState->pfx[0], (newmaxcode + 1) / 2, 128) + || alloc_array(CODE, pLzcState->pfx[1], (newmaxcode + 1) / 2, 128) +#else + || alloc_array(CODE, pLzcState->pfx, (newmaxcode + 1), 256) +#endif + ) { + pLzcState->oldmaxcode = 0; + pLzcState->exit_stat = NOMEM; + return(NOMEM); + } + pLzcState->oldmaxcode = newmaxcode; + } + if (newhashsize > pLzcState->oldhashsize) { + if ( +#if (SPLIT_HT) + alloc_array(CODE, pLzcState->ht[0], (newhashsize / 2) + 1, 0) + || alloc_array(CODE, pLzcState->ht[1], newhashsize / 2, 0) +#else + alloc_array(CODE, pLzcState->ht, newhashsize, 0) +#endif + ) { + pLzcState->oldhashsize = 0; + pLzcState->exit_stat = NOMEM; + return(NOMEM); + } + pLzcState->oldhashsize = newhashsize; + } + return (OK); +} + +# if (SPLIT_PFX) + /* + * We have to split pfx[] 
table in half, + * because it's potentially larger than 64k bytes. + */ +# define prefix(code) (pLzcState->pfx[(code) & 1][(code) >> 1]) +# else + /* + * Then pfx[] can't be larger than 64k bytes, + * or we don't care if it is, so we don't split. + */ +# define prefix(code) (pLzcState->pfx[code]) +# endif + + +/* The initializing of the tables can be done quicker with memset() */ +/* but this way is portable through out the memory models. */ +/* If you use Microsoft halloc() to allocate the arrays, then */ +/* include the pragma #pragma function(memset) and make sure that */ +/* the length of the memory block is not greater than 64K. */ +/* This also means that you MUST compile in a model that makes the */ +/* default pointers to be far pointers (compact or large models). */ +/* See the file COMPUSI.DOS to modify function emalloc(). */ + +# if (SPLIT_HT) + /* + * We have to split ht[] hash table in half, + * because it's potentially larger than 64k bytes. + */ +# define probe(hash) (pLzcState->ht[(hash) & 1][(hash) >> 1]) +# define init_tables() \ + { \ + hash = pLzcState->hashsize >> 1; \ + pLzcState->ht[0][hash] = 0; \ + while (hash--) pLzcState->ht[0][hash] = pLzcState->ht[1][hash] = 0; \ + pLzcState->highcode = ~(~(INTCODE)0 << (pLzcState->bits = INITBITS)); \ + pLzcState->nextfree = (pLzcState->block_compress ? FIRSTFREE : 256); \ + } + +# else + + /* + * Then ht[] can't be larger than 64k bytes, + * or we don't care if it is, so we don't split. + */ +# define probe(hash) (pLzcState->ht[hash]) +# define init_tables() \ + { \ + hash = pLzcState->hashsize; \ + while (hash--) pLzcState->ht[hash] = 0; \ + pLzcState->highcode = ~(~(INTCODE)0 << (pLzcState->bits = INITBITS)); \ + pLzcState->nextfree = (pLzcState->block_compress ? 
FIRSTFREE : 256); \ + } + +# endif + + +/* + * =========================================================================== + * Compression + * =========================================================================== + */ + +static void +Nu_prratio(long int num, long int den) +{ + register int q; /* Doesn't need to be long */ + + if(num > 214748L) { /* 2147483647/10000 */ + q = (int) (num / (den / 10000L)); + } + else { + q = (int) (10000L * num / den); /* Long calculations, though */ + } + if (q < 0) { + DBUG(("-")); + q = -q; + } + DBUG(("%d.%02d%%", q / 100, q % 100)); +} + +#ifdef COMP40 +/* table clear for block compress */ +/* this is for adaptive reset present in version 4.0 joe release */ +/* DjG, sets it up and returns TRUE to compress and FALSE to not compress */ +static int +Nu_LZC_cl_block(LZCState* pLzcState) +{ + register long int rat; + + pLzcState->checkpoint = pLzcState->in_count + CHECK_GAP; +#ifdef DEBUG_LZC + if ( pLzcState->debug ) { + DBUG(( "count: %ld, ratio: ", pLzcState->in_count )); + Nu_prratio ( pLzcState->in_count, pLzcState->bytes_out ); + DBUG(( "\n")); + } +#endif + + if(pLzcState->in_count > 0x007fffff) { /* shift will overflow */ + rat = pLzcState->bytes_out >> 8; + if(rat == 0) /* Don't divide by zero */ + rat = 0x7fffffff; + else + rat = pLzcState->in_count / rat; + } + else + rat = (pLzcState->in_count << 8) / pLzcState->bytes_out; /* 8 fractional bits */ + + if ( rat > pLzcState->ratio ){ + pLzcState->ratio = rat; + return FALSE; + } + else { + pLzcState->ratio = 0; +#ifdef DEBUG_LZC + if(pLzcState->debug) { + DBUG(( "clear\n" )); + } +#endif + return TRUE; /* clear the table */ + } + return FALSE; /* don't clear the table */ +} +#endif + +static CONST UCHAR gNu_rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff}; + +static void +Nu_LZC_putcode(LZCState* pLzcState, INTCODE code, register int bits) +{ + /*static int oldbits = 0;*/ + /*static UCHAR outbuf[MAXBITS];*/ + register UCHAR *buf; + register int shift; + 
+ if (bits != pLzcState->oldbits) { + if (bits == 0) { + /* bits == 0 means EOF, write the rest of the buffer. */ + if (pLzcState->offset > 0) { + fwrite(pLzcState->outbuf,1,(pLzcState->offset +7) >> 3, pLzcState->outfp); + pLzcState->bytes_out += ((pLzcState->offset +7) >> 3); + } + pLzcState->offset = 0; + pLzcState->oldbits = 0; + fflush(pLzcState->outfp); + return; + } + else { + /* Change the code size. We must write the whole buffer, + * because the expand side won't discover the size change + * until after it has read a buffer full. + */ + if (pLzcState->offset > 0) { + fwrite(pLzcState->outbuf, 1, pLzcState->oldbits, pLzcState->outfp); + pLzcState->bytes_out += pLzcState->oldbits; + pLzcState->offset = 0; + } + pLzcState->oldbits = bits; + #ifdef DEBUG_LZC + if ( pLzcState->debug ) { + DBUG(( "\nChange to %d bits\n", bits )); + } + #endif /* DEBUG_LZC */ + } + } + /* Get to the first byte. */ + buf = pLzcState->outbuf + ((shift = pLzcState->offset) >> 3); + if ((shift &= 7) != 0) { + *(buf) |= (*buf & gNu_rmask[shift]) | (UCHAR)(code << shift); + *(++buf) = (UCHAR)(code >> (8 - shift)); + if (bits + shift > 16) + *(++buf) = (UCHAR)(code >> (16 - shift)); + } + else { + /* Special case for fast execution */ + *(buf) = (UCHAR)code; + *(++buf) = (UCHAR)(code >> 8); + } + if ((pLzcState->offset += bits) == (bits << 3)) { + pLzcState->bytes_out += bits; + fwrite(pLzcState->outbuf,1,bits,pLzcState->outfp); + pLzcState->offset = 0; + } + return; +} + + +#define kNuLZCEOF (-1) + +/* + * Get the next byte from the input straw. Also updates the CRC + * if "doCalcCRC" is set to true. + * + * Returns kNuLZCEOF as the value when we're out of data. 
+ */ +static NuError +Nu_LZCGetcCRC(LZCState* pLzcState, int* pSym) +{ + NuError err; + uchar c; + + if (!pLzcState->uncompRemaining) { + *pSym = kNuLZCEOF; + return kNuErrNone; + } + + err = Nu_StrawRead(pLzcState->pArchive, pLzcState->pStraw, &c, 1); + if (err == kNuErrNone) { + if (pLzcState->doCalcCRC) + pLzcState->crc = Nu_CalcCRC16(pLzcState->crc, &c, 1); + *pSym = c; + pLzcState->uncompRemaining--; + } + + return err; +} + +/* + * compress stdin to stdout + */ +static void +Nu_LZC_compress(LZCState* pLzcState, ulong* pDstLen) +{ + int c,adjbits; + register HASH hash; + register INTCODE code; + HASH hashf[256]; + + Assert(pLzcState->outfp != nil); + + pLzcState->maxcode = Maxcode(pLzcState->maxbits); + pLzcState->hashsize = Hashsize(pLzcState->maxbits); + +#ifdef COMP40 +/* Only needed for adaptive reset */ + pLzcState->checkpoint = CHECK_GAP; + pLzcState->ratio = 0; +#endif + + adjbits = pLzcState->maxbits -10; + for (c = 256; --c >= 0; ){ + hashf[c] = ((( c &0x7) << 7) ^ c) << adjbits; + } + pLzcState->exit_stat = OK; + if (Nu_LZC_alloc_tables(pLzcState, pLzcState->maxcode, pLzcState->hashsize)) /* exit_stat already set */ + return; + init_tables(); + + #if 0 + /* if not zcat or filter */ + if(is_list && !zcat_flg) { /* Open output file */ + if (freopen(ofname, WRITE_FILE_TYPE, pLzcState->outfp) == NULL) { + pLzcState->exit_stat = NOTOPENED; + return; + } + if (!quiet) + fprintf(stderr, "%s: ",ifname); + setvbuf(Xstdout,zbuf,_IOFBF,ZBUFSIZE); + } + #endif + + /* + * Check the input stream for previously seen strings. We keep + * adding characters to the previously seen prefix string until we + * get a character which forms a new (unseen) string. We then send + * the code for the previously seen prefix string, and add the new + * string to our tables. The check for previous strings is done by + * hashing. If the code for the hash value is unused, then we have + * a new string. 
If the code is used, we check to see if the prefix + * and suffix values match the current input; if so, we have found + * a previously seen string. Otherwise, we have a hash collision, + * and we try secondary hash probes until we either find the current + * string, or we find an unused entry (which indicates a new string). + */ + if (1 /*!nomagic*/) { + putc(gNu_magic_header[0], pLzcState->outfp); + putc(gNu_magic_header[1], pLzcState->outfp); + putc((char)(pLzcState->maxbits | pLzcState->block_compress), pLzcState->outfp); + if(ferror(pLzcState->outfp)){ /* check it on entry */ + pLzcState->exit_stat = WRITEERR; + return; + } + pLzcState->bytes_out = 3L; /* includes 3-byte header mojo */ + } + else + pLzcState->bytes_out = 0L; /* no 3-byte header mojo */ + pLzcState->in_count = 1L; + pLzcState->offset = 0; + + pLzcState->exit_stat = Nu_LZCGetcCRC(pLzcState, &c); + if (pLzcState->exit_stat != kNuErrNone) + return; + pLzcState->prefxcode = (INTCODE)c; + + while (1) { + pLzcState->exit_stat = Nu_LZCGetcCRC(pLzcState, &c); + if (pLzcState->exit_stat != kNuErrNone) + return; + if (c == kNuLZCEOF) + break; + + pLzcState->in_count++; + hash = pLzcState->prefxcode ^ hashf[c]; + /* I need to check that my hash value is within range + * because my 16-bit hash table is smaller than 64k. + */ + if (hash >= pLzcState->hashsize) + hash -= pLzcState->hashsize; + if ((code = (INTCODE)probe(hash)) != UNUSED) { + if (suffix(code) != (char)c || (INTCODE)prefix(code) != pLzcState->prefxcode) { + /* hashdelta is subtracted from hash on each iteration of + * the following hash table search loop. I compute it once + * here to remove it from the loop. 
+ */ + HASH hashdelta = (0x120 - c) << (adjbits); + do { + /* rehash and keep looking */ + Assert(code >= FIRSTFREE && code <= pLzcState->maxcode); + if (hash >= hashdelta) hash -= hashdelta; + else hash += (pLzcState->hashsize - hashdelta); + Assert(hash < pLzcState->hashsize); + if ((code = (INTCODE)probe(hash)) == UNUSED) + goto newcode; + } while (suffix(code) != (char)c || (INTCODE)prefix(code) != pLzcState->prefxcode); + } + pLzcState->prefxcode = code; + } + else { + newcode: { + Nu_LZC_putcode(pLzcState, pLzcState->prefxcode, pLzcState->bits); + code = pLzcState->nextfree; + Assert(hash < pLzcState->hashsize); + Assert(code >= FIRSTFREE); + Assert(code <= pLzcState->maxcode + 1); + if (code <= pLzcState->maxcode) { + probe(hash) = (CODE)code; + prefix(code) = (CODE)pLzcState->prefxcode; + suffix(code) = (char)c; + if (code > pLzcState->highcode) { + pLzcState->highcode += code; + ++pLzcState->bits; + } + pLzcState->nextfree = code + 1; + } +#ifdef COMP40 + else if (pLzcState->in_count >= pLzcState->checkpoint && pLzcState->block_compress ) { + if (Nu_LZC_cl_block(pLzcState)){ +#else + else if (pLzcState->block_compress){ +#endif + Nu_LZC_putcode(pLzcState, (INTCODE)c, pLzcState->bits); + Nu_LZC_putcode(pLzcState, CLEAR, pLzcState->bits); + init_tables(); + pLzcState->exit_stat = Nu_LZCGetcCRC(pLzcState, &c); + if (pLzcState->exit_stat != kNuErrNone) + return; + if (c == kNuLZCEOF) + break; + pLzcState->in_count++; +#ifdef COMP40 + } +#endif + } + pLzcState->prefxcode = (INTCODE)c; + } + } + } + Nu_LZC_putcode(pLzcState, pLzcState->prefxcode, pLzcState->bits); + Nu_LZC_putcode(pLzcState, CLEAR, 0); + /* + * Print out stats on stderr + */ + if(1 /*zcat_flg == 0 && !quiet*/) { +#ifdef DEBUG_LZC + DBUG(( + "%ld chars in, (%ld bytes) out, compression factor: ", + pLzcState->in_count, pLzcState->bytes_out )); + Nu_prratio( pLzcState->in_count, pLzcState->bytes_out ); + DBUG(( "\n")); + DBUG(( "\tCompression as in compact: " )); + Nu_prratio( 
pLzcState->in_count-pLzcState->bytes_out, pLzcState->in_count ); + DBUG(( "\n")); + DBUG(( "\tLargest code (of last block) was %d (%d bits)\n", + pLzcState->prefxcode - 1, pLzcState->bits )); +#else + DBUG(( "Compression: " )); + Nu_prratio( pLzcState->in_count-pLzcState->bytes_out, pLzcState->in_count ); +#endif /* DEBUG_LZC */ + } + if(pLzcState->bytes_out > pLzcState->in_count) /* if no savings */ + pLzcState->exit_stat = NOSAVING; + *pDstLen = pLzcState->bytes_out; + return ; +} + + +/* + * NufxLib interface to LZC compression. + */ +static NuError +Nu_CompressLZC(NuArchive* pArchive, NuStraw* pStraw, FILE* fp, + ulong srcLen, ulong* pDstLen, ushort* pCrc, int maxbits) +{ + NuError err = kNuErrNone; + LZCState lzcState; + + memset(&lzcState, 0, sizeof(lzcState)); + lzcState.pArchive = pArchive; + lzcState.pStraw = pStraw; + lzcState.outfp = fp; + lzcState.uncompRemaining = srcLen; + + if (pCrc == nil) { + lzcState.doCalcCRC = false; + } else { + lzcState.doCalcCRC = true; + lzcState.crc = *pCrc; + } + + lzcState.maxbits = maxbits; + lzcState.block_compress = BLOCK_MASK; /* enabled */ + + Nu_LZC_compress(&lzcState, pDstLen); + err = lzcState.exit_stat; + DBUG(("+++ LZC_compress returned with %d\n", err)); + +#if (SPLIT_HT) + free_array(CODE,lzcState.ht[1], 0); + free_array(CODE,lzcState.ht[0], 0); +#else + free_array(CODE,lzcState.ht, 0); +#endif + +#if (SPLIT_PFX) + free_array(CODE,lzcState.pfx[1], 128); + free_array(CODE,lzcState.pfx[0], 128); +#else + free_array(CODE,lzcState.pfx, 256); +#endif + free_array(char,lzcState.sfx, 256); + + if (pCrc != nil) + *pCrc = lzcState.crc; + + return err; +} + +NuError +Nu_CompressLZC12(NuArchive* pArchive, NuStraw* pStraw, FILE* fp, + ulong srcLen, ulong* pDstLen, ushort* pCrc) +{ + return Nu_CompressLZC(pArchive, pStraw, fp, srcLen, pDstLen, pCrc, 12); +} + +NuError +Nu_CompressLZC16(NuArchive* pArchive, NuStraw* pStraw, FILE* fp, + ulong srcLen, ulong* pDstLen, ushort* pCrc) +{ + return Nu_CompressLZC(pArchive, pStraw, 
fp, srcLen, pDstLen, pCrc, 16); +} + + +/* + * =========================================================================== + * Expansion + * =========================================================================== + */ + +/* + * Write the next byte to the output funnel. Also updates the CRC + * if "doCalcCRC" is set to true. + * + * Returns kNuLZCEOF as the value when we're out of data. + */ +static NuError +Nu_LZCPutcCRC(LZCState* pLzcState, char c) +{ + NuError err; + + err = Nu_FunnelWrite(pLzcState->pArchive, pLzcState->pFunnel, &c, 1); + if (pLzcState->doCalcCRC) + pLzcState->crc = Nu_CalcCRC16(pLzcState->crc, &c, 1); + + return err; +} + + +static int +Nu_LZC_nextcode(LZCState* pLzcState, INTCODE* codeptr) +/* Get the next code from input and put it in *codeptr. + * Return (TRUE) on success, or return (FALSE) on end-of-file. + * Adapted from COMPRESS V4.0. + */ +{ + /*static int prevbits = 0;*/ + register INTCODE code; + /*static int size;*/ + /*static UCHAR inbuf[MAXBITS];*/ + register int shift; + UCHAR *bp; + + /* If the next entry is a different bit-size than the preceeding one + * then we must adjust the size and scrap the old buffer. + */ + if (pLzcState->prevbits != pLzcState->bits) { + pLzcState->prevbits = pLzcState->bits; + pLzcState->size = 0; + } + /* If we can't read another code from the buffer, then refill it. + */ + shift = pLzcState->offset; + if (pLzcState->size - shift < pLzcState->bits) { + /* Read more input and convert size from # of bytes to # of bits */ + long getSize; + + getSize = pLzcState->bits; + if (getSize > pLzcState->compRemaining) + getSize = pLzcState->compRemaining; + if (!getSize) /* act like EOF */ + return FALSE; + pLzcState->size = fread(pLzcState->inbuf, 1, getSize, pLzcState->infp) << 3; + if (pLzcState->size <= 0 || ferror(pLzcState->infp)) + return(FALSE); + pLzcState->compRemaining -= getSize; + pLzcState->offset = shift = 0; + } + /* Get to the first byte. 
*/ + bp = pLzcState->inbuf + (shift >> 3); + /* Get first part (low order bits) */ + code = (*bp++ >> (shift &= 7)); + /* high order bits. */ + code |= *bp++ << (shift = 8 - shift); + if ((shift += 8) < pLzcState->bits) code |= *bp << shift; + *codeptr = code & pLzcState->highcode; + pLzcState->offset += pLzcState->bits; + return (TRUE); +} + +static void +Nu_LZC_decompress(LZCState* pLzcState, ulong compressedLen) +{ + NuArchive* pArchive = pLzcState->pArchive; + register int i; + register INTCODE code; + char sufxchar = 0; + INTCODE savecode; + FLAG fulltable = FALSE, cleartable; + /*static*/ char *token= NULL; /* String buffer to build token */ + /*static*/ int maxtoklen = MAXTOKLEN; + int flags; + + Assert(pLzcState->infp != nil); + + pLzcState->exit_stat = OK; + + if (compressedLen < 3) { + /* not long enough to be valid! */ + pLzcState->exit_stat = kNuErrBadData; + Nu_ReportError(NU_BLOB, pLzcState->exit_stat, "thread too short to be valid LZC"); + return; + } + pLzcState->compRemaining = compressedLen; + + /* + * This comes out of "compress.c" rather than "compapi.c". + */ + if ((getc(pLzcState->infp)!=(gNu_magic_header[0] & 0xFF)) + || (getc(pLzcState->infp)!=(gNu_magic_header[1] & 0xFF))) + { + DBUG(("not in compressed format\n")); + pLzcState->exit_stat = kNuErrBadData; + return; + } + flags = getc(pLzcState->infp); /* set -b from file */ + pLzcState->block_compress = flags & BLOCK_MASK; + pLzcState->maxbits = flags & BIT_MASK; + if(pLzcState->maxbits > MAXBITS) { + DBUG(("compressed with %d bits, can only handle %d bits\n", + pLzcState->maxbits, MAXBITS)); + pLzcState->exit_stat = kNuErrBadData; + return; + } + + pLzcState->compRemaining -= 3; + + /* Initialze the token buffer. 
*/ + token = (char*)Nu_Malloc(pArchive, maxtoklen); + if (token == NULL) { + pLzcState->exit_stat = NOMEM; + return; + } + + if (Nu_LZC_alloc_tables(pLzcState, pLzcState->maxcode = ~(~(INTCODE)0 << pLzcState->maxbits),0)) /* exit_stat already set */ + return; + + #if 0 + /* if not zcat or filter */ + if(is_list && !zcat_flg) { /* Open output file */ + if (freopen(ofname, WRITE_FILE_TYPE, stdout) == NULL) { + pLzcState->exit_stat = NOTOPENED; + return; + } + if (!quiet) + fprintf(stderr, "%s: ",ifname); + setvbuf(stdout,xbuf,_IOFBF,XBUFSIZE); + } + #endif + + cleartable = TRUE; + savecode = CLEAR; + pLzcState->offset = 0; + do { + if ((code = savecode) == CLEAR && cleartable) { + pLzcState->highcode = ~(~(INTCODE)0 << (pLzcState->bits = INITBITS)); + fulltable = FALSE; + pLzcState->nextfree = (cleartable = pLzcState->block_compress) == FALSE ? 256 : FIRSTFREE; + if (!Nu_LZC_nextcode(pLzcState, &pLzcState->prefxcode)) + break; + /*putc((*/sufxchar = (char)pLzcState->prefxcode/*), stdout)*/; + pLzcState->exit_stat = Nu_LZCPutcCRC(pLzcState, sufxchar); + if (pLzcState->exit_stat != kNuErrNone) + return; + continue; + } + i = 0; + if (code >= pLzcState->nextfree && !fulltable) { + if (code != pLzcState->nextfree){ + DBUG(("ERROR: code (0x%x) != nextfree (0x%x)\n", + code, pLzcState->nextfree)); + pLzcState->exit_stat = CODEBAD; + return ; /* Non-existant code */ + } + /* Special case for sequence KwKwK (see text of article) */ + code = pLzcState->prefxcode; + token[i++] = sufxchar; + } + /* Build the token string in reverse order by chasing down through + * successive prefix tokens of the current token. Then output it. + */ + while (code >= 256) { + #ifdef DEBUG_LZC + /* These are checks to ease paranoia. Prefix codes must decrease + * monotonically, otherwise we must have corrupt tables. We can + * also check that we haven't overrun the token buffer. 
+ */ + if (code <= (INTCODE)prefix(code)){ + pLzcState->exit_stat= TABLEBAD; + return; + } + #endif + if (i >= maxtoklen) { + maxtoklen *= 2; /* double the size of the token buffer */ + if ((token = Nu_Realloc(pArchive, token, maxtoklen)) == NULL) { + pLzcState->exit_stat = TOKTOOBIG; + return; + } + } + token[i++] = suffix(code); + code = (INTCODE)prefix(code); + } + /*putc(*/sufxchar = (char)code/*, stdout)*/; + pLzcState->exit_stat = Nu_LZCPutcCRC(pLzcState, sufxchar); + while (--i >= 0) { + /*putc(token[i], stdout);*/ + pLzcState->exit_stat = Nu_LZCPutcCRC(pLzcState, token[i]); + } + if (pLzcState->exit_stat != kNuErrNone) + return; + /* If table isn't full, add new token code to the table with + * codeprefix and codesuffix, and remember current code. + */ + if (!fulltable) { + code = pLzcState->nextfree; + Assert(256 <= code && code <= pLzcState->maxcode); + prefix(code) = (CODE)pLzcState->prefxcode; + suffix(code) = sufxchar; + pLzcState->prefxcode = savecode; + if (code++ == pLzcState->highcode) { + if (pLzcState->highcode >= pLzcState->maxcode) { + fulltable = TRUE; + --code; + } + else { + ++pLzcState->bits; + pLzcState->highcode += code; /* nextfree == highcode + 1 */ + } + } + pLzcState->nextfree = code; + } + } while (Nu_LZC_nextcode(pLzcState, &savecode)); + pLzcState->exit_stat = (ferror(pLzcState->infp))? READERR : OK; + + Nu_Free(pArchive, token); + return ; +} + + +/* + * NufxLib interface to LZC expansion. 
+ */ +NuError +Nu_ExpandLZC(NuArchive* pArchive, const NuRecord* pRecord, + const NuThread* pThread, FILE* infp, NuFunnel* pFunnel, ushort* pThreadCrc) +{ + NuError err = kNuErrNone; + LZCState lzcState; + + memset(&lzcState, 0, sizeof(lzcState)); + lzcState.pArchive = pArchive; + lzcState.infp = infp; + lzcState.pFunnel = pFunnel; + + if (pThreadCrc == nil) { + lzcState.doCalcCRC = false; + } else { + lzcState.doCalcCRC = true; + lzcState.crc = *pThreadCrc; + } + + Nu_LZC_decompress(&lzcState, pThread->thCompThreadEOF); + err = lzcState.exit_stat; + DBUG(("+++ LZC_decompress returned with %d\n", err)); + + if (err == kNuErrNone) + err = Nu_FunnelFlush(pArchive, pFunnel); + +#if (SPLIT_HT) + free_array(CODE,lzcState.ht[1], 0); + free_array(CODE,lzcState.ht[0], 0); +#else + free_array(CODE,lzcState.ht, 0); +#endif + +#if (SPLIT_PFX) + free_array(CODE,lzcState.pfx[1], 128); + free_array(CODE,lzcState.pfx[0], 128); +#else + free_array(CODE,lzcState.pfx, 256); +#endif + free_array(char,lzcState.sfx, 256); + + if (pThreadCrc != nil) + *pThreadCrc = lzcState.crc; + return err; +} +