nulib2/nufxlib/Lzc.c

/*
 * NuFX archive manipulation library
 * Copyright (C) 2000-2007 by Andy McFadden, All Rights Reserved.
 * This is free software; you can redistribute it and/or modify it under the
 * terms of the BSD License, see the file COPYING-LIB.
 *
 * This is the LZW implementation found in the UNIX "compress" command,
 * sometimes referred to as "LZC".  GS/ShrinkIt v1.1 can unpack threads
 * in LZC format, P8 ShrinkIt cannot.  The only other application that
 * is known to create LZC threads is the original NuLib.
 *
 * There's a lot of junk in here for the sake of smaller systems (e.g. MSDOS)
 * and pre-ANSI compilers.  For the most part it has been left unchanged.
 * I have done some minor reformatting, and have undone the authors'
 * penchant for assigning variables inside function call statements, but
 * for the most part it is as it was.  (A much cleaner implementation
 * could probably be derived by adapting the NufxLib Lzw.c code...)
 */
#include "NufxLibPriv.h"

#ifdef ENABLE_LZC

/*#define DEBUG_LZC*/

/*
 * Selected definitions from compress.h.
 */
typedef unsigned short CODE;
typedef unsigned char UCHAR;
typedef unsigned int INTCODE;
typedef unsigned int HASH;
typedef int FLAG;

#ifndef FALSE            /* let's get some sense to this */
#define FALSE 0
#define TRUE !FALSE
#endif

#define CONST       const
#ifndef FAR
# define FAR
#endif
#define NULLPTR(type)   ((type FAR *) NULL)
#define ALLOCTYPE   void

#define INITBITS    9
#define MINBITS     12
#define MAXMAXBITS  16
#define MAXBITS     MAXMAXBITS
#define DFLTBITS    MAXBITS

#define UNUSED      ((CODE)0)   /* Indicates hash table value unused    */
#define CLEAR       ((CODE)256) /* Code requesting table to be cleared  */
#define FIRSTFREE   ((CODE)257) /* First free code for token encoding */
#define MAXTOKLEN   512         /* Max chars in token; size of buffer   */
#define OK          kNuErrNone  /* Result codes from functions:         */

#define BIT_MASK    0x1f
#define BLOCK_MASK  0x80

#define CHECK_GAP   10000L      /* ratio check interval, for COMP40 */

static UCHAR gNu_magic_header[] = { 0x1F,0x9D };

/* don't need these */
/*#define SPLIT_HT    1*/
/*#define SPLIT_PFX   1*/
/*#define COMP40      1*/

#define NOMEM       kNuErrMalloc    /*   Ran out of memory                  */
#define TOKTOOBIG   kNuErrBadData   /*   Token longer than MAXTOKLEN chars  */
#define READERR     kNuErrFileRead  /*   I/O error on input                 */
#define WRITEERR    kNuErrFileWrite /*   I/O error on output                */
#define CODEBAD     kNuErrBadData   /*   Infile contained a bad token code  */
#define TABLEBAD    kNuErrInternal  /*   The tables got corrupted (!)       */
#define NOSAVING    kNuErrNone      /*   no saving in file size             */


/*
 * Normally in COMPUSI.UNI.
 */
static inline ALLOCTYPE FAR *
Nu_LZC_emalloc(NuArchive* pArchive, unsigned int x, int y)
{
    return Nu_Malloc(pArchive, x*y);
}
static inline void
Nu_LZC_efree(NuArchive* pArchive, ALLOCTYPE FAR * ptr)
{
    Nu_Free(pArchive, ptr);
}

/*@H************************ < COMPRESS API    > ****************************
*   $@(#) compapi.c,v 4.3d 90/01/18 03:00:00 don Release ^                  *
*                                                                           *
*   compress : compapi.c  <current version of compress algorithm>           *
*                                                                           *
*   port by  : Donald J. Gloistein                                          *
*                                                                           *
*   Source, Documentation, Object Code:                                     *
*   released to Public Domain.  This code is based on code as documented    *
*   below in release notes.                                                 *
*                                                                           *
*---------------------------  Module Description  --------------------------*
*   Contains source code for modified Lempel-Ziv method (LZW) compression   *
*   and decompression.                                                      *
*                                                                           *
*   This code module can be maintained to keep current on releases on the   *
*   Unix system. The command shell and dos modules can remain the same.     *
*                                                                           *
*--------------------------- Implementation Notes --------------------------*
*                                                                           *
*   compiled with : compress.h compress.fns compress.c                      *
*   linked with   : compress.obj compusi.obj                                *
*                                                                           *
*   problems:                                                               *
*                                                                           *
*                                                                           *
*   CAUTION: Uses a number of defines for access and speed. If you change   *
*            anything, make sure about side effects.                        *
*                                                                           *
* Compression:                                                              *
* Algorithm:  use open addressing double hashing (no chaining) on the       *
* prefix code / next character combination.  We do a variant of Knuth's     *
* algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime     *
* secondary probe.  Here, the modular division first probe is gives way     *
* to a faster exclusive-or manipulation.                                    *
* Also block compression with an adaptive reset was used in original code,  *
* whereby the code table is cleared when the compression ration decreases   *
* but after the table fills.  This was removed from this edition. The table *
* is re-sized at this point when it is filled , and a special CLEAR code is *
* generated for the decompressor. This results in some size difference from *
* straight version 4.0 joe Release. But it is fully compatible in both v4.0 *
* and v4.01                                                                 *
*                                                                           *
* Decompression:                                                            *
* This routine adapts to the codes in the file building the "string" table  *
* on-the-fly; requiring no table to be stored in the compressed file.  The  *
* tables used herein are shared with those of the compress() routine.       *
*                                                                           *
*     Initials ---- Name ---------------------------------                  *
*      DjG          Donald J. Gloistein, current port to MsDos 16 bit       *
*                   Plus many others, see rev.hst file for full list        *
*      LvR          Lyle V. Rains, many thanks for improved implementation  *
*                   of the compression and decompression routines.          *
*************************************************************************@H*/

#include <stdio.h>

/*
 * LZC state, largely variables with non-local scope.
 */
typedef struct LZCState {
    NuArchive* pArchive;
    int doCalcCRC;
    ushort crc;

    /* compression */
    NuStraw* pStraw;
    FILE* outfp;
    long uncompRemaining;

    /* expansion */
    FILE* infp;
    NuFunnel* pFunnel;
    ushort* pCrc;
    long compRemaining;


    /*
     * Globals from Compress sources.
     */
    int offset;
    long int in_count ;         /* length of input */
    long int bytes_out;         /* length of compressed output */

    INTCODE prefxcode, nextfree;
    INTCODE highcode;
    INTCODE maxcode;
    HASH hashsize;
    int  bits;

    char FAR *sfx;

    #if (SPLIT_PFX)
        CODE FAR *pfx[2];
    #else
        CODE FAR *pfx;
    #endif

    #if (SPLIT_HT)
        CODE FAR *ht[2];
    #else
        CODE FAR *ht;
    #endif

    #ifdef COMP40
    long int ratio;
    long checkpoint;            /* initialized to CHECK_GAP */
    #endif

    #ifdef DEBUG_LZC
    int debug;                  /* initialized to FALSE */
    #endif

    NuError exit_stat;

    int maxbits;                /* initialized to DFLTBITS */
    int block_compress;         /* initialized to BLOCK_MASK */

    /*
     * Static local variables.  Some of these were explicitly initialized
     * to zero.
     */
    INTCODE oldmaxcode;         /* alloc_tables */
    HASH oldhashsize;           /* alloc_tables */
    int oldbits;                /* putcode */
    UCHAR outbuf[MAXBITS];      /* putcode */
    int prevbits;               /* nextcode */
    int size;                   /* nextcode */
    UCHAR inbuf[MAXBITS];       /* nextcode */
} LZCState;


/*
 * The following two parameter tables are the hash table sizes and
 * maximum code values for various code bit-lengths.  The requirements
 * are that Hashsize[n] must be a prime number and Maxcode[n] must be less
 * than Maxhash[n].  Table occupancy factor is (Maxcode - 256)/Maxhash.
 * Note:  I am using a lower Maxcode for 16-bit codes in order to
 * keep the hash table size less than 64k entries.
 */
static CONST HASH gNu_hs[] = {
    0x13FF,       /* 12-bit codes, 75% occupancy */
    0x26C3,       /* 13-bit codes, 80% occupancy */
    0x4A1D,       /* 14-bit codes, 85% occupancy */
    0x8D0D,       /* 15-bit codes, 90% occupancy */
    0xFFD9        /* 16-bit codes, 94% occupancy, 6% of code values unused */
};
#define Hashsize(maxb) (gNu_hs[(maxb) -MINBITS])

static CONST INTCODE gNu_mc[] = {
    0x0FFF,       /* 12-bit codes */
    0x1FFF,       /* 13-bit codes */
    0x3FFF,       /* 14-bit codes */
    0x7FFF,       /* 15-bit codes */
    0xEFFF        /* 16-bit codes, 6% of code values unused */
};
#define Maxcode(maxb) (gNu_mc[(maxb) -MINBITS])

#ifdef __STDC__
#ifdef DEBUG_LZC
#define allocx(type, ptr, size) \
    (((ptr) = (type FAR *) Nu_LZC_emalloc(pArchive, (unsigned int)(size),sizeof(type))) == NULLPTR(type) \
    ?   (DBUG(("%s: "#ptr" -- ", "LZC")), NOMEM) : OK \
    )
#else
#define allocx(type,ptr,size) \
    (((ptr) = (type FAR *) Nu_LZC_emalloc(pArchive, (unsigned int)(size),sizeof(type))) == NULLPTR(type) \
    ? NOMEM : OK \
    )
#endif
#else
#define allocx(type,ptr,size) \
    (((ptr) = (type FAR *) Nu_LZC_emalloc(pArchive, (unsigned int)(size),sizeof(type))) == NULLPTR(type) \
    ? NOMEM : OK \
    )
#endif

#define free_array(type,ptr,offset) \
    if (ptr != NULLPTR(type)) { \
        Nu_LZC_efree(pArchive, (ALLOCTYPE FAR *)((ptr) + (offset))); \
        (ptr) = NULLPTR(type); \
    }

  /*
   * Macro to allocate new memory to a pointer with an offset value.
   */
#define alloc_array(type, ptr, size, offset) \
    ( allocx(type, ptr, (size) - (offset)) != OK \
      ? NOMEM \
      : (((ptr) -= (offset)), OK) \
    )

/*static char FAR *sfx = NULLPTR(char) ;*/
#define suffix(code)     pLzcState->sfx[code]


#if (SPLIT_PFX)
    /*static CODE FAR *pfx[2] = {NULLPTR(CODE), NULLPTR(CODE)};*/
#else
    /*static CODE FAR *pfx = NULLPTR(CODE);*/
#endif


#if (SPLIT_HT)
    /*static CODE FAR *ht[2] = {NULLPTR(CODE),NULLPTR(CODE)};*/
#else
    /*static CODE FAR *ht = NULLPTR(CODE);*/
#endif


static int
Nu_LZC_alloc_tables(LZCState* pLzcState, INTCODE newmaxcode, HASH newhashsize)
{
    NuArchive* pArchive = pLzcState->pArchive;
    /*static INTCODE oldmaxcode = 0;*/
    /*static HASH oldhashsize = 0;*/

    if (newhashsize > pLzcState->oldhashsize) {
#if (SPLIT_HT)
        free_array(CODE,pLzcState->ht[1], 0);
        free_array(CODE,pLzcState->ht[0], 0);
#else
        free_array(CODE,pLzcState->ht, 0);
#endif
        pLzcState->oldhashsize = 0;
    }

    if (newmaxcode > pLzcState->oldmaxcode) {
#if (SPLIT_PFX)
        free_array(CODE,pLzcState->pfx[1], 128);
        free_array(CODE,pLzcState->pfx[0], 128);
#else
        free_array(CODE,pLzcState->pfx, 256);
#endif
        free_array(char,pLzcState->sfx, 256);

        if (   alloc_array(char, pLzcState->sfx, newmaxcode + 1, 256)
#if (SPLIT_PFX)
            || alloc_array(CODE, pLzcState->pfx[0], (newmaxcode + 1) / 2, 128)
            || alloc_array(CODE, pLzcState->pfx[1], (newmaxcode + 1) / 2, 128)
#else
            || alloc_array(CODE, pLzcState->pfx, (newmaxcode + 1), 256)
#endif
        ) {
            pLzcState->oldmaxcode = 0;
            pLzcState->exit_stat = NOMEM;
            return(NOMEM);
        }
        pLzcState->oldmaxcode = newmaxcode;
    }
    if (newhashsize > pLzcState->oldhashsize) {
        if (
#if (SPLIT_HT)
            alloc_array(CODE, pLzcState->ht[0], (newhashsize / 2) + 1, 0)
            || alloc_array(CODE, pLzcState->ht[1], newhashsize / 2, 0)
#else
            alloc_array(CODE, pLzcState->ht, newhashsize, 0)
#endif
        ) {
            pLzcState->oldhashsize = 0;
            pLzcState->exit_stat = NOMEM;
            return(NOMEM);
        }
        pLzcState->oldhashsize = newhashsize;
    }
    return (OK);
}

# if (SPLIT_PFX)
    /*
     * We have to split pfx[] table in half,
     * because it's potentially larger than 64k bytes.
     */
#   define prefix(code)   (pLzcState->pfx[(code) & 1][(code) >> 1])
# else
    /*
     * Then pfx[] can't be larger than 64k bytes,
     * or we don't care if it is, so we don't split.
     */
#   define prefix(code) (pLzcState->pfx[code])
# endif


/* The initializing of the tables can be done quicker with memset() */
/* but this way is portable through out the memory models.          */
/* If you use Microsoft halloc() to allocate the arrays, then       */
/* include the pragma #pragma function(memset)  and make sure that  */
/* the length of the memory block is not greater than 64K.          */
/* This also means that you MUST compile in a model that makes the  */
/* default pointers to be far pointers (compact or large models).   */
/* See the file COMPUSI.DOS to modify function emalloc().           */

# if (SPLIT_HT)
    /*
     * We have to split ht[] hash table in half,
     * because it's potentially larger than 64k bytes.
     */
#   define probe(hash)    (pLzcState->ht[(hash) & 1][(hash) >> 1])
#   define init_tables() \
    { \
      hash = pLzcState->hashsize >> 1; \
      pLzcState->ht[0][hash] = 0; \
      while (hash--) pLzcState->ht[0][hash] = pLzcState->ht[1][hash] = 0; \
      pLzcState->highcode = ~(~(INTCODE)0 << (pLzcState->bits = INITBITS)); \
      pLzcState->nextfree = (pLzcState->block_compress ? FIRSTFREE : 256); \
    }

# else

    /*
     * Then ht[] can't be larger than 64k bytes,
     * or we don't care if it is, so we don't split.
     */
#   define probe(hash) (pLzcState->ht[hash])
#   define init_tables() \
    { \
      hash = pLzcState->hashsize; \
      while (hash--) pLzcState->ht[hash] = 0; \
      pLzcState->highcode = ~(~(INTCODE)0 << (pLzcState->bits = INITBITS)); \
      pLzcState->nextfree = (pLzcState->block_compress ? FIRSTFREE : 256); \
    }

# endif


/*
 * ===========================================================================
 *      Compression
 * ===========================================================================
 */

static void
Nu_prratio(long int num, long int den)
{
    register int q;         /* Doesn't need to be long */

    if(num > 214748L) {     /* 2147483647/10000 */
        q = (int) (num / (den / 10000L));
    }
    else {
        q = (int) (10000L * num / den);     /* Long calculations, though */
    }
    if (q < 0) {
        DBUG(("-"));
        q = -q;
    }
    DBUG(("%d.%02d%%", q / 100, q % 100));
}

#ifdef COMP40
/* table clear for block compress */
/* this is for adaptive reset present in version 4.0 joe release */
/* DjG, sets it up and returns TRUE to compress and FALSE to not compress */
static int
Nu_LZC_cl_block(LZCState* pLzcState)
{
    register long int rat;

    pLzcState->checkpoint = pLzcState->in_count + CHECK_GAP;
#ifdef DEBUG_LZC
    if ( pLzcState->debug ) {
        DBUG(( "count: %ld, ratio: ", pLzcState->in_count ));
        Nu_prratio ( pLzcState->in_count, pLzcState->bytes_out );
        DBUG(( "\n"));
    }
#endif

    if(pLzcState->in_count > 0x007fffff) { /* shift will overflow */
        rat = pLzcState->bytes_out >> 8;
        if(rat == 0)       /* Don't divide by zero */
            rat = 0x7fffffff;
        else
            rat = pLzcState->in_count / rat;
    }
    else
        rat = (pLzcState->in_count << 8) / pLzcState->bytes_out;  /* 8 fractional bits */

    if ( rat > pLzcState->ratio ){
        pLzcState->ratio = rat;
        return FALSE;
    }
    else {
        pLzcState->ratio = 0;
#ifdef DEBUG_LZC
        if(pLzcState->debug) {
            DBUG(( "clear\n" ));
        }
#endif
        return TRUE;    /* clear the table */
    }
    return FALSE; /* don't clear the table */
}
#endif

static CONST UCHAR gNu_rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};

static void
Nu_LZC_putcode(LZCState* pLzcState, INTCODE code, register int bits)
{
    /*static int oldbits = 0;*/
    /*static UCHAR outbuf[MAXBITS];*/
    register UCHAR *buf;
    register int shift;

    if (bits != pLzcState->oldbits) {
        if (bits == 0) {
            /* bits == 0 means EOF, write the rest of the buffer. */
            if (pLzcState->offset > 0) {
                fwrite(pLzcState->outbuf,1,(pLzcState->offset +7) >> 3, pLzcState->outfp);
                pLzcState->bytes_out += ((pLzcState->offset +7) >> 3);
            }
            pLzcState->offset = 0;
            pLzcState->oldbits = 0;
            fflush(pLzcState->outfp);
            return;
        }
        else {
            /* Change the code size.  We must write the whole buffer,
             * because the expand side won't discover the size change
             * until after it has read a buffer full.
             */
            if (pLzcState->offset > 0) {
                fwrite(pLzcState->outbuf, 1, pLzcState->oldbits, pLzcState->outfp);
                pLzcState->bytes_out += pLzcState->oldbits;
                pLzcState->offset = 0;
            }
            pLzcState->oldbits = bits;
    #ifdef DEBUG_LZC
            if ( pLzcState->debug ) {
                DBUG(( "\nChange to %d bits\n", bits ));
            }
    #endif /* DEBUG_LZC */
        }
    }
    /*  Get to the first byte. */
    buf = pLzcState->outbuf + ((shift = pLzcState->offset) >> 3);
    if ((shift &= 7) != 0) {
        *(buf) |= (*buf & gNu_rmask[shift]) | (UCHAR)(code << shift);
        *(++buf) = (UCHAR)(code >> (8 - shift));
        if (bits + shift > 16)
            *(++buf) = (UCHAR)(code >> (16 - shift));
    }
    else {
        /* Special case for fast execution */
        *(buf) = (UCHAR)code;
        *(++buf) = (UCHAR)(code >> 8);
    }
    if ((pLzcState->offset += bits) == (bits << 3)) {
        pLzcState->bytes_out += bits;
        fwrite(pLzcState->outbuf,1,bits,pLzcState->outfp);
        pLzcState->offset = 0;
    }
    return;
}


#define kNuLZCEOF   (-1)

/*
 * Get the next byte from the input straw.  Also updates the CRC
 * if "doCalcCRC" is set to true.
 *
 * Returns kNuLZCEOF as the value when we're out of data.
 */
static NuError
Nu_LZCGetcCRC(LZCState* pLzcState, int* pSym)
{
    NuError err;
    uchar c;

    if (!pLzcState->uncompRemaining) {
        *pSym = kNuLZCEOF;
        return kNuErrNone;
    }

    err = Nu_StrawRead(pLzcState->pArchive, pLzcState->pStraw, &c, 1);
    if (err == kNuErrNone) {
        if (pLzcState->doCalcCRC)
            pLzcState->crc = Nu_CalcCRC16(pLzcState->crc, &c, 1);
        *pSym = c;
        pLzcState->uncompRemaining--;
    }

    return err;
}

/*
 * compress stdin to stdout
 */
static void
Nu_LZC_compress(LZCState* pLzcState, ulong* pDstLen)
{
    int c,adjbits;
    register HASH hash;
    register INTCODE code;
    HASH hashf[256];

    Assert(pLzcState->outfp != nil);

    pLzcState->maxcode = Maxcode(pLzcState->maxbits);
    pLzcState->hashsize = Hashsize(pLzcState->maxbits);

#ifdef COMP40
/* Only needed for adaptive reset */
    pLzcState->checkpoint = CHECK_GAP;
    pLzcState->ratio = 0;
#endif

    adjbits = pLzcState->maxbits -10;
    for (c = 256; --c >= 0; ){
        hashf[c] = ((( c &0x7) << 7) ^ c) << adjbits;
    }
    pLzcState->exit_stat = OK;
    if (Nu_LZC_alloc_tables(pLzcState, pLzcState->maxcode, pLzcState->hashsize))  /* exit_stat already set */
        return;
    init_tables();

    #if 0
    /* if not zcat or filter */
    if(is_list && !zcat_flg) {  /* Open output file */
        if (freopen(ofname, WRITE_FILE_TYPE, pLzcState->outfp) == NULL) {
            pLzcState->exit_stat = NOTOPENED;
            return;
        }
        if (!quiet)
            fprintf(stderr, "%s: ",ifname);     /*#if 0*/
        setvbuf(Xstdout,zbuf,_IOFBF,ZBUFSIZE);
    }
    #endif

    /*
    * Check the input stream for previously seen strings.  We keep
    * adding characters to the previously seen prefix string until we
    * get a character which forms a new (unseen) string.  We then send
    * the code for the previously seen prefix string, and add the new
    * string to our tables.  The check for previous strings is done by
    * hashing.  If the code for the hash value is unused, then we have
    * a new string.  If the code is used, we check to see if the prefix
    * and suffix values match the current input; if so, we have found
    * a previously seen string.  Otherwise, we have a hash collision,
    * and we try secondary hash probes until we either find the current
    * string, or we find an unused entry (which indicates a new string).
    */
    if (1 /*!nomagic*/) {
        putc(gNu_magic_header[0], pLzcState->outfp);
        putc(gNu_magic_header[1], pLzcState->outfp);
        putc((char)(pLzcState->maxbits | pLzcState->block_compress), pLzcState->outfp);
        if(ferror(pLzcState->outfp)){  /* check it on entry */
            pLzcState->exit_stat = WRITEERR;
            return;
        }
        pLzcState->bytes_out = 3L;     /* includes 3-byte header mojo */
    }
    else
        pLzcState->bytes_out = 0L;      /* no 3-byte header mojo */
    pLzcState->in_count = 1L;
    pLzcState->offset = 0;

    pLzcState->exit_stat = Nu_LZCGetcCRC(pLzcState, &c);
    if (pLzcState->exit_stat != kNuErrNone)
        return;
    pLzcState->prefxcode = (INTCODE)c;

    while (1) {
        pLzcState->exit_stat = Nu_LZCGetcCRC(pLzcState, &c);
        if (pLzcState->exit_stat != kNuErrNone)
            return;
        if (c == kNuLZCEOF)
            break;

        pLzcState->in_count++;
        hash = pLzcState->prefxcode ^ hashf[c];
        /* I need to check that my hash value is within range
        * because my 16-bit hash table is smaller than 64k.
        */
        if (hash >= pLzcState->hashsize)
            hash -= pLzcState->hashsize;
        if ((code = (INTCODE)probe(hash)) != UNUSED) {
            if (suffix(code) != (char)c || (INTCODE)prefix(code) != pLzcState->prefxcode) {
            /* hashdelta is subtracted from hash on each iteration of
            * the following hash table search loop.  I compute it once
            * here to remove it from the loop.
            */
                HASH hashdelta = (0x120 - c) << (adjbits);
                do  {
                    /* rehash and keep looking */
                    Assert(code >= FIRSTFREE && code <= pLzcState->maxcode);
                    if (hash >= hashdelta) hash -= hashdelta;
                        else hash += (pLzcState->hashsize - hashdelta);
                    Assert(hash < pLzcState->hashsize);
                    if ((code = (INTCODE)probe(hash)) == UNUSED)
                        goto newcode;
                } while (suffix(code) != (char)c || (INTCODE)prefix(code) != pLzcState->prefxcode);
            }
            pLzcState->prefxcode = code;
        }
        else {
            newcode: {
                Nu_LZC_putcode(pLzcState, pLzcState->prefxcode, pLzcState->bits);
                code = pLzcState->nextfree;
                Assert(hash < pLzcState->hashsize);
                Assert(code >= FIRSTFREE);
                Assert(code <= pLzcState->maxcode + 1);
                if (code <= pLzcState->maxcode) {
                    probe(hash) = (CODE)code;
                    prefix(code) = (CODE)pLzcState->prefxcode;
                    suffix(code) = (char)c;
                    if (code > pLzcState->highcode) {
                        pLzcState->highcode += code;
                        ++pLzcState->bits;
                    }
                    pLzcState->nextfree = code + 1;
                }
#ifdef COMP40
                else if (pLzcState->in_count >= pLzcState->checkpoint && pLzcState->block_compress ) {
                    if (Nu_LZC_cl_block(pLzcState)){
#else
                else if (pLzcState->block_compress){
#endif
                        Nu_LZC_putcode(pLzcState, (INTCODE)c, pLzcState->bits);
                        Nu_LZC_putcode(pLzcState, CLEAR, pLzcState->bits);
                        init_tables();
                        pLzcState->exit_stat = Nu_LZCGetcCRC(pLzcState, &c);
                        if (pLzcState->exit_stat != kNuErrNone)
                            return;
                        if (c == kNuLZCEOF)
                            break;
                        pLzcState->in_count++;
#ifdef COMP40
                    }
#endif
                }
                pLzcState->prefxcode = (INTCODE)c;
            }
        }
    }
    Nu_LZC_putcode(pLzcState, pLzcState->prefxcode, pLzcState->bits);
    Nu_LZC_putcode(pLzcState, CLEAR, 0);
    /*
     * Print out stats on stderr
     */
    if(1 /*zcat_flg == 0 && !quiet*/) {
#ifdef DEBUG_LZC
        DBUG((
            "%ld chars in, (%ld bytes) out, compression factor: ",
            pLzcState->in_count, pLzcState->bytes_out ));
        Nu_prratio( pLzcState->in_count, pLzcState->bytes_out );
        DBUG(( "\n"));
        DBUG(( "\tCompression as in compact: " ));
        Nu_prratio( pLzcState->in_count-pLzcState->bytes_out, pLzcState->in_count );
        DBUG(( "\n"));
        DBUG(( "\tLargest code (of last block) was %d (%d bits)\n",
            pLzcState->prefxcode - 1, pLzcState->bits ));
#else
        DBUG(( "Compression: " ));
        Nu_prratio( pLzcState->in_count-pLzcState->bytes_out, pLzcState->in_count );
#endif /* DEBUG_LZC */
    }
    if(pLzcState->bytes_out > pLzcState->in_count)      /*  if no savings */
        pLzcState->exit_stat = NOSAVING;
    *pDstLen = pLzcState->bytes_out;
    return ;
}


/*
 * NufxLib interface to LZC compression.
 */
static NuError
Nu_CompressLZC(NuArchive* pArchive, NuStraw* pStraw, FILE* fp,
    ulong srcLen, ulong* pDstLen, ushort* pCrc, int maxbits)
{
    NuError err = kNuErrNone;
    LZCState lzcState;

    memset(&lzcState, 0, sizeof(lzcState));
    lzcState.pArchive = pArchive;
    lzcState.pStraw = pStraw;
    lzcState.outfp = fp;
    lzcState.uncompRemaining = srcLen;

    if (pCrc == nil) {
        lzcState.doCalcCRC = false;
    } else {
        lzcState.doCalcCRC = true;
        lzcState.crc = *pCrc;
    }

    lzcState.maxbits = maxbits;
    lzcState.block_compress = BLOCK_MASK;     /* enabled */

    Nu_LZC_compress(&lzcState, pDstLen);
    err = lzcState.exit_stat;
    DBUG(("+++ LZC_compress returned with %d\n", err));

#if (SPLIT_HT)
    free_array(CODE,lzcState.ht[1], 0);
    free_array(CODE,lzcState.ht[0], 0);
#else
    free_array(CODE,lzcState.ht, 0);
#endif

#if (SPLIT_PFX)
    free_array(CODE,lzcState.pfx[1], 128);
    free_array(CODE,lzcState.pfx[0], 128);
#else
    free_array(CODE,lzcState.pfx, 256);
#endif
    free_array(char,lzcState.sfx, 256);

    if (pCrc != nil)
        *pCrc = lzcState.crc;

    return err;
}

NuError
Nu_CompressLZC12(NuArchive* pArchive, NuStraw* pStraw, FILE* fp,
    ulong srcLen, ulong* pDstLen, ushort* pCrc)
{
    return Nu_CompressLZC(pArchive, pStraw, fp, srcLen, pDstLen, pCrc, 12);
}

NuError
Nu_CompressLZC16(NuArchive* pArchive, NuStraw* pStraw, FILE* fp,
    ulong srcLen, ulong* pDstLen, ushort* pCrc)
{
    return Nu_CompressLZC(pArchive, pStraw, fp, srcLen, pDstLen, pCrc, 16);
}


/*
 * ===========================================================================
 *      Expansion
 * ===========================================================================
 */

/*
 * Write the next byte to the output funnel.  Also updates the CRC
 * if "doCalcCRC" is set to true.
 *
 * Returns kNuLZCEOF as the value when we're out of data.
 */
static NuError
Nu_LZCPutcCRC(LZCState* pLzcState, char c)
{
    NuError err;

    err = Nu_FunnelWrite(pLzcState->pArchive, pLzcState->pFunnel,
            (uchar*) &c, 1);
    if (pLzcState->doCalcCRC)
        pLzcState->crc = Nu_CalcCRC16(pLzcState->crc, (uchar*) &c, 1);

    return err;
}


static int
Nu_LZC_nextcode(LZCState* pLzcState, INTCODE* codeptr)
/* Get the next code from input and put it in *codeptr.
 * Return (TRUE) on success, or return (FALSE) on end-of-file.
 * Adapted from COMPRESS V4.0.
 */
{
    /*static int prevbits = 0;*/
    register INTCODE code;
    /*static int size;*/
    /*static UCHAR inbuf[MAXBITS];*/
    register int shift;
    UCHAR *bp;

    /* If the next entry is a different bit-size than the preceeding one
    * then we must adjust the size and scrap the old buffer.
    */
    if (pLzcState->prevbits != pLzcState->bits) {
        pLzcState->prevbits = pLzcState->bits;
        pLzcState->size = 0;
    }
    /* If we can't read another code from the buffer, then refill it.
    */
    shift = pLzcState->offset;
    if (pLzcState->size - shift < pLzcState->bits) {
        /* Read more input and convert size from # of bytes to # of bits */
        long getSize;

        getSize = pLzcState->bits;
        if (getSize > pLzcState->compRemaining)
            getSize = pLzcState->compRemaining;
        if (!getSize)       /* act like EOF */
            return FALSE;
        pLzcState->size = fread(pLzcState->inbuf, 1, getSize, pLzcState->infp) << 3;
        if (pLzcState->size <= 0 || ferror(pLzcState->infp))
            return(FALSE);
        pLzcState->compRemaining -= getSize;
        pLzcState->offset = shift = 0;
    }
    /* Get to the first byte. */
    bp = pLzcState->inbuf + (shift >> 3);
    /* Get first part (low order bits) */
    code = (*bp++ >> (shift &= 7));
    /* high order bits. */
    code |= *bp++ << (shift = 8 - shift);
    if ((shift += 8) < pLzcState->bits) code |= *bp << shift;
        *codeptr = code & pLzcState->highcode;
    pLzcState->offset += pLzcState->bits;
    return (TRUE);
}

static void
Nu_LZC_decompress(LZCState* pLzcState, ulong compressedLen)
{
    NuArchive* pArchive = pLzcState->pArchive;
    register int i;
    register INTCODE code;
    char sufxchar = 0;
    INTCODE savecode;
    FLAG fulltable = FALSE, cleartable;
    /*static*/ char *token= NULL;         /* String buffer to build token */
    /*static*/ int maxtoklen = MAXTOKLEN;
    int flags;

    Assert(pLzcState->infp != nil);

    pLzcState->exit_stat = OK;

    if (compressedLen < 3) {
        /* not long enough to be valid! */
        pLzcState->exit_stat = kNuErrBadData;
        Nu_ReportError(NU_BLOB, pLzcState->exit_stat, "thread too short to be valid LZC");
        return;
    }
    pLzcState->compRemaining = compressedLen;

    /*
     * This comes out of "compress.c" rather than "compapi.c".
     */
    if ((getc(pLzcState->infp)!=(gNu_magic_header[0] & 0xFF))
        || (getc(pLzcState->infp)!=(gNu_magic_header[1] & 0xFF)))
    {
        DBUG(("not in compressed format\n"));
        pLzcState->exit_stat = kNuErrBadData;
        return;
    }
    flags = getc(pLzcState->infp);    /* set -b from file */
    pLzcState->block_compress = flags & BLOCK_MASK;
    pLzcState->maxbits = flags & BIT_MASK;
    if(pLzcState->maxbits > MAXBITS) {
        DBUG(("compressed with %d bits, can only handle %d bits\n",
            pLzcState->maxbits, MAXBITS));
        pLzcState->exit_stat = kNuErrBadData;
        return;
    }

    pLzcState->compRemaining -= 3;

    /* Initialze the token buffer. */
    token = (char*)Nu_Malloc(pArchive, maxtoklen);
    if (token == NULL) {
        pLzcState->exit_stat = NOMEM;
        return;
    }

    if (Nu_LZC_alloc_tables(pLzcState, pLzcState->maxcode = ~(~(INTCODE)0 << pLzcState->maxbits),0)) /* exit_stat already set */
        return;

    #if 0
    /* if not zcat or filter */
    if(is_list && !zcat_flg) {  /* Open output file */
        if (freopen(ofname, WRITE_FILE_TYPE, stdout) == NULL) {
            pLzcState->exit_stat = NOTOPENED;
            return;
        }
        if (!quiet)
            fprintf(stderr, "%s: ",ifname);     /*#if 0*/
        setvbuf(stdout,xbuf,_IOFBF,XBUFSIZE);
    }
    #endif

    cleartable = TRUE;
    savecode = CLEAR;
    pLzcState->offset = 0;
    do {
        if ((code = savecode) == CLEAR && cleartable) {
            pLzcState->highcode = ~(~(INTCODE)0 << (pLzcState->bits = INITBITS));
            fulltable = FALSE;
            pLzcState->nextfree = (cleartable = pLzcState->block_compress) == FALSE ? 256 : FIRSTFREE;
            if (!Nu_LZC_nextcode(pLzcState, &pLzcState->prefxcode))
                break;
            /*putc((*/sufxchar = (char)pLzcState->prefxcode/*), stdout)*/;
            pLzcState->exit_stat = Nu_LZCPutcCRC(pLzcState, sufxchar);
            if (pLzcState->exit_stat != kNuErrNone)
                return;
            continue;
        }
        i = 0;
        if (code >= pLzcState->nextfree && !fulltable) {
            if (code != pLzcState->nextfree){
                DBUG(("ERROR: code (0x%x) != nextfree (0x%x)\n",
                    code, pLzcState->nextfree));
                pLzcState->exit_stat = CODEBAD;
                return ;     /* Non-existant code */
            }
            /* Special case for sequence KwKwK (see text of article)         */
            code = pLzcState->prefxcode;
            token[i++] = sufxchar;
        }
        /* Build the token string in reverse order by chasing down through
         * successive prefix tokens of the current token.  Then output it.
         */
        while (code >= 256) {
            #ifdef DEBUG_LZC
            /* These are checks to ease paranoia. Prefix codes must decrease
             * monotonically, otherwise we must have corrupt tables.  We can
             * also check that we haven't overrun the token buffer.
             */
            if (code <= (INTCODE)prefix(code)){
                pLzcState->exit_stat= TABLEBAD;
                return;
            }
            #endif
            if (i >= maxtoklen) {
                maxtoklen *= 2;   /* double the size of the token buffer */
                if ((token = Nu_Realloc(pArchive, token, maxtoklen)) == NULL) {
                    pLzcState->exit_stat = TOKTOOBIG;
                    return;
                }
            }
            token[i++] = suffix(code);
            code = (INTCODE)prefix(code);
        }
        /*putc(*/sufxchar = (char)code/*, stdout)*/;
        pLzcState->exit_stat = Nu_LZCPutcCRC(pLzcState, sufxchar);
        while (--i >= 0) {
            /*putc(token[i], stdout);*/
            pLzcState->exit_stat = Nu_LZCPutcCRC(pLzcState, token[i]);
        }
        if (pLzcState->exit_stat != kNuErrNone)
            return;
        /* If table isn't full, add new token code to the table with
         * codeprefix and codesuffix, and remember current code.
         */
        if (!fulltable) {
            code = pLzcState->nextfree;
            Assert(256 <= code && code <= pLzcState->maxcode);
            prefix(code) = (CODE)pLzcState->prefxcode;
            suffix(code) = sufxchar;
            pLzcState->prefxcode = savecode;
            if (code++ == pLzcState->highcode) {
                if (pLzcState->highcode >= pLzcState->maxcode) {
                    fulltable = TRUE;
                    --code;
                }
                else {
                    ++pLzcState->bits;
                    pLzcState->highcode += code;  /* nextfree == highcode + 1 */
                }
            }
            pLzcState->nextfree = code;
        }
    } while (Nu_LZC_nextcode(pLzcState, &savecode));
    pLzcState->exit_stat = (ferror(pLzcState->infp))? READERR : OK;

    Nu_Free(pArchive, token);
    return ;
}


/*
 * NufxLib interface to LZC expansion.
 */
NuError
Nu_ExpandLZC(NuArchive* pArchive, const NuRecord* pRecord,
    const NuThread* pThread, FILE* infp, NuFunnel* pFunnel, ushort* pCrc)
{
    NuError err = kNuErrNone;
    LZCState lzcState;

    memset(&lzcState, 0, sizeof(lzcState));
    lzcState.pArchive = pArchive;
    lzcState.infp = infp;
    lzcState.pFunnel = pFunnel;

    if (pCrc == nil) {
        lzcState.doCalcCRC = false;
    } else {
        lzcState.doCalcCRC = true;
        lzcState.crc = *pCrc;
    }

    Nu_LZC_decompress(&lzcState, pThread->thCompThreadEOF);
    err = lzcState.exit_stat;
    DBUG(("+++ LZC_decompress returned with %d\n", err));

#if (SPLIT_HT)
    free_array(CODE,lzcState.ht[1], 0);
    free_array(CODE,lzcState.ht[0], 0);
#else
    free_array(CODE,lzcState.ht, 0);
#endif

#if (SPLIT_PFX)
    free_array(CODE,lzcState.pfx[1], 128);
    free_array(CODE,lzcState.pfx[0], 128);
#else
    free_array(CODE,lzcState.pfx, 256);
#endif
    free_array(char,lzcState.sfx, 256);

    if (pCrc != nil)
        *pCrc = lzcState.crc;
    return err;
}

#endif /*ENABLE_LZC*/