mirror of
https://github.com/fadden/nulib2.git
synced 2024-11-19 14:33:47 +00:00
d41016e6c1
Generalized compression method enable/disable. Now any method can be removed. Applications can call NuTestFeature() to figure out what is supported by the copy of NufxLib they're linked against.
1104 lines
36 KiB
C
1104 lines
36 KiB
C
/*
|
|
* NuFX archive manipulation library
|
|
* Copyright (C) 2000 by Andy McFadden, All Rights Reserved.
|
|
* This is free software; you can redistribute it and/or modify it under the
|
|
* terms of the GNU Library General Public License, see the file COPYING.LIB.
|
|
*
|
|
* This is the LZW implementation found in the UNIX "compress" command,
|
|
* sometimes referred to as "LZC". GS/ShrinkIt v1.1 can unpack threads
|
|
* in LZC format, P8 ShrinkIt cannot. The only other application that
|
|
* is known to create LZC threads is the original NuLib.
|
|
*
|
|
* There's a lot of junk in here for the sake of smaller systems (e.g. MSDOS)
|
|
* and pre-ANSI compilers. For the most part it has been left unchanged.
|
|
* I have done some minor reformatting, and have undone the authors'
|
|
* penchant for assigning variables inside function call statements, but
|
|
* for the most part it is as it was. (A much cleaner implementation
|
|
* could probably be derived by adapting the NufxLib Lzw.c code...)
|
|
*/
|
|
#include "NufxLibPriv.h"
|
|
|
|
#ifdef ENABLE_LZC
|
|
|
|
#define DEBUG_LZC
|
|
|
|
/*
 * Selected definitions from compress.h.
 */
typedef unsigned short CODE;    /* element type of the pfx[] and ht[] tables */
typedef unsigned char UCHAR;
typedef unsigned int INTCODE;   /* a code held in a native-width integer */
typedef unsigned int HASH;      /* hash table index / size */
typedef int FLAG;

#ifndef FALSE           /* let's get some sense to this */
#define FALSE 0
#define TRUE (!FALSE)   /* parenthesized so it is safe inside any expression */
#endif

#define CONST const
#define FAR
#define NULLPTR(type) ((type FAR *) NULL)
#define ALLOCTYPE void

#define INITBITS 9              /* code width at start and after a CLEAR */
#define MINBITS 12              /* smallest width in the Hashsize/Maxcode tables */
#define MAXMAXBITS 16
#define MAXBITS MAXMAXBITS
#define DFLTBITS MAXBITS

#define UNUSED ((CODE)0)        /* Indicates hash table value unused */
#define CLEAR ((CODE)256)       /* Code requesting table to be cleared */
#define FIRSTFREE ((CODE)257)   /* First free code for token encoding */
#define MAXTOKLEN 512           /* Max chars in token; size of buffer */
#define OK kNuErrNone           /* Result codes from functions: */

#define BIT_MASK 0x1f           /* header byte 3: maximum code width */
#define BLOCK_MASK 0x80         /* header byte 3: block-compress flag */

#define CHECK_GAP 10000L        /* ratio check interval, for COMP40 */

/* two-byte signature at the start of every LZC stream; never modified */
static CONST UCHAR gNu_magic_header[] = { 0x1F,0x9D };

/* don't need these */
/*#define SPLIT_HT 1*/
/*#define SPLIT_PFX 1*/
/*#define COMP40 1*/

#define NOMEM kNuErrMalloc      /* Ran out of memory */
#define TOKTOOBIG kNuErrBadData /* Token longer than MAXTOKLEN chars */
#define READERR kNuErrFileRead  /* I/O error on input */
#define WRITEERR kNuErrFileWrite /* I/O error on output */
#define CODEBAD kNuErrBadData   /* Infile contained a bad token code */
#define TABLEBAD kNuErrInternal /* The tables got corrupted (!) */
#define NOSAVING kNuErrNone     /* no saving in file size */
|
|
|
|
|
|
/*
|
|
* Normally in COMPUSI.UNI.
|
|
*/
|
|
static inline ALLOCTYPE FAR *
|
|
Nu_LZC_emalloc(NuArchive* pArchive, unsigned int x, int y)
|
|
{
|
|
return Nu_Malloc(pArchive, x*y);
|
|
}
|
|
static inline void
|
|
Nu_LZC_efree(NuArchive* pArchive, ALLOCTYPE FAR * ptr)
|
|
{
|
|
return Nu_Free(pArchive, ptr);
|
|
}
|
|
|
|
/*@H************************ < COMPRESS API > ****************************
|
|
* $@(#) compapi.c,v 4.3d 90/01/18 03:00:00 don Release ^ *
|
|
* *
|
|
* compress : compapi.c <current version of compress algorithm> *
|
|
* *
|
|
* port by : Donald J. Gloistein *
|
|
* *
|
|
* Source, Documentation, Object Code: *
|
|
* released to Public Domain. This code is based on code as documented *
|
|
* below in release notes. *
|
|
* *
|
|
*--------------------------- Module Description --------------------------*
|
|
* Contains source code for modified Lempel-Ziv method (LZW) compression *
|
|
* and decompression. *
|
|
* *
|
|
* This code module can be maintained to keep current on releases on the *
|
|
* Unix system. The command shell and dos modules can remain the same. *
|
|
* *
|
|
*--------------------------- Implementation Notes --------------------------*
|
|
* *
|
|
* compiled with : compress.h compress.fns compress.c *
|
|
* linked with : compress.obj compusi.obj *
|
|
* *
|
|
* problems: *
|
|
* *
|
|
* *
|
|
* CAUTION: Uses a number of defines for access and speed. If you change *
|
|
* anything, make sure about side effects. *
|
|
* *
|
|
* Compression: *
|
|
* Algorithm: use open addressing double hashing (no chaining) on the *
|
|
* prefix code / next character combination. We do a variant of Knuth's *
|
|
* algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime *
|
|
* secondary probe. Here, the modular division first probe gives way *
|
|
* to a faster exclusive-or manipulation. *
|
|
* Also block compression with an adaptive reset was used in original code, *
|
|
* whereby the code table is cleared when the compression ratio decreases *
|
|
* but after the table fills. This was removed from this edition. The table *
|
|
* is re-sized at this point when it is filled , and a special CLEAR code is *
|
|
* generated for the decompressor. This results in some size difference from *
|
|
* straight version 4.0 joe Release. But it is fully compatible in both v4.0 *
|
|
* and v4.01 *
|
|
* *
|
|
* Decompression: *
|
|
* This routine adapts to the codes in the file building the "string" table *
|
|
* on-the-fly; requiring no table to be stored in the compressed file. The *
|
|
* tables used herein are shared with those of the compress() routine. *
|
|
* *
|
|
* Initials ---- Name --------------------------------- *
|
|
* DjG Donald J. Gloistein, current port to MsDos 16 bit *
|
|
* Plus many others, see rev.hst file for full list *
|
|
* LvR Lyle V. Rains, many thanks for improved implementation *
|
|
* of the compression and decompression routines. *
|
|
*************************************************************************@H*/
|
|
|
|
#include <stdio.h>
|
|
|
|
/*
|
|
* LZC state, largely variables with non-local scope.
|
|
*/
|
|
typedef struct LZCState {
|
|
NuArchive* pArchive;
|
|
int doCalcCRC;
|
|
ushort crc;
|
|
|
|
/* compression */
|
|
NuStraw* pStraw;
|
|
FILE* outfp;
|
|
long uncompRemaining;
|
|
|
|
/* expansion */
|
|
FILE* infp;
|
|
NuFunnel* pFunnel;
|
|
ushort* pCrc;
|
|
long compRemaining;
|
|
|
|
|
|
/*
|
|
* Globals from Compress sources.
|
|
*/
|
|
int offset;
|
|
long int in_count ; /* length of input */
|
|
long int bytes_out; /* length of compressed output */
|
|
|
|
INTCODE prefxcode, nextfree;
|
|
INTCODE highcode;
|
|
INTCODE maxcode;
|
|
HASH hashsize;
|
|
int bits;
|
|
|
|
char FAR *sfx;
|
|
|
|
#if (SPLIT_PFX)
|
|
CODE FAR *pfx[2];
|
|
#else
|
|
CODE FAR *pfx;
|
|
#endif
|
|
|
|
#if (SPLIT_HT)
|
|
CODE FAR *ht[2];
|
|
#else
|
|
CODE FAR *ht;
|
|
#endif
|
|
|
|
#ifdef COMP40
|
|
long int ratio;
|
|
long checkpoint; /* initialized to CHECK_GAP */
|
|
#endif
|
|
|
|
#ifdef DEBUG_LZC
|
|
int debug; /* initialized to FALSE */
|
|
#endif
|
|
|
|
NuError exit_stat;
|
|
|
|
int maxbits; /* initialized to DFLTBITS */
|
|
int block_compress; /* initialized to BLOCK_MASK */
|
|
|
|
/*
|
|
* Static local variables. Some of these were explicitly initialized
|
|
* to zero.
|
|
*/
|
|
INTCODE oldmaxcode; /* alloc_tables */
|
|
HASH oldhashsize; /* alloc_tables */
|
|
int oldbits; /* putcode */
|
|
UCHAR outbuf[MAXBITS]; /* putcode */
|
|
int prevbits; /* nextcode */
|
|
int size; /* nextcode */
|
|
UCHAR inbuf[MAXBITS]; /* nextcode */
|
|
} LZCState;
|
|
|
|
|
|
/*
|
|
* The following two parameter tables are the hash table sizes and
|
|
* maximum code values for various code bit-lengths. The requirements
|
|
* are that Hashsize[n] must be a prime number and Maxcode[n] must be less
|
|
* than Maxhash[n]. Table occupancy factor is (Maxcode - 256)/Maxhash.
|
|
* Note: I am using a lower Maxcode for 16-bit codes in order to
|
|
* keep the hash table size less than 64k entries.
|
|
*/
|
|
static CONST HASH gNu_hs[] = {
|
|
0x13FF, /* 12-bit codes, 75% occupancy */
|
|
0x26C3, /* 13-bit codes, 80% occupancy */
|
|
0x4A1D, /* 14-bit codes, 85% occupancy */
|
|
0x8D0D, /* 15-bit codes, 90% occupancy */
|
|
0xFFD9 /* 16-bit codes, 94% occupancy, 6% of code values unused */
|
|
};
|
|
#define Hashsize(maxb) (gNu_hs[(maxb) -MINBITS])
|
|
|
|
static CONST INTCODE gNu_mc[] = {
|
|
0x0FFF, /* 12-bit codes */
|
|
0x1FFF, /* 13-bit codes */
|
|
0x3FFF, /* 14-bit codes */
|
|
0x7FFF, /* 15-bit codes */
|
|
0xEFFF /* 16-bit codes, 6% of code values unused */
|
|
};
|
|
#define Maxcode(maxb) (gNu_mc[(maxb) -MINBITS])
|
|
|
|
/*
 * allocx(type, ptr, size): point "ptr" at storage for "size" elements of
 * "type", obtained through Nu_LZC_emalloc.  The expression evaluates to OK
 * on success and NOMEM when the allocation returned a null pointer.
 *
 * The macro expects a variable named "pArchive" to be in scope.  The
 * ANSI + DEBUG_LZC flavor also logs the name of the pointer that failed
 * (it needs the ANSI "#" stringizing operator, hence the __STDC__ test).
 */
#ifdef __STDC__
#ifdef DEBUG_LZC
#define allocx(type, ptr, size) \
    (((ptr) = (type FAR *) Nu_LZC_emalloc(pArchive, (unsigned int)(size),sizeof(type))) == NULLPTR(type) \
        ? (DBUG(("%s: "#ptr" -- ", "LZC")), NOMEM) : OK \
    )
#else
#define allocx(type,ptr,size) \
    (((ptr) = (type FAR *) Nu_LZC_emalloc(pArchive, (unsigned int)(size),sizeof(type))) == NULLPTR(type) \
        ? NOMEM : OK \
    )
#endif
#else
/* (a stray comma before pArchive used to make this branch uncompilable) */
#define allocx(type,ptr,size) \
    (((ptr) = (type FAR *) Nu_LZC_emalloc(pArchive, (unsigned int)(size),sizeof(type))) == NULLPTR(type) \
        ? NOMEM : OK \
    )
#endif
|
|
|
|
/*
 * free_array(type, ptr, offset): if "ptr" is non-null, release the block
 * that starts at (ptr + offset) and reset "ptr" to null.  "offset" must be
 * the same bias that was given to alloc_array() for this pointer.
 *
 * Expects a variable named "pArchive" in scope.  Wrapped in do/while(0) so
 * that "free_array(...);" is a single statement wherever it is used.
 */
#define free_array(type,ptr,offset) \
    do { \
        if ((ptr) != NULLPTR(type)) { \
            Nu_LZC_efree(pArchive, (ALLOCTYPE FAR *)((ptr) + (offset))); \
            (ptr) = NULLPTR(type); \
        } \
    } while (0)
|
|
|
|
/*
 * alloc_array(type, ptr, size, offset): allocate (size - offset) elements
 * and then move "ptr" back by "offset" elements, so the caller indexes the
 * table with values starting at "offset".  Evaluates to OK or NOMEM; on
 * NOMEM the pointer is left as allocx() set it (null, unbiased).
 */
#define alloc_array(type, ptr, size, offset) \
    ( allocx(type, ptr, (size) - (offset)) == OK \
        ? (((ptr) -= (offset)), OK) \
        : NOMEM \
    )
|
|
|
|
/* "sfx" used to be a file-scope static; it is now a member of LZCState. */
#define suffix(code)    (pLzcState->sfx[code])
|
|
|
|
|
|
#if (SPLIT_PFX)
|
|
/*static CODE FAR *pfx[2] = {NULLPTR(CODE), NULLPTR(CODE)};*/
|
|
#else
|
|
/*static CODE FAR *pfx = NULLPTR(CODE);*/
|
|
#endif
|
|
|
|
|
|
#if (SPLIT_HT)
|
|
/*static CODE FAR *ht[2] = {NULLPTR(CODE),NULLPTR(CODE)};*/
|
|
#else
|
|
/*static CODE FAR *ht = NULLPTR(CODE);*/
|
|
#endif
|
|
|
|
|
|
static int
|
|
Nu_LZC_alloc_tables(LZCState* pLzcState, INTCODE newmaxcode, HASH newhashsize)
|
|
{
|
|
NuArchive* pArchive = pLzcState->pArchive;
|
|
/*static INTCODE oldmaxcode = 0;*/
|
|
/*static HASH oldhashsize = 0;*/
|
|
|
|
if (newhashsize > pLzcState->oldhashsize) {
|
|
#if (SPLIT_HT)
|
|
free_array(CODE,pLzcState->ht[1], 0);
|
|
free_array(CODE,pLzcState->ht[0], 0);
|
|
#else
|
|
free_array(CODE,pLzcState->ht, 0);
|
|
#endif
|
|
pLzcState->oldhashsize = 0;
|
|
}
|
|
|
|
if (newmaxcode > pLzcState->oldmaxcode) {
|
|
#if (SPLIT_PFX)
|
|
free_array(CODE,pLzcState->pfx[1], 128);
|
|
free_array(CODE,pLzcState->pfx[0], 128);
|
|
#else
|
|
free_array(CODE,pLzcState->pfx, 256);
|
|
#endif
|
|
free_array(char,pLzcState->sfx, 256);
|
|
|
|
if ( alloc_array(char, pLzcState->sfx, newmaxcode + 1, 256)
|
|
#if (SPLIT_PFX)
|
|
|| alloc_array(CODE, pLzcState->pfx[0], (newmaxcode + 1) / 2, 128)
|
|
|| alloc_array(CODE, pLzcState->pfx[1], (newmaxcode + 1) / 2, 128)
|
|
#else
|
|
|| alloc_array(CODE, pLzcState->pfx, (newmaxcode + 1), 256)
|
|
#endif
|
|
) {
|
|
pLzcState->oldmaxcode = 0;
|
|
pLzcState->exit_stat = NOMEM;
|
|
return(NOMEM);
|
|
}
|
|
pLzcState->oldmaxcode = newmaxcode;
|
|
}
|
|
if (newhashsize > pLzcState->oldhashsize) {
|
|
if (
|
|
#if (SPLIT_HT)
|
|
alloc_array(CODE, pLzcState->ht[0], (newhashsize / 2) + 1, 0)
|
|
|| alloc_array(CODE, pLzcState->ht[1], newhashsize / 2, 0)
|
|
#else
|
|
alloc_array(CODE, pLzcState->ht, newhashsize, 0)
|
|
#endif
|
|
) {
|
|
pLzcState->oldhashsize = 0;
|
|
pLzcState->exit_stat = NOMEM;
|
|
return(NOMEM);
|
|
}
|
|
pLzcState->oldhashsize = newhashsize;
|
|
}
|
|
return (OK);
|
|
}
|
|
|
|
#if (SPLIT_PFX)
/*
 * pfx[] is potentially larger than 64k bytes, so it is kept as two
 * half-size tables: the low bit of the code selects the table and the
 * remaining bits index into it.
 */
#  define prefix(code)  (pLzcState->pfx[(code) & 1][(code) >> 1])
#else
/*
 * Either pfx[] cannot exceed 64k bytes or we don't care if it does, so a
 * single table is used.
 */
#  define prefix(code)  (pLzcState->pfx[code])
#endif
|
|
|
|
|
|
/*
 * The initializing of the tables can be done quicker with memset()
 * but this way is portable through out the memory models.
 * If you use Microsoft halloc() to allocate the arrays, then
 * include the pragma #pragma function(memset) and make sure that
 * the length of the memory block is not greater than 64K.
 * This also means that you MUST compile in a model that makes the
 * default pointers to be far pointers (compact or large models).
 * See the file COMPUSI.DOS to modify function emalloc().
 *
 * probe(hash) names the hash-table slot for "hash".
 *
 * init_tables() zeroes every hash-table slot, resets the code width to
 * INITBITS (with highcode set to the largest code of that width), and
 * resets nextfree to FIRSTFREE in block-compress mode or 256 otherwise.
 * It uses -- and clobbers -- a variable named "hash" that must exist in
 * the calling function.  The body is wrapped in do/while(0) so that
 * "init_tables();" is always exactly one statement.
 */
#if (SPLIT_HT)
/*
 * We have to split ht[] hash table in half,
 * because it's potentially larger than 64k bytes.
 */
#  define probe(hash)   (pLzcState->ht[(hash) & 1][(hash) >> 1])
#  define init_tables() \
    do { \
        hash = pLzcState->hashsize >> 1; \
        pLzcState->ht[0][hash] = 0; \
        while (hash--) pLzcState->ht[0][hash] = pLzcState->ht[1][hash] = 0; \
        pLzcState->highcode = ~(~(INTCODE)0 << (pLzcState->bits = INITBITS)); \
        pLzcState->nextfree = (pLzcState->block_compress ? FIRSTFREE : 256); \
    } while (0)

#else

/*
 * Then ht[] can't be larger than 64k bytes,
 * or we don't care if it is, so we don't split.
 */
#  define probe(hash)   (pLzcState->ht[hash])
#  define init_tables() \
    do { \
        hash = pLzcState->hashsize; \
        while (hash--) pLzcState->ht[hash] = 0; \
        pLzcState->highcode = ~(~(INTCODE)0 << (pLzcState->bits = INITBITS)); \
        pLzcState->nextfree = (pLzcState->block_compress ? FIRSTFREE : 256); \
    } while (0)

#endif
|
|
|
|
|
|
/*
|
|
* ===========================================================================
|
|
* Compression
|
|
* ===========================================================================
|
|
*/
|
|
|
|
/*
 * Send the ratio num/den to the debug log as a percentage with two
 * decimal places, e.g. "41.27%".  A negative ratio is prefixed with "-".
 *
 * The arithmetic is arranged to stay inside a 32-bit long.  A zero
 * denominator is reported as "0.00%" instead of being divided by; a
 * nonzero denominator too small to pre-scale (|den| < 10000 while num is
 * large) is handled by splitting the quotient into whole and fractional
 * parts.  Both situations used to divide by zero, and the second one
 * occurs whenever a large input compresses to under 10000 bytes.
 */
static void
Nu_prratio(long int num, long int den)
{
    register int q;         /* Doesn't need to be long */

    if (den == 0) {
        q = 0;              /* ratio is undefined; don't divide */
    }
    else if (num > 214748L) {       /* 2147483647/10000 */
        long int scaledDen = den / 10000L;

        if (scaledDen != 0) {
            q = (int) (num / scaledDen);
        }
        else {
            /*
             * |den| < 10000, so |num % den| < 10000 and the product
             * below cannot overflow.  "whole" is clamped so that
             * whole * 10000 + frac still fits in 31 bits.
             */
            long int whole = num / den;
            long int frac = ((num % den) * 10000L) / den;

            if (whole > 214747L)
                whole = 214747L;
            else if (whole < -214747L)
                whole = -214747L;
            q = (int) (whole * 10000L + frac);
        }
    }
    else {
        q = (int) (10000L * num / den);     /* Long calculations, though */
    }

    if (q < 0) {
        DBUG(("-"));
        q = -q;
    }
    DBUG(("%d.%02d%%", q / 100, q % 100));
}
|
|
|
|
#ifdef COMP40
/*
 * Adaptive-reset test from the version 4.0 "joe" release (DjG).
 *
 * Schedules the next checkpoint CHECK_GAP input bytes from now, then
 * computes the current input/output ratio with 8 fractional bits.  If the
 * ratio beats the stored one it is recorded and FALSE is returned (keep
 * the table); otherwise the stored ratio is reset to 0 and TRUE is
 * returned (clear the table).
 */
static int
Nu_LZC_cl_block(LZCState* pLzcState)
{
    register long int rat;

    pLzcState->checkpoint = pLzcState->in_count + CHECK_GAP;
#ifdef DEBUG_LZC
    if ( pLzcState->debug ) {
        DBUG(( "count: %ld, ratio: ", pLzcState->in_count ));
        Nu_prratio ( pLzcState->in_count, pLzcState->bytes_out );
        DBUG(( "\n"));
    }
#endif

    if (pLzcState->in_count <= 0x007fffff) {
        /* small enough to shift without overflow: 8 fractional bits */
        rat = (pLzcState->in_count << 8) / pLzcState->bytes_out;
    }
    else {
        /* shifting in_count would overflow; scale the divisor down instead */
        rat = pLzcState->bytes_out >> 8;
        rat = (rat == 0) ? 0x7fffffff       /* Don't divide by zero */
                         : pLzcState->in_count / rat;
    }

    if ( rat > pLzcState->ratio ) {
        pLzcState->ratio = rat;
        return FALSE;           /* don't clear the table */
    }

    pLzcState->ratio = 0;
#ifdef DEBUG_LZC
    if (pLzcState->debug) {
        DBUG(( "clear\n" ));
    }
#endif
    return TRUE;                /* clear the table */
}
#endif
|
|
|
|
static CONST UCHAR gNu_rmask[9] = {0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff};
|
|
|
|
static void
|
|
Nu_LZC_putcode(LZCState* pLzcState, INTCODE code, register int bits)
|
|
{
|
|
/*static int oldbits = 0;*/
|
|
/*static UCHAR outbuf[MAXBITS];*/
|
|
register UCHAR *buf;
|
|
register int shift;
|
|
|
|
if (bits != pLzcState->oldbits) {
|
|
if (bits == 0) {
|
|
/* bits == 0 means EOF, write the rest of the buffer. */
|
|
if (pLzcState->offset > 0) {
|
|
fwrite(pLzcState->outbuf,1,(pLzcState->offset +7) >> 3, pLzcState->outfp);
|
|
pLzcState->bytes_out += ((pLzcState->offset +7) >> 3);
|
|
}
|
|
pLzcState->offset = 0;
|
|
pLzcState->oldbits = 0;
|
|
fflush(pLzcState->outfp);
|
|
return;
|
|
}
|
|
else {
|
|
/* Change the code size. We must write the whole buffer,
|
|
* because the expand side won't discover the size change
|
|
* until after it has read a buffer full.
|
|
*/
|
|
if (pLzcState->offset > 0) {
|
|
fwrite(pLzcState->outbuf, 1, pLzcState->oldbits, pLzcState->outfp);
|
|
pLzcState->bytes_out += pLzcState->oldbits;
|
|
pLzcState->offset = 0;
|
|
}
|
|
pLzcState->oldbits = bits;
|
|
#ifdef DEBUG_LZC
|
|
if ( pLzcState->debug ) {
|
|
DBUG(( "\nChange to %d bits\n", bits ));
|
|
}
|
|
#endif /* DEBUG_LZC */
|
|
}
|
|
}
|
|
/* Get to the first byte. */
|
|
buf = pLzcState->outbuf + ((shift = pLzcState->offset) >> 3);
|
|
if ((shift &= 7) != 0) {
|
|
*(buf) |= (*buf & gNu_rmask[shift]) | (UCHAR)(code << shift);
|
|
*(++buf) = (UCHAR)(code >> (8 - shift));
|
|
if (bits + shift > 16)
|
|
*(++buf) = (UCHAR)(code >> (16 - shift));
|
|
}
|
|
else {
|
|
/* Special case for fast execution */
|
|
*(buf) = (UCHAR)code;
|
|
*(++buf) = (UCHAR)(code >> 8);
|
|
}
|
|
if ((pLzcState->offset += bits) == (bits << 3)) {
|
|
pLzcState->bytes_out += bits;
|
|
fwrite(pLzcState->outbuf,1,bits,pLzcState->outfp);
|
|
pLzcState->offset = 0;
|
|
}
|
|
return;
|
|
}
|
|
|
|
|
|
#define kNuLZCEOF (-1)
|
|
|
|
/*
|
|
* Get the next byte from the input straw. Also updates the CRC
|
|
* if "doCalcCRC" is set to true.
|
|
*
|
|
* Returns kNuLZCEOF as the value when we're out of data.
|
|
*/
|
|
static NuError
|
|
Nu_LZCGetcCRC(LZCState* pLzcState, int* pSym)
|
|
{
|
|
NuError err;
|
|
uchar c;
|
|
|
|
if (!pLzcState->uncompRemaining) {
|
|
*pSym = kNuLZCEOF;
|
|
return kNuErrNone;
|
|
}
|
|
|
|
err = Nu_StrawRead(pLzcState->pArchive, pLzcState->pStraw, &c, 1);
|
|
if (err == kNuErrNone) {
|
|
if (pLzcState->doCalcCRC)
|
|
pLzcState->crc = Nu_CalcCRC16(pLzcState->crc, &c, 1);
|
|
*pSym = c;
|
|
pLzcState->uncompRemaining--;
|
|
}
|
|
|
|
return err;
|
|
}
|
|
|
|
/*
|
|
* compress stdin to stdout
|
|
*/
|
|
static void
|
|
Nu_LZC_compress(LZCState* pLzcState, ulong* pDstLen)
|
|
{
|
|
int c,adjbits;
|
|
register HASH hash;
|
|
register INTCODE code;
|
|
HASH hashf[256];
|
|
|
|
Assert(pLzcState->outfp != nil);
|
|
|
|
pLzcState->maxcode = Maxcode(pLzcState->maxbits);
|
|
pLzcState->hashsize = Hashsize(pLzcState->maxbits);
|
|
|
|
#ifdef COMP40
|
|
/* Only needed for adaptive reset */
|
|
pLzcState->checkpoint = CHECK_GAP;
|
|
pLzcState->ratio = 0;
|
|
#endif
|
|
|
|
adjbits = pLzcState->maxbits -10;
|
|
for (c = 256; --c >= 0; ){
|
|
hashf[c] = ((( c &0x7) << 7) ^ c) << adjbits;
|
|
}
|
|
pLzcState->exit_stat = OK;
|
|
if (Nu_LZC_alloc_tables(pLzcState, pLzcState->maxcode, pLzcState->hashsize)) /* exit_stat already set */
|
|
return;
|
|
init_tables();
|
|
|
|
#if 0
|
|
/* if not zcat or filter */
|
|
if(is_list && !zcat_flg) { /* Open output file */
|
|
if (freopen(ofname, WRITE_FILE_TYPE, pLzcState->outfp) == NULL) {
|
|
pLzcState->exit_stat = NOTOPENED;
|
|
return;
|
|
}
|
|
if (!quiet)
|
|
fprintf(stderr, "%s: ",ifname);
|
|
setvbuf(Xstdout,zbuf,_IOFBF,ZBUFSIZE);
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Check the input stream for previously seen strings. We keep
|
|
* adding characters to the previously seen prefix string until we
|
|
* get a character which forms a new (unseen) string. We then send
|
|
* the code for the previously seen prefix string, and add the new
|
|
* string to our tables. The check for previous strings is done by
|
|
* hashing. If the code for the hash value is unused, then we have
|
|
* a new string. If the code is used, we check to see if the prefix
|
|
* and suffix values match the current input; if so, we have found
|
|
* a previously seen string. Otherwise, we have a hash collision,
|
|
* and we try secondary hash probes until we either find the current
|
|
* string, or we find an unused entry (which indicates a new string).
|
|
*/
|
|
if (1 /*!nomagic*/) {
|
|
putc(gNu_magic_header[0], pLzcState->outfp);
|
|
putc(gNu_magic_header[1], pLzcState->outfp);
|
|
putc((char)(pLzcState->maxbits | pLzcState->block_compress), pLzcState->outfp);
|
|
if(ferror(pLzcState->outfp)){ /* check it on entry */
|
|
pLzcState->exit_stat = WRITEERR;
|
|
return;
|
|
}
|
|
pLzcState->bytes_out = 3L; /* includes 3-byte header mojo */
|
|
}
|
|
else
|
|
pLzcState->bytes_out = 0L; /* no 3-byte header mojo */
|
|
pLzcState->in_count = 1L;
|
|
pLzcState->offset = 0;
|
|
|
|
pLzcState->exit_stat = Nu_LZCGetcCRC(pLzcState, &c);
|
|
if (pLzcState->exit_stat != kNuErrNone)
|
|
return;
|
|
pLzcState->prefxcode = (INTCODE)c;
|
|
|
|
while (1) {
|
|
pLzcState->exit_stat = Nu_LZCGetcCRC(pLzcState, &c);
|
|
if (pLzcState->exit_stat != kNuErrNone)
|
|
return;
|
|
if (c == kNuLZCEOF)
|
|
break;
|
|
|
|
pLzcState->in_count++;
|
|
hash = pLzcState->prefxcode ^ hashf[c];
|
|
/* I need to check that my hash value is within range
|
|
* because my 16-bit hash table is smaller than 64k.
|
|
*/
|
|
if (hash >= pLzcState->hashsize)
|
|
hash -= pLzcState->hashsize;
|
|
if ((code = (INTCODE)probe(hash)) != UNUSED) {
|
|
if (suffix(code) != (char)c || (INTCODE)prefix(code) != pLzcState->prefxcode) {
|
|
/* hashdelta is subtracted from hash on each iteration of
|
|
* the following hash table search loop. I compute it once
|
|
* here to remove it from the loop.
|
|
*/
|
|
HASH hashdelta = (0x120 - c) << (adjbits);
|
|
do {
|
|
/* rehash and keep looking */
|
|
Assert(code >= FIRSTFREE && code <= pLzcState->maxcode);
|
|
if (hash >= hashdelta) hash -= hashdelta;
|
|
else hash += (pLzcState->hashsize - hashdelta);
|
|
Assert(hash < pLzcState->hashsize);
|
|
if ((code = (INTCODE)probe(hash)) == UNUSED)
|
|
goto newcode;
|
|
} while (suffix(code) != (char)c || (INTCODE)prefix(code) != pLzcState->prefxcode);
|
|
}
|
|
pLzcState->prefxcode = code;
|
|
}
|
|
else {
|
|
newcode: {
|
|
Nu_LZC_putcode(pLzcState, pLzcState->prefxcode, pLzcState->bits);
|
|
code = pLzcState->nextfree;
|
|
Assert(hash < pLzcState->hashsize);
|
|
Assert(code >= FIRSTFREE);
|
|
Assert(code <= pLzcState->maxcode + 1);
|
|
if (code <= pLzcState->maxcode) {
|
|
probe(hash) = (CODE)code;
|
|
prefix(code) = (CODE)pLzcState->prefxcode;
|
|
suffix(code) = (char)c;
|
|
if (code > pLzcState->highcode) {
|
|
pLzcState->highcode += code;
|
|
++pLzcState->bits;
|
|
}
|
|
pLzcState->nextfree = code + 1;
|
|
}
|
|
#ifdef COMP40
|
|
else if (pLzcState->in_count >= pLzcState->checkpoint && pLzcState->block_compress ) {
|
|
if (Nu_LZC_cl_block(pLzcState)){
|
|
#else
|
|
else if (pLzcState->block_compress){
|
|
#endif
|
|
Nu_LZC_putcode(pLzcState, (INTCODE)c, pLzcState->bits);
|
|
Nu_LZC_putcode(pLzcState, CLEAR, pLzcState->bits);
|
|
init_tables();
|
|
pLzcState->exit_stat = Nu_LZCGetcCRC(pLzcState, &c);
|
|
if (pLzcState->exit_stat != kNuErrNone)
|
|
return;
|
|
if (c == kNuLZCEOF)
|
|
break;
|
|
pLzcState->in_count++;
|
|
#ifdef COMP40
|
|
}
|
|
#endif
|
|
}
|
|
pLzcState->prefxcode = (INTCODE)c;
|
|
}
|
|
}
|
|
}
|
|
Nu_LZC_putcode(pLzcState, pLzcState->prefxcode, pLzcState->bits);
|
|
Nu_LZC_putcode(pLzcState, CLEAR, 0);
|
|
/*
|
|
* Print out stats on stderr
|
|
*/
|
|
if(1 /*zcat_flg == 0 && !quiet*/) {
|
|
#ifdef DEBUG_LZC
|
|
DBUG((
|
|
"%ld chars in, (%ld bytes) out, compression factor: ",
|
|
pLzcState->in_count, pLzcState->bytes_out ));
|
|
Nu_prratio( pLzcState->in_count, pLzcState->bytes_out );
|
|
DBUG(( "\n"));
|
|
DBUG(( "\tCompression as in compact: " ));
|
|
Nu_prratio( pLzcState->in_count-pLzcState->bytes_out, pLzcState->in_count );
|
|
DBUG(( "\n"));
|
|
DBUG(( "\tLargest code (of last block) was %d (%d bits)\n",
|
|
pLzcState->prefxcode - 1, pLzcState->bits ));
|
|
#else
|
|
DBUG(( "Compression: " ));
|
|
Nu_prratio( pLzcState->in_count-pLzcState->bytes_out, pLzcState->in_count );
|
|
#endif /* DEBUG_LZC */
|
|
}
|
|
if(pLzcState->bytes_out > pLzcState->in_count) /* if no savings */
|
|
pLzcState->exit_stat = NOSAVING;
|
|
*pDstLen = pLzcState->bytes_out;
|
|
return ;
|
|
}
|
|
|
|
|
|
/*
|
|
* NufxLib interface to LZC compression.
|
|
*/
|
|
static NuError
|
|
Nu_CompressLZC(NuArchive* pArchive, NuStraw* pStraw, FILE* fp,
|
|
ulong srcLen, ulong* pDstLen, ushort* pCrc, int maxbits)
|
|
{
|
|
NuError err = kNuErrNone;
|
|
LZCState lzcState;
|
|
|
|
memset(&lzcState, 0, sizeof(lzcState));
|
|
lzcState.pArchive = pArchive;
|
|
lzcState.pStraw = pStraw;
|
|
lzcState.outfp = fp;
|
|
lzcState.uncompRemaining = srcLen;
|
|
|
|
if (pCrc == nil) {
|
|
lzcState.doCalcCRC = false;
|
|
} else {
|
|
lzcState.doCalcCRC = true;
|
|
lzcState.crc = *pCrc;
|
|
}
|
|
|
|
lzcState.maxbits = maxbits;
|
|
lzcState.block_compress = BLOCK_MASK; /* enabled */
|
|
|
|
Nu_LZC_compress(&lzcState, pDstLen);
|
|
err = lzcState.exit_stat;
|
|
DBUG(("+++ LZC_compress returned with %d\n", err));
|
|
|
|
#if (SPLIT_HT)
|
|
free_array(CODE,lzcState.ht[1], 0);
|
|
free_array(CODE,lzcState.ht[0], 0);
|
|
#else
|
|
free_array(CODE,lzcState.ht, 0);
|
|
#endif
|
|
|
|
#if (SPLIT_PFX)
|
|
free_array(CODE,lzcState.pfx[1], 128);
|
|
free_array(CODE,lzcState.pfx[0], 128);
|
|
#else
|
|
free_array(CODE,lzcState.pfx, 256);
|
|
#endif
|
|
free_array(char,lzcState.sfx, 256);
|
|
|
|
if (pCrc != nil)
|
|
*pCrc = lzcState.crc;
|
|
|
|
return err;
|
|
}
|
|
|
|
NuError
|
|
Nu_CompressLZC12(NuArchive* pArchive, NuStraw* pStraw, FILE* fp,
|
|
ulong srcLen, ulong* pDstLen, ushort* pCrc)
|
|
{
|
|
return Nu_CompressLZC(pArchive, pStraw, fp, srcLen, pDstLen, pCrc, 12);
|
|
}
|
|
|
|
NuError
|
|
Nu_CompressLZC16(NuArchive* pArchive, NuStraw* pStraw, FILE* fp,
|
|
ulong srcLen, ulong* pDstLen, ushort* pCrc)
|
|
{
|
|
return Nu_CompressLZC(pArchive, pStraw, fp, srcLen, pDstLen, pCrc, 16);
|
|
}
|
|
|
|
|
|
/*
|
|
* ===========================================================================
|
|
* Expansion
|
|
* ===========================================================================
|
|
*/
|
|
|
|
/*
|
|
* Write the next byte to the output funnel. Also updates the CRC
|
|
* if "doCalcCRC" is set to true.
|
|
*
|
|
* Returns kNuLZCEOF as the value when we're out of data.
|
|
*/
|
|
static NuError
|
|
Nu_LZCPutcCRC(LZCState* pLzcState, char c)
|
|
{
|
|
NuError err;
|
|
|
|
err = Nu_FunnelWrite(pLzcState->pArchive, pLzcState->pFunnel, &c, 1);
|
|
if (pLzcState->doCalcCRC)
|
|
pLzcState->crc = Nu_CalcCRC16(pLzcState->crc, &c, 1);
|
|
|
|
return err;
|
|
}
|
|
|
|
|
|
static int
|
|
Nu_LZC_nextcode(LZCState* pLzcState, INTCODE* codeptr)
|
|
/* Get the next code from input and put it in *codeptr.
|
|
* Return (TRUE) on success, or return (FALSE) on end-of-file.
|
|
* Adapted from COMPRESS V4.0.
|
|
*/
|
|
{
|
|
/*static int prevbits = 0;*/
|
|
register INTCODE code;
|
|
/*static int size;*/
|
|
/*static UCHAR inbuf[MAXBITS];*/
|
|
register int shift;
|
|
UCHAR *bp;
|
|
|
|
/* If the next entry is a different bit-size than the preceeding one
|
|
* then we must adjust the size and scrap the old buffer.
|
|
*/
|
|
if (pLzcState->prevbits != pLzcState->bits) {
|
|
pLzcState->prevbits = pLzcState->bits;
|
|
pLzcState->size = 0;
|
|
}
|
|
/* If we can't read another code from the buffer, then refill it.
|
|
*/
|
|
shift = pLzcState->offset;
|
|
if (pLzcState->size - shift < pLzcState->bits) {
|
|
/* Read more input and convert size from # of bytes to # of bits */
|
|
long getSize;
|
|
|
|
getSize = pLzcState->bits;
|
|
if (getSize > pLzcState->compRemaining)
|
|
getSize = pLzcState->compRemaining;
|
|
if (!getSize) /* act like EOF */
|
|
return FALSE;
|
|
pLzcState->size = fread(pLzcState->inbuf, 1, getSize, pLzcState->infp) << 3;
|
|
if (pLzcState->size <= 0 || ferror(pLzcState->infp))
|
|
return(FALSE);
|
|
pLzcState->compRemaining -= getSize;
|
|
pLzcState->offset = shift = 0;
|
|
}
|
|
/* Get to the first byte. */
|
|
bp = pLzcState->inbuf + (shift >> 3);
|
|
/* Get first part (low order bits) */
|
|
code = (*bp++ >> (shift &= 7));
|
|
/* high order bits. */
|
|
code |= *bp++ << (shift = 8 - shift);
|
|
if ((shift += 8) < pLzcState->bits) code |= *bp << shift;
|
|
*codeptr = code & pLzcState->highcode;
|
|
pLzcState->offset += pLzcState->bits;
|
|
return (TRUE);
|
|
}
|
|
|
|
static void
|
|
Nu_LZC_decompress(LZCState* pLzcState, ulong compressedLen)
|
|
{
|
|
NuArchive* pArchive = pLzcState->pArchive;
|
|
register int i;
|
|
register INTCODE code;
|
|
char sufxchar = 0;
|
|
INTCODE savecode;
|
|
FLAG fulltable = FALSE, cleartable;
|
|
/*static*/ char *token= NULL; /* String buffer to build token */
|
|
/*static*/ int maxtoklen = MAXTOKLEN;
|
|
int flags;
|
|
|
|
Assert(pLzcState->infp != nil);
|
|
|
|
pLzcState->exit_stat = OK;
|
|
|
|
if (compressedLen < 3) {
|
|
/* not long enough to be valid! */
|
|
pLzcState->exit_stat = kNuErrBadData;
|
|
Nu_ReportError(NU_BLOB, pLzcState->exit_stat, "thread too short to be valid LZC");
|
|
return;
|
|
}
|
|
pLzcState->compRemaining = compressedLen;
|
|
|
|
/*
|
|
* This comes out of "compress.c" rather than "compapi.c".
|
|
*/
|
|
if ((getc(pLzcState->infp)!=(gNu_magic_header[0] & 0xFF))
|
|
|| (getc(pLzcState->infp)!=(gNu_magic_header[1] & 0xFF)))
|
|
{
|
|
DBUG(("not in compressed format\n"));
|
|
pLzcState->exit_stat = kNuErrBadData;
|
|
return;
|
|
}
|
|
flags = getc(pLzcState->infp); /* set -b from file */
|
|
pLzcState->block_compress = flags & BLOCK_MASK;
|
|
pLzcState->maxbits = flags & BIT_MASK;
|
|
if(pLzcState->maxbits > MAXBITS) {
|
|
DBUG(("compressed with %d bits, can only handle %d bits\n",
|
|
pLzcState->maxbits, MAXBITS));
|
|
pLzcState->exit_stat = kNuErrBadData;
|
|
return;
|
|
}
|
|
|
|
pLzcState->compRemaining -= 3;
|
|
|
|
/* Initialze the token buffer. */
|
|
token = (char*)Nu_Malloc(pArchive, maxtoklen);
|
|
if (token == NULL) {
|
|
pLzcState->exit_stat = NOMEM;
|
|
return;
|
|
}
|
|
|
|
if (Nu_LZC_alloc_tables(pLzcState, pLzcState->maxcode = ~(~(INTCODE)0 << pLzcState->maxbits),0)) /* exit_stat already set */
|
|
return;
|
|
|
|
#if 0
|
|
/* if not zcat or filter */
|
|
if(is_list && !zcat_flg) { /* Open output file */
|
|
if (freopen(ofname, WRITE_FILE_TYPE, stdout) == NULL) {
|
|
pLzcState->exit_stat = NOTOPENED;
|
|
return;
|
|
}
|
|
if (!quiet)
|
|
fprintf(stderr, "%s: ",ifname);
|
|
setvbuf(stdout,xbuf,_IOFBF,XBUFSIZE);
|
|
}
|
|
#endif
|
|
|
|
cleartable = TRUE;
|
|
savecode = CLEAR;
|
|
pLzcState->offset = 0;
|
|
do {
|
|
if ((code = savecode) == CLEAR && cleartable) {
|
|
pLzcState->highcode = ~(~(INTCODE)0 << (pLzcState->bits = INITBITS));
|
|
fulltable = FALSE;
|
|
pLzcState->nextfree = (cleartable = pLzcState->block_compress) == FALSE ? 256 : FIRSTFREE;
|
|
if (!Nu_LZC_nextcode(pLzcState, &pLzcState->prefxcode))
|
|
break;
|
|
/*putc((*/sufxchar = (char)pLzcState->prefxcode/*), stdout)*/;
|
|
pLzcState->exit_stat = Nu_LZCPutcCRC(pLzcState, sufxchar);
|
|
if (pLzcState->exit_stat != kNuErrNone)
|
|
return;
|
|
continue;
|
|
}
|
|
i = 0;
|
|
if (code >= pLzcState->nextfree && !fulltable) {
|
|
if (code != pLzcState->nextfree){
|
|
DBUG(("ERROR: code (0x%x) != nextfree (0x%x)\n",
|
|
code, pLzcState->nextfree));
|
|
pLzcState->exit_stat = CODEBAD;
|
|
return ; /* Non-existant code */
|
|
}
|
|
/* Special case for sequence KwKwK (see text of article) */
|
|
code = pLzcState->prefxcode;
|
|
token[i++] = sufxchar;
|
|
}
|
|
/* Build the token string in reverse order by chasing down through
|
|
* successive prefix tokens of the current token. Then output it.
|
|
*/
|
|
while (code >= 256) {
|
|
#ifdef DEBUG_LZC
|
|
/* These are checks to ease paranoia. Prefix codes must decrease
|
|
* monotonically, otherwise we must have corrupt tables. We can
|
|
* also check that we haven't overrun the token buffer.
|
|
*/
|
|
if (code <= (INTCODE)prefix(code)){
|
|
pLzcState->exit_stat= TABLEBAD;
|
|
return;
|
|
}
|
|
#endif
|
|
if (i >= maxtoklen) {
|
|
maxtoklen *= 2; /* double the size of the token buffer */
|
|
if ((token = Nu_Realloc(pArchive, token, maxtoklen)) == NULL) {
|
|
pLzcState->exit_stat = TOKTOOBIG;
|
|
return;
|
|
}
|
|
}
|
|
token[i++] = suffix(code);
|
|
code = (INTCODE)prefix(code);
|
|
}
|
|
/*putc(*/sufxchar = (char)code/*, stdout)*/;
|
|
pLzcState->exit_stat = Nu_LZCPutcCRC(pLzcState, sufxchar);
|
|
while (--i >= 0) {
|
|
/*putc(token[i], stdout);*/
|
|
pLzcState->exit_stat = Nu_LZCPutcCRC(pLzcState, token[i]);
|
|
}
|
|
if (pLzcState->exit_stat != kNuErrNone)
|
|
return;
|
|
/* If table isn't full, add new token code to the table with
|
|
* codeprefix and codesuffix, and remember current code.
|
|
*/
|
|
if (!fulltable) {
|
|
code = pLzcState->nextfree;
|
|
Assert(256 <= code && code <= pLzcState->maxcode);
|
|
prefix(code) = (CODE)pLzcState->prefxcode;
|
|
suffix(code) = sufxchar;
|
|
pLzcState->prefxcode = savecode;
|
|
if (code++ == pLzcState->highcode) {
|
|
if (pLzcState->highcode >= pLzcState->maxcode) {
|
|
fulltable = TRUE;
|
|
--code;
|
|
}
|
|
else {
|
|
++pLzcState->bits;
|
|
pLzcState->highcode += code; /* nextfree == highcode + 1 */
|
|
}
|
|
}
|
|
pLzcState->nextfree = code;
|
|
}
|
|
} while (Nu_LZC_nextcode(pLzcState, &savecode));
|
|
pLzcState->exit_stat = (ferror(pLzcState->infp))? READERR : OK;
|
|
|
|
Nu_Free(pArchive, token);
|
|
return ;
|
|
}
|
|
|
|
|
|
/*
|
|
* NufxLib interface to LZC expansion.
|
|
*/
|
|
NuError
|
|
Nu_ExpandLZC(NuArchive* pArchive, const NuRecord* pRecord,
|
|
const NuThread* pThread, FILE* infp, NuFunnel* pFunnel, ushort* pCrc)
|
|
{
|
|
NuError err = kNuErrNone;
|
|
LZCState lzcState;
|
|
|
|
memset(&lzcState, 0, sizeof(lzcState));
|
|
lzcState.pArchive = pArchive;
|
|
lzcState.infp = infp;
|
|
lzcState.pFunnel = pFunnel;
|
|
|
|
if (pCrc == nil) {
|
|
lzcState.doCalcCRC = false;
|
|
} else {
|
|
lzcState.doCalcCRC = true;
|
|
lzcState.crc = *pCrc;
|
|
}
|
|
|
|
Nu_LZC_decompress(&lzcState, pThread->thCompThreadEOF);
|
|
err = lzcState.exit_stat;
|
|
DBUG(("+++ LZC_decompress returned with %d\n", err));
|
|
|
|
#if (SPLIT_HT)
|
|
free_array(CODE,lzcState.ht[1], 0);
|
|
free_array(CODE,lzcState.ht[0], 0);
|
|
#else
|
|
free_array(CODE,lzcState.ht, 0);
|
|
#endif
|
|
|
|
#if (SPLIT_PFX)
|
|
free_array(CODE,lzcState.pfx[1], 128);
|
|
free_array(CODE,lzcState.pfx[0], 128);
|
|
#else
|
|
free_array(CODE,lzcState.pfx, 256);
|
|
#endif
|
|
free_array(char,lzcState.sfx, 256);
|
|
|
|
if (pCrc != nil)
|
|
*pCrc = lzcState.crc;
|
|
return err;
|
|
}
|
|
|
|
#endif /*ENABLE_LZC*/
|