mirror of
https://github.com/fadden/ciderpress.git
synced 2024-11-26 17:49:21 +00:00
412 lines
11 KiB
C++
412 lines
11 KiB
C++
/*
|
|
* CiderPress
|
|
* Copyright (C) 2007 by faddenSoft, LLC. All Rights Reserved.
|
|
* See the file LICENSE for distribution terms.
|
|
*/
|
|
/*
|
|
* Implementation of SQueeze (RLE+Huffman) compression.
|
|
*
|
|
* This was ripped fairly directly from Squeeze.c in NufxLib. Because
|
|
* there's relatively little code, and providing direct access to the
|
|
* compression functions already in NuLib is a little unwieldy, I've just
|
|
* cut & pasted the necessary pieces here.
|
|
*/
|
|
#include "stdafx.h"
|
|
#include "Squeeze.h"
|
|
#include "NufxArchive.h"
|
|
|
|
#define kSqBufferSize 8192 /* must hold full SQ header, and % 128 */
|
|
|
|
#define kNuSQMagic 0xff76 /* magic value for file header */
|
|
#define kNuSQRLEDelim 0x90 /* RLE delimiter */
|
|
#define kNuSQEOFToken 256 /* distinguished stop symbol */
|
|
#define kNuSQNumVals 257 /* 256 symbols + stop */
|
|
|
|
|
|
/*
|
|
* ===========================================================================
|
|
* Unsqueeze
|
|
* ===========================================================================
|
|
*/
|
|
|
|
/*
|
|
* State during uncompression.
|
|
*/
|
|
typedef struct USQState {
|
|
unsigned long dataInBuffer;
|
|
unsigned char* dataPtr;
|
|
int bitPosn;
|
|
int bits;
|
|
|
|
/*
|
|
* Decoding tree; first "nodeCount" values are populated. Positive
|
|
* values are indices to another node in the tree, negative values
|
|
* are literals (+1 because "negative zero" doesn't work well).
|
|
*/
|
|
int nodeCount;
|
|
struct {
|
|
short child[2]; /* left/right kids, must be signed 16-bit */
|
|
} decTree[kNuSQNumVals-1];
|
|
} USQState;
|
|
|
|
|
|
/*
|
|
* Decode the next symbol from the Huffman stream.
|
|
*/
|
|
static NuError
|
|
USQDecodeHuffSymbol(USQState* pUsqState, int* pVal)
|
|
{
|
|
short val = 0;
|
|
int bits, bitPosn;
|
|
|
|
bits = pUsqState->bits; /* local copy */
|
|
bitPosn = pUsqState->bitPosn;
|
|
|
|
do {
|
|
if (++bitPosn > 7) {
|
|
/* grab the next byte and use that */
|
|
bits = *pUsqState->dataPtr++;
|
|
bitPosn = 0;
|
|
if (!pUsqState->dataInBuffer--)
|
|
return kNuErrBufferUnderrun;
|
|
|
|
val = pUsqState->decTree[val].child[1 & bits];
|
|
} else {
|
|
/* still got bits; shift right and use it */
|
|
val = pUsqState->decTree[val].child[1 & (bits >>= 1)];
|
|
}
|
|
} while (val >= 0);
|
|
|
|
/* val is negative literal; add one to make it zero-based then negate it */
|
|
*pVal = -(val + 1);
|
|
|
|
pUsqState->bits = bits;
|
|
pUsqState->bitPosn = bitPosn;
|
|
|
|
return kNuErrNone;
|
|
}
|
|
|
|
|
|
/*
|
|
* Read two bytes of signed data out of the buffer.
|
|
*/
|
|
static inline NuError
|
|
USQReadShort(USQState* pUsqState, short* pShort)
|
|
{
|
|
if (pUsqState->dataInBuffer < 2)
|
|
return kNuErrBufferUnderrun;
|
|
|
|
*pShort = *pUsqState->dataPtr++;
|
|
*pShort |= (*pUsqState->dataPtr++) << 8;
|
|
pUsqState->dataInBuffer -= 2;
|
|
|
|
return kNuErrNone;
|
|
}
|
|
|
|
/*
|
|
* Wrapper for fread(). Note the arguments resemble read(2) rather
|
|
* than fread(3S).
|
|
*/
|
|
static NuError
|
|
SQRead(FILE* fp, void* buf, size_t nbyte)
|
|
{
|
|
size_t result;
|
|
|
|
ASSERT(buf != nil);
|
|
ASSERT(nbyte > 0);
|
|
ASSERT(fp != nil);
|
|
|
|
errno = 0;
|
|
result = fread(buf, 1, nbyte, fp);
|
|
if (result != nbyte)
|
|
return errno ? (NuError)errno : kNuErrFileRead;
|
|
return kNuErrNone;
|
|
}
|
|
|
|
|
|
/*
|
|
* Expand "SQ" format. Archive file should already be seeked.
|
|
*
|
|
* Because we have a stop symbol, knowing the uncompressed length of
|
|
* the file is not essential.
|
|
*
|
|
* If "outExp" is nil, no output is produced (useful for "test" mode).
|
|
*/
|
|
NuError
|
|
UnSqueeze(FILE* fp, unsigned long realEOF, ExpandBuffer* outExp,
|
|
bool fullSqHeader, int blockSize)
|
|
{
|
|
NuError err = kNuErrNone;
|
|
USQState usqState;
|
|
unsigned long compRemaining, getSize;
|
|
unsigned short magic, fileChecksum, checksum; // fullSqHeader only
|
|
short nodeCount;
|
|
int i, inrep;
|
|
unsigned char* tmpBuf = nil;
|
|
unsigned char lastc = 0;
|
|
|
|
tmpBuf = (unsigned char*) malloc(kSqBufferSize);
|
|
if (tmpBuf == nil) {
|
|
err = kNuErrMalloc;
|
|
goto bail;
|
|
}
|
|
|
|
usqState.dataInBuffer = 0;
|
|
usqState.dataPtr = tmpBuf;
|
|
|
|
compRemaining = realEOF;
|
|
if ((fullSqHeader && compRemaining < 8) ||
|
|
(!fullSqHeader && compRemaining < 3))
|
|
{
|
|
err = kNuErrBadData;
|
|
WMSG0("too short to be valid SQ data\n");
|
|
goto bail;
|
|
}
|
|
|
|
/*
|
|
* Round up to the nearest 128-byte boundary. We need to read
|
|
* everything out of the file in case this is a streaming archive.
|
|
* Because the compressed data has an embedded stop symbol, it's okay
|
|
* to "overrun" the expansion code.
|
|
*/
|
|
if (blockSize != 0) {
|
|
compRemaining =
|
|
((compRemaining + blockSize-1) / blockSize) * blockSize;
|
|
}
|
|
|
|
/* want to grab up to kSqBufferSize bytes */
|
|
if (compRemaining > kSqBufferSize)
|
|
getSize = kSqBufferSize;
|
|
else
|
|
getSize = compRemaining;
|
|
|
|
/*
|
|
* Grab a big chunk. "compRemaining" is the amount of compressed
|
|
* data left in the file, usqState.dataInBuffer is the amount of
|
|
* compressed data left in the buffer.
|
|
*
|
|
* For BNY, we want to read 128-byte blocks.
|
|
*/
|
|
if (getSize) {
|
|
ASSERT(getSize <= kSqBufferSize);
|
|
err = SQRead(fp, usqState.dataPtr, getSize);
|
|
if (err != kNuErrNone) {
|
|
WMSG1("failed reading compressed data (%ld bytes)\n", getSize);
|
|
goto bail;
|
|
}
|
|
usqState.dataInBuffer += getSize;
|
|
if (getSize > compRemaining)
|
|
compRemaining = 0;
|
|
else
|
|
compRemaining -= getSize;
|
|
}
|
|
|
|
/* reset dataPtr */
|
|
usqState.dataPtr = tmpBuf;
|
|
|
|
/*
|
|
* Read the header. We assume that the header will fit in the
|
|
* compression buffer ( sq allowed 300+ for the filename, plus
|
|
* 257*2 for the tree, plus misc).
|
|
*/
|
|
ASSERT(kSqBufferSize > 1200);
|
|
if (fullSqHeader) {
|
|
err = USQReadShort(&usqState, (short*)&magic);
|
|
if (err != kNuErrNone)
|
|
goto bail;
|
|
if (magic != kNuSQMagic) {
|
|
err = kNuErrBadData;
|
|
WMSG0("bad magic number in SQ block\n");
|
|
goto bail;
|
|
}
|
|
|
|
err = USQReadShort(&usqState, (short*)&fileChecksum);
|
|
if (err != kNuErrNone)
|
|
goto bail;
|
|
|
|
checksum = 0;
|
|
|
|
/* skip over the filename */
|
|
while (*usqState.dataPtr++ != '\0')
|
|
usqState.dataInBuffer--;
|
|
usqState.dataInBuffer--;
|
|
}
|
|
|
|
err = USQReadShort(&usqState, &nodeCount);
|
|
if (err != kNuErrNone)
|
|
goto bail;
|
|
if (nodeCount < 0 || nodeCount >= kNuSQNumVals) {
|
|
err = kNuErrBadData;
|
|
WMSG1("invalid decode tree in SQ (%d nodes)\n", nodeCount);
|
|
goto bail;
|
|
}
|
|
usqState.nodeCount = nodeCount;
|
|
|
|
/* initialize for possibly empty tree (only happens on an empty file) */
|
|
usqState.decTree[0].child[0] = -(kNuSQEOFToken+1);
|
|
usqState.decTree[0].child[1] = -(kNuSQEOFToken+1);
|
|
|
|
/* read the nodes, ignoring "read errors" until we're done */
|
|
for (i = 0; i < nodeCount; i++) {
|
|
err = USQReadShort(&usqState, &usqState.decTree[i].child[0]);
|
|
err = USQReadShort(&usqState, &usqState.decTree[i].child[1]);
|
|
}
|
|
if (err != kNuErrNone) {
|
|
err = kNuErrBadData;
|
|
WMSG0("SQ data looks truncated at tree\n");
|
|
goto bail;
|
|
}
|
|
|
|
usqState.bitPosn = 99; /* force an immediate read */
|
|
|
|
/*
|
|
* Start pulling data out of the file. We have to Huffman-decode
|
|
* the input, and then feed that into an RLE expander.
|
|
*
|
|
* A completely lopsided (and broken) Huffman tree could require
|
|
* 256 tree descents, so we want to try to ensure we have at least 256
|
|
* bits in the buffer. Otherwise, we could get a false buffer underrun
|
|
* indication back from DecodeHuffSymbol.
|
|
*
|
|
* The SQ sources actually guarantee that a code will fit entirely
|
|
* in 16 bits, but there's no reason not to use the larger value.
|
|
*/
|
|
inrep = false;
|
|
while (1) {
|
|
int val;
|
|
|
|
if (usqState.dataInBuffer < 65 && compRemaining) {
|
|
/*
|
|
* Less than 256 bits, but there's more in the file.
|
|
*
|
|
* First thing we do is slide the old data to the start of
|
|
* the buffer.
|
|
*/
|
|
if (usqState.dataInBuffer) {
|
|
ASSERT(tmpBuf != usqState.dataPtr);
|
|
memmove(tmpBuf, usqState.dataPtr, usqState.dataInBuffer);
|
|
}
|
|
usqState.dataPtr = tmpBuf;
|
|
|
|
/*
|
|
* Next we read as much as we can.
|
|
*/
|
|
if (kSqBufferSize - usqState.dataInBuffer < compRemaining)
|
|
getSize = kSqBufferSize - usqState.dataInBuffer;
|
|
else
|
|
getSize = compRemaining;
|
|
|
|
ASSERT(getSize <= kSqBufferSize);
|
|
//WMSG2("Reading from offset=%ld (compRem=%ld)\n",
|
|
// ftell(fp), compRemaining);
|
|
err = SQRead(fp, usqState.dataPtr + usqState.dataInBuffer,
|
|
getSize);
|
|
if (err != kNuErrNone) {
|
|
WMSG2("failed reading compressed data (%ld bytes, err=%d)\n",
|
|
getSize, err);
|
|
goto bail;
|
|
}
|
|
usqState.dataInBuffer += getSize;
|
|
if (getSize > compRemaining)
|
|
compRemaining = 0;
|
|
else
|
|
compRemaining -= getSize;
|
|
|
|
ASSERT(compRemaining < 32767*65536);
|
|
ASSERT(usqState.dataInBuffer <= kSqBufferSize);
|
|
}
|
|
|
|
err = USQDecodeHuffSymbol(&usqState, &val);
|
|
if (err != kNuErrNone) {
|
|
WMSG0("failed decoding huff symbol\n");
|
|
goto bail;
|
|
}
|
|
|
|
if (val == kNuSQEOFToken)
|
|
break;
|
|
|
|
/*
|
|
* Feed the symbol into the RLE decoder.
|
|
*/
|
|
if (inrep) {
|
|
/*
|
|
* Last char was RLE delim, handle this specially. We use
|
|
* --val instead of val-- because we already emitted the
|
|
* first occurrence of the char (right before the RLE delim).
|
|
*/
|
|
if (val == 0) {
|
|
/* special case -- just an escaped RLE delim */
|
|
lastc = kNuSQRLEDelim;
|
|
val = 2;
|
|
}
|
|
while (--val) {
|
|
/*if (pCrc != nil)
|
|
*pCrc = Nu_CalcCRC16(*pCrc, &lastc, 1);*/
|
|
if (outExp != nil)
|
|
outExp->Putc(lastc);
|
|
if (fullSqHeader) {
|
|
checksum += lastc;
|
|
}
|
|
}
|
|
inrep = false;
|
|
} else {
|
|
/* last char was ordinary */
|
|
if (val == kNuSQRLEDelim) {
|
|
/* set a flag and catch the count the next time around */
|
|
inrep = true;
|
|
} else {
|
|
lastc = val;
|
|
/*if (pCrc != nil)
|
|
*pCrc = Nu_CalcCRC16(*pCrc, &lastc, 1);*/
|
|
if (outExp != nil)
|
|
outExp->Putc(lastc);
|
|
if (fullSqHeader) {
|
|
checksum += lastc;
|
|
}
|
|
}
|
|
}
|
|
|
|
}
|
|
|
|
if (inrep) {
|
|
err = kNuErrBadData;
|
|
WMSG0("got stop symbol when run length expected\n");
|
|
goto bail;
|
|
}
|
|
|
|
if (fullSqHeader) {
|
|
/* verify the checksum stored in the SQ file */
|
|
if (checksum != fileChecksum) {
|
|
err = kNuErrBadDataCRC;
|
|
WMSG2("expected 0x%04x, got 0x%04x (SQ)\n", fileChecksum, checksum);
|
|
goto bail;
|
|
} else {
|
|
WMSG1("--- SQ checksums match (0x%04x)\n", checksum);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Gobble up any unused bytes in the last 128-byte block. There
|
|
* shouldn't be more than that left over.
|
|
*/
|
|
if (compRemaining > kSqBufferSize) {
|
|
err = kNuErrBadData;
|
|
WMSG1("wow: found %ld bytes left over\n", compRemaining);
|
|
goto bail;
|
|
}
|
|
if (compRemaining) {
|
|
WMSG1("+++ slurping up last %ld bytes\n", compRemaining);
|
|
err = SQRead(fp, tmpBuf, compRemaining);
|
|
if (err != kNuErrNone) {
|
|
WMSG0("failed reading leftovers\n");
|
|
goto bail;
|
|
}
|
|
}
|
|
|
|
bail:
|
|
//if (outfp != nil)
|
|
// fflush(outfp);
|
|
free(tmpBuf);
|
|
return err;
|
|
}
|