/* * NuFX archive manipulation library * Copyright (C) 2000-2007 by Andy McFadden, All Rights Reserved. * This is free software; you can redistribute it and/or modify it under the * terms of the BSD License, see the file COPYING-LIB. * * ShrinkIt LZW functions. The original code was developed by Kent Dickey * and Andy Nicholas. * * Unisys holds US patent #4,558,302 (filed June 20, 1983 and issued December * 10, 1985). A policy set in 1995 specifies the lifetime of a patent as * the longer of 20 years from the date of application or 17 years from the * date of grant, so the Unisys LZW patent expired on June 20, 2003 in the * USA. Patents in some other countries expire after July 7, 2004. * * An older note: * * The Unisys patent is one of many that covers LZW compression, but Unisys * is the only company actively attacking anyone who uses it. The statement * Unisys made regarding LZW (and, specifically, GIF and TIFF-LZW) says: * * Q: I use LZW in my programs, but not for GIF or TIFF graphics. What should * I do? * A: If you are not a business, and the programs are for your own personal * non-commercial or not-for-profit use, Unisys does not require you to * obtain a license. If they are used as part of a business and/or you sell * the programs for commercial or for-profit purposes, then you must contact * the Welch Patent Licensing Department at Unisys and explain your * circumstances. They will have a license agreement for your application of * their LZW algorithm. * * According to this, the use of LZW in NufxLib has never required a license. */ #include "NufxLibPriv.h" #ifdef ENABLE_LZW /* the LZW algorithms operate on 4K chunks */ #define kNuLZWBlockSize 4096 /* a little padding to avoid mysterious crashes on bad data */ #define kNuSafetyPadding 64 #define kNuLZWClearCode 0x0100 #define kNuLZWFirstCode 0x0101 /* sometimes we want to get *really* verbose rather late in a large archive */ #ifdef DEBUG_LZW static Boolean gNuDebugVerbose = true; #define DBUG_LZW(x) { if (gNuDebugVerbose) { DBUG(x); } } #else #define DBUG_LZW ((void)0) #endif /* * =========================================================================== * Compression * =========================================================================== */ /* * We use a hash function borrowed from UNIX compress, which is described * in the v4.3 sources as: * * Algorithm: use open addressing double hashing (no chaining) on the * prefix code / next character combination. We do a variant of Knuth's * algorithm D (vol. 3, sec. 6.4) along with G. Knott's relatively-prime * secondary probe. Here, the modular division first probe is gives way * to a faster exclusive-or manipulation. * * The function used to generate it is: * * int c, hashf[256]; * for (c = 256; --c >= 0; ) { * hashf[c] = (((c & 0x7) << 7) ^ c) << (maxbits-10); * } * * It is used with: * * hash = prefixcode ^ hashf[c]; \* c is char from getchar() *\ * * The value for kNuLZWHashSize determines the size of the hash table and * the % occupancy. We want a fair number of vacancies because we probe * when we collide. Using 5119 (0x13ff) with 12-bit codes yields 75% * occupancy. */ #define kNuLZWHashSize 5119 /* must be prime */ #define kNuLZWEntryUnused 0 /* indicates an unused hash entry */ #define kNuLZWHashFuncTblSize 256 /* one entry per char value */ #define kNuLZWDefaultVol 0xfe /* use this as volume number */ #define kNuLZWHashDelta 0x120 /* used in secondary hashing */ #define kNuLZWMinCode kNuLZWClearCode /* smallest 12-bit LZW code */ #define kNuLZWMaxCode 0x0fff /* largest 12-bit LZW code */ #define kNuLZW2StopCode 0x0ffd /* LZW/2 stops here */ /* * Mask of bits, from 0 to 8. */ static const int gNuBitMask[] = { 0x00, 0x01, 0x03, 0x07, 0x0f, 0x1f, 0x3f, 0x7f, 0xff }; #define kNuRLEDefaultEscape 0xdb /* ShrinkIt standard */ /* * This holds all of the "big" dynamic state, plus a few things that I * don't want to pass around. It's allocated once for each instance of * an open archive, and re-used. * * The hash table consists of three parts. We have a choice for some of * them, "ushort" or "uint". With "ushort" it uses less memory and is * more likely to fit in a CPU cache, but on some processors you have to * add instructions to manipulate 16-bit values in a 32-bit word. I'm * guessing "ushort" is better overall. */ typedef struct LZWCompressState { NuArchive* pArchive; uint16_t entry[kNuLZWHashSize]; /* uint or ushort */ uint16_t prefix[kNuLZWMaxCode+1]; /* uint or ushort */ uint8_t suffix[kNuLZWMaxCode+1]; uint16_t hashFunc[kNuLZWHashFuncTblSize]; /* uint or ushort */ uint8_t inputBuf[kNuLZWBlockSize]; /* 4K of raw input */ uint8_t rleBuf[kNuLZWBlockSize*2 + kNuSafetyPadding]; uint8_t lzwBuf[(kNuLZWBlockSize * 3) / 2 + kNuSafetyPadding]; uint16_t chunkCrc; /* CRC for LZW/1 */ /* LZW/2 state variables */ int nextFree; int codeBits; int highCode; Boolean initialClear; } LZWCompressState; /* * Allocate some "reusable" state for LZW compression. * * The only thing that really needs to be retained across calls is * the hash function. This way we don't have to re-create it for * every file, or store it statically in the binary. */ static NuError Nu_AllocLZWCompressState(NuArchive* pArchive) { NuError err; LZWCompressState* lzwState; int ic; Assert(pArchive != NULL); Assert(pArchive->lzwCompressState == NULL); /* allocate the general-purpose compression buffer, if needed */ err = Nu_AllocCompressionBufferIFN(pArchive); if (err != kNuErrNone) return err; pArchive->lzwCompressState = Nu_Malloc(pArchive, sizeof(LZWCompressState)); if (pArchive->lzwCompressState == NULL) return kNuErrMalloc; /* * The "hashFunc" table only needs to be set up once. */ lzwState = pArchive->lzwCompressState; for (ic = 256; --ic >= 0; ) lzwState->hashFunc[ic] = (((ic & 0x7) << 7) ^ ic) << 2; return kNuErrNone; } /* * Compress a block of input from lzwState->inputBuf to lzwState->rleBuf. * The size of the output is returned in "*pRLESize" (will be zero if the * block expanded instead of compressing). * * The maximum possible size of the output is 2x the original, which can * only occur if the input is an alternating sequence of RLE delimiters * and non-delimiters. It requires 3 bytes to encode a solitary 0xdb, * so you get (4096 / 2) non-delimiters plus (4096 / 2) * 3 RLE-encoded * delimiters. We deal with this by using an 8K output buffer, so we * don't have to watch for overflow in the inner loop. * * The RLE format is " ", where count is zero-based * (i.e. for three bytes we encode "2", allowing us to express 1-256). */ static NuError Nu_CompressBlockRLE(LZWCompressState* lzwState, int* pRLESize) { const uint8_t* inPtr = lzwState->inputBuf; const uint8_t* endPtr = inPtr + kNuLZWBlockSize; uint8_t* outPtr = lzwState->rleBuf; uint8_t matchChar; int matchCount; while (inPtr < endPtr) { matchChar = *inPtr; matchCount = 1; /* count up the matching chars */ while (*++inPtr == matchChar && inPtr < endPtr) matchCount++; if (matchCount > 3) { if (matchCount > 256) { /* rare case - really long match */ while (matchCount > 256) { *outPtr++ = kNuRLEDefaultEscape; *outPtr++ = matchChar; *outPtr++ = 255; matchCount -= 256; } /* take care of the odd bits -- which might not form a run! */ if (matchCount > 3) { *outPtr++ = kNuRLEDefaultEscape; *outPtr++ = matchChar; *outPtr++ = matchCount -1; } else { while (matchCount--) *outPtr++ = matchChar; } } else { /* common case */ *outPtr++ = kNuRLEDefaultEscape; *outPtr++ = matchChar; *outPtr++ = matchCount -1; } } else { if (matchChar == kNuRLEDefaultEscape) { /* encode 1-3 0xDBs */ *outPtr++ = kNuRLEDefaultEscape; *outPtr++ = kNuRLEDefaultEscape; *outPtr++ = matchCount -1; } else { while (matchCount--) *outPtr++ = matchChar; } } } *pRLESize = outPtr - lzwState->rleBuf; Assert(*pRLESize > 0 && *pRLESize < sizeof(lzwState->rleBuf)); return kNuErrNone; } /* * Clear the LZW table. Also resets the LZW/2 state. */ static void Nu_ClearLZWTable(LZWCompressState* lzwState) { Assert(lzwState != NULL); /*DBUG_LZW(("### clear table\n"));*/ /* reset table entries */ Assert(kNuLZWEntryUnused == 0); /* make sure this is okay */ memset(lzwState->entry, 0, sizeof(lzwState->entry)); /* reset state variables */ lzwState->nextFree = kNuLZWFirstCode; lzwState->codeBits = 9; lzwState->highCode = ~(~0 << lzwState->codeBits); /* a/k/a 0x01ff */ lzwState->initialClear = false; } /* * Write a variable-width LZW code to the output. "prefixCode" has the * value to write, and "codeBits" is the width. * * Data is written in little-endian order (lowest byte first). The * putcode function in LZC is probably faster, but the format isn't * compatible with SHK. * * The worst conceivable expansion for LZW is 12 bits of output for every * byte of input. Because we're using variable-width codes and LZW is * reasonably effective at finding matches, the actual expansion will * certainly be less. Throwing the extra 2K onto the end of the buffer * saves us from having to check for a buffer overflow here. * * On exit, "*pOutBuf" will point PAST the last byte we wrote (even if * it's a partial byte), and "*pAtBit" will contain the bit offset. * * (Turning this into a macro might speed things up.) */ static inline void Nu_LZWPutCode(uint8_t** pOutBuf, uint32_t prefixCode, int codeBits, int* pAtBit) { int atBit = *pAtBit; uint8_t* outBuf = *pOutBuf; /*DBUG_LZW(("### PUT: prefixCode=0x%04lx, codeBits=%d, atBit=%d\n", prefixCode, codeBits, atBit));*/ Assert(atBit >= 0 && atBit < sizeof(gNuBitMask)); if (atBit) { /* align the prefix code with the existing byte */ prefixCode <<= atBit; /* merge it with the buffer contents (if necessary) and write lo bits */ outBuf--; *outBuf = (uint8_t)((*outBuf & gNuBitMask[atBit]) | prefixCode); outBuf++; } else { /* nothing to merge with; write lo byte at next posn and advance */ *outBuf++ = (uint8_t)prefixCode; } /* codes are at least 9 bits, so we know we have to write one more */ *outBuf++ = (uint8_t)(prefixCode >> 8); /* in some cases, we may have to write yet another */ atBit += codeBits; if (atBit > 16) *outBuf++ = (uint8_t)(prefixCode >> 16); *pAtBit = atBit & 0x07; *pOutBuf = outBuf; } /* * Compress a block of data with LZW, from "inputBuf" to lzwState->lzwBuf. * * LZW/1 is just like LZW/2, except that for the former the table is * always cleared before this function is called. Because of this, the * table never fills completely, so none of the table-overflow code * ever happens. * * This function is patterned after the LZC compress function, rather * than the NuLib LZW code, because the NuLib code was abysmal (a rather * straight translation from 6502 assembly). This function differs from LZC * in a few areas in order to make the output match GS/ShrinkIt. * * There is a (deliberate) minor bug here: if a table clear is emitted * when there is only one character left in the input, nothing will be * added to the hash table (as there is nothing to add) but "nextFree" * will be advanced. This mimics GSHK's behavior, and accounts for the * "resetFix" logic in the expansion functions. Code 0x0101 is essentially * lost in this situation. */ static NuError Nu_CompressLZWBlock(LZWCompressState* lzwState, const uint8_t* inputBuf, int inputCount, int* pOutputCount) { int nextFree, ic, atBit, codeBits; int hash, hashDelta; int prefixCode, code, highCode; const uint8_t* inputEnd = inputBuf + inputCount; /* local copies of lzwState members, for speed */ const uint16_t* pHashFunc = lzwState->hashFunc; uint16_t* pEntry = lzwState->entry; uint16_t* pPrefix = lzwState->prefix; uint8_t* pSuffix = lzwState->suffix; uint8_t* outBuf = lzwState->lzwBuf; Assert(lzwState != NULL); Assert(inputBuf != NULL); Assert(inputCount > 0 && inputCount <= kNuLZWBlockSize); /* make sure nobody has been messing with the types */ Assert(sizeof(pHashFunc[0]) == sizeof(lzwState->hashFunc[0])); Assert(sizeof(pEntry[0]) == sizeof(lzwState->entry[0])); Assert(sizeof(pPrefix[0]) == sizeof(lzwState->prefix[0])); Assert(sizeof(pSuffix[0]) == sizeof(lzwState->suffix[0])); /*DBUG_LZW(("### START LZW (nextFree=0x%04x)\n", lzwState->nextFree));*/ atBit = 0; if (lzwState->initialClear) { /*DBUG_LZW(("### initialClear set\n"));*/ codeBits = lzwState->codeBits; Nu_LZWPutCode(&outBuf, kNuLZWClearCode, codeBits, &atBit); Nu_ClearLZWTable(lzwState); } table_cleared: /* recover our state (or get newly-cleared state) */ nextFree = lzwState->nextFree; codeBits = lzwState->codeBits; highCode = lzwState->highCode; prefixCode = *inputBuf++; /*DBUG_LZW(("### fchar=0x%02x\n", prefixCode));*/ while (inputBuf < inputEnd) { ic = *inputBuf++; /*DBUG_LZW(("### char=0x%02x\n", ic));*/ hash = prefixCode ^ pHashFunc[ic]; code = pEntry[hash]; if (code != kNuLZWEntryUnused) { /* something is here, either our prefix or a hash collision */ if (pSuffix[code] != ic || pPrefix[code] != prefixCode) { /* we've collided; do the secondary probe */ hashDelta = (kNuLZWHashDelta - ic) << 2; do { /* rehash and keep looking */ Assert(code >= kNuLZWMinCode && code <= kNuLZWMaxCode); if (hash >= hashDelta) hash -= hashDelta; else hash += kNuLZWHashSize - hashDelta; Assert(hash >= 0 && hash < kNuLZWHashSize); if ((code = pEntry[hash]) == kNuLZWEntryUnused) goto new_code; } while (pSuffix[code] != ic || pPrefix[code] != prefixCode); } /* else we found a matching string, and can keep searching */ prefixCode = code; } else { /* found an empty entry, add the prefix+suffix to the table */ new_code: Nu_LZWPutCode(&outBuf, prefixCode, codeBits, &atBit); Assert(outBuf < lzwState->lzwBuf + sizeof(lzwState->lzwBuf)); /*DBUG_LZW(("### outBuf now at +%d\n",outBuf - lzwState->lzwBuf));*/ code = nextFree; Assert(hash < kNuLZWHashSize); Assert(code >= kNuLZWMinCode); Assert(code <= kNuLZWMaxCode); /* * GSHK accepts 0x0ffd, and then sends the table clear * immediately. We could improve on GSHK's compression slightly * by using the entire table, but I want to generate the exact * same output as GSHK. (The decoder believes the table clear * is entry 0xffe, so we've got one more coming, and possibly * two if we tweak getcode slightly.) * * Experiments show that switching to 0xffe increases the size * of files that don't compress well, and decreases the size * of files that do. In both cases, the difference in size * is very small. */ Assert(code <= kNuLZW2StopCode); /*if (code <= kNuLZW2StopCode) {*/ /*DBUG_LZW(("### added new code 0x%04x prefix=0x%04x ch=0x%02x\n", code, prefixCode, ic));*/ pEntry[hash] = code; pPrefix[code] = prefixCode; pSuffix[code] = ic; /* * Check and see if it's time to increase the code size (note * we flip earlier than LZC by one here). */ if (code >= highCode) { highCode += code +1; codeBits++; } nextFree++; /*}*/ prefixCode = ic; /* if the table is full, clear it (only for LZW/2) */ if (code == kNuLZW2StopCode) { /* output last code */ Nu_LZWPutCode(&outBuf, prefixCode, codeBits, &atBit); if (inputBuf < inputEnd) { /* still have data, keep going */ Nu_LZWPutCode(&outBuf, kNuLZWClearCode, codeBits, &atBit); Nu_ClearLZWTable(lzwState); goto table_cleared; } else { /* no more input, hold table clear for next block */ DBUG(("--- RARE: block-end clear\n")); lzwState->initialClear = true; goto table_clear_finish; } } Assert(nextFree <= kNuLZW2StopCode); } } /* * Output the last code. Since there's no following character, we don't * need to add an entry to the table... whatever we've found is already * in there. */ Nu_LZWPutCode(&outBuf, prefixCode, codeBits, &atBit); /* * Update the counters so LZW/2 has continuity. */ Assert(nextFree <= kNuLZW2StopCode); if (nextFree >= highCode) { highCode += nextFree +1; codeBits++; } nextFree++; /* make room for the code we just wrote */ if (nextFree > kNuLZW2StopCode) { /* * The code we just wrote, which was part of a longer string already * in the tree, took the last entry in the table. We need to clear * the table, but we can't do it in this block. We will have to * emit a table clear as the very first thing in the next block. */ DBUG(("--- RARE: block-end inter clear\n")); lzwState->initialClear = true; } table_clear_finish: /* save state for next pass through */ lzwState->nextFree = nextFree; lzwState->codeBits = codeBits; lzwState->highCode = highCode; Assert(inputBuf == inputEnd); *pOutputCount = outBuf - lzwState->lzwBuf; /* if (*pOutputCount < inputCount) { DBUG_LZW(("### compressed from %d to %d\n", inputCount, *pOutputCount)); } else { DBUG_LZW(("### NO compression (%d to %d)\n", inputCount,*pOutputCount)); } */ return kNuErrNone; } /* * Compress ShrinkIt-style "LZW/1" and "LZW/2". * * "*pThreadCrc" should already be set to its initial value. On exit it * will contain the CRC of the uncompressed data. * * On exit, the output file will be positioned past the last byte written. */ static NuError Nu_CompressLZW(NuArchive* pArchive, NuStraw* pStraw, FILE* fp, uint32_t srcLen, uint32_t* pDstLen, uint16_t* pThreadCrc, Boolean isType2) { NuError err = kNuErrNone; LZWCompressState* lzwState; long initialOffset; const uint8_t* lzwInputBuf; uint32_t blockSize, rleSize, lzwSize; long compressedLen; Boolean keepLzw; Assert(pArchive != NULL); Assert(pStraw != NULL); Assert(fp != NULL); Assert(srcLen > 0); Assert(pDstLen != NULL); Assert(pThreadCrc != NULL); Assert(isType2 == true || isType2 == false); /* * Do some initialization and set-up. */ if (pArchive->lzwCompressState == NULL) { err = Nu_AllocLZWCompressState(pArchive); BailError(err); } Assert(pArchive->lzwCompressState != NULL); Assert(pArchive->compBuf != NULL); lzwState = pArchive->lzwCompressState; lzwState->pArchive = pArchive; compressedLen = 0; /* * And now for something ugly: for LZW/1 we have to compute the CRC * twice. Old versions of ShrinkIt used LZW/1 and put the CRC in * the compressed block while newer versions used LZW/2 and put the * CRC in the thread header. We're using LZW/1 with the newer record * format, so we need two CRCs. For some odd reason Andy N. decided * to use 0xffff as the initial value for the thread one, so we can't * just store the same thing in two places. * * Of course, this also means that an LZW/2 chunk stored in an old * pre-v3 record wouldn't have a CRC at all... * * LZW/1 is included here for completeness. I can't think of a reason * why you'd want to use it, really. */ lzwState->chunkCrc = kNuInitialChunkCRC; /* 0x0000 */ /* * An LZW/1 file starts off with a CRC of the data, which means we * have to compress the whole thing, then seek back afterward and * write the value. This annoyance went away in LZW/2. */ err = Nu_FTell(fp, &initialOffset); BailError(err); if (!isType2) { putc(0, fp); /* leave space for CRC */ putc(0, fp); compressedLen += 2; } putc(kNuLZWDefaultVol, fp); putc(kNuRLEDefaultEscape, fp); compressedLen += 2; if (isType2) Nu_ClearLZWTable(lzwState); while (srcLen) { /* * Fill up the input buffer. */ blockSize = (srcLen > kNuLZWBlockSize) ? kNuLZWBlockSize : srcLen; err = Nu_StrawRead(pArchive, pStraw, lzwState->inputBuf, blockSize); if (err != kNuErrNone) { Nu_ReportError(NU_BLOB, err, "compression read failed"); goto bail; } /* * ShrinkIt was originally just going to be a 5.25" disk compressor, * so the compression functions were organized around 4K blocks (the * size of one track on a 5.25" disk). The block passed into the * RLE function is always 4K, so we zero out any extra space. */ if (blockSize < kNuLZWBlockSize) { memset(lzwState->inputBuf + blockSize, 0, kNuLZWBlockSize - blockSize); } /* * Compute the CRC. For LZW/1 this is on the entire 4K block, for * the "version 3" thread header CRC this is on just the "real" data. */ *pThreadCrc = Nu_CalcCRC16(*pThreadCrc, lzwState->inputBuf, blockSize); if (!isType2) { lzwState->chunkCrc = Nu_CalcCRC16(lzwState->chunkCrc, lzwState->inputBuf, kNuLZWBlockSize); } /* * Try to compress with RLE, from inputBuf to rleBuf. */ err = Nu_CompressBlockRLE(lzwState, (int*) &rleSize); BailError(err); if (rleSize < kNuLZWBlockSize) { lzwInputBuf = lzwState->rleBuf; } else { lzwInputBuf = lzwState->inputBuf; rleSize = kNuLZWBlockSize; } /* * Compress with LZW, into lzwBuf. */ if (!isType2) Nu_ClearLZWTable(lzwState); err = Nu_CompressLZWBlock(lzwState, lzwInputBuf, rleSize, (int*) &lzwSize); BailError(err); /* decide if we want to keep it, bearing in mind the LZW/2 header */ if (pArchive->valMimicSHK) { /* GSHK doesn't factor in header -- and *sometimes* uses "<=" !! */ keepLzw = (lzwSize < rleSize); } else { if (isType2) keepLzw = (lzwSize +2 < rleSize); else keepLzw = (lzwSize < rleSize); } /* * Write the compressed (or not) chunk. */ if (keepLzw) { /* * LZW succeeded. */ if (isType2) rleSize |= 0x8000; /* for LZW/2, set "LZW used" flag */ putc(rleSize & 0xff, fp); /* size after RLE */ putc(rleSize >> 8, fp); compressedLen += 2; if (isType2) { /* write compressed LZW len (+4 for header bytes) */ putc((lzwSize+4) & 0xff, fp); putc((lzwSize+4) >> 8, fp); compressedLen += 2; } else { /* set LZW/1 "LZW used" flag */ putc(1, fp); compressedLen++; } /* write data from LZW buffer */ err = Nu_FWrite(fp, lzwState->lzwBuf, lzwSize); BailError(err); compressedLen += lzwSize; } else { /* * LZW failed. */ putc(rleSize & 0xff, fp); /* size after RLE */ putc(rleSize >> 8, fp); compressedLen += 2; if (isType2) { /* clear LZW/2 table; we can't use it next time */ Nu_ClearLZWTable(lzwState); } else { /* set LZW/1 "LZW not used" flag */ putc(0, fp); compressedLen++; } /* write data from RLE or plain-input buffer */ err = Nu_FWrite(fp, lzwInputBuf, rleSize); BailError(err); compressedLen += rleSize; } /* * Update the counter and continue. */ srcLen -= blockSize; } /* * For LZW/1, go back and write the CRC. */ if (!isType2) { long curOffset; err = Nu_FTell(fp, &curOffset); BailError(err); err = Nu_FSeek(fp, initialOffset, SEEK_SET); BailError(err); putc(lzwState->chunkCrc & 0xff, fp); putc(lzwState->chunkCrc >> 8, fp); err = Nu_FSeek(fp, curOffset, SEEK_SET); BailError(err); } /* P8SHK and GSHK add an extra byte to LZW-compressed threads */ if (pArchive->valMimicSHK) { putc(0, fp); compressedLen++; } *pDstLen = compressedLen; bail: return err; } /* * Compress ShrinkIt-style "LZW/1". */ NuError Nu_CompressLZW1(NuArchive* pArchive, NuStraw* pStraw, FILE* fp, uint32_t srcLen, uint32_t* pDstLen, uint16_t* pCrc) { return Nu_CompressLZW(pArchive, pStraw, fp, srcLen, pDstLen, pCrc, false); } /* * Compress ShrinkIt-style "LZW/2". */ NuError Nu_CompressLZW2(NuArchive* pArchive, NuStraw* pStraw, FILE* fp, uint32_t srcLen, uint32_t* pDstLen, uint16_t* pCrc) { return Nu_CompressLZW(pArchive, pStraw, fp, srcLen, pDstLen, pCrc, true); } /* * =========================================================================== * Expansion * =========================================================================== */ /* if we don't have at least this much data, we try to read more */ /* (the "+3" is for the chunk header bytes) */ #define kNuLZWDesiredChunk (kNuLZWBlockSize + 3) /* * Static tables useful for bit manipulation. */ static const uint32_t gNuMaskTable[17] = { 0x0000, 0x01ff, 0x03ff, 0x03ff, 0x07ff, 0x07ff, 0x07ff, 0x07ff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff, 0x0fff }; /* convert high byte of "entry" into a bit width */ static const uint32_t gNuBitWidth[17] = { 8,9,10,10,11,11,11,11,12,12,12,12,12,12,12,12,12 }; /* entry in the trie */ typedef struct TableEntry { uint8_t ch; uint32_t prefix; } TableEntry; /* * This holds all of the "big" dynamic state, plus a few things that I * don't want to pass around. It's allocated once for each instance of * an open archive, and re-used. */ typedef struct LZWExpandState { NuArchive* pArchive; TableEntry trie[4096-256]; /* holds from 9 bits to 12 bits */ uint8_t stack[kNuLZWBlockSize]; // some of these don't need to be 32 bits; they were "uint" before uint32_t entry; /* 16-bit index into table */ uint32_t oldcode; /* carryover state for LZW/2 */ uint32_t incode; /* carryover state for LZW/2 */ uint32_t finalc; /* carryover state for LZW/2 */ Boolean resetFix; /* work around an LZW/2 bug */ uint16_t chunkCrc; /* CRC we calculate for LZW/1 */ uint16_t fileCrc; /* CRC stored with file */ uint8_t diskVol; /* disk volume # */ uint8_t rleEscape; /* RLE escape char, usually 0xdb */ uint32_t dataInBuffer; /* #of bytes in compBuf */ uint8_t* dataPtr; /* current data offset */ uint8_t lzwOutBuf[kNuLZWBlockSize + kNuSafetyPadding]; uint8_t rleOutBuf[kNuLZWBlockSize + kNuSafetyPadding]; } LZWExpandState; /* * Allocate some "reusable" state for LZW expansion. */ static NuError Nu_AllocLZWExpandState(NuArchive* pArchive) { NuError err; Assert(pArchive != NULL); Assert(pArchive->lzwExpandState == NULL); /* allocate the general-purpose compression buffer, if needed */ err = Nu_AllocCompressionBufferIFN(pArchive); if (err != kNuErrNone) return err; pArchive->lzwExpandState = Nu_Malloc(pArchive, sizeof(LZWExpandState)); if (pArchive->lzwExpandState == NULL) return kNuErrMalloc; return kNuErrNone; } #ifdef NDEBUG # define Nu_LZWPush(uch) ( *stackPtr++ = (uch) ) # define Nu_LZWPop() ( *(--stackPtr) ) # define Nu_LZWStackEmpty() ( stackPtr == lzwState->stack ) #else # define Nu_LZWPush(uch) \ ( Nu_LZWPushCheck(uch, lzwState, stackPtr), *stackPtr++ = (uch) ) # define Nu_LZWPop() \ ( Nu_LZWPopCheck(lzwState, stackPtr), *(--stackPtr) ) # define Nu_LZWStackEmpty() ( stackPtr == lzwState->stack ) static inline void Nu_LZWPushCheck(uint8_t uch, const LZWExpandState* lzwState, const uint8_t* stackPtr) { if (stackPtr >= lzwState->stack + sizeof(lzwState->stack)) { Nu_ReportError(lzwState->NU_BLOB, kNuErrBadData, "stack overflow"); abort(); } } static inline void Nu_LZWPopCheck(const LZWExpandState* lzwState, const uint8_t* stackPtr) { if (stackPtr == lzwState->stack) { Nu_ReportError(lzwState->NU_BLOB, kNuErrBadData, "stack underflow"); abort(); } } #endif /* * Get the next LZW code from the input, advancing pointers as needed. * * This would be faster as a macro and less ugly with pass-by-reference. * Resorting to globals is unacceptable. Might be less ugly if we clumped * some stuff into a struct. Should be good enough as-is. * * Returns an integer up to 12 bits long. * * (Turning this into a macro might speed things up.) */ static inline uint32_t Nu_LZWGetCode(const uint8_t** pInBuf, uint32_t entry, int* pAtBit, uint32_t* pLastByte) { uint32_t numBits, startBit, lastBit; uint32_t value; numBits = (entry +1) >> 8; /* bit-width of next code */ startBit = *pAtBit; lastBit = startBit + gNuBitWidth[numBits]; /* * We need one or two bytes from the input. These have to be shifted * around and merged with the bits we already have (if any). */ if (!startBit) value = *(*pInBuf)++; else value = *pLastByte; if (lastBit > 16) { /* need two more bytes */ value |= *(*pInBuf)++ << 8; *pLastByte = *(*pInBuf)++; value |= (uint32_t) *pLastByte << 16; } else { /* only need one more byte */ *pLastByte = *(*pInBuf)++; value |= *pLastByte << 8; } *pAtBit = lastBit & 0x07; /*printf("| EX: value=$%06lx mask=$%04x return=$%03lx\n", value,gNuMaskTable[numBits], (value >> startBit) & gNuMaskTable[numBits]);*/ /*DBUG_LZW(("### getcode 0x%04lx\n", (value >> startBit) & gNuMaskTable[numBits]));*/ /* I believe ANSI allows shifting by zero bits, so don't test "!startBit" */ return (value >> startBit) & gNuMaskTable[numBits]; } /* * Expand an LZW/1 chunk. * * Reads from lzwState->dataPtr, writes to lzwState->lzwOutBuf. */ static NuError Nu_ExpandLZW1(LZWExpandState* lzwState, uint32_t expectedLen) { NuError err = kNuErrNone; TableEntry* tablePtr; int atBit; uint32_t entry, oldcode, incode, ptr; uint32_t lastByte, finalc; const uint8_t* inbuf; uint8_t* outbuf; uint8_t* outbufend; uint8_t* stackPtr; Assert(lzwState != NULL); Assert(expectedLen > 0 && expectedLen <= kNuLZWBlockSize); inbuf = lzwState->dataPtr; outbuf = lzwState->lzwOutBuf; outbufend = outbuf + expectedLen; tablePtr = lzwState->trie - 256; /* don't store 256 empties */ stackPtr = lzwState->stack; atBit = 0; lastByte = 0; entry = kNuLZWFirstCode; /* 0x101 */ finalc = oldcode = incode = Nu_LZWGetCode(&inbuf, entry, &atBit, &lastByte); *outbuf++ = incode; Assert(incode <= 0xff); if (incode > 0xff) { err = kNuErrBadData; Nu_ReportError(lzwState->NU_BLOB, err, "invalid initial LZW symbol"); goto bail; } while (outbuf < outbufend) { incode = ptr = Nu_LZWGetCode(&inbuf, entry, &atBit, &lastByte); /* handle KwKwK case */ if (ptr >= entry) { //DBUG_LZW(("### KwKwK (ptr=%d entry=%d)\n", ptr, entry)); if (ptr != entry) { /* bad code -- this would make us read uninitialized data */ DBUG(("--- bad code (ptr=%d entry=%d)\n", ptr, entry)); err = kNuErrBadData; return err; } Nu_LZWPush((uint8_t)finalc); ptr = oldcode; } /* fill the stack by chasing up the trie */ while (ptr > 0xff) { Nu_LZWPush(tablePtr[ptr].ch); ptr = tablePtr[ptr].prefix; Assert(ptr < 4096); } /* done chasing up, now dump the stack, starting with ptr */ finalc = ptr; *outbuf++ = ptr; /*printf("PUT 0x%02x\n", *(outbuf-1));*/ while (!Nu_LZWStackEmpty()) { *outbuf++ = Nu_LZWPop(); /*printf("POP/PUT 0x%02x\n", *(outbuf-1));*/ } /* add the new prefix to the trie -- last string plus new char */ Assert(finalc <= 0xff); tablePtr[entry].ch = finalc; tablePtr[entry].prefix = oldcode; entry++; oldcode = incode; } bail: if (outbuf != outbufend) { err = kNuErrBadData; Nu_ReportError(lzwState->NU_BLOB, err, "LZW expansion failed"); return err; } /* adjust input buffer */ lzwState->dataInBuffer -= (inbuf - lzwState->dataPtr); Assert(lzwState->dataInBuffer < 32767*65536); lzwState->dataPtr = (uint8_t*)inbuf; return err; } /* * Expand an LZW/2 chunk. Main difference from LZW/1 is that the state * is carried over from the previous block in most cases, and the table * is cleared explicitly. * * Reads from lzwState->dataPtr, writes to lzwState->lzwOutBuf. * * In some cases, "expectedInputUsed" will be -1 to indicate that the * value is not known. */ static NuError Nu_ExpandLZW2(LZWExpandState* lzwState, uint32_t expectedLen, uint32_t expectedInputUsed) { NuError err = kNuErrNone; TableEntry* tablePtr; int atBit; uint32_t entry, oldcode, incode, ptr; uint32_t lastByte, finalc; const uint8_t* inbuf; const uint8_t* inbufend; uint8_t* outbuf; uint8_t* outbufend; uint8_t* stackPtr; /*DBUG_LZW(("### LZW/2 block start (compIn=%d, rleOut=%d, entry=0x%04x)\n", expectedInputUsed, expectedLen, lzwState->entry));*/ Assert(lzwState != NULL); Assert(expectedLen > 0 && expectedLen <= kNuLZWBlockSize); inbuf = lzwState->dataPtr; inbufend = lzwState->dataPtr + expectedInputUsed; outbuf = lzwState->lzwOutBuf; outbufend = outbuf + expectedLen; entry = lzwState->entry; tablePtr = lzwState->trie - 256; /* don't store 256 empties */ stackPtr = lzwState->stack; atBit = 0; lastByte = 0; /* * If the table isn't empty, initialize from the saved state and * jump straight into the main loop. * * There's a funny situation that arises when a table clear is the * second-to-last code in the previous chunk. After we see the * table clear, we get the next code and use it to initialize "oldcode" * and "incode" -- but we don't advance "entry" yet. The way that * ShrinkIt originally worked, the next time we came through we'd * see what we thought was an empty table and we'd reinitialize. So * we use "resetFix" to keep track of this situation. */ if (entry != kNuLZWFirstCode || lzwState->resetFix) { /* table not empty */ oldcode = lzwState->oldcode; incode = lzwState->incode; finalc = lzwState->finalc; lzwState->resetFix = false; goto main_loop; } clear_table: /* table is either empty or was just explicitly cleared; reset */ entry = kNuLZWFirstCode; /* 0x0101 */ if (outbuf == outbufend) { /* block must've ended on a table clear */ DBUG(("--- RARE: ending clear\n")); /* reset values, mostly to quiet gcc's "used before init" warnings */ oldcode = incode = finalc = 0; goto main_loop; /* the while condition will fall through */ } finalc = oldcode = incode = Nu_LZWGetCode(&inbuf, entry, &atBit, &lastByte); *outbuf++ = incode; /*printf("PUT 0x%02x\n", *(outbuf-1));*/ if (incode > 0xff) { err = kNuErrBadData; Nu_ReportError(lzwState->NU_BLOB, err, "invalid initial LZW symbol"); goto bail; } if (outbuf == outbufend) { /* if we're out of data, raise the "reset fix" flag */ DBUG(("--- RARE: resetFix!\n")); lzwState->resetFix = true; /* fall through; the while condition will let us slip past */ } main_loop: while (outbuf < outbufend) { incode = ptr = Nu_LZWGetCode(&inbuf, entry, &atBit, &lastByte); //DBUG_LZW(("### read incode=0x%04x\n", incode)); if (incode == kNuLZWClearCode) /* table clear - 0x0100 */ goto clear_table; /* handle KwKwK case */ if (ptr >= entry) { //DBUG_LZW(("### KwKwK (ptr=%d entry=%d)\n", ptr, entry)); if (ptr != entry) { /* bad code -- this would make us read uninitialized data */ DBUG(("--- bad code (ptr=%d entry=%d)\n", ptr, entry)); err = kNuErrBadData; return err; } Nu_LZWPush((uint8_t)finalc); ptr = oldcode; } /* fill the stack by chasing up the trie */ while (ptr > 0xff) { Nu_LZWPush(tablePtr[ptr].ch); ptr = tablePtr[ptr].prefix; Assert(ptr < 4096); } /* done chasing up, now dump the stack, starting with ptr */ finalc = ptr; *outbuf++ = ptr; /*printf("PUT 0x%02x\n", *(outbuf-1));*/ while (!Nu_LZWStackEmpty()) { *outbuf++ = Nu_LZWPop(); /*printf("POP/PUT 0x%02x\n", *(outbuf-1));*/ } /* add the new prefix to the trie -- last string plus new char */ /*DBUG_LZW(("### entry 0x%04x gets prefix=0x%04x and ch=0x%02x\n", entry, oldcode, finalc));*/ Assert(finalc <= 0xff); tablePtr[entry].ch = finalc; tablePtr[entry].prefix = oldcode; entry++; oldcode = incode; } bail: /*DBUG_LZW(("### end of block\n"));*/ if (expectedInputUsed != (uint32_t) -1 && inbuf != inbufend) { /* data was corrupted; if we keep going this will get worse */ DBUG(("--- inbuf != inbufend in ExpandLZW2 (diff=%d)\n", inbufend - inbuf)); err = kNuErrBadData; return err; } Assert(outbuf == outbufend); /* adjust input buffer */ lzwState->dataInBuffer -= (inbuf - lzwState->dataPtr); Assert(lzwState->dataInBuffer < 32767*65536); lzwState->dataPtr = (uint8_t*)inbuf; /* save off local copies of stuff */ lzwState->entry = entry; lzwState->oldcode = oldcode; lzwState->incode = incode; lzwState->finalc = finalc; return err; } /* * Expands a chunk of RLEd data into 4K of output. */ static NuError Nu_ExpandRLE(LZWExpandState* lzwState, const uint8_t* inbuf, uint32_t expectedInputUsed) { NuError err = kNuErrNone; uint8_t *outbuf; uint8_t *outbufend; const uint8_t *inbufend; uint8_t uch, rleEscape; int count; outbuf = lzwState->rleOutBuf; outbufend = outbuf + kNuLZWBlockSize; inbufend = inbuf + expectedInputUsed; rleEscape = lzwState->rleEscape; while (outbuf < outbufend) { uch = *inbuf++; if (uch == rleEscape) { uch = *inbuf++; count = *inbuf++; if (outbuf + count >= outbufend) { /* don't overrun buffer */ Assert(outbuf != outbufend); break; } while (count-- >= 0) *outbuf++ = uch; } else { *outbuf++ = uch; } } if (outbuf != outbufend) { err = kNuErrBadData; Nu_ReportError(lzwState->NU_BLOB, err, "RLE output glitch (off by %d)", (int)(outbufend-outbuf)); goto bail; } if (inbuf != inbufend) { err = kNuErrBadData; Nu_ReportError(lzwState->NU_BLOB, err, "RLE input glitch (off by %d)", (int)(inbufend-inbuf)); goto bail; } bail: return err; } /* * Utility function to get a byte from the input buffer. */ static inline uint8_t Nu_GetHeaderByte(LZWExpandState* lzwState) { lzwState->dataInBuffer--; Assert(lzwState->dataInBuffer > 0); return *lzwState->dataPtr++; } /* * Expand ShrinkIt-style "LZW/1" and "LZW/2". * * This manages the input data buffer, passing chunks of compressed data * into the appropriate expansion function. * * Pass in NULL for "pThreadCrc" if no thread CRC is desired. Otherwise, * "*pThreadCrc" should already be set to its initial value. On exit it * will contain the CRC of the uncompressed data. */ NuError Nu_ExpandLZW(NuArchive* pArchive, const NuRecord* pRecord, const NuThread* pThread, FILE* infp, NuFunnel* pFunnel, uint16_t* pThreadCrc) { NuError err = kNuErrNone; Boolean isType2; LZWExpandState* lzwState; uint32_t compRemaining, uncompRemaining, minSize; Assert(pArchive != NULL); Assert(pThread != NULL); Assert(infp != NULL); Assert(pFunnel != NULL); /* * Do some initialization and set-up. */ if (pArchive->lzwExpandState == NULL) { err = Nu_AllocLZWExpandState(pArchive); BailError(err); } Assert(pArchive->lzwExpandState != NULL); Assert(pArchive->compBuf != NULL); lzwState = pArchive->lzwExpandState; lzwState->pArchive = pArchive; if (pThread->thThreadFormat == kNuThreadFormatLZW1) { isType2 = false; minSize = 7; /* crc-lo,crc-hi,vol,rle-delim,len-lo,len-hi,lzw-used */ lzwState->chunkCrc = kNuInitialChunkCRC; /* 0x0000 */ } else if (pThread->thThreadFormat == kNuThreadFormatLZW2) { isType2 = true; minSize = 4; /* vol,rle-delim,len-lo,len-hi */ } else { err = kNuErrBadFormat; goto bail; } uncompRemaining = pThread->actualThreadEOF; compRemaining = pThread->thCompThreadEOF; if (compRemaining < minSize) { err = kNuErrBadData; Nu_ReportError(NU_BLOB, err, "thread too short to be valid LZW"); goto bail; } if (compRemaining && !uncompRemaining) { err = kNuErrBadData; Nu_ReportError(NU_BLOB, err, "compressed data but no uncompressed data??"); goto bail; } /* * Read the LZW header out of the data stream. */ if (!isType2) { lzwState->fileCrc = getc(infp); lzwState->fileCrc |= getc(infp) << 8; compRemaining -= 2; } lzwState->diskVol = getc(infp); /* disk volume #; not really used */ lzwState->rleEscape = getc(infp); /* RLE escape char for this thread */ compRemaining -= 2; lzwState->dataInBuffer = 0; lzwState->dataPtr = NULL; /* reset pointers */ lzwState->entry = kNuLZWFirstCode; /* 0x0101 */ lzwState->resetFix = false; /*DBUG_LZW(("### LZW%d block, vol=0x%02x, rleEsc=0x%02x\n", isType2 +1, lzwState->diskVol, lzwState->rleEscape));*/ /* * Read large blocks of the source file into compBuf, taking care not * to read past the end of the thread data. * * The motivation for doing it this way rather than just reading the * next compressed chunk are (1) compBuf is considerably larger than * stdio BUFSIZ on most systems, and (2) for LZW/1 we don't know the * size of the compressed data anyway. * * We need to ensure that we have at least one full compressed chunk * in the buffer. Since the compressor will refuse to store the * compressed data if it grows, we know that we need 4K plus the * chunk header. * * Once we have what looks like a full chunk, invoke the LZW decoder. */ while (uncompRemaining) { Boolean rleUsed; Boolean lzwUsed; uint32_t getSize; uint32_t rleLen; /* length after RLE; 4096 if no RLE */ uint32_t lzwLen = 0; /* type 2 only */ uint32_t writeLen, inCount; const uint8_t* writeBuf; /* if we're low, and there's more data available, read more */ if (lzwState->dataInBuffer < kNuLZWDesiredChunk && compRemaining) { /* * First thing we do is slide the old data to the start of * the buffer. */ if (lzwState->dataInBuffer) { Assert(lzwState->dataPtr != NULL); Assert(pArchive->compBuf != lzwState->dataPtr); memmove(pArchive->compBuf, lzwState->dataPtr, lzwState->dataInBuffer); } lzwState->dataPtr = pArchive->compBuf; /* * Next we read as much as we can. */ if (kNuGenCompBufSize - lzwState->dataInBuffer < compRemaining) getSize = kNuGenCompBufSize - lzwState->dataInBuffer; else getSize = compRemaining; /*printf("+++ READING %ld\n", getSize);*/ err = Nu_FRead(infp, lzwState->dataPtr + lzwState->dataInBuffer, getSize); if (err != kNuErrNone) { Nu_ReportError(NU_BLOB, err, "failed reading compressed data (%u bytes)", getSize); goto bail; } lzwState->dataInBuffer += getSize; compRemaining -= getSize; Assert(compRemaining < 32767*65536); Assert(lzwState->dataInBuffer <= kNuGenCompBufSize); } Assert(lzwState->dataInBuffer); /* * Read the LZW block header. */ if (isType2) { rleLen = Nu_GetHeaderByte(lzwState); rleLen |= Nu_GetHeaderByte(lzwState) << 8; lzwUsed = rleLen & 0x8000 ? true : false; rleLen &= 0x1fff; rleUsed = (rleLen != kNuLZWBlockSize); if (lzwUsed) { lzwLen = Nu_GetHeaderByte(lzwState); lzwLen |= Nu_GetHeaderByte(lzwState) << 8; lzwLen -= 4; /* don't include header bytes */ } } else { rleLen = Nu_GetHeaderByte(lzwState); rleLen |= Nu_GetHeaderByte(lzwState) << 8; lzwUsed = Nu_GetHeaderByte(lzwState); if (lzwUsed != 0 && lzwUsed != 1) { err = kNuErrBadData; Nu_ReportError(NU_BLOB, err, "garbled LZW header"); goto bail; } rleUsed = (rleLen != kNuLZWBlockSize); } /*DBUG_LZW(("### CHUNK rleLen=%d(%d) lzwLen=%d(%d) uncompRem=%ld\n", rleLen, rleUsed, lzwLen, lzwUsed, uncompRemaining));*/ if (uncompRemaining <= kNuLZWBlockSize) writeLen = uncompRemaining; /* last block */ else writeLen = kNuLZWBlockSize; #ifndef NDEBUG writeBuf = NULL; #endif /* * Decode the chunk, and point "writeBuf" at the uncompressed data. * * LZW always expands from the read buffer into lzwState->lzwOutBuf. * RLE expands from a specific buffer to lzwState->rleOutBuf. */ if (lzwUsed) { if (!isType2) { err = Nu_ExpandLZW1(lzwState, rleLen); } else { if (pRecord->isBadMac || pArchive->valIgnoreLZW2Len) { /* might be big-endian, might be okay; just ignore it */ lzwLen = (uint32_t) -1; } else if (lzwState->dataInBuffer < lzwLen) { /* rare -- GSHK will do this if you don't let it finish */ err = kNuErrBufferUnderrun; Nu_ReportError(NU_BLOB, err, "not enough compressed data " "-- archive truncated during creation?"); goto bail; } err = Nu_ExpandLZW2(lzwState, rleLen, lzwLen); } BailError(err); if (rleUsed) { err = Nu_ExpandRLE(lzwState, lzwState->lzwOutBuf, rleLen); BailError(err); writeBuf = lzwState->rleOutBuf; } else { writeBuf = lzwState->lzwOutBuf; } } else { if (rleUsed) { err = Nu_ExpandRLE(lzwState, lzwState->dataPtr, rleLen); BailError(err); writeBuf = lzwState->rleOutBuf; inCount = rleLen; } else { writeBuf = lzwState->dataPtr; inCount = writeLen; } /* * Advance the input buffer data pointers to consume the input. * The LZW expansion functions do this for us, but we're not * using LZW. */ lzwState->dataPtr += inCount; lzwState->dataInBuffer -= inCount; Assert(lzwState->dataInBuffer < 32767*65536); /* no LZW used, reset pointers */ lzwState->entry = kNuLZWFirstCode; /* 0x0101 */ lzwState->resetFix = false; } Assert(writeBuf != NULL); /* * Compute the CRC of the uncompressed data, and write it. For * LZW/1, the CRC of the last block includes the zeros that pad * it out to 4096 bytes. * * See commentary in the compression code for why we have to * compute two CRCs for LZW/1. */ if (pThreadCrc != NULL) { *pThreadCrc = Nu_CalcCRC16(*pThreadCrc, writeBuf, writeLen); } if (!isType2) { lzwState->chunkCrc = Nu_CalcCRC16(lzwState->chunkCrc, writeBuf, kNuLZWBlockSize); } /* write the data, possibly doing an EOL conversion */ err = Nu_FunnelWrite(pArchive, pFunnel, writeBuf, writeLen); if (err != kNuErrNone) { if (err != kNuErrAborted) Nu_ReportError(NU_BLOB, err, "unable to write output"); goto bail; } uncompRemaining -= writeLen; Assert(uncompRemaining < 32767*65536); } /* * It appears that ShrinkIt appends an extra byte after the last * LZW block. The byte is included in the compThreadEOF, but isn't * consumed by the LZW expansion routine, so it's usually harmless. * * It is *possible* for extra bytes to be here legitimately, but very * unlikely. The very last block is always padded out to 4K with * zeros. If you found a situation where that last block failed * to compress with RLE and LZW (perhaps the last block filled up * all but the last 2 or 3 bytes with uncompressible data), but * earlier data made the overall file compressible, you would have * a few stray bytes in the archive. * * This is a little easier to do if the last block has lots of single * 0xdb characters in it, since that requires RLE to escape them. * * Whatever the case, issue a warning if it looks like there's too * many of them. */ if (lzwState->dataInBuffer > 1) { DBUG(("--- Found %ld bytes following compressed data (compRem=%ld)\n", lzwState->dataInBuffer, compRemaining)); if (lzwState->dataInBuffer > 32) { Nu_ReportError(NU_BLOB, kNuErrNone, "(Warning) lots of fluff (%u)", lzwState->dataInBuffer); } } /* * We might be okay with stray bytes in the thread, but we're definitely * not okay with anything identified as compressed data being unused. */ if (compRemaining) { err = kNuErrBadData; Nu_ReportError(NU_BLOB, err, "not all compressed data was used (%u/%u)", compRemaining, lzwState->dataInBuffer); goto bail; } /* * ShrinkIt used to put the CRC in the stream and not in the thread * header. For LZW/1, we check the CRC here; for LZW/2, we hope it's * in the thread header. (As noted in the compression code, it's * possible to end up with two CRCs or no CRCs.) */ if (!isType2 && !pArchive->valIgnoreCRC) { if (lzwState->chunkCrc != lzwState->fileCrc) { if (!Nu_ShouldIgnoreBadCRC(pArchive, pRecord, kNuErrBadDataCRC)) { err = kNuErrBadDataCRC; Nu_ReportError(NU_BLOB, err, "expected 0x%04x, got 0x%04x (LZW/1)", lzwState->fileCrc, lzwState->chunkCrc); (void) Nu_FunnelFlush(pArchive, pFunnel); goto bail; } } else { DBUG(("--- LZW/1 CRCs match (0x%04x)\n", lzwState->chunkCrc)); } } bail: return err; } #endif /*ENABLE_LZW*/