/* * NuFX archive manipulation library * Copyright (C) 2000-2007 by Andy McFadden, All Rights Reserved. * This is free software; you can redistribute it and/or modify it under the * terms of the BSD License, see the file COPYING-LIB. * * Huffman/RLE "squeeze" compression, based on SQ/USQ. This format is * listed in the NuFX documentation, but to my knowledge has never * actually been used (until now). Neither P8 ShrinkIt v3.4 nor II Unshrink * handle the format correctly, so this is really only useful as an * experiment. * * The algorithm appears to date back to the CP/M days. This implementation * is based on "xsq"/"xusq" v1.7u by Richard Greenlaw (from December 1982). * The code was also present in ARC v5.x. * * The "nusq.c" implementation found in NuLib was by Marcel J.E. Mol, * who got it from Don Elton's sq3/usq2 programs for the Apple II. * * The SQ file format begins with this: * +00 magic number (0xff76) * +02 checksum on uncompressed data * +04 filename, ending with \0 * The NuFX format skips the above, starting immediately after it: * +00 node count * +02 node value array [node count], two bytes each * +xx data immediately follows array * * NuFX drops the magic number, checksum, and filename from the header, * since (with v3 records) all three are redundant. You can enable this * if you want to experiment with SQ-compatible output. */ #include "NufxLibPriv.h" #ifdef ENABLE_SQ /* if this is defined, create and unpack the full SQ header (debugging only) */ /* #define FULL_SQ_HEADER */ #define kNuSQMagic 0xff76 /* magic value for file header */ #define kNuSQRLEDelim 0x90 /* RLE delimiter */ #define kNuSQEOFToken 256 /* distinguished stop symbol */ #define kNuSQNumVals 257 /* 256 symbols + stop */ /* * =========================================================================== * Compression * =========================================================================== */ #define kNuSQNoChild (-1) /* indicates end of path through tree */ #define kNuSQNumNodes (kNuSQNumVals + kNuSQNumVals -1) #define kNuSQMaxCount 65535 /* max value you can store in 16 bits */ /* states for the RLE encoding */ typedef enum { kNuSQRLEStateUnknown = 0, kNuSQRLEStateNoHist, /* nothing yet */ kNuSQRLEStateSentChar, /* lastchar set, no lookahead yet */ kNuSQRLEStateSendNewC, /* found run of two, send 2nd w/o DLE */ kNuSQRLEStateSendCnt, /* newchar set, DLE sent, send count next */ } NuSQRLEState; /* nodes in the Huffman encoding tree */ typedef struct EncTreeNode { int weight; /* #of appearances */ int tdepth; /* length on longest path in tree */ int lchild, rchild; /* indexes to next level */ } EncTreeNode; /* * State during compression. */ typedef struct SQState { NuArchive* pArchive; int doCalcCRC; /* boolean; if set, compute CRC on input */ ushort crc; NuStraw* pStraw; long uncompRemaining; #ifdef FULL_SQ_HEADER ushort checksum; #endif /* * RLE state stuff. */ NuSQRLEState rleState; int lastSym; int likeCount; /* * Huffman state stuff. */ EncTreeNode node[kNuSQNumNodes]; int treeHead; /* index to head node of final tree */ /* encoding table */ int codeLen[kNuSQNumVals]; /* number of bits in code for symbol N */ ushort code[kNuSQNumVals]; /* bits for symbol N (first bit in lsb) */ ushort tmpCode; /* temporary code value */ } SQState; /* * Get the next byte from the input straw. Also updates the checksum * and SQ CRC, if "doCalcCRC" is set to true. * * This isn't exactly fast, but then this isn't exactly a fast algorithm, * and there's not much point in optimizing something that isn't going * to get used much. * * Returns kNuSQEOFToken as the value when we're out of data. */ static NuError Nu_SQGetcCRC(SQState* pSqState, int* pSym) { NuError err; uchar c; if (!pSqState->uncompRemaining) { *pSym = kNuSQEOFToken; return kNuErrNone; } err = Nu_StrawRead(pSqState->pArchive, pSqState->pStraw, &c, 1); if (err == kNuErrNone) { if (pSqState->doCalcCRC) { #ifdef FULL_SQ_HEADER pSqState->checksum += c; #endif pSqState->crc = Nu_CalcCRC16(pSqState->crc, &c, 1); } *pSym = c; pSqState->uncompRemaining--; } return err; } /* * Get the next byte from the post-RLE input stream. * * Returns kNuSQEOFToken in "*pSum" when we reach the end of the input. */ static NuError Nu_SQGetcRLE(SQState* pSqState, int* pSym) { NuError err = kNuErrNone; int likeCount, newSym; switch (pSqState->rleState) { case kNuSQRLEStateNoHist: /* No relevant history */ pSqState->rleState = kNuSQRLEStateSentChar; err = Nu_SQGetcCRC(pSqState, pSym); pSqState->lastSym = *pSym; break; case kNuSQRLEStateSentChar: /* lastChar is set, need lookahead */ switch (pSqState->lastSym) { case kNuSQRLEDelim: /* send all DLEs escaped; note this is horrible for a run of DLEs */ pSqState->rleState = kNuSQRLEStateNoHist; *pSym = 0; /* zero len is how we define an escaped DLE */ break; case kNuSQEOFToken: *pSym = kNuSQEOFToken; break; default: /* * Try for a run, using the character we previous read as * the base. Thus, if the next character we read matches, * we have a run of two. The count describes the total * length of the run, including the character we've already * emitted. */ likeCount = 0; do { likeCount++; err = Nu_SQGetcCRC(pSqState, &newSym); if (err != kNuErrNone) goto bail; } while (newSym == pSqState->lastSym && likeCount < 255); switch (likeCount) { case 1: /* not a run, return first one we got */ pSqState->lastSym = newSym; *pSym = newSym; break; case 2: /* not long enough for run; return second one next time thru */ pSqState->rleState = kNuSQRLEStateSendNewC; *pSym = pSqState->lastSym; /* 1st new one */ pSqState->lastSym = newSym; /* 2nd new one */ break; default: pSqState->rleState = kNuSQRLEStateSendCnt; pSqState->likeCount = likeCount; pSqState->lastSym = newSym; /* 1st one after the run */ *pSym = kNuSQRLEDelim; break; } } break; case kNuSQRLEStateSendNewC: /* send first char past a run of two */ pSqState->rleState = kNuSQRLEStateSentChar; *pSym = pSqState->lastSym; break; case kNuSQRLEStateSendCnt: /* Sent DLE for repeat sequence, send count */ pSqState->rleState = kNuSQRLEStateSendNewC; *pSym = pSqState->likeCount; break; default: { NuArchive* pArchive = pSqState->pArchive; err = kNuErrInternal; Nu_ReportError(NU_BLOB, err, "invalid state %d in SQ RLE encode", pSqState->rleState); break; } } bail: return err; } /* * Comment from xsq.c: * * This translation uses the Huffman algorithm to develop a * binary tree representing the decoding information for * a variable length bit string code for each input value. * Each string's length is in inverse proportion to its * frequency of appearance in the incoming data stream. * The encoding table is derived from the decoding table. * * The range of valid values into the Huffman algorithm are * the values of a byte stored in an integer plus the special * endfile value chosen to be an adjacent value. Overall, 0-SPEOF. * * The "node" array of structures contains the nodes of the * binary tree. The first NUMVALS nodes are the leaves of the * tree and represent the values of the data bytes being * encoded and the special endfile, SPEOF. * The remaining nodes become the internal nodes of the tree. * * In the original design it was believed that * a Huffman code would fit in the same number of * bits that will hold the sum of all the counts. * That was disproven by a user's file and was a rare but * infamous bug. This version attempts to choose among equally * weighted subtrees according to their maximum depths to avoid * unnecessarily long codes. In case that is not sufficient * to guarantee codes <= 16 bits long, we initially scale * the counts so the total fits in an unsigned integer, but * if codes longer than 16 bits are generated the counts are * rescaled to a lower ceiling and code generation is retried. */ /* * Return the greater of two integers. */ static int Nu_SQMax(int a, int b) { if (a > b) return a; else return b; } /* * Compare two trees, if a > b return true, else return false. * Priority is given to weight, then depth. "a" and "b" are heaps, * so we only need to look at the root element. */ static int Nu_SQCmpTrees(SQState* pSqState, int a, int b) { if (pSqState->node[a].weight > pSqState->node[b].weight) return true; if (pSqState->node[a].weight == pSqState->node[b].weight) if (pSqState->node[a].tdepth > pSqState->node[b].tdepth) return true; return false; } /* * heap() and adjust() maintain a list of binary trees as a * heap with the top indexing the binary tree on the list * which has the least weight or, in case of equal weights, * least depth in its longest path. The depth part is not * strictly necessary, but tends to avoid long codes which * might provoke rescaling. */ /* * Recursively make a heap from a heap with a new top. */ static void Nu_SQHeapAdjust(SQState* pSqState, int list[], int top, int bottom) { int k, temp; k = 2 * top + 1; /* left child of top */ temp = list[top]; /* remember root node of top tree */ if (k <= bottom) { if (k < bottom && Nu_SQCmpTrees(pSqState, list[k], list[k + 1])) k++; /* k indexes "smaller" child (in heap of trees) of top */ /* now make top index "smaller" of old top and smallest child */ if (Nu_SQCmpTrees(pSqState, temp, list[k])) { list[top] = list[k]; list[k] = temp; /* Make the changed list a heap */ Nu_SQHeapAdjust(pSqState, list, k, bottom); /*recursive*/ } } } /* * Create a heap. */ static void Nu_SQHeap(SQState* pSqState, int list[], int length) { int i; for (i = (length - 2) / 2; i >= 0; i--) Nu_SQHeapAdjust(pSqState, list, i, length - 1); } /* * Build the encoding tree. * * HUFFMAN ALGORITHM: develops the single element trees * into a single binary tree by forming subtrees rooted in * interior nodes having weights equal to the sum of weights of all * their descendents and having depth counts indicating the * depth of their longest paths. * * When all trees have been formed into a single tree satisfying * the heap property (on weight, with depth as a tie breaker) * then the binary code assigned to a leaf (value to be encoded) * is then the series of left (0) and right (1) * paths leading from the root to the leaf. * Note that trees are removed from the heaped list by * moving the last element over the top element and * reheaping the shorter list. */ static void Nu_SQBuildTree(SQState* pSqState, int list[], int len) { int freenode; /* next free node in tree */ EncTreeNode* frnp; /* free node pointer */ int lch, rch; /* temporaries for left, right children */ /* * Initialize index to next available (non-leaf) node. * Lower numbered nodes correspond to leaves (data values). */ freenode = kNuSQNumVals; while (len > 1) { /* * Take from list two btrees with least weight * and build an interior node pointing to them. * This forms a new tree. */ lch = list[0]; /* This one will be left child */ /* delete top (least) tree from the list of trees */ list[0] = list[--len]; Nu_SQHeapAdjust(pSqState, list, 0, len - 1); /* Take new top (least) tree. Reuse list slot later */ rch = list[0]; /* This one will be right child */ /* * Form new tree from the two least trees using * a free node as root. Put the new tree in the list. */ frnp = &pSqState->node[freenode]; /* address of next free node */ list[0] = freenode++; /* put at top for now */ frnp->lchild = lch; frnp->rchild = rch; frnp->weight = pSqState->node[lch].weight + pSqState->node[rch].weight; frnp->tdepth = 1 + Nu_SQMax(pSqState->node[lch].tdepth, pSqState->node[rch].tdepth); /* reheap list to get least tree at top*/ Nu_SQHeapAdjust(pSqState, list, 0, len - 1); } pSqState->treeHead = list[0]; /* head of final tree */ } /* * Recursive routine to walk the indicated subtree and level * and maintain the current path code in bstree. When a leaf * is found the entire code string and length are put into * the encoding table entry for the leaf's data value . * * Returns zero on success, nonzero if codes are too long. */ static int Nu_SQBuildEncTable(SQState* pSqState, int level, int root) { int l, r; l = pSqState->node[root].lchild; r = pSqState->node[root].rchild; if (l == kNuSQNoChild && r == kNuSQNoChild) { /* Leaf. Previous path determines bit string * code of length level (bits 0 to level - 1). * Ensures unused code bits are zero. */ pSqState->codeLen[root] = level; pSqState->code[root] = pSqState->tmpCode & (((ushort)~0) >> (16 - level)); return (level > 16) ? -1 : 0; } else { if (l != kNuSQNoChild) { /* Clear path bit and continue deeper */ pSqState->tmpCode &= ~(1 << level); /* NOTE RECURSION */ if (Nu_SQBuildEncTable(pSqState, level + 1, l) != 0) return -1; } if (r != kNuSQNoChild) { /* Set path bit and continue deeper */ pSqState->tmpCode |= 1 << level; /* NOTE RECURSION */ if (Nu_SQBuildEncTable(pSqState, level + 1, r) != 0) return -1; } } return 0; /* if we got here we're ok so far */ } /* * The count of number of occurrances of each input value * have already been prevented from exceeding MAXCOUNT. * Now we must scale them so that their sum doesn't exceed * ceiling and yet no non-zero count can become zero. * This scaling prevents errors in the weights of the * interior nodes of the Huffman tree and also ensures that * the codes will fit in an unsigned integer. Rescaling is * used if necessary to limit the code length. */ static void Nu_SQScale(SQState* pSqState, int ceiling) { int i; int wt, ovflw, divisor; ushort sum; int increased; /* flag */ do { for (i = sum = ovflw = 0; i < kNuSQNumVals; i++) { if (pSqState->node[i].weight > (ceiling - sum)) ovflw++; sum += pSqState->node[i].weight; } divisor = ovflw + 1; /* use the high 16 bits of the sum */ /* Ensure no non-zero values are lost */ increased = false; for (i = 0; i < kNuSQNumVals; i++) { wt = pSqState->node[i].weight; if (wt < divisor && wt != 0) { /* Don't fail to provide a code if it's used at all */ pSqState->node[i].weight = divisor; increased = true; } } } while(increased); /* scaling factor choosen and minimums are set; now do the downscale */ if (divisor > 1) { for (i = 0; i < kNuSQNumVals; i++) pSqState->node[i].weight /= divisor; } } /* * Build a frequency table from the post-RLE input stream, then generate * an encoding tree from the results. */ static NuError Nu_SQComputeHuffTree(SQState* pSqState) { NuError err = kNuErrNone; int btreeList[kNuSQNumVals]; /* list of intermediate binary trees */ int listLen; /* length of btreeList */ int ceiling; /* limit for scaling */ int i, sym, result; /* init tree */ for (i = 0; i < kNuSQNumNodes; i++) { pSqState->node[i].weight = 0; pSqState->node[i].tdepth = 0; pSqState->node[i].lchild = kNuSQNoChild; pSqState->node[i].rchild = kNuSQNoChild; } DBUG(("+++ SQ scanning...\n")); do { int* pWeight; err = Nu_SQGetcRLE(pSqState, &sym); if (err != kNuErrNone) goto bail; Assert(sym >= 0 && sym <= kNuSQEOFToken); pWeight = &pSqState->node[(unsigned)sym].weight; if (*pWeight != kNuSQMaxCount) (*pWeight)++; } while (sym != kNuSQEOFToken); DBUG(("+++ SQ generating tree...\n")); ceiling = kNuSQMaxCount; do { if (ceiling != kNuSQMaxCount) { DBUG(("+++ SQ rescaling\n")); } /* pick a divisor and scale everything to fit in "ceiling" */ Nu_SQScale(pSqState, ceiling); ceiling /= 2; /* in case we need to rescale */ /* * Build list of single node binary trees having * leaves for the input values with non-zero counts */ for (i = listLen = 0; i < kNuSQNumVals; i++) { if (pSqState->node[i].weight != 0) { pSqState->node[i].tdepth = 0; btreeList[listLen++] = i; } } /* * Arrange list of trees into a heap with the entry * indexing the node with the least weight a the top. */ Nu_SQHeap(pSqState, btreeList, listLen); /* convert the list of trees to a single decoding tree */ Nu_SQBuildTree(pSqState, btreeList, listLen); /* initialize encoding table */ for (i = 0; i < kNuSQNumVals; i++) pSqState->codeLen[i] = 0; /* * Recursively build the encoding table; returns non-zero (failure) * if any code is > 16 bits long. */ result = Nu_SQBuildEncTable(pSqState, 0, pSqState->treeHead); } while (result != 0); #if 0 { int jj; printf("init_huff\n"); for (jj = 0; jj < kNuSQNumNodes; jj++) { printf("NODE %d: w=%d d=%d l=%d r=%d\n", jj, pSqState->node[jj].weight, pSqState->node[jj].tdepth, pSqState->node[jj].lchild, pSqState->node[jj].rchild); } } #endif bail: return err; } /* * Compress data from input to output, using the values in the "code" * and "codeLen" arrays. */ static NuError Nu_SQCompressInput(SQState* pSqState, FILE* fp, long* pCompressedLen) { NuError err = kNuErrNone; int sym = kNuSQEOFToken-1; unsigned long bits, code; /* must hold at least 23 bits */ int codeLen, gotbits; long compressedLen; DBUG(("+++ SQ compressing\n")); Assert(sizeof(bits) >= 4); compressedLen = *pCompressedLen; bits = 0; gotbits = 0; while (sym != kNuSQEOFToken) { err = Nu_SQGetcRLE(pSqState, &sym); if (err != kNuErrNone) goto bail; code = pSqState->code[sym]; codeLen = pSqState->codeLen[sym]; bits |= code << gotbits; gotbits += codeLen; /* if we have more than a byte, output it */ while (gotbits > 7) { putc(bits & 0xff, fp); compressedLen++; bits >>= 8; gotbits -= 8; } } if (gotbits) { Assert(gotbits < 8); putc(bits & 0xff, fp); compressedLen++; } bail: *pCompressedLen = compressedLen; return err; } /* * Write a 16-bit value in little-endian order. */ static NuError Nu_SQWriteShort(FILE* outfp, short val) { NuError err; uchar tmpc; tmpc = val & 0xff; err = Nu_FWrite(outfp, &tmpc, 1); if (err != kNuErrNone) goto bail; tmpc = (val >> 8) & 0xff; err = Nu_FWrite(outfp, &tmpc, 1); if (err != kNuErrNone) goto bail; bail: return err; } /* * Compress "srcLen" bytes into SQ format, from "pStraw" to "fp". * * This requires two passes through the input. * * Bit of trivia: "sq3" on the Apple II self-destructs if you hand * it an empty file. "xsq" works fine, creating an empty tree that * "xusq" unpacks. */ NuError Nu_CompressHuffmanSQ(NuArchive* pArchive, NuStraw* pStraw, FILE* fp, ulong srcLen, ulong* pDstLen, ushort* pCrc) { NuError err = kNuErrNone; SQState sqState; long compressedLen; int i, j, numNodes; err = Nu_AllocCompressionBufferIFN(pArchive); if (err != kNuErrNone) return err; sqState.pArchive = pArchive; sqState.crc = 0; if (pCrc == nil) { sqState.doCalcCRC = false; } else { sqState.doCalcCRC = true; sqState.crc = *pCrc; } #ifdef FULL_SQ_HEADER sqState.checksum = 0; #endif /* * Pass 1: analysis. Perform a frequency analysis on the post-RLE * input file. This will calculate the file CRCs as a side effect. */ sqState.rleState = kNuSQRLEStateNoHist; sqState.uncompRemaining = srcLen; sqState.pStraw = pStraw; (void) Nu_StrawSetProgressState(pStraw, kNuProgressAnalyzing); err = Nu_SQComputeHuffTree(&sqState); BailError(err); if (pCrc != nil) *pCrc = sqState.crc; /* * Pass 2: compression. Using the encoding tree we computed, * compress the input with RLE and Huffman. Start by writing * the file header and rewinding the input file. */ sqState.doCalcCRC = false; /* don't need to re-compute */ sqState.rleState = kNuSQRLEStateNoHist; /* reset */ compressedLen = 0; /* rewind for next pass */ (void) Nu_StrawSetProgressState(pStraw, kNuProgressCompressing); err = Nu_StrawRewind(pArchive, pStraw); BailError(err); sqState.uncompRemaining = srcLen; #ifdef FULL_SQ_HEADER /* write file header */ err = Nu_SQWriteShort(fp, kNuSQMagic); BailError(err); compressedLen += 2; err = Nu_SQWriteShort(fp, sqState.checksum); BailError(err); compressedLen += 2; { static const char fakename[] = "s.qqq"; err = Nu_FWrite(fp, fakename, sizeof(fakename)); BailError(err); compressedLen += sizeof(fakename); } #endif /* * Original description: * Write out a simplified decoding tree. Only the interior * nodes are written. When a child is a leaf index * (representing a data value) it is recoded as * -(index + 1) to distinguish it from interior indexes * which are recoded as positive indexes in the new tree. * Note that this tree will be empty for an empty file. */ if (sqState.treeHead < kNuSQNumVals) numNodes = 0; else numNodes = sqState.treeHead - (kNuSQNumVals - 1); err = Nu_SQWriteShort(fp, (short) numNodes); BailError(err); compressedLen += 2; for (i = sqState.treeHead, j = 0; j < numNodes; j++, i--) { int l, r; l = sqState.node[i].lchild; r = sqState.node[i].rchild; l = l < kNuSQNumVals ? -(l + 1) : sqState.treeHead - l; r = r < kNuSQNumVals ? -(r + 1) : sqState.treeHead - r; err = Nu_SQWriteShort(fp, (short) l); BailError(err); err = Nu_SQWriteShort(fp, (short) r); BailError(err); compressedLen += 4; /*DBUG(("TREE %d: %d %d\n", j, l, r));*/ } /* * Convert the input to RLE/Huffman. */ err = Nu_SQCompressInput(&sqState, fp, &compressedLen); BailError(err); /* * Done! */ *pDstLen = compressedLen; bail: return err; } /* * =========================================================================== * Expansion * =========================================================================== */ /* * State during uncompression. */ typedef struct USQState { ulong dataInBuffer; uchar* dataPtr; int bitPosn; int bits; /* * Decoding tree; first "nodeCount" values are populated. Positive * values are indicies to another node in the tree, negative values * are literals (+1 because "negative zero" doesn't work well). */ int nodeCount; struct { short child[2]; /* left/right kids, must be signed 16-bit */ } decTree[kNuSQNumVals-1]; } USQState; /* * Decode the next symbol from the Huffman stream. */ static NuError Nu_USQDecodeHuffSymbol(USQState* pUsqState, int* pVal) { short val = 0; int bits, bitPosn; bits = pUsqState->bits; /* local copy */ bitPosn = pUsqState->bitPosn; do { if (++bitPosn > 7) { /* grab the next byte and use that */ bits = *pUsqState->dataPtr++; bitPosn = 0; if (!pUsqState->dataInBuffer--) return kNuErrBufferUnderrun; val = pUsqState->decTree[val].child[1 & bits]; } else { /* still got bits; shift right and use it */ val = pUsqState->decTree[val].child[1 & (bits >>= 1)]; } } while (val >= 0); /* val is negative literal; add one to make it zero-based then negate it */ *pVal = -(val + 1); pUsqState->bits = bits; pUsqState->bitPosn = bitPosn; return kNuErrNone; } /* * Read two bytes of signed data out of the buffer. */ static inline NuError Nu_USQReadShort(USQState* pUsqState, short* pShort) { if (pUsqState->dataInBuffer < 2) return kNuErrBufferUnderrun; *pShort = *pUsqState->dataPtr++; *pShort |= (*pUsqState->dataPtr++) << 8; pUsqState->dataInBuffer -= 2; return kNuErrNone; } /* * Expand "SQ" format. * * Because we have a stop symbol, knowing the uncompressed length of * the file is not essential. */ NuError Nu_ExpandHuffmanSQ(NuArchive* pArchive, const NuRecord* pRecord, const NuThread* pThread, FILE* infp, NuFunnel* pFunnel, ushort* pCrc) { NuError err = kNuErrNone; USQState usqState; ulong compRemaining, getSize; #ifdef FULL_SQ_HEADER ushort magic, fileChecksum, checksum; #endif short nodeCount; int i, inrep; uchar lastc = 0; err = Nu_AllocCompressionBufferIFN(pArchive); if (err != kNuErrNone) return err; Assert(pArchive->compBuf != nil); usqState.dataInBuffer = 0; usqState.dataPtr = pArchive->compBuf; compRemaining = pThread->thCompThreadEOF; #ifdef FULL_SQ_HEADER if (compRemaining < 8) #else if (compRemaining < 3) #endif { err = kNuErrBadData; Nu_ReportError(NU_BLOB, err, "thread too short to be valid SQ data"); goto bail; } getSize = compRemaining; if (getSize > kNuGenCompBufSize) getSize = kNuGenCompBufSize; /* * Grab a big chunk. "compRemaining" is the amount of compressed * data left in the file, usqState.dataInBuffer is the amount of * compressed data left in the buffer. */ err = Nu_FRead(infp, usqState.dataPtr, getSize); if (err != kNuErrNone) { Nu_ReportError(NU_BLOB, err, "failed reading compressed data (%ld bytes)", getSize); goto bail; } usqState.dataInBuffer += getSize; compRemaining -= getSize; /* * Read the header. We assume that the header is less than * kNuGenCompBufSize bytes, which is pretty fair since the buffer is * currently 20x larger than the longest possible header (sq allowed * 300+ for the filename, plus 257*2 for the tree, plus misc). */ Assert(kNuGenCompBufSize > 1200); #ifdef FULL_SQ_HEADER err = Nu_USQReadShort(&usqState, &magic); BailError(err); if (magic != kNuSQMagic) { err = kNuErrBadData; Nu_ReportError(NU_BLOB, err, "bad magic number in SQ block"); goto bail; } err = Nu_USQReadShort(&usqState, &fileChecksum); BailError(err); checksum = 0; while (*usqState.dataPtr++ != '\0') usqState.dataInBuffer--; usqState.dataInBuffer--; #endif err = Nu_USQReadShort(&usqState, &nodeCount); BailError(err); if (nodeCount < 0 || nodeCount >= kNuSQNumVals) { err = kNuErrBadData; Nu_ReportError(NU_BLOB, err, "invalid decode tree in SQ (%d nodes)", nodeCount); goto bail; } usqState.nodeCount = nodeCount; /* initialize for possibly empty tree (only happens on an empty file) */ usqState.decTree[0].child[0] = -(kNuSQEOFToken+1); usqState.decTree[0].child[1] = -(kNuSQEOFToken+1); /* read the nodes, ignoring "read errors" until we're done */ for (i = 0; i < nodeCount; i++) { err = Nu_USQReadShort(&usqState, &usqState.decTree[i].child[0]); err = Nu_USQReadShort(&usqState, &usqState.decTree[i].child[1]); } if (err != kNuErrNone) { err = kNuErrBadData; Nu_ReportError(NU_BLOB, err, "SQ data looks truncated at tree"); goto bail; } usqState.bitPosn = 99; /* force an immediate read */ /* * Start pulling data out of the file. We have to Huffman-decode * the input, and then feed that into an RLE expander. * * A completely lopsided (and broken) Huffman tree could require * 256 tree descents, so we want to try to ensure we have at least 256 * bits in the buffer. Otherwise, we could get a false buffer underrun * indication back from DecodeHuffSymbol. * * The SQ sources actually guarantee that a code will fit entirely * in 16 bits, but there's no reason not to use the larger value. */ inrep = false; while (1) { int val; if (usqState.dataInBuffer < 65 && compRemaining) { /* * Less than 256 bits, but there's more in the file. * * First thing we do is slide the old data to the start of * the buffer. */ if (usqState.dataInBuffer) { Assert(pArchive->compBuf != usqState.dataPtr); memmove(pArchive->compBuf, usqState.dataPtr, usqState.dataInBuffer); } usqState.dataPtr = pArchive->compBuf; /* * Next we read as much as we can. */ if (kNuGenCompBufSize - usqState.dataInBuffer < compRemaining) getSize = kNuGenCompBufSize - usqState.dataInBuffer; else getSize = compRemaining; err = Nu_FRead(infp, usqState.dataPtr + usqState.dataInBuffer, getSize); if (err != kNuErrNone) { Nu_ReportError(NU_BLOB, err, "failed reading compressed data (%ld bytes)", getSize); goto bail; } usqState.dataInBuffer += getSize; compRemaining -= getSize; Assert(compRemaining < 32767*65536); Assert(usqState.dataInBuffer <= kNuGenCompBufSize); } err = Nu_USQDecodeHuffSymbol(&usqState, &val); if (err != kNuErrNone) { Nu_ReportError(NU_BLOB, err, "failed decoding huff symbol"); goto bail; } if (val == kNuSQEOFToken) break; /* * Feed the symbol into the RLE decoder. */ if (inrep) { /* * Last char was RLE delim, handle this specially. We use * --val instead of val-- because we already emitted the * first occurrence of the char (right before the RLE delim). */ if (val == 0) { /* special case -- just an escaped RLE delim */ lastc = kNuSQRLEDelim; val = 2; } while (--val) { if (pCrc != nil) *pCrc = Nu_CalcCRC16(*pCrc, &lastc, 1); err = Nu_FunnelWrite(pArchive, pFunnel, &lastc, 1); #ifdef FULL_SQ_HEADER checksum += lastc; #endif } inrep = false; } else { /* last char was ordinary */ if (val == kNuSQRLEDelim) { /* set a flag and catch the count the next time around */ inrep = true; } else { lastc = val; if (pCrc != nil) *pCrc = Nu_CalcCRC16(*pCrc, &lastc, 1); err = Nu_FunnelWrite(pArchive, pFunnel, &lastc, 1); #ifdef FULL_SQ_HEADER checksum += lastc; #endif } } } if (inrep) { err = kNuErrBadData; Nu_ReportError(NU_BLOB, err, "got stop symbol when run length expected"); goto bail; } #ifdef FULL_SQ_HEADER /* verify the checksum stored in the SQ file */ if (checksum != fileChecksum && !pArchive->valIgnoreCRC) { if (!Nu_ShouldIgnoreBadCRC(pArchive, pRecord, kNuErrBadDataCRC)) { err = kNuErrBadDataCRC; Nu_ReportError(NU_BLOB, err, "expected 0x%04x, got 0x%04x (SQ)", fileChecksum, checksum); (void) Nu_FunnelFlush(pArchive, pFunnel); goto bail; } } else { DBUG(("--- SQ checksums match (0x%04x)\n", checksum)); } #endif /* * SQ2 adds an extra 0xff to the end, xsq doesn't. In any event, it * appears that having an extra byte at the end is okay. */ if (usqState.dataInBuffer > 1) { DBUG(("--- Found %ld bytes following compressed data (compRem=%ld)\n", usqState.dataInBuffer, compRemaining)); Nu_ReportError(NU_BLOB, kNuErrNone, "(Warning) unexpected fluff (%ld)", usqState.dataInBuffer); } bail: return err; } #endif /*ENABLE_SQ*/