From 90069ef14e901145b2467d57e7cccdb84287102a Mon Sep 17 00:00:00 2001 From: Andy McFadden Date: Thu, 17 Aug 2023 11:26:00 -0700 Subject: [PATCH] Fix bug in compression code In some situations, a run of exactly 15 literals would be encoded incorrectly. --- fhpack.cpp | 29 +++++++--- make-test-pic.cpp | 145 ++++++++++++++++++++++++++-------------------- 2 files changed, 102 insertions(+), 72 deletions(-) diff --git a/fhpack.cpp b/fhpack.cpp index 41cdb30..4d90d61 100644 --- a/fhpack.cpp +++ b/fhpack.cpp @@ -1,7 +1,7 @@ /* * fhpack, an Apple II hi-res picture compressor. * By Andy McFadden - * Version 1.0, August 2015 + * Version 1.0.1, August 2023 * * Copyright 2015 by faddenSoft. All Rights Reserved. * See the LICENSE.txt file for distribution terms (Apache 2.0). @@ -88,7 +88,6 @@ every 255 literals (4/4 byte, 1 for literal len extension, 1 for match len extension that holds the "no match" symbol). Globally we add +1 for the magic number. The "end-of-data" symbol replaces the "no match" symbol, so overall it's int(ceil(8192/255)) * 33 + 1 = 100 bytes. - */ /* Implementation notes: @@ -158,7 +157,7 @@ enum ProgramMode { */ static void usage(const char* argv0) { - fprintf(stderr, "fhpack v1.0 by Andy McFadden -*- "); + fprintf(stderr, "fhpack v1.0.1 by Andy McFadden -*- "); fprintf(stderr, "Copyright 2015 by faddenSoft\n"); fprintf(stderr, "Source code available from https://github.com/fadden/fhpack\n\n"); @@ -447,9 +446,11 @@ size_t compressBufferOptimally(uint8_t* outBuf, const uint8_t* inBuf, // backwards, we can end up with 32 literals followed // by 255 literals, rather than the other way around. DBUG((" output literal-literal (%zd)\n", numLiterals)); - if (numLiterals <= INITIAL_LEN) { + if (numLiterals < INITIAL_LEN) { + // output 0-14 literals *outPtr++ = (numLiterals << 4) | 0x0f; } else { + // output 15+(0-240) literals *outPtr++ = 0xff; *outPtr++ = numLiterals - INITIAL_LEN; } @@ -513,9 +514,12 @@ size_t compressBufferOptimally(uint8_t* outBuf, const uint8_t* inBuf, // Dump any remaining literals, with the end-of-data indicator // in the match len. - if (numLiterals <= INITIAL_LEN) { + DBUG(("ending with numLiterals=%zd\n", numLiterals)); + if (numLiterals < INITIAL_LEN) { + // 0-14 literals, only need the nibble *outPtr++ = (numLiterals << 4) | 0x0f; } else { + // 15-255 literals, need the extra byte *outPtr++ = 0xff; *outPtr++ = numLiterals - INITIAL_LEN; } @@ -570,7 +574,7 @@ size_t compressBufferGreedily(uint8_t* outBuf, const uint8_t* inBuf, if (numLiterals == MAX_LITERAL_LEN) { // We've maxed out the literal string length. Emit // the previously literals with an empty match indicator. - DBUG((" max literals reached")); + DBUG((" max literals reached\n")); *outPtr++ = 0xff; // literal-len=15, match-len=15 *outPtr++ = MAX_LITERAL_LEN - INITIAL_LEN; // 240 memcpy(outPtr, literalSrcPtr, numLiterals); @@ -630,9 +634,12 @@ size_t compressBufferGreedily(uint8_t* outBuf, const uint8_t* inBuf, // Dump any remaining literals, with the end-of-data indicator // in the match len. - if (numLiterals <= INITIAL_LEN) { + DBUG(("ending with numLiterals=%zd\n", numLiterals)); + if (numLiterals < INITIAL_LEN) { + // 0-14 literals, only need the nibble *outPtr++ = (numLiterals << 4) | 0x0f; } else { + // 15-255 literals, need the extra byte *outPtr++ = 0xff; *outPtr++ = numLiterals - INITIAL_LEN; } @@ -673,7 +680,9 @@ size_t uncompressBuffer(uint8_t* outBuf, const uint8_t* inBuf, size_t inLen) DBUG(("Literals: %d\n", literalLen)); if ((outPtr - outBuf) + literalLen > (long) MAX_SIZE || (inPtr - inBuf) + literalLen > (long) inLen) { - fprintf(stderr, "Buffer overrun\n"); + fprintf(stderr, + "Buffer overrun L: outPosn=%zd inPosn=%zd len=%d inLen=%ld\n", + outPtr - outBuf, inPtr - inBuf, literalLen, inLen); return 0; } memcpy(outPtr, inPtr, literalLen); @@ -707,7 +716,9 @@ size_t uncompressBuffer(uint8_t* outBuf, const uint8_t* inBuf, size_t inLen) uint8_t* srcPtr = outBuf + matchOffset; if ((outPtr - outBuf) + matchLen > MAX_SIZE || (srcPtr - outBuf) + matchLen > MAX_SIZE) { - fprintf(stderr, "Buffer overrun\n"); + fprintf(stderr, + "Buffer overrun M: outPosn=%zd srcPosn=%zd len=%d\n", + outPtr - outBuf, srcPtr - outBuf, matchLen); return 0; } while (matchLen-- != 0) { diff --git a/make-test-pic.cpp b/make-test-pic.cpp index eb23b75..696077c 100644 --- a/make-test-pic.cpp +++ b/make-test-pic.cpp @@ -14,6 +14,74 @@ const char* TEST_ALL_ZERO = "allzero#060000"; const char* TEST_ALL_GREEN = "allgreen#060000"; const char* TEST_NO_MATCH = "nomatch#060000"; +const char* TEST_HALF_HALF = "halfhalf#060000"; + +// Generates a pattern that foils the matcher. +static void WriteNoMatch(FILE* fp) +{ + for (int ic = 0; ic < 252; ic++) { + putc(ic, fp); + putc(ic+1, fp); + putc(ic+2, fp); + putc(ic+3, fp); + } + // 1008 + for (int ic = 0; ic < 252; ic++) { + putc(ic, fp); + putc(ic+2, fp); + putc(ic+1, fp); + putc(ic+3, fp); + } + // 2016 + for (int ic = 0; ic < 252; ic++) { + putc(ic, fp); + putc(ic+1, fp); + putc(ic+3, fp); + putc(ic+2, fp); + } + // 3024 + for (int ic = 0; ic < 252; ic++) { + putc(ic, fp); + putc(ic+3, fp); + putc(ic+2, fp); + putc(ic+1, fp); + } + // 4032 + for (int ic = 0; ic < 252; ic++) { + putc(ic, fp); + putc(ic+3, fp); + putc(ic+1, fp); + putc(ic+2, fp); + } + // 5040 + for (int ic = 0; ic < 252; ic++) { + putc(ic+1, fp); + putc(ic, fp); + putc(ic+2, fp); + putc(ic+3, fp); + } + // 6048 + for (int ic = 0; ic < 252; ic++) { + putc(ic+1, fp); + putc(ic+2, fp); + putc(ic, fp); + putc(ic+3, fp); + } + // 7056 + for (int ic = 0; ic < 252; ic++) { + putc(ic+1, fp); + putc(ic+2, fp); + putc(ic+3, fp); + putc(ic, fp); + } + // 8064 + for (int ic = 0; ic < 32; ic++) { + putc(ic+2, fp); + putc(ic+1, fp); + putc(ic+3, fp); + putc(ic, fp); + } +} int main() { @@ -44,71 +112,22 @@ int main() printf("NOT overwriting %s\n", TEST_NO_MATCH); } else { fp = fopen(TEST_NO_MATCH, "w"); - for (int ic = 0; ic < 252; ic++) { - putc(ic, fp); - putc(ic+1, fp); - putc(ic+2, fp); - putc(ic+3, fp); - } - // 1008 - for (int ic = 0; ic < 252; ic++) { - putc(ic, fp); - putc(ic+2, fp); - putc(ic+1, fp); - putc(ic+3, fp); - } - // 2016 - for (int ic = 0; ic < 252; ic++) { - putc(ic, fp); - putc(ic+1, fp); - putc(ic+3, fp); - putc(ic+2, fp); - } - // 3024 - for (int ic = 0; ic < 252; ic++) { - putc(ic, fp); - putc(ic+3, fp); - putc(ic+2, fp); - putc(ic+1, fp); - } - // 4032 - for (int ic = 0; ic < 252; ic++) { - putc(ic, fp); - putc(ic+3, fp); - putc(ic+1, fp); - putc(ic+2, fp); - } - // 5040 - for (int ic = 0; ic < 252; ic++) { - putc(ic+1, fp); - putc(ic, fp); - putc(ic+2, fp); - putc(ic+3, fp); - } - // 6048 - for (int ic = 0; ic < 252; ic++) { - putc(ic+1, fp); - putc(ic+2, fp); - putc(ic, fp); - putc(ic+3, fp); - } - // 7056 - for (int ic = 0; ic < 252; ic++) { - putc(ic+1, fp); - putc(ic+2, fp); - putc(ic+3, fp); - putc(ic, fp); - } - // 8064 - for (int ic = 0; ic < 32; ic++) { - putc(ic+2, fp); - putc(ic+1, fp); - putc(ic+3, fp); - putc(ic, fp); - } + WriteNoMatch(fp); fclose(fp); } + if (access(TEST_HALF_HALF, F_OK) == 0) { + printf("NOT overwriting %s\n", TEST_HALF_HALF); + } else { + fp = fopen(TEST_HALF_HALF, "w"); + for (int i = 0; i < 4096; i++) { + putc(0x00, fp); + } + WriteNoMatch(fp); + fclose(fp); + truncate(TEST_HALF_HALF, 8192); + } + + return 0; } -