From ec9c833f58299ac68b328fa57ff323ea065ce0e2 Mon Sep 17 00:00:00 2001 From: Robert Greene Date: Sun, 22 Jun 2008 17:57:23 +0000 Subject: [PATCH] Adding basic LZW and RLE decompression algorithms implemented as InputStreams. The tests aren't quite test, but do verify they are functional. --- .../com/webcodepro/shrinkit/NufxScan.java | 30 ++++- .../shrinkit/io/BitInputStream.java | 89 +++++++++++++++ .../shrinkit/io/LzwInputStream.java | 106 ++++++++++++++++++ .../shrinkit/io/RleInputStream.java | 54 +++++++++ .../shrinkit/io/BitInputStreamTest.java | 78 +++++++++++++ .../shrinkit/io/HGR.COLORS-thread-sample.bin | Bin 0 -> 168 bytes .../com/webcodepro/shrinkit/io/LzwTest.java | 98 ++++++++++++++++ .../com/webcodepro/shrinkit/io/RleTest.java | 78 +++++++++++++ 8 files changed, 531 insertions(+), 2 deletions(-) create mode 100644 src/com/webcodepro/shrinkit/io/BitInputStream.java create mode 100644 src/com/webcodepro/shrinkit/io/LzwInputStream.java create mode 100644 src/com/webcodepro/shrinkit/io/RleInputStream.java create mode 100644 test_src/com/webcodepro/shrinkit/io/BitInputStreamTest.java create mode 100644 test_src/com/webcodepro/shrinkit/io/HGR.COLORS-thread-sample.bin create mode 100644 test_src/com/webcodepro/shrinkit/io/LzwTest.java create mode 100644 test_src/com/webcodepro/shrinkit/io/RleTest.java diff --git a/local_src/com/webcodepro/shrinkit/NufxScan.java b/local_src/com/webcodepro/shrinkit/NufxScan.java index 285d393..2b29d0c 100644 --- a/local_src/com/webcodepro/shrinkit/NufxScan.java +++ b/local_src/com/webcodepro/shrinkit/NufxScan.java @@ -12,10 +12,16 @@ import java.io.InputStreamReader; * Scan through the directories in NufxScan.txt, looking for * *.SHK and *.SDK files. When one is found, do a file listing * (including compression types) and dump to screen. + *

+ * Adding some minor hard-coded searching capabilities. * * @author robgreene@users.sourceforge.net */ public class NufxScan { + private static File archiveWithSmallestCompressedFile; + private static String smallestCompressedFilename; + private static long sizeOfSmallestCompressedFile; + public static void main(String[] args) throws IOException { BufferedReader r = new BufferedReader(new InputStreamReader(NufxScan.class.getResourceAsStream("NufxScan.txt"))); String line = r.readLine(); @@ -38,6 +44,12 @@ public class NufxScan { for (File file : files) { display(file); } + if (sizeOfSmallestCompressedFile != 0) { + System.out.printf("\n\nSmallest compressed file:\n"); + System.out.printf("Archive = %s\n", archiveWithSmallestCompressedFile.getAbsoluteFile()); + System.out.printf("Filename = %s\n", smallestCompressedFilename); + System.out.printf("Size = %08x (%d)\n", sizeOfSmallestCompressedFile, sizeOfSmallestCompressedFile); + } } private static void display(File archive) throws IOException { @@ -47,29 +59,43 @@ public class NufxScan { throw new IOException("Unable to locate '" + archive.getAbsoluteFile() + "'"); } NuFileArchive a = new NuFileArchive(is); - System.out.println("Ver# Threads FSId FSIn Access FileType ExtraTyp Stor Thread Formats..... Filename"); - System.out.println("==== ======== ==== ==== ======== ======== ======== ==== =================== =============================="); + System.out.println("Ver# Threads FSId FSIn Access FileType ExtraTyp Stor Thread Formats..... OrigSize CompSize Filename"); + System.out.println("==== ======== ==== ==== ======== ======== ======== ==== =================== ======== ======== =============================="); for (HeaderBlock b : a.getHeaderBlocks()) { System.out.printf("%04x %08x %04x %04x %08x %08x %08x %04x ", b.getVersionNumber(), b.getTotalThreads(), b.getFileSysId(), b.getFileSysInfo(), b.getAccess(), b.getFileType(), b.getExtraType(), b.getStorageType()); int threadsPrinted = 0; String filename = b.getFilename(); + long origSize = 0; + long compSize = 0; + boolean compressed = false; for (ThreadRecord r : b.getThreadRecords()) { threadsPrinted++; System.out.printf("%04x ", r.getThreadFormat().getThreadFormat()); + compressed |= (r.getThreadFormat() != ThreadFormat.UNCOMPRESSED); if (r.getThreadKind() == ThreadKind.FILENAME) { filename = r.getText(); } + if (r.getThreadClass() == ThreadClass.DATA) { + origSize+= r.getThreadEof(); + compSize+= r.getCompThreadEof(); + } } while (threadsPrinted < 4) { System.out.printf(" "); threadsPrinted++; } + System.out.printf("%08x %08x ", origSize, compSize); if (filename == null || filename.length() == 0) { filename = ""; } System.out.println(filename); + if (compressed && (sizeOfSmallestCompressedFile == 0 || compSize < sizeOfSmallestCompressedFile)) { + sizeOfSmallestCompressedFile = compSize; + archiveWithSmallestCompressedFile = archive; + smallestCompressedFilename = filename; + } } System.out.println(); } diff --git a/src/com/webcodepro/shrinkit/io/BitInputStream.java b/src/com/webcodepro/shrinkit/io/BitInputStream.java new file mode 100644 index 0000000..c976432 --- /dev/null +++ b/src/com/webcodepro/shrinkit/io/BitInputStream.java @@ -0,0 +1,89 @@ +package com.webcodepro.shrinkit.io; + +import java.io.IOException; +import java.io.InputStream; + +/** + * The BitInputStream allows varying bit sizes to be pulled out of the + * wrapped InputStream. This is useful for LZW type compression algorithms + * where 9-12 bit codes are used instead of the 8-bit byte. + *

+ * Warning: The read(byte[]) and read(byte[], int, int) + * methods of InputStream will not work appropriately with any + * bit size > 8 bits. + * + * @author robgreene@users.sourceforge.net + */ +public class BitInputStream extends InputStream { + /** The low-tech way to compute a bit mask. Allowing up to 16 bits at this time. */ + private static int[] BIT_MASKS = new int[] { + 0x0000, 0x0001, 0x0003, 0x0007, 0x000f, + 0x001f, 0x003f, 0x007f, 0x00ff, 0x01ff, + 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, + 0x7fff, 0xffff + }; + + /** Our source of data. */ + private InputStream is; + /** The number of bits to read for a request. This can be adjusted dynamically. */ + private int requestedNumberOfBits; + /** The current bit mask to use when returning a read() request. */ + private int bitMask; + /** The buffer containing our bits. An int allows 32 bits which should cover up to a 24 bit read if my math is correct. :-) */ + private int data = 0; + /** Number of bits remaining in our buffer */ + private int bitsOfData = 0; + + /** + * Create a BitInputStream wrapping the given InputStream + * and reading the number of bits specified. + */ + public BitInputStream(InputStream is, int startingNumberOfBits) { + this.is = is; + setRequestedNumberOfBits(startingNumberOfBits); + } + + /** + * Set the number of bits to be read with each call to read(). + */ + public void setRequestedNumberOfBits(int numberOfBits) { + this.requestedNumberOfBits = numberOfBits; + this.bitMask = BIT_MASKS[numberOfBits]; + } + + /** + * Increase the requested number of bits by one. + * This is the general usage and prevents client from needing to track + * the requested number of bits or from making various method calls. + */ + public void increaseRequestedNumberOfBits() { + setRequestedNumberOfBits(requestedNumberOfBits + 1); + } + + /** + * Answer with the current bit mask for the current bit size. + */ + public int getBitMask() { + return bitMask; + } + + /** + * Read a number of bits off of the wrapped InputStream. + */ + public int read() throws IOException { + while (bitsOfData < requestedNumberOfBits) { + int b = is.read(); + if (b == -1) return b; + if (bitsOfData > 0) { + b <<= bitsOfData; + } + data|= b; + bitsOfData+= 8; + } + int b = data & bitMask; + data >>= requestedNumberOfBits; + bitsOfData-= requestedNumberOfBits; + return b; + } +} + diff --git a/src/com/webcodepro/shrinkit/io/LzwInputStream.java b/src/com/webcodepro/shrinkit/io/LzwInputStream.java new file mode 100644 index 0000000..03cc484 --- /dev/null +++ b/src/com/webcodepro/shrinkit/io/LzwInputStream.java @@ -0,0 +1,106 @@ +package com.webcodepro.shrinkit.io; + +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.Queue; +import java.util.concurrent.ConcurrentLinkedQueue; + +/** + * This is the generic Shrinkit LZW decompression algorithm. + * It does not deal with the vagaries of the LZW/1 and LZW/2 data streams. + * It does, however, deal with dictionary clears (0x100) and the + * BitInputStream bit sizes. + * + * @author robgreene@users.sourceforge.net + */ +public class LzwInputStream extends InputStream { + private BitInputStream is; + private List dictionary; + private Queue outputBuffer = new ConcurrentLinkedQueue(); + // See Wikipedia entry on LZW for variable naming + private int k; + private int[] w; + private int[] entry; + + /** + * Create the LzwInputStream based on the given + * BitInputStream. + * @see BitInputStream + */ + public LzwInputStream(BitInputStream is) { + this.is = is; + } + + /** + * Answer with the next byte from the (now) decompressed input stream. + */ + public int read() throws IOException { + if (outputBuffer.isEmpty()) { + fillBuffer(); + } + return outputBuffer.remove(); + } + + /** + * Fill the buffer up with some decompressed data. + * This may range from one byte to many bytes, depending on what is in the + * dictionary. + * @see http://en.wikipedia.org/wiki/Lzw for the general algorithm + */ + public void fillBuffer() throws IOException { + if (dictionary == null) { + is.setRequestedNumberOfBits(9); + // Setup default dictionary for all bytes + dictionary = new ArrayList(); + for (short i=0; i<256; i++) dictionary.add(new int[] { i }); + dictionary.add(new int[] { 0x100 }); // 0x100 not used by NuFX + // Setup for decompression; + k = is.read(); + outputBuffer.add(k); + if (k == -1) return; + w = new int[] { k }; + } + // LZW decompression + k = is.read(); + if (k == -1) { + outputBuffer.add(k); + return; + } + if (k == 0x100) { + clearDictionary(); + fillBuffer(); // Warning: recursive call + return; + } + if (k < dictionary.size()) { + entry = dictionary.get(k); + } else if (k == dictionary.size()) { + entry = Arrays.copyOf(w, w.length+1); + entry[w.length] = w[0]; + } else { + throw new IOException("Invalid code of <" + k + "> encountered"); + } + for (int i : entry) outputBuffer.add(i); + int[] newEntry = Arrays.copyOf(w, w.length+1); + newEntry[w.length] = entry[0]; + dictionary.add(newEntry); + w = entry; + // Exclusive the current bitmask against the new dictionary size -- if all bits are + // on, we'll get 0. (That is, all 9 bits on is 0x01ff exclusive or bit mask of 0x01ff + // yields 0x0000.) This tells us we need to increase the number of bits we're puling + // from the bit stream. + if ((dictionary.size() ^ is.getBitMask()) == 0) { + is.increaseRequestedNumberOfBits(); + } + } + + /** + * Clear out the dictionary. It will be rebuilt on the next call to + * fillBuffer. + */ + public void clearDictionary() { + dictionary = null; + } +} diff --git a/src/com/webcodepro/shrinkit/io/RleInputStream.java b/src/com/webcodepro/shrinkit/io/RleInputStream.java new file mode 100644 index 0000000..6e3939d --- /dev/null +++ b/src/com/webcodepro/shrinkit/io/RleInputStream.java @@ -0,0 +1,54 @@ +package com.webcodepro.shrinkit.io; + +import java.io.IOException; +import java.io.InputStream; + +import com.webcodepro.shrinkit.ByteSource; + +/** + * The RleInputStream handles the NuFX RLE data stream. + * This data stream is byte oriented. If a repeat occurs, + * the data stream will contain the marker byte, byte to + * repeat, and the number of repeats (zero based; ie, $00=1, + * $01=2, ... $ff=256). The default marker is $DB. + * + * @author robgreene@users.sourceforge.net + */ +public class RleInputStream extends InputStream { + private ByteSource bs; + private int escapeChar; + private int repeatedByte; + private int numBytes = -1; + + /** + * Create an RLE input stream with the default marker byte. + */ + public RleInputStream(ByteSource bs) { + this(bs, 0xdb); + } + /** + * Create an RLE input stream with the specified marker byte. + */ + public RleInputStream(ByteSource bs, int escapeChar) { + this.bs = bs; + this.escapeChar = escapeChar; + } + + /** + * Read the next byte from the input stream. + */ + public int read() throws IOException { + if (numBytes == -1) { + int b = bs.read(); + if (b == escapeChar) { + repeatedByte = bs.read(); + numBytes = bs.read(); + } else { + return b; + } + } + numBytes--; + return repeatedByte; + } + +} diff --git a/test_src/com/webcodepro/shrinkit/io/BitInputStreamTest.java b/test_src/com/webcodepro/shrinkit/io/BitInputStreamTest.java new file mode 100644 index 0000000..3b4e5b0 --- /dev/null +++ b/test_src/com/webcodepro/shrinkit/io/BitInputStreamTest.java @@ -0,0 +1,78 @@ +package com.webcodepro.shrinkit.io; + +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import junit.framework.TestCase; + +/** + * Exercise the BitInputStream. + * + * @author robgreene@users.sourceforge.net + */ +public class BitInputStreamTest extends TestCase { + public void test1() throws IOException { + byte[] data = new byte[] { + 0x01, 0x01, 0x01, 0x01, + 0x01, 0x01, 0x01, 0x01 + }; + BitInputStream is = new BitInputStream(new ByteArrayInputStream(data), 9); + // 8-bit groups: 00000001 00000001 00000001 00000001 00000001 00000001 00000001 00000001 + // 9-bit groups: 100000001 010000000 001000000 000100000 000010000 000001000 000000100 0 + assertEquals(0x101, is.read()); + assertEquals(0x080, is.read()); + assertEquals(0x040, is.read()); + assertEquals(0x020, is.read()); + assertEquals(0x010, is.read()); + assertEquals(0x008, is.read()); + assertEquals(0x004, is.read()); + } + + public void testCheatin1() throws IOException { + byte[] data = new byte[] { + (byte)0x54, (byte)0x90, (byte)0x24, (byte)0x99, (byte)0x02, (byte)0x62, (byte)0x20, (byte)0x88, + (byte)0x80, (byte)0x45, (byte)0x40, (byte)0x5C, (byte)0x09, (byte)0x92, (byte)0x45, (byte)0x61, + (byte)0xC2, (byte)0x85, (byte)0x53, (byte)0x90, (byte)0x80, (byte)0x78, (byte)0x52, (byte)0x45, + (byte)0x0A, (byte)0x88, (byte)0x21, (byte)0x4C, (byte)0x9E, (byte)0x20, (byte)0x9C, (byte)0xC2, + (byte)0x42, (byte)0x61, (byte)0x90, (byte)0x88, (byte)0x13, (byte)0x2B, (byte)0x5E, (byte)0xCC, + (byte)0xB8, (byte)0xB1, (byte)0x23, (byte)0x44, (byte)0x89, (byte)0x14, (byte)0x2D, (byte)0x62, + (byte)0xD4, (byte)0x88, (byte)0xA4, (byte)0xC8, (byte)0x14, (byte)0x17, (byte)0x20, (byte)0x0E, + (byte)0x0A, (byte)0x24, (byte)0x68, (byte)0x10, (byte)0xA1, (byte)0xC7, (byte)0x86, (byte)0x57, + (byte)0x1E, (byte)0x7E, (byte)0x44, (byte)0x29, (byte)0x72, (byte)0x65, (byte)0x49, (byte)0x10, + (byte)0x53, (byte)0x9E, (byte)0x80, (byte)0x28, (byte)0x12, (byte)0x44, (byte)0x0A, (byte)0x93, + (byte)0x86, (byte)0x49, (byte)0x9C, (byte)0xC8, (byte)0x4C, (byte)0xD8, (byte)0xE4, (byte)0x89, + (byte)0x14, (byte)0x27, (byte)0x49, (byte)0x8F, (byte)0xB8, (byte)0xD8, (byte)0x06, (byte)0xE0, + (byte)0x1F, (byte)0x55, (byte)0xAB, (byte)0x55, (byte)0xAF, (byte)0x6A, (byte)0xCD, (byte)0xCA, + (byte)0x15, (byte)0xAB, (byte)0xD7, (byte)0xAD, (byte)0x5F, (byte)0xBB, (byte)0x52, (byte)0xC5, + (byte)0x03, (byte)0x00 + }; + BitInputStream is = new BitInputStream(new ByteArrayInputStream(data), 9); + int b = 0; + while (b != -1) { + b = is.read(); + System.out.printf("%04x ", b); + } + } + + /** + * Simply ensure that we read the right bit codes from the LZW stream. This starts with 9 bits and + * ultimately might work up to a 12 bit code. + */ + public void testBitDecoder() throws IOException { + BitInputStream is = new BitInputStream(new ByteArrayInputStream(getHgrColorsLzw1()), 9); + int[] expected = new int[] { 0x0db, 0x000, 0x007, 0x0db, 0x055, 0x103, 0x02a, 0x103, 0x000, 0x06f, 0x0db }; + for (int i=0; iUKk*+lGOWfr)_uNa`~@2GXB^m_dL6Bqk*qCC&xp!Dx^S6fi?+ULdUi#7tl_ zfDE974v_No(f4o<(sTCr@ec|H^3j2G6vJ&-2FBYAGB?6f7j!i;+{>}{5LkG~o}cO8 VWDmguF5RN~S!YfNDmN4}004iM8t?!B literal 0 HcmV?d00001 diff --git a/test_src/com/webcodepro/shrinkit/io/LzwTest.java b/test_src/com/webcodepro/shrinkit/io/LzwTest.java new file mode 100644 index 0000000..0b0625d --- /dev/null +++ b/test_src/com/webcodepro/shrinkit/io/LzwTest.java @@ -0,0 +1,98 @@ +package com.webcodepro.shrinkit.io; + +import java.io.ByteArrayInputStream; +import java.io.IOException; + +import junit.framework.TestCase; + +import com.webcodepro.shrinkit.ByteSource; + +/** + * Exercise the LZW encoder and decoders. + * + * @author robgreene@users.sourceforge.net + */ +public class LzwTest extends TestCase { + public void testLzwDecoder() throws IOException { + LzwInputStream is = new LzwInputStream(new BitInputStream(new ByteArrayInputStream(getHgrColorsLzw1()), 9)); + int[] expected = getHgrColorsUncompressed(); + + int[] actual = new int[expected.length]; + for (int i=0; i 0) System.out.println(); + System.out.printf("%08x: ", bytes); + } + System.out.printf("%02x ", b); + bytes++; + } + } + + public void testLzwDecoder3() throws IOException { + LzwInputStream is = new LzwInputStream(new BitInputStream(new ByteArrayInputStream(getTextFileLzw1()), 9)); + System.out.printf("\n\nText File decoded...\n\n"); + int i = 0; + int b = 0; + while (b != -1) { + b = is.read(); + if (b != -1) System.out.printf("$%04x: %02x (%c)\n", i++, b, b); + } + System.out.printf("** END **"); + } + + protected byte[] getHgrColorsLzw1() { + return new byte[] { + (byte)0xdb, 0x00, 0x1c, (byte)0xd8, 0x56, 0x65, (byte)0xa0, (byte)0x8a, + (byte)0x81, 0x00, (byte)0xde, 0x6c, 0x3b, 0x48, 0x10, (byte)0xa1, + (byte)0xc2, 0x3f, 0x0f, 0x02, (byte)0xfe, (byte)0x93, 0x48, 0x11, + (byte)0xc0, 0x44, (byte)0x8b, 0x15, 0x2f, 0x6a, (byte)0xcc, (byte)0xc8, + 0x11, 0x23, (byte)0x80, 0x73, 0x00 + }; + } + protected int[] getHgrColorsUncompressed() { + return new int[] { + 0xdb, 0x00, 0x07, 0xdb, 0x55, 0x07, 0xdb, 0x2a, + 0x07, 0xdb, 0x00, 0x6f, 0xdb, 0x2a, 0x07, 0xdb, + 0x55, 0x07, 0xdb, 0x00, 0x6f, 0xdb, 0x7f, 0x0f, + 0xdb, 0x00, 0xff, 0xdb, 0x00, 0xff, 0xdb, 0x00, + 0xff, 0xdb, 0x00, 0xff, 0xdb, 0x00, 0xff, 0xdb, + 0x00, 0xff, 0xdb, 0x00, 0xff, 0xdb, 0x00, 0xff, + 0xdb, 0x00, 0xff, 0xdb, 0x00, 0xff, 0xdb, 0x00, + 0xff, 0xdb, 0x00, 0xff, 0xdb, 0x00, 0xff, 0xdb, + 0x00, 0xe7 + }; + } + + protected byte[] getTextFileLzw1() { + return new byte[] { + 0x54, (byte)0x90, 0x24, (byte)0x99, 0x02, 0x62, 0x20, (byte)0x88, + (byte)0x80, 0x45, 0x40, 0x5C, 0x09, (byte)0x92, 0x45, 0x61, + (byte)0xC2, (byte)0x85, 0x53, (byte)0x90, (byte)0x80, 0x78, 0x52, 0x45, + 0x0A, (byte)0x88, 0x21, 0x4C, (byte)0x9E, 0x20, (byte)0x9C, (byte)0xC2, + 0x42, 0x61, (byte)0x90, (byte)0x88, 0x13, 0x2B, 0x5E, (byte)0xCC, + (byte)0xB8, (byte)0xB1, 0x23, 0x44, (byte)0x89, 0x14, 0x2D, 0x62, + (byte)0xD4, (byte)0x88, (byte)0xA4, (byte)0xC8, 0x14, 0x17, 0x20, 0x0E, + 0x0A, 0x24, 0x68, 0x10, (byte)0xA1, (byte)0xC7, (byte)0x86, 0x57, + 0x1E, 0x7E, 0x44, 0x29, 0x72, 0x65, 0x49, 0x10, + 0x53, (byte)0x9E, (byte)0x80, 0x28, 0x12, 0x44, 0x0A, (byte)0x93, + (byte)0x86, 0x49, (byte)0x9C, (byte)0xC8, 0x4C, (byte)0xD8, (byte)0xE4, (byte)0x89, + 0x14, 0x27, 0x49, (byte)0x8F, (byte)0xB8, (byte)0xD8, 0x06, (byte)0xE0, + 0x1F, 0x55, (byte)0xAB, 0x55, (byte)0xAF, 0x6A, (byte)0xCD, (byte)0xCA, + 0x15, (byte)0xAB, (byte)0xD7, (byte)0xAD, 0x5F, (byte)0xBB, 0x52, (byte)0xC5, + 0x03, 0x00 + }; + } +} diff --git a/test_src/com/webcodepro/shrinkit/io/RleTest.java b/test_src/com/webcodepro/shrinkit/io/RleTest.java new file mode 100644 index 0000000..fb315df --- /dev/null +++ b/test_src/com/webcodepro/shrinkit/io/RleTest.java @@ -0,0 +1,78 @@ +package com.webcodepro.shrinkit.io; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; + +import junit.framework.TestCase; + +import com.webcodepro.shrinkit.ByteSource; + +/** + * Exercise the RLE encoder and decoders. + * + * @author robgreene@users.sourceforge.net + */ +public class RleTest extends TestCase { + public void testInputStream() throws IOException { + InputStream is = new RleInputStream(new ByteSource(getPatternFileRle())); + ByteArrayOutputStream os = new ByteArrayOutputStream(); + copy(is,os); + byte[] expected = getPatternFileUncompressed(); + byte[] actual = os.toByteArray(); + assertEquals(expected, actual); + } + + private void assertEquals(byte[] expected, byte[] actual) { + assertEquals(expected.length, actual.length); + for (int i=0; i