Adding basic LZW and RLE decompression algorithms implemented as InputStreams. The tests aren't quite real tests, but they do verify that the streams are functional.

This commit is contained in:
Robert Greene 2008-06-22 17:57:23 +00:00
parent ad3558f84a
commit ec9c833f58
8 changed files with 531 additions and 2 deletions

View File

@ -12,10 +12,16 @@ import java.io.InputStreamReader;
* Scan through the directories in NufxScan.txt, looking for
* *.SHK and *.SDK files. When one is found, do a file listing
* (including compression types) and dump to screen.
* <p>
* Adding some minor hard-coded searching capabilities.
*
* @author robgreene@users.sourceforge.net
*/
public class NufxScan {
private static File archiveWithSmallestCompressedFile;
private static String smallestCompressedFilename;
private static long sizeOfSmallestCompressedFile;
public static void main(String[] args) throws IOException {
BufferedReader r = new BufferedReader(new InputStreamReader(NufxScan.class.getResourceAsStream("NufxScan.txt")));
String line = r.readLine();
@ -38,6 +44,12 @@ public class NufxScan {
for (File file : files) {
display(file);
}
if (sizeOfSmallestCompressedFile != 0) {
System.out.printf("\n\nSmallest compressed file:\n");
System.out.printf("Archive = %s\n", archiveWithSmallestCompressedFile.getAbsoluteFile());
System.out.printf("Filename = %s\n", smallestCompressedFilename);
System.out.printf("Size = %08x (%d)\n", sizeOfSmallestCompressedFile, sizeOfSmallestCompressedFile);
}
}
private static void display(File archive) throws IOException {
@ -47,29 +59,43 @@ public class NufxScan {
throw new IOException("Unable to locate '" + archive.getAbsoluteFile() + "'");
}
NuFileArchive a = new NuFileArchive(is);
System.out.println("Ver# Threads FSId FSIn Access FileType ExtraTyp Stor Thread Formats..... Filename");
System.out.println("==== ======== ==== ==== ======== ======== ======== ==== =================== ==============================");
System.out.println("Ver# Threads FSId FSIn Access FileType ExtraTyp Stor Thread Formats..... OrigSize CompSize Filename");
System.out.println("==== ======== ==== ==== ======== ======== ======== ==== =================== ======== ======== ==============================");
for (HeaderBlock b : a.getHeaderBlocks()) {
System.out.printf("%04x %08x %04x %04x %08x %08x %08x %04x ",
b.getVersionNumber(), b.getTotalThreads(), b.getFileSysId(), b.getFileSysInfo(), b.getAccess(),
b.getFileType(), b.getExtraType(), b.getStorageType());
int threadsPrinted = 0;
String filename = b.getFilename();
long origSize = 0;
long compSize = 0;
boolean compressed = false;
for (ThreadRecord r : b.getThreadRecords()) {
threadsPrinted++;
System.out.printf("%04x ", r.getThreadFormat().getThreadFormat());
compressed |= (r.getThreadFormat() != ThreadFormat.UNCOMPRESSED);
if (r.getThreadKind() == ThreadKind.FILENAME) {
filename = r.getText();
}
if (r.getThreadClass() == ThreadClass.DATA) {
origSize+= r.getThreadEof();
compSize+= r.getCompThreadEof();
}
}
while (threadsPrinted < 4) {
System.out.printf(" ");
threadsPrinted++;
}
System.out.printf("%08x %08x ", origSize, compSize);
if (filename == null || filename.length() == 0) {
filename = "<Unknown>";
}
System.out.println(filename);
if (compressed && (sizeOfSmallestCompressedFile == 0 || compSize < sizeOfSmallestCompressedFile)) {
sizeOfSmallestCompressedFile = compSize;
archiveWithSmallestCompressedFile = archive;
smallestCompressedFilename = filename;
}
}
System.out.println();
}

View File

@ -0,0 +1,89 @@
package com.webcodepro.shrinkit.io;
import java.io.IOException;
import java.io.InputStream;
/**
 * The BitInputStream allows varying bit sizes to be pulled out of the
 * wrapped InputStream. This is useful for LZW type compression algorithms
 * where 9-12 bit codes are used instead of the 8-bit byte.
 * <p>
 * Bits are consumed least-significant first: the first byte read from the
 * wrapped stream supplies the low-order bits of the first code.
 * <p>
 * Warning: The <code>read(byte[])</code> and <code>read(byte[], int, int)</code>
 * methods of <code>InputStream</code> will not work appropriately with any
 * bit size &gt; 8 bits.
 *
 * @author robgreene@users.sourceforge.net
 */
public class BitInputStream extends InputStream {
    /**
     * Bit masks indexed by bit count: entry <code>n</code> has the low
     * <code>n</code> bits set. Bit counts of 0 through 16 are supported.
     */
    private static final int[] BIT_MASKS = new int[] {
        0x0000, 0x0001, 0x0003, 0x0007, 0x000f,
        0x001f, 0x003f, 0x007f, 0x00ff, 0x01ff,
        0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff,
        0x7fff, 0xffff
    };

    /** Our source of data. */
    private final InputStream is;
    /** The number of bits to read for a request. This can be adjusted dynamically. */
    private int requestedNumberOfBits;
    /** The current bit mask to use when returning a <code>read()</code> request. */
    private int bitMask;
    /**
     * The buffer containing our bits. At most 15 leftover bits plus one
     * freshly read byte (23 bits) are ever held, so an int is sufficient.
     */
    private int data = 0;
    /** Number of bits remaining in our buffer. */
    private int bitsOfData = 0;

    /**
     * Create a BitInputStream wrapping the given <code>InputStream</code>
     * and reading the number of bits specified.
     *
     * @param is the stream supplying the raw bytes
     * @param startingNumberOfBits the initial code width, 0 through 16
     * @throws ArrayIndexOutOfBoundsException if the width is outside 0-16
     */
    public BitInputStream(InputStream is, int startingNumberOfBits) {
        this.is = is;
        // Assigned directly rather than through the overridable setter so a
        // subclass never observes a half-constructed instance.
        this.bitMask = BIT_MASKS[startingNumberOfBits];
        this.requestedNumberOfBits = startingNumberOfBits;
    }

    /**
     * Set the number of bits to be read with each call to <code>read()</code>.
     *
     * @param numberOfBits the new code width, 0 through 16
     * @throws ArrayIndexOutOfBoundsException if the width is outside 0-16
     */
    public void setRequestedNumberOfBits(int numberOfBits) {
        // Look the mask up first so an invalid width leaves the state untouched.
        int mask = BIT_MASKS[numberOfBits];
        this.requestedNumberOfBits = numberOfBits;
        this.bitMask = mask;
    }

    /**
     * Increase the requested number of bits by one.
     * This is the general usage and prevents client from needing to track
     * the requested number of bits or from making various method calls.
     *
     * @throws ArrayIndexOutOfBoundsException if the width would exceed 16
     */
    public void increaseRequestedNumberOfBits() {
        setRequestedNumberOfBits(requestedNumberOfBits + 1);
    }

    /**
     * Answer with the current bit mask for the current bit size.
     */
    public int getBitMask() {
        return bitMask;
    }

    /**
     * Read a number of bits off of the wrapped InputStream.
     *
     * @return the next code, or -1 when the wrapped stream ends before a
     *         complete code is available (leftover bits are not returned)
     * @throws IOException if the wrapped stream fails
     */
    @Override
    public int read() throws IOException {
        while (bitsOfData < requestedNumberOfBits) {
            int next = is.read();
            if (next == -1) {
                return -1;
            }
            // New bytes are stacked above the bits we are still holding.
            data |= next << bitsOfData;
            bitsOfData += 8;
        }
        int code = data & bitMask;
        data >>>= requestedNumberOfBits;
        bitsOfData -= requestedNumberOfBits;
        return code;
    }

    /**
     * Close the wrapped stream.
     *
     * @throws IOException if the wrapped stream fails to close
     */
    @Override
    public void close() throws IOException {
        is.close();
    }
}

View File

@ -0,0 +1,106 @@
package com.webcodepro.shrinkit.io;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Queue;
import java.util.concurrent.ConcurrentLinkedQueue;
/**
 * This is the generic Shrinkit LZW decompression algorithm.
 * It does not deal with the vagaries of the LZW/1 and LZW/2 data streams.
 * It does, however, deal with dictionary clears (0x100) and the
 * <code>BitInputStream</code> bit sizes.
 *
 * @author robgreene@users.sourceforge.net
 */
public class LzwInputStream extends InputStream {
    /** Code that makes the decoder throw away its dictionary and start over. */
    private static final int CLEAR_CODE = 0x100;
    /** Code width selected every time a fresh dictionary is built. */
    private static final int INITIAL_CODE_BITS = 9;

    private final BitInputStream is;
    /** Code table; <code>null</code> means it must be (re)built before decoding. */
    private List<int[]> dictionary;
    /** Decoded values waiting to be handed out by <code>read()</code>. */
    private final Queue<Integer> outputBuffer = new ConcurrentLinkedQueue<Integer>();
    // See Wikipedia entry on LZW for variable naming
    private int k;
    private int[] w;
    private int[] entry;

    /**
     * Create the <code>LzwInputStream</code> based on the given
     * <code>BitInputStream</code>.
     * @see BitInputStream
     */
    public LzwInputStream(BitInputStream is) {
        this.is = is;
    }

    /**
     * Answer with the next byte from the (now) decompressed input stream.
     *
     * @return the next decoded value, or -1 once the code stream is exhausted
     * @throws IOException if the wrapped stream fails or an invalid code is found
     */
    @Override
    public int read() throws IOException {
        if (outputBuffer.isEmpty()) {
            fillBuffer();
        }
        return outputBuffer.remove();
    }

    /**
     * Fill the buffer up with some decompressed data.
     * This may range from one byte to many bytes, depending on what is in the
     * dictionary. At end of input a single -1 is queued instead.
     *
     * @throws IOException if the wrapped stream fails or an invalid code is found
     * @see http://en.wikipedia.org/wiki/Lzw for the general algorithm
     */
    public void fillBuffer() throws IOException {
        // Loop (rather than recurse) across clear codes so that a long run of
        // them cannot grow the call stack.
        while (true) {
            if (dictionary == null) {
                is.setRequestedNumberOfBits(INITIAL_CODE_BITS);
                // Setup default dictionary for all bytes
                dictionary = new ArrayList<int[]>();
                for (int i = 0; i < 256; i++) {
                    dictionary.add(new int[] { i });
                }
                // Placeholder for the reserved slot 0x100, so that the first
                // learned entry lands at index 0x101.
                dictionary.add(new int[] { CLEAR_CODE });
                // Setup for decompression: the first code is emitted as-is.
                k = is.read();
                outputBuffer.add(k);
                if (k == -1) {
                    return;
                }
                w = new int[] { k };
            }
            // LZW decompression
            k = is.read();
            if (k == -1) {
                outputBuffer.add(k);
                return;
            }
            if (k != CLEAR_CODE) {
                break;
            }
            clearDictionary();
        }

        if (k < dictionary.size()) {
            entry = dictionary.get(k);
        } else if (k == dictionary.size()) {
            // The code refers to the entry that is about to be created:
            // it must be w followed by the first symbol of w.
            entry = Arrays.copyOf(w, w.length + 1);
            entry[w.length] = w[0];
        } else {
            throw new IOException("Invalid code of <" + k + "> encountered");
        }
        for (int value : entry) {
            outputBuffer.add(value);
        }

        int[] newEntry = Arrays.copyOf(w, w.length + 1);
        newEntry[w.length] = entry[0];
        dictionary.add(newEntry);
        w = entry;

        // Once the dictionary size equals the current bit mask (for example
        // 0x01ff while reading 9-bit codes), the next code no longer fits in
        // the current width, so widen the reads from the bit stream by one bit.
        if (dictionary.size() == is.getBitMask()) {
            is.increaseRequestedNumberOfBits();
        }
    }

    /**
     * Clear out the dictionary. It will be rebuilt on the next call to
     * <code>fillBuffer</code>.
     */
    public void clearDictionary() {
        dictionary = null;
    }

    /**
     * Close the wrapped <code>BitInputStream</code>.
     *
     * @throws IOException if the wrapped stream fails to close
     */
    @Override
    public void close() throws IOException {
        is.close();
    }
}

View File

@ -0,0 +1,54 @@
package com.webcodepro.shrinkit.io;
import java.io.IOException;
import java.io.InputStream;
import com.webcodepro.shrinkit.ByteSource;
/**
 * The RleInputStream handles the NuFX RLE data stream.
 * This data stream is byte oriented. If a repeat occurs,
 * the data stream will contain the marker byte, byte to
 * repeat, and the number of repeats (zero based; ie, $00=1,
 * $01=2, ... $ff=256). The default marker is $DB.
 *
 * @author robgreene@users.sourceforge.net
 */
public class RleInputStream extends InputStream {
    /** Marker byte used when the caller does not supply one. */
    private static final int DEFAULT_ESCAPE_CHAR = 0xdb;

    private final ByteSource bs;
    private final int escapeChar;
    /** The value being repeated by the run currently in progress. */
    private int repeatedByte;
    /**
     * Copies of <code>repeatedByte</code> still owed after the next one is
     * returned; -1 means no run is in progress.
     */
    private int numBytes = -1;

    /**
     * Create an RLE input stream with the default marker byte.
     */
    public RleInputStream(ByteSource bs) {
        this(bs, DEFAULT_ESCAPE_CHAR);
    }

    /**
     * Create an RLE input stream with the specified marker byte.
     */
    public RleInputStream(ByteSource bs, int escapeChar) {
        this.bs = bs;
        this.escapeChar = escapeChar;
    }

    /**
     * Read the next byte from the input stream.
     * <p>
     * A run that is cut short (the source ends right after the marker or
     * after the byte to repeat) is reported as end of stream.
     *
     * @return the next expanded byte, or whatever the source returns for a
     *         non-marker read (expected to be -1 at end of data)
     */
    @Override
    public int read() throws IOException {
        if (numBytes == -1) {
            int b = bs.read();
            if (b != escapeChar) {
                return b;
            }
            int value = bs.read();
            int count = bs.read();
            // Storing an end-of-data value as the count would push numBytes
            // past the -1 "no run" sentinel, and the stale byte would then be
            // returned forever; treat the truncated run as end of stream.
            if (value == -1 || count == -1) {
                return -1;
            }
            repeatedByte = value;
            numBytes = count;
        }
        numBytes--;
        return repeatedByte;
    }
}

View File

@ -0,0 +1,78 @@
package com.webcodepro.shrinkit.io;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import junit.framework.TestCase;
/**
 * Tests for the BitInputStream: fixed 9-bit reads over known byte patterns.
 *
 * @author robgreene@users.sourceforge.net
 */
public class BitInputStreamTest extends TestCase {
    /**
     * Eight bytes of 0x01 regrouped into 9-bit codes.
     */
    public void test1() throws IOException {
        byte[] ones = new byte[] {
            0x01, 0x01, 0x01, 0x01,
            0x01, 0x01, 0x01, 0x01
        };
        // 8-bit groups: 00000001 00000001 00000001 00000001 00000001 00000001 00000001 00000001
        // 9-bit groups: 100000001 010000000 001000000 000100000 000010000 000001000 000000100 0
        int[] codes = new int[] { 0x101, 0x080, 0x040, 0x020, 0x010, 0x008, 0x004 };
        BitInputStream bits = new BitInputStream(new ByteArrayInputStream(ones), 9);
        for (int code : codes) {
            assertEquals(code, bits.read());
        }
    }

    /**
     * Dumps the 9-bit codes of a sample stream to standard out; nothing is asserted.
     */
    public void testCheatin1() throws IOException {
        byte[] sample = new byte[] {
            (byte)0x54, (byte)0x90, (byte)0x24, (byte)0x99, (byte)0x02, (byte)0x62, (byte)0x20, (byte)0x88,
            (byte)0x80, (byte)0x45, (byte)0x40, (byte)0x5C, (byte)0x09, (byte)0x92, (byte)0x45, (byte)0x61,
            (byte)0xC2, (byte)0x85, (byte)0x53, (byte)0x90, (byte)0x80, (byte)0x78, (byte)0x52, (byte)0x45,
            (byte)0x0A, (byte)0x88, (byte)0x21, (byte)0x4C, (byte)0x9E, (byte)0x20, (byte)0x9C, (byte)0xC2,
            (byte)0x42, (byte)0x61, (byte)0x90, (byte)0x88, (byte)0x13, (byte)0x2B, (byte)0x5E, (byte)0xCC,
            (byte)0xB8, (byte)0xB1, (byte)0x23, (byte)0x44, (byte)0x89, (byte)0x14, (byte)0x2D, (byte)0x62,
            (byte)0xD4, (byte)0x88, (byte)0xA4, (byte)0xC8, (byte)0x14, (byte)0x17, (byte)0x20, (byte)0x0E,
            (byte)0x0A, (byte)0x24, (byte)0x68, (byte)0x10, (byte)0xA1, (byte)0xC7, (byte)0x86, (byte)0x57,
            (byte)0x1E, (byte)0x7E, (byte)0x44, (byte)0x29, (byte)0x72, (byte)0x65, (byte)0x49, (byte)0x10,
            (byte)0x53, (byte)0x9E, (byte)0x80, (byte)0x28, (byte)0x12, (byte)0x44, (byte)0x0A, (byte)0x93,
            (byte)0x86, (byte)0x49, (byte)0x9C, (byte)0xC8, (byte)0x4C, (byte)0xD8, (byte)0xE4, (byte)0x89,
            (byte)0x14, (byte)0x27, (byte)0x49, (byte)0x8F, (byte)0xB8, (byte)0xD8, (byte)0x06, (byte)0xE0,
            (byte)0x1F, (byte)0x55, (byte)0xAB, (byte)0x55, (byte)0xAF, (byte)0x6A, (byte)0xCD, (byte)0xCA,
            (byte)0x15, (byte)0xAB, (byte)0xD7, (byte)0xAD, (byte)0x5F, (byte)0xBB, (byte)0x52, (byte)0xC5,
            (byte)0x03, (byte)0x00
        };
        BitInputStream bits = new BitInputStream(new ByteArrayInputStream(sample), 9);
        int code;
        // The end-of-stream marker itself is printed too before the loop stops.
        do {
            code = bits.read();
            System.out.printf("%04x ", code);
        } while (code != -1);
    }

    /**
     * Checks the leading 9-bit codes of the HGR colors LZW stream. The full
     * stream starts with 9 bits and ultimately might work up to a 12 bit code.
     */
    public void testBitDecoder() throws IOException {
        int[] codes = new int[] { 0x0db, 0x000, 0x007, 0x0db, 0x055, 0x103, 0x02a, 0x103, 0x000, 0x06f, 0x0db };
        BitInputStream bits = new BitInputStream(new ByteArrayInputStream(getHgrColorsLzw1()), 9);
        int index = 0;
        while (index < codes.length) {
            assertEquals("Testing value #" + index, codes[index], bits.read());
            index++;
        }
    }

    /**
     * Answer with the LZW/1 compressed HGR colors sample.
     */
    protected byte[] getHgrColorsLzw1() {
        byte[] compressed = new byte[] {
            (byte)0xdb, 0x00, 0x1c, (byte)0xd8, 0x56, 0x65, (byte)0xa0, (byte)0x8a,
            (byte)0x81, 0x00, (byte)0xde, 0x6c, 0x3b, 0x48, 0x10, (byte)0xa1,
            (byte)0xc2, 0x3f, 0x0f, 0x02, (byte)0xfe, (byte)0x93, 0x48, 0x11,
            (byte)0xc0, 0x44, (byte)0x8b, 0x15, 0x2f, 0x6a, (byte)0xcc, (byte)0xc8,
            0x11, 0x23, (byte)0x80, 0x73, 0x00
        };
        return compressed;
    }
}

View File

@ -0,0 +1,98 @@
package com.webcodepro.shrinkit.io;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import junit.framework.TestCase;
import com.webcodepro.shrinkit.ByteSource;
/**
 * Tests for the LZW decoder, alone and chained into the RLE decoder.
 *
 * @author robgreene@users.sourceforge.net
 */
public class LzwTest extends TestCase {
    /**
     * Decodes the HGR colors sample and compares it with the known plain data.
     */
    public void testLzwDecoder() throws IOException {
        int[] expected = getHgrColorsUncompressed();
        BitInputStream bits = new BitInputStream(new ByteArrayInputStream(getHgrColorsLzw1()), 9);
        LzwInputStream lzw = new LzwInputStream(bits);
        int[] actual = new int[expected.length];
        int filled = 0;
        while (filled < actual.length) {
            actual[filled] = lzw.read();
            filled++;
        }
        // The stream must be exhausted before the contents are compared.
        assertEquals("Expecting end of stream", -1, lzw.read());
        for (int index = 0; index < expected.length; index++) {
            assertEquals("Testing value #" + index, expected[index], actual[index]);
        }
    }

    /**
     * Runs the HGR colors sample through LZW and then RLE, printing a hex
     * dump (16 values per row) to standard out; nothing is asserted.
     */
    public void testLzwDecoder2() throws IOException {
        BitInputStream bits = new BitInputStream(new ByteArrayInputStream(getHgrColorsLzw1()), 9);
        LzwInputStream lzw = new LzwInputStream(bits);
        RleInputStream rle = new RleInputStream(new ByteSource(lzw));
        int offset = 0;
        for (int value = rle.read(); value != -1; value = rle.read()) {
            if (offset % 16 == 0) {
                if (offset > 0) {
                    System.out.println();
                }
                System.out.printf("%08x: ", offset);
            }
            System.out.printf("%02x ", value);
            offset++;
        }
    }

    /**
     * Decodes the text file sample and prints every value to standard out;
     * nothing is asserted.
     */
    public void testLzwDecoder3() throws IOException {
        BitInputStream bits = new BitInputStream(new ByteArrayInputStream(getTextFileLzw1()), 9);
        LzwInputStream lzw = new LzwInputStream(bits);
        System.out.printf("\n\nText File decoded...\n\n");
        int offset = 0;
        for (int value = lzw.read(); value != -1; value = lzw.read()) {
            System.out.printf("$%04x: %02x (%c)\n", offset, value, value);
            offset++;
        }
        System.out.printf("** END **");
    }

    /**
     * Answer with the LZW/1 compressed HGR colors sample.
     */
    protected byte[] getHgrColorsLzw1() {
        byte[] compressed = new byte[] {
            (byte)0xdb, 0x00, 0x1c, (byte)0xd8, 0x56, 0x65, (byte)0xa0, (byte)0x8a,
            (byte)0x81, 0x00, (byte)0xde, 0x6c, 0x3b, 0x48, 0x10, (byte)0xa1,
            (byte)0xc2, 0x3f, 0x0f, 0x02, (byte)0xfe, (byte)0x93, 0x48, 0x11,
            (byte)0xc0, 0x44, (byte)0x8b, 0x15, 0x2f, 0x6a, (byte)0xcc, (byte)0xc8,
            0x11, 0x23, (byte)0x80, 0x73, 0x00
        };
        return compressed;
    }

    /**
     * Answer with the values expected from LZW-decoding the HGR colors sample
     * (still RLE encoded).
     */
    protected int[] getHgrColorsUncompressed() {
        int[] plain = new int[] {
            0xdb, 0x00, 0x07, 0xdb, 0x55, 0x07, 0xdb, 0x2a,
            0x07, 0xdb, 0x00, 0x6f, 0xdb, 0x2a, 0x07, 0xdb,
            0x55, 0x07, 0xdb, 0x00, 0x6f, 0xdb, 0x7f, 0x0f,
            0xdb, 0x00, 0xff, 0xdb, 0x00, 0xff, 0xdb, 0x00,
            0xff, 0xdb, 0x00, 0xff, 0xdb, 0x00, 0xff, 0xdb,
            0x00, 0xff, 0xdb, 0x00, 0xff, 0xdb, 0x00, 0xff,
            0xdb, 0x00, 0xff, 0xdb, 0x00, 0xff, 0xdb, 0x00,
            0xff, 0xdb, 0x00, 0xff, 0xdb, 0x00, 0xff, 0xdb,
            0x00, 0xe7
        };
        return plain;
    }

    /**
     * Answer with the LZW/1 compressed text file sample.
     */
    protected byte[] getTextFileLzw1() {
        byte[] compressed = new byte[] {
            0x54, (byte)0x90, 0x24, (byte)0x99, 0x02, 0x62, 0x20, (byte)0x88,
            (byte)0x80, 0x45, 0x40, 0x5C, 0x09, (byte)0x92, 0x45, 0x61,
            (byte)0xC2, (byte)0x85, 0x53, (byte)0x90, (byte)0x80, 0x78, 0x52, 0x45,
            0x0A, (byte)0x88, 0x21, 0x4C, (byte)0x9E, 0x20, (byte)0x9C, (byte)0xC2,
            0x42, 0x61, (byte)0x90, (byte)0x88, 0x13, 0x2B, 0x5E, (byte)0xCC,
            (byte)0xB8, (byte)0xB1, 0x23, 0x44, (byte)0x89, 0x14, 0x2D, 0x62,
            (byte)0xD4, (byte)0x88, (byte)0xA4, (byte)0xC8, 0x14, 0x17, 0x20, 0x0E,
            0x0A, 0x24, 0x68, 0x10, (byte)0xA1, (byte)0xC7, (byte)0x86, 0x57,
            0x1E, 0x7E, 0x44, 0x29, 0x72, 0x65, 0x49, 0x10,
            0x53, (byte)0x9E, (byte)0x80, 0x28, 0x12, 0x44, 0x0A, (byte)0x93,
            (byte)0x86, 0x49, (byte)0x9C, (byte)0xC8, 0x4C, (byte)0xD8, (byte)0xE4, (byte)0x89,
            0x14, 0x27, 0x49, (byte)0x8F, (byte)0xB8, (byte)0xD8, 0x06, (byte)0xE0,
            0x1F, 0x55, (byte)0xAB, 0x55, (byte)0xAF, 0x6A, (byte)0xCD, (byte)0xCA,
            0x15, (byte)0xAB, (byte)0xD7, (byte)0xAD, 0x5F, (byte)0xBB, 0x52, (byte)0xC5,
            0x03, 0x00
        };
        return compressed;
    }
}

View File

@ -0,0 +1,78 @@
package com.webcodepro.shrinkit.io;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import junit.framework.TestCase;
import com.webcodepro.shrinkit.ByteSource;
/**
 * Tests for the RLE decoder using a 4096 byte pattern file.
 *
 * @author robgreene@users.sourceforge.net
 */
public class RleTest extends TestCase {
    /**
     * Expands the RLE encoded pattern file and compares it with the
     * independently generated pattern.
     */
    public void testInputStream() throws IOException {
        ByteArrayOutputStream sink = new ByteArrayOutputStream();
        InputStream rle = new RleInputStream(new ByteSource(getPatternFileRle()));
        copy(rle, sink);
        assertEquals(getPatternFileUncompressed(), sink.toByteArray());
    }

    /**
     * Compare two byte arrays, length first and then element by element.
     */
    private void assertEquals(byte[] expected, byte[] actual) {
        assertEquals(expected.length, actual.length);
        int offset = 0;
        while (offset < expected.length) {
            assertEquals("Byte mismatch at offset " + offset, expected[offset], actual[offset]);
            offset++;
        }
    }

    /**
     * Move every byte from the input to the output until the input reports -1.
     */
    private void copy(InputStream is, OutputStream os) throws IOException {
        for (int value = is.read(); value != -1; value = is.read()) {
            os.write(value);
        }
    }

    /**
     * Answer with the RLE encoded pattern file: one marker/value/count
     * triple per run.
     */
    private byte[] getPatternFileRle() {
        byte[] encoded = new byte[] {
            (byte)0xdb, 0x01, (byte)0xfd,
            (byte)0xdb, 0x02, (byte)0xfc,
            (byte)0xdb, 0x03, (byte)0xfb,
            (byte)0xdb, 0x04, (byte)0xfa,
            (byte)0xdb, 0x05, (byte)0xf9,
            (byte)0xdb, 0x06, (byte)0xf8,
            (byte)0xdb, 0x07, (byte)0xf7,
            (byte)0xdb, 0x08, (byte)0xf6,
            (byte)0xdb, 0x09, (byte)0xf5,
            (byte)0xdb, 0x0a, (byte)0xf4,
            (byte)0xdb, 0x0b, (byte)0xf3,
            (byte)0xdb, 0x0c, (byte)0xf2,
            (byte)0xdb, 0x0d, (byte)0xf1,
            (byte)0xdb, 0x0e, (byte)0xf0,
            (byte)0xdb, 0x0f, (byte)0xef,
            (byte)0xdb, 0x10, (byte)0xee,
            (byte)0xdb, 0x11, (byte)0x97
        };
        return encoded;
    }

    /**
     * Build the expected pattern: 254 bytes of 0x01, then 253 bytes of 0x02,
     * and so on (each run one shorter, each value one higher) until 4096
     * bytes have been written.
     */
    private byte[] getPatternFileUncompressed() {
        byte[] pattern = new byte[4096];
        int position = 0;
        int value = 0x01;
        for (int runLength = 0xfe; position < pattern.length; runLength--, value++) {
            for (int n = 0; n < runLength && position < pattern.length; n++) {
                pattern[position++] = (byte)(value & 0xff);
            }
        }
        return pattern;
    }
}