From 58af449378a65240cb02383016cc33a908a75d52 Mon Sep 17 00:00:00 2001
From: Robert Greene <robgreene@users.sourceforge.net>
Date: Mon, 23 Jun 2008 04:03:42 +0000
Subject: [PATCH] This adds NuFX LZW/1 decompression support.

---
 .../shrinkit/io/Lzw1InputStream.java          | 146 ++++++++++++++++++
 .../shrinkit/io/RleInputStream.java           |   6 +-
 .../com/webcodepro/shrinkit/io/Lzw1Test.java  |  47 ++++++
 .../com/webcodepro/shrinkit/io/RleTest.java   |  11 +-
 .../shrinkit/io/TestCaseHelper.java           |  20 +++
 5 files changed, 217 insertions(+), 13 deletions(-)
 create mode 100644 src/com/webcodepro/shrinkit/io/Lzw1InputStream.java
 create mode 100644 test_src/com/webcodepro/shrinkit/io/Lzw1Test.java
 create mode 100644 test_src/com/webcodepro/shrinkit/io/TestCaseHelper.java
diff --git a/src/com/webcodepro/shrinkit/io/Lzw1InputStream.java b/src/com/webcodepro/shrinkit/io/Lzw1InputStream.java
new file mode 100644
index 0000000..7168dab
--- /dev/null
+++ b/src/com/webcodepro/shrinkit/io/Lzw1InputStream.java
@@ -0,0 +1,146 @@
+package com.webcodepro.shrinkit.io;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import com.webcodepro.shrinkit.CRC16;
+
+/**
+ * The <code>Lzw1InputStream</code> reads a data fork or
+ * resource fork written in the NuFX LZW/1 format.
+ * <p>
+ * The layout of the LZW/1 data is as follows:
+ * <table border="0">
+ * <tr>
+ *   <th colspan="3">"Fork" Header</th>
+ * </tr><tr>
+ *   <td>+0</td>
+ *   <td>Word</td>
+ *   <td>CRC-16 of the uncompressed data within the thread</td>
+ * </tr><tr>
+ *   <td>+2</td>
+ *   <td>Byte</td>
+ *   <td>Low-level volume number used to format 5.25" disks</td>
+ * </tr><tr>
+ *   <td>+3</td>
+ *   <td>Byte</td>
+ *   <td>RLE character used to decode this thread</td>
+ * </tr><tr>
+ *   <th colspan="3">Each subsequent 4K chunk of data</th>
+ * </tr><tr>
+ *   <td>+0</td>
+ *   <td>Word</td>
+ *   <td>Length after RLE compression (if RLE is not used, length 
+ *       will be 4096)</td>
+ * </tr><tr>
+ *   <td>+2</td>
+ *   <td>Byte</td>
+ *   <td>A $01 indicates LZW applied to this chunk; $00 that LZW
+ *       <b>was not</b> applied to this chunk</td>
+ * </tr>
+ * </table>
+ * <p>
+ * Note that the LZW string table is <em>cleared</em> after
+ * every chunk.
+ *  
+ * @author robgreene@users.sourceforge.net
+ */
+public class Lzw1InputStream extends InputStream {
+	/** This is the raw data stream with all markers and compressed data. */
+	private LittleEndianByteInputStream dataStream;
+	/** Used for an LZW-only <code>InputStream</code>. */
+	private LzwInputStream lzwStream;
+	/** Used for an RLE-only <code>InputStream</code>. */
+	private RleInputStream rleStream;
+	/** Used for an LZW+RLE <code>InputStream</code>. */
+	private InputStream lzwRleStream;
+	/** This is the generic decompression stream from which we read; it is re-selected at every chunk header. */
+	private InputStream decompressionStream;
+	/** Counts the bytes still to be returned from the current 4096 byte chunk; 0 means a chunk header comes next. */
+	private int bytesLeftInChunk;
+	/** This is the CRC-16 for the uncompressed fork; it stays -1 until the fork header has been read. */
+	private int givenCrc = -1;
+	/** This is the volume number for 5.25" disks. */
+	private int volumeNumber;
+	/** This is the RLE character read from the fork header and handed to both RLE decoders. */
+	private int rleCharacter;
+	/** Used to track the CRC of data we've extracted */
+	private CRC16 dataCrc = new CRC16();
+	
+	/**
+	 * Create the LZW/1 input stream; nothing is read from <code>dataStream</code> until the first <code>read()</code>.
+	 */
+	public Lzw1InputStream(LittleEndianByteInputStream dataStream) {
+		this.dataStream = dataStream;
+	}
+
+	/**
+	 * Read the next decompressed byte, parsing the fork header on the first call and a chunk header every 4096 bytes.
+	 */
+	public int read() throws IOException {
+		if (givenCrc == -1) {					// read the data or resource fork header
+			givenCrc = dataStream.readWord();
+			volumeNumber = dataStream.readByte();
+			rleCharacter = dataStream.readByte();
+			lzwStream = new LzwInputStream(new BitInputStream(dataStream, 9));
+			rleStream = new RleInputStream(dataStream, rleCharacter);		// use the fork's RLE character, not the default
+			lzwRleStream = new RleInputStream(lzwStream, rleCharacter);
+		}
+		if (bytesLeftInChunk == 0) {		// read the chunk header
+			bytesLeftInChunk = 4096;		// NuFX always reads 4096 bytes
+			lzwStream.clearDictionary();	// Always clear dictionary
+			int length = dataStream.readWord();
+			int lzwFlag = dataStream.readByte();
+			int flag = lzwFlag + (length == 4096 ? 0 : 2);	// bit 0 = LZW applied, bit 1 = RLE applied
+			switch (flag) {
+			case 0:		decompressionStream = dataStream;
+						break;
+			case 1:		decompressionStream = lzwStream;
+						break;
+			case 2:		decompressionStream = rleStream;
+						break;
+			case 3:		decompressionStream = lzwRleStream;
+						break;
+			default:	throw new IOException("Unknown type of decompression, flag = " + flag);
+			}
+		}
+		int b = decompressionStream.read();	// Now we can read a data byte
+		bytesLeftInChunk--;					// count it, so the next chunk header is parsed once this chunk is used up
+		dataCrc.update(b);
+		return b;
+	}
+	
+	/**
+	 * Indicates if the computed CRC matches the CRC given in the data stream.
+	 */
+	public boolean isCrcValid() {
+		return givenCrc == dataCrc.getValue();
+	}
+	
+	// GENERATED CODE
+
+	public int getGivenCrc() {
+		return givenCrc;
+	}
+	public void setGivenCrc(int givenCrc) {
+		this.givenCrc = givenCrc;
+	}
+	public int getVolumeNumber() {
+		return volumeNumber;
+	}
+	public void setVolumeNumber(int volumeNumber) {
+		this.volumeNumber = volumeNumber;
+	}
+	public int getRleCharacter() {
+		return rleCharacter;
+	}
+	public void setRleCharacter(int rleCharacter) {
+		this.rleCharacter = rleCharacter;
+	}
+	public CRC16 getDataCrc() {
+		return dataCrc;
+	}
+	public void setDataCrc(CRC16 dataCrc) {
+		this.dataCrc = dataCrc;
+	}
+}
diff --git a/src/com/webcodepro/shrinkit/io/RleInputStream.java b/src/com/webcodepro/shrinkit/io/RleInputStream.java
index 2015d74..9e2d42d 100644
--- a/src/com/webcodepro/shrinkit/io/RleInputStream.java
+++ b/src/com/webcodepro/shrinkit/io/RleInputStream.java
@@ -14,7 +14,7 @@ import java.io.InputStream;
  * @author robgreene@users.sourceforge.net
  */
 public class RleInputStream extends InputStream {
-	private LittleEndianByteInputStream bs;
+	private InputStream bs;
 	private int escapeChar;
 	private int repeatedByte;
 	private int numBytes = -1;
@@ -22,13 +22,13 @@ public class RleInputStream extends InputStream {
 	/**
 	 * Create an RLE input stream with the default marker byte.
 	 */
-	public RleInputStream(LittleEndianByteInputStream bs) {
+	public RleInputStream(InputStream bs) {
 		this(bs, 0xdb);
 	}
 	/**
 	 * Create an RLE input stream with the specified marker byte.
 	 */
-	public RleInputStream(LittleEndianByteInputStream bs, int escapeChar) {
+	public RleInputStream(InputStream bs, int escapeChar) {
 		this.bs = bs;
 		this.escapeChar = escapeChar;
 	}
diff --git a/test_src/com/webcodepro/shrinkit/io/Lzw1Test.java b/test_src/com/webcodepro/shrinkit/io/Lzw1Test.java
new file mode 100644
index 0000000..c6dd857
--- /dev/null
+++ b/test_src/com/webcodepro/shrinkit/io/Lzw1Test.java
@@ -0,0 +1,47 @@
+package com.webcodepro.shrinkit.io;
+
+import java.io.IOException;
+
+/**
+ * Test some LZW/1 format streams.
+ * 
+ * @author robgreene@users.sourceforge.net
+ */
+public class Lzw1Test extends TestCaseHelper {
+	public void testTextFile() throws IOException {	// decode a single-chunk LZW/1 fork and compare it to the known text
+		Lzw1InputStream is = new Lzw1InputStream(new LittleEndianByteInputStream(getTextFileLzw1StreamData()));
+		byte[] expected = getTextFileData();
+		byte[] actual = new byte[expected.length];
+		assertEquals(expected.length, is.read(actual));	// a short read must fail: the zero-filled tail would otherwise still match
+		assertEquals(expected, actual);
+		assertTrue(is.isCrcValid());
+	}
+	
+	private byte[] getTextFileLzw1StreamData() {	// raw fork bytes: fork header, one chunk header, then the compressed data
+		return new byte[] {
+				(byte)0xCA, 0x42, 0x00, (byte)0xDB, (byte)0xB7, 0x00, 0x01, 0x54, 
+				(byte)0x90, 0x24, (byte)0x99, 0x02, 0x62, 0x20, (byte)0x88, (byte)0x80, 
+				0x45, 0x40, 0x5C, 0x09, (byte)0x92, 0x45, 0x61, (byte)0xC2, 
+				(byte)0x85, 0x53, (byte)0x90, (byte)0x80, 0x78, 0x52, 0x45, 0x0A, 
+				(byte)0x88, 0x21, 0x4C, (byte)0x9E, 0x20, (byte)0x9C, (byte)0xC2, 0x42, 
+				0x61, (byte)0x90, (byte)0x88, 0x13, 0x2B, 0x5E, (byte)0xCC, (byte)0xB8, 
+				(byte)0xB1, 0x23, 0x44, (byte)0x89, 0x14, 0x2D, 0x62, (byte)0xD4, 
+				(byte)0x88, (byte)0xA4, (byte)0xC8, 0x14, 0x17, 0x20, 0x0E, 0x0A, 
+				0x24, 0x68, 0x10, (byte)0xA1, (byte)0xC7, (byte)0x86, 0x57, 0x1E, 
+				0x7E, 0x44, 0x29, 0x72, 0x65, 0x49, 0x10, 0x53, 
+				(byte)0x9E, (byte)0x80, 0x28, 0x12, 0x44, 0x0A, (byte)0x93, (byte)0x86, 
+				0x49, (byte)0x9C, (byte)0xC8, 0x4C, (byte)0xD8, (byte)0xE4, (byte)0x89, 0x14, 
+				0x27, 0x49, (byte)0x8F, (byte)0xB8, (byte)0xD8, 0x06, (byte)0xE0, 0x1F, 
+				0x55, (byte)0xAB, 0x55, (byte)0xAF, 0x6A, (byte)0xCD, (byte)0xCA, 0x15, 
+				(byte)0xAB, (byte)0xD7, (byte)0xAD, 0x5F, (byte)0xBB, 0x52, (byte)0xC5, 0x03, 
+				0x00	
+		};
+	}
+	private byte[] getTextFileData() {	// the expected plain text, followed by zero bytes up to 4096
+		byte[] data = new byte[4096];	// file was forced to be 4096 bytes long
+		String s = "THIS IS THE WAY WE WASH OUR CLOTHES, WASH OUR CLOTHES, WASH OUR CLOTHES.  " +
+				"THIS IS THE WAY WE WASH OUR CLOTHES, SO EARLY IN THE MORNING.";
+		System.arraycopy(s.getBytes(), 0, data, 0, s.length());
+		return data;
+	}
+}
diff --git a/test_src/com/webcodepro/shrinkit/io/RleTest.java b/test_src/com/webcodepro/shrinkit/io/RleTest.java
index c85b987..7b99e6f 100644
--- a/test_src/com/webcodepro/shrinkit/io/RleTest.java
+++ b/test_src/com/webcodepro/shrinkit/io/RleTest.java
@@ -5,15 +5,12 @@ import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 
-import junit.framework.TestCase;
-
-
 /**
  * Exercise the RLE encoder and decoders.  
  * 
  * @author robgreene@users.sourceforge.net
  */
-public class RleTest extends TestCase {
+public class RleTest extends TestCaseHelper {
 	public void testInputStream() throws IOException {
 		InputStream is = new RleInputStream(new LittleEndianByteInputStream(getPatternFileRle()));
 		ByteArrayOutputStream os = new ByteArrayOutputStream();
@@ -23,12 +20,6 @@ public class RleTest extends TestCase {
 		assertEquals(expected, actual);
 	}
 	
-	private void assertEquals(byte[] expected, byte[] actual) {
-		assertEquals(expected.length, actual.length);
-		for (int i=0; i<expected.length; i++) {
-			assertEquals("Byte mismatch at offset " + i, expected[i], actual[i]);
-		}
-	}
 	private void copy(InputStream is, OutputStream os) throws IOException {
 		int b = is.read();
 		while (b != -1) {
diff --git a/test_src/com/webcodepro/shrinkit/io/TestCaseHelper.java b/test_src/com/webcodepro/shrinkit/io/TestCaseHelper.java
new file mode 100644
index 0000000..ace5676
--- /dev/null
+++ b/test_src/com/webcodepro/shrinkit/io/TestCaseHelper.java
@@ -0,0 +1,20 @@
+package com.webcodepro.shrinkit.io;
+
+import junit.framework.TestCase;
+
+/**
+ * Some common testing methods.
+ * 
+ * @author robgreene@users.sourceforge.net
+ */
+public abstract class TestCaseHelper extends TestCase {
+	/**
+	 * Assert that two byte arrays have the same length and the same byte at every offset.
+	 */
+	public void assertEquals(byte[] expected, byte[] actual) {
+		assertEquals(expected.length, actual.length);	// lengths are compared before any contents
+		for (int i=0; i<expected.length; i++) {
+			assertEquals("Byte mismatch at offset " + i, expected[i], actual[i]);
+		}
+	}
+}

"Fork" Header
+0	Word	CRC-16 of the uncompressed data within the thread
+2	Byte	Low-level volume number used to format 5.25" disks
+3	Byte	RLE character used to decode this thread
Each subsequent 4K chunk of data
+0	Word	Length after RLE compression (if RLE is not used, length will be 4096)
+2	Byte	A $01 indicates LZW applied to this chunk; $00 that LZW was not applied to this chunk