Adding support for LZW/2 scheme. Fixed both LZW/1 and LZW/2 to handle "multichunk" files (that is, files more than 4K long). Renamed the NuFX-specific LZW streams to be NufxLzw[12]InputStream. Tests need to be revamped heavily. Added some sample archives with both formats that have the same answer.

2024-12-22 18:29:33 +00:00 · 2008-06-25 03:22:17 +00:00 · 2008-06-25 03:22:17 +00:00 · ac9626e9c0
commit ac9626e9c0
parent 58af449378
9 changed files with 248 additions and 12 deletions
--- a/src/com/webcodepro/shrinkit/io/BitInputStream.java
+++ b/src/com/webcodepro/shrinkit/io/BitInputStream.java
@ -85,5 +85,14 @@ public class BitInputStream extends InputStream {
        bitsOfData-= requestedNumberOfBits; 
        return b; 
    }
    /**
     * Drops whatever bits are still being held from earlier reads by zeroing
     * both the held data and the count of held bits.  Meant to be called when
     * switching from one buffer to the next so "left over" bits are not carried
     * across.
     */
    public void clearRemainingBitsOfData() {
    	data = 0;
    	bitsOfData = 0;
    }
 } 
--- a/src/com/webcodepro/shrinkit/io/LzwInputStream.java
+++ b/src/com/webcodepro/shrinkit/io/LzwInputStream.java
@ -20,6 +20,7 @@ public class LzwInputStream extends InputStream {
 	private BitInputStream is;
 	private List<int[]> dictionary;
 	private Queue<Integer> outputBuffer = new ConcurrentLinkedQueue<Integer>();
 	private boolean newBuffer = true;
 	// See Wikipedia entry on LZW for variable naming
 	private int k;
 	private int[] w;
@ -57,11 +58,14 @@ public class LzwInputStream extends InputStream {
 			dictionary = new ArrayList<int[]>();
 			for (short i=0; i<256; i++) dictionary.add(new int[] { i });
 			dictionary.add(new int[] { 0x100 });	// 0x100 not used by NuFX
 		}
 		if (newBuffer) {
 			// Setup for decompression;
 			k = is.read();
 			outputBuffer.add(k);
 			if (k == -1) return; 
 			w = new int[] { k };
 			newBuffer = false;
 		}
 		// LZW decompression
 		k = is.read();
@ -102,5 +106,36 @@ public class LzwInputStream extends InputStream {
 	 */
 	public void clearDictionary() {
 		// Bit-level reader: back to 9-bit codes, with no bits held over.
 		is.setRequestedNumberOfBits(9);
 		is.clearRemainingBitsOfData();
 		// Decoder state: forget the dictionary and every working variable.
 		dictionary = null;
 		entry = null;
 		w = null;
 		k = 0;
 		newBuffer = true;	// the next fill runs the "Setup for decompression" step again
 		// Nothing already decoded may leak into the next buffer.
 		outputBuffer.clear();
 	}
 //	/**
 //	 * Provide necessary housekeeping to reset LZW stream between NuFX buffer changes.
 //	 * The dictionary is the only item that is not cleared -- that needs to be done
 //	 * explicitly since behavior between LZW/1 and LZW/2 differ. 
 //	 */
 //	public void resetState() {
 //		is.clearRemainingBitsOfData();
 //		outputBuffer.clear();
 //		k = 0;
 //		w = null;
 //		entry = null;
 //		newBuffer = true;
 //	}
 	/**
 	 * Light-weight housekeeping for a NuFX buffer change: empties the queue of
 	 * already-decoded output and discards any bits still held by the underlying
 	 * bit stream.
 	 * <p>
 	 * Nothing else is touched -- the dictionary, <code>k</code>, <code>w</code>,
 	 * <code>entry</code> and the <code>newBuffer</code> flag all keep their values.
 	 * Call {@link #clearDictionary()} when a complete reset is required.
 	 */
 	public void clearData() {
 		outputBuffer.clear();
 		is.clearRemainingBitsOfData();
 	}
 }
--- a/src/com/webcodepro/shrinkit/io/NufxLzw1InputStream.java
+++ b/src/com/webcodepro/shrinkit/io/NufxLzw1InputStream.java
@ -6,7 +6,7 @@ import java.io.InputStream;
 import com.webcodepro.shrinkit.CRC16;
 /**
- * The <code>Lzw1InputStream</code> reads a data fork or
+ * The <code>NufxLzw1InputStream</code> reads a data fork or
 * resource fork written in the NuFX LZW/1 format.
 * <p>
 * The layout of the LZW/1 data is as follows:
@ -45,7 +45,7 @@ import com.webcodepro.shrinkit.CRC16;
 *  
 * @author robgreene@users.sourceforge.net
 */
-public class Lzw1InputStream extends InputStream {
+public class NufxLzw1InputStream extends InputStream {
 	/** This is the raw data stream with all markers and compressed data. */
 	private LittleEndianByteInputStream dataStream;
 	/** Used for an LZW-only <code>InputStream</code>. */
@ -70,7 +70,7 @@ public class Lzw1InputStream extends InputStream {
 	/**
 	 * Create the LZW/1 input stream.
 	 */
-	public Lzw1InputStream(LittleEndianByteInputStream dataStream) {
+	public NufxLzw1InputStream(LittleEndianByteInputStream dataStream) {
 		this.dataStream = dataStream;
 	}
@ -83,12 +83,13 @@ public class Lzw1InputStream extends InputStream {
 			volumeNumber = dataStream.readByte();
 			rleCharacter = dataStream.readByte();
 			lzwStream = new LzwInputStream(new BitInputStream(dataStream, 9));
-			rleStream = new RleInputStream(dataStream);
+			rleStream = new RleInputStream(dataStream, rleCharacter);
 			lzwRleStream = new RleInputStream(lzwStream);
 		}
 		if (bytesLeftInChunk == 0) {		// read the chunk header
 			bytesLeftInChunk = 4096;		// NuFX always reads 4096 bytes
 			lzwStream.clearDictionary();	// Always clear dictionary
 //			lzwStream.newBuffer();
 			int length = dataStream.readWord();
 			int lzwFlag = dataStream.readByte();
 			int flag = lzwFlag + (length == 4096 ? 0 : 2);
@ -106,6 +107,7 @@ public class Lzw1InputStream extends InputStream {
 		}
 		// Now we can read a data byte
 		int b = decompressionStream.read();
 		bytesLeftInChunk--;
 		dataCrc.update(b);
 		return b;
 	}
@ -137,10 +139,7 @@ public class Lzw1InputStream extends InputStream {
 	/**
 	 * Overwrites the RLE character recorded on this stream.
 	 * NOTE(review): the RLE stream is handed the header's value when the fork
 	 * header is read, so a later call here presumably does not reach it -- confirm.
 	 */
 	public void setRleCharacter(int rleCharacter) {
 		this.rleCharacter = rleCharacter;
 	}
-	public CRC16 getDataCrc() {
+	public long getDataCrc() {
-		return dataCrc;
+		return dataCrc.getValue();
 	}
 	/** Replaces the CRC accumulator that is updated with each byte returned by <code>read()</code>. */
 	public void setDataCrc(CRC16 dataCrc) {
 		this.dataCrc = dataCrc;
 	}
 }
--- a/src/com/webcodepro/shrinkit/io/NufxLzw2InputStream.java
+++ b/src/com/webcodepro/shrinkit/io/NufxLzw2InputStream.java
@ -0,0 +1,132 @@
 package com.webcodepro.shrinkit.io;
 import java.io.IOException;
 import java.io.InputStream;
 import com.webcodepro.shrinkit.CRC16;
 /**
 * The <code>NufxLzw2InputStream</code> reads a data fork or
 * resource fork written in the NuFX LZW/2 format.
 * <p>
 * The layout of the LZW/2 data is as follows:
 * <table border="0">
 * <tr>
 *   <th colspan="3">"Fork" Header</th>
 * </tr><tr>
 *   <td>+0</td>
 *   <td>Byte</td>
 *   <td>Low-level volume number used to format 5.25" disks</td>
 * </tr><tr>
 *   <td>+1</td>
 *   <td>Byte</td>
 *   <td>RLE character used to decode this thread</td>
 * </tr><tr>
 *   <th colspan="3">Each subsequent 4K chunk of data</th>
 * </tr><tr>
 *   <td>+0</td>
 *   <td>Word</td>
 *   <td>Bits 0-12: Length after RLE compression<br/>
 *       Bit 15: LZW flag (set to 1 if LZW used)</td>
 * </tr><tr>
 *   <td>+2</td>
 *   <td>Word</td>
 *   <td>If LZW flag = 1, total bytes in chunk<br/>
 *       Else (flag = 0) start of data</td>
 * </tr>
 * </table>
 * <p>
 * The LZW/2 dictionary is only cleared when the table becomes full and is indicated
 * in the input stream by 0x100.  It is also cleared whenever a chunk that is not
 * LZW encoded is encountered.
 *  
 * @author robgreene@users.sourceforge.net
 */
 public class NufxLzw2InputStream extends InputStream {
 	/** Number of decompressed bytes in every NuFX chunk. */
 	private static final int CHUNK_SIZE = 4096;
 	/** Bits 0-12 of the chunk header word: length after RLE compression. */
 	private static final int RLE_LENGTH_MASK = 0x1fff;
 	/** Bit 15 of the chunk header word: set when the chunk is LZW encoded. */
 	private static final int LZW_FLAG_MASK = 0x8000;

 	/** This is the raw data stream with all markers and compressed data. */
 	private final LittleEndianByteInputStream dataStream;
 	/** Used for an LZW-only <code>InputStream</code>. */
 	private LzwInputStream lzwStream;
 	/** Used for an RLE-only <code>InputStream</code>. */
 	private RleInputStream rleStream;
 	/** Used for an LZW+RLE <code>InputStream</code>. */
 	private InputStream lzwRleStream;
 	/** This is the generic decompression stream from which we read. */
 	private InputStream decompressionStream;
 	/** Counts the number of bytes left in the current 4096 byte chunk. */
 	private int bytesLeftInChunk;
 	/** This is the volume number for 5.25" disks; -1 until the fork header has been read. */
 	private int volumeNumber = -1;
 	/** This is the RLE character to use. */
 	private int rleCharacter;
 	/** Used to track the CRC of data we've extracted */
 	private final CRC16 dataCrc = new CRC16();

 	/**
 	 * Create the LZW/2 input stream.
 	 *
 	 * @param dataStream raw fork data, positioned at the fork header
 	 */
 	public NufxLzw2InputStream(LittleEndianByteInputStream dataStream) {
 		this.dataStream = dataStream;
 	}

 	/**
 	 * Read the next byte in the decompressed data stream.  The fork header is
 	 * consumed on the first call and a chunk header is consumed every time the
 	 * previous chunk has been fully delivered.
 	 *
 	 * @return the next decompressed byte, or -1 once the active decompression
 	 *         stream reports end of stream
 	 * @throws IOException if the underlying stream fails
 	 */
 	@Override
 	public int read() throws IOException {
 		if (volumeNumber == -1) {
 			readForkHeader();
 		}
 		if (bytesLeftInChunk == 0) {
 			readChunkHeader();
 		}
 		// Now we can read a data byte
 		int b = decompressionStream.read();
 		if (b == -1) {
 			// End of stream is not data: keep it out of the CRC and the chunk count.
 			return -1;
 		}
 		bytesLeftInChunk--;
 		dataCrc.update(b);
 		return b;
 	}

 	/** Reads the two byte fork header and builds the decompression streams. */
 	private void readForkHeader() throws IOException {
 		volumeNumber = dataStream.readByte();
 		rleCharacter = dataStream.readByte();
 		lzwStream = new LzwInputStream(new BitInputStream(dataStream, 9));
 		rleStream = new RleInputStream(dataStream, rleCharacter);
 		// NOTE(review): unlike rleStream, this one is not given rleCharacter --
 		// confirm RleInputStream's default matches the character in the header.
 		lzwRleStream = new RleInputStream(lzwStream);
 	}

 	/** Reads one chunk header and selects the stream that decodes that chunk. */
 	private void readChunkHeader() throws IOException {
 		bytesLeftInChunk = CHUNK_SIZE;		// NuFX always reads 4096 bytes
 		lzwStream.clearData();				// Allow the LZW stream to do a little housekeeping
 		int word = dataStream.readWord();
 		int length = word & RLE_LENGTH_MASK;
 		boolean lzwUsed = (word & LZW_FLAG_MASK) != 0;
 		boolean rleUsed = length != CHUNK_SIZE;	// a full-length chunk was not RLE encoded
 		if (lzwUsed) {
 			dataStream.readWord();			// total bytes in this chunk; not needed for decoding
 			if (rleUsed) {
 				decompressionStream = lzwRleStream;
 			} else {
 				decompressionStream = lzwStream;
 			}
 		} else {
 			lzwStream.clearDictionary();	// We clear dictionary whenever a non-LZW chunk is encountered
 			if (rleUsed) {
 				decompressionStream = rleStream;
 			} else {
 				decompressionStream = dataStream;
 			}
 		}
 	}

 	// GENERATED CODE
 	public int getVolumeNumber() {
 		return volumeNumber;
 	}
 	public void setVolumeNumber(int volumeNumber) {
 		this.volumeNumber = volumeNumber;
 	}
 	public int getRleCharacter() {
 		return rleCharacter;
 	}
 	public void setRleCharacter(int rleCharacter) {
 		this.rleCharacter = rleCharacter;
 	}
 	/** Returns the current value of the CRC accumulated over the bytes returned by <code>read()</code>. */
 	public long getDataCrc() {
 		return dataCrc.getValue();
 	}
 }
--- a/test_src/com/webcodepro/shrinkit/io/APPLE.II-LZW1.SHK
+++ b/test_src/com/webcodepro/shrinkit/io/APPLE.II-LZW1.SHK
--- a/test_src/com/webcodepro/shrinkit/io/APPLE.II-LZW2.SHK
+++ b/test_src/com/webcodepro/shrinkit/io/APPLE.II-LZW2.SHK
--- a/test_src/com/webcodepro/shrinkit/io/APPLE.II.txt
+++ b/test_src/com/webcodepro/shrinkit/io/APPLE.II.txt
--- a/test_src/com/webcodepro/shrinkit/io/NufxLzw1Test.java
+++ b/test_src/com/webcodepro/shrinkit/io/NufxLzw1Test.java
@ -1,15 +1,21 @@
 package com.webcodepro.shrinkit.io;
 import java.io.IOException;
 import java.util.List;
 import com.webcodepro.shrinkit.HeaderBlock;
 import com.webcodepro.shrinkit.NuFileArchive;
 import com.webcodepro.shrinkit.ThreadKind;
 import com.webcodepro.shrinkit.ThreadRecord;
 /**
 * Test some LZW/1 format streams.
 * 
 * @author robgreene@users.sourceforge.net
 */
-public class Lzw1Test extends TestCaseHelper {
+public class NufxLzw1Test extends TestCaseHelper {
 	public void testTextFile() throws IOException {
-		Lzw1InputStream is = new Lzw1InputStream(new LittleEndianByteInputStream(getTextFileLzw1StreamData()));
+		NufxLzw1InputStream is = new NufxLzw1InputStream(new LittleEndianByteInputStream(getTextFileLzw1StreamData()));
 		byte[] expected = getTextFileData();
 		byte[] actual = new byte[expected.length];
 		is.read(actual);
@ -17,6 +23,27 @@ public class Lzw1Test extends TestCaseHelper {
 		assertTrue(is.isCrcValid());
 	}
 	/**
 	 * Walks the single entry of the APPLE.II-LZW1.SHK sample archive, printing
 	 * its name(s) and the LZW/1-decoded contents of each data fork to stdout.
 	 */
 	public void testAppleIIShk() throws IOException {
 		NuFileArchive archive = new NuFileArchive(getClass().getResourceAsStream("APPLE.II-LZW1.SHK"));
 		HeaderBlock onlyBlock = archive.getHeaderBlocks().get(0);	// only one file
 		if (onlyBlock.getFilename() != null) {
 			System.out.printf("\n\n%s\n\n", onlyBlock.getFilename());
 		}
 		for (ThreadRecord thread : onlyBlock.getThreadRecords()) {
 			if (thread.getThreadKind() == ThreadKind.FILENAME) {
 				System.out.printf("\n\n%s\n\n", thread.getText());
 			}
 			long remaining = thread.getThreadEof();
 			if (thread.getThreadKind() != ThreadKind.DATA_FORK) {
 				continue;
 			}
 			NufxLzw1InputStream lzw1 = new NufxLzw1InputStream(new LittleEndianByteInputStream(thread.getRawInputStream()));
 			for (; remaining > 0; remaining--) {
 				System.out.print((char)lzw1.read());
 			}
 		}
 	}
 	private byte[] getTextFileLzw1StreamData() {
 		return new byte[] {
 				(byte)0xCA, 0x42, 0x00, (byte)0xDB, (byte)0xB7, 0x00, 0x01, 0x54, 
--- a/test_src/com/webcodepro/shrinkit/io/NufxLzw2Test.java
+++ b/test_src/com/webcodepro/shrinkit/io/NufxLzw2Test.java
@ -0,0 +1,33 @@
 package com.webcodepro.shrinkit.io;
 import java.io.IOException;
 import java.util.List;
 import junit.framework.TestCase;
 import com.webcodepro.shrinkit.HeaderBlock;
 import com.webcodepro.shrinkit.NuFileArchive;
 import com.webcodepro.shrinkit.ThreadKind;
 import com.webcodepro.shrinkit.ThreadRecord;
 /**
  * Test some LZW/2 format streams.
  */
 public class NufxLzw2Test extends TestCase {
 	/**
 	 * Decodes every data fork of the first entry in the APPLE.II-LZW2.SHK
 	 * sample archive, echoing the result to stdout.  Fails if the sample is
 	 * missing or if a fork ends before its declared length has been read.
 	 */
 	public void testPascalFile() throws IOException {
 		java.io.InputStream resource = getClass().getResourceAsStream("APPLE.II-LZW2.SHK");
 		// getResourceAsStream returns null rather than throwing when the file is absent.
 		assertNotNull("APPLE.II-LZW2.SHK not found next to the test class", resource);
 		NuFileArchive archive = new NuFileArchive(resource);
 		List<HeaderBlock> blocks = archive.getHeaderBlocks();
 		HeaderBlock block = blocks.get(0);
 		if (block.getFilename() != null) System.out.printf("\n\n%s\n\n", block.getFilename());
 		List<ThreadRecord> records = block.getThreadRecords();
 		for (ThreadRecord record : records) {
 			if (record.getThreadKind() == ThreadKind.FILENAME) {
 				System.out.printf("\n\n%s\n\n", record.getText());
 			}
 			long bytes = record.getThreadEof();
 			if (record.getThreadKind() == ThreadKind.DATA_FORK) {
 				NufxLzw2InputStream is = new NufxLzw2InputStream(new LittleEndianByteInputStream(record.getRawInputStream()));
 				for (long count = 0; count < bytes; count++) {
 					int b = is.read();
 					if (b == -1) {
 						// Without this check, -1 would be printed as a character and the test would pass.
 						fail("Data fork ended after " + count + " of " + bytes + " bytes");
 					}
 					System.out.print((char)b);
 				}
 			}
 		}
 	}
 }