Added support for the LZW/2 scheme. Fixed both LZW/1 and LZW/2 to handle "multichunk" files (that is, files longer than 4K). Renamed the NuFX-specific LZW streams to NufxLzw[12]InputStream. The tests still need to be revamped heavily. Added sample archives in both formats that decompress to the same result.

This commit is contained in:
Robert Greene 2008-06-25 03:22:17 +00:00
parent 58af449378
commit ac9626e9c0
9 changed files with 248 additions and 12 deletions

View File

@ -85,5 +85,14 @@ public class BitInputStream extends InputStream {
bitsOfData-= requestedNumberOfBits; bitsOfData-= requestedNumberOfBits;
return b; return b;
} }
/**
 * Drop any bits still held over from earlier reads.
 * <p>
 * Meant to be called when the caller moves from one buffer to the next, so
 * that "left over" bits from the previous buffer are not carried forward.
 */
public void clearRemainingBitsOfData() {
    data = 0;
    bitsOfData = 0;
}
} }

View File

@ -20,6 +20,7 @@ public class LzwInputStream extends InputStream {
private BitInputStream is; private BitInputStream is;
private List<int[]> dictionary; private List<int[]> dictionary;
private Queue<Integer> outputBuffer = new ConcurrentLinkedQueue<Integer>(); private Queue<Integer> outputBuffer = new ConcurrentLinkedQueue<Integer>();
private boolean newBuffer = true;
// See Wikipedia entry on LZW for variable naming // See Wikipedia entry on LZW for variable naming
private int k; private int k;
private int[] w; private int[] w;
@ -57,11 +58,14 @@ public class LzwInputStream extends InputStream {
dictionary = new ArrayList<int[]>(); dictionary = new ArrayList<int[]>();
for (short i=0; i<256; i++) dictionary.add(new int[] { i }); for (short i=0; i<256; i++) dictionary.add(new int[] { i });
dictionary.add(new int[] { 0x100 }); // 0x100 not used by NuFX dictionary.add(new int[] { 0x100 }); // 0x100 not used by NuFX
}
if (newBuffer) {
// Setup for decompression; // Setup for decompression;
k = is.read(); k = is.read();
outputBuffer.add(k); outputBuffer.add(k);
if (k == -1) return; if (k == -1) return;
w = new int[] { k }; w = new int[] { k };
newBuffer = false;
} }
// LZW decompression // LZW decompression
k = is.read(); k = is.read();
@ -102,5 +106,36 @@ public class LzwInputStream extends InputStream {
*/ */
public void clearDictionary() { public void clearDictionary() {
dictionary = null; dictionary = null;
is.setRequestedNumberOfBits(9);
is.clearRemainingBitsOfData();
outputBuffer.clear();
k = 0;
w = null;
entry = null;
newBuffer = true;
}
// /**
// * Provide necessary housekeeping to reset LZW stream between NuFX buffer changes.
// * The dictionary is the only item that is not cleared -- that needs to be done
// * explicitly since behavior between LZW/1 and LZW/2 differ.
// */
// public void resetState() {
// is.clearRemainingBitsOfData();
// outputBuffer.clear();
// k = 0;
// w = null;
// entry = null;
// newBuffer = true;
// }
/**
* Provide necessary housekeeping to reset LZW stream between NuFX buffer changes.
* The dictionary is the only item that is not cleared -- that needs to be done
* explicitly since behavior between LZW/1 and LZW/2 differ.
*/
public void clearData() {
is.clearRemainingBitsOfData();
outputBuffer.clear();
} }
} }

View File

@ -6,7 +6,7 @@ import java.io.InputStream;
import com.webcodepro.shrinkit.CRC16; import com.webcodepro.shrinkit.CRC16;
/** /**
* The <code>Lzw1InputStream</code> reads a data fork or * The <code>NufxLzw1InputStream</code> reads a data fork or
* resource fork written in the NuFX LZW/1 format. * resource fork written in the NuFX LZW/1 format.
* <p> * <p>
* The layout of the LZW/1 data is as follows: * The layout of the LZW/1 data is as follows:
@ -45,7 +45,7 @@ import com.webcodepro.shrinkit.CRC16;
* *
* @author robgreene@users.sourceforge.net * @author robgreene@users.sourceforge.net
*/ */
public class Lzw1InputStream extends InputStream { public class NufxLzw1InputStream extends InputStream {
/** This is the raw data stream with all markers and compressed data. */ /** This is the raw data stream with all markers and compressed data. */
private LittleEndianByteInputStream dataStream; private LittleEndianByteInputStream dataStream;
/** Used for an LZW-only <code>InputStream</code>. */ /** Used for an LZW-only <code>InputStream</code>. */
@ -70,7 +70,7 @@ public class Lzw1InputStream extends InputStream {
/** /**
* Create the LZW/1 input stream. * Create the LZW/1 input stream.
*/ */
public Lzw1InputStream(LittleEndianByteInputStream dataStream) { public NufxLzw1InputStream(LittleEndianByteInputStream dataStream) {
this.dataStream = dataStream; this.dataStream = dataStream;
} }
@ -83,12 +83,13 @@ public class Lzw1InputStream extends InputStream {
volumeNumber = dataStream.readByte(); volumeNumber = dataStream.readByte();
rleCharacter = dataStream.readByte(); rleCharacter = dataStream.readByte();
lzwStream = new LzwInputStream(new BitInputStream(dataStream, 9)); lzwStream = new LzwInputStream(new BitInputStream(dataStream, 9));
rleStream = new RleInputStream(dataStream); rleStream = new RleInputStream(dataStream, rleCharacter);
lzwRleStream = new RleInputStream(lzwStream); lzwRleStream = new RleInputStream(lzwStream);
} }
if (bytesLeftInChunk == 0) { // read the chunk header if (bytesLeftInChunk == 0) { // read the chunk header
bytesLeftInChunk = 4096; // NuFX always reads 4096 bytes bytesLeftInChunk = 4096; // NuFX always reads 4096 bytes
lzwStream.clearDictionary(); // Always clear dictionary lzwStream.clearDictionary(); // Always clear dictionary
// lzwStream.newBuffer();
int length = dataStream.readWord(); int length = dataStream.readWord();
int lzwFlag = dataStream.readByte(); int lzwFlag = dataStream.readByte();
int flag = lzwFlag + (length == 4096 ? 0 : 2); int flag = lzwFlag + (length == 4096 ? 0 : 2);
@ -106,6 +107,7 @@ public class Lzw1InputStream extends InputStream {
} }
// Now we can read a data byte // Now we can read a data byte
int b = decompressionStream.read(); int b = decompressionStream.read();
bytesLeftInChunk--;
dataCrc.update(b); dataCrc.update(b);
return b; return b;
} }
@ -137,10 +139,7 @@ public class Lzw1InputStream extends InputStream {
public void setRleCharacter(int rleCharacter) { public void setRleCharacter(int rleCharacter) {
this.rleCharacter = rleCharacter; this.rleCharacter = rleCharacter;
} }
public CRC16 getDataCrc() { public long getDataCrc() {
return dataCrc; return dataCrc.getValue();
}
public void setDataCrc(CRC16 dataCrc) {
this.dataCrc = dataCrc;
} }
} }

View File

@ -0,0 +1,132 @@
package com.webcodepro.shrinkit.io;
import java.io.IOException;
import java.io.InputStream;
import com.webcodepro.shrinkit.CRC16;
/**
 * The <code>NufxLzw2InputStream</code> reads a data fork or
 * resource fork written in the NuFX LZW/2 format.
 * <p>
 * The layout of the LZW/2 data is as follows:
 * <table border="0">
 * <tr>
 *   <th colspan="3">"Fork" Header</th>
 * </tr><tr>
 *   <td>+0</td>
 *   <td>Byte</td>
 *   <td>Low-level volume number used to format 5.25" disks</td>
 * </tr><tr>
 *   <td>+1</td>
 *   <td>Byte</td>
 *   <td>RLE character used to decode this thread</td>
 * </tr><tr>
 *   <th colspan="3">Each subsequent 4K chunk of data</th>
 * </tr><tr>
 *   <td>+0</td>
 *   <td>Word</td>
 *   <td>Bits 0-12: Length after RLE compression<br/>
 *       Bit 15: LZW flag (set to 1 if LZW used)</td>
 * </tr><tr>
 *   <td>+2</td>
 *   <td>Word</td>
 *   <td>If LZW flag = 1, total bytes in chunk<br/>
 *       Else (flag = 0) start of data</td>
 * </tr>
 * </table>
 * <p>
 * The LZW/2 dictionary is only cleared when the table becomes full and is indicated
 * in the input stream by 0x100. It is also cleared whenever a chunk that is not
 * LZW encoded is encountered.
 *
 * @author robgreene@users.sourceforge.net
 */
public class NufxLzw2InputStream extends InputStream {
    /** Number of decompressed bytes in every NuFX chunk. */
    private static final int CHUNK_SIZE = 4096;
    /** Bits 0-12 of the chunk header word: length after RLE compression. */
    private static final int RLE_LENGTH_MASK = 0x1fff;
    /** Bit 15 of the chunk header word: set when the chunk is LZW compressed. */
    private static final int LZW_FLAG_MASK = 0x8000;

    /** This is the raw data stream with all markers and compressed data. */
    private LittleEndianByteInputStream dataStream;
    /** Used for an LZW-only <code>InputStream</code>. */
    private LzwInputStream lzwStream;
    /** Used for an RLE-only <code>InputStream</code>. */
    private RleInputStream rleStream;
    /** Used for an LZW+RLE <code>InputStream</code>. */
    private InputStream lzwRleStream;
    /** This is the generic decompression stream from which we read. */
    private InputStream decompressionStream;
    /** Counts the number of bytes left in the current 4096 byte chunk. */
    private int bytesLeftInChunk;
    /** This is the volume number for 5.25" disks; -1 until the fork header has been read. */
    private int volumeNumber = -1;
    /** This is the RLE character to use. */
    private int rleCharacter;
    /** Used to track the CRC of data we've extracted. */
    private CRC16 dataCrc = new CRC16();

    /**
     * Create the LZW/2 input stream.
     *
     * @param dataStream the raw fork data, positioned at the fork header
     */
    public NufxLzw2InputStream(LittleEndianByteInputStream dataStream) {
        this.dataStream = dataStream;
    }

    /**
     * Read the next byte in the decompressed data stream.
     * <p>
     * The fork header is read on the first call and a chunk header is read
     * every time the previous chunk has been fully consumed.
     *
     * @return the next decompressed byte, or -1 if the selected decompression
     *         stream reports end of data
     * @throws IOException if the underlying stream fails
     */
    @Override
    public int read() throws IOException {
        if (volumeNumber == -1) {
            readForkHeader();
        }
        if (bytesLeftInChunk == 0) {
            readChunkHeader();
        }
        int b = decompressionStream.read();
        if (b == -1) {
            // End of data: the marker is not a data byte, so it must neither be
            // counted against the chunk nor folded into the CRC.
            return -1;
        }
        bytesLeftInChunk--;
        dataCrc.update(b);
        return b;
    }

    /**
     * Read the data or resource fork header and build the decompression streams.
     */
    private void readForkHeader() throws IOException {
        volumeNumber = dataStream.readByte();
        rleCharacter = dataStream.readByte();
        lzwStream = new LzwInputStream(new BitInputStream(dataStream, 9));
        rleStream = new RleInputStream(dataStream, rleCharacter);
        // The RLE layer on top of LZW must use the fork's RLE character as well,
        // exactly like the RLE-only stream above.
        lzwRleStream = new RleInputStream(lzwStream, rleCharacter);
    }

    /**
     * Read the header of the next chunk and select the matching decompression stream.
     */
    private void readChunkHeader() throws IOException {
        bytesLeftInChunk = CHUNK_SIZE;  // NuFX always reads 4096 bytes
        lzwStream.clearData();          // Allow the LZW stream to do a little housekeeping
        int word = dataStream.readWord();
        int length = word & RLE_LENGTH_MASK;
        boolean lzwUsed = (word & LZW_FLAG_MASK) != 0;
        if (lzwUsed) {
            dataStream.readWord();      // Total bytes in this chunk; not needed for decoding
        } else {
            lzwStream.clearDictionary(); // Dictionary is cleared whenever a non-LZW chunk is encountered
        }
        // A chunk whose post-RLE length is a full 4096 bytes was not RLE compressed.
        boolean rleUsed = length != CHUNK_SIZE;
        if (lzwUsed) {
            if (rleUsed) {
                decompressionStream = lzwRleStream;
            } else {
                decompressionStream = lzwStream;
            }
        } else {
            if (rleUsed) {
                decompressionStream = rleStream;
            } else {
                decompressionStream = dataStream;
            }
        }
    }

    // GENERATED CODE

    public int getVolumeNumber() {
        return volumeNumber;
    }
    public void setVolumeNumber(int volumeNumber) {
        this.volumeNumber = volumeNumber;
    }
    public int getRleCharacter() {
        return rleCharacter;
    }
    public void setRleCharacter(int rleCharacter) {
        this.rleCharacter = rleCharacter;
    }
    /**
     * @return the CRC value accumulated over the bytes returned by {@link #read()} so far
     */
    public long getDataCrc() {
        return dataCrc.getValue();
    }
}

Binary file not shown.

Binary file not shown.

File diff suppressed because one or more lines are too long

View File

@ -1,15 +1,21 @@
package com.webcodepro.shrinkit.io; package com.webcodepro.shrinkit.io;
import java.io.IOException; import java.io.IOException;
import java.util.List;
import com.webcodepro.shrinkit.HeaderBlock;
import com.webcodepro.shrinkit.NuFileArchive;
import com.webcodepro.shrinkit.ThreadKind;
import com.webcodepro.shrinkit.ThreadRecord;
/** /**
* Test some LZW/1 format streams. * Test some LZW/1 format streams.
* *
* @author robgreene@users.sourceforge.net * @author robgreene@users.sourceforge.net
*/ */
public class Lzw1Test extends TestCaseHelper { public class NufxLzw1Test extends TestCaseHelper {
public void testTextFile() throws IOException { public void testTextFile() throws IOException {
Lzw1InputStream is = new Lzw1InputStream(new LittleEndianByteInputStream(getTextFileLzw1StreamData())); NufxLzw1InputStream is = new NufxLzw1InputStream(new LittleEndianByteInputStream(getTextFileLzw1StreamData()));
byte[] expected = getTextFileData(); byte[] expected = getTextFileData();
byte[] actual = new byte[expected.length]; byte[] actual = new byte[expected.length];
is.read(actual); is.read(actual);
@ -17,6 +23,27 @@ public class Lzw1Test extends TestCaseHelper {
assertTrue(is.isCrcValid()); assertTrue(is.isCrcValid());
} }
/**
 * Decompress the data fork of the single file in the LZW/1 sample archive
 * and echo it to standard output.
 * <p>
 * Fails if the sample archive is missing or if the decompressed stream ends
 * before the number of bytes given by the thread's EOF has been read.
 */
public void testAppleIIShk() throws IOException {
    java.io.InputStream archiveStream = getClass().getResourceAsStream("APPLE.II-LZW1.SHK");
    if (archiveStream == null) {
        fail("Test resource APPLE.II-LZW1.SHK was not found on the classpath");
    }
    try {
        NuFileArchive archive = new NuFileArchive(archiveStream);
        List<HeaderBlock> blocks = archive.getHeaderBlocks();
        HeaderBlock block = blocks.get(0);	// only one file
        if (block.getFilename() != null) System.out.printf("\n\n%s\n\n", block.getFilename());
        for (ThreadRecord record : block.getThreadRecords()) {
            if (record.getThreadKind() == ThreadKind.FILENAME) {
                System.out.printf("\n\n%s\n\n", record.getText());
            }
            if (record.getThreadKind() == ThreadKind.DATA_FORK) {
                NufxLzw1InputStream is = new NufxLzw1InputStream(new LittleEndianByteInputStream(record.getRawInputStream()));
                for (long remaining = record.getThreadEof(); remaining > 0; remaining--) {
                    int b = is.read();
                    if (b == -1) {
                        // Printing (char)-1 would hide a truncated decompression.
                        fail("Decompressed data ended with " + remaining + " byte(s) still expected");
                    }
                    System.out.print((char) b);
                }
            }
        }
    } finally {
        archiveStream.close();	// closed only after all threads have been read
    }
}
private byte[] getTextFileLzw1StreamData() { private byte[] getTextFileLzw1StreamData() {
return new byte[] { return new byte[] {
(byte)0xCA, 0x42, 0x00, (byte)0xDB, (byte)0xB7, 0x00, 0x01, 0x54, (byte)0xCA, 0x42, 0x00, (byte)0xDB, (byte)0xB7, 0x00, 0x01, 0x54,

View File

@ -0,0 +1,33 @@
package com.webcodepro.shrinkit.io;
import java.io.IOException;
import java.util.List;
import junit.framework.TestCase;
import com.webcodepro.shrinkit.HeaderBlock;
import com.webcodepro.shrinkit.NuFileArchive;
import com.webcodepro.shrinkit.ThreadKind;
import com.webcodepro.shrinkit.ThreadRecord;
/**
 * Test some LZW/2 format streams.
 */
public class NufxLzw2Test extends TestCase {
    /**
     * Decompress the data fork of the first file in the LZW/2 sample archive
     * and echo it to standard output.
     * <p>
     * Fails if the sample archive is missing or if the decompressed stream ends
     * before the number of bytes given by the thread's EOF has been read.
     */
    public void testPascalFile() throws IOException {
        java.io.InputStream archiveStream = getClass().getResourceAsStream("APPLE.II-LZW2.SHK");
        assertNotNull("Test resource APPLE.II-LZW2.SHK was not found on the classpath", archiveStream);
        try {
            NuFileArchive archive = new NuFileArchive(archiveStream);
            List<HeaderBlock> blocks = archive.getHeaderBlocks();
            HeaderBlock block = blocks.get(0);
            if (block.getFilename() != null) System.out.printf("\n\n%s\n\n", block.getFilename());
            for (ThreadRecord record : block.getThreadRecords()) {
                if (record.getThreadKind() == ThreadKind.FILENAME) {
                    System.out.printf("\n\n%s\n\n", record.getText());
                }
                if (record.getThreadKind() == ThreadKind.DATA_FORK) {
                    NufxLzw2InputStream is = new NufxLzw2InputStream(new LittleEndianByteInputStream(record.getRawInputStream()));
                    for (long remaining = record.getThreadEof(); remaining > 0; remaining--) {
                        int b = is.read();
                        if (b == -1) {
                            // Printing (char)-1 would hide a truncated decompression.
                            fail("Decompressed data ended with " + remaining + " byte(s) still expected");
                        }
                        System.out.print((char) b);
                    }
                }
            }
        } finally {
            archiveStream.close();  // closed only after all threads have been read
        }
    }
}