mirror of
https://github.com/AppleCommander/ShrinkItArchive.git
synced 2024-12-31 13:30:29 +00:00
Added support for the LZW/2 scheme. Fixed both LZW/1 and LZW/2 to handle "multichunk" files (that is, files longer than 4K). Renamed the NuFX-specific LZW streams to NufxLzw[12]InputStream. Tests need to be revamped heavily. Added sample archives in both formats that decompress to the same content.
This commit is contained in:
parent
58af449378
commit
ac9626e9c0
@ -85,5 +85,14 @@ public class BitInputStream extends InputStream {
|
||||
bitsOfData-= requestedNumberOfBits;
|
||||
return b;
|
||||
}
|
||||
|
||||
/**
|
||||
* When shifting from buffer to buffer, the input stream also should be reset.
|
||||
* This allows the "left over" bits to be cleared.
|
||||
*/
|
||||
public void clearRemainingBitsOfData() {
|
||||
this.bitsOfData = 0;
|
||||
this.data = 0;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -20,6 +20,7 @@ public class LzwInputStream extends InputStream {
|
||||
private BitInputStream is;
|
||||
private List<int[]> dictionary;
|
||||
private Queue<Integer> outputBuffer = new ConcurrentLinkedQueue<Integer>();
|
||||
private boolean newBuffer = true;
|
||||
// See Wikipedia entry on LZW for variable naming
|
||||
private int k;
|
||||
private int[] w;
|
||||
@ -57,11 +58,14 @@ public class LzwInputStream extends InputStream {
|
||||
dictionary = new ArrayList<int[]>();
|
||||
for (short i=0; i<256; i++) dictionary.add(new int[] { i });
|
||||
dictionary.add(new int[] { 0x100 }); // 0x100 not used by NuFX
|
||||
}
|
||||
if (newBuffer) {
|
||||
// Setup for decompression;
|
||||
k = is.read();
|
||||
outputBuffer.add(k);
|
||||
if (k == -1) return;
|
||||
w = new int[] { k };
|
||||
newBuffer = false;
|
||||
}
|
||||
// LZW decompression
|
||||
k = is.read();
|
||||
@ -102,5 +106,36 @@ public class LzwInputStream extends InputStream {
|
||||
*/
|
||||
public void clearDictionary() {
|
||||
dictionary = null;
|
||||
is.setRequestedNumberOfBits(9);
|
||||
is.clearRemainingBitsOfData();
|
||||
outputBuffer.clear();
|
||||
k = 0;
|
||||
w = null;
|
||||
entry = null;
|
||||
newBuffer = true;
|
||||
}
|
||||
|
||||
// /**
|
||||
// * Provide necessary housekeeping to reset LZW stream between NuFX buffer changes.
|
||||
// * The dictionary is the only item that is not cleared -- that needs to be done
|
||||
// * explicitly since behavior between LZW/1 and LZW/2 differ.
|
||||
// */
|
||||
// public void resetState() {
|
||||
// is.clearRemainingBitsOfData();
|
||||
// outputBuffer.clear();
|
||||
// k = 0;
|
||||
// w = null;
|
||||
// entry = null;
|
||||
// newBuffer = true;
|
||||
// }
|
||||
|
||||
/**
|
||||
* Provide necessary housekeeping to reset LZW stream between NuFX buffer changes.
|
||||
* Only the left over bits and the pending output are cleared. The dictionary and the
|
||||
* decoder state are kept -- use clearDictionary() explicitly since behavior between LZW/1 and LZW/2 differs.
|
||||
*/
|
||||
public void clearData() {
|
||||
is.clearRemainingBitsOfData();
|
||||
outputBuffer.clear();
|
||||
}
|
||||
}
|
||||
|
@ -6,7 +6,7 @@ import java.io.InputStream;
|
||||
import com.webcodepro.shrinkit.CRC16;
|
||||
|
||||
/**
|
||||
* The <code>Lzw1InputStream</code> reads a data fork or
|
||||
* The <code>NufxLzw1InputStream</code> reads a data fork or
|
||||
* resource fork written in the NuFX LZW/1 format.
|
||||
* <p>
|
||||
* The layout of the LZW/1 data is as follows:
|
||||
@ -45,7 +45,7 @@ import com.webcodepro.shrinkit.CRC16;
|
||||
*
|
||||
* @author robgreene@users.sourceforge.net
|
||||
*/
|
||||
public class Lzw1InputStream extends InputStream {
|
||||
public class NufxLzw1InputStream extends InputStream {
|
||||
/** This is the raw data stream with all markers and compressed data. */
|
||||
private LittleEndianByteInputStream dataStream;
|
||||
/** Used for an LZW-only <code>InputStream</code>. */
|
||||
@ -70,7 +70,7 @@ public class Lzw1InputStream extends InputStream {
|
||||
/**
|
||||
* Create the LZW/1 input stream.
|
||||
*/
|
||||
public Lzw1InputStream(LittleEndianByteInputStream dataStream) {
|
||||
public NufxLzw1InputStream(LittleEndianByteInputStream dataStream) {
|
||||
this.dataStream = dataStream;
|
||||
}
|
||||
|
||||
@ -83,12 +83,13 @@ public class Lzw1InputStream extends InputStream {
|
||||
volumeNumber = dataStream.readByte();
|
||||
rleCharacter = dataStream.readByte();
|
||||
lzwStream = new LzwInputStream(new BitInputStream(dataStream, 9));
|
||||
rleStream = new RleInputStream(dataStream);
|
||||
rleStream = new RleInputStream(dataStream, rleCharacter);
|
||||
lzwRleStream = new RleInputStream(lzwStream);
|
||||
}
|
||||
if (bytesLeftInChunk == 0) { // read the chunk header
|
||||
bytesLeftInChunk = 4096; // NuFX always reads 4096 bytes
|
||||
lzwStream.clearDictionary(); // Always clear dictionary
|
||||
// lzwStream.newBuffer();
|
||||
int length = dataStream.readWord();
|
||||
int lzwFlag = dataStream.readByte();
|
||||
int flag = lzwFlag + (length == 4096 ? 0 : 2);
|
||||
@ -106,6 +107,7 @@ public class Lzw1InputStream extends InputStream {
|
||||
}
|
||||
// Now we can read a data byte
|
||||
int b = decompressionStream.read();
|
||||
bytesLeftInChunk--;
|
||||
dataCrc.update(b);
|
||||
return b;
|
||||
}
|
||||
@ -137,10 +139,7 @@ public class Lzw1InputStream extends InputStream {
|
||||
public void setRleCharacter(int rleCharacter) {
|
||||
this.rleCharacter = rleCharacter;
|
||||
}
|
||||
public CRC16 getDataCrc() {
|
||||
return dataCrc;
|
||||
}
|
||||
public void setDataCrc(CRC16 dataCrc) {
|
||||
this.dataCrc = dataCrc;
|
||||
public long getDataCrc() {
|
||||
return dataCrc.getValue();
|
||||
}
|
||||
}
|
132
src/com/webcodepro/shrinkit/io/NufxLzw2InputStream.java
Normal file
132
src/com/webcodepro/shrinkit/io/NufxLzw2InputStream.java
Normal file
@ -0,0 +1,132 @@
|
||||
package com.webcodepro.shrinkit.io;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStream;
|
||||
|
||||
import com.webcodepro.shrinkit.CRC16;
|
||||
|
||||
/**
|
||||
* The <code>NufxLzw2InputStream</code> reads a data fork or
|
||||
* resource fork written in the NuFX LZW/2 format.
|
||||
* <p>
|
||||
* The layout of the LZW/2 data is as follows:
|
||||
* <table border="0">
|
||||
* <tr>
|
||||
* <th colspan="3">"Fork" Header</th>
|
||||
* </tr><tr>
|
||||
* <td>+0</td>
|
||||
* <td>Byte</td>
|
||||
* <td>Low-level volume number used to format 5.25" disks</td>
|
||||
* </tr><tr>
|
||||
* <td>+1</td>
|
||||
* <td>Byte</td>
|
||||
* <td>RLE character used to decode this thread</td>
|
||||
* </tr><tr>
|
||||
* <th colspan="3">Each subsequent 4K chunk of data</th>
|
||||
* </tr><tr>
|
||||
* <td>+0</td>
|
||||
* <td>Word</td>
|
||||
* <td>Bits 0-12: Length after RLE compression<br/>
|
||||
* Bit 15: LZW flag (set to 1 if LZW used)</td>
|
||||
* </tr><tr>
|
||||
* <td>+2</td>
|
||||
* <td>Word</td>
|
||||
* <td>If LZW flag = 1, total bytes in chunk<br/>
|
||||
* Else (flag = 0) start of data</td>
|
||||
* </tr>
|
||||
* </table>
|
||||
* <p>
|
||||
* The LZW/2 dictionary is only cleared when the table becomes full, which is indicated
|
||||
* in the input stream by the code 0x100. It is also cleared whenever a chunk that is not
|
||||
* LZW encoded is encountered.
|
||||
*
|
||||
* @author robgreene@users.sourceforge.net
|
||||
*/
|
||||
public class NufxLzw2InputStream extends InputStream {
|
||||
/** This is the raw data stream with all markers and compressed data. */
|
||||
private LittleEndianByteInputStream dataStream;
|
||||
/** Used for an LZW-only <code>InputStream</code>. */
|
||||
private LzwInputStream lzwStream;
|
||||
/** Used for an RLE-only <code>InputStream</code>. */
|
||||
private RleInputStream rleStream;
|
||||
/** Used for an LZW+RLE <code>InputStream</code>. */
|
||||
private InputStream lzwRleStream;
|
||||
/** This is the generic decompression stream from which we read. */
|
||||
private InputStream decompressionStream;
|
||||
/** Counts the number of bytes in the 4096 byte chunk. */
|
||||
private int bytesLeftInChunk;
|
||||
/** This is the volume number for 5.25" disks. */
|
||||
private int volumeNumber = -1;
|
||||
/** This is the RLE character to use. */
|
||||
private int rleCharacter;
|
||||
/** Used to track the CRC of data we've extracted */
|
||||
private CRC16 dataCrc = new CRC16();
|
||||
|
||||
/**
|
||||
* Create the LZW/2 input stream.
|
||||
*/
|
||||
public NufxLzw2InputStream(LittleEndianByteInputStream dataStream) {
|
||||
this.dataStream = dataStream;
|
||||
}
|
||||
|
||||
/**
|
||||
* Read the next byte in the decompressed data stream.
|
||||
*/
|
||||
public int read() throws IOException {
|
||||
if (volumeNumber == -1) { // read the data or resource fork header
|
||||
volumeNumber = dataStream.readByte();
|
||||
rleCharacter = dataStream.readByte();
|
||||
lzwStream = new LzwInputStream(new BitInputStream(dataStream, 9));
|
||||
rleStream = new RleInputStream(dataStream, rleCharacter);
|
||||
lzwRleStream = new RleInputStream(lzwStream);
|
||||
}
|
||||
if (bytesLeftInChunk == 0) { // read the chunk header
|
||||
bytesLeftInChunk = 4096; // NuFX always reads 4096 bytes
|
||||
// lzwStream.newBuffer(); // Allow the LZW stream to do a little housekeeping
|
||||
lzwStream.clearData(); // Allow the LZW stream to do a little housekeeping
|
||||
int word = dataStream.readWord();
|
||||
int length = word & 0x7fff;
|
||||
int lzwFlag = word & 0x8000;
|
||||
if (lzwFlag == 0) { // We clear dictionary whenever a non-LZW chunk is encountered
|
||||
lzwStream.clearDictionary();
|
||||
} else {
|
||||
dataStream.readWord(); // At this time, I just throw away the total bytes in this chunk...
|
||||
}
|
||||
int flag = (lzwFlag == 0 ? 0 : 1) + (length == 4096 ? 0 : 2);
|
||||
switch (flag) {
|
||||
case 0: decompressionStream = dataStream;
|
||||
break;
|
||||
case 1: decompressionStream = lzwStream;
|
||||
break;
|
||||
case 2: decompressionStream = rleStream;
|
||||
break;
|
||||
case 3: decompressionStream = lzwRleStream;
|
||||
break;
|
||||
default: throw new IOException("Unknown type of decompression, flag = " + flag);
|
||||
}
|
||||
}
|
||||
// Now we can read a data byte
|
||||
int b = decompressionStream.read();
|
||||
bytesLeftInChunk--;
|
||||
dataCrc.update(b);
|
||||
return b;
|
||||
}
|
||||
|
||||
// GENERATED CODE
|
||||
|
||||
public int getVolumeNumber() {
|
||||
return volumeNumber;
|
||||
}
|
||||
public void setVolumeNumber(int volumeNumber) {
|
||||
this.volumeNumber = volumeNumber;
|
||||
}
|
||||
public int getRleCharacter() {
|
||||
return rleCharacter;
|
||||
}
|
||||
public void setRleCharacter(int rleCharacter) {
|
||||
this.rleCharacter = rleCharacter;
|
||||
}
|
||||
public long getDataCrc() {
|
||||
return dataCrc.getValue();
|
||||
}
|
||||
}
|
BIN
test_src/com/webcodepro/shrinkit/io/APPLE.II-LZW1.SHK
Normal file
BIN
test_src/com/webcodepro/shrinkit/io/APPLE.II-LZW1.SHK
Normal file
Binary file not shown.
BIN
test_src/com/webcodepro/shrinkit/io/APPLE.II-LZW2.SHK
Normal file
BIN
test_src/com/webcodepro/shrinkit/io/APPLE.II-LZW2.SHK
Normal file
Binary file not shown.
1
test_src/com/webcodepro/shrinkit/io/APPLE.II.txt
Normal file
1
test_src/com/webcodepro/shrinkit/io/APPLE.II.txt
Normal file
File diff suppressed because one or more lines are too long
@ -1,15 +1,21 @@
|
||||
package com.webcodepro.shrinkit.io;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import com.webcodepro.shrinkit.HeaderBlock;
|
||||
import com.webcodepro.shrinkit.NuFileArchive;
|
||||
import com.webcodepro.shrinkit.ThreadKind;
|
||||
import com.webcodepro.shrinkit.ThreadRecord;
|
||||
|
||||
/**
|
||||
* Test some LZW/1 format streams.
|
||||
*
|
||||
* @author robgreene@users.sourceforge.net
|
||||
*/
|
||||
public class Lzw1Test extends TestCaseHelper {
|
||||
public class NufxLzw1Test extends TestCaseHelper {
|
||||
public void testTextFile() throws IOException {
|
||||
Lzw1InputStream is = new Lzw1InputStream(new LittleEndianByteInputStream(getTextFileLzw1StreamData()));
|
||||
NufxLzw1InputStream is = new NufxLzw1InputStream(new LittleEndianByteInputStream(getTextFileLzw1StreamData()));
|
||||
byte[] expected = getTextFileData();
|
||||
byte[] actual = new byte[expected.length];
|
||||
is.read(actual);
|
||||
@ -17,6 +23,27 @@ public class Lzw1Test extends TestCaseHelper {
|
||||
assertTrue(is.isCrcValid());
|
||||
}
|
||||
|
||||
public void testAppleIIShk() throws IOException {
|
||||
NuFileArchive archive = new NuFileArchive(getClass().getResourceAsStream("APPLE.II-LZW1.SHK"));
|
||||
List<HeaderBlock> blocks = archive.getHeaderBlocks();
|
||||
HeaderBlock block = blocks.get(0); // only one file
|
||||
if (block.getFilename() != null) System.out.printf("\n\n%s\n\n", block.getFilename());
|
||||
List<ThreadRecord> records = block.getThreadRecords();
|
||||
for (ThreadRecord record : records) {
|
||||
if (record.getThreadKind() == ThreadKind.FILENAME) {
|
||||
System.out.printf("\n\n%s\n\n", record.getText());
|
||||
}
|
||||
long bytes = record.getThreadEof();
|
||||
if (record.getThreadKind() == ThreadKind.DATA_FORK) {
|
||||
NufxLzw1InputStream is = new NufxLzw1InputStream(new LittleEndianByteInputStream(record.getRawInputStream()));
|
||||
while ( bytes-- > 0 ) {
|
||||
System.out.print((char)is.read());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
private byte[] getTextFileLzw1StreamData() {
|
||||
return new byte[] {
|
||||
(byte)0xCA, 0x42, 0x00, (byte)0xDB, (byte)0xB7, 0x00, 0x01, 0x54,
|
33
test_src/com/webcodepro/shrinkit/io/NufxLzw2Test.java
Normal file
33
test_src/com/webcodepro/shrinkit/io/NufxLzw2Test.java
Normal file
@ -0,0 +1,33 @@
|
||||
package com.webcodepro.shrinkit.io;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.List;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import com.webcodepro.shrinkit.HeaderBlock;
|
||||
import com.webcodepro.shrinkit.NuFileArchive;
|
||||
import com.webcodepro.shrinkit.ThreadKind;
|
||||
import com.webcodepro.shrinkit.ThreadRecord;
|
||||
|
||||
public class NufxLzw2Test extends TestCase {
|
||||
public void testPascalFile() throws IOException {
|
||||
NuFileArchive archive = new NuFileArchive(getClass().getResourceAsStream("APPLE.II-LZW2.SHK"));
|
||||
List<HeaderBlock> blocks = archive.getHeaderBlocks();
|
||||
HeaderBlock block = blocks.get(0);
|
||||
if (block.getFilename() != null) System.out.printf("\n\n%s\n\n", block.getFilename());
|
||||
List<ThreadRecord> records = block.getThreadRecords();
|
||||
for (ThreadRecord record : records) {
|
||||
if (record.getThreadKind() == ThreadKind.FILENAME) {
|
||||
System.out.printf("\n\n%s\n\n", record.getText());
|
||||
}
|
||||
long bytes = record.getThreadEof();
|
||||
if (record.getThreadKind() == ThreadKind.DATA_FORK) {
|
||||
NufxLzw2InputStream is = new NufxLzw2InputStream(new LittleEndianByteInputStream(record.getRawInputStream()));
|
||||
while ( bytes-- > 0 ) {
|
||||
System.out.print((char)is.read());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user