Fixed a bug, and fixed a big counting error.

This commit is contained in:
Martin Haye 2016-12-29 16:06:16 -08:00
parent 64bc3117f0
commit b5ca1d32aa
2 changed files with 27 additions and 91 deletions

View File

@ -1211,94 +1211,31 @@ class A2PackPartitions
//println "Reading font #$num from '$path'."
fonts[name] = [num:num, buf:readBinary(path)]
}
static int lx47Uncomp = 0
static int lx47Comp = 0
static int uncompTotal = 0
static int lz4Total = 0
static int lx47Total = 0
static int lx47Savings = 0
// Legacy experiment: walks an LZ4-style token stream held in 'data' and re-emits its literal
// runs, match offsets and match lengths through an Lx47Algorithm.Lx47Writer, then prints how
// many bytes the re-encoding saved relative to inLen and adds that to lx47Savings.
//
//   data       - buffer containing the token stream to parse
//   inLen      - number of valid bytes in 'data'; parsing stops when the read pointer reaches it
//   uncompData - not referenced by this method
//   uncompLen  - handed to the Lx47Writer constructor (presumably sizes its buffer; confirm)
//
// NOTE(review): nOffsets, nPrevOffsets, nPrev2Offsets, nBigLits and nBigMatches are used in the
// final println calls but are not declared anywhere in the visible code; confirm they exist
// before calling this method.
//
def old_testLx47(data, inLen, uncompData, uncompLen)
{
def lw = new Lx47Algorithm.Lx47Writer(uncompLen)
// sp is the read position within 'data'
def sp = 0
while (true)
{
// First comes the token: 4 bits literal len, 4 bits match len
// (literal length is the high nibble, match length the low nibble)
def token = (data[sp++] & 0xFF)
def matchLen = token & 0xF
def literalLen = token >> 4
// The literal length might get extended
// (a nibble of 15 means extra length bytes follow; a byte other than 0xFF ends the run)
if (literalLen == 15) {
while (true) {
token = (data[sp++] & 0xFF)
literalLen += token
if (token != 0xFF)
break
}
}
//println String.format("Literal: ptr=\$%x, len=\$%x.", sp, literalLen)
// Output literal len, and copy the literal bytes
// (the length is written with a +1 bias, so an empty literal run is written as 1)
lw.writeLiteralLen(literalLen+1)
for ( ; literalLen > 0; --literalLen)
lw.writeByte(data[sp++])
// The last block has only literals, and no match
if (sp == inLen)
break
// Grab the offset
// (two bytes, low byte first)
token = data[sp++] & 0xFF
def offset = token | ((data[sp++] & 0xFF) << 8)
// Output the low part of the offset, then any extraneous high bits
assert offset >= 1
lw.writeOffset(offset)
// The match length might get extended
// (same extension scheme as the literal length above)
if (matchLen == 15) {
while (true) {
token = (data[sp++] & 0xFF)
matchLen += token
if (token != 0xFF)
break
}
}
//println String.format("Match: offset=\$%x, len=\$%x.", offset, matchLen)
// Encode the match len
matchLen += 4 // min match length is 4
// net effect: the writer receives (stored match length + 1)
lw.writeMatchLen(matchLen-3)
}
// Bytes saved by the re-encoding compared with the input stream length
def savings = inLen - lw.outPos
lx47Savings += savings
println String.format("nOffsets=%d nPrev1=%d nPrev2=%d", nOffsets, nPrevOffsets, nPrev2Offsets)
println String.format("lz47 savings=%d bigLits=%d bigMatches=%d, total=%d", savings, nBigLits, nBigMatches, lx47Savings)
}
// Round-trip self-test and size bookkeeping for the Lx47 compressor: copies the first inLen
// bytes of the input buffer, compresses the copy with Lx47Algorithm, decompresses the result,
// asserts that the round trip reproduces the input, then updates the running static totals
// and prints them.
//
// NOTE(review): this span shows the removed and the added lines of the change interleaved
// (two signatures, two 'savings' definitions, two println calls); confirm which lines are
// live before relying on any single statement below.
//
def testLx47(data, inLen, uncompData, uncompLen)
def testLx47(inData, inLen, lz4Len)
{
def lx47 = new Lx47Algorithm()
// Work on an exact-length copy so the compressor sees only the first inLen bytes
def inputData = new byte[inLen]
System.arraycopy(data, 0, inputData, 0, inLen)
System.arraycopy(inData, 0, inputData, 0, inLen)
def outputData = lx47.compress(inputData)
// Decompress into a fresh buffer and verify the round trip
def uncomp = new byte[inLen]
lx47.decompress(outputData, uncomp)
assert uncomp == inputData
def savings = inLen - outputData.length
lx47Uncomp += uncompLen
lx47Comp += (uncompLen - outputData.length)
// savings here is measured against the LZ4-side length passed in as lz4Len
def savings = lz4Len - outputData.length
uncompTotal += inLen
lx47Savings += savings
println String.format("lz47 savings=%d total_uncomp=%d total_comp=%d total_savings=%d",
savings, lx47Uncomp, lx47Comp, lx47Savings)
lz4Total += lz4Len
lx47Total += outputData.length
println String.format("lz47 usize=%d savings=%d utot=%d lz4tot=%d lx47tot=%d total_savings=%d",
inLen, savings, uncompTotal, lz4Total, lx47Total, lx47Savings)
}
// Transform the LZ4 format to something we call "LZ4M", where the small offsets are stored
@ -1417,7 +1354,7 @@ class A2PackPartitions
// Then recompress to LZ4M (pretty much always smaller)
def recompressedLen = recompress(compressedData, compressedLen, uncompressedData, uncompressedLen)
testLx47(compressedData, recompressedLen, uncompressedData, uncompressedLen)
testLx47(uncompressedData, uncompressedLen, recompressedLen)
// If we saved at least 20 bytes, take the compressed version.
if ((uncompressedLen - recompressedLen) >= 20) {

View File

@ -11,8 +11,8 @@ import java.util.Arrays;
public class Lx47Algorithm
{
static final int MAX_OFFSET = 16384; /* range 1..2176 */
static final int MAX_LEN = 256; /* range 2..65536 */
static final int MAX_OFFSET = 65536; /* range 1..2176 */
static final int MAX_LEN = 65536; /* range 2..65536 */
LinkedList<String> debugs = new LinkedList<String>();
@ -224,28 +224,30 @@ public class Lx47Algorithm
}
}
byte[] compressOptimal(Optimal[] optimal, byte[] input_data) {
byte[] compressOptimal(Optimal[] optimal, byte[] input_data)
{
int input_index;
int input_prev;
int i;
//for (i=0; i<optimal.length; i++)
// System.out.format("opt[%d]: bits=%d len=%d lits=%d\n", i, optimal[i].bits, optimal[i].len, optimal[i].lits);
// System.out.format("opt[%d]: bits=%d off=%d len=%d lits=%d\n", i,
// optimal[i].bits, optimal[i].offset, optimal[i].len, optimal[i].lits);
/* calculate and allocate output buffer */
input_index = input_data.length-1;
int output_bits = optimal[input_index].bits;
if (optimal[input_index].lits == 0)
output_bits++; // zero-length lit str
output_bits += countEliasGammaBits(MAX_LEN);
output_bits++; // zero-length lit str at end
int output_size = (output_bits+7)/8;
byte[] output_data = new byte[output_size];
/* un-reverse optimal sequence */
int first_index = -1;
optimal[input_index].next = -1;
while (input_index >= 0) {
input_prev = input_index - (optimal[input_index].len > 0 ? optimal[input_index].len : optimal[input_index].lits);
if (input_prev > 0)
if (input_prev >= 0)
optimal[input_prev].next = input_index;
else
first_index = input_index;
@ -257,7 +259,7 @@ public class Lx47Algorithm
/* process all bytes */
boolean prevIsLit = false;
addDebug("start");
for (input_index = first_index; input_index > 0; input_index = optimal[input_index].next)
for (input_index = first_index; input_index >= 0; input_index = optimal[input_index].next)
{
if (optimal[input_index].len == 0) {
@ -295,7 +297,6 @@ public class Lx47Algorithm
w.writeLiteralLen(0);
}
addDebug("EOF");
w.writeMatchLen(MAX_LEN+1);
assert w.outPos == output_size : String.format("size miscalc: got %d, want %d", w.outPos, output_size);
System.arraycopy(w.buf, 0, output_data, 0, w.outPos);
@ -417,13 +418,11 @@ public class Lx47Algorithm
output_data[outPos++] = (byte) r.readByte();
chkDebug("lit $%x", output_data[outPos-1]);
}
// Not a literal, so it's a sequence. First get the length.
len = r.readMatchLen();
if (len < 0) // EOF mark?
if (outPos == output_data.length)
break;
// Then get offset, and copy data
// Not a literal, so it's a sequence. Get len, offset, and copy.
len = r.readMatchLen();
int off = r.readOffset();
chkDebug("seq l=%d o=%d", len, off);
while (len-- > 0) {
@ -440,7 +439,7 @@ public class Lx47Algorithm
public byte[] compress(byte[] input_data) {
if (false) {
input_data = "helloabchellodefhelloabcx".getBytes();
input_data = "aaaaaaaaa".getBytes();
byte[] testComp = compressOptimal(optimize(input_data), input_data);
byte[] testDecomp = new byte[input_data.length];
decompress(testComp, testDecomp);