More work on compression.

Martin Haye 2014-03-06 09:45:11 -08:00
parent 983f61b5c0
commit 85f434aadf
3 changed files with 113 additions and 56 deletions

View File

@ -36,6 +36,8 @@ class PackPartitions
+ def compressor = LZ4Factory.fastestInstance().highCompressor()
+ def ADD_COMP_CHECKSUMS = true
def parseMap(map, tiles)
{
// Parse each row of the map
@ -567,33 +569,41 @@ class PackPartitions
outLen += matchLen
}
+ // If checksums are enabled, add an exclusive-or checksum to the end.
+ if (ADD_COMP_CHECKSUMS) {
+ def cksum = 0
+ (0..<dp).each { cksum ^= data[it] }
+ data[dp++] = (byte) cksum
+ }
assert outLen == expOutLen
return dp
}
def totalUncompSize = 0
def totalLZ4Size = 0
def totalLZ4MSize = 0
def compressionSavings = 0
def compress(buf)
{
// First, grab the uncompressed data into a byte array
def uncompressedLen = buf.position()
totalUncompSize += uncompressedLen
def uncompressedData = new byte[uncompressedLen]
buf.position(0)
buf.get(uncompressedData)
// Now compress it with LZ4
assert uncompressedLen < 32768 : "data block too big"
def maxCompressedLen = compressor.maxCompressedLength(uncompressedLen)
def compressedData = new byte[maxCompressedLen]
def compressedLen = compressor.compress(uncompressedData, 0, uncompressedLen,
compressedData, 0, maxCompressedLen)
totalLZ4Size += compressedLen
// Then recompress to LZ4M (pretty much always smaller)
def recompressedLen = recompress(compressedData, compressedLen, uncompressedLen)
totalLZ4MSize += recompressedLen
- // If we saved at least 50 bytes, take the compressed version.
- if ((uncompressedLen - recompressedLen) >= 50) {
+ // If we saved at least 20 bytes, take the compressed version.
+ if ((uncompressedLen - recompressedLen) >= 20) {
//println " Compress. rawLen=$uncompressedLen compLen=$recompressedLen"
+ compressionSavings += (uncompressedLen - recompressedLen) - 2 - (ADD_COMP_CHECKSUMS ? 1 : 0)
return [data:compressedData, len:recompressedLen,
compressed:true, uncompressedLen:uncompressedLen]
}
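For reference, the checksum scheme above is a plain running XOR of the compressed bytes, stored as one extra byte at the end of the block. A decompressor can validate a block by XOR-ing every byte including the stored checksum and comparing against zero. A minimal Groovy sketch of that check (a hypothetical helper, not part of this commit):

def verifyChecksum(byte[] data, int len)
{
    // XOR of the payload plus the trailing checksum byte comes out zero when intact
    def cksum = 0
    (0..<len).each { cksum ^= data[it] }
    return cksum == 0
}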
@ -622,13 +632,13 @@ class PackPartitions
// Write the four bytes for each resource (6 for compressed resources)
chunks.each { chunk ->
- hdrBuf.put((byte)chunk.type | (chunk.buf.compressed ? 0x10 : 0))
+ hdrBuf.put((byte)chunk.type)
assert chunk.num >= 1 && chunk.num <= 255
hdrBuf.put((byte)chunk.num)
def len = chunk.buf.len
//println " chunk: type=${chunk.type}, num=${chunk.num}, len=$len"
hdrBuf.put((byte)(len & 0xFF))
- hdrBuf.put((byte)(len >> 8))
+ hdrBuf.put((byte)(len >> 8) | (chunk.buf.compressed ? 0x80 : 0))
if (chunk.buf.compressed) {
def clen = chunk.buf.uncompressedLen;
hdrBuf.put((byte)(clen & 0xFF))
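To make the new record layout concrete, with illustrative values: a compressed type-2 resource, number 7, occupying $0FA0 (4000) bytes on disk and $2328 (9000) bytes uncompressed, would be written by the code above as the six bytes 02 07 A0 8F 28 23. The $80 bit folded into the length hi byte marks it compressed, and the last two bytes carry the uncompressed size.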
@ -707,15 +717,39 @@ class PackPartitions
println "Writing output file."
new File(binPath).withOutputStream { stream -> writePartition(stream) }
println "Compression saved $compressionSavings bytes."
if (compressionSavings > 0) {
def endSize = new File(binPath).length()
def origSize = endSize + compressionSavings
def savPct = String.format("%.1f", compressionSavings * 100.0 / origSize)
println "Size $origSize -> $endSize ($savPct% savings)"
}
println "Done."
}
static void main(String[] args)
{
// Verify that assertions are enabled
def flag = false
try {
assert false
}
catch (AssertionError e) {
flag = true
}
if (!flag) {
println "Error: assertions must be enabled. Run with 'ea-'"
System.exit(1);
}
// Check the arguments
if (args.size() != 2) {
println "Usage: convert yourOutlawFile.xml DISK.PART.0.bin"
System.exit(1);
}
// Go for it.
new PackPartitions().pack(args[0], args[1])
}
}
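Working through the new savings report: if compression trims 15,000 bytes and the final partition file comes to 85,000 bytes, origSize works out to 100,000 and the code prints "Size 100000 -> 85000 (15.0% savings)". The assertion self-check in main() exists because the packer leans on assert for validation; on the JVM, assertions are off by default and are enabled with the standard -ea switch.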

View File

@ -26,6 +26,7 @@ isAuxCmd = $A ; len 1
unused0B = $B ; len 1
pSrc = $C ; len 2
pDst = $E ; len 2
+ ucLen = $10 ; len 2
; Memory buffers
fileBuf = $4000 ; len $400
@ -911,11 +912,11 @@ sequenceError: !zone
;------------------------------------------------------------------------------
startHeaderScan: !zone
- lda #2 ; start scanning the partition header just after len
+ lda #0
sta pTmp
lda #>headerBuf
sta pTmp+1
- ldy #0
+ ldy #2 ; start scanning the partition header just after len
rts
;------------------------------------------------------------------------------
@ -929,7 +930,7 @@ disk_queueLoad: !zone
.scan: lda (pTmp),y ; get resource type
beq .notFound ; if zero, this is end of table: failed to find the resource
iny
- and #$F ; mask off any flags we added
+ and #$F ; mask off any flags
cmp resType ; is it the type we're looking for?
bne .bump3 ; no, skip this resource
lda (pTmp),y ; get resource num
@ -960,12 +961,16 @@ disk_queueLoad: !zone
sta tSegRes,x
ldx tmp ; get back lo part of addr
rts ; success! all done.
- .bump3: iny ; skip the remaining 3 bytes of the 4-byte record
+ .bump3: iny ; skip resource number
iny
+ lda (pTmp),y ; get hi byte of length.
+ bpl + ; if hi bit clear, it's not compressed
+ iny ; skip uncompressed size too
+ iny
- bne .scan ; happily, 4-byte records always end at page boundary
- inc pTmp+1 ; page boundary hit - go to next page of header
- bne .scan ; always taken
+ + iny ; advance to next entry
+ bpl .scan ; if Y is small, loop again
+ jsr adjYpTmp ; keep it small
+ jmp .scan ; go for more
.notFound:
ldx #<+
ldy #>+
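In high-level terms, the .bump3 change above switches the table from fixed 4-byte records to variable-length ones (6 bytes when the hi bit of the length is set), which is why the old trick of records ending exactly at page boundaries had to go. A rough Groovy equivalent of the scan, with hypothetical names, for illustration only:

def findResource(byte[] header, int type, int num)
{
    def i = 2                            // skip the 2-byte header length
    while ((header[i] & 0xF) != 0) {     // type 0 marks end of table
        if ((header[i] & 0xF) == type && (header[i+1] & 0xFF) == num)
            return i
        // compressed entries carry 2 extra bytes of uncompressed size
        i += ((header[i+3] & 0x80) != 0) ? 6 : 4
    }
    return -1                            // not found
}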
@ -993,27 +998,30 @@ disk_finishLoad: !zone
bmi .load ; hi bit set -> queued for load
iny ; not set, not queued, so skip over it
iny
- bne .next ; always taken
- .load: tax ; save flag ($40) to decide where to read
+ bne .next
+ .load: tax ; save aux flag ($40) to decide where to read
+ and #$3F ; mask off the aux flag and load flag
+ sta (pTmp),y ; to restore the header to its pristine state
+ iny
and #$F ; mask to get just the resource type
sta resType ; save type for later
- sta (pTmp),y ; also restore the header to its pristine state
- iny
lda (pTmp),y ; get resource num
iny
sta resNum ; save resource number
- lda #0 ; start by assuming main mem
- sta isAuxCmd
txa ; get the flags back
+ ldx #0 ; index for main mem
and #$40 ; check for aux flag
beq + ; not aux, skip
+ inx ; index for aux mem
+ + stx isAuxCmd ; save main/aux selector
+ sty .ysave ; Save Y so we can resume scanning later.
- inc isAuxCmd ; set aux flag
- + sty .ysave ; Save Y so we can resume scanning later.
!if DEBUG { jsr .debug1 }
- lda (pTmp),y ; grab resource length
+ lda (pTmp),y ; grab resource length on disk
sta readLen ; save for reading
iny
lda (pTmp),y ; hi byte too
+ pha ; save the hi-bit flag
+ and #$7F ; mask off the flag to get the real (on-disk) length
sta readLen+1
jsr scanForResource ; find the segment number allocated to this resource
beq .addrErr ; it better have been allocated
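For clarity, the flag byte the loader unpacks here carries three things at once: $80 means queued for load, $40 means the destination is aux memory, and the low nibble is the resource type; the hi bit of the 16-bit length word separately marks compression. Decoded in Groovy terms (illustrative only, assuming i indexes the start of the record):

def flags      = header[i] & 0xFF
def queued     = (flags & 0x80) != 0     // hi bit: queued for load
def toAux      = (flags & 0x40) != 0     // $40: read into aux memory
def type       = flags & 0xF             // low nibble: resource type
def diskLen    = (header[i+2] & 0xFF) | ((header[i+3] & 0x7F) << 8)
def compressed = (header[i+3] & 0x80) != 0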
@ -1040,14 +1048,17 @@ disk_finishLoad: !zone
sta .setMarkPos
iny
lda (pTmp),y ; hi byte of length
- adc .setMarkPos+1
+ bpl + ; if hi bit is clear, resource is uncompressed
+ iny ; skip compressed size
+ iny
+ + adc .setMarkPos+1
sta .setMarkPos+1
bcc +
inc .setMarkPos+2 ; account for partitions > 64K
+ iny ; increment to next entry
- bne .scan ; exactly 64 4-byte entries per 256 bytes
- inc pTmp+1
- bne .scan ; always taken
+ bpl .scan ; if Y index is still small, loop again
+ jsr adjYpTmp ; adjust pTmp and Y to make it small again
+ jmp .scan ; back for more
.keepOpenChk: lda #11 ; self-modified to 0 or 1 at start of routine
bne .keepOpen
!if DEBUG { +prStr : !text "Closing partition file.",0 }
@ -1085,6 +1096,18 @@ disk_finishLoad: !zone
rts
} ; end DEBUG
+ ;------------------------------------------------------------------------------
+ adjYpTmp: !zone
+ tya
+ and #$7F ; adjust Y index to keep it small
+ tay
+ lda pTmp ; and bump pointer...
+ eor #$80 ; ...by 128 bytes
+ sta pTmp
+ bmi + ; if still the same page, we're done
+ inc pTmp+1 ; go to next page
+ + rts
+ ;------------------------------------------------------------------------------
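The new adjYpTmp routine is what lets the scanners above handle headers longer than 256 bytes while still using cheap bpl tests on Y: whenever Y climbs past 127, 128 bytes get folded out of the index and into the pointer. The net effect, sketched in Groovy with hypothetical names:

def adjYpTmp(state)
{
    state.y   &= 0x7F     // called when y >= 128: drop the high bit of the index
    state.ptr += 0x80     // and fold those 128 bytes into the base pointer
}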
closeFile: !zone
sta .closeFileRef
@ -1132,10 +1155,7 @@ readToAux: !zone
ldx #0
+ stx readLen
sta readLen+1 ; save number of pages
- jsr mli ; now read
- !byte MLI_READ
- !word readParams
- bcs .err
+ jsr readToMain ; now read
lda #>auxBuf ; set up copy pointers
sta .ld+2
lda readLen ; set up:
@ -1172,7 +1192,6 @@ readToAux: !zone
.or: ora #11 ; self-modified above
bne .nextGroup ; anything left to read, do more
rts ; all done
- .err: jmp prodosError
;------------------------------------------------------------------------------
lz4Decompress: !zone
@ -1210,9 +1229,9 @@ lz4Decompress: !zone
ldx pDst ; calculate the end of the dest buffer
txa ; also put low byte of ptr in X (where we'll use it constantly)
clc
- adc reqLen ; add in the uncompressed length
+ adc ucLen ; add in the uncompressed length
sta .endChk1+1 ; that's what we'll need to check to see if we're done
- lda reqLen+1 ; grab, but don't add, hi byte of dest length
+ lda ucLen+1 ; grab, but don't add, hi byte of dest length
adc #0 ; no, we don't add pDst+1 - see endChk2
sta .endChk2+1 ; this is essentially a count of dest page bumps
lda pDst+1 ; grab the hi byte of dest pointer
@ -1221,7 +1240,7 @@ lz4Decompress: !zone
ldy pSrc ; Y will always track the hi byte of the source ptr
lda #0 ; so zero out the low byte of the ptr itself
sta pSrc
- sta reqLen+1 ; reqLen+1 always needs to be zero
+ sta ucLen+1 ; ucLen+1 always needs to be zero
; Grab the next token in the compressed data
.getToken:
+LOAD_YSRC ; load next source byte
@ -1230,11 +1249,11 @@ lz4Decompress: !zone
lsr
lsr ; ...into the lo 4 bits
lsr
- beq .endChk1 ; if reqLen=0, there is no literal data.
- cmp #$F ; reqLen=15 is a special marker
+ beq .endChk1 ; if ucLen=0, there is no literal data.
+ cmp #$F ; ucLen=15 is a special marker
bcc + ; not special, go copy the literals
jsr .longLen ; special marker: extend the length
- + sta reqLen ; record resulting length (lo byte)
+ + sta ucLen ; record resulting length (lo byte)
.auxWr1 sta setAuxWr ; this gets self-modified depending on if target is in main or aux mem
.litCopy: ; loop to copy the literals
+LOAD_YSRC ; grab a literal source byte
@ -1243,9 +1262,9 @@ lz4Decompress: !zone
inx ; inc low byte of ptr
bne + ; non-zero, done
jsr .nextDstPage ; zero, need to go to next page
- + dec reqLen ; count bytes
+ + dec ucLen ; count bytes
bne .litCopy ; if non-zero, loop again
- dec reqLen+1 ; low-byte of count is zero. Decrement hi-byte
+ dec ucLen+1 ; low-byte of count is zero. Decrement hi-byte
bpl .litCopy ; If non-negative, loop again. NOTE: This would fail if we had blocks >= 32K
sta clrAuxWr ; back to writing main mem
@ -1309,26 +1328,26 @@ lz4Decompress: !zone
.auxRd2 sta setAuxRd ; and back to aux mem (if isAuxCmd)
+ dey ; count bytes -- first page yet?
bne .srcLoad ; loop for more
- dec reqLen+1 ; count pages
+ dec ucLen+1 ; count pages
bpl .srcLoad ; loop for more. NOTE: this would fail if we had blocks >= 32K
+ rts ; done copying bytes
}
.matchShadow_end = *
; Subroutine called when length token = $F, to extend the length by additional bytes
.longLen:
- sta reqLen ; save what we got so far
+ sta ucLen ; save what we got so far
- +LOAD_YSRC ; get another byte
cmp #$FF ; special value of $FF?
bcc + ; no, we're done
clc
- adc reqLen ; add $FF to reqLen
- sta reqLen
+ adc ucLen ; add $FF to ucLen
+ sta ucLen
bcc - ; no carry, only lo byte has changed
- inc reqLen+1 ; increment hi byte of reqLen
+ inc ucLen+1 ; increment hi byte of ucLen
bcs - ; always taken
- + adc reqLen ; carry already clear (we got here from cmp/bcc)
+ + adc ucLen ; carry already clear (we got here from cmp/bcc)
bcc +
- inc reqLen+1
+ inc ucLen+1
+ rts
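The .longLen helper above is the standard LZ4 length extension: a 4-bit length of 15 means more length bytes follow, each $FF byte adding 255 and the first non-$FF byte terminating the run. The same rule in Groovy (a hypothetical reader, for illustration only):

def readLz4Len(int nibble, byte[] src, int idx)
{
    def len = nibble
    if (nibble == 0xF) {                   // special marker: length continues
        def b
        while ((b = src[idx++] & 0xFF) == 0xFF)
            len += 0xFF                    // each $FF byte extends the run
        len += b                           // first non-$FF byte ends it
    }
    return [len: len, idx: idx]            // extended length plus new source index
}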
nextSrcPage:

View File

@ -65,10 +65,14 @@
; File Header:
; bytes 0-1: Total # of bytes in header (lo, hi)
; bytes 2-n: Table of repeated resource entries
- ; Resource entry:
- ; byte 0: resource type (1-15), 0 for end of table
- ; byte 1: resource number (1-255)
- ; bytes 2-3: number of bytes in resource (lo, hi)
+ ; Followed by one entry per resource. Each entry:
+ ; byte 0: low nibble: resource type (1-15), 0 for end of table
+ ; high nibble: reserved (should be zero)
+ ; byte 1: resource number (1-255)
+ ; bytes 2-3: number of on-disk bytes in resource (lo, hi)
+ ; if resource is compressed, this is the compressed size, and the hi bit
+ ; will be set. If so, bytes 4 and 5 will appear; if not, they'll be absent.
+ ; (bytes 4-5): (only present if compressed) decompressed size (lo, hi)
; The remainder of the file is the data for the resources, in order of their
; table appearance.
;
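As a worked example of this layout, using illustrative values and assuming the two length bytes count themselves (which the scanners that start at offset 2 suggest): a partition holding a single uncompressed type-1 resource, number 1, of $0400 bytes would begin with the seven header bytes 07 00 01 01 00 04 00 (a header length of 7, one four-byte entry, and the zero terminator), followed immediately by the $0400 bytes of resource data.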