More work on compression.

This commit is contained in:
Martin Haye
2014-03-06 09:45:11 -08:00
parent 983f61b5c0
commit 85f434aadf
3 changed files with 113 additions and 56 deletions

View File

@@ -36,6 +36,8 @@ class PackPartitions
def compressor = LZ4Factory.fastestInstance().highCompressor() def compressor = LZ4Factory.fastestInstance().highCompressor()
def ADD_COMP_CHECKSUMS = true
def parseMap(map, tiles) def parseMap(map, tiles)
{ {
// Parse each row of the map // Parse each row of the map
@@ -567,33 +569,41 @@ class PackPartitions
outLen += matchLen outLen += matchLen
} }
// If checksums are enabled, add an exclusive-or checksum to the end.
if (ADD_COMP_CHECKSUMS) {
def cksum = 0
(0..<dp).each { cksum ^= data[it] }
data[dp++] = (byte) cksum
}
assert outLen == expOutLen assert outLen == expOutLen
return dp return dp
} }
def totalUncompSize = 0 def compressionSavings = 0
def totalLZ4Size = 0
def totalLZ4MSize = 0
def compress(buf) def compress(buf)
{ {
// First, grab the uncompressed data into a byte array
def uncompressedLen = buf.position() def uncompressedLen = buf.position()
totalUncompSize += uncompressedLen
def uncompressedData = new byte[uncompressedLen] def uncompressedData = new byte[uncompressedLen]
buf.position(0) buf.position(0)
buf.get(uncompressedData) buf.get(uncompressedData)
// Now compress it with LZ4
assert uncompressedLen < 327678 : "data block too big"
def maxCompressedLen = compressor.maxCompressedLength(uncompressedLen) def maxCompressedLen = compressor.maxCompressedLength(uncompressedLen)
def compressedData = new byte[maxCompressedLen] def compressedData = new byte[maxCompressedLen]
def compressedLen = compressor.compress(uncompressedData, 0, uncompressedLen, def compressedLen = compressor.compress(uncompressedData, 0, uncompressedLen,
compressedData, 0, maxCompressedLen) compressedData, 0, maxCompressedLen)
totalLZ4Size += compressedLen
// Then recompress to LZ4M (pretty much always smaller)
def recompressedLen = recompress(compressedData, compressedLen, uncompressedLen) def recompressedLen = recompress(compressedData, compressedLen, uncompressedLen)
totalLZ4MSize += recompressedLen
// If we saved at least 50 bytes, take the compressed version. // If we saved at least 20 bytes, take the compressed version.
if ((uncompressedLen - recompressedLen) >= 50) { if ((uncompressedLen - recompressedLen) >= 20) {
//println " Compress. rawLen=$uncompressedLen compLen=$recompressedLen" //println " Compress. rawLen=$uncompressedLen compLen=$recompressedLen"
compressionSavings += (uncompressedLen - recompressedLen) - 2 - (ADD_COMP_CHECKSUMS ? 1 : 0)
return [data:compressedData, len:recompressedLen, return [data:compressedData, len:recompressedLen,
compressed:true, uncompressedLen:uncompressedLen] compressed:true, uncompressedLen:uncompressedLen]
} }
@@ -622,13 +632,13 @@ class PackPartitions
// Write the four bytes for each resource (6 for compressed resources) // Write the four bytes for each resource (6 for compressed resources)
chunks.each { chunk -> chunks.each { chunk ->
hdrBuf.put((byte)chunk.type | (chunk.buf.compressed ? 0x10 : 0)) hdrBuf.put((byte)chunk.type)
assert chunk.num >= 1 && chunk.num <= 255 assert chunk.num >= 1 && chunk.num <= 255
hdrBuf.put((byte)chunk.num) hdrBuf.put((byte)chunk.num)
def len = chunk.buf.len def len = chunk.buf.len
//println " chunk: type=${chunk.type}, num=${chunk.num}, len=$len" //println " chunk: type=${chunk.type}, num=${chunk.num}, len=$len"
hdrBuf.put((byte)(len & 0xFF)) hdrBuf.put((byte)(len & 0xFF))
hdrBuf.put((byte)(len >> 8)) hdrBuf.put((byte)(len >> 8) | (chunk.buf.compressed ? 0x80 : 0))
if (chunk.buf.compressed) { if (chunk.buf.compressed) {
def clen = chunk.buf.uncompressedLen; def clen = chunk.buf.uncompressedLen;
hdrBuf.put((byte)(clen & 0xFF)) hdrBuf.put((byte)(clen & 0xFF))
@@ -707,15 +717,39 @@ class PackPartitions
println "Writing output file." println "Writing output file."
new File(binPath).withOutputStream { stream -> writePartition(stream) } new File(binPath).withOutputStream { stream -> writePartition(stream) }
println "Compression saved $compressionSavings bytes."
if (compressionSavings > 0) {
def endSize = new File(binPath).length()
def origSize = endSize + compressionSavings
def savPct = String.format("%.1f", compressionSavings * 100.0 / origSize)
println "Size $origSize -> $endSize ($savPct% savings)"
}
println "Done." println "Done."
} }
static void main(String[] args) static void main(String[] args)
{ {
// Verify that assertions are enabled
def flag = false
try {
assert false
}
catch (AssertionError e) {
flag = true
}
if (!flag) {
println "Error: assertions must be enabled. Run with '-ea'"
System.exit(1);
}
// Check the arguments
if (args.size() != 2) { if (args.size() != 2) {
println "Usage: convert yourOutlawFile.xml DISK.PART.0.bin" println "Usage: convert yourOutlawFile.xml DISK.PART.0.bin"
System.exit(1); System.exit(1);
} }
// Go for it.
new PackPartitions().pack(args[0], args[1]) new PackPartitions().pack(args[0], args[1])
} }
} }

View File

@@ -26,6 +26,7 @@ isAuxCmd = $A ; len 1
unused0B = $B ; len 1 unused0B = $B ; len 1
pSrc = $C ; len 2 pSrc = $C ; len 2
pDst = $E ; len 2 pDst = $E ; len 2
ucLen = $10 ; len 2
; Memory buffers ; Memory buffers
fileBuf = $4000 ; len $400 fileBuf = $4000 ; len $400
@@ -911,11 +912,11 @@ sequenceError: !zone
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
startHeaderScan: !zone startHeaderScan: !zone
lda #2 ; start scanning the partition header just after len lda #0
sta pTmp sta pTmp
lda #>headerBuf lda #>headerBuf
sta pTmp+1 sta pTmp+1
ldy #0 ldy #2 ; start scanning the partition header just after len
rts rts
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
@@ -929,7 +930,7 @@ disk_queueLoad: !zone
.scan: lda (pTmp),y ; get resource type .scan: lda (pTmp),y ; get resource type
beq .notFound ; if zero, this is end of table: failed to find the resource beq .notFound ; if zero, this is end of table: failed to find the resource
iny iny
and #$F ; mask off any flags we added and #$F ; mask off any flags
cmp resType ; is it the type we're looking for? cmp resType ; is it the type we're looking for?
bne .bump3 ; no, skip this resource bne .bump3 ; no, skip this resource
lda (pTmp),y ; get resource num lda (pTmp),y ; get resource num
@@ -960,12 +961,16 @@ disk_queueLoad: !zone
sta tSegRes,x sta tSegRes,x
ldx tmp ; get back lo part of addr ldx tmp ; get back lo part of addr
rts ; success! all done. rts ; success! all done.
.bump3: iny ; skip the remaining 3 bytes of the 4-byte record .bump3: iny ; skip resource number
iny iny
lda (pTmp),y ; get hi byte of length.
bpl + ; if hi bit clear, it's not compressed
iny ; skip uncompressed size too
iny iny
bne .scan ; happily, 4-byte records always end at page boundary + iny ; advance to next entry
inc pTmp+1 ; page boundary hit - go to next page of header bpl .scan ; if Y is small, loop again
bne .scan ; always taken jsr adjYpTmp ; keep it small
jmp .scan ; go for more
.notFound: .notFound:
ldx #<+ ldx #<+
ldy #>+ ldy #>+
@@ -993,27 +998,30 @@ disk_finishLoad: !zone
bmi .load ; hi bit set -> queued for load bmi .load ; hi bit set -> queued for load
iny ; not set, not queued, so skip over it iny ; not set, not queued, so skip over it
iny iny
bne .next ; always taken bne .next
.load: tax ; save flag ($40) to decide where to read .load: tax ; save aux flag ($40) to decide where to read
and #$3F ; mask off the aux flag and load flag
sta (pTmp),y ; to restore the header to its pristine state
iny
and #$F ; mask to get just the resource type and #$F ; mask to get just the resource type
sta resType ; save type for later sta resType ; save type for later
sta (pTmp),y ; also restore the header to its pristine state
iny
lda (pTmp),y ; get resource num lda (pTmp),y ; get resource num
iny iny
sta resNum ; save resource number sta resNum ; save resource number
lda #0 ; start by assuming main mem
sta isAuxCmd
txa ; get the flags back txa ; get the flags back
ldx #0 ; index for main mem
and #$40 ; check for aux flag and #$40 ; check for aux flag
beq + ; not aux, skip beq + ; not aux, skip
inx ; index for aux mem inc isAuxCmd ; set aux flag
+ stx isAuxCmd ; save main/aux selector + sty .ysave ; Save Y so we can resume scanning later.
sty .ysave ; Save Y so we can resume scanning later.
!if DEBUG { jsr .debug1 } !if DEBUG { jsr .debug1 }
lda (pTmp),y ; grab resource length lda (pTmp),y ; grab resource length on disk
sta readLen ; save for reading sta readLen ; save for reading
iny iny
lda (pTmp),y ; hi byte too lda (pTmp),y ; hi byte too
pha ; save the hi-bit flag
and #$7F ; mask off the flag to get the real (on-disk) length
sta readLen+1 sta readLen+1
jsr scanForResource ; find the segment number allocated to this resource jsr scanForResource ; find the segment number allocated to this resource
beq .addrErr ; it better have been allocated beq .addrErr ; it better have been allocated
@@ -1040,14 +1048,17 @@ disk_finishLoad: !zone
sta .setMarkPos sta .setMarkPos
iny iny
lda (pTmp),y ; hi byte of length lda (pTmp),y ; hi byte of length
adc .setMarkPos+1 bpl + ; if hi bit is clear, resource is uncompressed
iny ; skip compressed size
iny
+ adc .setMarkPos+1
sta .setMarkPos+1 sta .setMarkPos+1
bcc + bcc +
inc .setMarkPos+2 ; account for partitions > 64K inc .setMarkPos+2 ; account for partitions > 64K
+ iny ; increment to next entry + iny ; increment to next entry
bne .scan ; exactly 64 4-byte entries per 256 bytes bpl .scan ; if Y index is still small, loop again
inc pTmp+1 jsr adjYpTmp ; adjust pTmp and Y to make it small again
bne .scan ; always taken jmp .scan ; back for more
.keepOpenChk: lda #11 ; self-modified to 0 or 1 at start of routine .keepOpenChk: lda #11 ; self-modified to 0 or 1 at start of routine
bne .keepOpen bne .keepOpen
!if DEBUG { +prStr : !text "Closing partition file.",0 } !if DEBUG { +prStr : !text "Closing partition file.",0 }
@@ -1085,6 +1096,18 @@ disk_finishLoad: !zone
rts rts
} ; end DEBUG } ; end DEBUG
;------------------------------------------------------------------------------
adjYpTmp: !zone
tya
and #$7F ; adjust Y index to keep it small
tay
lda pTmp ; and bump pointer...
eor #$80 ; ...by 128 bytes
sta pTmp
bmi + ; if still the same page, we're done
inc pTmp+1 ; go to next page
+ rts
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
closeFile: !zone closeFile: !zone
sta .closeFileRef sta .closeFileRef
@@ -1132,10 +1155,7 @@ readToAux: !zone
ldx #0 ldx #0
+ stx readLen + stx readLen
sta readLen+1 ; save number of pages sta readLen+1 ; save number of pages
jsr mli ; now read jsr readToMain ; now read
!byte MLI_READ
!word readParams
bcs .err
lda #>auxBuf ; set up copy pointers lda #>auxBuf ; set up copy pointers
sta .ld+2 sta .ld+2
lda readLen ; set up: lda readLen ; set up:
@@ -1172,7 +1192,6 @@ readToAux: !zone
.or: ora #11 ; self-modified above .or: ora #11 ; self-modified above
bne .nextGroup ; anything left to read, do more bne .nextGroup ; anything left to read, do more
rts ; all done rts ; all done
.err: jmp prodosError
;------------------------------------------------------------------------------ ;------------------------------------------------------------------------------
lz4Decompress: !zone lz4Decompress: !zone
@@ -1210,9 +1229,9 @@ lz4Decompress: !zone
ldx pDst ; calculate the end of the dest buffer ldx pDst ; calculate the end of the dest buffer
txa ; also put low byte of ptr in X (where we'll use it constantly) txa ; also put low byte of ptr in X (where we'll use it constantly)
clc clc
adc reqLen ; add in the uncompressed length adc ucLen ; add in the uncompressed length
sta .endChk1+1 ; that's what we'll need to check to see if we're done sta .endChk1+1 ; that's what we'll need to check to see if we're done
lda reqLen+1 ; grab, but don't add, hi byte of dest length lda ucLen+1 ; grab, but don't add, hi byte of dest length
adc #0 ; no, we don't add pDst+1 - see endChk2 adc #0 ; no, we don't add pDst+1 - see endChk2
sta .endChk2+1 ; this is essentially a count of dest page bumps sta .endChk2+1 ; this is essentially a count of dest page bumps
lda pDst+1 ; grab the hi byte of dest pointer lda pDst+1 ; grab the hi byte of dest pointer
@@ -1221,7 +1240,7 @@ lz4Decompress: !zone
ldy pSrc ; Y will always track the hi byte of the source ptr ldy pSrc ; Y will always track the hi byte of the source ptr
lda #0 ; so zero out the low byte of the ptr itself lda #0 ; so zero out the low byte of the ptr itself
sta pSrc sta pSrc
sta reqLen+1 ; reqLen+1 always needs to be zero sta ucLen+1 ; ucLen+1 always needs to be zero
; Grab the next token in the compressed data ; Grab the next token in the compressed data
.getToken: .getToken:
+LOAD_YSRC ; load next source byte +LOAD_YSRC ; load next source byte
@@ -1230,11 +1249,11 @@ lz4Decompress: !zone
lsr lsr
lsr ; ...into the lo 4 bits lsr ; ...into the lo 4 bits
lsr lsr
beq .endChk1 ; if reqLen=0, there is no literal data. beq .endChk1 ; if ucLen=0, there is no literal data.
cmp #$F ; reqLen=15 is a special marker cmp #$F ; ucLen=15 is a special marker
bcc + ; not special, go copy the literals bcc + ; not special, go copy the literals
jsr .longLen ; special marker: extend the length jsr .longLen ; special marker: extend the length
+ sta reqLen ; record resulting length (lo byte) + sta ucLen ; record resulting length (lo byte)
.auxWr1 sta setAuxWr ; this gets self-modified depending on if target is in main or aux mem .auxWr1 sta setAuxWr ; this gets self-modified depending on if target is in main or aux mem
.litCopy: ; loop to copy the literals .litCopy: ; loop to copy the literals
+LOAD_YSRC ; grab a literal source byte +LOAD_YSRC ; grab a literal source byte
@@ -1243,9 +1262,9 @@ lz4Decompress: !zone
inx ; inc low byte of ptr inx ; inc low byte of ptr
bne + ; non-zero, done bne + ; non-zero, done
jsr .nextDstPage ; zero, need to go to next page jsr .nextDstPage ; zero, need to go to next page
+ dec reqLen ; count bytes + dec ucLen ; count bytes
bne .litCopy ; if non-zero, loop again bne .litCopy ; if non-zero, loop again
dec reqLen+1 ; low-byte of count is zero. Decrement hi-byte dec ucLen+1 ; low-byte of count is zero. Decrement hi-byte
bpl .litCopy ; If non-negative, loop again. NOTE: This would fail if we had blocks >= 32K bpl .litCopy ; If non-negative, loop again. NOTE: This would fail if we had blocks >= 32K
sta clrAuxWr ; back to writing main mem sta clrAuxWr ; back to writing main mem
@@ -1309,26 +1328,26 @@ lz4Decompress: !zone
.auxRd2 sta setAuxRd ; and back to aux mem (if isAuxCmd) .auxRd2 sta setAuxRd ; and back to aux mem (if isAuxCmd)
+ dey ; count bytes -- first page yet? + dey ; count bytes -- first page yet?
bne .srcLoad ; loop for more bne .srcLoad ; loop for more
dec reqLen+1 ; count pages dec ucLen+1 ; count pages
bpl .srcLoad ; loop for more. NOTE: this would fail if we had blocks >= 32K bpl .srcLoad ; loop for more. NOTE: this would fail if we had blocks >= 32K
+ rts ; done copying bytes + rts ; done copying bytes
} }
.matchShadow_end = * .matchShadow_end = *
; Subroutine called when length token = $F, to extend the length by additional bytes ; Subroutine called when length token = $F, to extend the length by additional bytes
.longLen: .longLen:
sta reqLen ; save what we got so far sta ucLen ; save what we got so far
- +LOAD_YSRC ; get another byte - +LOAD_YSRC ; get another byte
cmp #$FF ; special value of $FF? cmp #$FF ; special value of $FF?
bcc + ; no, we're done bcc + ; no, we're done
clc clc
adc reqLen ; add $FF to reqLen adc ucLen ; add $FF to ucLen
sta reqLen sta ucLen
bcc - ; no carry, only lo byte has changed bcc - ; no carry, only lo byte has changed
inc reqLen+1 ; increment hi byte of reqLen inc ucLen+1 ; increment hi byte of ucLen
bcs - ; always taken bcs - ; always taken
+ adc reqLen ; carry already clear (we got here from cmp/bcc) + adc ucLen ; carry already clear (we got here from cmp/bcc)
bcc + bcc +
inc reqLen+1 inc ucLen+1
+ rts + rts
nextSrcPage: nextSrcPage:

View File

@@ -65,10 +65,14 @@
; File Header: ; File Header:
; bytes 0-1: Total # of bytes in header (lo, hi) ; bytes 0-1: Total # of bytes in header (lo, hi)
; bytes 2-n: Table of repeated resource entries ; bytes 2-n: Table of repeated resource entries
; Resource entry: ; Followed by one entry per resource. Each entry:
; byte 0: resource type (1-15), 0 for end of table ; byte 0: low nibble: resource type (1-15), 0 for end of table
; byte 1: resource number (1-255) ; high nibble: reserved (should be zero)
; bytes 2-3: number of bytes in resource (lo, hi) ; byte 1: resource number (1-255)
; bytes 2-3: number of on-disk bytes in resource (lo, hi)
; if resource is compressed, this is the compressed size, and the hi bit
; will be set. If so, bytes 4 and 5 will appear; if not, they'll be absent.
; (bytes 4-5): (only present if compressed) decompressed size (lo, hi)
; The remainder of the file is the data for the resources, in order of their ; The remainder of the file is the data for the resources, in order of their
; table appearance. ; table appearance.
; ;