Making progress on LZ4 compression.

2024-11-15 04:08:04 +00:00 · 2014-03-06 07:25:04 -08:00 · 2014-03-06 07:25:04 -08:00 · 983f61b5c0
commit 983f61b5c0
parent 625f2a44ea
4 changed files with 217 additions and 18 deletions
--- a/Platform/Apple/tools/PackPartitions/src/org/demo/PackPartitions.groovy
+++ b/Platform/Apple/tools/PackPartitions/src/org/demo/PackPartitions.groovy
@ -502,7 +502,7 @@ class PackPartitions
        fonts[name] = [num:num, buf:readBinary(path)]
    }
    
-    // Transform the LZ4 format to something we call "LZ5", where the small offsets are stored
+    // Transform the LZ4 format to something we call "LZ4M", where the small offsets are stored
    // as one byte instead of two. In our data, that's about 1/3 of the offsets.
    //
    def recompress(data, inLen, expOutLen)
@ -573,7 +573,7 @@ class PackPartitions
    
    def totalUncompSize = 0
    def totalLZ4Size = 0
-    def totalLZ5Size = 0
+    def totalLZ4MSize = 0

    def compress(buf)
    {
@ -589,11 +589,13 @@ class PackPartitions
        totalLZ4Size += compressedLen
        
        def recompressedLen = recompress(compressedData, compressedLen, uncompressedLen)
-        totalLZ5Size += recompressedLen
+        totalLZ4MSize += recompressedLen
        
-        if (recompressedLen < uncompressedLen) {
+        // If we saved at least 50 bytes, take the compressed version.
+        if ((uncompressedLen - recompressedLen) >= 50) {
            //println "  Compress. rawLen=$uncompressedLen compLen=$recompressedLen"
-            return [data:compressedData, len:recompressedLen, compressed:true]
+            return [data:compressedData, len:recompressedLen, 
+                    compressed:true, uncompressedLen:uncompressedLen]
        }
        else {
            //println "  No compress. rawLen=$uncompressedLen compLen=$recompressedLen"
@ -613,15 +615,12 @@ class PackPartitions
        maps3D.values().each { chunks.add([type:TYPE_3D_MAP, num:it.num, buf:compress(it.buf)]) }
        textures.values().each { chunks.add([type:TYPE_TEXTURE_IMG, num:it.num, buf:compress(it.buf)]) }
        
-        println "LZ4 compression: $totalUncompSize -> $totalLZ4Size"
-        println "LZ5 compression: $totalUncompSize -> $totalLZ5Size"
-        
        // Generate the header chunk. Leave the first 2 bytes for the # of pages in the hdr
        def hdrBuf = ByteBuffer.allocate(50000)
        hdrBuf.put((byte)0)
        hdrBuf.put((byte)0)
        
-        // Write the four bytes for each resource
+        // Write the four bytes for each resource (6 for compressed resources)
        chunks.each { chunk ->
            hdrBuf.put((byte)chunk.type | (chunk.buf.compressed ? 0x10 : 0))
            assert chunk.num >= 1 && chunk.num <= 255
@ -630,6 +629,11 @@ class PackPartitions
            //println "  chunk: type=${chunk.type}, num=${chunk.num}, len=$len"
            hdrBuf.put((byte)(len & 0xFF))
            hdrBuf.put((byte)(len >> 8))
+            if (chunk.buf.compressed) {
+                def clen = chunk.buf.uncompressedLen;
+                hdrBuf.put((byte)(clen & 0xFF))
+                hdrBuf.put((byte)(clen >> 8))
+            }
        }
        
        // Terminate the header with a zero type
--- a/Platform/Apple/virtual/src/core/mem.s
+++ b/Platform/Apple/virtual/src/core/mem.s
@ -23,6 +23,9 @@ reqLen		= $6	; len 2
 resType		= $8	; len 1
 resNum		= $9	; len 1
 isAuxCmd	= $A	; len 1
+unused0B	= $B	; len 1
+pSrc		= $C	; len 2
+pDst		= $E	; len 2

 ; Memory buffers
 fileBuf		= $4000	; len $400
@ -1171,6 +1174,191 @@ readToAux: !zone
 	rts			; all done
 .err:	jmp prodosError

+;------------------------------------------------------------------------------
+lz4Decompress: !zone
+; Input: pSrc - pointer to source data
+;        pDst - pointer to destination buffer
+;        reqLen  - length of *destination* data (16-bit)
+; All inputs are destroyed by the process.
+
+!macro LOAD_YSRC {
+	lda (pSrc),y		; load byte
+	iny			; inc low byte of ptr
+	bne +			; non-zero, done
+	jsr nextSrcPage		; zero, need to go to next page
+
+}
+
+  ; Copy the match shadow down to the stack area so it can copy from aux to aux
+	ldx #.matchShadow_end - .matchShadow_beg - 1
+-	lda .matchShadow_beg,x	; get the copy from main RAM
+	sta .matchCopy,x	; and put it down in stack space where it can access both main and aux
+	dex			; next byte
+	bpl -			; loop until we grab them all (including byte 0)
+
+	ldx #<clrAuxWr		; start by assuming write to main mem
+	ldy #<clrAuxRd		; and read from main mem
+	lda isAuxCmd		; if we're decompressing to aux...
+	beq +			; no? keep those values
+	ldx #<setAuxWr		; yes, write to aux mem
+	ldy #<setAuxRd		; and read from aux mem
+ 	stx .auxWr1+1		; set all the write switches for aux/main
+	stx .auxWr2+1
+	sty .auxRd1+1		; and the read switches too
+	sty .auxRd2+1
+	
+	ldx pDst		; calculate the end of the dest buffer
	txa			; copy it to A for the add; X keeps the low byte of ptr (we'll use it constantly)
+	clc
+	adc reqLen		; add in the uncompressed length
+	sta .endChk1+1		; that's what we'll need to check to see if we're done
+	lda reqLen+1		; grab, but don't add, hi byte of dest length
+	adc #0			; no, we don't add pDst+1 - see endChk2
+	sta .endChk2+1		; this is essentially a count of dest page bumps
+	lda pDst+1		; grab the hi byte of dest pointer
+	sta .dstStore1+2	; self-modify our storage routines
+	sta .dstStore2+2
	ldy pSrc		; Y will always track the lo byte of the source ptr
+	lda #0			; so zero out the low byte of the ptr itself
+	sta pSrc
+	sta reqLen+1		; reqLen+1 always needs to be zero
+	; Grab the next token in the compressed data
+.getToken:
+	+LOAD_YSRC		; load next source byte
+	pha			; save the token byte. We use half now, and half later
+	lsr			; shift to get the hi 4 bits...
+	lsr
+	lsr			; ...into the lo 4 bits
+	lsr
	beq .endChk1		; if literal len=0, there is no literal data.
	cmp #$F			; literal len=15 is a special marker
+	bcc +			; not special, go copy the literals
+	jsr .longLen		; special marker: extend the length
+	sta reqLen		; record resulting length (lo byte)
+.auxWr1	sta setAuxWr		; this gets self-modified depending on if target is in main or aux mem
+.litCopy:			; loop to copy the literals
+	+LOAD_YSRC		; grab a literal source byte
+.dstStore1:
+	sta $1100,x		; hi-byte gets self-modified to point to dest page
+	inx			; inc low byte of ptr
+	bne +			; non-zero, done
+	jsr .nextDstPage	; zero, need to go to next page
+	dec reqLen		; count bytes
+	bne .litCopy		; if non-zero, loop again
+	dec reqLen+1		; low-byte of count is zero. Decrement hi-byte
+	bpl .litCopy        	; If non-negative, loop again. NOTE: This would fail if we had blocks >= 32K
+	sta clrAuxWr		; back to writing main mem
+	  
+.endChk1:
+	cpx #11			; end check - self-modified earlier
+	bcc .decodeMatch	; if less, keep going
+.endChk2:
+	lda #0              	; have we finished all pages?
+	bne .decodeMatch	; no, keep going
+	pla			; toss unused match length
+	rts			; all done!
+	; Now that we've finished with the literals, decode the match section
+.decodeMatch:
+	+LOAD_YSRC		; grab first byte of match offset
+	sta tmp			; save for later
+	bmi .far		; if hi bit is set, there will be a second byte
+	lda #0			; otherwise, second byte is assumed to be zero
+	beq .doInv		; always taken
+.far:	+LOAD_YSRC		; grab second byte of offset
+	asl tmp			; toss the unused hi bit of the lo byte
+ 	lsr			; shift out lo bit of the hi byte
+	rol tmp			; to fill in the hi bit of the lo byte
+.doInv:	sta tmp+1		; got the hi byte of the offset now
+	lda #0			; calculate zero minus the offset, to obtain ptr diff
+	sec
+	sbc tmp
+	sta .srcLoad+1		; that's how much less to read from
+	lda .dstStore2+2	; same with hi byte of offset
+	sbc tmp+1
+	sta .srcLoad+2		; to hi byte of offsetted pointer
+.getMatchLen:
+	pla			; recover the token byte
+	and #$F			; mask to get just the match length
+	clc
+	adc #4			; adjust: min match is 4 bytes
+	cmp #$13		; was it the special value $0F? ($F + 4 = $13)
+	bne +			; if not, no need to extend length
+	jsr .longLen		; need to extend the length
+	sty tmp			; save index to source pointer, so we can use Y...
+	tay			; ...to count bytes
+.auxWr2	sta setAuxWr		; self-modified earlier, based on isAuxCmd
+	jsr .matchCopy		; copy match bytes (aux->aux, or main->main)
	sta clrAuxWr		; back to writing main mem
+ 	ldy tmp			; restore index to source pointer
+	jmp .getToken		; go on to the next token in the compressed stream
+	; Subroutine to copy bytes, either main->main or aux->aux. We put it down in the
+	; stack space ($100) so it can access either area. The stack doesn't get bank-switched
+	; by setAuxRd/clrAuxRd.
+.matchShadow_beg = *
+!pseudopc $100 {
+.matchCopy:
+.auxRd1	sta setAuxRd  		; self-modified based on isAuxCmd
+.srcLoad:
+	lda $1100,x		; self-modified earlier for offsetted source
+.dstStore2:
+	sta $1100,x		; self-modified earlier for dest buffer
+	inx			; inc to next src/dst byte
+	bne +			; non-zero, skip page bump
+	sta clrAuxRd		; page bump needs to operate in main mem
+	jsr .nextDstPage	; do the bump
+.auxRd2	sta setAuxRd		; and back to aux mem (if isAuxCmd)
+	dey			; count bytes -- first page yet?
+	bne .srcLoad		; loop for more
+	dec reqLen+1		; count pages
+	bpl .srcLoad		; loop for more. NOTE: this would fail if we had blocks >= 32K
+	rts			; done copying bytes
+}
+.matchShadow_end = *
+	; Subroutine called when length token = $F, to extend the length by additional bytes
+.longLen:
+	sta reqLen		; save what we got so far
+-	+LOAD_YSRC		; get another byte
+	cmp #$FF		; special value of $FF? 
+	bcc +			; no, we're done
+	clc
+	adc reqLen		; add $FF to reqLen
+	sta reqLen
+	bcc -			; no carry, only lo byte has changed
+	inc reqLen+1		; increment hi byte of reqLen
+	bcs -			; always taken
+	adc reqLen		; carry already clear (we got here from cmp/bcc)
+	bcc +
+	inc reqLen+1
+	rts
+
+nextSrcPage:
+	inc pSrc+1
+	lda pSrc+1
+	cmp #$50		; TODO
+	beq +
+	rts
+	bit rdRamWr		; get current state of aux wr flag
+	php			; and save it
+	sta clrAuxWr		; now write to main mem so we can increment stuff in code blocks
+	; load more pages here
+	; TODO
+	; done loading more here
+	jmp .restoreAuxWr
+
+.nextDstPage:
+	bit rdRamWr		; get current state of aux wr flag
+	php			; and save it
+	sta clrAuxWr		; now write to main mem so we can increment stuff in code blocks
+	inc .srcLoad+2		; inc offset pointer for match copies
+	inc .dstStore1+2	; inc pointers for dest stores
+	inc .dstStore2+2
+	dec .endChk2+1		; decrement total page counter
+.restoreAuxWr:
+	plp			; get back the flag
+	bpl +			; if it wasn't set, skip
+	sta setAuxWr		; otherwise, go back to writing aux mem
+	rts
+  
 ;------------------------------------------------------------------------------
 ; Segment tables

--- a/Platform/Apple/virtual/src/include/global.i
+++ b/Platform/Apple/virtual/src/include/global.i
@ -47,6 +47,22 @@ setAuxWr	= $C005
 clrAuxZP	= $C008
 setAuxZP	= $C009
 kbdStrobe	= $C010
+rdLCBnk2	= $C011		;reading from LC bank $Dx 2 
+rdLCRam		= $C012		;reading from LC RAM 
+rdRamRd		= $C013		;reading from aux/alt 48K 
+rdRamWr		= $C014		;writing to aux/alt 48K 
+rdCXRom		= $C015		;using internal Slot ROM 
+rdAuxZP		= $C016		;using aux (alternate) zero page, stack, & LC 
+rdC3Rom		= $C017		;using external (Slot) C3 ROM 
+rd80Col		= $C018		;80STORE is On- using 80-column memory mapping 
+rdVblBar	= $C019		;not VBL (VBL signal low) 
+rdText		= $C01A		;using text mode 
+rdMixed		= $C01B		;using mixed mode 
+rdPage2		= $C01C		;using text/graphics page2 
+rdHires		= $C01D		;using Hi-res graphics mode 
+rdAltCh		= $C01E		;using alternate character set 
+rd80Vid		= $C01F		;using 80-column display mode 
+
 clrText		= $C050
 setText		= $C051
 clrMixed	= $C052
--- a/Platform/Apple/virtual/src/include/link.cfg
+++ b/Platform/Apple/virtual/src/include/link.cfg
@ -1,9 +0,0 @@
-MEMORY {
-    ZP:  file = "", define = yes, start = $0000, size = $00100;
-    STK: file = "", define = yes, start = %S,    size = $00100;
-    RAM: file = %O,               start = %S,    size = $10000;
-}
-SEGMENTS {
-    CODE:     load = RAM, type = rw, align = $100;
-    ZEROPAGE: load = ZP,  type = zp;
-}