From 2ef262640493bbadd3a8c8fe3e9317f1707242f2 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Thu, 5 Mar 2020 00:25:59 -0500 Subject: [PATCH] mist: use fast lzsa everywhere --- mist/Makefile | 2 +- mist/TODO | 3 + mist/decompress_fast_v2.s | 367 ++++++++++++++++++++++++++++++++++++++ mist/hello.bas | 4 +- mist/lz4_decode.s | 201 --------------------- mist/mist.s | 13 +- mist/mist_title.s | 59 +----- 7 files changed, 386 insertions(+), 263 deletions(-) create mode 100644 mist/decompress_fast_v2.s delete mode 100644 mist/lz4_decode.s diff --git a/mist/Makefile b/mist/Makefile index 70c0408d..8e1b0bf9 100644 --- a/mist/Makefile +++ b/mist/Makefile @@ -47,7 +47,7 @@ MIST: mist.o mist.o: mist.s zp.inc hardware.inc \ graphics_island/mist_graphics.inc \ - gr_copy.s gr_unrle.s audio.s text_print.s decompress_small_v2.s + gr_copy.s audio.s text_print.s decompress_fast_v2.s ca65 -o mist.o mist.s -l mist.lst #### diff --git a/mist/TODO b/mist/TODO index fd119f8a..f9cdbc64 100644 --- a/mist/TODO +++ b/mist/TODO @@ -2,6 +2,9 @@ + double link noise (once when leave, once when get there?) + allow turning around ++ adjust grab window on letter ++ only grab when facing object ++ add missing red book step Make game winnable: + Allow access to all marker switches diff --git a/mist/decompress_fast_v2.s b/mist/decompress_fast_v2.s new file mode 100644 index 00000000..e4ee4ff6 --- /dev/null +++ b/mist/decompress_fast_v2.s @@ -0,0 +1,367 @@ +; note -- modified by Vince Weaver to assemble with ca65 + +; ----------------------------------------------------------------------------- +; Decompress raw LZSA2 block. 
+; Create one with lzsa -r -f2 +; +; in: +; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address +; * A contains the destination page; the entry point stores it in LZSA_DST_HI and sets LZSA_DST_LO to 0 +; +; out: +; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1 +; +; ----------------------------------------------------------------------------- +; Backward decompression is also supported, use lzsa -r -b -f2 +; To use it, also define BACKWARD_DECOMPRESS=1 before including this code! +; NOTE(review): decompress_lzsa2_fast overwrites LZSA_DST_LO/LZSA_DST_HI from A on entry -- confirm before relying on the backward inputs below +; in: +; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data +; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer +; +; out: +; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1 +; +; ----------------------------------------------------------------------------- +; +; Copyright (C) 2019 Emmanuel Marty, Peter Ferrie +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. This notice may not be removed or altered from any source distribution.
+; ----------------------------------------------------------------------------- + +;NIBCOUNT = $FC ; zero-page location for temp offset + +decompress_lzsa2_fast: + + sta LZSA_DST_HI ; A = destination page on entry (high byte of the putdst operand) + + ldy #$00 ; Y = 0: high byte of the copy counters + sty LZSA_DST_LO ; output starts at offset 0 of that page + sty NIBCOUNT ; no nibble buffered yet + +decode_token: + jsr getsrc ; read token byte: XYZ|LL|MMM + pha ; preserve token on stack + + and #$18 ; isolate literals count (LL) + beq no_literals ; skip if no literals to copy + cmp #$18 ; LITERALS_RUN_LEN_V2? + bcc prepare_copy_literals ; if less, count is directly embedded in token + + jsr getnibble ; get extra literals length nibble + ; add nibble to len from token + adc #$02 ; (LITERALS_RUN_LEN_V2) minus carry + cmp #$12 ; LITERALS_RUN_LEN_V2 + 15 ? + bcc prepare_copy_literals_direct ; if less, literals count is complete + + jsr getsrc ; get extra byte of variable literals count + ; the carry is always set by the CMP above + ; GETSRC doesn't change it + sbc #$EE ; A = byte + 18 (mod 256); carry stays set only if that overflowed + jmp prepare_copy_literals_direct + +prepare_copy_literals_large: + ; handle 16 bits literals count + ; literals count = directly these 16 bits + jsr getlargesrc ; grab low 8 bits in X, high 8 bits in A + tay ; put high 8 bits in Y + bcs prepare_copy_literals_high ; (*same as JMP PREPARE_COPY_LITERALS_HIGH but shorter) + +prepare_copy_literals: + lsr ; shift literals count into place + lsr + lsr + +prepare_copy_literals_direct: + tax + bcs prepare_copy_literals_large ; carry set (overflow above): 16-bit literals count follows + +prepare_copy_literals_high: + txa + beq copy_literals + iny + +copy_literals: + jsr getput ; copy one byte of literals + dex + bne copy_literals + dey + bne copy_literals + +no_literals: + pla ; retrieve token from stack + pha ; preserve token again + asl + bcs repmatch_or_large_offset ; 1YZ: rep-match or 13/16 bit offset + + asl ; 0YZ: 5 or 9 bit offset + bcs offset_9_bit + + ; 00Z: 5 bit offset + + ldx #$FF ; set offset bits 15-8 to 1 + + jsr getcombinedbits ; rotate Z bit into bit 0, read nibble for bits 4-1 + ora #$E0 ; set bits 7-5 to 1 + bne got_offset_lo
+ ; go store low byte of match offset and prepare match + +offset_9_bit: ; 01Z: 9 bit offset + ;;asl ; shift Z (offset bit 8) in place + rol + rol + and #$01 + eor #$FF ; set offset bits 15-9 to 1 + bne got_offset_hi ; go store high byte, read low byte of match offset and prepare match + ; (*same as JMP GOT_OFFSET_HI but shorter) + +repmatch_or_large_offset: + asl ; 13 bit offset? + bcs repmatch_or_16bit ; handle rep-match or 16-bit offset if not + + ; 10Z: 13 bit offset + + jsr getcombinedbits ; rotate Z bit into bit 8, read nibble for bits 12-9 + adc #$DE ; set bits 15-13 to 1 and subtract 2 (to subtract 512) + bne got_offset_hi ; go store high byte, read low byte of match offset and prepare match + ; (*same as JMP GOT_OFFSET_HI but shorter) + +repmatch_or_16bit: ; rep-match or 16 bit offset + ;;ASL ; XYZ=111? + bmi rep_match ; reuse previous offset if so (rep-match) + + ; 110: handle 16 bit offset + jsr getsrc ; grab high 8 bits +got_offset_hi: + tax + jsr getsrc ; grab low 8 bits +got_offset_lo: + sta OFFSLO ; store low byte of match offset + stx OFFSHI ; store high byte of match offset + +rep_match: +.ifdef BACKWARD_DECOMPRESS + + ; Backward decompression - subtract match offset + + sec ; dest - match offset + lda putdst+1 ; low 8 bits +OFFSLO = *+1 + sbc #$AA ; operand patched via OFFSLO + sta copy_match_loop+1 ; store back reference address + lda putdst+2 +OFFSHI = *+1 + sbc #$AA ; high 8 bits + sta copy_match_loop+2 ; store high 8 bits of address + sec + +.else + + ; Forward decompression - add match offset + + clc ; add dest + match offset + lda putdst+1 ; low 8 bits +OFFSLO = *+1 + adc #$AA ; operand patched via OFFSLO + sta copy_match_loop+1 ; store back reference address +OFFSHI = *+1 + lda #$AA ; high 8 bits + adc putdst+2 + sta copy_match_loop+2 ; store high 8 bits of address +.endif + + pla ; retrieve token from stack again + and #$07 ; isolate match len (MMM) + adc #$01 ; add MIN_MATCH_SIZE_V2 and carry + cmp #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
+ bcc prepare_copy_match ; if less, length is directly embedded in token + + jsr getnibble ; get extra match length nibble + ; add nibble to len from token + adc #$08 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) minus carry + cmp #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15? + bcc prepare_copy_match ; if less, match length is complete + + jsr getsrc ; get extra byte of variable match length + ; the carry is always set by the CMP above + ; GETSRC doesn't change it + sbc #$E8 ; A = byte + 24 (mod 256); carry stays set only if that overflowed + +prepare_copy_match: + tax + bcc prepare_copy_match_y ; if not, the match length is complete + beq decompression_done ; if EOD code, bail + + ; Handle 16 bits match length + jsr getlargesrc ; grab low 8 bits in X, high 8 bits in A + tay ; put high 8 bits in Y + +prepare_copy_match_y: + txa + beq copy_match_loop + iny + +copy_match_loop: + lda $AAAA ; get one byte of backreference + jsr putdst ; copy to destination + +.ifdef BACKWARD_DECOMPRESS + + ; Backward decompression -- put backreference bytes backward + + lda copy_match_loop+1 + beq getmatch_adj_hi +getmatch_done: + dec copy_match_loop+1 + +.else + + ; Forward decompression -- put backreference bytes forward + + inc copy_match_loop+1 + beq getmatch_adj_hi +getmatch_done: + +.endif + + dex + bne copy_match_loop + dey + bne copy_match_loop + jmp decode_token + +.ifdef BACKWARD_DECOMPRESS + +getmatch_adj_hi: + dec copy_match_loop+2 + jmp getmatch_done + +.else + +getmatch_adj_hi: + inc copy_match_loop+2 + jmp getmatch_done +.endif + +getcombinedbits: + eor #$80 ; invert bit 7 (the token's Z bit at this point) + asl ; carry = inverted Z bit + php ; keep that carry across getnibble + + jsr getnibble ; get nibble into bits 0-3 (for offset bits 1-4) + plp ; merge Z bit as the carry bit (for offset bit 0) +combinedbitz: + rol ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared +decompression_done: + rts + +getnibble: +NIBBLES = *+1 + lda #$AA ; operand (NIBBLES) holds the last byte fetched for nibbles + lsr NIBCOUNT ; carry = 1 if a low nibble is still buffered + bcc need_nibbles ; nothing buffered: fetch a new byte + and #$0F ; isolate low 4 bits of nibble + rts + +need_nibbles: + inc NIBCOUNT + jsr getsrc ; get 2 nibbles + sta NIBBLES + lsr + lsr + lsr + lsr + sec ; return with carry set, same as the buffered path + rts
+ +.ifdef BACKWARD_DECOMPRESS + + ; Backward decompression -- get and put bytes backward + +getput: + jsr getsrc +putdst: +LZSA_DST_LO = *+1 +LZSA_DST_HI = *+2 + sta $AAAA + lda putdst+1 + beq putdst_adj_hi + dec putdst+1 + rts + +putdst_adj_hi: + dec putdst+2 + dec putdst+1 + rts + +getlargesrc: + jsr getsrc ; grab low 8 bits + tax ; move to X + ; fall through grab high 8 bits + +getsrc: +LZSA_SRC_LO = *+1 +LZSA_SRC_HI = *+2 + lda $AAAA + pha + lda getsrc+1 + beq getsrc_adj_hi + dec getsrc+1 + pla + rts + +getsrc_adj_hi: + dec getsrc+2 + dec getsrc+1 + pla + rts + +.else + + ; Forward decompression -- get and put bytes forward + +getput: + jsr getsrc +putdst: +LZSA_DST_LO = *+1 +LZSA_DST_HI = *+2 + sta $AAAA ; operand is the destination pointer, patched in place + inc putdst+1 ; advance low byte of the destination pointer + beq putdst_adj_hi ; wrapped to 0: carry into the high byte + rts + +putdst_adj_hi: + inc putdst+2 + rts + +getlargesrc: + jsr getsrc ; grab low 8 bits + tax ; move to X + ; fall through grab high 8 bits + +getsrc: +LZSA_SRC_LO = *+1 +LZSA_SRC_HI = *+2 + lda $AAAA ; operand is the source pointer, patched in place + inc getsrc+1 ; advance low byte; INC leaves the carry flag untouched + beq getsrc_adj_hi ; wrapped to 0: carry into the high byte + rts + +getsrc_adj_hi: + inc getsrc+2 + rts +.endif + diff --git a/mist/hello.bas b/mist/hello.bas index 30a1b4ca..4e8655ef 100644 --- a/mist/hello.bas +++ b/mist/hello.bas @@ -1,5 +1,5 @@ 5 HOME -10 PRINT " LOADING MIST V0.2" +10 PRINT " LOADING MIST V0.3" 20 PRINT 30 PRINT " ______" 40 PRINT " A \/\/\/ SOFTWARE PRODUCTION" @@ -9,6 +9,6 @@ 80 PRINT 90 PRINT "SPOILERS (BECAUSE NOT ALL IMPLEMENTED)" 100 PRINT " CLOCK: 2:40 2-2-1" -105 PRINT " CLICK ALL SWITCHES EXCEPT DOCK" +105 PRINT " ALL SWITCHES DOWN EXCEPT DOCK" 110 PRINT " FIREPLACE PATTERN IN BOOK" 120 PRINT CHR$(4);"BRUN LOADER" diff --git a/mist/lz4_decode.s b/mist/lz4_decode.s deleted file mode 100644 index 598430da..00000000 --- a/mist/lz4_decode.s +++ /dev/null @@ -1,201 +0,0 @@ -; LZ4 data decompressor for Apple II - -; Code by Peter Ferrie (qkumba) (peter.ferrie@gmail.com) -; "LZ4 unpacker in 143 bytes (6502 version) (2013)" -; http://pferrie.host22.com/misc/appleii.htm -; This is that code, but with comments and labels added
for clarity. -; I also found a bug when decoding with runs of multiples of 256 -; which has since been fixed upstream. - -; For LZ4 reference see -; https://github.com/lz4/lz4/wiki/lz4_Frame_format.md - -; LZ4 summary: -; -; HEADER: -; Should: check for magic number 04 22 4d 18 -; FLG: 64 in our case (01=version, block.index=1, block.checksum=0 -; size=0, checksum=1, reserved -; MAX Blocksize: 40 (64kB) -; HEADER CHECKSUM: a7 -; BLOCK HEADER: 4 bytes (le) length If highest bit set, uncompressed! -; data (see below), followed by checksum? -; BLOCKS: -; Token byte. High 4-bits literal length, low 4-bits copy length -; + If literal length==15, then following byte gets added to length -; If that byte was 255, then keep adding bytes until not 255 -; + The literal bytes follow. There may be zero of them -; + Next is block copy info. little-endian 2-byte offset to -; be subtracted from current read position indicating source -; + The low 4-bits of the token are the copy length, which needs -; 4 added to it. As with the literal length, if it is 15 then -; you read a byte and add (and if that byte is 255, keep adding) - -; At end you have 4 byte end-of-block marker (all zeros?) 
then -; 4 bytes of checksum (if marked in flags) -; our code does that, so be sure to set end -8 - - -;LZ4_SRC EQU $00 -;LZ4_DST EQU $02 -;LZ4_END EQU $04 -;COUNT EQU $06 -;DELTA EQU $08 - - - ;====================== - ; LZ4 decode - ;====================== - ; input buffer in LZ4_SRC - ; end of input in LZ4_END - ; output buffer in LZ4_DST - - -lz4_decode: - - -unpmain: - ldy #0 ; used to index, always zero - -parsetoken: - jsr getsrc ; get next token - pha ; save for later (need bottom 4 bits) - - lsr ; number of literals in top 4 bits - lsr ; so shift into place - lsr - lsr - beq copymatches ; if zero, then no literals - ; jump ahead and copy - - jsr buildcount ; add up all the literal sizes - ; result is in ram[count+1]-1:A - tax ; now in ram[count+1]-1:X - jsr docopy ; copy the literals - - lda LZ4_SRC ; 16-bit compare - cmp LZ4_END ; to see if we have reached the end - lda LZ4_SRC+1 - sbc LZ4_END+1 - bcs done - -copymatches: - jsr getsrc ; get 16-bit delta value - sta DELTA - jsr getsrc - sta DELTA+1 - - pla ; restore token - and #$0f ; get bottom 4 bits - ; match count. 
0 means 4 - ; 15 means 19+, must be calculated - - jsr buildcount ; add up count bits, in ram[count+1]-:A - - clc - adc #4 ; adjust count by 4 (minmatch) - - tax ; now in ramp[count+1]-1:X - - beq copy_no_adjust ; BUGFIX, don't increment if - ; exactly a multiple of 0x100 - bcc copy_no_adjust - - inc COUNT+1 ; increment if we overflowed -copy_no_adjust: - - lda LZ4_SRC+1 ; save src on stack - pha - lda LZ4_SRC - pha - - sec ; subtract delta - lda LZ4_DST ; from destination, make new src - sbc DELTA - sta LZ4_SRC - lda LZ4_DST+1 - sbc DELTA+1 - sta LZ4_SRC+1 - - jsr docopy ; do the copy - - pla ; restore the src - sta LZ4_SRC - pla - sta LZ4_SRC+1 - - jmp parsetoken ; back to parsing tokens - -done: - pla - rts - - ;========= - ; getsrc - ;========= - ; gets byte from src into A, increments pointer -getsrc: - lda (LZ4_SRC), Y ; get a byte from src - inc LZ4_SRC ; increment pointer - bne done_getsrc ; update 16-bit pointer - inc LZ4_SRC+1 ; on 8-bit overflow -done_getsrc: - rts - - ;============ - ; buildcount - ;============ -buildcount: - ldx #1 ; high count starts at 1 - stx COUNT+1 ; (loops at zero?) 
- cmp #$0f ; if LITERAL_COUNT < 15, we are done - bne done_buildcount -buildcount_loop: - sta COUNT ; save LITERAL_COUNT (15) - jsr getsrc ; get the next byte - tax ; put in X - clc - adc COUNT ; add new byte to old value - bcc bc_8bit_oflow ; if overflow, increment high byte - inc COUNT+1 -bc_8bit_oflow: - inx ; check if read value was 255 - beq buildcount_loop ; if it was, keep looping and adding -done_buildcount: - rts - - ;============ - ; getput - ;============ - ; gets a byte, then puts the byte -getput: - jsr getsrc - ; fallthrough to putdst - - ;============= - ; putdst - ;============= - ; store A into destination -putdst: - sta (LZ4_DST), Y ; store A into destination - inc LZ4_DST ; increment 16-bit pointer - bne putdst_end ; if overflow, increment top byte - inc LZ4_DST+1 -putdst_end: - rts - - ;============================= - ; docopy - ;============================= - ; copies ram[count+1]-1:X bytes - ; from src to dst -docopy: - -docopy_loop: - jsr getput ; get/put byte - dex ; decrement count - bne docopy_loop ; if not zero, loop - dec COUNT+1 ; if zero, decrement high byte - bne docopy_loop ; if not zero, loop - - rts diff --git a/mist/mist.s b/mist/mist.s index e5844225..0021d46a 100644 --- a/mist/mist.s +++ b/mist/mist.s @@ -427,7 +427,7 @@ change_direction: lda (LOCATION_STRUCT_L),Y sta LZSA_SRC_HI lda #$c ; load to page $c00 - jsr decompress_lzsa2 + jsr decompress_lzsa2_fast rts @@ -650,7 +650,7 @@ red_book_loop: lda #>red_book_static_lzsa sta LZSA_SRC_HI lda #$c ; load to page $c00 - jsr decompress_lzsa2 + jsr decompress_lzsa2_fast jsr gr_copy_to_current @@ -664,7 +664,7 @@ red_book_loop: lda #>red_book_static2_lzsa sta LZSA_SRC_HI lda #$c ; load to page $c00 - jsr decompress_lzsa2 + jsr decompress_lzsa2_fast jsr gr_copy_to_current @@ -687,7 +687,7 @@ red_book_loop: lda #>red_book_open_lzsa sta LZSA_SRC_HI lda #$c ; load to page $c00 - jsr decompress_lzsa2 + jsr decompress_lzsa2_fast jsr gr_copy_to_current @@ -721,7 +721,7 @@ red_book_done: lda 
#>red_book_shelf_lzsa sta LZSA_SRC_HI lda #$c ; load to page $c00 - jsr decompress_lzsa2 + jsr decompress_lzsa2_fast rts @@ -735,13 +735,12 @@ red_book_done: ;========================== .include "gr_copy.s" -; .include "gr_unrle.s" .include "gr_offsets.s" .include "gr_pageflip.s" .include "gr_putsprite_crop.s" .include "text_print.s" .include "gr_fast_clear.s" - .include "decompress_small_v2.s" + .include "decompress_fast_v2.s" .include "audio.s" diff --git a/mist/mist_title.s b/mist/mist_title.s index e9aade14..f9024169 100644 --- a/mist/mist_title.s +++ b/mist/mist_title.s @@ -21,51 +21,20 @@ mist_start: bit HIRES bit FULLGR - - ;=================== - ; Init RTS disk code - ;=================== - -; jsr rts_init - ;=================== ; Load graphics ;=================== reload_everything: - ; load MIST_TITLE.LZ4 to $a000 - ; then decompress it to $2000 (HGR PAGE0) -; lda #mist_title_filename -; sta OUTH -; jsr opendir_filename ; open and read entire file into memory - - ; size in ldsizeh:ldsizel (f1/f0) - -; clc lda #file sta LZ4_SRC+1 -; adc ldsizeh -; sta LZ4_END+1 + sta LZSA_SRC_HI - lda #file_end - sta LZ4_END+1 - - lda #<$2000 - sta LZ4_DST - lda #>$2000 - sta LZ4_DST+1 - - jsr lz4_decode + lda #$20 + jsr decompress_lzsa2_fast bit KEYRESET @@ -81,23 +50,9 @@ keyloop: rts + .include "decompress_fast_v2.s" - -; .include "gr_putsprite.s" -; .include "gr_offsets.s" -; .include "gr_fast_clear.s" -; .include "gr_hline.s" -; .include "wait_keypress.s" - .include "lz4_decode.s" -; .include "rts.s" - - -; filename to open is 30-character Apple text: -;mist_title_filename: ; .byte "MIST_TITLE.LZ4",0 -; .byte 'M'|$80,'I'|$80,'S'|$80,'T'|$80,'_'|$80,'T'|$80,'I'|$80,'T'|$80 -; .byte 'L'|$80,'E'|$80,'.'|$80,'L'|$80,'Z'|$80,'4'|$80,$00 - file: -.incbin "graphics_title/MIST_TITLE.LZ4" -file_end: +.incbin "graphics_title/mist_title.lzsa" +