From 1f4ed008d15cc8145bf2b0b3a7686b26b68223b6 Mon Sep 17 00:00:00 2001 From: Vince Weaver Date: Mon, 25 Feb 2019 21:01:06 -0500 Subject: [PATCH] ootw: more lz4 work --- gr-utils/png2lz4.c | 33 +++++-- ootw/Makefile | 2 +- ootw/compress_test.s | 114 +++++++++++++++++++++++- ootw/hardware.inc | 1 + ootw/lz4_decode.s | 208 +++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 349 insertions(+), 9 deletions(-) create mode 100644 ootw/lz4_decode.s diff --git a/gr-utils/png2lz4.c b/gr-utils/png2lz4.c index cf6954b6..75428109 100644 --- a/gr-utils/png2lz4.c +++ b/gr-utils/png2lz4.c @@ -14,6 +14,8 @@ #include #include +#include + #include "loadpng.h" #include "lz4.h" @@ -36,7 +38,7 @@ static int gr_lz4(int out_type, char *varname, int xsize, int ysize, unsigned char gr[1024]; unsigned char output[2048]; int x,y; - int size; + int size,count=0; /* our image pointer is not-interleaved, but it does */ /* have the top/bottom pixels properly packed for us */ @@ -52,7 +54,7 @@ static int gr_lz4(int out_type, char *varname, int xsize, int ysize, for(y=0;y<24;y++) { for(x=0;x<40;x++) { gr[(gr_offsets[y]-0x400)+x]= - image[((y/2)*xsize)+x]; + image[(y*xsize)+x]; } } @@ -71,28 +73,45 @@ static int gr_lz4(int out_type, char *varname, int xsize, int ysize, 2048, // src capacity 16); // compression level + + /* Note, unlike the on-disk format we do *not* */ + /* have to skip 11 bytes at front or 8 bytes at end */ + /* also, we write the 16-bit size (little endian) at front */ + if (out_type==OUTPUT_C) { fprintf(stdout,"unsigned char %s[]={",varname); + printf("\t0x%02X,0x%02X,\n",(size)&0xff, + ((size)>>8)&0xff); for(x=0;x>8)&0xff); for(x=0;x(test_rle) + sta GBASH + lda #<(test_rle) + sta GBASL + + lda #$0c ; load to $c00 + + jsr load_rle_gr + + dec GAIT + bne rle_inner_loop + + dec OFFSET + bne rle_outer_loop + + + jsr BELL + + jsr gr_copy_to_current + jsr page_flip + +uz_loop: + lda KEYPRESS + bpl uz_loop + bit KEYRESET + + + ;====================== + ; clear between tests + + 
lda DRAW_PAGE + pha + + lda #$8 ; clear c00 + sta DRAW_PAGE + jsr clear_top + jsr clear_bottom + + pla + sta DRAW_PAGE + +vz_loop: + lda KEYPRESS + bpl vz_loop + bit KEYRESET + + + ;============================ + ; Test LZ4 version + + ; point to source + lda #<(test_lz4) + sta LZ4_SRC + lda #>(test_lz4) + sta LZ4_SRC+1 + + lda #$C ; page to write to + + jsr lz4_decode + + jsr gr_copy_to_current + jsr page_flip + +forever: + jmp forever + + + +.include "gr_unrle.s" +.include "gr_offsets.s" +.include "gr_pageflip.s" +.include "gr_copy.s" +.include "gr_fast_clear.s" +.include "lz4_decode.s" + +.include "compress_test.inc" + + diff --git a/ootw/hardware.inc b/ootw/hardware.inc index 69ea5b93..05b79f67 100644 --- a/ootw/hardware.inc +++ b/ootw/hardware.inc @@ -36,6 +36,7 @@ CLRTOP = $F836 ;; clear only top of low-res screen SETCOL = $F864 ;; COLOR=A TEXT = $FB36 TABV = $FB5B ;; VTAB to A +BELL = $FBDD ;; ring the bell BASCALC = $FBC1 ;; VTAB = $FC22 ;; VTAB to CV HOME = $FC58 ;; Clear the text screen diff --git a/ootw/lz4_decode.s b/ootw/lz4_decode.s new file mode 100644 index 00000000..910b59e3 --- /dev/null +++ b/ootw/lz4_decode.s @@ -0,0 +1,208 @@ +; LZ4 data decompressor for Apple II + + +; NOTE: this version is optimized for loading LORES graphics +; on even page boundaries (usually $C00) +; Don't use it for generic purposes! 
+
+; Code originally by Peter Ferrie (qkumba) (peter.ferrie@gmail.com)
+; "LZ4 unpacker in 143 bytes (6502 version) (2013)"
+; http://pferrie.host22.com/misc/appleii.htm
+
+; For LZ4 reference see
+; https://github.com/lz4/lz4/wiki/lz4_Frame_format.md
+
+
+; We expect src in LZ4_SRC
+; Incoming Accumulator is page to write to
+; Size is in first 2 bytes pointed to by LZ4_SRC
+; LZ4 data should have 11 byte header stripped off beginning
+;	and 8 byte checksum stripped off the end
+
+;LZ4_SRC	EQU $00		; 25:10 (size=7c)
+;LZ4_DST	EQU $02		; 0c:00
+;LZ4_END	EQU $04		; 25:8c
+;COUNT		EQU $06
+;DELTA		EQU $08
+
+
+	;======================
+	; LZ4 decode
+	;======================
+	; input buffer in LZ4_SRC
+	; A is destination page
+	; size in first two bytes
+	;
+	; Uses LZ4_SRC, LZ4_DST, LZ4_END, COUNT and DELTA (two bytes each).
+	; Clobbers A and X; Y is left at zero.
+	; On return LZ4_SRC and LZ4_DST have been advanced past the data
+	; that was read and written.
+
+lz4_decode:
+	sta	LZ4_DST+1		; set to page we want
+	lda	#0
+	sta	LZ4_DST			; destination starts at offset 0 in page
+
+	ldy	#0
+
+	; calculate LZ4_END based on start and total size in
+	; first two bytes
+	; NOTE(review): the size is added to the address of the size
+	;	word itself (the pointer is only advanced afterwards),
+	;	so if the size counts just the compressed bytes LZ4_END
+	;	lands two bytes before the real end -- confirm against
+	;	the encoder.  The end test below is a >= compare and is
+	;	only made after a literal copy.
+	clc
+	lda	(LZ4_SRC),Y		; size (low)
+	adc	LZ4_SRC
+	sta	LZ4_END
+	iny
+	lda	(LZ4_SRC),Y		; size (high)
+	adc	LZ4_SRC+1
+	sta	LZ4_END+1
+
+	; skip past size
+	clc
+	lda	LZ4_SRC
+	adc	#2
+	sta	LZ4_SRC
+	lda	LZ4_SRC+1
+	adc	#0
+	sta	LZ4_SRC+1
+
+
+unpmain:
+	ldy	#0			; used to index, always zero
+
+parsetoken:
+	jsr	getsrc			; get next token
+	pha				; save for later (need bottom 4 bits)
+
+	lsr				; number of literals in top 4 bits
+	lsr				; so shift into place
+	lsr
+	lsr
+	beq	copymatches		; if zero, then no literals
+					; jump ahead and copy
+
+	jsr	buildcount		; add up all the literal sizes
+					; result is in ram[count+1]-1:A
+	tax				; now in ram[count+1]-1:X
+
+	bne	literal_no_adjust	; BUGFIX: a low byte of zero here means
+					; the length is an exact multiple of
+					; 0x100.  buildcount already bumped the
+					; high byte for the wrap, and docopy
+					; treats X=0 as 256 bytes, so without
+					; this an extra 256 bytes get copied
+	dec	COUNT+1
+literal_no_adjust:
+
+	jsr	docopy			; copy the literals
+
+	lda	LZ4_SRC			; 16-bit compare
+	cmp	LZ4_END			; to see if we have reached the end
+	lda	LZ4_SRC+1
+	sbc	LZ4_END+1
+	bcs	done			; carry set means LZ4_SRC >= LZ4_END
+
+copymatches:
+	jsr	getsrc			; get 16-bit delta value
+	sta	DELTA
+	jsr	getsrc
+	sta	DELTA+1
+
+	pla				; restore token
+	and	#$0f			; get bottom 4 bits
+					; match count.  0 means 4
+					; 15 means 19+, must be calculated
+
+	jsr	buildcount		; add up count bits, in ram[count+1]-1:A
+
+	clc
+	adc	#4			; adjust count by 4 (minmatch)
+
+	tax				; now in ram[count+1]-1:X
+
+	beq	copy_no_adjust		; BUGFIX, don't increment if
+					; exactly a multiple of 0x100
+	bcc	copy_no_adjust
+
+	inc	COUNT+1			; increment if we overflowed
+copy_no_adjust:
+
+	lda	LZ4_SRC+1		; save src on stack
+	pha
+	lda	LZ4_SRC
+	pha
+
+	sec				; subtract delta
+	lda	LZ4_DST			; from destination, make new src
+	sbc	DELTA
+	sta	LZ4_SRC
+	lda	LZ4_DST+1
+	sbc	DELTA+1
+	sta	LZ4_SRC+1
+
+	jsr	docopy			; do the copy
+					; (reads through the temporary LZ4_SRC)
+
+	pla				; restore the src
+	sta	LZ4_SRC
+	pla
+	sta	LZ4_SRC+1
+
+	jmp	parsetoken		; back to parsing tokens
+
+done:
+	pla				; discard the token saved in parsetoken
+	rts
+
+	;=========
+	; getsrc
+	;=========
+	; gets byte from src into A, increments pointer
+	; Y must be zero on entry
+	; flags on return come from the pointer increment, not from A
+getsrc:
+	lda	(LZ4_SRC), Y		; get a byte from src
+	inc	LZ4_SRC			; increment pointer
+	bne	done_getsrc		; update 16-bit pointer
+	inc	LZ4_SRC+1		; on 8-bit overflow
+done_getsrc:
+	rts
+
+	;============
+	; buildcount
+	;============
+	; on entry A holds a 4-bit length field from the token
+	; if it is 15, following bytes are added in until one is not 255
+	; returns low byte of the length in A; COUNT+1 holds 1 plus the
+	;	number of times the low byte wrapped.  docopy decrements
+	;	COUNT+1 and stops when it reaches zero, which is why the
+	;	high byte is kept one too large
+	; clobbers X and COUNT
+buildcount:
+	ldx	#1			; high count starts at 1
+	stx	COUNT+1			; (loops at zero?)
+	cmp	#$0f			; if LITERAL_COUNT < 15, we are done
+	bne	done_buildcount
+buildcount_loop:
+	sta	COUNT			; save LITERAL_COUNT (15)
+	jsr	getsrc			; get the next byte
+	tax				; put in X
+	clc
+	adc	COUNT			; add new byte to old value
+	bcc	bc_8bit_oflow		; if overflow, increment high byte
+	inc	COUNT+1
+bc_8bit_oflow:
+	inx				; check if read value was 255
+	beq	buildcount_loop		; if it was, keep looping and adding
+done_buildcount:
+	rts
+
+	;============
+	; getput
+	;============
+	; gets a byte, then puts the byte
+getput:
+	jsr	getsrc
+	; fallthrough to putdst
+
+	;=============
+	; putdst
+	;=============
+	; store A into destination
+	; Y must be zero on entry
+putdst:
+	sta	(LZ4_DST), Y		; store A into destination
+	inc	LZ4_DST			; increment 16-bit pointer
+	bne	putdst_end		; if overflow, increment top byte
+	inc	LZ4_DST+1
+putdst_end:
+	rts
+
+	;=============================
+	; docopy
+	;=============================
+	; copies ram[count+1]-1:X bytes
+	; from src to dst
+	; X is decremented before it is tested, so X=0 on entry
+	;	makes the first pass copy 256 bytes
+	; copies one byte at a time, lowest address first
+docopy:
+
+docopy_loop:
+	jsr	getput			; get/put byte
+	dex				; decrement count
+	bne	docopy_loop		; if not zero, loop
+	dec	COUNT+1			; if zero, decrement high byte
+	bne	docopy_loop		; if not zero, loop
+
+	rts