ootw: more lz4 work

This commit is contained in:
Vince Weaver 2019-02-25 21:01:06 -05:00
parent 9531786f05
commit 1f4ed008d1
5 changed files with 349 additions and 9 deletions

View File

@ -14,6 +14,8 @@
#include <string.h>
#include <stdarg.h>
#include <fcntl.h>
#include "loadpng.h"
#include "lz4.h"
@ -36,7 +38,7 @@ static int gr_lz4(int out_type, char *varname, int xsize, int ysize,
unsigned char gr[1024];
unsigned char output[2048];
int x,y;
int size;
int size,count=0;
/* our image pointer is not-interleaved, but it does */
/* have the top/bottom pixels properly packed for us */
@ -52,7 +54,7 @@ static int gr_lz4(int out_type, char *varname, int xsize, int ysize,
for(y=0;y<24;y++) {
for(x=0;x<40;x++) {
gr[(gr_offsets[y]-0x400)+x]=
image[((y/2)*xsize)+x];
image[(y*xsize)+x];
}
}
@ -71,28 +73,45 @@ static int gr_lz4(int out_type, char *varname, int xsize, int ysize,
2048, // src capacity
16); // compression level
/* Note, unlike the on-disk format we do *not* */
/* have to skip 11 bytes at front or 8 bytes at end */
/* also, we write the 16-bit size (little endian) at front */
if (out_type==OUTPUT_C) {
fprintf(stdout,"unsigned char %s[]={",varname);
printf("\t0x%02X,0x%02X,\n",(size)&0xff,
((size)>>8)&0xff);
for(x=0;x<size;x++) {
if (x%16==0) {
if (count%16==0) {
printf("\n\t");
}
printf("0x%02X,",output[x]);
count++;
}
printf("\n};\n");
}
else if (out_type==OUTPUT_ASM) {
fprintf(stdout,"%s:",varname);
// int blargh;
// blargh=open("blargh",O_CREAT|O_WRONLY,0777);
// write(blargh,gr,1024);
// close(blargh);
fprintf(stdout,"%s:\n",varname);
// size includes this size value
printf("\t.byte $%02X,$%02X",(size+2)&0xff,
((size+2)>>8)&0xff);
for(x=0;x<size;x++) {
if (x%16==0) {
if (count%16==0) {
printf("\n\t.byte ");
}
else {
printf(",");
}
printf("$%02X",output[x]);
count++;
}
printf("\n};\n");
}
else if (out_type==OUTPUT_RAW) {
write(1,output,size);
@ -101,7 +120,7 @@ static int gr_lz4(int out_type, char *varname, int xsize, int ysize,
return -1;
}
return size;
return (size+2);
}

View File

@ -131,7 +131,7 @@ compress_test.inc: intro_graphics/07_soda/drinking01.png
COMPRESS-TEST: compress_test.o
ld65 -o COMPRESS-TEST compress_test.o -C ../linker_scripts/apple2_2000.inc
compress_test.o: compress_test.s compress_test.inc
compress_test.o: compress_test.s lz4_decode.s compress_test.inc
ca65 -o compress_test.o compress_test.s -l compress_test.lst
#####

View File

@ -1,4 +1,116 @@
; Test if we can use LZ4 instead of RLE for compression of LORES images
;
;
.include "hardware.inc"
.include "zp.inc"
;=====================================================================
; start
;=====================================================================
; Test driver: decodes the same LORES image first with the RLE loader
; and then with the LZ4 decoder so the two can be compared.
; BELL is called before and after the RLE loops (presumably as an
; audible timing marker -- confirm).
; A key press is awaited after the RLE result is shown and again after
; the clear; the LZ4 test then runs and the program spins forever.
;
; NOTE: a stray RTS used to sit directly after the start: label, which
; returned immediately and left everything below unreachable.
start:

	;===========================
	; Enable graphics

	bit	LORES
	bit	SET_GR
	bit	FULLGR

	;===========================
	; Setup pages

	lda	#4
	sta	DRAW_PAGE
	lda	#0
	sta	DISP_PAGE

	;============================
	; Test RLE version

	jsr	BELL

	lda	#1			; outer repeat count (author note: 8)
	sta	OFFSET

rle_outer_loop:
	lda	#1			; inner repeat count (author note: 100)
	sta	GAIT

rle_inner_loop:
	lda	#>(test_rle)		; GBASH:GBASL = address of RLE data
	sta	GBASH
	lda	#<(test_rle)
	sta	GBASL
	lda	#$0c			; load to $c00
	jsr	load_rle_gr

	dec	GAIT
	bne	rle_inner_loop

	dec	OFFSET
	bne	rle_outer_loop

	jsr	BELL

	jsr	gr_copy_to_current
	jsr	page_flip

	; spin until bit 7 of KEYPRESS is set, then touch KEYRESET
uz_loop:
	lda	KEYPRESS
	bpl	uz_loop
	bit	KEYRESET

	;======================
	; clear between tests
	; DRAW_PAGE is saved on the stack, pointed at $8 for the
	; clear calls, then restored

	lda	DRAW_PAGE
	pha

	lda	#$8			; clear c00
	sta	DRAW_PAGE
	jsr	clear_top
	jsr	clear_bottom

	pla
	sta	DRAW_PAGE

	; second key wait before starting the LZ4 test
vz_loop:
	lda	KEYPRESS
	bpl	vz_loop
	bit	KEYRESET

	;============================
	; Test LZ4 version

	; point to source
	lda	#<(test_lz4)
	sta	LZ4_SRC
	lda	#>(test_lz4)
	sta	LZ4_SRC+1

	lda	#$C			; page to write to
	jsr	lz4_decode

	jsr	gr_copy_to_current
	jsr	page_flip

forever:
	jmp	forever
.include "gr_unrle.s"
.include "gr_offsets.s"
.include "gr_pageflip.s"
.include "gr_copy.s"
.include "gr_fast_clear.s"
.include "lz4_decode.s"
.include "compress_test.inc"

View File

@ -36,6 +36,7 @@ CLRTOP = $F836 ;; clear only top of low-res screen
SETCOL = $F864 ;; COLOR=A
TEXT = $FB36
TABV = $FB5B ;; VTAB to A
BELL = $FBDD ;; ring the bell
BASCALC = $FBC1 ;;
VTAB = $FC22 ;; VTAB to CV
HOME = $FC58 ;; Clear the text screen

208
ootw/lz4_decode.s Normal file
View File

@ -0,0 +1,208 @@
; LZ4 data decompressor for Apple II
; NOTE: this version is optimized for loading LORES graphics
; on even page boundaries (usually $C00)
; Don't use it for generic purposes!
; Code originally by Peter Ferrie (qkumba) (peter.ferrie@gmail.com)
; "LZ4 unpacker in 143 bytes (6502 version) (2013)"
; http://pferrie.host22.com/misc/appleii.htm
; For LZ4 reference see
; https://github.com/lz4/lz4/wiki/lz4_Frame_format.md
; We expect src in LZ4_SRC
; Incoming Accumulator is page to write to
; Size is in first 2 bytes pointed to by LZ4_SRC
; LZ4 data should have 11 byte header stripped off beginning
; and 8 byte checksum stripped off the end
;LZ4_SRC EQU $00 ; 25:10 (size=7c)
;LZ4_DST EQU $02 ; 0c:00
;LZ4_END EQU $04 ; 25:8c
;COUNT EQU $06
;DELTA EQU $08
;======================
; LZ4 decode
;======================
; In:   LZ4_SRC  = 16-bit pointer to the compressed data.  The first
;                  two bytes are a little-endian size; it is added to
;                  the incoming LZ4_SRC to form LZ4_END, so it is
;                  measured from the size field itself.
;       A        = destination page (high byte); low byte is set to 0
; Out:  decompressed bytes written starting at page A
; Uses: LZ4_SRC, LZ4_DST, LZ4_END, COUNT, DELTA, A, X, Y
;       One byte of stack holds the current token while it is parsed.
;
; Count convention shared with buildcount/docopy:
;       X = low byte, COUNT+1 = high byte + 1.  docopy treats X = 0
;       as 256 iterations, so a count whose low byte is zero must
;       have COUNT+1 equal to the plain high byte instead.

lz4_decode:

	sta	LZ4_DST+1		; set to page we want
	lda	#0
	sta	LZ4_DST

	ldy	#0

	; calculate LZ4_END based on start and total size in
	; first two bytes
	clc
	lda	(LZ4_SRC),Y		; size (low)
	adc	LZ4_SRC
	sta	LZ4_END
	iny				; INY leaves carry intact for high add
	lda	(LZ4_SRC),Y		; size (high)
	adc	LZ4_SRC+1
	sta	LZ4_END+1

	; skip past size
	clc
	lda	LZ4_SRC
	adc	#2
	sta	LZ4_SRC
	lda	LZ4_SRC+1
	adc	#0
	sta	LZ4_SRC+1

unpmain:
	ldy	#0			; used to index, always zero

parsetoken:
	jsr	getsrc			; get next token
	pha				; save for later (need bottom 4 bits)

	lsr				; number of literals in top 4 bits
	lsr				; so shift into place
	lsr
	lsr
	beq	copymatches		; if zero, then no literals
					; jump ahead and copy

	jsr	buildcount		; add up all the literal sizes
					; result is in ram[count+1]-1:A
	tax				; now in ram[count+1]-1:X
	bne	lz4_copy_literals	; low byte non-zero: count is right
	dec	COUNT+1			; BUGFIX: exact multiple of 0x100.
					; docopy runs 256 times for X=0, so
					; drop the extra high-byte pass
					; (same case the match path guards)
lz4_copy_literals:
	jsr	docopy			; copy the literals

	lda	LZ4_SRC			; 16-bit compare
	cmp	LZ4_END			; to see if we have reached the end
	lda	LZ4_SRC+1
	sbc	LZ4_END+1
	bcs	done			; src >= end: finished

copymatches:
	jsr	getsrc			; get 16-bit delta value
	sta	DELTA
	jsr	getsrc
	sta	DELTA+1

	pla				; restore token
	and	#$0f			; get bottom 4 bits
					; match count.  0 means 4
					; 15 means 19+, must be calculated

	jsr	buildcount		; add up count bits, in ram[count+1]-1:A

	clc
	adc	#4			; adjust count by 4 (minmatch)

	tax				; now in ram[count+1]-1:X

	beq	copy_no_adjust		; BUGFIX, don't increment if
					; exactly a multiple of 0x100
	bcc	copy_no_adjust

	inc	COUNT+1			; increment if we overflowed

copy_no_adjust:

	lda	LZ4_SRC+1		; save src on stack
	pha
	lda	LZ4_SRC
	pha

	sec				; subtract delta
	lda	LZ4_DST			; from destination, make new src
	sbc	DELTA
	sta	LZ4_SRC
	lda	LZ4_DST+1
	sbc	DELTA+1
	sta	LZ4_SRC+1

	jsr	docopy			; do the copy

	pla				; restore the src
	sta	LZ4_SRC
	pla
	sta	LZ4_SRC+1

	jmp	parsetoken		; back to parsing tokens

done:
	pla				; discard the token still on the stack
	rts
;=========
; getsrc
;=========
; Read one byte through LZ4_SRC and step the pointer forward.
; In:   Y indexes the read (callers keep Y = 0)
; Out:  A = byte read; LZ4_SRC advanced by one (16-bit)
; Note: flags on return come from the pointer increment, not from A
getsrc:
	lda	(LZ4_SRC),Y		; fetch source byte
	inc	LZ4_SRC			; bump low byte of pointer
	beq	@next_page		; wrapped to zero: carry into high byte
	rts
@next_page:
	inc	LZ4_SRC+1
	rts
;============
; buildcount
;============
; Expand a 4-bit token count into a full length.
; In:   A = 4-bit count taken from the token
; Out:  A = low byte of the count
;       COUNT+1 = 1 + number of times the low byte overflowed
; A value below 15 is returned as-is.  A value of 15 is extended by
; adding following source bytes, continuing for as long as the byte
; just read was 255.
; Clobbers X and COUNT; reads the source through getsrc.
buildcount:
	ldx	#1
	stx	COUNT+1			; high part starts at 1
	cmp	#$0f
	beq	@extend			; 15 means more length bytes follow
	rts

@extend:
	sta	COUNT			; running low byte
	jsr	getsrc			; next length byte
	clc
	tax				; keep a copy to test for 255
	adc	COUNT			; accumulate into low byte
	bcc	@no_carry
	inc	COUNT+1			; carry out of the low byte
@no_carry:
	inx				; 255 + 1 wraps to zero
	beq	@extend			; byte was 255: keep accumulating
	rts
;============
; getput
;============
; Copy one byte: read it with getsrc, then drop straight into
; putdst to write it.
getput:
	jsr	getsrc
	; no rts here -- execution continues into putdst

;=============
; putdst
;=============
; Write A through LZ4_DST and step the pointer forward.
; In:   A = byte to store; Y indexes the write (callers keep Y = 0)
; Out:  LZ4_DST advanced by one (16-bit)
putdst:
	sta	(LZ4_DST),Y		; write destination byte
	inc	LZ4_DST			; bump low byte of pointer
	beq	@next_page		; wrapped to zero: carry into high byte
	rts
@next_page:
	inc	LZ4_DST+1
	rts
;=============================
; docopy
;=============================
; Copy a run of bytes from LZ4_SRC to LZ4_DST using getput.
; In:   X       = low byte of the count
;       COUNT+1 = high byte of the count, plus one
; The inner pass ends when X reaches zero, the outer when COUNT+1
; reaches zero.  Because X is decremented before it is tested,
; entering with X = 0 makes the first pass run 256 times.
; Out:  X = 0, COUNT+1 = 0
docopy:
	jsr	getput			; move one byte
	dex
	bne	docopy			; more bytes in this pass
	dec	COUNT+1
	bne	docopy			; another 256-byte pass remains
	rts