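; File: cc65/libsrc/common/lz4.s
;
; Listing metadata: 287 lines, 4.8 KiB. The listing viewer labelled this file
; "ArmAsm", but the code below is 6502 assembly in ca65 syntax.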

;
; Lauri Kasanen, 6 Jun 2017
; (C) Mega Cat Studios
; An optimized LZ4 decompressor
;
; Almost 7 times faster, uses no RAM (vs 14 bytes BSS), and takes 1/4 the space
; vs the official C source.
;
.importzp sp, sreg, regsave, regbank
.importzp tmp1, tmp2, tmp3, tmp4, ptr1, ptr2, ptr3, ptr4
.macpack longbranch
.import memcpy_upwards,pushax,popax
.export _decompress_lz4
;
; Aliases for the imported zero-page locations used by _decompress_lz4.
;
;   out      first two bytes of regsave: the destination pointer argument.
;   written  next two bytes of regsave: 16-bit count of bytes produced so far.
;   tmp      tmp1: the most recently read length-extension byte.
;   token    tmp2: the token byte; after the literal length has been split
;            off it holds (token & 0x0f) + 4.
;   offset   ptr3: reused for three values in turn - the literal run length,
;            the 2-byte value read from the input after the literals, and
;            finally the match length. It is the value in place whenever
;            memcpy_upwards is called; presumably that routine takes its
;            byte count from ptr3 - confirm against memcpy_upwards.
;   in       sreg: the input read pointer, advanced as bytes are consumed.
;   outlen   ptr4: the requested output length argument.
;
out = regsave
written = regsave + 2
tmp = tmp1
token = tmp2
offset = ptr3
in = sreg
outlen = ptr4
; ---------------------------------------------------------------
; void decompress_lz4 (const u8 *in, u8 * const out, const u16 outlen)
;
; Decodes LZ4 sequences (token, literal run, match) read from `in` and
; writes them to `out` until `written` reaches `outlen`.
;
; Arguments: outlen arrives in A (low) / X (high); out and then in are
;            fetched with popax.
; Returns:   nothing.
; Zero page: uses the aliases out, written, tmp, token, offset, in and
;            outlen, and loads ptr1 (source) and ptr2 (destination) before
;            each call to memcpy_upwards.
;
; NOTE(review): memcpy_upwards is assumed to copy `offset` (ptr3) bytes from
; ptr1 to ptr2 in ascending address order and to consume the pointer that
; is pushed with pushax just before the call - confirm against its source.
;
; The input stream is not validated: no check against reading past the end
; of the input or writing more than outlen bytes within one copy is made.
; ---------------------------------------------------------------

.segment        "CODE"

.proc   _decompress_lz4: near

        ; Collect the three arguments.
        sta     outlen
        stx     outlen+1

        jsr     popax
        sta     out
        stx     out+1

        jsr     popax
        sta     in
        stx     in+1

        ; written = 0.
        ; Only the low byte is cleared here. A is still zero when control
        ; arrives at store_written_hi, which clears the high byte and then
        ; falls into the loop condition.
        lda     #$00
        sta     written
        jmp     store_written_hi

; ---------------------------------------------------------------
; Loop body: decode one sequence.
; ---------------------------------------------------------------

next_sequence:
        ; token = *in++;
        ldy     #$00
        lda     (in),y
        sta     token
        inc     in
        bne     token_fetched
        inc     in+1
token_fetched:

        ; offset = token >> 4;
        ; A still holds the token byte, since inc leaves A alone.
        ldx     #$00
        lsr     a
        lsr     a
        lsr     a
        lsr     a
        sta     offset
        stx     offset+1

        ; token = (token & 0xf) + 4;    // +4 is the minimum match length
        lda     token
        and     #$0F
        clc
        adc     #$04
        sta     token

        ; if (offset == 15) the literal length continues in extra bytes.
        lda     offset
        cmp     #$0F
        bne     literal_len_done

more_literals:
        ; tmp = *in++;
        ldy     #$00
        lda     (in),y
        sta     tmp
        inc     in
        bne     literal_ext_fetched
        inc     in+1
literal_ext_fetched:

        ; offset += tmp;                // A still holds tmp
        clc
        adc     offset
        sta     offset
        lda     #$00
        adc     offset+1
        sta     offset+1

        ; if (tmp == 255) goto more_literals;
        lda     tmp
        cmp     #$FF
        beq     more_literals

literal_len_done:
        ; if (offset) { ... }           // skip the copy for an empty run
        lda     offset
        ora     offset+1
        beq     literals_done

        ; memcpy(&out[written], in, offset);
        ; ptr2 = out + written, and the same pointer is pushed via pushax.
        lda     out
        clc
        adc     written
        sta     ptr2
        lda     out+1
        adc     written+1
        tax
        lda     ptr2
        stx     ptr2+1
        jsr     pushax

        ; ptr1 = in
        lda     in
        ldx     in+1
        sta     ptr1
        stx     ptr1+1
        ; ldy #$00 is not needed here, as pushax zeroes Y.
        jsr     memcpy_upwards

        ; written += offset;
        lda     offset
        clc
        adc     written
        sta     written
        lda     offset+1
        adc     written+1
        sta     written+1

        ; in += offset;
        lda     offset
        clc
        adc     in
        sta     in
        lda     offset+1
        adc     in+1
        sta     in+1

literals_done:
        ; if (written >= outlen) return;
        ; 16-bit unsigned compare: carry ends up clear only when
        ; written < outlen.
        lda     written
        cmp     outlen
        lda     written+1
        sbc     outlen+1
        bcc     read_match
        rts

read_match:
        ; memcpy(&offset, in, 2);       // low byte first, then high byte
        ldy     #$00
        lda     (in),y
        sta     offset
        iny
        lda     (in),y
        sta     offset+1

        ; in += 2;
        lda     #$02
        clc
        adc     in
        sta     in
        bcc     in_advanced
        inc     in+1
in_advanced:

        ; copysrc = out + written - offset;   // kept in ptr1
        ; The sum is held in Y (low) and X (high) while offset is subtracted.
        lda     out
        clc
        adc     written
        tay
        lda     out+1
        adc     written+1
        tax
        tya
        sec
        sbc     offset
        sta     ptr1
        txa
        sbc     offset+1
        sta     ptr1+1

        ; offset = token;               // from here on offset is the match length
        lda     #$00
        sta     offset+1
        lda     token
        sta     offset

        ; if (token == 19) the match length continues in extra bytes
        ; (19 = nibble value 15 plus the 4 added above).
        cmp     #$13
        bne     copy_match

more_matches:
        ; tmp = *in++;
        ldy     #$00
        lda     (in),y
        sta     tmp
        inc     in
        bne     match_ext_fetched
        inc     in+1
match_ext_fetched:

        ; offset += tmp;                // A still holds tmp, Y is zero
        clc
        adc     offset
        sta     offset
        tya
        adc     offset+1
        sta     offset+1

        ; if (tmp == 255) goto more_matches;
        lda     tmp
        cmp     #$FF
        beq     more_matches

copy_match:
        ; memcpy(&out[written], copysrc, offset);
        ; ptr2 = out + written, pushed as well; ptr1 was set up above.
        lda     out
        clc
        adc     written
        sta     ptr2
        lda     out+1
        adc     written+1
        tax
        lda     ptr2
        stx     ptr2+1
        jsr     pushax
        jsr     memcpy_upwards

        ; written += offset;
        ; The high-byte store is shared with the function entry path.
        lda     offset
        clc
        adc     written
        sta     written
        lda     offset+1
        adc     written+1
store_written_hi:
        sta     written+1

        ; while (written < outlen)
        ; jcc is the long-branch form: next_sequence is out of bcc range.
        lda     written
        cmp     outlen
        lda     written+1
        sbc     outlen+1
        jcc     next_sequence

        rts

.endproc