1
0
mirror of https://github.com/cc65/cc65.git synced 2025-08-08 06:25:17 +00:00

Optimize LZ4 decompressor

Use a walking out pointer instead of &out[written]. This
shrinks the code by 27 bytes and saves 15% of the cycles.
Tested with both the unit test and a program decompressing
10 kB of data.

Renamed the labels for legibility.
This commit is contained in:
Colin Leroy-Mira
2025-03-12 20:10:55 +01:00
committed by Oliver Schmidt
parent ee540678e6
commit fb63a839bf

View File

@@ -14,12 +14,11 @@
.export _decompress_lz4 .export _decompress_lz4
out = regsave out = regsave
written = regsave + 2 end = regsave + 2
tmp = tmp1 tmp = tmp1
token = tmp2 token = tmp2
offset = ptr3 offset = ptr3
in = sreg in = sreg
outlen = ptr4
; --------------------------------------------------------------- ; ---------------------------------------------------------------
; void decompress_lz4 (const u8 *in, u8 * const out, const u16 outlen) ; void decompress_lz4 (const u8 *in, u8 * const out, const u16 outlen)
@@ -29,37 +28,43 @@ outlen = ptr4
.proc _decompress_lz4: near .proc _decompress_lz4: near
sta outlen sta tmp
stx outlen+1 stx tmp+1
;
; end = out + outlen;
;
jsr popax jsr popax
sta out sta out
stx out+1 clc
adc tmp
sta end
txa
sta out+1
adc tmp+1
sta end+1
jsr popax jsr popax
sta in sta in
stx in+1 stx in+1
; ;
; written = 0; ; while (out < end) {
; ;
lda #$00 jmp check_len
sta written
;
; while (written < outlen) {
;
jmp L0046
; ;
; token = *in++; ; token = *in++;
; ;
L0004: ldy #$00 get_token:
ldy #$00
lda (in),y lda (in),y
sta token tay ; Backup token to Y
inc in inc in
bne L000A bne :+
inc in+1 inc in+1
L000A:
:
; ;
; offset = token >> 4; ; offset = token >> 4;
; ;
@@ -74,7 +79,7 @@ L000A:
; token &= 0xf; ; token &= 0xf;
; token += 4; // Minmatch ; token += 4; // Minmatch
; ;
lda token tya ; Get token back from Y
and #$0F and #$0F
clc clc
adc #$04 adc #$04
@@ -84,7 +89,8 @@ L000A:
; ;
lda offset lda offset
cmp #$0F cmp #$0F
L0013: bne L001A moreliterals:
bne check_offset_not_zero
; ;
; tmp = *in++; ; tmp = *in++;
; ;
@@ -93,9 +99,10 @@ L0013: bne L001A
sta tmp sta tmp
inc in inc in
bne L0017 bne :+
inc in+1 inc in+1
L0017:
:
; ;
; offset += tmp; ; offset += tmp;
; ;
@@ -113,24 +120,20 @@ L0017:
; ;
; goto moreliterals; ; goto moreliterals;
; ;
jmp L0013 jmp moreliterals
; ;
; if (offset) { ; if (offset) {
; ;
L001A: lda offset check_offset_not_zero:
lda offset
ora offset+1 ora offset+1
beq L001C beq check_end
; ;
; memcpy(&out[written], in, offset); ; memcpy(out, in, offset);
; ;
lda out lda out
clc
adc written
sta ptr2 sta ptr2
lda out+1 ldx out+1
adc written+1
tax
lda ptr2
stx ptr2+1 stx ptr2+1
jsr pushax jsr pushax
lda in lda in
@@ -140,15 +143,15 @@ L001A: lda offset
; ldy #$00 - not needed as pushax zeroes Y ; ldy #$00 - not needed as pushax zeroes Y
jsr memcpy_upwards jsr memcpy_upwards
; ;
; written += offset; ; out += offset;
; memcpy returned a pointer to out
; ;
lda offset
clc clc
adc written adc offset
sta written sta out
lda offset+1 txa
adc written+1 adc offset+1
sta written+1 sta out+1
; ;
; in += offset; ; in += offset;
; ;
@@ -160,21 +163,23 @@ L001A: lda offset
adc in+1 adc in+1
sta in+1 sta in+1
; ;
; if (written >= outlen) ; if (out >= end)
; ;
L001C: lda written check_end:
cmp outlen lda out
lda written+1 cmp end
sbc outlen+1 lda out+1
sbc end+1
; ;
; return; ; return;
; ;
bcc L0047 bcc end_not_reached
rts rts
; ;
; memcpy(&offset, in, 2); ; memcpy(&offset, in, 2);
; ;
L0047: ldy #$00 end_not_reached:
ldy #$00
lda (in),y lda (in),y
sta offset sta offset
iny iny
@@ -187,23 +192,18 @@ L0047: ldy #$00
clc clc
adc in adc in
sta in sta in
bcc L002F bcc :+
inc in+1 inc in+1
:
; ;
; copysrc = out + written - offset; ; copysrc = out - offset;
; ;
L002F: lda out lda out
clc
adc written
tay
lda out+1
adc written+1
tax
tya
sec sec
sbc offset sbc offset
sta ptr1 sta ptr1
txa lda out+1
sbc offset+1 sbc offset+1
sta ptr1+1 sta ptr1+1
; ;
@@ -217,7 +217,8 @@ L002F: lda out
; if (token == 19) { ; if (token == 19) {
; ;
cmp #$13 cmp #$13
L0045: bne L003C morematches:
bne token_not_19
; ;
; tmp = *in++; ; tmp = *in++;
; ;
@@ -226,9 +227,9 @@ L0045: bne L003C
sta tmp sta tmp
inc in inc in
bne L0039 bne :+
inc in+1 inc in+1
L0039: :
; ;
; offset += tmp; ; offset += tmp;
; ;
@@ -246,41 +247,36 @@ L0039:
; ;
; goto morematches; ; goto morematches;
; ;
jmp L0045 jmp morematches
; ;
; memcpy(&out[written], copysrc, offset); ; memcpy(out, copysrc, offset);
; ;
L003C: lda out token_not_19:
clc lda out
adc written
sta ptr2 sta ptr2
lda out+1 ldx out+1
adc written+1
tax
lda ptr2
stx ptr2+1 stx ptr2+1
jsr pushax jsr pushax
jsr memcpy_upwards jsr memcpy_upwards
; ;
; written += offset; ; out += offset;
; ;
lda offset
clc clc
adc written adc offset
sta written sta out
lda offset+1 txa
adc written+1 adc offset+1
L0046: sta written+1 sta out+1 ; 0 on the first loop iteration
check_len:
; ;
; while (written < outlen) { ; while (out < end) {
; ;
lda written lda out
cmp outlen cmp end
lda written+1 lda out+1
sbc outlen+1 sbc end+1
jcc L0004 jcc get_token
rts rts
.endproc .endproc