1
0
mirror of https://github.com/cc65/cc65.git synced 2026-03-11 23:42:18 +00:00
Files
cc65/libsrc/common/lz4.s
2025-06-04 06:37:59 +00:00

337 lines
5.6 KiB
ArmAsm

;
; Lauri Kasanen, 6 Jun 2017
; (C) Mega Cat Studios
; An optimized LZ4 decompressor
;
; Almost 7 times faster, uses no RAM (vs 14 bytes BSS), and takes 1/4 the space
; vs the official C source.
;
;
; C implementation was:
; void decompress_lz4 (unsigned char *in, unsigned char *out, const int outlen) {
; unsigned char token, tmp;
; unsigned int offset;
; unsigned char *end = out+outlen;
; unsigned char *copysrc;
;
; while (out < end) {
; token = *in++;
; offset = token >> 4;
;
; token &= 0x0f;
; token += 4; // Minmatch
;
; if (offset == 15) {
; moreliterals:
; tmp = *in++;
; offset += tmp;
; if (tmp == 255)
; goto moreliterals;
; }
;
; if (offset) {
; memcpy(out, in, offset);
; out += offset;
; in += offset;
; }
;
; if (out >= end) {
; return;
; }
;
; offset = (*in);
; in++;
; offset += (*in)<<8;
; in++;
;
; copysrc = out - offset;
; offset = token;
;
; if (token == 19) {
; morematches:
; tmp = *in++;
; offset += tmp;
; if (tmp == 255)
; goto morematches;
; }
;
; memcpy(out, copysrc, offset);
; out += offset;
; }
; }
.importzp c_sp, sreg, regsave, regbank
.importzp tmp1, tmp2, tmp3, tmp4, ptr1, ptr2, ptr3, ptr4
.macpack longbranch
.import memcpy_upwards,pushax,popax
.export _decompress_lz4
out = regsave
end = regsave + 2
tmp = tmp1
token = tmp2
offset = ptr3
in = sreg
; ---------------------------------------------------------------
; void decompress_lz4 (const u8 *in, u8 * const out, const u16 outlen)
; ---------------------------------------------------------------
.segment "CODE"
.proc _decompress_lz4: near
sta tmp
stx tmp+1
;
; end = out + outlen;
;
jsr popax
sta out
clc
adc tmp
sta end
txa
sta out+1
adc tmp+1
sta end+1
jsr popax
sta in
stx in+1
;
; while (out < end) {
;
jmp check_len
;
; token = *in++;
;
get_token:
ldy #$00
lda (in),y
tay ; Backup token to Y
inc in
bne :+
inc in+1
:
;
; offset = token >> 4;
;
ldx #$00
lsr a
lsr a
lsr a
lsr a
sta offset
stx offset+1
;
; token &= 0xf;
; token += 4; // Minmatch
;
tya ; Get token back from Y
and #$0F
clc
adc #$04
sta token
;
; if (offset == 15) {
;
lda offset
cmp #$0F
moreliterals:
bne check_offset_not_zero
;
; tmp = *in++;
;
ldy #$00
lda (in),y
sta tmp
inc in
bne :+
inc in+1
:
;
; offset += tmp;
;
clc
adc offset
sta offset
lda #$00
adc offset+1
sta offset+1
;
; if (tmp == 255)
;
lda tmp
cmp #$FF
;
; goto moreliterals;
;
jmp moreliterals
;
; if (offset) {
;
check_offset_not_zero:
lda offset
ora offset+1
beq check_end
;
; memcpy(out, in, offset);
;
lda out
sta ptr2
ldx out+1
stx ptr2+1
jsr pushax
lda in
ldx in+1
sta ptr1
stx ptr1+1
; ldy #$00 - not needed as pushax zeroes Y
jsr memcpy_upwards
;
; out += offset;
; memcpy returned a pointer to out
;
clc
adc offset
sta out
txa
adc offset+1
sta out+1
;
; in += offset;
;
lda offset
clc
adc in
sta in
lda offset+1
adc in+1
sta in+1
;
; if (out >= end)
;
check_end:
lda out
cmp end
lda out+1
sbc end+1
;
; return;
;
bcc end_not_reached
rts
;
; memcpy(&offset, in, 2);
;
end_not_reached:
ldy #$00
lda (in),y
sta offset
iny
lda (in),y
sta offset+1
;
; in += 2;
;
lda #$02
clc
adc in
sta in
bcc :+
inc in+1
:
;
; copysrc = out - offset;
;
lda out
sec
sbc offset
sta ptr1
lda out+1
sbc offset+1
sta ptr1+1
;
; offset = token;
;
lda #$00
sta offset+1
lda token
sta offset
;
; if (token == 19) {
;
cmp #$13
morematches:
bne token_not_19
;
; tmp = *in++;
;
ldy #$00
lda (in),y
sta tmp
inc in
bne :+
inc in+1
:
;
; offset += tmp;
;
clc
adc offset
sta offset
tya
adc offset+1
sta offset+1
;
; if (tmp == 255)
;
lda tmp
cmp #$FF
;
; goto morematches;
;
jmp morematches
;
; memcpy(out, copysrc, offset);
;
token_not_19:
lda out
sta ptr2
ldx out+1
stx ptr2+1
jsr pushax
jsr memcpy_upwards
;
; out += offset;
;
clc
adc offset
sta out
txa
adc offset+1
sta out+1 ; 0 on the first loop iteration
check_len:
;
; while (out < end) {
;
lda out
cmp end
lda out+1
sbc end+1
jcc get_token
rts
.endproc