From efc19e9d93d3064214aca7a9fa87439e4b5927f9 Mon Sep 17 00:00:00 2001
From: specke
Date: Tue, 5 Apr 2022 11:41:52 +0100
Subject: [PATCH] New small decompressor for LZSA2 (-5 bytes, +1% speed)

---
 Reviewer note (this region is not part of the commit message): the line
 structure of this patch was rebuilt from a copy whose whitespace had been
 collapsed. The per-hunk line counts and the diffstat totals agree with the
 headers below, but the original indentation inside each line is not
 recoverable and the position of a few whitespace-only context lines in the
 last hunk is inferred. If the patch does not apply cleanly, try
 whitespace-insensitive matching (e.g. "git apply --ignore-whitespace") or
 regenerate it from commit efc19e9.

 asm/z80/unlzsa2_small.asm | 132 ++++++++++++++++++++------------------
 1 file changed, 71 insertions(+), 61 deletions(-)

diff --git a/asm/z80/unlzsa2_small.asm b/asm/z80/unlzsa2_small.asm
index 0544c9a..ee3de63 100644
--- a/asm/z80/unlzsa2_small.asm
+++ b/asm/z80/unlzsa2_small.asm
@@ -1,5 +1,5 @@
 ;
-; Size-optimized LZSA2 decompressor by spke & uniabis (139 bytes)
+; Size-optimized LZSA2 decompressor by spke & uniabis (134 bytes)
 ;
 ; ver.00 by spke for LZSA 1.0.0 (02-09/06/2019, 145 bytes);
 ; ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
@@ -7,6 +7,8 @@
 ; ver.03 by spke for LZSA 1.0.7 (01/08/2019, 140(-4) bytes, -1.4% speed and small re-organization of macros);
 ; ver.04 by spke for LZSA 1.1.0 (26/09/2019, removed usage of IY, added full revision history)
 ; ver.05 by spke for LZSA 1.1.1 (11/10/2019, 139(-1) bytes, +0.1% speed)
+; ver.06 by spke (11-12/04/2021, added some comments)
+; ver.07 by spke (04-05/04/2022, 134(-5) bytes, +1% speed, using self-modifying code by default)
 ;
 ; The data must be compressed using the command line compressor by Emmanuel Marty
 ; The compression is done as follows:
@@ -55,8 +57,8 @@
 ; 3. This notice may not be removed or altered from any source distribution.
 ;

-;       DEFINE BACKWARD_DECOMPRESS              ; uncomment for data compressed with option -b
-;       DEFINE HD64180                          ; uncomment for systems using Hitachi HD64180
+;       DEFINE BACKWARD_DECOMPRESS              ; uncomment for data compressed with option -b (+5 bytes, -3% speed)
+;       DEFINE AVOID_SELFMODIFYING_CODE         ; uncomment to disallow self-modifying code (-1 byte, -4% speed)

         IFNDEF BACKWARD_DECOMPRESS

@@ -65,7 +67,7 @@
                 ENDM

                 MACRO ADD_OFFSET
-                ex de,hl : add hl,de
+                add hl,de
                 ENDM

                 MACRO BLOCKCOPY
@@ -79,7 +81,10 @@
                 ENDM

                 MACRO ADD_OFFSET
-                push hl : or a : sbc hl,de : pop de     ; 11+4+15+10 = 40t / 5 bytes
+                ;push hl : or a : sbc hl,de : pop de    ; 11+4+15+10 = 40t / 5 bytes
+                ; HL = DE - HL
+                ld a,e : sub l : ld l,a
+                ld a,d : sbc h : ld h,a                 ; 6*4 = 24t / 6 bytes
                 ENDM

                 MACRO BLOCKCOPY
@@ -88,85 +93,90 @@

         ENDIF

-        IFNDEF HD64180
+@DecompressLZSA2:
+                ; in many places we assume that B = 0
+                ; flag P in A' signals the need to re-load the nibble store
+                xor a : ld b,a : exa : jr .ReadToken

-                MACRO LD_IX_DE
-                ld ixl,e : ld ixh,d
-                ENDM
+.CASE00x:       ; token "00Z" stands for 5-bit offsets
+                ; (read a nibble for offset bits 1-4 and use the inverted bit Z
+                ; of the token as bit 0 of the offset; set bits 5-15 of the offset to 1)
+                push af
+                call ReadNibble.skipLDCA : ld c,a
+                pop af
+                cp %00100000 : rl c : jr .SaveOffset

-                MACRO LD_DE_IX
-                ld e,ixl : ld d,ixh
-                ENDM
+.CASE0xx        dec b : cp %01000000 : jr c,.CASE00x

+.CASE01x:       ; token "01Z" stands for 9-bit offsets
+                ; (read a byte for offset bits 0-7 and use the inverted bit Z
+                ; for bit 8 of the offset; set bits 9-15 of the offset to 1)
+                cp %01100000
+.doRLB          rl b
+
+.OffsetReadC:   ld c,(hl) : NEXT_HL
+
+        IFNDEF AVOID_SELFMODIFYING_CODE
+.SaveOffset:    ld (.PrevOffset),bc : ld b,0
         ELSE
-
-                MACRO LD_IX_DE
-                push de : pop ix
-                ENDM
-
-                MACRO LD_DE_IX
-                push ix : pop de
-                ENDM
-
+.SaveOffset:    push bc : pop ix : ld b,0
         ENDIF

-@DecompressLZSA2:
-                xor a : ld b,a : exa : jr ReadToken
+.MatchLen:      and %00000111 : add 2 : cp 9
+                call z,ExtendedCode

-CASE00x:        call ReadNibble
-                ld e,a : ld a,c
-                cp %00100000 : rl e : jr SaveOffset
+.CopyMatch:     ld c,a
+                push hl                                 ; BC = len, DE = dest, HL = -offset, SP -> [src]

-CASE0xx         ld d,#FF : cp %01000000 : jr c,CASE00x
+        IFNDEF AVOID_SELFMODIFYING_CODE
+.PrevOffset     EQU $+1 : ld hl,0
+        ELSE
+                push ix : pop hl
+        ENDIF
+                ADD_OFFSET
+                BLOCKCOPY                               ; BC = 0, DE = dest
+                pop hl                                  ; HL = src

-CASE01x:        cp %01100000 : rl d
+.ReadToken:     ld a,(hl) : NEXT_HL : push af
+                and %00011000 : jr z,.NoLiterals

-OffsetReadE:    ld e,(hl) : NEXT_HL
-
-SaveOffset:     LD_IX_DE
+                rrca : rrca : rrca
+                call pe,ExtendedCode

-MatchLen:       and %00000111 : add 2 : cp 9 : call z,ExtendedCode
+                ld c,a
+                BLOCKCOPY

-CopyMatch:      ld c,a
-                ex (sp),hl                              ; BC = len, DE = -offset, HL = dest, SP -> [src]
-                ADD_OFFSET                              ; BC = len, DE = dest, HL = dest+(-offset), SP -> [src]
-                BLOCKCOPY                               ; BC = 0, DE = dest
-                pop hl                                  ; HL = src
+.NoLiterals:    pop af : or a : jp p,.CASE0xx

-ReadToken:      ld a,(hl) : NEXT_HL : push af
-                and %00011000 : jr z,NoLiterals
+.CASE1xx        cp %11000000 : jr c,.CASE10x
+                ; token "111" stands for repeat offsets
+                ; (reuse the offset value of the previous match command)
+                cp %11100000 : jr nc,.MatchLen

-                rrca : rrca : rrca
-                call pe,ExtendedCode
+.CASE110:       ; token "110" stands for 16-bit offset
+                ; (read a byte for offset bits 8-15, then another byte for offset bits 0-7)
+                ld b,(hl) : NEXT_HL : jr .OffsetReadC

-                ld c,a
-                BLOCKCOPY
+.CASE10x:       ; token "10Z" stands for 13-bit offsets
+                ; (read a nibble for offset bits 9-12 and use the inverted bit Z
+                ; for bit 8 of the offset, then read a byte for offset bits 0-7.
+                ; set bits 13-15 of the offset to 1. substract 512 from the offset to get the final value)
+                call ReadNibble : ld b,a
+                ld a,c : cp %10100000
+                dec b : jr .doRLB

-NoLiterals:     pop af : push de
-                or a : jp p,CASE0xx
-
-CASE1xx         cp %11000000 : jr nc,CASE11x
-
-CASE10x:        call ReadNibble
-                ld d,a : ld a,c
-                cp %10100000 ;: rl d
-                dec d : rl d : DB #CA                   ; jr OffsetReadE ; #CA is JP Z,.. to skip all commands in CASE110 before jr OffsetReadE
-
-CASE110:        ld d,(hl) : NEXT_HL : jr OffsetReadE
-
-CASE11x         cp %11100000 : jr c,CASE110
-
-CASE111:        LD_DE_IX : jr MatchLen

 ExtendedCode:   call ReadNibble : inc a : jr z,ExtraByte
                 sub #F0+1 : add c : ret
 ExtraByte       ld a,15 : add c : add (hl) : NEXT_HL : ret nc
                 ld a,(hl) : NEXT_HL
                 ld b,(hl) : NEXT_HL : ret nz
-                pop de : pop de                         ; RET is not needed, because RET from ReadNibble is sufficient
+                pop bc                                  ; RET is not needed, because RET from ReadNibble is sufficient

-ReadNibble:     ld c,a : xor a : exa : ret m
-UpdateNibble    ld a,(hl) : or #F0 : exa
+
+ReadNibble:     ld c,a
+.skipLDCA       xor a : exa : ret m
+                ld a,(hl) : or #F0 : exa
                 ld a,(hl) : NEXT_HL : or #0F
                 rrca : rrca : rrca : rrca : ret
