Faster LZSA1 z80 decompression

2025-08-08 18:25:00 +00:00 · 2019-08-27 13:16:20 +02:00
parent ef259e6867
commit 9de7e930e9
2 changed files with 284 additions and 275 deletions
--- a/asm/z80/unlzsa1_fast_v1.asm
+++ b/asm/z80/unlzsa1_fast_v1.asm
@@ -1,5 +1,6 @@
 ;
-;  Speed-optimized LZSA1 decompressor by spke (v.1 03.1-22/08/2019, 107 bytes);
+;  Speed-optimized LZSA1 decompressor by spke (v.2 03/04/2019-27/08/2019; 111 bytes);
+;  with improvements by uniabis (30/07/2019, 22/08/2019; -3 bytes, +3% speed).
 ;
 ;  The data must be compressed using the command line compressor by Emmanuel Marty
 ;  The compression is done as follows:
@@ -59,6 +60,10 @@
 		ex de,hl : add hl,de
 		ENDM

+		MACRO COPY_MATCH
+		ldi : ldi : ldir						; first two bytes via LDI, the rest via LDIR; relies on match length >= 3 so BC is non-zero when LDIR starts
+		ENDM
+
 		MACRO BLOCKCOPY
 		ldir
 		ENDM
@@ -74,6 +79,10 @@
 		ld a,d : sbc h : ld h,a						; 4*4+3*4 = 28t / 7 bytes
 		ENDM

+		MACRO COPY_MATCH
+		ldd : ldd : lddr						; first two bytes via LDD, the rest via LDDR; relies on match length >= 3 so BC is non-zero when LDDR starts
+		ENDM
+
 		MACRO BLOCKCOPY
 		lddr
 		ENDM
@@ -83,8 +92,8 @@
@DecompressLZSA1:
 		ld b,0 : jr ReadToken

-NoLiterals:	xor (hl) : NEXT_HL
-		push de : ld e,(hl) : jp m,LongOffset
+NoLiterals:	xor (hl)
+		push de : NEXT_HL : ld e,(hl) : jp m,LongOffset

 		; short matches have length 0+3..14+3
 ShortOffset:	ld d,#FF : add 3 : cp 15+3 : jr nc,LongerMatch
@@ -93,7 +102,7 @@ ShortOffset:	ld d,#FF : add 3 : cp 15+3 : jr nc,LongerMatch
 CopyMatch:	ld c,a
 .UseC		NEXT_HL : ex (sp),hl						; BC = len, DE = offset, HL = dest, SP ->[dest,src]
 		ADD_OFFSET							; BC = len, DE = dest, HL = dest-offset, SP->[src]
-		BLOCKCOPY : pop hl						; BC = 0, DE = dest, HL = src
+		COPY_MATCH : pop hl						; BC = 0, DE = dest, HL = src
 	
 ReadToken:	; first a byte token "O|LLL|MMMM" is read from the stream,
 		; where LLL is the number of literals and MMMM is
@@ -103,8 +112,8 @@ ReadToken:	; first a byte token "O|LLL|MMMM" is read from the stream,
 		cp #70 : jr z,MoreLiterals					; LLL=7 means 7+ literals...
 		rrca : rrca : rrca : rrca					; LLL<7 means 0..6 literals...

-		ld c,a : ld a,(hl) : NEXT_HL
-		BLOCKCOPY
+		ld c,a : ld a,(hl)
+		NEXT_HL : BLOCKCOPY

 		; next we read the first byte of the offset
 		push de : ld e,(hl)
@@ -121,7 +130,7 @@ LongerMatch:	NEXT_HL : add (hl) : jr nc,CopyMatch
 		; the codes are designed to overflow;
 		; the overflow value 1 means read 1 extra byte
 		; and overflow value 0 means read 2 extra bytes
-.code1		NEXT_HL : ld b,a : ld c,(hl) : jr nz,CopyMatch.UseC
+.code1		ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyMatch.UseC
 .code0		NEXT_HL : ld b,(hl)

 		; the two-byte match length equal to zero
@@ -130,17 +139,17 @@ LongerMatch:	NEXT_HL : add (hl) : jr nc,CopyMatch
 		pop de : ret

 MoreLiterals:	; there are three possible situations here
-		xor (hl) : NEXT_HL : exa
-		ld a,7 : add (hl) : NEXT_HL : jr c,ManyLiterals
+		xor (hl) : exa
+		ld a,7 : NEXT_HL : add (hl) : jr c,ManyLiterals

 CopyLiterals:	ld c,a
-.UseC		BLOCKCOPY
+.UseC		NEXT_HL : BLOCKCOPY

 		push de : ld e,(hl)
 		exa : jp p,ShortOffset : jr LongOffset

 ManyLiterals:
-.code1		ld b,a : ld c,(hl) : NEXT_HL : jr nz,CopyLiterals.UseC
-.code0		ld b,(hl) : NEXT_HL : jr CopyLiterals.UseC
+.code1		ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyLiterals.UseC
+.code0		NEXT_HL : ld b,(hl) : jr CopyLiterals.UseC


--- a/asm/z80/unlzsa1_small_v1.asm
+++ b/asm/z80/unlzsa1_small_v1.asm
@@ -1,5 +1,5 @@
 ;
-;  Size-optimized LZSA1 decompressor by spke (v.1 23/04/2019, 67 bytes);
+;  Size-optimized LZSA1 decompressor by spke (v.1 23/04/2019, 68 bytes);
 ;  with improvements by uniabis (30/07/2019, -1 byte, +3% speed).
 ;
 ;  The data must be compressed using the command line compressor by Emmanuel Marty
@@ -86,7 +86,7 @@
 		; first a byte token "O|LLL|MMMM" is read from the stream,
 		; where LLL is the number of literals and MMMM is
 		; a length of the match that follows after the literals
-ReadToken:	ld a,(hl) : NEXT_HL : push af
+ReadToken:	ld a,(hl) : exa : ld a,(hl) : NEXT_HL
 		and #70 : jr z,NoLiterals

 		rrca : rrca : rrca : rrca					; LLL<7 means 0..6 literals...
@@ -95,10 +95,10 @@ ReadToken:	ld a,(hl) : NEXT_HL : push af
 		ld c,a : BLOCKCOPY

 		; next we read the low byte of the -offset
-NoLiterals:	pop af : push de : ld e,(hl) : NEXT_HL : ld d,#FF
+NoLiterals:	push de : ld e,(hl) : NEXT_HL : ld d,#FF
 		; the top bit of token is set if
 		; the offset contains the high byte as well
-		or a : jp p,ShortOffset
+		exa : or a : jp p,ShortOffset

 LongOffset:	ld d,(hl) : NEXT_HL