Faster LZSA1 z80 decompression

2025-01-18 12:30:33 +00:00 · 2019-08-27 13:16:20 +02:00 · 2019-08-27 13:16:20 +02:00 · 9de7e930e9
commit 9de7e930e9
parent ef259e6867
2 changed files with 284 additions and 275 deletions
--- a/asm/z80/unlzsa1_fast_v1.asm
+++ b/asm/z80/unlzsa1_fast_v1.asm
@ -1,146 +1,155 @@
-;
+;
-;  Speed-optimized LZSA1 decompressor by spke (v.1 03.1-22/08/2019, 107 bytes);
+;  Speed-optimized LZSA1 decompressor by spke (v.2 03/04/2019-27/08/2019; 111 bytes);
-;
+;  with improvements by uniabis (30/07/2019,22/08/2019; -3 bytes, +3% speed).
-;  The data must be compressed using the command line compressor by Emmanuel Marty
+;
-;  The compression is done as follows:
+;  The data must be compressed using the command-line compressor by Emmanuel Marty.
-;
+;  The compression is done as follows:
-;  lzsa.exe -f1 -r <sourcefile> <outfile>
+;
-;
+;  lzsa.exe -f1 -r <sourcefile> <outfile>
-;  where option -r asks for the generation of raw (frame-less) data.
+;
-;
+;  where option -r asks for the generation of raw (frame-less) data.
-;  The decompression is done in the standard way:
+;
-;
+;  The decompression is done in the standard way:
-;  ld hl,FirstByteOfCompressedData
+;
-;  ld de,FirstByteOfMemoryForDecompressedData
+;  ld hl,FirstByteOfCompressedData
-;  call DecompressLZSA1
+;  ld de,FirstByteOfMemoryForDecompressedData
-;
+;  call DecompressLZSA1
-;  Backward compression is also supported; you can compress files backward using:
+;
-;
+;  Backward compression is also supported; you can compress files backward using:
-;  lzsa.exe -f1 -r -b <sourcefile> <outfile>
+;
-;
+;  lzsa.exe -f1 -r -b <sourcefile> <outfile>
-;  and decompress the resulting files using:
+;
-;
+;  and decompress the resulting files using:
-;  ld hl,LastByteOfCompressedData
+;
-;  ld de,LastByteOfMemoryForDecompressedData
+;  ld hl,LastByteOfCompressedData
-;  call DecompressLZSA1
+;  ld de,LastByteOfMemoryForDecompressedData
-;
+;  call DecompressLZSA1
-;  (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
+;
-;
+;  (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
-;  Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
+;
-;  see https://github.com/emmanuel-marty/lzsa for more information
+;  Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
-;
+;  see https://github.com/emmanuel-marty/lzsa for more information
-;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
+;
-;
+;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
-;  This software is provided 'as-is', without any express or implied
+;
-;  warranty.  In no event will the authors be held liable for any damages
+;  This software is provided 'as-is', without any express or implied
-;  arising from the use of this software.
+;  warranty.  In no event will the authors be held liable for any damages
-;
+;  arising from the use of this software.
-;  Permission is granted to anyone to use this software for any purpose,
+;
-;  including commercial applications, and to alter it and redistribute it
+;  Permission is granted to anyone to use this software for any purpose,
-;  freely, subject to the following restrictions:
+;  including commercial applications, and to alter it and redistribute it
-;
+;  freely, subject to the following restrictions:
-;  1. The origin of this software must not be misrepresented; you must not
+;
-;     claim that you wrote the original software. If you use this software
+;  1. The origin of this software must not be misrepresented; you must not
-;     in a product, an acknowledgment in the product documentation would be
+;     claim that you wrote the original software. If you use this software
-;     appreciated but is not required.
+;     in a product, an acknowledgment in the product documentation would be
-;  2. Altered source versions must be plainly marked as such, and must not be
+;     appreciated but is not required.
-;     misrepresented as being the original software.
+;  2. Altered source versions must be plainly marked as such, and must not be
-;  3. This notice may not be removed or altered from any source distribution.
+;     misrepresented as being the original software.
-
+;  3. This notice may not be removed or altered from any source distribution.
-;	DEFINE	BACKWARD_DECOMPRESS
+
-
+;	DEFINE	BACKWARD_DECOMPRESS
-	IFNDEF	BACKWARD_DECOMPRESS
+
-
+	IFNDEF	BACKWARD_DECOMPRESS
-		MACRO NEXT_HL
+
-		inc hl
+		MACRO NEXT_HL
-		ENDM
+		inc hl
-
+		ENDM
-		MACRO ADD_OFFSET
+
-		ex de,hl : add hl,de
+		MACRO ADD_OFFSET
-		ENDM
+		ex de,hl : add hl,de
-
+		ENDM
-		MACRO BLOCKCOPY
+
-		ldir
+		MACRO COPY_MATCH
-		ENDM
+		ldi : ldi : ldir
-
+		ENDM
-	ELSE
+
-
+		MACRO BLOCKCOPY
-		MACRO NEXT_HL
+		ldir
-		dec hl
+		ENDM
-		ENDM
+
-
+	ELSE
-		MACRO ADD_OFFSET
+
-		ex de,hl : ld a,e : sub l : ld l,a
+		MACRO NEXT_HL
-		ld a,d : sbc h : ld h,a						; 4*4+3*4 = 28t / 7 bytes
+		dec hl
-		ENDM
+		ENDM
-
+
-		MACRO BLOCKCOPY
+		MACRO ADD_OFFSET
-		lddr
+		ex de,hl : ld a,e : sub l : ld l,a
-		ENDM
+		ld a,d : sbc h : ld h,a						; 4*4+3*4 = 28t / 7 bytes
-
+		ENDM
-	ENDIF
+
-
+		MACRO COPY_MATCH
-@DecompressLZSA1:
+		ldd : ldd : lddr
-		ld b,0 : jr ReadToken
+		ENDM
-
+
-NoLiterals:	xor (hl) : NEXT_HL
+		MACRO BLOCKCOPY
-		push de : ld e,(hl) : jp m,LongOffset
+		lddr
-
+		ENDM
- 		; short matches have length 0+3..14+3
+
-ShortOffset:	ld d,#FF : add 3 : cp 15+3 : jr nc,LongerMatch
+	ENDIF
-
+
-		; placed here this saves a JP per iteration
+@DecompressLZSA1:
-CopyMatch:	ld c,a
+		ld b,0 : jr ReadToken
-.UseC		NEXT_HL : ex (sp),hl						; BC = len, DE = offset, HL = dest, SP ->[dest,src]
+
-		ADD_OFFSET							; BC = len, DE = dest, HL = dest-offset, SP->[src]
+NoLiterals:	xor (hl)
-		BLOCKCOPY : pop hl						; BC = 0, DE = dest, HL = src
+		push de : NEXT_HL : ld e,(hl) : jp m,LongOffset
-	
+
-ReadToken:	; first a byte token "O|LLL|MMMM" is read from the stream,
+ 		; short matches have length 0+3..14+3
-		; where LLL is the number of literals and MMMM is
+ShortOffset:	ld d,#FF : add 3 : cp 15+3 : jr nc,LongerMatch
-		; a length of the match that follows after the literals
+
-		ld a,(hl) : and #70 : jr z,NoLiterals
+		; placed here this saves a JP per iteration
-
+CopyMatch:	ld c,a
-		cp #70 : jr z,MoreLiterals					; LLL=7 means 7+ literals...
+.UseC		NEXT_HL : ex (sp),hl						; BC = len, DE = offset, HL = dest, SP ->[dest,src]
-		rrca : rrca : rrca : rrca					; LLL<7 means 0..6 literals...
+		ADD_OFFSET							; BC = len, DE = dest, HL = dest-offset, SP->[src]
-
+		COPY_MATCH : pop hl						; BC = 0, DE = dest, HL = src
-		ld c,a : ld a,(hl) : NEXT_HL
+	
-		BLOCKCOPY
+ReadToken:	; first a byte token "O|LLL|MMMM" is read from the stream,
-
+		; where LLL is the number of literals and MMMM is
-		; next we read the first byte of the offset
+		; the length of the match that follows the literals
-		push de : ld e,(hl)
+		ld a,(hl) : and #70 : jr z,NoLiterals
-		; the top bit of token is set if the offset contains two bytes
+
-		and #8F : jp p,ShortOffset
+		cp #70 : jr z,MoreLiterals					; LLL=7 means 7+ literals...
-
+		rrca : rrca : rrca : rrca					; LLL<7 means 0..6 literals...
-LongOffset:	; read second byte of the offset
+
-		NEXT_HL : ld d,(hl)
+		ld c,a : ld a,(hl)
-		add -128+3 : cp 15+3 : jp c,CopyMatch
+		NEXT_HL : BLOCKCOPY
-
+
-		; MMMM=15 indicates a multi-byte number of literals
+		; next we read the first byte of the offset
-LongerMatch:	NEXT_HL : add (hl) : jr nc,CopyMatch
+		push de : ld e,(hl)
-
+		; the top bit of the token is set if the offset is two bytes long
-		; the codes are designed to overflow;
+		and #8F : jp p,ShortOffset
-		; the overflow value 1 means read 1 extra byte
+
-		; and overflow value 0 means read 2 extra bytes
+LongOffset:	; read second byte of the offset
-.code1		NEXT_HL : ld b,a : ld c,(hl) : jr nz,CopyMatch.UseC
+		NEXT_HL : ld d,(hl)
-.code0		NEXT_HL : ld b,(hl)
+		add -128+3 : cp 15+3 : jp c,CopyMatch
-
+
-		; the two-byte match length equal to zero
+		; MMMM=15 indicates a multi-byte match length
-		; designates the end-of-data marker
+LongerMatch:	NEXT_HL : add (hl) : jr nc,CopyMatch
-		ld a,b : or c : jr nz,CopyMatch.UseC
+
-		pop de : ret
+		; the codes are designed to overflow;
-
+		; the overflow value 1 means read 1 extra byte
-MoreLiterals:	; there are three possible situations here
+		; and overflow value 0 means read 2 extra bytes
-		xor (hl) : NEXT_HL : exa
+.code1		ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyMatch.UseC
-		ld a,7 : add (hl) : NEXT_HL : jr c,ManyLiterals
+.code0		NEXT_HL : ld b,(hl)
-
+
-CopyLiterals:	ld c,a
+		; the two-byte match length equal to zero
-.UseC		BLOCKCOPY
+		; designates the end-of-data marker
-
+		ld a,b : or c : jr nz,CopyMatch.UseC
-		push de : ld e,(hl)
+		pop de : ret
-		exa : jp p,ShortOffset : jr LongOffset
+
-
+MoreLiterals:	; there are three possible situations here
-ManyLiterals:
+		xor (hl) : exa
-.code1		ld b,a : ld c,(hl) : NEXT_HL : jr nz,CopyLiterals.UseC
+		ld a,7 : NEXT_HL : add (hl) : jr c,ManyLiterals
-.code0		ld b,(hl) : NEXT_HL : jr CopyLiterals.UseC
+
-
+CopyLiterals:	ld c,a
-
+.UseC		NEXT_HL : BLOCKCOPY
 		push de : ld e,(hl)
 		exa : jp p,ShortOffset : jr LongOffset
 ManyLiterals:							; entered via "jr c" when 7 + (extra literal-count byte) wrapped past 255; A = wrapped sum, Z flag still reflects it
 .code1		ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyLiterals.UseC	; wrapped sum non-zero: it becomes the high byte B, the next stream byte is the low byte C
 .code0		NEXT_HL : ld b,(hl) : jr CopyLiterals.UseC		; wrapped sum zero: C already holds the low byte, one more stream byte supplies the high byte B
--- a/asm/z80/unlzsa1_small_v1.asm
+++ b/asm/z80/unlzsa1_small_v1.asm
@ -1,129 +1,129 @@
-;
+;
-;  Size-optimized LZSA1 decompressor by spke (v.1 23/04/2019, 67 bytes);
+;  Size-optimized LZSA1 decompressor by spke (v.1 23/04/2019, 68 bytes);
-;  with improvements by uniabis (30/07/2019, -1 byte, +3% speed).
+;  with improvements by uniabis (30/07/2019, -1 byte, +3% speed).
-;
+;
-;  The data must be compressed using the command line compressor by Emmanuel Marty
+;  The data must be compressed using the command-line compressor by Emmanuel Marty.
-;  The compression is done as follows:
+;  The compression is done as follows:
-;
+;
-;  lzsa.exe -f1 -r <sourcefile> <outfile>
+;  lzsa.exe -f1 -r <sourcefile> <outfile>
-;
+;
-;  where option -r asks for the generation of raw (frame-less) data.
+;  where option -r asks for the generation of raw (frame-less) data.
-;
+;
-;  The decompression is done in the standard way:
+;  The decompression is done in the standard way:
-;
+;
-;  ld hl,FirstByteOfCompressedData
+;  ld hl,FirstByteOfCompressedData
-;  ld de,FirstByteOfMemoryForDecompressedData
+;  ld de,FirstByteOfMemoryForDecompressedData
-;  call DecompressLZSA1
+;  call DecompressLZSA1
-;
+;
-;  Backward compression is also supported; you can compress files backward using:
+;  Backward compression is also supported; you can compress files backward using:
-;
+;
-;  lzsa.exe -f1 -r -b <sourcefile> <outfile>
+;  lzsa.exe -f1 -r -b <sourcefile> <outfile>
-;
+;
-;  and decompress the resulting files using:
+;  and decompress the resulting files using:
-;
+;
-;  ld hl,LastByteOfCompressedData
+;  ld hl,LastByteOfCompressedData
-;  ld de,LastByteOfMemoryForDecompressedData
+;  ld de,LastByteOfMemoryForDecompressedData
-;  call DecompressLZSA1
+;  call DecompressLZSA1
-;
+;
-;  (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
+;  (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
-;
+;
-;  Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
+;  Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
-;  see https://github.com/emmanuel-marty/lzsa for more information
+;  see https://github.com/emmanuel-marty/lzsa for more information
-;
+;
-;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
+;  Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
-;
+;
-;  This software is provided 'as-is', without any express or implied
+;  This software is provided 'as-is', without any express or implied
-;  warranty.  In no event will the authors be held liable for any damages
+;  warranty.  In no event will the authors be held liable for any damages
-;  arising from the use of this software.
+;  arising from the use of this software.
-;
+;
-;  Permission is granted to anyone to use this software for any purpose,
+;  Permission is granted to anyone to use this software for any purpose,
-;  including commercial applications, and to alter it and redistribute it
+;  including commercial applications, and to alter it and redistribute it
-;  freely, subject to the following restrictions:
+;  freely, subject to the following restrictions:
-;
+;
-;  1. The origin of this software must not be misrepresented; you must not
+;  1. The origin of this software must not be misrepresented; you must not
-;     claim that you wrote the original software. If you use this software
+;     claim that you wrote the original software. If you use this software
-;     in a product, an acknowledgment in the product documentation would be
+;     in a product, an acknowledgment in the product documentation would be
-;     appreciated but is not required.
+;     appreciated but is not required.
-;  2. Altered source versions must be plainly marked as such, and must not be
+;  2. Altered source versions must be plainly marked as such, and must not be
-;     misrepresented as being the original software.
+;     misrepresented as being the original software.
-;  3. This notice may not be removed or altered from any source distribution.
+;  3. This notice may not be removed or altered from any source distribution.
-
+
-;	DEFINE	BACKWARD_DECOMPRESS
+;	DEFINE	BACKWARD_DECOMPRESS
-
+
-	IFNDEF	BACKWARD_DECOMPRESS
+	IFNDEF	BACKWARD_DECOMPRESS
-
+
-		MACRO NEXT_HL
+		MACRO NEXT_HL
-		inc hl
+		inc hl
-		ENDM
+		ENDM
-
+
-		MACRO ADD_OFFSET
+		MACRO ADD_OFFSET
-		ex de,hl : add hl,de
+		ex de,hl : add hl,de
-		ENDM
+		ENDM
-
+
-		MACRO BLOCKCOPY
+		MACRO BLOCKCOPY
-		ldir
+		ldir
-		ENDM
+		ENDM
-
+
-	ELSE
+	ELSE
-
+
-		MACRO NEXT_HL
+		MACRO NEXT_HL
-		dec hl
+		dec hl
-		ENDM
+		ENDM
-
+
-		MACRO ADD_OFFSET
+		MACRO ADD_OFFSET
-		push hl : or a : sbc hl,de : pop de				; 11+4+15+10 = 40t / 5 bytes
+		push hl : or a : sbc hl,de : pop de				; 11+4+15+10 = 40t / 5 bytes
-		ENDM
+		ENDM
-
+
-		MACRO BLOCKCOPY
+		MACRO BLOCKCOPY
-		lddr
+		lddr
-		ENDM
+		ENDM
-
+
-	ENDIF
+	ENDIF
-
+
-@DecompressLZSA1:
+@DecompressLZSA1:
-		ld b,0
+		ld b,0
-
+
-		; first a byte token "O|LLL|MMMM" is read from the stream,
+		; first a byte token "O|LLL|MMMM" is read from the stream,
-		; where LLL is the number of literals and MMMM is
+		; where LLL is the number of literals and MMMM is
-		; a length of the match that follows after the literals
+		; the length of the match that follows the literals
-ReadToken:	ld a,(hl) : NEXT_HL : push af
+ReadToken:	ld a,(hl) : exa : ld a,(hl) : NEXT_HL
-		and #70 : jr z,NoLiterals
+		and #70 : jr z,NoLiterals
-
+
-		rrca : rrca : rrca : rrca					; LLL<7 means 0..6 literals...
+		rrca : rrca : rrca : rrca					; LLL<7 means 0..6 literals...
-		cp #07 : call z,ReadLongBA					; LLL=7 means 7+ literals...
+		cp #07 : call z,ReadLongBA					; LLL=7 means 7+ literals...
-
+
-		ld c,a : BLOCKCOPY
+		ld c,a : BLOCKCOPY
-
+
-		; next we read the low byte of the -offset
+		; next we read the low byte of the -offset
-NoLiterals:	pop af : push de : ld e,(hl) : NEXT_HL : ld d,#FF
+NoLiterals:	push de : ld e,(hl) : NEXT_HL : ld d,#FF
-		; the top bit of token is set if
+		; the top bit of token is set if
-		; the offset contains the high byte as well
+		; the offset contains the high byte as well
-		or a : jp p,ShortOffset
+		exa : or a : jp p,ShortOffset
-
+
-LongOffset:	ld d,(hl) : NEXT_HL
+LongOffset:	ld d,(hl) : NEXT_HL
-
+
-		; last but not least, the match length is read
+		; last but not least, the match length is read
-ShortOffset:	and #0F : add 3							; MMMM<15 means match lengths 0+3..14+3
+ShortOffset:	and #0F : add 3							; MMMM<15 means match lengths 0+3..14+3
-		cp 15+3 : call z,ReadLongBA					; MMMM=15 means lengths 14+3+
+		cp 15+3 : call z,ReadLongBA					; MMMM=15 means lengths 14+3+
-		ld c,a
+		ld c,a
-
+
-		ex (sp),hl							; BC = len, DE = -offset, HL = dest, SP -> [src]
+		ex (sp),hl							; BC = len, DE = -offset, HL = dest, SP -> [src]
-		ADD_OFFSET							; BC = len, DE = dest, HL = dest+(-offset), SP -> [src]
+		ADD_OFFSET							; BC = len, DE = dest, HL = dest+(-offset), SP -> [src]
-		BLOCKCOPY							; BC = 0, DE = dest
+		BLOCKCOPY							; BC = 0, DE = dest
-		pop hl : jr ReadToken						; HL = src
+		pop hl : jr ReadToken						; HL = src
-
+
-		; a standard routine to read extended codes
+		; a standard routine to read extended codes
-		; into registers B (higher byte) and A (lower byte).
+		; into registers B (higher byte) and A (lower byte).
-ReadLongBA:	add (hl) : NEXT_HL : ret nc
+ReadLongBA:	add (hl) : NEXT_HL : ret nc
-
+
-		; the codes are designed to overflow;
+		; the codes are designed to overflow;
-		; the overflow value 1 means read 1 extra byte
+		; the overflow value 1 means read 1 extra byte
-		; and overflow value 0 means read 2 extra bytes
+		; and overflow value 0 means read 2 extra bytes
-.code1:		ld b,a : ld a,(hl) : NEXT_HL : ret nz
+.code1:		ld b,a : ld a,(hl) : NEXT_HL : ret nz
-.code0:		ld c,a : ld b,(hl) : NEXT_HL
+.code0:		ld c,a : ld b,(hl) : NEXT_HL
-
+
-		; the two-byte match length equal to zero
+		; the two-byte match length equal to zero
-		; designates the end-of-data marker
+		; designates the end-of-data marker
-		or b : ld a,c : ret nz
+		or b : ld a,c : ret nz
-		pop de : pop de : ret
+		pop de : pop de : ret
-
+