From 566e3a94e83ad58f250a3a86d23c72d6946ac0f6 Mon Sep 17 00:00:00 2001 From: introspec <31136975+specke@users.noreply.github.com> Date: Thu, 10 Oct 2019 22:50:23 +0100 Subject: [PATCH] +0.2% speed also, added an option to unroll LDIR for longer matches (which adds 38 bytes, but can be significantly faster for files with many long matches) --- asm/z80/unlzsa2_fast.asm | 67 +++++++++++++++++++++++++++------------- 1 file changed, 46 insertions(+), 21 deletions(-) diff --git a/asm/z80/unlzsa2_fast.asm b/asm/z80/unlzsa2_fast.asm index 38a1779..57026da 100644 --- a/asm/z80/unlzsa2_fast.asm +++ b/asm/z80/unlzsa2_fast.asm @@ -7,7 +7,8 @@ ; ver.03 by uniabis (30/07/2019, 213(-5) bytes, +3.8% speed and support for Hitachi HD64180); ; ver.04 by spke for LZSA 1.0.7 (01/08/2019, 214(+1) bytes, +0.2% speed and small re-organization of macros); ; ver.05 by spke (27/08/2019, 216(+2) bytes, +1.1% speed); -; ver.06 by spke for LZSA 1.1.0 (26/09/2019, added full revision history) +; ver.06 by spke for LZSA 1.1.0 (26/09/2019, added full revision history); +; ver.07 by spke for LZSA 1.1.1 (10/10/2019, +0.2% speed and an option for unrolled copying of long matches) ; ; The data must be compressed using the command line compressor by Emmanuel Marty ; The compression is done as follows: @@ -55,6 +56,7 @@ ; misrepresented as being the original software. ; 3. This notice may not be removed or altered from any source distribution. +; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+38 bytes) ; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b ; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180 @@ -68,12 +70,12 @@ ex de,hl : add hl,de ENDM - MACRO BLOCKCOPY - ldir + MACRO COPY1 + ldi ENDM - MACRO COPY_MATCH - ldi : ldir + MACRO COPYBC + ldir ENDM ELSE @@ -87,12 +89,12 @@ ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes ENDM - MACRO BLOCKCOPY - lddr + MACRO COPY1 + ldd ENDM - MACRO COPY_MATCH - ldd : lddr + MACRO COPYBC + lddr ENDM ENDIF @@ -146,7 +148,7 @@ MoreLiterals: ld b,(hl) : NEXT_HL inc a : jr z,ManyLiterals : sub #F0-3+1 CopyLiterals: ld c,a : ld a,b : ld b,0 - BLOCKCOPY + COPYBC push de : or a : jp p,CASE0xx ;: jr CASE1xx cp %11000000 : jr c,CASE10x @@ -181,7 +183,9 @@ MatchLen: inc a : and %00000111 : jr z,LongerMatch : inc a CopyMatch: ld c,a .useC ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src] ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src] - COPY_MATCH : pop hl + COPY1 + COPYBC +.popSrc pop hl ; compressed data stream contains records ; each record begins with the byte token "XYZ|LL|MMM" @@ -191,14 +195,14 @@ ReadToken: ld a,(hl) : and %00011000 : jp pe,Literals0011 ; process the cases 0 ld c,a : ld a,(hl) ; token is re-read for further processing .NEXTHLuseBC NEXT_HL - BLOCKCOPY + COPYBC ; the token and literals are followed by the offset push de : or a : jp p,CASE0xx CASE1xx cp %11000000 : jr nc,CASE11x - ; "10x": the case of the 5-bit offset + ; "10x": the case of the 13-bit offset CASE10x: ld c,a : exa : jr nc,.noUpdate ld a,(hl) : or #F0 : exa @@ -206,8 +210,7 @@ CASE10x: ld c,a : exa : jr nc,.noUpdate rrca : rrca : rrca : rrca .noUpdate ld d,a : ld a,c - cp %10100000 : rl d - dec d : dec d : jr ReadOffsetE + cp %10100000 : dec d : rl d : jr ReadOffsetE @@ -225,7 +228,7 @@ CASE00x: ld c,a : exa : jr nc,.noUpdate rrca : rrca : rrca : rrca .noUpdate ld e,a : ld a,c - cp %00100000 : rl e : jr SaveOffset + cp %00100000 : rl e : jp SaveOffset @@ -239,16 +242,38 @@ LongerMatch: scf : exa : jr nc,.noUpdate .noUpdate sub #F0-9 : cp 15+9 : jr c,CopyMatch + IFNDEF UNROLL_LONG_MATCHES + LongMatch: add (hl) : NEXT_HL : jr nc,CopyMatch ld c,(hl) : NEXT_HL ld b,(hl) : NEXT_HL : jr nz,CopyMatch.useC pop de : ret - - - - - + ELSE + +LongMatch: add (hl) : NEXT_HL : jr c,VeryLongMatch + + ld c,a +.useC ex (sp),hl + ADD_OFFSET + COPY1 + + ; this is an unrolled equivalent of LDIR + xor a : sub c + and 8-1 : add a + ld (.jrOffset),a : jr nz,$+2 +.jrOffset EQU $-1 +.fastLDIR DUP 8 + COPY1 + EDUP + jp pe,.fastLDIR + jp CopyMatch.popSrc + +VeryLongMatch: ld c,(hl) : NEXT_HL + ld b,(hl) : NEXT_HL : jr nz,LongMatch.useC + pop de : ret + + ENDIF