+0.2% speed

Also added an option to unroll LDIR for longer matches (adds 38 bytes, but can be significantly faster for files with many long matches)
introspec 2019-10-10 22:50:23 +01:00 (committed by GitHub)
parent e3d7ec9c40
commit 566e3a94e8

@@ -7,7 +7,8 @@
; ver.03 by uniabis (30/07/2019, 213(-5) bytes, +3.8% speed and support for Hitachi HD64180);
; ver.04 by spke for LZSA 1.0.7 (01/08/2019, 214(+1) bytes, +0.2% speed and small re-organization of macros);
; ver.05 by spke (27/08/2019, 216(+2) bytes, +1.1% speed);
; ver.06 by spke for LZSA 1.1.0 (26/09/2019, added full revision history)
; ver.06 by spke for LZSA 1.1.0 (26/09/2019, added full revision history);
; ver.07 by spke for LZSA 1.1.1 (10/10/2019, +0.2% speed and an option for unrolled copying of long matches)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
@@ -55,6 +56,7 @@
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+38 bytes)
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b
; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180
@@ -68,12 +70,12 @@
ex de,hl : add hl,de
ENDM
MACRO BLOCKCOPY
ldir
MACRO COPY1
ldi
ENDM
MACRO COPY_MATCH
ldi : ldir
MACRO COPYBC
ldir
ENDM
ELSE
@@ -87,12 +89,12 @@
ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes
ENDM
MACRO BLOCKCOPY
lddr
MACRO COPY1
ldd
ENDM
MACRO COPY_MATCH
ldd : lddr
MACRO COPYBC
lddr
ENDM
ENDIF
@@ -146,7 +148,7 @@ MoreLiterals: ld b,(hl) : NEXT_HL
inc a : jr z,ManyLiterals : sub #F0-3+1
CopyLiterals: ld c,a : ld a,b : ld b,0
BLOCKCOPY
COPYBC
push de : or a : jp p,CASE0xx ;: jr CASE1xx
cp %11000000 : jr c,CASE10x
@@ -181,7 +183,9 @@ MatchLen: inc a : and %00000111 : jr z,LongerMatch : inc a
CopyMatch: ld c,a
.useC ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
COPY_MATCH : pop hl
COPY1
COPYBC
.popSrc pop hl
; compressed data stream contains records
; each record begins with the byte token "XYZ|LL|MMM"
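For orientation: the "XYZ|LL|MMM" layout named in the comment above splits the token byte into three bit fields, with the top bits selecting the offset case handled by the CASE labels below. A minimal C sketch of that split (purely illustrative, not part of this source file; field meanings beyond the bit positions are my reading of the surrounding code):

/* Split the "XYZ|LL|MMM" token byte into its three fields. */
static void split_token(unsigned char token,
                        unsigned *xyz, unsigned *ll, unsigned *mmm)
{
    *xyz = (token >> 5) & 0x07;  /* XYZ: top three bits (offset case)   */
    *ll  = (token >> 3) & 0x03;  /* LL : two literal-count bits         */
    *mmm =  token       & 0x07;  /* MMM: low three bits (match length)  */
}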
@@ -191,14 +195,14 @@ ReadToken: ld a,(hl) : and %00011000 : jp pe,Literals0011 ; process the cases 0
ld c,a : ld a,(hl) ; token is re-read for further processing
.NEXTHLuseBC NEXT_HL
BLOCKCOPY
COPYBC
; the token and literals are followed by the offset
push de : or a : jp p,CASE0xx
CASE1xx cp %11000000 : jr nc,CASE11x
; "10x": the case of the 5-bit offset
; "10x": the case of the 13-bit offset
CASE10x: ld c,a : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
@@ -206,8 +210,7 @@ CASE10x: ld c,a : exa : jr nc,.noUpdate
rrca : rrca : rrca : rrca
.noUpdate ld d,a : ld a,c
cp %10100000 : rl d
dec d : dec d : jr ReadOffsetE
cp %10100000 : dec d : rl d : jr ReadOffsetE
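Reading the two offset-adjustment lines above as the removed sequence and its replacement, the gain comes from folding one decrement into the shift: RL D computes 2*D + carry, so the old path produced (2*D + carry) - 2 via two DEC D instructions, while the new path computes 2*(D - 1) + carry = 2*D + carry - 2 directly, the same value with one 4-T-state instruction fewer.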
@@ -225,7 +228,7 @@ CASE00x: ld c,a : exa : jr nc,.noUpdate
rrca : rrca : rrca : rrca
.noUpdate ld e,a : ld a,c
cp %00100000 : rl e : jr SaveOffset
cp %00100000 : rl e : jp SaveOffset
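Another of the small ver.07 tweaks, presumably contributing to the +0.2%: on the Z80 an unconditional JR costs 12 T-states while an absolute JP costs 10, so jumping to SaveOffset with JP saves 2 T-states each time this path is taken, at the cost of one extra byte.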
@@ -239,16 +242,38 @@ LongerMatch: scf : exa : jr nc,.noUpdate
.noUpdate sub #F0-9 : cp 15+9 : jr c,CopyMatch
IFNDEF UNROLL_LONG_MATCHES
LongMatch: add (hl) : NEXT_HL : jr nc,CopyMatch
ld c,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : jr nz,CopyMatch.useC
pop de : ret
ELSE
LongMatch: add (hl) : NEXT_HL : jr c,VeryLongMatch
ld c,a
.useC ex (sp),hl
ADD_OFFSET
COPY1
; this is an unrolled equivalent of LDIR
xor a : sub c
and 8-1 : add a
ld (.jrOffset),a : jr nz,$+2
.jrOffset EQU $-1
.fastLDIR DUP 8
COPY1
EDUP
jp pe,.fastLDIR
jp CopyMatch.popSrc
VeryLongMatch: ld c,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : jr nz,LongMatch.useC
pop de : ret
ENDIF
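For readers unfamiliar with the trick in the DUP 8 block: the patched JR displacement skips just enough of the eight LDI instructions that the first pass through the block copies count mod 8 bytes (or all eight when the count is a multiple of eight), and every later pass copies eight, with JP PE looping back while BC is still non-zero. A rough C equivalent of this computed-jump unroll is the classic Duff's device; the sketch below is purely illustrative and not part of the repository (the real code uses the COPY1 macro, i.e. LDI or LDD depending on decompression direction, while the sketch shows only the forward case):

/* Rough C equivalent of the unrolled-LDIR technique above (Duff's device).
   Illustrative only; copies forward one byte at a time, so overlapping
   LZ matches behave the same way they do with LDI. */
static void copy_unrolled(unsigned char *dst, const unsigned char *src, unsigned count)
{
    if (count == 0)
        return;                        /* the asm never reaches this point with BC = 0 */
    unsigned passes = (count + 7) / 8; /* trips through the unrolled body */
    switch (count % 8) {               /* enter mid-body on the first pass */
    case 0: do { *dst++ = *src++;      /* each statement mirrors one unrolled LDI */
    case 7:      *dst++ = *src++;
    case 6:      *dst++ = *src++;
    case 5:      *dst++ = *src++;
    case 4:      *dst++ = *src++;
    case 3:      *dst++ = *src++;
    case 2:      *dst++ = *src++;
    case 1:      *dst++ = *src++;
            } while (--passes > 0);
    }
}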