+0.2% speed

Also added an option to unroll LDIR for longer matches (this adds 38 bytes, but can be significantly faster for files with many long matches).
This commit is contained in:
introspec 2019-10-10 22:50:23 +01:00 committed by GitHub
parent e3d7ec9c40
commit 566e3a94e8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -7,7 +7,8 @@
; ver.03 by uniabis (30/07/2019, 213(-5) bytes, +3.8% speed and support for Hitachi HD64180); ; ver.03 by uniabis (30/07/2019, 213(-5) bytes, +3.8% speed and support for Hitachi HD64180);
; ver.04 by spke for LZSA 1.0.7 (01/08/2019, 214(+1) bytes, +0.2% speed and small re-organization of macros); ; ver.04 by spke for LZSA 1.0.7 (01/08/2019, 214(+1) bytes, +0.2% speed and small re-organization of macros);
; ver.05 by spke (27/08/2019, 216(+2) bytes, +1.1% speed); ; ver.05 by spke (27/08/2019, 216(+2) bytes, +1.1% speed);
; ver.06 by spke for LZSA 1.1.0 (26/09/2019, added full revision history) ; ver.06 by spke for LZSA 1.1.0 (26/09/2019, added full revision history);
; ver.07 by spke for LZSA 1.1.1 (10/10/2019, +0.2% speed and an option for unrolled copying of long matches)
; ;
; The data must be compressed using the command line compressor by Emmanuel Marty ; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows: ; The compression is done as follows:
@ -55,6 +56,7 @@
; misrepresented as being the original software. ; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution. ; 3. This notice may not be removed or altered from any source distribution.
; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+38 bytes)
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b ; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b
; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180 ; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180
@ -68,12 +70,12 @@
ex de,hl : add hl,de ex de,hl : add hl,de
ENDM ENDM
MACRO BLOCKCOPY MACRO COPY1
ldir ldi
ENDM ENDM
MACRO COPY_MATCH MACRO COPYBC
ldi : ldir ldir
ENDM ENDM
ELSE ELSE
@ -87,12 +89,12 @@
ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes
ENDM ENDM
MACRO BLOCKCOPY MACRO COPY1
lddr ldd
ENDM ENDM
MACRO COPY_MATCH MACRO COPYBC
ldd : lddr lddr
ENDM ENDM
ENDIF ENDIF
@ -146,7 +148,7 @@ MoreLiterals: ld b,(hl) : NEXT_HL
inc a : jr z,ManyLiterals : sub #F0-3+1 inc a : jr z,ManyLiterals : sub #F0-3+1
CopyLiterals: ld c,a : ld a,b : ld b,0 CopyLiterals: ld c,a : ld a,b : ld b,0
BLOCKCOPY COPYBC
push de : or a : jp p,CASE0xx ;: jr CASE1xx push de : or a : jp p,CASE0xx ;: jr CASE1xx
cp %11000000 : jr c,CASE10x cp %11000000 : jr c,CASE10x
@ -181,7 +183,9 @@ MatchLen: inc a : and %00000111 : jr z,LongerMatch : inc a
CopyMatch: ld c,a CopyMatch: ld c,a
.useC ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src] .useC ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src] ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
COPY_MATCH : pop hl COPY1
COPYBC
.popSrc pop hl
; compressed data stream contains records ; compressed data stream contains records
; each record begins with the byte token "XYZ|LL|MMM" ; each record begins with the byte token "XYZ|LL|MMM"
@ -191,14 +195,14 @@ ReadToken: ld a,(hl) : and %00011000 : jp pe,Literals0011 ; process the cases 0
ld c,a : ld a,(hl) ; token is re-read for further processing ld c,a : ld a,(hl) ; token is re-read for further processing
.NEXTHLuseBC NEXT_HL .NEXTHLuseBC NEXT_HL
BLOCKCOPY COPYBC
; the token and literals are followed by the offset ; the token and literals are followed by the offset
push de : or a : jp p,CASE0xx push de : or a : jp p,CASE0xx
CASE1xx cp %11000000 : jr nc,CASE11x CASE1xx cp %11000000 : jr nc,CASE11x
; "10x": the case of the 5-bit offset ; "10x": the case of the 13-bit offset
CASE10x: ld c,a : exa : jr nc,.noUpdate CASE10x: ld c,a : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa ld a,(hl) : or #F0 : exa
@ -206,8 +210,7 @@ CASE10x: ld c,a : exa : jr nc,.noUpdate
rrca : rrca : rrca : rrca rrca : rrca : rrca : rrca
.noUpdate ld d,a : ld a,c .noUpdate ld d,a : ld a,c
cp %10100000 : rl d cp %10100000 : dec d : rl d : jr ReadOffsetE
dec d : dec d : jr ReadOffsetE
@ -225,7 +228,7 @@ CASE00x: ld c,a : exa : jr nc,.noUpdate
rrca : rrca : rrca : rrca rrca : rrca : rrca : rrca
.noUpdate ld e,a : ld a,c .noUpdate ld e,a : ld a,c
cp %00100000 : rl e : jr SaveOffset cp %00100000 : rl e : jp SaveOffset
@ -239,16 +242,38 @@ LongerMatch: scf : exa : jr nc,.noUpdate
.noUpdate sub #F0-9 : cp 15+9 : jr c,CopyMatch .noUpdate sub #F0-9 : cp 15+9 : jr c,CopyMatch
; Handling of matches whose length needs one or more extra bytes from the stream.
; Two variants are assembled depending on UNROLL_LONG_MATCHES:
;   - not defined: compact version, the copy itself is done by the shared CopyMatch code;
;   - defined:     larger version with its own copy loop built from 8 unrolled COPY1 steps.
; On entry A holds the partial match length, HL points into the compressed data
; and the destination address is on the stack.
IFNDEF UNROLL_LONG_MATCHES
; compact variant: add the next stream byte to A; no carry means the length fits in 8 bits
; and CopyMatch will load it into C
LongMatch: add (hl) : NEXT_HL : jr nc,CopyMatch
; carry: a 16-bit value follows in the stream, low byte first
ld c,(hl) : NEXT_HL
; Z is still the result of "add (hl)" above (LD does not change flags; this assumes NEXT_HL
; does not either - TODO confirm against the macro definition). NZ = copy BC bytes
ld b,(hl) : NEXT_HL : jr nz,CopyMatch.useC
; Z = the sum was exactly 256: presumably the end-of-data marker (verify against the format
; description); discard the stacked destination into DE and return to the caller
pop de : ret
ELSE
; unrolled variant: lengths that overflow 8 bits are handled at VeryLongMatch
LongMatch: add (hl) : NEXT_HL : jr c,VeryLongMatch
; 8-bit length; B is not written here, so it is assumed to be 0 already, as in CopyMatch
ld c,a
; same register shuffle as in CopyMatch.useC: afterwards BC = len, DE = dest, HL = dest-offset
; and the compressed data pointer is on the stack
.useC ex (sp),hl
ADD_OFFSET
; copy the first byte (a single LDI/LDD, depending on the decompression direction); BC = len-1
COPY1
; this is an unrolled equivalent of LDIR
; A = (-C) mod 8 = number of COPY1 steps to skip on the first pass through .fastLDIR,
; so that the first pass copies C mod 8 bytes (or 8 when C is a multiple of 8)
; and every later pass copies exactly 8
xor a : sub c
; each COPY1 assembles to a 2-byte instruction, hence the doubling to get a byte displacement
and 8-1 : add a
; self-modifying code: the displacement byte of the JR below is patched with A, so this routine
; must run from writable memory. When A = 0 the jump is not taken and execution simply falls
; through into .fastLDIR (NZ/Z comes from "add a", LD does not change flags)
ld (.jrOffset),a : jr nz,$+2
; address of the displacement byte of the JR instruction above
.jrOffset EQU $-1
.fastLDIR DUP 8
COPY1
EDUP
; LDI/LDD leave P/V set while BC is non-zero, so this repeats until all bytes are copied
jp pe,.fastLDIR
; rejoin the common path, which restores the compressed data pointer into HL
jp CopyMatch.popSrc
; the 8-bit sum overflowed: read a 16-bit value into BC, low byte first
VeryLongMatch: ld c,(hl) : NEXT_HL
; Z is still the result of "add (hl)" at LongMatch (assuming NEXT_HL preserves flags - TODO
; confirm); NZ = BC is the match length, copy it using the unrolled loop above
ld b,(hl) : NEXT_HL : jr nz,LongMatch.useC
; Z = the sum was exactly 256: presumably the end-of-data marker (verify against the format
; description); discard the stacked destination into DE and return to the caller
pop de : ret
ENDIF