From 44bff39de3d023d79812e9c8acde7d4da7fb7dc4 Mon Sep 17 00:00:00 2001 From: introspec <31136975+specke@users.noreply.github.com> Date: Thu, 1 Aug 2019 15:07:14 +0100 Subject: [PATCH] New faster and shorter decompressors This update is mostly about better integration of improvements by uniabis, with spke contributing several smaller size optimizations. --- asm/z80/unlzsa2_fast_v1.asm | 162 ++++++++++++++++++----------------- asm/z80/unlzsa2_small_v1.asm | 102 +++++++++++----------- 2 files changed, 132 insertions(+), 132 deletions(-) diff --git a/asm/z80/unlzsa2_fast_v1.asm b/asm/z80/unlzsa2_fast_v1.asm index 04d8668..3faaa2a 100644 --- a/asm/z80/unlzsa2_fast_v1.asm +++ b/asm/z80/unlzsa2_fast_v1.asm @@ -1,5 +1,6 @@ ; -; Speed-optimized LZSA2 decompressor by spke (v.1 02-07/06/2019 +patch1-30/07/2019, 213/211(hd64180) bytes) +; Speed-optimized LZSA2 decompressor by spke (v.1 02-07/06/2019, 214 bytes); +; with improvements by uniabis (30/07/2019, -5 bytes, +3% speed and support for Hitachi HD64180). ; ; The data must be compressed using the command line compressor by Emmanuel Marty ; The compression is done as follows: @@ -26,7 +27,7 @@ ; ; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor). ; -; Of course, LZSA2 compression algorithm is (c) 2019 Emmanuel Marty, +; Of course, LZSA2 compression algorithms are (c) 2019 Emmanuel Marty, ; see https://github.com/emmanuel-marty/lzsa for more information ; ; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com @@ -47,23 +48,10 @@ ; misrepresented as being the original software. ; 3. This notice may not be removed or altered from any source distribution. -; DEFINE BACKWARD_DECOMPRESS +; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b +; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180 - IFDEF BACKWARD_DECOMPRESS - - MACRO NEXT_HL - dec hl - ENDM - - MACRO ADD_OFFSET - push hl : or a : sbc hl,de : pop de - ENDM - - MACRO BLOCKCOPY - lddr - ENDM - - ELSE + IFNDEF BACKWARD_DECOMPRESS MACRO NEXT_HL inc hl @@ -77,57 +65,57 @@ ldir ENDM + ELSE + + MACRO NEXT_HL + dec hl + ENDM + + MACRO ADD_OFFSET + ex de,hl : ld a,e : sub l : ld l,a + ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes + ENDM + + MACRO BLOCKCOPY + lddr + ENDM + ENDIF - IFDEF HD64180 - MACRO LD_IX_DE - push de : pop ix - ENDM - MACRO LD_DE_IX - push ix : pop de - ENDM - ELSE + IFNDEF HD64180 + MACRO LD_IX_DE ld ixl,e : ld ixh,d ENDM + MACRO LD_DE_IX ld e,ixl : ld d,ixh ENDM + + ELSE + + MACRO LD_IX_DE + push de : pop ix + ENDM + + MACRO LD_DE_IX + push ix : pop de + ENDM + ENDIF @DecompressLZSA2: ; A' stores next nibble as %1111.... or assumed to contain trash ; B is assumed to be 0 - xor a : ld b,a : scf : exa : jr ReadToken + ld b,0 : scf : exa : jr ReadToken - -LongerMatch: scf : exa : jr nc,.noUpdate - - ld a,(hl) : or #F0 : exa - ld a,(hl) : NEXT_HL : or #0F - rrca : rrca : rrca : rrca - -.noUpdate sub #F0-9 : cp 15+9 : jr c,CopyMatch - ;inc a : jr z,LongMatch : sub #F0-9+1 : jp CopyMatch - -LongMatch: ;ld a,24 : - add (hl) : NEXT_HL : jr nc,CopyMatch - ld c,(hl) : NEXT_HL - ld b,(hl) : NEXT_HL - jr nz,CopyMatch.useC - pop de : ret - - - - -ManyLiterals: ld a,18 : - add (hl) : NEXT_HL : jr nc,CopyLiterals +ManyLiterals: ld a,18 : add (hl) : NEXT_HL : jr nc,CopyLiterals ld c,(hl) : NEXT_HL ld a,b : ld b,(hl) - jr ReadToken.useBC + jr ReadToken.NEXTHLuseBC @@ -142,18 +130,25 @@ MoreLiterals: ld b,(hl) : NEXT_HL .noUpdate ;sub #F0-3 : cp 15+3 : jr z,ManyLiterals inc a : jr z,ManyLiterals : sub #F0-3+1 -CopyLiterals: ld c,a -.useC ld a,b : ld b,0 -.useBC BLOCKCOPY - push de : or a : jp p,CASE0xx : jr CASE1xx +CopyLiterals: ld c,a : ld a,b : ld b,0 + BLOCKCOPY + push de : or a : jp p,CASE0xx ;: jr CASE1xx + + cp %11000000 : jr c,CASE10x + +CASE11x cp %11100000 : jr c,CASE110 + + ; "111": repeated offset +CASE111: LD_DE_IX : jr MatchLen +Literals0011: jr nz,MoreLiterals ; if "LL" of the byte token is equal to 0, ; there are no literals to copy -NoLiterals: xor (hl) : NEXT_HL +NoLiterals: or (hl) : NEXT_HL push de : jp m,CASE1xx ; short (5 or 9 bit long) offsets @@ -162,26 +157,25 @@ CASE0xx ld d,#FF : cp %01000000 : jr c,CASE00x ; "01x": the case of the 9-bit offset CASE01x: cp %01100000 : rl d -ReadOffsetE: ld e,(hl) : NEXT_HL +ReadOffsetE ld e,(hl) : NEXT_HL SaveOffset: LD_IX_DE MatchLen: inc a : and %00000111 : jr z,LongerMatch : inc a CopyMatch: ld c,a -.useC ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[src] +.useC ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src] ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src] BLOCKCOPY : pop hl ; compressed data stream contains records ; each record begins with the byte token "XYZ|LL|MMM" -ReadToken: ld a,(hl) : and %00011000 : jr z,NoLiterals +ReadToken: ld a,(hl) : and %00011000 : jp pe,Literals0011 ; process the cases 00 and 11 separately - jp pe,MoreLiterals ; 00 has already been processed; this identifies the case of 11 rrca : rrca : rrca ld c,a : ld a,(hl) ; token is re-read for further processing -.useBC NEXT_HL +.NEXTHLuseBC NEXT_HL BLOCKCOPY ; the token and literals are followed by the offset @@ -190,8 +184,7 @@ ReadToken: ld a,(hl) : and %00011000 : jr z,NoLiterals CASE1xx cp %11000000 : jr nc,CASE11x ; "10x": the case of the 5-bit offset -CASE10x: ld c,a - exa : jr nc,.noUpdate +CASE10x: ld c,a : exa : jr nc,.noUpdate ld a,(hl) : or #F0 : exa ld a,(hl) : NEXT_HL : or #0F @@ -201,29 +194,42 @@ CASE10x: ld c,a cp %10100000 : rl d dec d : dec d : jr ReadOffsetE - ; "00x": the case of the 5-bit offset -CASE00x: ld c,a - exa : jr nc,.noUpdate - - ld a,(hl) : or #F0 : exa - ld a,(hl) : NEXT_HL : or #0F - rrca : rrca : rrca : rrca - -.noUpdate ld e,a : ld a,c - cp %00100000 : rl e : jp SaveOffset - - ; two remaining cases -CASE11x cp %11100000 : jr c,CASE110 - - ; "111": repeated offset -CASE111: LD_DE_IX : jr MatchLen + ; "110": 16-bit offset CASE110: ld d,(hl) : NEXT_HL : jr ReadOffsetE + ; "00x": the case of the 5-bit offset +CASE00x: ld c,a : exa : jr nc,.noUpdate + + ld a,(hl) : or #F0 : exa + ld a,(hl) : NEXT_HL : or #0F + rrca : rrca : rrca : rrca + +.noUpdate ld e,a : ld a,c + cp %00100000 : rl e : jr SaveOffset + + + + + +LongerMatch: scf : exa : jr nc,.noUpdate + + ld a,(hl) : or #F0 : exa + ld a,(hl) : NEXT_HL : or #0F + rrca : rrca : rrca : rrca + +.noUpdate sub #F0-9 : cp 15+9 : jr c,CopyMatch + +LongMatch: add (hl) : NEXT_HL : jr nc,CopyMatch + ld c,(hl) : NEXT_HL + ld b,(hl) : NEXT_HL : jr nz,CopyMatch.useC + pop de : ret + + diff --git a/asm/z80/unlzsa2_small_v1.asm b/asm/z80/unlzsa2_small_v1.asm index c73a50b..b65bd52 100644 --- a/asm/z80/unlzsa2_small_v1.asm +++ b/asm/z80/unlzsa2_small_v1.asm @@ -1,5 +1,6 @@ ; -; Size-optimized LZSA2 decompressor by spke (v.1 02-09/06/2019 +patch1-30/07/2019, 144 bytes) +; Size-optimized LZSA2 decompressor by spke (v.1 02-09/06/2019, 140 bytes); +; with improvements by uniabis (30/07/2019, -1 byte, +3% speed and support for Hitachi HD64180). ; ; The data must be compressed using the command line compressor by Emmanuel Marty ; The compression is done as follows: @@ -26,7 +27,7 @@ ; ; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor). ; -; Of course, LZSA2 compression algorithm is (c) 2019 Emmanuel Marty, +; Of course, LZSA2 compression algorithms are (c) 2019 Emmanuel Marty, ; see https://github.com/emmanuel-marty/lzsa for more information ; ; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com @@ -48,23 +49,10 @@ ; 3. This notice may not be removed or altered from any source distribution. ; -; DEFINE BACKWARD_DECOMPRESS +; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b +; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180 - IFDEF BACKWARD_DECOMPRESS - - MACRO NEXT_HL - dec hl - ENDM - - MACRO ADD_OFFSET - push hl : or a : sbc hl,de : pop de - ENDM - - MACRO BLOCKCOPY - lddr - ENDM - - ELSE + IFNDEF BACKWARD_DECOMPRESS MACRO NEXT_HL inc hl @@ -78,58 +66,68 @@ ldir ENDM + ELSE + + MACRO NEXT_HL + dec hl + ENDM + + MACRO ADD_OFFSET + push hl : or a : sbc hl,de : pop de ; 11+4+15+10 = 40t / 5 bytes + ENDM + + MACRO BLOCKCOPY + lddr + ENDM + ENDIF - IFDEF HD64180 + IFNDEF HD64180 + + MACRO LD_IY_DE + ld iyl,e : ld iyh,d + ENDM + + MACRO LD_DE_IY + ld e,iyl : ld d,iyh + ENDM + + ELSE + MACRO LD_IY_DE push de : pop iy ENDM + MACRO LD_DE_IY push iy : pop de ENDM - MACRO LD_IXL_A - exx : ld l,a : exx - ENDM - MACRO LD_A_IXL - exx : ld a,l : exx - ENDM - ELSE - MACRO LD_IY_DE - ;push de : pop iy - ld iyl,e : ld iyh,d - ENDM - MACRO LD_DE_IY - ;push iy : pop de - ld e,iyl : ld d,iyh - ENDM - MACRO LD_IXL_A - ld ixl,a - ENDM - MACRO LD_A_IXL - ld a,ixl - ENDM - ENDIF + ENDIF @DecompressLZSA2: xor a : ld b,a : exa : jr ReadToken +CASE00x: call ReadNibble + ld e,a : ld a,c + cp %00100000 : rl e : jr SaveOffset + CASE0xx ld d,#FF : cp %01000000 : jr c,CASE00x CASE01x: cp %01100000 : rl d OffsetReadE: ld e,(hl) : NEXT_HL - + SaveOffset: LD_IY_DE MatchLen: and %00000111 : add 2 : cp 9 : call z,ExtendedCode CopyMatch: ld c,a - ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[src] - ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src] - BLOCKCOPY : pop hl + ex (sp),hl ; BC = len, DE = -offset, HL = dest, SP -> [src] + ADD_OFFSET ; BC = len, DE = dest, HL = dest+(-offset), SP -> [src] + BLOCKCOPY ; BC = 0, DE = dest + pop hl ; HL = src -ReadToken: ld a,(hl) : LD_IXL_A : NEXT_HL +ReadToken: ld a,(hl) : NEXT_HL : push af and %00011000 : jr z,NoLiterals rrca : rrca : rrca @@ -138,7 +136,7 @@ ReadToken: ld a,(hl) : LD_IXL_A : NEXT_HL ld c,a BLOCKCOPY -NoLiterals: push de : LD_A_IXL +NoLiterals: pop af : push de or a : jp p,CASE0xx CASE1xx cp %11000000 : jr nc,CASE11x @@ -146,24 +144,20 @@ CASE1xx cp %11000000 : jr nc,CASE11x CASE10x: call ReadNibble ld d,a : ld a,c cp %10100000 : rl d - dec d : dec d : jr OffsetReadE + dec d : dec d : DB #CA ; jr OffsetReadE ; #CA is JP Z,.. to skip all commands in CASE110 before jr OffsetReadE -CASE00x: call ReadNibble - ld e,a : ld a,c - cp %00100000 : rl e : jr SaveOffset +CASE110: ld d,(hl) : NEXT_HL : jr OffsetReadE CASE11x cp %11100000 : jr c,CASE110 CASE111: LD_DE_IY : jr MatchLen -CASE110: ld d,(hl) : NEXT_HL : jr OffsetReadE - ExtendedCode: call ReadNibble : inc a : jr z,ExtraByte sub #F0+1 : add c : ret ExtraByte ld a,15 : add c : add (hl) : NEXT_HL : ret nc ld a,(hl) : NEXT_HL ld b,(hl) : NEXT_HL : ret nz - pop de : pop de : ret + pop de : pop de ; RET is not needed, because RET from ReadNibble is sufficient ReadNibble: ld c,a : xor a : exa : ret m UpdateNibble ld a,(hl) : or #F0 : exa