From 8d0528fddc0d124a5fdb04581247dd7acb4ccf2e Mon Sep 17 00:00:00 2001 From: uniabis Date: Wed, 31 Jul 2019 01:39:27 +0900 Subject: [PATCH] hd64180 support a bit faster, a bit smaller --- asm/z80/unlzsa1_fast_v1.asm | 10 +++---- asm/z80/unlzsa1_small_v1.asm | 10 +++---- asm/z80/unlzsa2_fast_v1.asm | 51 ++++++++++++++++++++++++------------ asm/z80/unlzsa2_small_v1.asm | 51 +++++++++++++++++++++++++++++------- 4 files changed, 85 insertions(+), 37 deletions(-) diff --git a/asm/z80/unlzsa1_fast_v1.asm b/asm/z80/unlzsa1_fast_v1.asm index b8443cd..c3bbfca 100644 --- a/asm/z80/unlzsa1_fast_v1.asm +++ b/asm/z80/unlzsa1_fast_v1.asm @@ -1,5 +1,5 @@ ; -; Speed-optimized LZSA decompressor by spke (v.1 03-25/04/2019, 110 bytes) +; Speed-optimized LZSA decompressor by spke (v.1 03-25/04/2019 +patch1-30/07/2019, 109 bytes) ; ; The data must be compressed using the command line compressor by Emmanuel Marty ; The compression is done as follows: @@ -56,7 +56,7 @@ ENDM MACRO ADD_OFFSET - or a : sbc hl,de + push hl : or a : sbc hl,de : pop de ENDM MACRO BLOCKCOPY @@ -70,7 +70,7 @@ ENDM MACRO ADD_OFFSET - add hl,de + ex de,hl : add hl,de ENDM MACRO BLOCKCOPY @@ -90,8 +90,8 @@ ShortOffset: ld d,#FF : add 3 : cp 15+3 : jr nc,LongerMatch ; placed here this saves a JP per iteration CopyMatch: ld c,a -.UseC ex (sp),hl : push hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src] - ADD_OFFSET : pop de ; BC = len, DE = dest, HL = dest-offset, SP->[src] +.UseC ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[src] + ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src] BLOCKCOPY : pop hl ; BC = 0, DE = dest, HL = src ReadToken: ; first a byte token "O|LLL|MMMM" is read from the stream, diff --git a/asm/z80/unlzsa1_small_v1.asm b/asm/z80/unlzsa1_small_v1.asm index df4dfc5..6568660 100644 --- a/asm/z80/unlzsa1_small_v1.asm +++ b/asm/z80/unlzsa1_small_v1.asm @@ -1,5 +1,5 @@ ; -; Size-optimized LZSA decompressor by spke (v.1 23/04/2019, 69 bytes) +; Size-optimized LZSA decompressor by spke (v.1 23/04/2019 +patch1-30/07/2019, 68 bytes) ; ; The data must be compressed using the command line compressor by Emmanuel Marty ; The compression is done as follows: @@ -56,7 +56,7 @@ ENDM MACRO ADD_OFFSET - or a : sbc hl,de + push hl : or a : sbc hl,de : pop de ENDM MACRO BLOCKCOPY @@ -70,7 +70,7 @@ ENDM MACRO ADD_OFFSET - add hl,de + ex de,hl : add hl,de ENDM MACRO BLOCKCOPY @@ -106,8 +106,8 @@ ShortOffset: and #0F : add 3 ; MMMM<15 means match lengths 0+3..14+3 cp 15+3 : call z,ReadLongBA ; MMMM=15 means lengths 14+3+ ld c,a - ex (sp),hl : push hl ; BC = len, DE = -offset, HL = dest, SP ->[dest,src] - ADD_OFFSET : pop de ; BC = len, DE = dest, HL = dest+(-offset), SP->[src] + ex (sp),hl ; BC = len, DE = -offset, HL = dest, SP ->[src] + ADD_OFFSET ; BC = len, DE = dest, HL = dest+(-offset), SP->[src] BLOCKCOPY : pop hl ; BC = 0, DE = dest, HL = src jr ReadToken diff --git a/asm/z80/unlzsa2_fast_v1.asm b/asm/z80/unlzsa2_fast_v1.asm index 56734ca..04d8668 100644 --- a/asm/z80/unlzsa2_fast_v1.asm +++ b/asm/z80/unlzsa2_fast_v1.asm @@ -1,5 +1,5 @@ ; -; Speed-optimized LZSA2 decompressor by spke (v.1 02-07/06/2019, 218 bytes) +; Speed-optimized LZSA2 decompressor by spke (v.1 02-07/06/2019 +patch1-30/07/2019, 213/211(hd64180) bytes) ; ; The data must be compressed using the command line compressor by Emmanuel Marty ; The compression is done as follows: @@ -56,7 +56,7 @@ ENDM MACRO ADD_OFFSET - or a : sbc hl,de + push hl : or a : sbc hl,de : pop de ENDM MACRO BLOCKCOPY @@ -70,7 +70,7 @@ ENDM MACRO ADD_OFFSET - add hl,de + ex de,hl : add hl,de ENDM MACRO BLOCKCOPY @@ -79,16 +79,32 @@ ENDIF + IFDEF HD64180 + MACRO LD_IX_DE + push de : pop ix + ENDM + MACRO LD_DE_IX + push ix : pop de + ENDM + ELSE + MACRO LD_IX_DE + ld ixl,e : ld ixh,d + ENDM + MACRO LD_DE_IX + ld e,ixl : ld d,ixh + ENDM + ENDIF + @DecompressLZSA2: ; A' stores next nibble as %1111.... or assumed to contain trash ; B is assumed to be 0 - xor a : ld b,a : exa : jr ReadToken + xor a : ld b,a : scf : exa : jr ReadToken -LongerMatch: exa : jp m,.noUpdate +LongerMatch: scf : exa : jr nc,.noUpdate ld a,(hl) : or #F0 : exa ld a,(hl) : NEXT_HL : or #0F @@ -110,14 +126,14 @@ LongMatch: ;ld a,24 : ManyLiterals: ld a,18 : add (hl) : NEXT_HL : jr nc,CopyLiterals ld c,(hl) : NEXT_HL - ld a,b : ld b,(hl) : NEXT_HL - jr CopyLiterals.useBC + ld a,b : ld b,(hl) + jr ReadToken.useBC MoreLiterals: ld b,(hl) : NEXT_HL - exa : jp m,.noUpdate + scf : exa : jr nc,.noUpdate ld a,(hl) : or #F0 : exa ld a,(hl) : NEXT_HL : or #0F @@ -148,13 +164,13 @@ CASE01x: cp %01100000 : rl d ReadOffsetE: ld e,(hl) : NEXT_HL -SaveOffset: ld ixl,e : ld ixh,d +SaveOffset: LD_IX_DE MatchLen: inc a : and %00000111 : jr z,LongerMatch : inc a CopyMatch: ld c,a -.useC ex (sp),hl : push hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src] - ADD_OFFSET : pop de ; BC = len, DE = dest, HL = dest-offset, SP->[src] +.useC ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[src] + ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src] BLOCKCOPY : pop hl ; compressed data stream contains records @@ -164,7 +180,8 @@ ReadToken: ld a,(hl) : and %00011000 : jr z,NoLiterals jp pe,MoreLiterals ; 00 has already been processed; this identifies the case of 11 rrca : rrca : rrca - ld c,a : ld a,(hl) : NEXT_HL ; token is re-read for further processing + ld c,a : ld a,(hl) ; token is re-read for further processing +.useBC NEXT_HL BLOCKCOPY ; the token and literals are followed by the offset @@ -173,8 +190,8 @@ ReadToken: ld a,(hl) : and %00011000 : jr z,NoLiterals CASE1xx cp %11000000 : jr nc,CASE11x ; "10x": the case of the 5-bit offset -CASE10x: ld c,a : xor a - exa : jp m,.noUpdate +CASE10x: ld c,a + exa : jr nc,.noUpdate ld a,(hl) : or #F0 : exa ld a,(hl) : NEXT_HL : or #0F @@ -185,8 +202,8 @@ CASE10x: ld c,a : xor a dec d : dec d : jr ReadOffsetE ; "00x": the case of the 5-bit offset -CASE00x: ld c,a : xor a - exa : jp m,.noUpdate +CASE00x: ld c,a + exa : jr nc,.noUpdate ld a,(hl) : or #F0 : exa ld a,(hl) : NEXT_HL : or #0F @@ -199,7 +216,7 @@ CASE00x: ld c,a : xor a CASE11x cp %11100000 : jr c,CASE110 ; "111": repeated offset -CASE111: ld e,ixl : ld d,ixh : jr MatchLen +CASE111: LD_DE_IX : jr MatchLen ; "110": 16-bit offset CASE110: ld d,(hl) : NEXT_HL : jr ReadOffsetE diff --git a/asm/z80/unlzsa2_small_v1.asm b/asm/z80/unlzsa2_small_v1.asm index a2697e7..c73a50b 100644 --- a/asm/z80/unlzsa2_small_v1.asm +++ b/asm/z80/unlzsa2_small_v1.asm @@ -1,5 +1,5 @@ ; -; Size-optimized LZSA2 decompressor by spke (v.1 02-09/06/2019, 145 bytes) +; Size-optimized LZSA2 decompressor by spke (v.1 02-09/06/2019 +patch1-30/07/2019, 144 bytes) ; ; The data must be compressed using the command line compressor by Emmanuel Marty ; The compression is done as follows: @@ -57,7 +57,7 @@ ENDM MACRO ADD_OFFSET - or a : sbc hl,de + push hl : or a : sbc hl,de : pop de ENDM MACRO BLOCKCOPY @@ -71,7 +71,7 @@ ENDM MACRO ADD_OFFSET - add hl,de + ex de,hl : add hl,de ENDM MACRO BLOCKCOPY @@ -80,6 +80,37 @@ ENDIF + IFDEF HD64180 + MACRO LD_IY_DE + push de : pop iy + ENDM + MACRO LD_DE_IY + push iy : pop de + ENDM + MACRO LD_IXL_A + exx : ld l,a : exx + ENDM + MACRO LD_A_IXL + exx : ld a,l : exx + ENDM + ELSE + MACRO LD_IY_DE + ;push de : pop iy + ld iyl,e : ld iyh,d + ENDM + MACRO LD_DE_IY + ;push iy : pop de + ld e,iyl : ld d,iyh + ENDM + MACRO LD_IXL_A + ld ixl,a + ENDM + MACRO LD_A_IXL + ld a,ixl + ENDM + ENDIF + + @DecompressLZSA2: xor a : ld b,a : exa : jr ReadToken @@ -88,17 +119,17 @@ CASE0xx ld d,#FF : cp %01000000 : jr c,CASE00x CASE01x: cp %01100000 : rl d OffsetReadE: ld e,(hl) : NEXT_HL - -SaveOffset: ld iyl,e : ld iyh,d + +SaveOffset: LD_IY_DE MatchLen: and %00000111 : add 2 : cp 9 : call z,ExtendedCode CopyMatch: ld c,a - ex (sp),hl : push hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src] - ADD_OFFSET : pop de ; BC = len, DE = dest, HL = dest-offset, SP->[src] + ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[src] + ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src] BLOCKCOPY : pop hl -ReadToken: ld a,(hl) : ld ixl,a : NEXT_HL +ReadToken: ld a,(hl) : LD_IXL_A : NEXT_HL and %00011000 : jr z,NoLiterals rrca : rrca : rrca @@ -107,7 +138,7 @@ ReadToken: ld a,(hl) : ld ixl,a : NEXT_HL ld c,a BLOCKCOPY -NoLiterals: push de : ld a,ixl +NoLiterals: push de : LD_A_IXL or a : jp p,CASE0xx CASE1xx cp %11000000 : jr nc,CASE11x @@ -123,7 +154,7 @@ CASE00x: call ReadNibble CASE11x cp %11100000 : jr c,CASE110 -CASE111: ld e,iyl : ld d,iyh : jr MatchLen +CASE111: LD_DE_IY : jr MatchLen CASE110: ld d,(hl) : NEXT_HL : jr OffsetReadE