From 214e03555f9860354ec9a3f8875a26d4f8bf7efe Mon Sep 17 00:00:00 2001 From: Doug Masten Date: Mon, 29 Jun 2020 00:36:49 -0500 Subject: [PATCH 1/5] Slight speed improvement in 6809 LZSA depackers Switch instruction "tfr x,u" to "leau ,x" which is 2 clock cycles faster on M6809. On H6309 both instructions take the same number of clock cycles. --- asm/6809/unlzsa1.s | 4 ++-- asm/6809/unlzsa1b.s | 4 ++-- asm/6809/unlzsa2.s | 4 ++-- asm/6809/unlzsa2b.s | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/asm/6809/unlzsa1.s b/asm/6809/unlzsa1.s index 7cf611d..f070058 100644 --- a/asm/6809/unlzsa1.s +++ b/asm/6809/unlzsa1.s @@ -88,13 +88,13 @@ lz1declt lsrb ; shift literals count into place lsrb lz1gotla clra ; clear A (high part of literals count) -lz1gotlt tfr x,u +lz1gotlt leau ,x tfr d,x ; transfer 16-bit count into X lz1cpylt lda ,u+ ; copy literal byte sta ,y+ leax -1,x ; decrement X and update Z flag bne lz1cpylt ; loop until all literal bytes are copied - tfr u,x + leax ,u lz1nolt ldb ,x+ ; load either 8-bit or LSB 16-bit offset (negative, signed) lda ,s ; get token again, don't pop it from the stack diff --git a/asm/6809/unlzsa1b.s b/asm/6809/unlzsa1b.s index af53284..7a2d754 100644 --- a/asm/6809/unlzsa1b.s +++ b/asm/6809/unlzsa1b.s @@ -90,13 +90,13 @@ lz1declt lsrb ; shift literals count into place lsrb lz1gotla clra ; clear A (high part of literals count) -lz1gotlt tfr x,u +lz1gotlt leau ,x tfr d,x ; transfer 16-bit count into X lz1cpylt lda ,-u ; copy literal byte sta ,-y leax -1,x ; decrement X and update Z flag bne lz1cpylt ; loop until all literal bytes are copied - tfr u,x + leax ,u lz1nolt ldb ,s ; get token again, don't pop it from the stack bmi lz1bigof ; test O bit (small or large offset) diff --git a/asm/6809/unlzsa2.s b/asm/6809/unlzsa2.s index 221abe6..131f3de 100644 --- a/asm/6809/unlzsa2.s +++ b/asm/6809/unlzsa2.s @@ -51,13 +51,13 @@ lz2declt lsrb ; shift literals count into place lsrb lz2gotla clra ; clear A (high part of literals count) 
-lz2gotlt tfr x,u +lz2gotlt leau ,x tfr d,x ; transfer 16-bit count into X lz2cpylt lda ,u+ ; copy literal byte sta ,y+ leax -1,x ; decrement X and update Z flag bne lz2cpylt ; loop until all literal bytes are copied - tfr u,x + leax ,u lz2nolt ldb ,s ; get token again, don't pop it from the stack diff --git a/asm/6809/unlzsa2b.s b/asm/6809/unlzsa2b.s index 0402c1f..8415395 100644 --- a/asm/6809/unlzsa2b.s +++ b/asm/6809/unlzsa2b.s @@ -52,13 +52,13 @@ lz2declt lsrb ; shift literals count into place lsrb lz2gotla clra ; clear A (high part of literals count) -lz2gotlt tfr x,u +lz2gotlt leau ,x tfr d,x ; transfer 16-bit count into X lz2cpylt lda ,-u ; copy literal byte sta ,-y leax -1,x ; decrement X and update Z flag bne lz2cpylt ; loop until all literal bytes are copied - tfr u,x + leax ,u lz2nolt ldb ,s ; get token again, don't pop it from the stack From 31d5dc24198cff5672be40883984470cba289df0 Mon Sep 17 00:00:00 2001 From: Doug Masten Date: Tue, 30 Jun 2020 14:05:59 -0500 Subject: [PATCH 2/5] Slight speed and space improvement to nibble processing in 6809 LZSA2 depackers --- asm/6809/unlzsa2.s | 9 ++++----- asm/6809/unlzsa2b.s | 13 ++++++------- 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/asm/6809/unlzsa2.s b/asm/6809/unlzsa2.s index 131f3de..9b21d8b 100644 --- a/asm/6809/unlzsa2.s +++ b/asm/6809/unlzsa2.s @@ -1,4 +1,4 @@ -; unlzsa2.s - 6809 decompression routine for raw LZSA2 - 172 bytes +; unlzsa2.s - 6809 decompression routine for raw LZSA2 - 169 bytes ; compress with lzsa -f2 -r ; ; in: x = start of compressed data @@ -24,7 +24,7 @@ ; 3. This notice may not be removed or altered from any source distribution. decompress_lzsa2 - lsr ; ; in: x = last byte of compressed data @@ -24,7 +24,7 @@ ; 3. This notice may not be removed or altered from any source distribution. 
decompress_lzsa2 - lsr Date: Fri, 3 Jul 2020 01:29:02 -0500 Subject: [PATCH 3/5] Tiny speed improvement to 6809 LZSA depackers Same space used but improved match length code by 1 cycle for M6809. On H6309 the clock cycles are the same. --- asm/6809/unlzsa1.s | 6 ++---- asm/6809/unlzsa1b.s | 5 ++--- asm/6809/unlzsa2.s | 8 +++----- asm/6809/unlzsa2b.s | 5 ++--- 4 files changed, 9 insertions(+), 15 deletions(-) diff --git a/asm/6809/unlzsa1.s b/asm/6809/unlzsa1.s index f070058..559a303 100644 --- a/asm/6809/unlzsa1.s +++ b/asm/6809/unlzsa1.s @@ -28,10 +28,8 @@ decompress_lzsa1 equ lz1token lz1bigof lda ,x+ ; O set: load MSB 16-bit (negative, signed) offest lz1gotof leau d,y ; put backreference start address in U (dst+offset) - puls b ; restore token - - clra ; clear A (high part of match length) - andb #$0F ; isolate MMMM (embedded match length) + ldd #$000f ; clear MSB match length and set mask for MMMM + andb ,s+ ; isolate MMMM (embedded match length) in token addb #$03 ; add MIN_MATCH_SIZE cmpb #$12 ; MATCH_RUN_LEN? bne lz1gotln ; no, we have the full match length, go copy diff --git a/asm/6809/unlzsa1b.s b/asm/6809/unlzsa1b.s index 7a2d754..ada6dcc 100644 --- a/asm/6809/unlzsa1b.s +++ b/asm/6809/unlzsa1b.s @@ -34,10 +34,9 @@ lz1gotof nega ; reverse sign of offset in D sbca #0 leau d,y ; put backreference start address in U (dst+offset) - puls b ; restore token + ldd #$000f ; clear MSB match length and set mask for MMMM + andb ,s+ ; isolate MMMM (embedded match length) in token - clra ; clear A (high part of match length) - andb #$0F ; isolate MMMM (embedded match length) addb #$03 ; add MIN_MATCH_SIZE cmpb #$12 ; MATCH_RUN_LEN? 
bne lz1gotln ; no, we have the full match length, go copy diff --git a/asm/6809/unlzsa2.s b/asm/6809/unlzsa2.s index 9b21d8b..a620cad 100644 --- a/asm/6809/unlzsa2.s +++ b/asm/6809/unlzsa2.s @@ -49,8 +49,8 @@ lz2token ldb ,x+ ; load next token into B: XYZ|LL|MMM lz2declt lsrb ; shift literals count into place lsrb lsrb - lz2gotla clra ; clear A (high part of literals count) + lz2gotlt leau ,x tfr d,x ; transfer 16-bit count into X lz2cpylt lda ,u+ ; copy literal byte @@ -116,10 +116,8 @@ lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match lz2gotof std Date: Fri, 3 Jul 2020 22:09:11 -0500 Subject: [PATCH 4/5] Add enhanced Hitachi 6309 version to 6809 LZSA depackers --- asm/6809/unlzsa1-6309.s | 90 ++++++++++++++++++++++++++++ asm/6809/unlzsa2-6309.s | 129 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 219 insertions(+) create mode 100644 asm/6809/unlzsa1-6309.s create mode 100644 asm/6809/unlzsa2-6309.s diff --git a/asm/6809/unlzsa1-6309.s b/asm/6809/unlzsa1-6309.s new file mode 100644 index 0000000..5866e8d --- /dev/null +++ b/asm/6809/unlzsa1-6309.s @@ -0,0 +1,90 @@ +; unlzsa1-6309.s - Hitachi 6309 decompression routine for raw LZSA1 - 92 bytes +; compress with lzsa -f1 -r +; +; in: x = start of compressed data +; y = start of decompression buffer +; out: y = end of decompression buffer + 1 +; +; Copyright (C) 2020 Emmanuel Marty, Doug Masten +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. 
If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. This notice may not be removed or altered from any source distribution. + +decompress_lzsa1 equ lz1token + +lz1bigof lda ,x+ ; O set: load MSB 16-bit (negative, signed) offset +lz1gotof leau d,y ; put backreference start address in U (dst+offset) + + ldd #$000f ; clear MSB match length and set mask for MMMM + andb ,s+ ; isolate MMMM (embedded match length) in token + addb #$03 ; add MIN_MATCH_SIZE + cmpb #$12 ; MATCH_RUN_LEN? + bne lz1gotln ; no, we have the full match length, go copy + + addb ,x+ ; add extra match length byte + MIN_MATCH_SIZE + MATCH_RUN_LEN + bcc lz1gotln ; if no overflow, we have the full length + bne lz1midln + + ldb ,x+ ; load 16-bit len in D (low part in B, high in A) + lda ,x+ ; (little endian) + bne lz1gotln ; check if we hit EOD (16-bit length = 0) + tstb + bne lz1gotln ; go copy matched bytes if not + + rts ; done, bail + +lz1midln tfr b,a ; copy high part of len into A + ldb ,x+ ; grab low 8 bits of len in B + +lz1gotln tfr d,w ; set W with match length for TFM instruction + tfm u+,y+ ; copy match bytes + +lz1token ldb ,x+ ; load next token into B: O|LLL|MMMM + pshs b ; save it + + andb #$70 ; isolate LLL (embedded literals count) in B + beq lz1nolt ; skip if no literals + cmpb #$70 ; LITERALS_RUN_LEN? 
+ bne lz1declt ; if not, we have the complete count, go unshift + + ldb ,x+ ; load extra literals count byte + addb #$07 ; add LITERALS_RUN_LEN + bcc lz1gotla ; if no overflow, we got the complete count, copy + bne lz1midlt + + ldb ,x+ ; load low 8 bits of little-endian literals count + lda ,x+ ; load high 8 bits of literal count + bra lz1gotlt ; we now have the complete count, go copy + +lz1midlt tfr b,a ; copy high part of literals count into A + ldb ,x+ ; load low 8 bits of literals count + bra lz1gotlt ; we now have the complete count, go copy + +lz1declt lsrb ; shift literals count into place + lsrb + lsrb + lsrb + +lz1gotla clra ; clear A (high part of literals count) +lz1gotlt tfr d,w ; set W with literals count for TFM instruction + tfm x+,y+ ; copy literal bytes + +lz1nolt ldb ,x+ ; load either 8-bit or LSB 16-bit offset (negative, signed) + lda ,s ; get token again, don't pop it from the stack + bmi lz1bigof ; test O bit (small or large offset) + + lda #$ff ; set high 8 bits + bra lz1gotof diff --git a/asm/6809/unlzsa2-6309.s b/asm/6809/unlzsa2-6309.s new file mode 100644 index 0000000..cf31f1f --- /dev/null +++ b/asm/6809/unlzsa2-6309.s @@ -0,0 +1,129 @@ +; unlzsa2-6309.s - Hitachi 6309 decompression routine for raw LZSA2 - 154 bytes +; compress with lzsa -f2 -r +; +; in: x = start of compressed data +; y = start of decompression buffer +; out: y = end of decompression buffer + 1 +; +; Copyright (C) 2020 Emmanuel Marty, Doug Masten +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. 
If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. This notice may not be removed or altered from any source distribution. + +decompress_lzsa2 + clr lz2nibct ; reset nibble available flag + bra lz2token + +lz2nibct fcb 0 ; nibble ready flag + +lz2replg lslb ; push token's Y flag bit into carry + bcs lz2rep16 ; if token's Y bit is set, rep or 16 bit offset + + sex ; push token's Z flag bit into reg A + bsr lz2nibl ; get offset nibble in B + lsla ; push token's Z flag bit into carry + rolb ; shift Z flag from carry into bit 0 of B + eorb #$e1 ; set bits 13-15 of offset, reverse bit 8 + tfr b,a ; copy bits 8-15 of offset into A + suba #$02 ; subtract 512 from offset + ldb ,x+ ; load low 8 bits of (negative, signed) offset + bra lz2gotof + +lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match + ldd ,x++ ; load high then low 8 bits of offset + +lz2gotof std lz2moff+2 ; store match offset + +lz2repof ldd #$0007 ; clear MSB match length and set mask for MMM + andb ,u ; isolate MMM (embedded match length) in token +lz2moff leau $aaaa,y ; put backreference start address in U (dst+offset) + addb #$02 ; add MIN_MATCH_SIZE_V2 + cmpb #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2? + bne lz2gotln ; no, we have the full match length, go copy + + bsr lz2nibl ; get extra match length nibble in B + addb #$09 ; add MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + cmpb #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15? 
+ bne lz2gotln ; if not, we have the full match length, go copy + + addb ,x+ ; add extra length byte + MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15 + bcc lz2gotln ; if no overflow, we have the full length + beq lz2done ; detect EOD code + + ldb ,x+ ; load 16-bit len in D (low part in B, high in A) + lda ,x+ ; (little endian) + +lz2gotln tfr d,w ; set W with match count for TFM instruction + tfm u+,y+ ; copy match bytes + +lz2token tfr x,u ; save token address + ldb ,x+ ; load next token into B: XYZ|LL|MMM + andb #$18 ; isolate LL (embedded literals count) in B + beq lz2nolt ; skip if no literals + cmpb #$18 ; LITERALS_RUN_LEN_V2? + bne lz2declt ; if not, we have the complete count, go unshift + + bsr lz2nibl ; get extra literals length nibble in B + addb #$03 ; add LITERALS_RUN_LEN_V2 + cmpb #$12 ; LITERALS_RUN_LEN_V2 + 15 ? + bne lz2gotla ; if not, we have the full literals count, go copy + + addb ,x+ ; add extra literals count byte + LITERALS_RUN_LEN + 15 + bcc lz2gotla ; if no overflow, we got the complete count, copy + + ldb ,x+ ; load low 8 bits of little-endian literals count + lda ,x+ ; load high 8 bits of literal count + bra lz2gotlt ; we now have the complete count, go copy + +lz2declt lsrb ; shift literals count into place + lsrb + lsrb +lz2gotla clra ; clear A (high part of literals count) + +lz2gotlt tfr d,w ; set W with literals count for TFM instruction + tfm x+,y+ ; copy literal bytes + +lz2nolt ldb ,u ; get token again + lslb ; push token's X flag bit into carry + bcs lz2replg ; if token's X bit is set, rep or large offset + + lslb ; push token's Y flag bit into carry + sex ; push token's Z flag bit into reg A (carry flag is not affected) + bcs lz2offs9 ; if token's Y bit is set, 9 bits offset + + bsr lz2nibl ; get offset nibble in B + lsla ; retrieve token's Z flag bit and push into carry + rolb ; shift Z flag from carry into bit 0 of B + eorb #$e1 ; set bits 5-7 of offset, reverse bit 0 + sex ; set bits 8-15 of offset to $FF + bra lz2gotof + 
+lz2offs9 deca ; set bits 9-15 of offset, reverse bit 8 + ldb ,x+ ; load low 8 bits of (negative, signed) offset + bra lz2gotof + +lz2nibl ldb #$aa + com lz2nibct ; nibble ready? + bpl lz2gotnb + + ldb ,x+ ; load two nibbles + stb lz2nibl+1 ; store nibble for next time (low 4 bits) + lsrb ; shift 4 high bits of nibble down + lsrb + lsrb + lsrb +lz2gotnb andb #$0f ; only keep low 4 bits +lz2done rts From 81f49fe4300b5804c418c7046d410d584a67e965 Mon Sep 17 00:00:00 2001 From: Doug Masten Date: Fri, 3 Jul 2020 22:17:20 -0500 Subject: [PATCH 5/5] Fix byte count for unlzsa2-6309.s in 6809 LZSA depackers --- asm/6809/unlzsa2-6309.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/asm/6809/unlzsa2-6309.s b/asm/6809/unlzsa2-6309.s index cf31f1f..17970d8 100644 --- a/asm/6809/unlzsa2-6309.s +++ b/asm/6809/unlzsa2-6309.s @@ -1,4 +1,4 @@ -; unlzsa2-6309.s - Hitachi 6309 decompression routine for raw LZSA2 - 154 bytes +; unlzsa2-6309.s - Hitachi 6309 decompression routine for raw LZSA2 - 150 bytes ; compress with lzsa -f2 -r ; ; in: x = start of compressed data