From 4c8beefdcb800ee7327b8b4b3fe250358426f7d6 Mon Sep 17 00:00:00 2001
From: Irmen de Jong
Date: Mon, 14 Aug 2023 16:58:44 +0200
Subject: [PATCH] slightly faster integer bytes multiplication routine

math.asm: multiply_bytes is replaced by an unrolled shift-and-add routine
(two passes of four steps) taken from TobyLobster's mult29.a. Unlike the
routine it replaces, it uses the X register as its loop counter.
multiply_bytes_into_word is removed; re-enabling the commented-out "tay"
yields the 16-bit result in A/Y.

examples/test.p8: times the built-in byte/word multiplications against
multiply_b / multiply_sb / multiply_w / multiply_sw and verifies that the
results agree. multiply_w is still a TODO stub that returns a constant.
---
Review note: the "word multiply verify" loop now compares on its own loop
variable (wvalue) instead of the stale ubyte "value". The index hash of
examples/test.p8 below was not regenerated for that one-line change.

 compiler/res/prog8lib/math.asm |  68 +++++++-----
 examples/test.p8               | 186 +++++++++++++++++++++++++++++----
 2 files changed, 203 insertions(+), 51 deletions(-)

diff --git a/compiler/res/prog8lib/math.asm b/compiler/res/prog8lib/math.asm
index 85b79b8cd..149156de5 100644
--- a/compiler/res/prog8lib/math.asm
+++ b/compiler/res/prog8lib/math.asm
@@ -10,37 +10,47 @@
 
 multiply_bytes	.proc
 	; -- multiply 2 bytes A and Y, result as byte in A (signed or unsigned)
-		sta  P8ZP_SCRATCH_B1         ; num1
-		sty  P8ZP_SCRATCH_REG        ; num2
-		lda  #0
-		beq  _enterloop
-_doAdd		clc
-		adc  P8ZP_SCRATCH_B1
-_loop		asl  P8ZP_SCRATCH_B1
-_enterloop	lsr  P8ZP_SCRATCH_REG
-		bcs  _doAdd
-		bne  _loop
-		rts
-		.pend
+	; https://github.com/TobyLobster/multiply_test/blob/main/tests/mult29.a
+_multiplicand    = P8ZP_SCRATCH_B1
+_multiplier      = P8ZP_SCRATCH_REG
 
 
-multiply_bytes_into_word	.proc
-	; -- multiply 2 bytes A and Y, result as word in A/Y (unsigned)
-		sta  P8ZP_SCRATCH_B1
-		sty  P8ZP_SCRATCH_REG
-		lda  #0
-		ldx  #8
-		lsr  P8ZP_SCRATCH_B1
--		bcc  +
-		clc
-		adc  P8ZP_SCRATCH_REG
-+		ror  a
-		ror  P8ZP_SCRATCH_B1
-		dex
-		bne  -
-		tay
-		lda  P8ZP_SCRATCH_B1
-		rts
+    sty  _multiplicand
+    lsr  a
+    sta  _multiplier
+    lda  #0
+    ldx  #2
+-
+    bcc  +
+    clc
+    adc  _multiplicand
++
+    ror  a
+    ror  _multiplier
+    bcc  +
+    clc
+    adc  _multiplicand
++
+    ror  a
+    ror  _multiplier
+
+    bcc  +
+    clc
+    adc  _multiplicand
++
+    ror  a
+    ror  _multiplier
+    bcc  +
+    clc
+    adc  _multiplicand
++
+    ror  a
+    ror  _multiplier
+    dex
+    bne  -
+    ; tay ; if you want 16 bits result in AY, enable this again
+    lda  _multiplier
+    rts
 		.pend
 
 
diff --git a/examples/test.p8 b/examples/test.p8
index 4ea070018..4e554b5f0 100644
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -12,56 +12,198 @@ cbm2 {
 main {
     sub start() {
         ubyte value
+        byte svalue
         uword wvalue
-        ubyte other = 99
-        uword otherw = 99
-
-        value=13
-        wvalue=99
-        txt.print_ub(value*value)
-        txt.spc()
-        txt.print_uw(wvalue*wvalue)
-        txt.nl()
-
+        word swvalue
 
         txt.print("byte multiply..")
         cbm.SETTIM(0,0,0)
-        repeat 100 {
+        repeat 200 {
             for value in 0 to 255 {
-                cx16.r0L = value*other
+                cx16.r0L = value*99
             }
         }
         txt.print_uw(cbm.RDTIM16())
         txt.nl()
 
-        txt.print("byte squares...")
+        txt.print("byte multiply new..")
         cbm.SETTIM(0,0,0)
-        repeat 100 {
+        repeat 200 {
             for value in 0 to 255 {
-                cx16.r0L = value*value
+                cx16.r0L = multiply_b(value, 99)
             }
         }
         txt.print_uw(cbm.RDTIM16())
         txt.nl()
 
+        txt.print("byte multiply verify..")
+        for value in 0 to 255 {
+            if multiply_b(value,99) != value*99 {
+                txt.print("different!")
+                sys.exit(1)
+            }
+        }
+        txt.nl()
+
+        txt.print("sbyte multiply..")
+        cbm.SETTIM(0,0,0)
+        repeat 200 {
+            for svalue in -128 to 127 {
+                cx16.r0sL = svalue*99
+            }
+        }
+        txt.print_uw(cbm.RDTIM16())
+        txt.nl()
+
+        txt.print("sbyte multiply new..")
+        cbm.SETTIM(0,0,0)
+        repeat 200 {
+            for svalue in -128 to 127 {
+                cx16.r0L = multiply_sb(svalue, 99)
+            }
+        }
+        txt.print_uw(cbm.RDTIM16())
+        txt.nl()
+
+        txt.print("sbyte multiply verify..")
+        for svalue in -128 to 127 {
+            if multiply_sb(svalue,99) != svalue*99 {
+                txt.print("different!")
+                sys.exit(1)
+            }
+        }
+        txt.nl()
+
         txt.print("word multiply..")
         cbm.SETTIM(0,0,0)
-        repeat 50 {
-            for wvalue in 0 to 255 {
-                cx16.r0 = wvalue*otherw
+        repeat 200 {
+            for wvalue in 200 to 400 {
+                cx16.r0 = wvalue*987
             }
         }
         txt.print_uw(cbm.RDTIM16())
         txt.nl()
 
-        txt.print("word squares...")
+        txt.print("word multiply new..")
         cbm.SETTIM(0,0,0)
-        repeat 50 {
-            for wvalue in 0 to 255 {
-                cx16.r0 = wvalue*wvalue
+        repeat 200 {
+            for wvalue in 200 to 400 {
+                cx16.r0 = multiply_w(wvalue, 987)
             }
         }
         txt.print_uw(cbm.RDTIM16())
         txt.nl()
+
+        txt.print("word multiply verify..")
+        for wvalue in 200 to 400 {
+            if multiply_w(wvalue,987) != wvalue*987 {
+                txt.print("different!")
+                sys.exit(1)
+            }
+        }
+        txt.nl()
+
+        txt.print("sword multiply..")
+        cbm.SETTIM(0,0,0)
+        repeat 100 {
+            for swvalue in -400 to 400 {
+                cx16.r0s = swvalue*987
+            }
+        }
+        txt.print_uw(cbm.RDTIM16())
+        txt.nl()
+
+        txt.print("sword multiply new..")
+        cbm.SETTIM(0,0,0)
+        repeat 100 {
+            for swvalue in -400 to 400 {
+                cx16.r0s = multiply_sw(swvalue, 987)
+            }
+        }
+        txt.print_uw(cbm.RDTIM16())
+        txt.nl()
+
+        txt.print("sword multiply verify..")
+        for swvalue in -400 to 400 {
+            if multiply_sw(swvalue,987) != swvalue*987 {
+                txt.print("different!")
+                sys.exit(1)
+            }
+        }
+        txt.nl()
+    }
+
+asmsub multiply_sb(byte value @A, byte multiplicant @Y) -> ubyte @A {
+    %asm {{
+        jmp  p8_multiply_b
+    }}
+}
+
+asmsub multiply_sw(word value @AY, word multiplicant @R0) -> word @AY {
+    %asm {{
+        jmp  p8_multiply_w
+    }}
+}
+
+
+    asmsub multiply_b(ubyte value @A, ubyte multiplicant @Y) -> ubyte @A {
+        %asm {{
+
+; ***************************************************************************************
+; On Entry:
+;   A: multiplier
+;   Y: multiplicand
+; On Exit:
+;   A: low byte of product
+;   Y: (optional) high byte of product
+_multiplicand    = P8ZP_SCRATCH_B1
+_multiplier      = P8ZP_SCRATCH_REG
+
+    sty  _multiplicand
+    lsr  a
+    sta  _multiplier
+    lda  #0
+    ldx  #2
+-
+    bcc  +
+    clc
+    adc  _multiplicand
++
+    ror  a
+    ror  _multiplier
+    bcc  +
+    clc
+    adc  _multiplicand
++
+    ror  a
+    ror  _multiplier
+
+    bcc  +
+    clc
+    adc  _multiplicand
++
+    ror  a
+    ror  _multiplier
+    bcc  +
+    clc
+    adc  _multiplicand
++
+    ror  a
+    ror  _multiplier
+    dex
+    bne  -
+    ; tay ; if you want 16 bits result in AY, enable this again
+    lda  _multiplier
+    rts
+        }}
+    }
+
+    asmsub multiply_w(uword value @AY, uword multiplicant @R0) -> uword @AY {
+        %asm {{
+            ; TODO
+            lda  #99
+            ldy  #1
+            rts
+        }}
     }
 }