diff --git a/libsrc/runtime/Makefile b/libsrc/runtime/Makefile
index 14158398e..10a5f0e8d 100644
--- a/libsrc/runtime/Makefile
+++ b/libsrc/runtime/Makefile
@@ -152,6 +152,7 @@ OBJS = add.o \
        makebool.o      \
        mod.o           \
        mul.o           \
+       mul8.o          \
        mulax3.o        \
        mulax5.o        \
        mulax6.o        \
diff --git a/libsrc/runtime/mul.s b/libsrc/runtime/mul.s
index 67caf52cf..eaf1fb97b 100644
--- a/libsrc/runtime/mul.s
+++ b/libsrc/runtime/mul.s
@@ -1,43 +1,69 @@
 ;
-; Ullrich von Bassewitz, 07.08.1998
+; Ullrich von Bassewitz, 2009-08-17
 ;
 ; CC65 runtime: multiplication for ints
 ;
 
-        .export         tosumula0, tosumulax, tosmula0, tosmulax
-        .import         popsreg
-        .importzp       sreg, tmp1, ptr4
+        .export         tosumulax, tosmulax
+        .import         mul8x16, mul8x16a       ; in mul8.s
+        .import         popsreg
+        .importzp       sreg, tmp1, ptr4
+
+
+;---------------------------------------------------------------------------
+; 16x16 multiplication routine. In: AX = right operand, left operand is fetched into sreg by popsreg. Out: AX = low 16 bits of the product
 
-tosmula0:
-tosumula0:
-        ldx     #0
 tosmulax:
 tosumulax:
-mul16:  sta     ptr4
-        stx     ptr4+1          ; Save right operand
-        jsr     popsreg         ; Get left operand
+        sta     ptr4            ; Save low byte of right operand
+        txa                     ; High byte of right operand zero?
+        beq     @L3             ; Do 8x16 multiplication if high byte zero
+        stx     ptr4+1          ; Save right operand
+        jsr     popsreg         ; Get left operand
 
-; Do ptr4*sreg --> AX (see mult-div.s from "The Fridge").
+; Do ptr4:ptr4+1 * sreg:sreg+1 --> AX
 
-        lda     #0
-        sta     tmp1
-        ldx     sreg+1          ; Get into register for speed
-        ldy     #16             ; Number of bits
-L0:     lsr     tmp1
-        ror     a
-        ror     ptr4+1
-        ror     ptr4
-        bcc     L1
-        clc
-        adc     sreg
-        pha
+        lda     #0              ; A is byte 2 of the work area tmp1:A:ptr4+1:ptr4
+        ldx     sreg+1          ; Get high byte into register for speed
+        beq     @L4             ; Left high byte zero -> we can do 8x16 after swap
+        sta     tmp1            ; Clear byte 3 of the work area
+        ldy     #16             ; Number of bits
+
+        lsr     ptr4+1          ; Shift right operand one bit to the right ...
+        ror     ptr4            ; Get first bit into carry
+@L0:    bcc     @L1             ; Bit clear -> nothing to add, shift only
+
+        clc
+        adc     sreg            ; Add low byte of left op to byte 2
+        pha                     ; Save byte 2, A is needed for the high byte add
         txa                     ; hi byte of left op
-        adc     tmp1
-        sta     tmp1
-        pla
-L1:     dey
-        bpl     L0
-        lda     ptr4            ; Load the result
-        ldx     ptr4+1
-        rts                     ; Done
+        adc     tmp1            ; Add with carry to byte 3
+        sta     tmp1
+        pla                     ; Restore byte 2 (pla leaves the carry of the add intact)
+
+@L1:    ror     tmp1            ; Shift tmp1:A:ptr4+1:ptr4 right, carry of the add enters at the top
+        ror     a
+        ror     ptr4+1
+        ror     ptr4            ; Next bit of the right operand drops into carry
+        dey
+        bne     @L0             ; Loop until all 16 bits are done
+
+        lda     ptr4            ; Load the result
+        ldx     ptr4+1
+        rts                     ; Done
+
+; High byte of rhs is zero, jump to the 8x16 routine instead
+
+@L3:    jmp     mul8x16
+
+; If the high byte of lhs (sreg+1) is zero, swap the operands and use the 8x16
+; routine. On entry, A and X are zero
+
+@L4:    ldy     sreg            ; Save the 8 bit operand (low byte of lhs)
+        ldx     ptr4            ; Copy low byte of the 16 bit operand (rhs) ...
+        stx     sreg            ; ... into sreg
+        ldx     ptr4+1          ; High byte of the 16 bit operand stays in X, mul8x16a adds it via txa
+        sty     ptr4            ; The 8 bit operand becomes the multiplier in ptr4
+        ldy     #8              ; Number of bits
+        jmp     mul8x16a
 
diff --git a/libsrc/runtime/mul8.s b/libsrc/runtime/mul8.s
new file mode 100644
index 000000000..3287e2155
--- /dev/null
+++ b/libsrc/runtime/mul8.s
@@ -0,0 +1,63 @@
+;
+; Ullrich von Bassewitz, 2009-08-17
+;
+; CC65 runtime: multiplication for ints. Short versions.
+;
+
+        .export         tosumula0, tosmula0
+        .export         mul8x16, mul8x16a
+        .import         popsreg
+        .importzp       sreg, ptr4
+
+
+;---------------------------------------------------------------------------
+; 8x16 routine with external entry points used by the 16x16 routine in mul.s
+
+tosmula0:
+tosumula0:
+        sta     ptr4            ; Save right operand (8 bit, passed in A)
+mul8x16:jsr     popsreg         ; Get left operand (entry with ptr4 already holding the 8 bit operand)
+
+        lda     #0              ; Clear byte 1
+        ldy     #8              ; Number of bits
+        ldx     sreg+1          ; Get hi byte of left op into register for speed
+        beq     mul8x8          ; Do 8x8 multiplication if high byte zero
+mul8x16a:                       ; Entry: A = 0, Y = 8, X:sreg = 16 bit operand, ptr4 = 8 bit operand
+        sta     ptr4+1          ; Clear byte 2
+
+        lsr     ptr4            ; Get first bit into carry
+@L0:    bcc     @L1             ; Bit clear -> nothing to add, shift only
+
+        clc
+        adc     sreg            ; Add low byte of 16 bit op to byte 1
+        pha                     ; Save byte 1, A is needed for the high byte add
+        txa                     ; hi byte of left op
+        adc     ptr4+1          ; Add with carry to byte 2
+        sta     ptr4+1
+        pla                     ; Restore byte 1 (pla leaves the carry of the add intact)
+
+@L1:    ror     ptr4+1          ; Shift ptr4+1:A:ptr4 right, carry of the add enters at the top
+        ror     a
+        ror     ptr4            ; Next bit of the 8 bit operand drops into carry
+        dey
+        bne     @L0             ; Loop until all 8 bits are done
+        tax                     ; Byte 1 is the high byte of the 16 bit result
+        lda     ptr4            ; Load the result
+        rts
+
+;---------------------------------------------------------------------------
+; 8x8 multiplication routine. Reached from mul8x16 with A = 0, Y = 8, operands in sreg and ptr4
+
+mul8x8:
+        lsr     ptr4            ; Get first bit into carry
+@L0:    bcc     @L1             ; Bit clear -> nothing to add, shift only
+        clc
+        adc     sreg            ; Add the other operand to the high byte of the result
+@L1:    ror                     ; No operand given: shifts the accumulator like the "ror a" used above
+        ror     ptr4            ; Next bit of the multiplier drops into carry
+        dey
+        bne     @L0             ; Loop until all 8 bits are done
+        tax                     ; A is the high byte of the 16 bit result
+        lda     ptr4            ; Load the result
+        rts                     ; Done
+