mirror of
https://github.com/cc65/cc65.git
synced 2024-11-18 00:07:21 +00:00
Greatly improved multiplication routine. Optimized the generic 16x16 one and
added special cases for 8x16 and 8x8. The 8x16 routine is called directly by the compiler as tosmula0 and tosumula0.

git-svn-id: svn://svn.cc65.org/cc65/trunk@4036 b7a2c559-68d2-44c3-8de9-860c34a00d81
This commit is contained in:
parent
b5a44f9542
commit
acd1233bf2
@ -152,6 +152,7 @@ OBJS = add.o \
|
||||
makebool.o \
|
||||
mod.o \
|
||||
mul.o \
|
||||
mul8.o \
|
||||
mulax3.o \
|
||||
mulax5.o \
|
||||
mulax6.o \
|
||||
|
@ -1,43 +1,69 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 07.08.1998
|
||||
; Ullrich von Bassewitz, 2009-08-17
|
||||
;
|
||||
; CC65 runtime: multiplication for ints
|
||||
;
|
||||
|
||||
.export tosumula0, tosumulax, tosmula0, tosmulax
|
||||
.import popsreg
|
||||
.importzp sreg, tmp1, ptr4
|
||||
.export tosumulax, tosmulax
|
||||
.import mul8x16, mul8x16a ; in mul8.s
|
||||
.import popsreg
|
||||
.importzp sreg, tmp1, ptr4
|
||||
|
||||
|
||||
;---------------------------------------------------------------------------
|
||||
; 16x16 multiplication routine
|
||||
|
||||
tosmula0:
|
||||
tosumula0:
|
||||
ldx #0
|
||||
tosmulax:
|
||||
tosumulax:
|
||||
mul16: sta ptr4
|
||||
stx ptr4+1 ; Save right operand
|
||||
jsr popsreg ; Get left operand
|
||||
sta ptr4
|
||||
txa ; High byte zero
|
||||
beq @L3 ; Do 8x16 multiplication if high byte zero
|
||||
stx ptr4+1 ; Save right operand
|
||||
jsr popsreg ; Get left operand
|
||||
|
||||
; Do ptr4*sreg --> AX (see mult-div.s from "The Fridge").
|
||||
; Do ptr4:ptr4+1 * sreg:sreg+1 --> AX
|
||||
|
||||
lda #0
|
||||
sta tmp1
|
||||
ldx sreg+1 ; Get into register for speed
|
||||
ldy #16 ; Number of bits
|
||||
L0: lsr tmp1
|
||||
ror a
|
||||
ror ptr4+1
|
||||
ror ptr4
|
||||
bcc L1
|
||||
clc
|
||||
adc sreg
|
||||
pha
|
||||
lda #0
|
||||
ldx sreg+1 ; Get high byte into register for speed
|
||||
beq @L4 ; -> we can do 8x16 after swap
|
||||
sta tmp1
|
||||
ldy #16 ; Number of bits
|
||||
|
||||
lsr ptr4+1
|
||||
ror ptr4 ; Get first bit into carry
|
||||
@L0: bcc @L1
|
||||
|
||||
clc
|
||||
adc sreg
|
||||
pha
|
||||
txa ; hi byte of left op
|
||||
adc tmp1
|
||||
sta tmp1
|
||||
pla
|
||||
L1: dey
|
||||
bpl L0
|
||||
lda ptr4 ; Load the result
|
||||
ldx ptr4+1
|
||||
rts ; Done
|
||||
adc tmp1
|
||||
sta tmp1
|
||||
pla
|
||||
|
||||
@L1: ror tmp1
|
||||
ror a
|
||||
ror ptr4+1
|
||||
ror ptr4
|
||||
dey
|
||||
bne @L0
|
||||
|
||||
lda ptr4 ; Load the result
|
||||
ldx ptr4+1
|
||||
rts ; Done
|
||||
|
||||
; High byte of rhs is zero, jump to the 8x16 routine instead
|
||||
|
||||
@L3: jmp mul8x16
|
||||
|
||||
; If the high byte of rhs is zero, swap the operands and use the 8x16
|
||||
; routine. On entry, A and X are zero
|
||||
|
||||
@L4: ldy sreg ; Save right operand (8 bit)
|
||||
ldx ptr4 ; Copy left 16 bit operand to right
|
||||
stx sreg
|
||||
ldx ptr4+1 ; Don't store, this is done later
|
||||
sty ptr4 ; Copy low 8 bit of right op to left
|
||||
ldy #8
|
||||
jmp mul8x16a
|
||||
|
||||
|
63
libsrc/runtime/mul8.s
Normal file
63
libsrc/runtime/mul8.s
Normal file
@ -0,0 +1,63 @@
|
||||
;
|
||||
; Ullrich von Bassewitz, 2009-08-17
|
||||
;
|
||||
; CC65 runtime: multiplication for ints. Short versions.
|
||||
;
|
||||
|
||||
.export tosumula0, tosmula0
|
||||
.export mul8x16, mul8x16a
|
||||
.import popsreg
|
||||
.importzp sreg, ptr4
|
||||
|
||||
|
||||
;---------------------------------------------------------------------------
|
||||
; 8x16 routine with external entry points used by the 16x16 routine in mul.s
;
; tosmula0/tosumula0: A holds the 8 bit right operand; the 16 bit left operand
;       is fetched with popsreg (imported; presumably it leaves the value in
;       sreg/sreg+1, which is where the code below reads it -- confirm).
; mul8x16:  same, but the 8 bit operand has already been stored in ptr4.
; mul8x16a: does not call popsreg. Expects ptr4 = 8 bit operand, sreg = low
;       byte and X = high byte of the 16 bit operand, A = 0 and Y = number
;       of bits (8).
;
; The product is built in three bytes: byte 0 = ptr4 (shared with the not yet
; consumed multiplier bits), byte 1 = A, byte 2 = ptr4+1.
;
; Result: low 16 bits of the product in A (low byte) and X (high byte).
; ptr4 and ptr4+1 are used as work space, Y is zero on return.
|
||||
|
||||
tosmula0:
|
||||
tosumula0:
|
||||
sta ptr4 ; Save 8 bit right operand (the multiplier)
|
||||
mul8x16:jsr popsreg ; Get left operand
|
||||
|
||||
lda #0 ; Clear byte 1 of the product (kept in A)
|
||||
ldy #8 ; Number of multiplier bits = loop count
|
||||
ldx sreg+1 ; Get high byte of left op into register for speed
|
||||
beq mul8x8 ; Do 8x8 multiplication if high byte zero
|
||||
mul8x16a:
|
||||
sta ptr4+1 ; Clear byte 2 of the product (A is zero here)
|
||||
|
||||
lsr ptr4 ; Get first multiplier bit into carry
|
||||
@L0: bcc @L1 ; Bit clear: nothing to add, just shift
|
||||
|
||||
clc ; Carry is set here, clear it for the add
|
||||
adc sreg ; Add low byte of the 16 bit operand to byte 1
|
||||
pha ; Keep byte 1 while A is used for byte 2
|
||||
txa ; High byte of the 16 bit operand
|
||||
adc ptr4+1 ; Add to byte 2, including the carry from byte 1
|
||||
sta ptr4+1
|
||||
pla ; Restore byte 1; sta/pla leave the carry untouched
|
||||
|
||||
@L1: ror ptr4+1 ; Shift carry:byte 2:byte 1:ptr4 right by one bit
|
||||
ror a
|
||||
ror ptr4 ; Product bit moves in, next multiplier bit moves out
|
||||
dey
|
||||
bne @L0 ; Repeat until all bits are done
|
||||
tax ; Byte 1 is the high byte of the result
|
||||
lda ptr4 ; Load the low byte of the result
|
||||
rts ; Byte 2 (ptr4+1) is not returned
|
||||
|
||||
;---------------------------------------------------------------------------
|
||||
; 8x8 multiplication routine
;
; Not exported; in this file it is reached only through the branch in mul8x16
; that is taken when the high byte of the left operand is zero.
; Expects ptr4 and sreg = the two 8 bit operands, A = 0 and Y = 8.
; The product is built in A (high byte) and ptr4 (low byte, shared with the
; not yet consumed multiplier bits).
;
; Result: 16 bit product in A (low byte) and X (high byte).
; ptr4 is used as work space, Y is zero on return.
|
||||
|
||||
mul8x8:
|
||||
lsr ptr4 ; Get first multiplier bit into carry
|
||||
@L0: bcc @L1 ; Bit clear: nothing to add, just shift
|
||||
clc ; Carry is set here, clear it for the add
|
||||
adc sreg ; Add the other operand to the high byte
|
||||
@L1: ror ; Shift carry:A:ptr4 right by one bit (rotates the accumulator)
|
||||
ror ptr4 ; Product bit moves in, next multiplier bit moves out
|
||||
dey
|
||||
bne @L0 ; Repeat until all bits are done
|
||||
tax ; High byte of the result
|
||||
lda ptr4 ; Load the low byte of the result
|
||||
rts ; Done
|
||||
|
Loading…
Reference in New Issue
Block a user