mirror of
https://github.com/cc65/cc65.git
synced 2025-01-15 07:31:32 +00:00
64 lines
1.6 KiB
ArmAsm
64 lines
1.6 KiB
ArmAsm
|
;
|
||
|
; Ullrich von Bassewitz, 2011-07-10
|
||
|
;
|
||
|
; CC65 runtime: 8x16 => 24 unsigned multiplication
|
||
|
;
|
||
|
|
||
|
.export umul8x16r24, umul8x16r24m
|
||
|
.export umul8x16r16, umul8x16r16m
|
||
|
|
||
|
.include "zeropage.inc"
|
||
|
|
||
|
|
||
|
;---------------------------------------------------------------------------
|
||
|
; 8x16 => 24 unsigned multiplication routine. Because the overhead for a
|
||
|
; 8x16 => 16 unsigned multiplication routine is small, we will tag it with
|
||
|
; the matching labels as well.
|
||
|
;
|
||
|
;  routine         lhs         rhs         result          result also in
; -----------------------------------------------------------------------
;  umul8x16r24     ptr1-lo     ax          ax:sreg-lo      ptr1:sreg-lo
;  umul8x16r24m    ptr1-lo     ptr3        ax:sreg-lo      ptr1:sreg-lo
|
||
|
;
|
||
|
; ptr3 is left intact by the routine.
|
||
|
;
|
||
|
|
||
|
; Register entry points: the 16-bit right-hand side arrives in a/x
; (a = low byte, x = high byte). It is stored in ptr3 and execution falls
; through into the memory-operand entry points.

umul8x16r24:
umul8x16r16:
        sta     ptr3
        stx     ptr3+1

; Memory entry points: the 16-bit right-hand side is already in ptr3. In both
; cases the 8-bit left-hand side is the low byte of ptr1.
;
; Shift-and-add multiplication. sreg:ptr1+1:a forms a 24-bit value that is
; rotated right once per multiplier bit. a starts out holding the multiplier
; and is gradually filled from the top with the low bits of the product.
;
; On exit:  a = ptr1   = result bits 0..7
;           x = ptr1+1 = result bits 8..15
;           sreg       = result bits 16..23
;           y = 0
; ptr3 is only read. One byte of stack is used temporarily (pha/pla).

umul8x16r24m:
umul8x16r16m:
        ldx     #0
        stx     ptr1+1          ; Clear result bits 8..15
        stx     sreg            ; Clear result bits 16..23

        ldy     #8              ; Number of bits
        ldx     ptr3            ; Get rhs low byte into register for speed
        lda     ptr1            ; a = multiplier (lhs)
        ror     a               ; Get next bit into carry. The carry on entry
                                ; lands in bit 7 of a and is shifted back out
                                ; by the eighth "ror a" in the loop, so it
                                ; never reaches the result.

@BitLoop:
        bcc     @Shift          ; Multiplier bit clear: nothing to add

        clc
        pha                     ; a is needed for the addition, save it
        txa
        adc     ptr1+1          ; Add rhs low byte to result bits 8..15
        sta     ptr1+1
        lda     ptr3+1
        adc     sreg            ; Add rhs high byte to result bits 16..23
        sta     sreg
        pla                     ; Restore a, carry of the addition is kept

@Shift: ror     sreg            ; Carry (0 if nothing was added, else the
                                ; overflow of the addition) enters bit 7
        ror     ptr1+1
        ror     a               ; Next multiplier bit drops into carry
        dey
        bne     @BitLoop

        sta     ptr1            ; Save low byte of result
        ldx     ptr1+1          ; Load high byte of result
        rts                     ; Done
|
||
|
|
||
|
|