diff --git a/libsrc/common/cc65_imul8x8r16.s b/libsrc/common/cc65_imul8x8r16.s new file mode 100644 index 000000000..0e7d5479b --- /dev/null +++ b/libsrc/common/cc65_imul8x8r16.s @@ -0,0 +1,22 @@ +; +; 2014-03-27, Oliver Schmidt +; 2014-05-08, Greg King +; +; CC65 library: 8x8 => 16 signed multiplication +; + + .export _cc65_imul8x8r16 + .import imul8x8r16, popa, ptr1:zp + + +;--------------------------------------------------------------------------- +; 8x8 => 16 signed multiplication routine: a thin wrapper that loads the operands and tail-calls the runtime routine imul8x8r16. + + +.proc _cc65_imul8x8r16 + + sta ptr1 ; .A on entry becomes the multiplier (imul8x8r16 reads its RHS from ptr1-low) + jsr popa ; popa is external; presumably it leaves the other argument in .A (TODO confirm) + jmp imul8x8r16 ; Tail call: imul8x8r16 takes its LHS in .A, and its rts returns to our caller + +.endproc diff --git a/libsrc/common/cc65_umul8x8r16.s b/libsrc/common/cc65_umul8x8r16.s index 5cbb4f8b0..cf3b26bb1 100644 --- a/libsrc/common/cc65_umul8x8r16.s +++ b/libsrc/common/cc65_umul8x8r16.s @@ -5,9 +5,7 @@ ; .export _cc65_umul8x8r16 - .import umul8x8r16, popa - - .include "zeropage.inc" + .import umul8x8r16, popa, ptr1:zp ;--------------------------------------------------------------------------- diff --git a/libsrc/runtime/imul16x16r32.s b/libsrc/runtime/imul16x16r32.s index f41d13602..6fad9abf0 100644 --- a/libsrc/runtime/imul16x16r32.s +++ b/libsrc/runtime/imul16x16r32.s @@ -13,9 +13,9 @@ ;--------------------------------------------------------------------------- ; 16x16 => 32 signed multiplication routine. ; -; lhs rhs result result also in +; LHS RHS result result also in ; ------------------------------------------------------------- -; ptr1 ax ax:sreg +; ax ptr1 ax:sreg ; ; There is probably a faster way to do this.
; diff --git a/libsrc/runtime/imul8x8r16.s b/libsrc/runtime/imul8x8r16.s new file mode 100644 index 000000000..017297e4b --- /dev/null +++ b/libsrc/runtime/imul8x8r16.s @@ -0,0 +1,75 @@ +; +; 2010-11-02, Ullrich von Bassewitz +; 2014-05-10, Greg King +; +; CC65 runtime: 8x8 => 16 signed multiplication +; + + .export imul8x8r16, imul8x8r16m + .importzp ptr1, ptr3, tmp1 + + .macpack generic + +;--------------------------------------------------------------------------- +; 8x8 => 16 signed multiplication routine (examines the multiplier one bit per pass). +; +; multiplicand multiplier product +; LHS RHS result +; ------------------------------------------------------------- +; .A (ptr3-low) ptr1-low .XA +; imul8x8r16m enters with the LHS already in ptr3-low. ptr1-low, ptr3, and .Y are overwritten. + +imul8x8r16: + sta ptr3 ; Store the LHS as the low byte of the 16-bit multiplicand + +imul8x8r16m: + ldx #>0 ; High byte is $00 if the LHS is not negative + bit ptr3 ; N flag = bit 7 (sign) of the LHS + bpl @L7 + dex ; LHS is negative: high byte becomes $FF +@L7: stx ptr3+1 ; Extend sign of Left-Hand Side + ldy #<0 ; Clear .XY accumulator + ldx #>0 + lda ptr1 ; Check the sign of the Right-Hand Side + bmi NegMult + bpl @L2 ; Branch always + +@L0: tya ; Add current multiplicand + add ptr3 ; generic macpack macro: clc, then adc + tay + txa + adc ptr3+1 ; High byte, with the carry from the low byte + tax + +@L1: asl ptr3 ; Double the 16-bit multiplicand for the next bit + rol ptr3+1 +@L2: lsr ptr1 ; Get next bit of Right-Hand Side into carry + bcs @L0 + bnz @L1 ; Loop if more one-bits in multiplier + + tya ; Put result into cc65's accumulator + rts + +; The multiplier is negative. +; Therefore, make it positive; and, subtract when multiplying. +NegMult: + eor #%11111111 ; One's complement of the multiplier + sta ptr1 + inc ptr1 ; Plus one gives the two's complement (1..128, never zero) + bnz @L2 ; Branch always + +@L0: tya ; Subtract current multiplicand + sub ptr3 ; generic macpack macro: sec, then sbc + tay + txa + sbc ptr3+1 ; High byte, with the borrow from the low byte + tax + +@L1: asl ptr3 ; Double the 16-bit multiplicand for the next bit + rol ptr3+1 +@L2: lsr ptr1 ; Get next bit of Right-Hand Side into carry + bcs @L0 + bnz @L1 ; Loop if more one-bits in multiplier + + tya ; Put result into cc65's accumulator + rts diff --git a/libsrc/runtime/umul16x16r32.s b/libsrc/runtime/umul16x16r32.s index c02cf8bb2..9ecd1596e 100644 --- a/libsrc/runtime/umul16x16r32.s +++ b/libsrc/runtime/umul16x16r32.s @@ -13,14 +13,14 @@ ;--------------------------------------------------------------------------- ; 16x16 => 32 unsigned multiplication routine.
Because the overhead for a ; 16x16 => 16 unsigned multiplication routine is small, we will tag it with -; the matching labels as well. +; the matching labels, as well. ; -; routine lhs rhs result result also in +; routine LHS RHS result result also in ; ----------------------------------------------------------------------- -; umul16x16r32 ptr1 ax ax:sreg ptr1:sreg -; umul16x16r32m ptr1 ptr3 ax:sreg ptr1:sreg -; umul16x16r16 ptr1 ax ax ptr1 -; umul16x16r16m ptr1 ptr3 ax ptr1 +; umul16x16r32 ax ptr1 ax:sreg ptr1:sreg +; umul16x16r32m ptr3 ptr1 ax:sreg ptr1:sreg +; umul16x16r16 ax ptr1 ax ptr1 +; umul16x16r16m ptr3 ptr1 ax ptr1 ; ; ptr3 is left intact by the routine. ; diff --git a/libsrc/runtime/umul8x16r24.s b/libsrc/runtime/umul8x16r24.s index 64653f5f4..ff7d0bae6 100644 --- a/libsrc/runtime/umul8x16r24.s +++ b/libsrc/runtime/umul8x16r24.s @@ -13,14 +13,14 @@ ;--------------------------------------------------------------------------- ; 8x16 => 24 unsigned multiplication routine. Because the overhead for a ; 8x16 => 16 unsigned multiplication routine is small, we will tag it with -; the matching labels as well. +; the matching labels, as well. ; -; routine lhs rhs result result also in +; routine LHS RHS result result also in ; ----------------------------------------------------------------------- -; umul8x16r24 ptr1-lo ax ax:sreg-lo ptr1:sreg-lo -; umul8x16r24m ptr1-lo ptr3 ax:sreg-lo ptr1:sreg-lo +; umul8x16r24 ax ptr1-low ax:sreg-low ptr1:sreg-low +; umul8x16r24m ptr3 ptr1-low ax:sreg-low ptr1:sreg-low ; -; ptr3 ist left intact by the routine. +; ptr3 is left intact by the routine. 
; umul8x16r24: diff --git a/libsrc/runtime/umul8x8r16.s b/libsrc/runtime/umul8x8r16.s index 34ff82872..aba758e1a 100644 --- a/libsrc/runtime/umul8x8r16.s +++ b/libsrc/runtime/umul8x8r16.s @@ -1,7 +1,7 @@ ; ; Ullrich von Bassewitz, 2010-11-02 ; -; CC65 runtime: 8x8 => 16 multiplication +; CC65 runtime: 8x8 => 16 unsigned multiplication ; .export umul8x8r16, umul8x8r16m @@ -9,11 +9,11 @@ ;--------------------------------------------------------------------------- -; 8x8 => 16 multiplication routine. +; 8x8 => 16 unsigned multiplication routine. ; -; lhs rhs result result also in +; LHS RHS result result also in ; ------------------------------------------------------------- -; ptr1-lo ptr3-lo ax ptr1 +; .A (ptr3-low) ptr1-low .XA ptr1 ; umul8x8r16: @@ -21,7 +21,7 @@ umul8x8r16: umul8x8r16m: lda #0 ; Clear byte 1 ldy #8 ; Number of bits - lsr ptr1 ; Get first bit of lhs into carry + lsr ptr1 ; Get first bit of RHS into carry @L0: bcc @L1 clc adc ptr3 @@ -30,8 +30,6 @@ umul8x8r16m: dey bne @L0 tax - stx ptr1+1 ; Result in a/x and ptr1 + stx ptr1+1 ; Result in .XA and ptr1 lda ptr1 ; Load the result rts ; Done - -