Minor math optimizations

2025-02-28 20:29:46 +00:00 · 2019-02-28 17:30:34 +01:00 · 2019-02-28 17:30:34 +01:00 · cc6559c3f6
commit cc6559c3f6
parent 0ea78be056
4 changed files with 16 additions and 10 deletions
--- a/libsrc/runtime/ludiv.s
+++ b/libsrc/runtime/ludiv.s
@@ -78,7 +78,7 @@ L0:     asl     ptr1
 ; Do a subtraction. We do not have enough space to store the intermediate
 ; result, so we may have to do the subtraction twice.
-        pha
+        tax
        cmp     ptr3
        lda     ptr2+1
        sbc     ptr3+1
@@ -91,9 +91,9 @@ L0:     asl     ptr1
 ; Overflow, do the subtraction again, this time store the result
        sta     tmp4            ; We have the high byte already
-        pla
+        txa
        sbc     ptr3            ; byte 0
-        pha
+        tax
        lda     ptr2+1
        sbc     ptr3+1
        sta     ptr2+1          ; byte 1
@@ -102,7 +102,7 @@ L0:     asl     ptr1
        sta     tmp3            ; byte 2
        inc     ptr1            ; Set result bit
-L1:     pla
+L1:     txa
        dey
        bne     L0
        sta     ptr2
--- a/libsrc/runtime/udiv32by16r16.s
+++ b/libsrc/runtime/udiv32by16r16.s
@@ -34,19 +34,19 @@ L0:     asl     ptr1
        rol     a
        rol     sreg+1
-        pha
+        tax
        cmp     ptr3
        lda     sreg+1
        sbc     ptr3+1
        bcc     L1
        sta     sreg+1
-        pla
+        txa
        sbc     ptr3
-        pha
+        tax
        inc     ptr1
-L1:     pla
+L1:     txa
        dey
        bne     L0
        sta     sreg
--- a/libsrc/runtime/umul16x16r32.s
+++ b/libsrc/runtime/umul16x16r32.s
@@ -42,11 +42,11 @@ umul16x16r16m:
        clc
        adc     ptr3
-        pha
+        tax
        lda     ptr3+1
        adc     sreg+1
        sta     sreg+1
-        pla
+        txa
@L1:    ror     sreg+1
        ror     a
--- a/libsrc/runtime/umul8x16r24.s
+++ b/libsrc/runtime/umul8x16r24.s
@@ -9,6 +9,7 @@
        .include        "zeropage.inc"
+        .macpack        cpu
 ;---------------------------------------------------------------------------
 ; 8x16 => 24 unsigned multiplication routine. Because the overhead for a
@@ -30,9 +31,14 @@ umul8x16r16:
 umul8x16r24m:
 umul8x16r16m:
+.if (.cpu .bitand ::CPU_ISET_65SC02)
+        stz     ptr1+1
+        stz     sreg
+.else
        ldx     #0
        stx     ptr1+1
        stx     sreg
+.endif
        ldy     #8              ; Number of bits
        ldx     ptr3            ; Get into register for speed