Optimized mul20 & mul40 and extracted to new library.

2025-04-19 00:39:53 +00:00 · 2019-03-21 15:59:45 +01:00 · 2019-03-21 15:59:45 +01:00 · 399250a105
commit 399250a105
parent fbf302e6cd
14 changed files with 216 additions and 92 deletions
--- a/doc/index.sgml
+++ b/doc/index.sgml
@ -97,6 +97,9 @@
  Describes Christian Kr&uuml;ger's macro package for writing self modifying
  assembler code.

+  <tag><htmlurl url="specialmath.html" name="specialmath.html"></tag>
+  Library for speed-optimized math functions.
+
  <tag><url name="6502 Binary Relocation Format document"
        url="http://www.6502.org/users/andre/o65/fileformat.html"></tag>
  Describes the o65 file format that is used for dynamically loadable modules
--- a/doc/specialmath.sgml
+++ b/doc/specialmath.sgml
@ -0,0 +1,38 @@
+<!doctype linuxdoc system>
+
+<article>
+<title>Special math functions
+<author>Christian Kr&uuml;ger
+
+<abstract>
+This library provides functions for speed-optimized math operations.
+</abstract>
+
+<!-- Table of contents -->
+<toc>
+
+<!-- Begin the document -->
+
+<sect>Multiplication<p>
+
+When accessing screen memory, the vertical position often has to be multiplied by the width of a line to calculate
+the target address. Quite common line widths on 8-bit systems are 20 or 40 bytes (besides e.g. 32 bytes, where the multiplication can be accomplished by shifting left 5 times).
+
+<p>
+<tscreen><verb>
+    unsigned int __fastcall__ mul20(unsigned char value);
+</verb></tscreen>
+
+The 8 bit <tt>value</tt> is multiplied by 20 and the result is returned as a 16 bit value.
+</p>
+
+<p>
+<tscreen><verb>
+    unsigned int __fastcall__ mul40(unsigned char value);
+</verb></tscreen>
+
+The 8 bit <tt>value</tt> is multiplied by 40 and the result is returned as a 16 bit value.
+</p>
+
+
+</article>
--- a/include/specialmath.h
+++ b/include/specialmath.h
@ -0,0 +1,46 @@
+/*****************************************************************************/
+/*                                                                           */
+/*                              specialmath.h                                */
+/*                                                                           */
+/*                 Optimized math routines for special usages                */
+/*                                                                           */
+/*                                                                           */
+/*                                                                           */
+/* (C) 2019 Christian 'Irgendwer' Krueger                                    */
+/*                                                                           */
+/* This software is provided 'as-is', without any expressed or implied       */
+/* warranty.  In no event will the authors be held liable for any damages    */
+/* arising from the use of this software.                                    */
+/*                                                                           */
+/* Permission is granted to anyone to use this software for any purpose,     */
+/* including commercial applications, and to alter it and redistribute it    */
+/* freely, subject to the following restrictions:                            */
+/*                                                                           */
+/* 1. The origin of this software must not be misrepresented; you must not   */
+/*    claim that you wrote the original software. If you use this software   */
+/*    in a product, an acknowledgment in the product documentation would be  */
+/*    appreciated but is not required.                                       */
+/* 2. Altered source versions must be plainly marked as such, and must not   */
+/*    be misrepresented as being the original software.                      */
+/* 3. This notice may not be removed or altered from any source              */
+/*    distribution.                                                          */
+/*                                                                           */
+/*****************************************************************************/
+
+#ifndef _SPECIALMATH_H
+#define _SPECIALMATH_H
+
+
+/* Multiply an 8 bit unsigned value by 20 and return the 16 bit unsigned result */
+
+unsigned int __fastcall__ mul20(unsigned char value);
+
+
+/* Multiply an 8 bit unsigned value by 40 and return the 16 bit unsigned result */
+
+unsigned int __fastcall__ mul40(unsigned char value);
+
+
+
+/* End of specialmath.h */
+#endif
--- a/libsrc/Makefile
+++ b/libsrc/Makefile
@ -181,6 +181,7 @@ SRCDIRS += common   \
           mouse    \
           runtime  \
           serial   \
+           specialmath \
           tgi      \
           zlib

--- a/libsrc/atari/cputc.s
+++ b/libsrc/atari/cputc.s
@ -7,7 +7,7 @@

        .export         _cputcxy, _cputc
        .export         plot, cputdirect, putchar
-        .import         gotoxy, mul40
+        .import         gotoxy, _mul40
        .importzp       tmp4,ptr4
        .import         _revflag,setcursor

@ -71,8 +71,7 @@ putchar:
        sta     (OLDADR),y

        lda     ROWCRS
-        jsr     mul40           ; destroys tmp4
-        clc
+        jsr     _mul40          ; destroys tmp4, carry is cleared
        adc     SAVMSC          ; add start of screen memory
        sta     ptr4
        txa
--- a/libsrc/atari/mcbtxtchar.s
+++ b/libsrc/atari/mcbtxtchar.s
@ -12,7 +12,7 @@

        .export         _mouse_txt_callbacks
        .importzp       tmp4
-        .import         mul40,loc_tmp
+        .import         _mul40
        .importzp       mouse_txt_char          ; screen code of mouse cursor

        .include        "atari.inc"
@ -104,22 +104,15 @@ movex:

 ; Move the mouse cursor y position to the value in A/X.
 movey:
-        tax
-        ldy     tmp4            ; mul40 uses tmp4
-        lda     loc_tmp         ; and this local variable
-        pha
-        txa                     ; get parameter back
+        ldy     tmp4            ; mul40 uses tmp4, save in Y
        lsr     a               ; convert y position to character line
        lsr     a
        lsr     a
-        jsr     mul40
-        clc
+        jsr     _mul40          ; carry is cleared by _mul40
        adc     SAVMSC
        sta     scrptr
        txa
        adc     SAVMSC+1
        sta     scrptr+1
-        pla
-        sta     loc_tmp
-        sty     tmp4
+        sty     tmp4            ; restore tmp4
        rts
--- a/libsrc/atari/mul40.s
+++ b/libsrc/atari/mul40.s
@ -1,35 +0,0 @@
-;
-; Christian Groessler, June 2000
-;
-; mul40
-; multiplies A by 40 and returns result in AX
-; uses tmp4
-
-        .importzp       tmp4
-        .export         mul40,loc_tmp
-
-.proc   mul40
-
-        ldx     #0
-        stx     tmp4
-        sta     loc_tmp
-        asl     a
-        rol     tmp4
-        asl     a
-        rol     tmp4            ; val * 4
-        adc     loc_tmp
-        bcc     L1
-        inc     tmp4            ; val * 5
-L1:     asl     a
-        rol     tmp4            ; val * 10
-        asl     a
-        rol     tmp4
-        asl     a
-        rol     tmp4            ; val * 40
-        ldx     tmp4
-        rts
-
-.endproc
-
-        .bss
-loc_tmp:.res    1
--- a/libsrc/atari/scroll.s
+++ b/libsrc/atari/scroll.s
@ -8,7 +8,7 @@

        .include        "atari.inc"
        .importzp       tmp1,tmp4,ptr1,ptr2
-        .import         mul40,_clrscr
+        .import         _mul40,_clrscr
        .export         __scroll

 .proc   __scroll
@ -40,7 +40,7 @@ down_ok:lda     SAVMSC
        sta     ptr2+1

        lda     tmp1
-        jsr     mul40
+        jsr     _mul40
        sta     tmp4
        lda     ptr2
        sec
@ -103,8 +103,7 @@ up:     sta     tmp1            ; # of lines to scroll
        jmp     _clrscr

        ;multiply by 40 (xsize)
-up_ok:  jsr     mul40
-        clc
+up_ok:  jsr     _mul40          ; carry is cleared by _mul40
        adc     SAVMSC          ; add start of screen mem
        sta     ptr2
        txa
--- a/libsrc/atari/setcursor.s
+++ b/libsrc/atari/setcursor.s
@ -4,7 +4,7 @@
 ; cursor handling, internal function

        .include "atari.inc"
-        .import cursor,mul40
+        .import cursor,_mul40
        .export setcursor

 .proc   setcursor
@ -14,8 +14,7 @@
        sta     (OLDADR),y

        lda     ROWCRS
-        jsr     mul40
-        clc
+        jsr     _mul40          ; function leaves with carry clear!
        adc     SAVMSC          ; add start of screen memory
        sta     OLDADR
        txa
--- a/libsrc/atari5200/cputc.s
+++ b/libsrc/atari5200/cputc.s
@ -10,7 +10,7 @@

        .export         _cputcxy, _cputc
        .export         plot, cputdirect, putchar
-        .import         gotoxy, mul20
+        .import         gotoxy, _mul20
        .importzp       ptr4
        .import         setcursor

@ -75,8 +75,7 @@ putchar:
        pha                     ; save char

        lda     ROWCRS_5200
-        jsr     mul20           ; destroys tmp4
-        clc
+        jsr     _mul20          ; destroys tmp4, carry is cleared
        adc     SAVMSC          ; add start of screen memory
        sta     ptr4
        txa
--- a/libsrc/atari5200/mul20.s
+++ b/libsrc/atari5200/mul20.s
@ -1,33 +0,0 @@
-;
-; Christian Groessler, April 2014
-;
-; mul20
-; multiplies A by 20 and returns result in AX
-; uses tmp4
-
-        .importzp       tmp4
-        .export         mul20,loc_tmp
-
-.proc   mul20
-
-        ldx     #0
-        stx     tmp4
-        sta     loc_tmp
-        asl     a
-        rol     tmp4
-        asl     a
-        rol     tmp4            ; val * 4
-        adc     loc_tmp
-        bcc     L1
-        inc     tmp4            ; val * 5
-L1:     asl     a
-        rol     tmp4            ; val * 10
-        asl     a
-        rol     tmp4            ; val * 20
-        ldx     tmp4
-        rts
-
-.endproc
-
-        .bss
-loc_tmp:.res    1
--- a/libsrc/specialmath/mul20.s
+++ b/libsrc/specialmath/mul20.s
@ -0,0 +1,47 @@
+; mul20.s
+;
+; This file is part of
+; cc65 - a freeware C compiler for 6502 based systems
+;
+; https://github.com/cc65/cc65
+;
+; See "LICENSE" file for legal information.
+;
+;
+; unsigned int __fastcall__ mul20(unsigned char value);
+; 
+; REMARKS: Function is defined to return with carry-flag cleared
+
+
+        .importzp       tmp4
+        .export         _mul20
+
+.proc   _mul20                  ; = 30 bytes, 41/46 cycles
+                                ; In: A = value.  Out: A/X = value * 20 (low/high byte), carry clear, tmp4 clobbered.
+        sta     tmp4            ; keep the original value for the "* 4 + value" addition
+        ldx     #0              ; X collects the high byte of the result
+        asl     a               ; A = low byte of value * 2, carry = bit 7 of value
+        bcc     mul4            ; no overflow into the high byte?
+        ldx     #2              ; overflow: after the next doubling this bit is worth 2 in the high byte
+
+mul4:   asl     a               ; A = low byte of value * 4, carry = bit 6 of value
+        bcc     mul5            ; no overflow? (then carry is already clear for the ADC below)
+        inx                     ; overflow: set bit 0 of the high byte
+        clc                     ; the ADC below must not add the shifted-out bit
+
+mul5:   adc     tmp4            ; A = low byte of value * 5
+        bcc     mul10           ; no carry out of the addition?
+        inx                     ; carry: propagate it into the high byte
+
+mul10:  stx     tmp4            ; tmp4:A = value * 5; from here on tmp4 holds the high byte
+
+        asl     a               ; * 10
+        rol     tmp4            ; shift the carry from the low byte into the high byte
+
+        asl     a               ; * 20
+        rol     tmp4            ; result <= 255 * 20 = $13EC, so this ROL shifts out a 0: carry is clear
+
+        ldx     tmp4            ; deliver high-byte in X
+        rts
+
+.endproc
--- a/libsrc/specialmath/mul40.s
+++ b/libsrc/specialmath/mul40.s
@ -0,0 +1,50 @@
+; mul40.s
+;
+; This file is part of
+; cc65 - a freeware C compiler for 6502 based systems
+;
+; https://github.com/cc65/cc65
+;
+; See "LICENSE" file for legal information.
+;
+;
+; unsigned int __fastcall__ mul40(unsigned char value);
+; 
+; REMARKS: Function is defined to return with carry-flag cleared
+
+
+        .importzp       tmp4
+        .export         _mul40
+
+.proc   _mul40                  ; = 33 bytes, 48/53 cycles
+                                ; In: A = value.  Out: A/X = value * 40 (low/high byte), carry clear, tmp4 clobbered.
+        sta     tmp4            ; keep the original value for the "* 4 + value" addition
+        ldx     #0              ; X collects the high byte of the result
+        asl     a               ; A = low byte of value * 2, carry = bit 7 of value
+        bcc     mul4            ; no overflow into the high byte?
+        ldx     #2              ; overflow: after the next doubling this bit is worth 2 in the high byte
+
+mul4:   asl     a               ; A = low byte of value * 4, carry = bit 6 of value
+        bcc     mul5            ; no overflow? (then carry is already clear for the ADC below)
+        inx                     ; overflow: set bit 0 of the high byte
+        clc                     ; the ADC below must not add the shifted-out bit
+
+mul5:   adc     tmp4            ; A = low byte of value * 5
+        bcc     mul10           ; no carry out of the addition?
+        inx                     ; carry: propagate it into the high byte
+
+mul10:  stx     tmp4            ; tmp4:A = value * 5; from here on tmp4 holds the high byte
+
+        asl     a               ; * 10
+        rol     tmp4            ; shift the carry from the low byte into the high byte
+
+        asl     a               ; * 20
+        rol     tmp4            ; same 16 bit shift again
+
+        asl     a               ; * 40
+        rol     tmp4            ; result <= 255 * 40 = $27D8, so this ROL shifts out a 0: carry is clear
+
+        ldx     tmp4            ; deliver high-byte in X
+        rts
+
+.endproc
--- a/test/val/lib_specialmath_mulxx.c
+++ b/test/val/lib_specialmath_mulxx.c
@ -0,0 +1,18 @@
+#include <specialmath.h>
+#include "unittest.h"
+
+TEST
+{
+    unsigned i;
+    /* Exhaustively compare both routines with a plain multiplication for every 8 bit argument (0..255) */
+    for (i=0; i < 256; ++i)
+    {
+        ASSERT_AreEqual(i*20, mul20(i), "%u", "Invalid 'mul20(%u)' calculation!" COMMA i);
+    }
+
+    for (i=0; i < 256; ++i)
+    {
+        ASSERT_AreEqual(i*40, mul40(i), "%u", "Invalid 'mul40(%u)' calculation!" COMMA i);
+    }
+}
+ENDTEST