Optimized mul20 & mul40 and extracted to new library.

2025-01-28 00:30:12 +00:00 · 2019-03-21 15:59:45 +01:00 · 2019-03-21 15:59:45 +01:00 · 399250a105
commit 399250a105
parent fbf302e6cd
14 changed files with 216 additions and 92 deletions
--- a/doc/index.sgml
+++ b/doc/index.sgml
@ -97,6 +97,9 @@
  Describes Christian Kr&uuml;ger's macro package for writing self modifying
  assembler code.
  <tag><htmlurl url="specialmath.html" name="specialmath.html"></tag>
  Library for speed optimized math functions.  
  <tag><url name="6502 Binary Relocation Format document"
        url="http://www.6502.org/users/andre/o65/fileformat.html"></tag>
  Describes the o65 file format that is used for dynamically loadable modules
--- a/doc/specialmath.sgml
+++ b/doc/specialmath.sgml
@ -0,0 +1,38 @@
 <!doctype linuxdoc system>
 <article>
 <title>Special math functions
 <author>Christian Kr&uuml;ger
 <abstract>
 This library provides functions for speed optimized math operations.
 </abstract>
 <!-- Table of contents -->
 <toc>
 <!-- Begin the document -->
 <sect>Multiplication<p>
 When accessing screen memory, the vertical position often has to be multiplied by the line width to calculate
 the target address. Common line widths on 8-bit systems are 20 or 40 bytes (besides e.g. 32 bytes, where the multiplication can be accomplished by shifting left 5 times).
 <p>
 <tscreen><verb>
    unsigned int __fastcall__ mul20(unsigned char value);
 </verb></tscreen>
 The 8 bit <tt>value</tt> is multiplied by 20 and returned as 16 bit value.
 </p>
 <p>
 <tscreen><verb>
    unsigned int __fastcall__ mul40(unsigned char value);
 </verb></tscreen>
 The 8 bit <tt>value</tt> is multiplied by 40 and returned as 16 bit value.
 </p>
 </article>
--- a/include/specialmath.h
+++ b/include/specialmath.h
@ -0,0 +1,46 @@
 /*****************************************************************************/
 /*                                                                           */
 /*                              specialmath.h                                */
 /*                                                                           */
 /*                 Optimized math routines for special usages                */
 /*                                                                           */
 /*                                                                           */
 /*                                                                           */
 /* (C) 2019 Christian 'Irgendwer' Krueger                                    */
 /*                                                                           */
 /* This software is provided 'as-is', without any expressed or implied       */
 /* warranty.  In no event will the authors be held liable for any damages    */
 /* arising from the use of this software.                                    */
 /*                                                                           */
 /* Permission is granted to anyone to use this software for any purpose,     */
 /* including commercial applications, and to alter it and redistribute it    */
 /* freely, subject to the following restrictions:                            */
 /*                                                                           */
 /* 1. The origin of this software must not be misrepresented; you must not   */
 /*    claim that you wrote the original software. If you use this software   */
 /*    in a product, an acknowledgment in the product documentation would be  */
 /*    appreciated but is not required.                                       */
 /* 2. Altered source versions must be plainly marked as such, and must not   */
 /*    be misrepresented as being the original software.                      */
 /* 3. This notice may not be removed or altered from any source              */
 /*    distribution.                                                          */
 /*                                                                           */
 /*****************************************************************************/
 #ifndef _SPECIALMATH_H
 #define _SPECIALMATH_H
 /* Multiply an 8 bit unsigned value by 20 and return the 16 bit unsigned result.
 ** The largest possible product is 255 * 20 = 5100, so the result always fits.
 */
 unsigned int __fastcall__ mul20(unsigned char value);
 /* Multiply an 8 bit unsigned value by 40 and return the 16 bit unsigned result.
 ** The largest possible product is 255 * 40 = 10200, so the result always fits.
 */
 unsigned int __fastcall__ mul40(unsigned char value);
 /* End of specialmath.h */
 #endif
--- a/libsrc/Makefile
+++ b/libsrc/Makefile
@ -181,6 +181,7 @@ SRCDIRS += common   \
           mouse    \
           runtime  \
           serial   \
           specialmath \
           tgi      \
           zlib
--- a/libsrc/atari/cputc.s
+++ b/libsrc/atari/cputc.s
@ -7,7 +7,7 @@
        .export         _cputcxy, _cputc
        .export         plot, cputdirect, putchar
-        .import         gotoxy, mul40
+        .import         gotoxy, _mul40
        .importzp       tmp4,ptr4
        .import         _revflag,setcursor
@ -71,8 +71,7 @@ putchar:
        sta     (OLDADR),y
        lda     ROWCRS
-        jsr     mul40           ; destroys tmp4
+        jsr     _mul40          ; destroys tmp4, carry is cleared
-        clc
        adc     SAVMSC          ; add start of screen memory
        sta     ptr4
        txa
--- a/libsrc/atari/mcbtxtchar.s
+++ b/libsrc/atari/mcbtxtchar.s
@ -12,7 +12,7 @@
        .export         _mouse_txt_callbacks
        .importzp       tmp4
-        .import         mul40,loc_tmp
+        .import         _mul40
        .importzp       mouse_txt_char          ; screen code of mouse cursor
        .include        "atari.inc"
@ -104,22 +104,15 @@ movex:
 ; Move the mouse cursor y position to the value in A/X.
 movey:
-        tax
-        ldy     tmp4            ; mul40 uses tmp4
-        lda     loc_tmp         ; and this local variable
-        pha
-        txa                     ; get parameter back
+        ldy     tmp4            ; mul40 uses tmp4, save in Y
        lsr     a               ; convert y position to character line
        lsr     a
        lsr     a
-        jsr     mul40
-        clc
+        jsr     _mul40          ; carry is cleared by _mul40
        adc     SAVMSC
        sta     scrptr
        txa
        adc     SAVMSC+1
        sta     scrptr+1
-        pla
-        sta     loc_tmp
-        sty     tmp4
+        sty     tmp4            ; restore tmp4
        rts
--- a/libsrc/atari/mul40.s
+++ b/libsrc/atari/mul40.s
@ -1,35 +0,0 @@
 ;
 ; Christian Groessler, June 2000
 ;
 ; mul40
 ; multiplies A by 40 and returns result in AX
 ; uses tmp4
        .importzp       tmp4
        .export         mul40,loc_tmp
 .proc   mul40
 ; Entry: A = 8 bit value.
 ; Exit:  A = low byte, X = high byte of value * 40.
 ; tmp4 accumulates the high byte; loc_tmp keeps a copy of the value.
        ldx     #0
        stx     tmp4            ; high byte starts at zero
        sta     loc_tmp         ; remember the value for the "+ value" step
        asl     a
        rol     tmp4
        asl     a
        rol     tmp4            ; val * 4
        adc     loc_tmp         ; carry is clear here: the rol above shifted a 0 out of tmp4
        bcc     L1
        inc     tmp4            ; val * 5
 L1:     asl     a
        rol     tmp4            ; val * 10
        asl     a
        rol     tmp4
        asl     a
        rol     tmp4            ; val * 40
        ldx     tmp4            ; return the high byte in X
        rts
 .endproc
        .bss
 loc_tmp:.res    1
--- a/libsrc/atari/scroll.s
+++ b/libsrc/atari/scroll.s
@ -8,7 +8,7 @@
        .include        "atari.inc"
        .importzp       tmp1,tmp4,ptr1,ptr2
-        .import         mul40,_clrscr
+        .import         _mul40,_clrscr
        .export         __scroll
 .proc   __scroll
@ -40,7 +40,7 @@ down_ok:lda     SAVMSC
        sta     ptr2+1
        lda     tmp1
-        jsr     mul40
+        jsr     _mul40
        sta     tmp4
        lda     ptr2
        sec
@ -103,8 +103,7 @@ up:     sta     tmp1            ; # of lines to scroll
        jmp     _clrscr
        ;multiply by 40 (xsize)
-up_ok:  jsr     mul40
+up_ok:  jsr     _mul40          ; carry is cleared by _mul40
-        clc
        adc     SAVMSC          ; add start of screen mem
        sta     ptr2
        txa
--- a/libsrc/atari/setcursor.s
+++ b/libsrc/atari/setcursor.s
@ -4,7 +4,7 @@
 ; cursor handling, internal function
        .include "atari.inc"
-        .import cursor,mul40
+        .import cursor,_mul40
        .export setcursor
 .proc   setcursor
@ -14,8 +14,7 @@
        sta     (OLDADR),y
        lda     ROWCRS
-        jsr     mul40
+        jsr     _mul40          ; function leaves with carry clear!
-        clc
        adc     SAVMSC          ; add start of screen memory
        sta     OLDADR
        txa
--- a/libsrc/atari5200/cputc.s
+++ b/libsrc/atari5200/cputc.s
@ -10,7 +10,7 @@
        .export         _cputcxy, _cputc
        .export         plot, cputdirect, putchar
-        .import         gotoxy, mul20
+        .import         gotoxy, _mul20
        .importzp       ptr4
        .import         setcursor
@ -75,8 +75,7 @@ putchar:
        pha                     ; save char
        lda     ROWCRS_5200
-        jsr     mul20           ; destroys tmp4
+        jsr     _mul20          ; destroys tmp4, carry is cleared
-        clc
        adc     SAVMSC          ; add start of screen memory
        sta     ptr4
        txa
--- a/libsrc/atari5200/mul20.s
+++ b/libsrc/atari5200/mul20.s
@ -1,33 +0,0 @@
 ;
 ; Christian Groessler, April 2014
 ;
 ; mul20
 ; multiplies A by 20 and returns result in AX
 ; uses tmp4
        .importzp       tmp4
        .export         mul20,loc_tmp
 .proc   mul20
 ; Entry: A = 8 bit value.
 ; Exit:  A = low byte, X = high byte of value * 20.
 ; tmp4 accumulates the high byte; loc_tmp keeps a copy of the value.
        ldx     #0
        stx     tmp4            ; high byte starts at zero
        sta     loc_tmp         ; remember the value for the "+ value" step
        asl     a
        rol     tmp4
        asl     a
        rol     tmp4            ; val * 4
        adc     loc_tmp         ; carry is clear here: the rol above shifted a 0 out of tmp4
        bcc     L1
        inc     tmp4            ; val * 5
 L1:     asl     a
        rol     tmp4            ; val * 10
        asl     a
        rol     tmp4            ; val * 20
        ldx     tmp4            ; return the high byte in X
        rts
 .endproc
        .bss
 loc_tmp:.res    1
--- a/libsrc/specialmath/mul20.s
+++ b/libsrc/specialmath/mul20.s
@ -0,0 +1,47 @@
 ; mul20.s
 ;
 ; This file is part of
 ; cc65 - a freeware C compiler for 6502 based systems
 ;
 ; https://github.com/cc65/cc65
 ;
 ; See "LICENSE" file for legal information.
 ;
 ;
 ; unsigned int __fastcall__ mul20(unsigned char value);
 ; 
 ; REMARKS: Function is defined to return with carry-flag cleared
        .importzp       tmp4
        .export         _mul20
 .proc   _mul20                  ; 30 bytes, 41/46 cycles
 ; In:   A = 8 bit factor
 ; Out:  A = low byte, X = high byte of factor * 20, carry clear
 ; Uses: tmp4
 ; X collects the high byte up to factor * 5, so only the last two
 ; doublings need a read-modify-write on memory.
        sta     tmp4            ; keep the factor for the "+ factor" step
        ldx     #0              ; high byte starts out empty
        asl     a               ; A = low(factor * 2), C = bit 7 of factor
        bcc     times4
        ldx     #2              ; bit 7 has weight 2 in the high byte of factor * 4
 times4:
        asl     a               ; A = low(factor * 4), C = bit 6 of factor
        bcc     times5          ; carry already clear for the addition
        inx                     ; bit 6 has weight 1 in the high byte of factor * 4
        clc                     ; keep the shifted-out bit away from the adc
 times5:
        adc     tmp4            ; A = low(factor * 5)
        bcc     times10
        inx                     ; carry of the addition goes into the high byte
 times10:
        stx     tmp4            ; high byte moves to memory for the rol steps
        asl     a               ; * 10
        rol     tmp4
        asl     a               ; * 20
        rol     tmp4            ; shifts out a 0, so the carry is clear on return
        ldx     tmp4            ; hand the high byte back in X
        rts
 .endproc
--- a/libsrc/specialmath/mul40.s
+++ b/libsrc/specialmath/mul40.s
@ -0,0 +1,50 @@
 ; mul40.s
 ;
 ; This file is part of
 ; cc65 - a freeware C compiler for 6502 based systems
 ;
 ; https://github.com/cc65/cc65
 ;
 ; See "LICENSE" file for legal information.
 ;
 ;
 ; unsigned int __fastcall__ mul40(unsigned char value);
 ; 
 ; REMARKS: Function is defined to return with carry-flag cleared
        .importzp       tmp4
        .export         _mul40
 .proc   _mul40                  ; 33 bytes, 48/53 cycles
 ; In:   A = 8 bit factor
 ; Out:  A = low byte, X = high byte of factor * 40, carry clear
 ; Uses: tmp4
 ; X collects the high byte up to factor * 5, so only the last three
 ; doublings need a read-modify-write on memory.
        sta     tmp4            ; keep the factor for the "+ factor" step
        ldx     #0              ; high byte starts out empty
        asl     a               ; A = low(factor * 2), C = bit 7 of factor
        bcc     times4
        ldx     #2              ; bit 7 has weight 2 in the high byte of factor * 4
 times4:
        asl     a               ; A = low(factor * 4), C = bit 6 of factor
        bcc     times5          ; carry already clear for the addition
        inx                     ; bit 6 has weight 1 in the high byte of factor * 4
        clc                     ; keep the shifted-out bit away from the adc
 times5:
        adc     tmp4            ; A = low(factor * 5)
        bcc     times10
        inx                     ; carry of the addition goes into the high byte
 times10:
        stx     tmp4            ; high byte moves to memory for the rol steps
        asl     a               ; * 10
        rol     tmp4
        asl     a               ; * 20
        rol     tmp4
        asl     a               ; * 40
        rol     tmp4            ; shifts out a 0, so the carry is clear on return
        ldx     tmp4            ; hand the high byte back in X
        rts
 .endproc
--- a/test/val/lib_specialmath_mulxx.c
+++ b/test/val/lib_specialmath_mulxx.c
@ -0,0 +1,18 @@
 #include <specialmath.h>
 #include "unittest.h"
 TEST
 {
    /* Exhaustive check: every possible 8 bit argument is compared against
    ** the product computed by the compiler's own multiplication.
    */
    unsigned factor;
    for (factor = 0; factor <= 255; ++factor)
    {
        ASSERT_AreEqual(factor * 20, mul20(factor), "%u", "Invalid 'mul20(%u)' calculation!" COMMA factor);
    }
    for (factor = 0; factor <= 255; ++factor)
    {
        ASSERT_AreEqual(factor * 40, mul40(factor), "%u", "Invalid 'mul40(%u)' calculation!" COMMA factor);
    }
 }
 ENDTEST