1
0
mirror of https://github.com/cc65/cc65.git synced 2025-01-28 00:30:12 +00:00

Optimized mul20 & mul40 and extracted to new library.

This commit is contained in:
IrgendwerA8 2019-03-21 15:59:45 +01:00 committed by Oliver Schmidt
parent fbf302e6cd
commit 399250a105
14 changed files with 216 additions and 92 deletions

View File

@ -97,6 +97,9 @@
Describes Christian Krüger's macro package for writing self modifying Describes Christian Krüger's macro package for writing self modifying
assembler code. assembler code.
<tag><htmlurl url="specialmath.html" name="specialmath.html"></tag>
Library for speed optimized math functions.
<tag><url name="6502 Binary Relocation Format document" <tag><url name="6502 Binary Relocation Format document"
url="http://www.6502.org/users/andre/o65/fileformat.html"></tag> url="http://www.6502.org/users/andre/o65/fileformat.html"></tag>
Describes the o65 file format that is used for dynamically loadable modules Describes the o65 file format that is used for dynamically loadable modules

38
doc/specialmath.sgml Normal file
View File

@ -0,0 +1,38 @@
<!doctype linuxdoc system>
<article>
<title>Special math functions
<author>Christian Kr&uuml;ger
<abstract>
This library provides functions for speed optimized math operations.
</abstract>
<!-- Table of contents -->
<toc>
<!-- Begin the document -->
<sect>Multiplication<p>
When accessing screen memory, the vertical position often has to be multiplied by the width of a
screen line to calculate the target address. Common line widths on 8-bit systems are 20 or 40 bytes (besides e.g. 32 bytes, where the multiplication can be accomplished by shifting left 5 times).
<p>
<tscreen><verb>
unsigned int __fastcall__ mul20(unsigned char value);
</verb></tscreen>
The 8 bit <tt>value</tt> is multiplied by 20 and the result is returned as a 16 bit value.
</p>
<p>
<tscreen><verb>
unsigned int __fastcall__ mul40(unsigned char value);
</verb></tscreen>
The 8 bit <tt>value</tt> is multiplied by 40 and the result is returned as a 16 bit value.
</p>
</article>

46
include/specialmath.h Normal file
View File

@ -0,0 +1,46 @@
/*****************************************************************************/
/* */
/* specialmath.h */
/* */
/* Optimized math routines for special usages */
/* */
/* */
/* */
/* (C) 2019 Christian 'Irgendwer' Krueger */
/* */
/* This software is provided 'as-is', without any expressed or implied */
/* warranty. In no event will the authors be held liable for any damages */
/* arising from the use of this software. */
/* */
/* Permission is granted to anyone to use this software for any purpose, */
/* including commercial applications, and to alter it and redistribute it */
/* freely, subject to the following restrictions: */
/* */
/* 1. The origin of this software must not be misrepresented; you must not */
/* claim that you wrote the original software. If you use this software */
/* in a product, an acknowledgment in the product documentation would be */
/* appreciated but is not required. */
/* 2. Altered source versions must be plainly marked as such, and must not */
/* be misrepresented as being the original software. */
/* 3. This notice may not be removed or altered from any source */
/* distribution. */
/* */
/*****************************************************************************/
/* Include guard: the declarations below are only seen once per translation unit */
#ifndef _SPECIALMATH_H
#define _SPECIALMATH_H
/* Multiply an 8 bit unsigned value by 20 and return the 16 bit unsigned result.
** The largest possible result is 255 * 20 = 5100, so the result always fits.
*/
unsigned int __fastcall__ mul20(unsigned char value);
/* Multiply an 8 bit unsigned value by 40 and return the 16 bit unsigned result.
** The largest possible result is 255 * 40 = 10200, so the result always fits.
*/
unsigned int __fastcall__ mul40(unsigned char value);
/* End of specialmath.h */
#endif

View File

@ -181,6 +181,7 @@ SRCDIRS += common \
mouse \ mouse \
runtime \ runtime \
serial \ serial \
specialmath \
tgi \ tgi \
zlib zlib

View File

@ -7,7 +7,7 @@
.export _cputcxy, _cputc .export _cputcxy, _cputc
.export plot, cputdirect, putchar .export plot, cputdirect, putchar
.import gotoxy, mul40 .import gotoxy, _mul40
.importzp tmp4,ptr4 .importzp tmp4,ptr4
.import _revflag,setcursor .import _revflag,setcursor
@ -71,8 +71,7 @@ putchar:
sta (OLDADR),y sta (OLDADR),y
lda ROWCRS lda ROWCRS
jsr mul40 ; destroys tmp4 jsr _mul40 ; destroys tmp4, carry is cleared
clc
adc SAVMSC ; add start of screen memory adc SAVMSC ; add start of screen memory
sta ptr4 sta ptr4
txa txa

View File

@ -12,7 +12,7 @@
.export _mouse_txt_callbacks .export _mouse_txt_callbacks
.importzp tmp4 .importzp tmp4
.import mul40,loc_tmp .import _mul40
.importzp mouse_txt_char ; screen code of mouse cursor .importzp mouse_txt_char ; screen code of mouse cursor
.include "atari.inc" .include "atari.inc"
@ -104,22 +104,15 @@ movex:
; Move the mouse cursor y position to the value in A/X. ; Move the mouse cursor y position to the value in A/X.
movey: movey:
tax ldy tmp4 ; mul40 uses tmp4, save in Y
ldy tmp4 ; mul40 uses tmp4
lda loc_tmp ; and this local variable
pha
txa ; get parameter back
lsr a ; convert y position to character line lsr a ; convert y position to character line
lsr a lsr a
lsr a lsr a
jsr mul40 jsr _mul40 ; carry is cleared by _mul40
clc
adc SAVMSC adc SAVMSC
sta scrptr sta scrptr
txa txa
adc SAVMSC+1 adc SAVMSC+1
sta scrptr+1 sta scrptr+1
pla sty tmp4 ; restore tmp4
sta loc_tmp
sty tmp4
rts rts

View File

@ -1,35 +0,0 @@
;
; Christian Groessler, June 2000
;
; mul40
; multiplies A by 40 and returns result in AX
; (A = low byte, X = high byte)
; uses tmp4 (high byte accumulator) and loc_tmp (copy of the input value);
; loc_tmp is exported as well, so code that must preserve it can save and restore it
.importzp tmp4
.export mul40,loc_tmp
.proc mul40
ldx #0
stx tmp4 ; high byte of the result starts at 0
sta loc_tmp ; keep the original value for the "* 5" addition
asl a
rol tmp4 ; val * 2
asl a
rol tmp4 ; val * 4 (the rol shifts a 0 out of tmp4, so carry is clear here)
adc loc_tmp ; low byte of val * 4 + val, relies on the clear carry
bcc L1 ; no overflow of the low byte -> high byte is already correct
inc tmp4 ; val * 5
L1: asl a
rol tmp4 ; val * 10
asl a
rol tmp4 ; val * 20
asl a
rol tmp4 ; val * 40
ldx tmp4 ; return high byte in X, low byte is already in A
rts
.endproc
.bss
; one byte of scratch storage for the input value
loc_tmp:.res 1

View File

@ -8,7 +8,7 @@
.include "atari.inc" .include "atari.inc"
.importzp tmp1,tmp4,ptr1,ptr2 .importzp tmp1,tmp4,ptr1,ptr2
.import mul40,_clrscr .import _mul40,_clrscr
.export __scroll .export __scroll
.proc __scroll .proc __scroll
@ -40,7 +40,7 @@ down_ok:lda SAVMSC
sta ptr2+1 sta ptr2+1
lda tmp1 lda tmp1
jsr mul40 jsr _mul40
sta tmp4 sta tmp4
lda ptr2 lda ptr2
sec sec
@ -103,8 +103,7 @@ up: sta tmp1 ; # of lines to scroll
jmp _clrscr jmp _clrscr
;multiply by 40 (xsize) ;multiply by 40 (xsize)
up_ok: jsr mul40 up_ok: jsr _mul40 ; carry is cleared by _mul40
clc
adc SAVMSC ; add start of screen mem adc SAVMSC ; add start of screen mem
sta ptr2 sta ptr2
txa txa

View File

@ -4,7 +4,7 @@
; cursor handling, internal function ; cursor handling, internal function
.include "atari.inc" .include "atari.inc"
.import cursor,mul40 .import cursor,_mul40
.export setcursor .export setcursor
.proc setcursor .proc setcursor
@ -14,8 +14,7 @@
sta (OLDADR),y sta (OLDADR),y
lda ROWCRS lda ROWCRS
jsr mul40 jsr _mul40 ; function leaves with carry clear!
clc
adc SAVMSC ; add start of screen memory adc SAVMSC ; add start of screen memory
sta OLDADR sta OLDADR
txa txa

View File

@ -10,7 +10,7 @@
.export _cputcxy, _cputc .export _cputcxy, _cputc
.export plot, cputdirect, putchar .export plot, cputdirect, putchar
.import gotoxy, mul20 .import gotoxy, _mul20
.importzp ptr4 .importzp ptr4
.import setcursor .import setcursor
@ -75,8 +75,7 @@ putchar:
pha ; save char pha ; save char
lda ROWCRS_5200 lda ROWCRS_5200
jsr mul20 ; destroys tmp4 jsr _mul20 ; destroys tmp4, carry is cleared
clc
adc SAVMSC ; add start of screen memory adc SAVMSC ; add start of screen memory
sta ptr4 sta ptr4
txa txa

View File

@ -1,33 +0,0 @@
;
; Christian Groessler, April 2014
;
; mul20
; multiplies A by 20 and returns result in AX
; (A = low byte, X = high byte)
; uses tmp4 (high byte accumulator) and loc_tmp (copy of the input value);
; loc_tmp is exported as well, so code that must preserve it can save and restore it
.importzp tmp4
.export mul20,loc_tmp
.proc mul20
ldx #0
stx tmp4 ; high byte of the result starts at 0
sta loc_tmp ; keep the original value for the "* 5" addition
asl a
rol tmp4 ; val * 2
asl a
rol tmp4 ; val * 4 (the rol shifts a 0 out of tmp4, so carry is clear here)
adc loc_tmp ; low byte of val * 4 + val, relies on the clear carry
bcc L1 ; no overflow of the low byte -> high byte is already correct
inc tmp4 ; val * 5
L1: asl a
rol tmp4 ; val * 10
asl a
rol tmp4 ; val * 20
ldx tmp4 ; return high byte in X, low byte is already in A
rts
.endproc
.bss
; one byte of scratch storage for the input value
loc_tmp:.res 1

View File

@ -0,0 +1,47 @@
; mul20.s
;
; This file is part of
; cc65 - a freeware C compiler for 6502 based systems
;
; https://github.com/cc65/cc65
;
; See "LICENSE" file for legal information.
;
;
; unsigned int __fastcall__ mul20(unsigned char value);
;
; In:       A = value
; Out:      A = low byte, X = high byte of value * 20
; Uses:     tmp4 (overwritten); Y is not touched
;
; REMARKS: Function is defined to return with carry-flag cleared
;          (the final "rol tmp4" shifts out bit 7 of the high byte of
;          value * 10, which is at most 9, so a 0 ends up in the carry)
;
; Method:   value * 5 is built as (value * 4) + value while the high bits are
;           collected in X, the remaining "* 4" is done by 16 bit shifts.
.importzp tmp4
.export _mul20
; Size and timing: best case = all three branches taken, worst case = none
; taken; the cycle figures do not include the jsr/rts pair.
.proc _mul20 ; = 30 bytes, 41/46 cycles
sta tmp4 ; remember value for later addition...
ldx #0 ; clear high-byte
asl a ; * 2
bcc mul4 ; bit 7 of value shifted out?
ldx #2 ; yes: it becomes bit 1 of the high-byte of value * 4
mul4: asl a ; * 4
bcc mul5 ; bit 6 of value shifted out? (if not, carry is already clear for adc)
inx ; yes: it is bit 0 of the high-byte of value * 4
clc ; carry was set by the shift, clear it for the addition
mul5: adc tmp4 ; * 5 (low byte of value * 4 plus original value)
bcc mul10 ; low byte overflowed?
inx ; yes, carry into the high-byte
mul10: stx tmp4 ; tmp4 now holds the high-byte, continue with classic shifting...
asl a ; * 10
rol tmp4
asl a ; * 20
rol tmp4 ; leaves carry clear, see REMARKS
ldx tmp4 ; deliver high-byte in X
rts
.endproc

View File

@ -0,0 +1,50 @@
; mul40.s
;
; This file is part of
; cc65 - a freeware C compiler for 6502 based systems
;
; https://github.com/cc65/cc65
;
; See "LICENSE" file for legal information.
;
;
; unsigned int __fastcall__ mul40(unsigned char value);
;
; In:       A = value
; Out:      A = low byte, X = high byte of value * 40
; Uses:     tmp4 (overwritten); Y is not touched
;
; REMARKS: Function is defined to return with carry-flag cleared
;          (the final "rol tmp4" shifts out bit 7 of the high byte of
;          value * 20, which is at most 19, so a 0 ends up in the carry)
;
; Method:   value * 5 is built as (value * 4) + value while the high bits are
;           collected in X, the remaining "* 8" is done by 16 bit shifts.
.importzp tmp4
.export _mul40
; Size and timing: best case = all three branches taken, worst case = none
; taken; the cycle figures do not include the jsr/rts pair.
.proc _mul40 ; = 33 bytes, 48/53 cycles
sta tmp4 ; remember value for later addition...
ldx #0 ; clear high-byte
asl a ; * 2
bcc mul4 ; bit 7 of value shifted out?
ldx #2 ; yes: it becomes bit 1 of the high-byte of value * 4
mul4: asl a ; * 4
bcc mul5 ; bit 6 of value shifted out? (if not, carry is already clear for adc)
inx ; yes: it is bit 0 of the high-byte of value * 4
clc ; carry was set by the shift, clear it for the addition
mul5: adc tmp4 ; * 5 (low byte of value * 4 plus original value)
bcc mul10 ; low byte overflowed?
inx ; yes, carry into the high-byte
mul10: stx tmp4 ; tmp4 now holds the high-byte, continue with classic shifting...
asl a ; * 10
rol tmp4
asl a ; * 20
rol tmp4
asl a ; * 40
rol tmp4 ; leaves carry clear, see REMARKS
ldx tmp4 ; deliver high-byte in X
rts
.endproc

View File

@ -0,0 +1,18 @@
#include <specialmath.h>
#include "unittest.h"
/* Exhaustive check: every possible 8 bit argument of mul20() and mul40() is
** compared against the result of a plain C multiplication.
*/
TEST
{
    unsigned value;

    /* mul20: all arguments 0..255 */
    value = 0;
    do
    {
        ASSERT_AreEqual(value*20, mul20(value), "%u", "Invalid 'mul20(%u)' calculation!" COMMA value);
    }
    while (++value < 256);

    /* mul40: all arguments 0..255 */
    value = 0;
    do
    {
        ASSERT_AreEqual(value*40, mul40(value), "%u", "Invalid 'mul40(%u)' calculation!" COMMA value);
    }
    while (++value < 256);
}
ENDTEST