mirror of
https://github.com/cc65/cc65.git
synced 2025-01-14 16:33:00 +00:00
Optimized mul20 & mul40 and extracted to new library.
This commit is contained in:
parent
fbf302e6cd
commit
399250a105
@ -97,6 +97,9 @@
|
||||
Describes Christian Krüger's macro package for writing self modifying
|
||||
assembler code.
|
||||
|
||||
<tag><htmlurl url="specialmath.html" name="specialmath.html"></tag>
|
||||
Library for speed optimized math functions.
|
||||
|
||||
<tag><url name="6502 Binary Relocation Format document"
|
||||
url="http://www.6502.org/users/andre/o65/fileformat.html"></tag>
|
||||
Describes the o65 file format that is used for dynamically loadable modules
|
||||
|
38
doc/specialmath.sgml
Normal file
38
doc/specialmath.sgml
Normal file
@ -0,0 +1,38 @@
|
||||
<!doctype linuxdoc system>
|
||||
|
||||
<article>
|
||||
<title>Special math functions
|
||||
<author>Christian Krüger
|
||||
|
||||
<abstract>
|
||||
This library provides functions for speed optimized math operations.
|
||||
</abstract>
|
||||
|
||||
<!-- Table of contents -->
|
||||
<toc>
|
||||
|
||||
<!-- Begin the document -->
|
||||
|
||||
<sect>Multiplication<p>
|
||||
|
||||
When accessing screen memory, a multiplication of the vertical position is often needed to calculate
|
||||
the target address. A quite common horizontal span for 8-bit systems is 20 or 40 bytes (besides, e.g., 32 bytes, where the multiplication can be accomplished by shifting left 5 times).
|
||||
|
||||
<p>
|
||||
<tscreen><verb>
|
||||
unsigned int __fastcall__ mul20(unsigned char value);
|
||||
</verb></tscreen>
|
||||
|
||||
The 8-bit <tt>value</tt> is multiplied by 20 and the product is returned as a 16-bit value.
|
||||
</p>
|
||||
|
||||
<p>
|
||||
<tscreen><verb>
|
||||
unsigned int __fastcall__ mul40(unsigned char value);
|
||||
</verb></tscreen>
|
||||
|
||||
The 8-bit <tt>value</tt> is multiplied by 40 and the product is returned as a 16-bit value.
|
||||
</p>
|
||||
|
||||
|
||||
</article>
|
46
include/specialmath.h
Normal file
46
include/specialmath.h
Normal file
@ -0,0 +1,46 @@
|
||||
/*****************************************************************************/
|
||||
/* */
|
||||
/* specialmath.h */
|
||||
/* */
|
||||
/* Optimized math routines for special usages */
|
||||
/* */
|
||||
/* */
|
||||
/* */
|
||||
/* (C) 2019 Christian 'Irgendwer' Krueger */
|
||||
/* */
|
||||
/* This software is provided 'as-is', without any expressed or implied */
|
||||
/* warranty. In no event will the authors be held liable for any damages */
|
||||
/* arising from the use of this software. */
|
||||
/* */
|
||||
/* Permission is granted to anyone to use this software for any purpose, */
|
||||
/* including commercial applications, and to alter it and redistribute it */
|
||||
/* freely, subject to the following restrictions: */
|
||||
/* */
|
||||
/* 1. The origin of this software must not be misrepresented; you must not */
|
||||
/* claim that you wrote the original software. If you use this software */
|
||||
/* in a product, an acknowledgment in the product documentation would be */
|
||||
/* appreciated but is not required. */
|
||||
/* 2. Altered source versions must be plainly marked as such, and must not */
|
||||
/* be misrepresented as being the original software. */
|
||||
/* 3. This notice may not be removed or altered from any source */
|
||||
/* distribution. */
|
||||
/* */
|
||||
/*****************************************************************************/
|
||||
|
||||
#ifndef _SPECIALMATH_H
|
||||
#define _SPECIALMATH_H
|
||||
|
||||
|
||||
/* Multiply an 8 bit unsigned value by 20 and return the 16 bit unsigned result */
|
||||
|
||||
unsigned int __fastcall__ mul20(unsigned char value);
|
||||
|
||||
|
||||
/* Multiply an 8 bit unsigned value by 40 and return the 16 bit unsigned result */
|
||||
|
||||
unsigned int __fastcall__ mul40(unsigned char value);
|
||||
|
||||
|
||||
|
||||
/* End of specialmath.h */
|
||||
#endif
|
@ -181,6 +181,7 @@ SRCDIRS += common \
|
||||
mouse \
|
||||
runtime \
|
||||
serial \
|
||||
specialmath \
|
||||
tgi \
|
||||
zlib
|
||||
|
||||
|
@ -7,7 +7,7 @@
|
||||
|
||||
.export _cputcxy, _cputc
|
||||
.export plot, cputdirect, putchar
|
||||
.import gotoxy, mul40
|
||||
.import gotoxy, _mul40
|
||||
.importzp tmp4,ptr4
|
||||
.import _revflag,setcursor
|
||||
|
||||
@ -71,8 +71,7 @@ putchar:
|
||||
sta (OLDADR),y
|
||||
|
||||
lda ROWCRS
|
||||
jsr mul40 ; destroys tmp4
|
||||
clc
|
||||
jsr _mul40 ; destroys tmp4, carry is cleared
|
||||
adc SAVMSC ; add start of screen memory
|
||||
sta ptr4
|
||||
txa
|
||||
|
@ -12,7 +12,7 @@
|
||||
|
||||
.export _mouse_txt_callbacks
|
||||
.importzp tmp4
|
||||
.import mul40,loc_tmp
|
||||
.import _mul40
|
||||
.importzp mouse_txt_char ; screen code of mouse cursor
|
||||
|
||||
.include "atari.inc"
|
||||
@ -104,22 +104,15 @@ movex:
|
||||
|
||||
; Move the mouse cursor y position to the value in A/X.
|
||||
movey:
|
||||
tax
|
||||
ldy tmp4 ; mul40 uses tmp4
|
||||
lda loc_tmp ; and this local variable
|
||||
pha
|
||||
txa ; get parameter back
|
||||
ldy tmp4 ; mul40 uses tmp4, save in Y
|
||||
lsr a ; convert y position to character line
|
||||
lsr a
|
||||
lsr a
|
||||
jsr mul40
|
||||
clc
|
||||
jsr _mul40 ; carry is cleared by _mul40
|
||||
adc SAVMSC
|
||||
sta scrptr
|
||||
txa
|
||||
adc SAVMSC+1
|
||||
sta scrptr+1
|
||||
pla
|
||||
sta loc_tmp
|
||||
sty tmp4
|
||||
sty tmp4 ; restore tmp4
|
||||
rts
|
||||
|
@ -1,35 +0,0 @@
|
||||
;
|
||||
; Christian Groessler, June 2000
|
||||
;
|
||||
; mul40
|
||||
; multiplies A by 40 and returns result in AX
|
||||
; uses tmp4
|
||||
|
||||
.importzp tmp4
|
||||
.export mul40,loc_tmp
|
||||
|
||||
; mul40 -- multiply the 8-bit value in A by 40.
;
; In:   A = value
; Out:  A = low byte of the product, X = high byte of the product
; Overwrites: tmp4 (high byte while shifting) and loc_tmp (copy of value)
;
; Method: the 16-bit pair tmp4:A is shifted left twice (val * 4), val is
; added once (val * 5), then the pair is shifted left three more times.
.proc mul40
|
||||
|
||||
ldx #0 ; X = 0, only used to clear tmp4
|
||||
stx tmp4 ; high byte of the product starts at 0
|
||||
sta loc_tmp ; keep val for the addition below
|
||||
asl a ; tmp4:A = val * 2
|
||||
rol tmp4
|
||||
asl a
|
||||
rol tmp4 ; val * 4
|
||||
adc loc_tmp ; low byte of val * 5; C is 0 here, the ROL above shifted out a zero
|
||||
bcc L1 ; no carry out of the low byte
|
||||
inc tmp4 ; val * 5 (carry of the addition added to the high byte)
|
||||
L1: asl a ; tmp4:A holds val * 5 at this label
|
||||
rol tmp4 ; val * 10
|
||||
asl a
|
||||
rol tmp4 ; val * 20
|
||||
asl a
|
||||
rol tmp4 ; val * 40
|
||||
ldx tmp4 ; return the high byte in X, the low byte is already in A
|
||||
rts
|
||||
|
||||
.endproc
|
||||
|
||||
.bss
|
||||
loc_tmp:.res 1
|
@ -8,7 +8,7 @@
|
||||
|
||||
.include "atari.inc"
|
||||
.importzp tmp1,tmp4,ptr1,ptr2
|
||||
.import mul40,_clrscr
|
||||
.import _mul40,_clrscr
|
||||
.export __scroll
|
||||
|
||||
.proc __scroll
|
||||
@ -40,7 +40,7 @@ down_ok:lda SAVMSC
|
||||
sta ptr2+1
|
||||
|
||||
lda tmp1
|
||||
jsr mul40
|
||||
jsr _mul40
|
||||
sta tmp4
|
||||
lda ptr2
|
||||
sec
|
||||
@ -103,8 +103,7 @@ up: sta tmp1 ; # of lines to scroll
|
||||
jmp _clrscr
|
||||
|
||||
;multiply by 40 (xsize)
|
||||
up_ok: jsr mul40
|
||||
clc
|
||||
up_ok: jsr _mul40 ; carry is cleared by _mul40
|
||||
adc SAVMSC ; add start of screen mem
|
||||
sta ptr2
|
||||
txa
|
||||
|
@ -4,7 +4,7 @@
|
||||
; cursor handling, internal function
|
||||
|
||||
.include "atari.inc"
|
||||
.import cursor,mul40
|
||||
.import cursor,_mul40
|
||||
.export setcursor
|
||||
|
||||
.proc setcursor
|
||||
@ -14,8 +14,7 @@
|
||||
sta (OLDADR),y
|
||||
|
||||
lda ROWCRS
|
||||
jsr mul40
|
||||
clc
|
||||
jsr _mul40 ; function leaves with carry clear!
|
||||
adc SAVMSC ; add start of screen memory
|
||||
sta OLDADR
|
||||
txa
|
||||
|
@ -10,7 +10,7 @@
|
||||
|
||||
.export _cputcxy, _cputc
|
||||
.export plot, cputdirect, putchar
|
||||
.import gotoxy, mul20
|
||||
.import gotoxy, _mul20
|
||||
.importzp ptr4
|
||||
.import setcursor
|
||||
|
||||
@ -75,8 +75,7 @@ putchar:
|
||||
pha ; save char
|
||||
|
||||
lda ROWCRS_5200
|
||||
jsr mul20 ; destroys tmp4
|
||||
clc
|
||||
jsr _mul20 ; destroys tmp4, carry is cleared
|
||||
adc SAVMSC ; add start of screen memory
|
||||
sta ptr4
|
||||
txa
|
||||
|
@ -1,33 +0,0 @@
|
||||
;
|
||||
; Christian Groessler, April 2014
|
||||
;
|
||||
; mul20
|
||||
; multiplies A by 20 and returns result in AX
|
||||
; uses tmp4
|
||||
|
||||
.importzp tmp4
|
||||
.export mul20,loc_tmp
|
||||
|
||||
; mul20 -- multiply the 8-bit value in A by 20.
;
; In:   A = value
; Out:  A = low byte of the product, X = high byte of the product
; Overwrites: tmp4 (high byte while shifting) and loc_tmp (copy of value)
;
; Method: the 16-bit pair tmp4:A is shifted left twice (val * 4), val is
; added once (val * 5), then the pair is shifted left two more times.
.proc mul20
|
||||
|
||||
ldx #0 ; X = 0, only used to clear tmp4
|
||||
stx tmp4 ; high byte of the product starts at 0
|
||||
sta loc_tmp ; keep val for the addition below
|
||||
asl a ; tmp4:A = val * 2
|
||||
rol tmp4
|
||||
asl a
|
||||
rol tmp4 ; val * 4
|
||||
adc loc_tmp ; low byte of val * 5; C is 0 here, the ROL above shifted out a zero
|
||||
bcc L1 ; no carry out of the low byte
|
||||
inc tmp4 ; val * 5 (carry of the addition added to the high byte)
|
||||
L1: asl a ; tmp4:A holds val * 5 at this label
|
||||
rol tmp4 ; val * 10
|
||||
asl a
|
||||
rol tmp4 ; val * 20
|
||||
ldx tmp4 ; return the high byte in X, the low byte is already in A
|
||||
rts
|
||||
|
||||
.endproc
|
||||
|
||||
.bss
|
||||
loc_tmp:.res 1
|
47
libsrc/specialmath/mul20.s
Normal file
47
libsrc/specialmath/mul20.s
Normal file
@ -0,0 +1,47 @@
|
||||
; mul20.s
|
||||
;
|
||||
; This file is part of
|
||||
; cc65 - a freeware C compiler for 6502 based systems
|
||||
;
|
||||
; https://github.com/cc65/cc65
|
||||
;
|
||||
; See "LICENSE" file for legal information.
|
||||
;
|
||||
;
|
||||
; unsigned int __fastcall__ mul20(unsigned char value);
|
||||
;
|
||||
; REMARKS: Function is defined to return with carry-flag cleared
|
||||
|
||||
|
||||
.importzp tmp4
|
||||
.export _mul20
|
||||
|
||||
; _mul20 -- multiply the 8-bit value in A by 20.
;
; In:   A = value (0..255)
; Out:  A = low byte of value * 20
;       X = high byte of value * 20
;       C = 0
; Overwrites: tmp4. Y is never touched.
;
; Method: value * 4 is formed with two ASLs while X collects the two bits
; shifted out of A; value is added once to get value * 5; the 16-bit pair
; tmp4:A is then shifted left twice.
.proc _mul20 ; = 30 bytes, 41/46 cycles
|
||||
|
||||
sta tmp4 ; tmp4 = value, the addend for the * 5 step below
|
||||
ldx #0 ; X collects the high byte of the product
|
||||
asl a ; A = low byte of value * 2, C = bit 7 of value
|
||||
bcc mul4 ; bit 7 clear: nothing spilled into the high byte
|
||||
ldx #2 ; bit 7 of value is bit 1 of the high byte of value * 4
|
||||
|
||||
mul4: asl a ; A = low byte of value * 4, C = bit 6 of value
|
||||
bcc mul5 ; bit 6 clear: C is already 0 for the ADC
|
||||
inx ; bit 6 of value is bit 0 of the high byte of value * 4
|
||||
clc ; the ASL left C set, clear it before adding
|
||||
|
||||
mul5: adc tmp4 ; A = low byte of value * 4 + value = value * 5
|
||||
bcc mul10 ; no carry out of the low byte
|
||||
inx ; carry out of the low byte goes into the high byte
|
||||
|
||||
mul10: stx tmp4 ; tmp4 = high byte of value * 5, now shift the pair tmp4:A
|
||||
|
||||
asl a ; * 10
|
||||
rol tmp4 ; bit shifted out of A enters the high byte
|
||||
|
||||
asl a ; * 20
|
||||
rol tmp4 ; 255 * 20 = 5100 fits in 16 bits, so a zero is shifted out: C = 0
|
||||
|
||||
ldx tmp4 ; deliver high-byte in X
|
||||
rts
|
||||
|
||||
.endproc
|
50
libsrc/specialmath/mul40.s
Normal file
50
libsrc/specialmath/mul40.s
Normal file
@ -0,0 +1,50 @@
|
||||
; mul40.s
|
||||
;
|
||||
; This file is part of
|
||||
; cc65 - a freeware C compiler for 6502 based systems
|
||||
;
|
||||
; https://github.com/cc65/cc65
|
||||
;
|
||||
; See "LICENSE" file for legal information.
|
||||
;
|
||||
;
|
||||
; unsigned int __fastcall__ mul40(unsigned char value);
|
||||
;
|
||||
; REMARKS: Function is defined to return with carry-flag cleared
|
||||
|
||||
|
||||
.importzp tmp4
|
||||
.export _mul40
|
||||
|
||||
; _mul40 -- multiply the 8-bit value in A by 40.
;
; In:   A = value (0..255)
; Out:  A = low byte of value * 40
;       X = high byte of value * 40
;       C = 0
; Overwrites: tmp4. Y is never touched.
;
; Method: value * 4 is formed with two ASLs while X collects the two bits
; shifted out of A; value is added once to get value * 5; the 16-bit pair
; tmp4:A is then shifted left three times.
.proc _mul40 ; = 33 bytes, 48/53 cycles
|
||||
|
||||
sta tmp4 ; tmp4 = value, the addend for the * 5 step below
|
||||
ldx #0 ; X collects the high byte of the product
|
||||
asl a ; A = low byte of value * 2, C = bit 7 of value
|
||||
bcc mul4 ; bit 7 clear: nothing spilled into the high byte
|
||||
ldx #2 ; bit 7 of value is bit 1 of the high byte of value * 4
|
||||
|
||||
mul4: asl a ; A = low byte of value * 4, C = bit 6 of value
|
||||
bcc mul5 ; bit 6 clear: C is already 0 for the ADC
|
||||
inx ; bit 6 of value is bit 0 of the high byte of value * 4
|
||||
clc ; the ASL left C set, clear it before adding
|
||||
|
||||
mul5: adc tmp4 ; A = low byte of value * 4 + value = value * 5
|
||||
bcc mul10 ; no carry out of the low byte
|
||||
inx ; carry out of the low byte goes into the high byte
|
||||
|
||||
mul10: stx tmp4 ; tmp4 = high byte of value * 5, now shift the pair tmp4:A
|
||||
|
||||
asl a ; * 10
|
||||
rol tmp4 ; bit shifted out of A enters the high byte
|
||||
|
||||
asl a ; * 20
|
||||
rol tmp4
|
||||
|
||||
asl a ; * 40
|
||||
rol tmp4 ; 255 * 40 = 10200 fits in 16 bits, so a zero is shifted out: C = 0
|
||||
|
||||
ldx tmp4 ; deliver high-byte in X
|
||||
rts
|
||||
|
||||
.endproc
|
18
test/val/lib_specialmath_mulxx.c
Normal file
18
test/val/lib_specialmath_mulxx.c
Normal file
@ -0,0 +1,18 @@
|
||||
#include <specialmath.h>
|
||||
#include "unittest.h"
|
||||
|
||||
/* Exhaustive check of the specialmath multiply helpers: for every input
** 0..255 the result of mul20() and mul40() is compared against an ordinary
** C multiplication by 20 and by 40.
*/
TEST
|
||||
{
|
||||
unsigned i; /* wider than 8 bits so that the condition i < 256 can become false */
|
||||
|
||||
/* mul20(): all 256 possible arguments */
for (i=0; i < 256; ++i)
|
||||
{
|
||||
ASSERT_AreEqual(i*20, mul20(i), "%u", "Invalid 'mul20(%u)' calculation!" COMMA i);
|
||||
}
|
||||
|
||||
/* mul40(): all 256 possible arguments */
for (i=0; i < 256; ++i)
|
||||
{
|
||||
ASSERT_AreEqual(i*40, mul40(i), "%u", "Invalid 'mul40(%u)' calculation!" COMMA i);
|
||||
}
|
||||
}
|
||||
ENDTEST
|
Loading…
x
Reference in New Issue
Block a user