mirror of
https://github.com/cc65/cc65.git
synced 2025-01-28 00:30:12 +00:00
Optimized mul20 & mul40 and extracted to new library.
This commit is contained in:
parent
fbf302e6cd
commit
399250a105
@ -97,6 +97,9 @@
|
|||||||
Describes Christian Krüger's macro package for writing self modifying
|
Describes Christian Krüger's macro package for writing self modifying
|
||||||
assembler code.
|
assembler code.
|
||||||
|
|
||||||
|
<tag><htmlurl url="specialmath.html" name="specialmath.html"></tag>
|
||||||
|
Library of speed-optimized math functions.
|
||||||
|
|
||||||
<tag><url name="6502 Binary Relocation Format document"
|
<tag><url name="6502 Binary Relocation Format document"
|
||||||
url="http://www.6502.org/users/andre/o65/fileformat.html"></tag>
|
url="http://www.6502.org/users/andre/o65/fileformat.html"></tag>
|
||||||
Describes the o65 file format that is used for dynamically loadable modules
|
Describes the o65 file format that is used for dynamically loadable modules
|
||||||
|
38
doc/specialmath.sgml
Normal file
38
doc/specialmath.sgml
Normal file
@ -0,0 +1,38 @@
|
|||||||
|
<!doctype linuxdoc system>
|
||||||
|
|
||||||
|
<article>
|
||||||
|
<title>Special math functions
|
||||||
|
<author>Christian Krüger
|
||||||
|
|
||||||
|
<abstract>
|
||||||
|
This library provides speed-optimized functions for special math operations.
|
||||||
|
</abstract>
|
||||||
|
|
||||||
|
<!-- Table of contents -->
|
||||||
|
<toc>
|
||||||
|
|
||||||
|
<!-- Begin the document -->
|
||||||
|
|
||||||
|
<sect>Multiplication<p>
|
||||||
|
|
||||||
|
When accessing screen memory, the vertical position often has to be multiplied by the line width to calculate
|
||||||
|
the target address. A quite common horizontal span on 8-bit systems is 20 or 40 bytes (besides e.g. 32 bytes, where the multiplication can be accomplished by shifting left 5 times).
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<tscreen><verb>
|
||||||
|
unsigned int __fastcall__ mul20(unsigned char value);
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
The 8-bit <tt>value</tt> is multiplied by 20 and the result is returned as a 16-bit value.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
<p>
|
||||||
|
<tscreen><verb>
|
||||||
|
unsigned int __fastcall__ mul40(unsigned char value);
|
||||||
|
</verb></tscreen>
|
||||||
|
|
||||||
|
The 8-bit <tt>value</tt> is multiplied by 40 and the result is returned as a 16-bit value.
|
||||||
|
</p>
|
||||||
|
|
||||||
|
|
||||||
|
</article>
|
46
include/specialmath.h
Normal file
46
include/specialmath.h
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
/*****************************************************************************/
|
||||||
|
/* */
|
||||||
|
/* specialmath.h */
|
||||||
|
/* */
|
||||||
|
/* Optimized math routines for special usages */
|
||||||
|
/* */
|
||||||
|
/* */
|
||||||
|
/* */
|
||||||
|
/* (C) 2019 Christian 'Irgendwer' Krueger */
|
||||||
|
/* */
|
||||||
|
/* This software is provided 'as-is', without any expressed or implied */
|
||||||
|
/* warranty. In no event will the authors be held liable for any damages */
|
||||||
|
/* arising from the use of this software. */
|
||||||
|
/* */
|
||||||
|
/* Permission is granted to anyone to use this software for any purpose, */
|
||||||
|
/* including commercial applications, and to alter it and redistribute it */
|
||||||
|
/* freely, subject to the following restrictions: */
|
||||||
|
/* */
|
||||||
|
/* 1. The origin of this software must not be misrepresented; you must not */
|
||||||
|
/* claim that you wrote the original software. If you use this software */
|
||||||
|
/* in a product, an acknowledgment in the product documentation would be */
|
||||||
|
/* appreciated but is not required. */
|
||||||
|
/* 2. Altered source versions must be plainly marked as such, and must not */
|
||||||
|
/* be misrepresented as being the original software. */
|
||||||
|
/* 3. This notice may not be removed or altered from any source */
|
||||||
|
/* distribution. */
|
||||||
|
/* */
|
||||||
|
/*****************************************************************************/
|
||||||
|
|
||||||
|
#ifndef _SPECIALMATH_H
|
||||||
|
#define _SPECIALMATH_H
|
||||||
|
|
||||||
|
|
||||||
|
/* Multiply an 8 bit unsigned value by 20 and return the 16 bit unsigned result */
|
||||||
|
|
||||||
|
unsigned int __fastcall__ mul20(unsigned char value);
|
||||||
|
|
||||||
|
|
||||||
|
/* Multiply an 8 bit unsigned value by 40 and return the 16 bit unsigned result */
|
||||||
|
|
||||||
|
unsigned int __fastcall__ mul40(unsigned char value);
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
/* End of specialmath.h */
|
||||||
|
#endif
|
@ -181,6 +181,7 @@ SRCDIRS += common \
|
|||||||
mouse \
|
mouse \
|
||||||
runtime \
|
runtime \
|
||||||
serial \
|
serial \
|
||||||
|
specialmath \
|
||||||
tgi \
|
tgi \
|
||||||
zlib
|
zlib
|
||||||
|
|
||||||
|
@ -7,7 +7,7 @@
|
|||||||
|
|
||||||
.export _cputcxy, _cputc
|
.export _cputcxy, _cputc
|
||||||
.export plot, cputdirect, putchar
|
.export plot, cputdirect, putchar
|
||||||
.import gotoxy, mul40
|
.import gotoxy, _mul40
|
||||||
.importzp tmp4,ptr4
|
.importzp tmp4,ptr4
|
||||||
.import _revflag,setcursor
|
.import _revflag,setcursor
|
||||||
|
|
||||||
@ -71,8 +71,7 @@ putchar:
|
|||||||
sta (OLDADR),y
|
sta (OLDADR),y
|
||||||
|
|
||||||
lda ROWCRS
|
lda ROWCRS
|
||||||
jsr mul40 ; destroys tmp4
|
jsr _mul40 ; destroys tmp4, carry is cleared
|
||||||
clc
|
|
||||||
adc SAVMSC ; add start of screen memory
|
adc SAVMSC ; add start of screen memory
|
||||||
sta ptr4
|
sta ptr4
|
||||||
txa
|
txa
|
||||||
|
@ -12,7 +12,7 @@
|
|||||||
|
|
||||||
.export _mouse_txt_callbacks
|
.export _mouse_txt_callbacks
|
||||||
.importzp tmp4
|
.importzp tmp4
|
||||||
.import mul40,loc_tmp
|
.import _mul40
|
||||||
.importzp mouse_txt_char ; screen code of mouse cursor
|
.importzp mouse_txt_char ; screen code of mouse cursor
|
||||||
|
|
||||||
.include "atari.inc"
|
.include "atari.inc"
|
||||||
@ -104,22 +104,15 @@ movex:
|
|||||||
|
|
||||||
; Move the mouse cursor y position to the value in A/X.
|
; Move the mouse cursor y position to the value in A/X.
|
||||||
movey:
|
movey:
|
||||||
tax
|
ldy tmp4 ; mul40 uses tmp4, save in Y
|
||||||
ldy tmp4 ; mul40 uses tmp4
|
|
||||||
lda loc_tmp ; and this local variable
|
|
||||||
pha
|
|
||||||
txa ; get parameter back
|
|
||||||
lsr a ; convert y position to character line
|
lsr a ; convert y position to character line
|
||||||
lsr a
|
lsr a
|
||||||
lsr a
|
lsr a
|
||||||
jsr mul40
|
jsr _mul40 ; carry is cleared by _mul40
|
||||||
clc
|
|
||||||
adc SAVMSC
|
adc SAVMSC
|
||||||
sta scrptr
|
sta scrptr
|
||||||
txa
|
txa
|
||||||
adc SAVMSC+1
|
adc SAVMSC+1
|
||||||
sta scrptr+1
|
sta scrptr+1
|
||||||
pla
|
sty tmp4 ; restore tmp4
|
||||||
sta loc_tmp
|
|
||||||
sty tmp4
|
|
||||||
rts
|
rts
|
||||||
|
@ -1,35 +0,0 @@
|
|||||||
;
|
|
||||||
; Christian Groessler, June 2000
|
|
||||||
;
|
|
||||||
; mul40
|
|
||||||
; multiplies A by 40 and returns result in AX
|
|
||||||
; uses tmp4
|
|
||||||
|
|
||||||
.importzp tmp4
|
|
||||||
.export mul40,loc_tmp
|
|
||||||
|
|
||||||
.proc mul40
|
|
||||||
|
|
||||||
ldx #0
|
|
||||||
stx tmp4
|
|
||||||
sta loc_tmp
|
|
||||||
asl a
|
|
||||||
rol tmp4
|
|
||||||
asl a
|
|
||||||
rol tmp4 ; val * 4
|
|
||||||
adc loc_tmp
|
|
||||||
bcc L1
|
|
||||||
inc tmp4 ; val * 5
|
|
||||||
L1: asl a
|
|
||||||
rol tmp4 ; val * 10
|
|
||||||
asl a
|
|
||||||
rol tmp4
|
|
||||||
asl a
|
|
||||||
rol tmp4 ; val * 40
|
|
||||||
ldx tmp4
|
|
||||||
rts
|
|
||||||
|
|
||||||
.endproc
|
|
||||||
|
|
||||||
.bss
|
|
||||||
loc_tmp:.res 1
|
|
@ -8,7 +8,7 @@
|
|||||||
|
|
||||||
.include "atari.inc"
|
.include "atari.inc"
|
||||||
.importzp tmp1,tmp4,ptr1,ptr2
|
.importzp tmp1,tmp4,ptr1,ptr2
|
||||||
.import mul40,_clrscr
|
.import _mul40,_clrscr
|
||||||
.export __scroll
|
.export __scroll
|
||||||
|
|
||||||
.proc __scroll
|
.proc __scroll
|
||||||
@ -40,7 +40,7 @@ down_ok:lda SAVMSC
|
|||||||
sta ptr2+1
|
sta ptr2+1
|
||||||
|
|
||||||
lda tmp1
|
lda tmp1
|
||||||
jsr mul40
|
jsr _mul40
|
||||||
sta tmp4
|
sta tmp4
|
||||||
lda ptr2
|
lda ptr2
|
||||||
sec
|
sec
|
||||||
@ -103,8 +103,7 @@ up: sta tmp1 ; # of lines to scroll
|
|||||||
jmp _clrscr
|
jmp _clrscr
|
||||||
|
|
||||||
;multiply by 40 (xsize)
|
;multiply by 40 (xsize)
|
||||||
up_ok: jsr mul40
|
up_ok: jsr _mul40 ; carry is cleared by _mul40
|
||||||
clc
|
|
||||||
adc SAVMSC ; add start of screen mem
|
adc SAVMSC ; add start of screen mem
|
||||||
sta ptr2
|
sta ptr2
|
||||||
txa
|
txa
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
; cursor handling, internal function
|
; cursor handling, internal function
|
||||||
|
|
||||||
.include "atari.inc"
|
.include "atari.inc"
|
||||||
.import cursor,mul40
|
.import cursor,_mul40
|
||||||
.export setcursor
|
.export setcursor
|
||||||
|
|
||||||
.proc setcursor
|
.proc setcursor
|
||||||
@ -14,8 +14,7 @@
|
|||||||
sta (OLDADR),y
|
sta (OLDADR),y
|
||||||
|
|
||||||
lda ROWCRS
|
lda ROWCRS
|
||||||
jsr mul40
|
jsr _mul40 ; function leaves with carry clear!
|
||||||
clc
|
|
||||||
adc SAVMSC ; add start of screen memory
|
adc SAVMSC ; add start of screen memory
|
||||||
sta OLDADR
|
sta OLDADR
|
||||||
txa
|
txa
|
||||||
|
@ -10,7 +10,7 @@
|
|||||||
|
|
||||||
.export _cputcxy, _cputc
|
.export _cputcxy, _cputc
|
||||||
.export plot, cputdirect, putchar
|
.export plot, cputdirect, putchar
|
||||||
.import gotoxy, mul20
|
.import gotoxy, _mul20
|
||||||
.importzp ptr4
|
.importzp ptr4
|
||||||
.import setcursor
|
.import setcursor
|
||||||
|
|
||||||
@ -75,8 +75,7 @@ putchar:
|
|||||||
pha ; save char
|
pha ; save char
|
||||||
|
|
||||||
lda ROWCRS_5200
|
lda ROWCRS_5200
|
||||||
jsr mul20 ; destroys tmp4
|
jsr _mul20 ; destroys tmp4, carry is cleared
|
||||||
clc
|
|
||||||
adc SAVMSC ; add start of screen memory
|
adc SAVMSC ; add start of screen memory
|
||||||
sta ptr4
|
sta ptr4
|
||||||
txa
|
txa
|
||||||
|
@ -1,33 +0,0 @@
|
|||||||
;
|
|
||||||
; Christian Groessler, April 2014
|
|
||||||
;
|
|
||||||
; mul20
|
|
||||||
; multiplies A by 20 and returns result in AX
|
|
||||||
; uses tmp4
|
|
||||||
|
|
||||||
.importzp tmp4
|
|
||||||
.export mul20,loc_tmp
|
|
||||||
|
|
||||||
.proc mul20
|
|
||||||
|
|
||||||
ldx #0
|
|
||||||
stx tmp4
|
|
||||||
sta loc_tmp
|
|
||||||
asl a
|
|
||||||
rol tmp4
|
|
||||||
asl a
|
|
||||||
rol tmp4 ; val * 4
|
|
||||||
adc loc_tmp
|
|
||||||
bcc L1
|
|
||||||
inc tmp4 ; val * 5
|
|
||||||
L1: asl a
|
|
||||||
rol tmp4 ; val * 10
|
|
||||||
asl a
|
|
||||||
rol tmp4 ; val * 20
|
|
||||||
ldx tmp4
|
|
||||||
rts
|
|
||||||
|
|
||||||
.endproc
|
|
||||||
|
|
||||||
.bss
|
|
||||||
loc_tmp:.res 1
|
|
47
libsrc/specialmath/mul20.s
Normal file
47
libsrc/specialmath/mul20.s
Normal file
@ -0,0 +1,47 @@
|
|||||||
|
; mul20.s
|
||||||
|
;
|
||||||
|
; This file is part of
|
||||||
|
; cc65 - a freeware C compiler for 6502 based systems
|
||||||
|
;
|
||||||
|
; https://github.com/cc65/cc65
|
||||||
|
;
|
||||||
|
; See "LICENSE" file for legal information.
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; unsigned int __fastcall__ mul20(unsigned char value);
|
||||||
|
;
|
||||||
|
; REMARKS: Function is defined to return with carry-flag cleared
|
||||||
|
|
||||||
|
|
||||||
|
.importzp tmp4
|
||||||
|
.export _mul20
|
||||||
|
|
||||||
|
; _mul20: multiply the 8-bit value in A by 20.
;
; In:       A = value (0..255)
; Out:      A = low byte, X = high byte of value * 20
;           carry flag clear
; Changes:  tmp4 (used as scratch and overwritten); Y is not touched
;
; Method:   value * 5 is built as (value * 4) + value, with the high byte
;           collected in X; the 16-bit intermediate is then doubled twice
;           using asl on A (low byte) and rol on tmp4 (high byte).
;           The carry is clear on exit because the last rol shifts out bit 7
;           of the high byte, which is always 0 (255 * 20 = 5100 = $13EC).
.proc _mul20 ; = 30 bytes, 41/46 cycles
|
||||||
|
|
||||||
|
sta tmp4 ; keep the original value for the addition at mul5
|
||||||
|
ldx #0 ; X collects the high byte of value * 4 and value * 5
|
||||||
|
asl a ; * 2, bit 7 of value drops into carry
|
||||||
|
bcc mul4 ; bit 7 was clear: nothing to add to the high byte
|
||||||
|
ldx #2 ; bit 7 was set: after the second shift it is bit 1 of the high byte
|
||||||
|
|
||||||
|
mul4: asl a ; * 4, bit 6 of value drops into carry
|
||||||
|
bcc mul5 ; bit 6 was clear: carry is already 0 for the adc
|
||||||
|
inx ; bit 6 was set: it is bit 0 of the high byte
|
||||||
|
clc ; the asl left carry set, clear it before the adc
|
||||||
|
|
||||||
|
mul5: adc tmp4 ; * 5 (low byte of value * 4 plus the original value)
|
||||||
|
bcc mul10 ; no overflow out of the low byte
|
||||||
|
inx ; propagate the overflow of the addition into the high byte
|
||||||
|
|
||||||
|
mul10: stx tmp4 ; move the high byte to tmp4 so it can be rotated with rol
|
||||||
|
|
||||||
|
asl a ; * 10 (low byte, bit 7 goes to carry)
|
||||||
|
rol tmp4 ; high byte takes the carry from the low byte
|
||||||
|
|
||||||
|
asl a ; * 20 (low byte, bit 7 goes to carry)
|
||||||
|
rol tmp4 ; high byte takes the carry; shifts out a 0, so carry ends clear
|
||||||
|
|
||||||
|
ldx tmp4 ; deliver high-byte in X
|
||||||
|
rts ; A = low byte, X = high byte, carry clear
|
||||||
|
|
||||||
|
.endproc
|
50
libsrc/specialmath/mul40.s
Normal file
50
libsrc/specialmath/mul40.s
Normal file
@ -0,0 +1,50 @@
|
|||||||
|
; mul40.s
|
||||||
|
;
|
||||||
|
; This file is part of
|
||||||
|
; cc65 - a freeware C compiler for 6502 based systems
|
||||||
|
;
|
||||||
|
; https://github.com/cc65/cc65
|
||||||
|
;
|
||||||
|
; See "LICENSE" file for legal information.
|
||||||
|
;
|
||||||
|
;
|
||||||
|
; unsigned int __fastcall__ mul40(unsigned char value);
|
||||||
|
;
|
||||||
|
; REMARKS: Function is defined to return with carry-flag cleared
|
||||||
|
|
||||||
|
|
||||||
|
.importzp tmp4
|
||||||
|
.export _mul40
|
||||||
|
|
||||||
|
; _mul40: multiply the 8-bit value in A by 40.
;
; In:       A = value (0..255)
; Out:      A = low byte, X = high byte of value * 40
;           carry flag clear
; Changes:  tmp4 (used as scratch and overwritten); Y is not touched
;
; Method:   value * 5 is built as (value * 4) + value, with the high byte
;           collected in X; the 16-bit intermediate is then doubled three
;           times using asl on A (low byte) and rol on tmp4 (high byte).
;           The carry is clear on exit because the last rol shifts out bit 7
;           of the high byte, which is always 0 (255 * 40 = 10200 = $27D8).
.proc _mul40 ; = 33 bytes, 48/53 cycles
|
||||||
|
|
||||||
|
sta tmp4 ; keep the original value for the addition at mul5
|
||||||
|
ldx #0 ; X collects the high byte of value * 4 and value * 5
|
||||||
|
asl a ; * 2, bit 7 of value drops into carry
|
||||||
|
bcc mul4 ; bit 7 was clear: nothing to add to the high byte
|
||||||
|
ldx #2 ; bit 7 was set: after the second shift it is bit 1 of the high byte
|
||||||
|
|
||||||
|
mul4: asl a ; * 4, bit 6 of value drops into carry
|
||||||
|
bcc mul5 ; bit 6 was clear: carry is already 0 for the adc
|
||||||
|
inx ; bit 6 was set: it is bit 0 of the high byte
|
||||||
|
clc ; the asl left carry set, clear it before the adc
|
||||||
|
|
||||||
|
mul5: adc tmp4 ; * 5 (low byte of value * 4 plus the original value)
|
||||||
|
bcc mul10 ; no overflow out of the low byte
|
||||||
|
inx ; propagate the overflow of the addition into the high byte
|
||||||
|
|
||||||
|
mul10: stx tmp4 ; move the high byte to tmp4 so it can be rotated with rol
|
||||||
|
|
||||||
|
asl a ; * 10 (low byte, bit 7 goes to carry)
|
||||||
|
rol tmp4 ; high byte takes the carry from the low byte
|
||||||
|
|
||||||
|
asl a ; * 20 (low byte, bit 7 goes to carry)
|
||||||
|
rol tmp4 ; high byte takes the carry from the low byte
|
||||||
|
|
||||||
|
asl a ; * 40 (low byte, bit 7 goes to carry)
|
||||||
|
rol tmp4 ; high byte takes the carry; shifts out a 0, so carry ends clear
|
||||||
|
|
||||||
|
ldx tmp4 ; deliver high-byte in X
|
||||||
|
rts ; A = low byte, X = high byte, carry clear
|
||||||
|
|
||||||
|
.endproc
|
18
test/val/lib_specialmath_mulxx.c
Normal file
18
test/val/lib_specialmath_mulxx.c
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
#include <specialmath.h>
|
||||||
|
#include "unittest.h"
|
||||||
|
|
||||||
|
/* Exhaustive check of mul20() and mul40(): every argument from 0 to 255 is
** compared against the product computed by the compiler's own multiplication.
*/
TEST
|
||||||
|
{
|
||||||
|
unsigned i;
|
||||||
|
|
||||||
|
/* mul20(): all 256 possible 8-bit arguments */
for (i=0; i < 256; ++i)
|
||||||
|
{
|
||||||
|
ASSERT_AreEqual(i*20, mul20(i), "%u", "Invalid 'mul20(%u)' calculation!" COMMA i);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* mul40(): all 256 possible 8-bit arguments */
for (i=0; i < 256; ++i)
|
||||||
|
{
|
||||||
|
ASSERT_AreEqual(i*40, mul40(i), "%u", "Invalid 'mul40(%u)' calculation!" COMMA i);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ENDTEST
|
Loading…
x
Reference in New Issue
Block a user