kickc/src/test/ref/min-fmul-16.asm

395 lines
7.5 KiB
NASM

/// @file
/// A lightweight library for printing on the C64.
///
/// Printing with this library is done by calling a print_ function for each element.
// Commodore 64 PRG executable file
// The PRG is built from the Program segment, which is the Basic, Code and Data segments in that order.
.file [name="min-fmul-16.prg", type="prg", segments="Program"]
.segmentdef Program [segments="Basic, Code, Data"]
.segmentdef Basic [start=$0801]
.segmentdef Code [start=$80d]
.segmentdef Data [startAfter="Code"]
.segment Basic
// KickAssembler macro call; emits the BASIC stub that transfers control to main
:BasicUpstart(main)
// Polled by main until it reads $ff
.label RASTER = $d012
// Incremented before and decremented after the mulf16u call in main
.label BORDER_COLOR = $d020
// Address main loads into print_char_cursor before each round of printing
.label SCREEN = $400
// Zero-page pointer (2 bytes) to the location print_char writes next.
// Address 2 is also used as mulf_init.sqr1_hi; main only sets the cursor after mulf_init has returned.
.label print_char_cursor = 2
.segment Code
main: {
    // Multiplication operands; mulf16u reads them as main.a and main.b
    .label a = $4d2
    .label b = $929
    // Zero-page address of the 32-bit product (same address as mulf16u.return and print_ulong.dw)
    .label r = $a

    // Build the square tables once, before anything else uses them
    jsr mulf_init
    // Set the interrupt-disable flag for the rest of the program
    sei
    // print_char_cursor = SCREEN
    lda #<SCREEN
    sta.z print_char_cursor
    lda #>SCREEN
    sta.z print_char_cursor+1
wait_raster:
    // Spin until RASTER reads $ff
    lda #$ff
    cmp RASTER
    bne wait_raster
    // The border color is bumped for the duration of the multiplication
    // (presumably as a visual timing marker)
    inc BORDER_COLOR
    // r = mulf16u(a, b)
    jsr mulf16u
    dec BORDER_COLOR
    // Print the product as hex
    jsr print_ulong
    // Rewind the cursor so the next result is written at SCREEN again
    lda #<SCREEN
    sta.z print_char_cursor
    lda #>SCREEN
    sta.z print_char_cursor+1
    jmp wait_raster
}
// Initialize the mulf_sqr multiplication tables with f(x)=int(x*x/4)
// First loop:  writes entries 1..511 of mulf_sqr1_lo/mulf_sqr1_hi with the low/high byte of f(x).
//              Entry 0 is not written here; the tables are zero-filled in the Data segment.
// Second loop: writes entries 0..510 of mulf_sqr2_lo/mulf_sqr2_hi by copying from mulf_sqr1,
//              then entry 511 is copied separately from mulf_sqr1 entry 256.
// Register X holds x/2 (x_2) during the first loop and the index x_255 during the second loop.
// Uses A, X, Y and the zero-page locations labelled below.
mulf_init: {
// Counter used for determining x%2==0
.label c = $f
// Pointer to the mulf_sqr1_hi entry written next.
// Shares zero-page address 2 with print_char_cursor, which main sets only after this routine returns.
.label sqr1_hi = 2
// Fill mulf_sqr1 = f(x) = int(x*x/4): If f(x) = x*x/4 then f(x+1) = f(x) + x/2 + 1/4
// Running 16-bit value stored into the mulf_sqr1 tables
.label sqr = 6
// Pointer to the mulf_sqr1_lo entry written next (loop variable of the first loop)
.label sqr1_lo = 4
// Pointer to the mulf_sqr2_hi entry written next
.label sqr2_hi = $10
// Pointer to the mulf_sqr2_lo entry written next (loop variable of the second loop)
.label sqr2_lo = 8
// Decrease or increase x_255 - initially we decrease
.label dir = $e
// x_2 = 0 (kept in X)
ldx #0
// sqr1_hi = mulf_sqr1_hi+1
lda #<mulf_sqr1_hi+1
sta.z sqr1_hi
lda #>mulf_sqr1_hi+1
sta.z sqr1_hi+1
// A = 0 (copied from X): sqr = 0, c = 0
txa
sta.z sqr
sta.z sqr+1
sta.z c
// sqr1_lo = mulf_sqr1_lo+1
lda #<mulf_sqr1_lo+1
sta.z sqr1_lo
lda #>mulf_sqr1_lo+1
sta.z sqr1_lo+1
__b1:
// for(byte* sqr1_lo = mulf_sqr1_lo+1; sqr1_lo!=mulf_sqr1_lo+512; sqr1_lo++)
// Loop test: branch to the loop body (__b2) while sqr1_lo != mulf_sqr1_lo+512
lda.z sqr1_lo+1
cmp #>mulf_sqr1_lo+$200
bne __b2
lda.z sqr1_lo
cmp #<mulf_sqr1_lo+$200
bne __b2
// First loop finished: set up the second loop.
// dir = $ff, i.e. adding dir decrements x_255 (8-bit wrap-around)
lda #$ff
sta.z dir
lda #<mulf_sqr2_hi
sta.z sqr2_hi
lda #>mulf_sqr2_hi
sta.z sqr2_hi+1
// x_255 = 255 (kept in X)
//Start with g(0)=f(255)
ldx #-1
lda #<mulf_sqr2_lo
sta.z sqr2_lo
lda #>mulf_sqr2_lo
sta.z sqr2_lo+1
__b5:
// for(byte* sqr2_lo = mulf_sqr2_lo; sqr2_lo!=mulf_sqr2_lo+511; sqr2_lo++)
// Loop test: branch to the loop body (__b6) while sqr2_lo != mulf_sqr2_lo+511
lda.z sqr2_lo+1
cmp #>mulf_sqr2_lo+$1ff
bne __b6
lda.z sqr2_lo
cmp #<mulf_sqr2_lo+$1ff
bne __b6
// *(mulf_sqr2_lo+511) = *(mulf_sqr1_lo+256)
// Set the very last value g(511) = f(256)
// (entry 256 cannot be reached through the 8-bit index X used in the loop body)
lda mulf_sqr1_lo+$100
sta mulf_sqr2_lo+$1ff
// *(mulf_sqr2_hi+511) = *(mulf_sqr1_hi+256)
lda mulf_sqr1_hi+$100
sta mulf_sqr2_hi+$1ff
// }
rts
__b6:
// Body of the second loop
// *sqr2_lo = mulf_sqr1_lo[x_255]
lda mulf_sqr1_lo,x
ldy #0
sta (sqr2_lo),y
// *sqr2_hi++ = mulf_sqr1_hi[x_255]
lda mulf_sqr1_hi,x
sta (sqr2_hi),y
// *sqr2_hi++ = mulf_sqr1_hi[x_255];
// 16-bit increment of the sqr2_hi pointer
inc.z sqr2_hi
bne !+
inc.z sqr2_hi+1
!:
// x_255 = x_255 + dir
txa
clc
adc.z dir
tax
// if(x_255==0)
// Once x_255 reaches 0, switch to counting up
cpx #0
bne __b8
lda #1
sta.z dir
__b8:
// for(byte* sqr2_lo = mulf_sqr2_lo; sqr2_lo!=mulf_sqr2_lo+511; sqr2_lo++)
// 16-bit increment of the sqr2_lo pointer, then back to the loop test
inc.z sqr2_lo
bne !+
inc.z sqr2_lo+1
!:
jmp __b5
__b2:
// Body of the first loop
// if((++c&1)==0)
inc.z c
// ++c&1
lda #1
and.z c
// if((++c&1)==0)
cmp #0
bne __b3
// On even iterations both x_2 and sqr are incremented
// x_2++;
inx
// sqr++;
inc.z sqr
bne !+
inc.z sqr+1
!:
__b3:
// BYTE0(sqr)
lda.z sqr
// *sqr1_lo = BYTE0(sqr)
ldy #0
sta (sqr1_lo),y
// BYTE1(sqr)
lda.z sqr+1
// *sqr1_hi++ = BYTE1(sqr)
sta (sqr1_hi),y
// *sqr1_hi++ = BYTE1(sqr);
// 16-bit increment of the sqr1_hi pointer
inc.z sqr1_hi
bne !+
inc.z sqr1_hi+1
!:
// sqr = sqr + x_2
// 16-bit sqr plus the 8-bit value in X
txa
clc
adc.z sqr
sta.z sqr
bcc !+
inc.z sqr+1
!:
// for(byte* sqr1_lo = mulf_sqr1_lo+1; sqr1_lo!=mulf_sqr1_lo+512; sqr1_lo++)
// 16-bit increment of the sqr1_lo pointer, then back to the loop test
inc.z sqr1_lo
bne !+
inc.z sqr1_lo+1
!:
jmp __b1
}
// Fast multiply two unsigned words to a double word result
// Done in assembler to utilize fast addition A+X
//
// The operands are not passed at run time: the assembly-time constants main.a and main.b
// are stored into memA and memB at the start of the routine.
// The 32-bit product is built in memR and then copied to the zero-page return location.
//
// Each 8x8 partial product is computed as  mulf_sqr1[p+X] - mulf_sqr2[(p eor $ff)+X],
// where p is a byte of a patched into the low address byte of the table operand and X is a byte of b.
// This is self-modifying code: the sm* labels mark table reads whose operand is patched, and the
// _AA/_bb/_BB/_CC/_cc/_dd labels mark immediates that receive intermediate result bytes.
// Patching only the low address byte relies on the tables starting on a $100 boundary
// (see the .align $100 directives in the Data segment).
// Uses A and X.
// __zp($a) unsigned long mulf16u(unsigned int a, unsigned int b)
mulf16u: {
.label memA = $f8
.label memB = $fa
.label memR = $fc
.label return = $a
// *memA = a
lda #<main.a
sta.z memA
lda #>main.a
sta.z memA+1
// *memB = b
lda #<main.b
sta.z memB
lda #>main.b
sta.z memB+1
// asm
// Patch the low byte of a into the mulf_sqr1 reads of the "a" group...
lda memA
sta sm1a+1
sta sm3a+1
sta sm5a+1
sta sm7a+1
// ...and its one's complement into the mulf_sqr2 reads of the "a" group
eor #$ff
sta sm2a+1
sta sm4a+1
sta sm6a+1
sta sm8a+1
// Same for the high byte of a and the "b" group
lda memA+1
sta sm1b+1
sta sm3b+1
sta sm5b+1
sta sm7b+1
eor #$ff
sta sm2b+1
sta sm4b+1
sta sm6b+1
sta sm8b+1
// Partial product <a * <b = AAaa: aa goes to memR+0, AA is patched into _AA
ldx memB
sec
sm1a:
lda mulf_sqr1_lo,x
sm2a:
sbc mulf_sqr2_lo,x
sta memR+0
sm3a:
lda mulf_sqr1_hi,x
sm4a:
sbc mulf_sqr2_hi,x
sta _AA+1
// Partial product >a * <b = CCcc: patched into _cc and _CC
sec
sm1b:
lda mulf_sqr1_lo,x
sm2b:
sbc mulf_sqr2_lo,x
sta _cc+1
sm3b:
lda mulf_sqr1_hi,x
sm4b:
sbc mulf_sqr2_hi,x
sta _CC+1
// Partial product <a * >b = BBbb: patched into _bb and _BB
ldx memB+1
sec
sm5a:
lda mulf_sqr1_lo,x
sm6a:
sbc mulf_sqr2_lo,x
sta _bb+1
sm7a:
lda mulf_sqr1_hi,x
sm8a:
sbc mulf_sqr2_hi,x
sta _BB+1
// Partial product >a * >b = DDdd: dd is patched into _dd, DD goes to memR+3
sec
sm5b:
lda mulf_sqr1_lo,x
sm6b:
sbc mulf_sqr2_lo,x
sta _dd+1
sm7b:
lda mulf_sqr1_hi,x
sm8b:
sbc mulf_sqr2_hi,x
sta memR+3
// Add the partial products together.
// memR+1 = AA + bb
clc
_AA:
lda #0
_bb:
adc #0
sta memR+1
// memR+2 = BB + CC + carry
_BB:
lda #0
_CC:
adc #0
sta memR+2
// Propagate a carry out of memR+2 into memR+3; carry is clear afterwards on both paths
bcc !+
inc memR+3
clc
!:
// memR+1 += cc
_cc:
lda #0
adc memR+1
sta memR+1
// memR+2 += dd + carry
_dd:
lda #0
adc memR+2
sta memR+2
// Propagate a carry out of memR+2 into memR+3
bcc !+
inc memR+3
!:
// return *memR;
// Copy the 4 result bytes from memR to the return location
lda.z memR
sta.z return
lda.z memR+1
sta.z return+1
lda.z memR+2
sta.z return+2
lda.z memR+3
sta.z return+3
// }
rts
}
// Print an unsigned long as HEX: 8 hex digits, most significant byte first
// void print_ulong(__zp($a) unsigned long dw)
print_ulong: {
    .label dw = $a

    // Upper word first: print_uint.w = WORD1(dw)
    lda.z dw+3
    sta.z print_uint.w+1
    lda.z dw+2
    sta.z print_uint.w
    jsr print_uint

    // Then the lower word: print_uint.w = WORD0(dw)
    lda.z dw+1
    sta.z print_uint.w+1
    lda.z dw
    sta.z print_uint.w
    jsr print_uint

    rts
}
// Print an unsigned int as HEX
// Prints the high byte and then the low byte of w, each through print_uchar (4 hex digits in total).
// Loads X with each byte as the argument to print_uchar.
// void print_uint(__zp(4) unsigned int w)
print_uint: {
// Argument word in zero page; print_ulong stores it as print_uint.w before calling
.label w = 4
// print_uchar(BYTE1(w))
ldx.z w+1
jsr print_uchar
// print_uchar(BYTE0(w))
ldx.z w
jsr print_uchar
// }
rts
}
// Print a char as HEX
// Prints two characters from print_hextab: first the high nibble of b, then the low nibble.
// The value in X is still needed after the first print_char call; print_char itself only uses A and Y.
// void print_uchar(__register(X) char b)
print_uchar: {
// b>>4
txa
lsr
lsr
lsr
lsr
// print_char(print_hextab[b>>4])
tay
lda print_hextab,y
jsr print_char
// b&0xf
// axs is an undocumented 6502 opcode (also known as SBX): X = (A and X) - immediate.
// With A = $f and an immediate of 0 this leaves X = b & $f.
lda #$f
axs #0
// print_char(print_hextab[b&0xf])
lda print_hextab,x
jsr print_char
// }
rts
}
// Print a single char
// Stores the character in A at the address held in print_char_cursor, then advances the cursor by one.
// Uses Y; A and X are left unchanged.
// void print_char(__register(A) char ch)
print_char: {
    // *print_char_cursor = ch
    ldy #0
    sta (print_char_cursor),y
    // print_char_cursor++ (16-bit: bump the high byte only when the low byte wraps to 0)
    inc.z print_char_cursor
    bne no_wrap
    inc.z print_char_cursor+1
no_wrap:
    rts
}
.segment Data
// Table of hexadecimal digits, indexed by nibble value in print_uchar
print_hextab: .text "0123456789abcdef"
// mulf_sqr tables will contain f(x)=int(x*x/4) and g(x) = f(x-255).
// Each table is $200 bytes, zero-filled here and populated at run time by mulf_init.
// Each table is aligned to a $100 boundary: mulf16u patches only the low address byte
// of its table operands, so the table base addresses must have a low byte of 0.
// <f(x) = <(( x * x )/4)
.align $100
mulf_sqr1_lo: .fill $200, 0
// >f(x) = >(( x * x )/4)
.align $100
mulf_sqr1_hi: .fill $200, 0
// <g(x) = <((( x - 255) * ( x - 255 ))/4)
.align $100
mulf_sqr2_lo: .fill $200, 0
// >g(x) = >((( x - 255) * ( x - 255 ))/4)
.align $100
mulf_sqr2_hi: .fill $200, 0