// Source: mirror of https://gitlab.com/camelot/kickc.git (395 lines, 7.5 KiB)
/// @file
/// A lightweight library for printing on the C64.
///
/// Printing with this library is done by calling a print_ function for each element.
// Commodore 64 PRG executable file
// Output file: a single PRG built from the Program segment.
.file [name="min-fmul-16.prg", type="prg", segments="Program"]

// Memory layout: Program = Basic + Code + Data.
// Basic starts at $0801, Code at $080d, and Data follows directly after Code.
.segmentdef Program [segments="Basic, Code, Data"]
.segmentdef Basic [start=$0801]
.segmentdef Code [start=$80d]
.segmentdef Data [startAfter="Code"]

.segment Basic
// BASIC upstart stub that transfers control to main
:BasicUpstart(main)

// Register polled by main until it reads $ff
.label RASTER = $d012
// Register incremented before and decremented after the multiplication in main
.label BORDER_COLOR = $d020
// Address where printing starts
.label SCREEN = $400
// Zero-page pointer ($02/$03) to the next output position; advanced by print_char
.label print_char_cursor = 2

.segment Code
// Program entry point (reached through the BasicUpstart stub).
// Builds the square tables once with mulf_init, masks interrupts, and then
// loops forever. Each pass resets the print cursor to SCREEN, waits until
// RASTER reads $ff, calls mulf16u between an inc/dec of BORDER_COLOR
// (presumably as a visual marker of the time the multiply takes), and prints
// the 32-bit product as 8 hex digits starting at SCREEN.
// Never returns.
main: {
    // Operands of the multiplication. These are constant values, not
    // addresses: mulf16u loads them with immediate addressing (#<main.a, ...).
    .label a = $4d2
    .label b = $929
    // Zero-page address of the 32-bit result (same address as mulf16u.return
    // and print_ulong.dw)
    .label r = $a
    // mulf_init()
    jsr mulf_init
    // asm
    // Interrupts are masked here and never re-enabled by main.
    sei
  frame:
    // print_char_cursor = SCREEN
    // Done once per pass, so every result overwrites the previous one.
    lda #<SCREEN
    sta.z print_char_cursor
    lda #>SCREEN
    sta.z print_char_cursor+1
  wait:
    // while(*RASTER!=$ff)
    lda #$ff
    cmp RASTER
    bne wait
    // (*BORDER_COLOR)++;
    inc BORDER_COLOR
    // dword r = mulf16u(a, b)
    jsr mulf16u
    // (*BORDER_COLOR)--;
    dec BORDER_COLOR
    // print_ulong(r)
    jsr print_ulong
    jmp frame
}
// Initialize the mulf_sqr multiplication tables with f(x)=int(x*x/4)
//
// Pass 1 writes f(x) to mulf_sqr1_lo/mulf_sqr1_hi for x = 1..511. Entry 0 is
// not written and keeps its assembled value.
// Pass 2 writes g(x) = f(x-255) to mulf_sqr2_lo/mulf_sqr2_hi for x = 0..510 by
// reading mulf_sqr1 with an index that first counts down from 255 to 0 and
// then up again; entry 511 is copied from mulf_sqr1 entry 256.
//
// Takes no parameters and returns nothing.
// Clobbers A, X, Y, flags and zero page $02-$09 and $0e-$11.
mulf_init: {
    // Iteration counter of pass 1; bit 0 is used to detect even x
    .label c = $f
    // Write pointer into mulf_sqr1_hi
    .label sqr1_hi = 2
    // 16-bit running value of f(x)
    .label sqr = 6
    // Write pointer into mulf_sqr1_lo; also the loop variable of pass 1
    .label sqr1_lo = 4
    // Write pointer into mulf_sqr2_hi
    .label sqr2_hi = $10
    // Write pointer into mulf_sqr2_lo; also the loop variable of pass 2
    .label sqr2_lo = 8
    // Step added to the read index in pass 2: $ff (decrease) first, then 1
    .label dir = $e

    // ---- Pass 1 -------------------------------------------------------
    // Fill mulf_sqr1 = f(x) = int(x*x/4): If f(x) = x*x/4 then f(x+1) = f(x) + x/2 + 1/4
    // X holds x/2 (x_2). Start with x_2 = 0, sqr = 0, c = 0.
    ldx #0
    lda #<mulf_sqr1_hi+1
    sta.z sqr1_hi
    lda #>mulf_sqr1_hi+1
    sta.z sqr1_hi+1
    txa
    sta.z sqr
    sta.z sqr+1
    sta.z c
    lda #<mulf_sqr1_lo+1
    sta.z sqr1_lo
    lda #>mulf_sqr1_lo+1
    sta.z sqr1_lo+1
  fill1_test:
    // for(byte* sqr1_lo = mulf_sqr1_lo+1; sqr1_lo!=mulf_sqr1_lo+512; sqr1_lo++)
    lda.z sqr1_lo+1
    cmp #>mulf_sqr1_lo+$200
    bne fill1_body
    lda.z sqr1_lo
    cmp #<mulf_sqr1_lo+$200
    bne fill1_body

    // ---- Pass 2 -------------------------------------------------------
    // Decrease or increase x_255 - initially we decrease
    lda #$ff
    sta.z dir
    lda #<mulf_sqr2_hi
    sta.z sqr2_hi
    lda #>mulf_sqr2_hi
    sta.z sqr2_hi+1
    // X holds the read index x_255. Start with g(0)=f(255)
    ldx #-1
    lda #<mulf_sqr2_lo
    sta.z sqr2_lo
    lda #>mulf_sqr2_lo
    sta.z sqr2_lo+1
  fill2_test:
    // for(byte* sqr2_lo = mulf_sqr2_lo; sqr2_lo!=mulf_sqr2_lo+511; sqr2_lo++)
    lda.z sqr2_lo+1
    cmp #>mulf_sqr2_lo+$1ff
    bne fill2_body
    lda.z sqr2_lo
    cmp #<mulf_sqr2_lo+$1ff
    bne fill2_body
    // *(mulf_sqr2_lo+511) = *(mulf_sqr1_lo+256)
    // Set the very last value g(511) = f(256)
    lda mulf_sqr1_lo+$100
    sta mulf_sqr2_lo+$1ff
    // *(mulf_sqr2_hi+511) = *(mulf_sqr1_hi+256)
    lda mulf_sqr1_hi+$100
    sta mulf_sqr2_hi+$1ff
    // }
    rts
  fill2_body:
    // *sqr2_lo = mulf_sqr1_lo[x_255]
    lda mulf_sqr1_lo,x
    ldy #0
    sta (sqr2_lo),y
    // *sqr2_hi++ = mulf_sqr1_hi[x_255]
    lda mulf_sqr1_hi,x
    sta (sqr2_hi),y
    inc.z sqr2_hi
    bne !+
    inc.z sqr2_hi+1
  !:
    // x_255 = x_255 + dir
    txa
    clc
    adc.z dir
    tax
    // if(x_255==0) dir = 1
    // TAX has already set Z from the new index, so no explicit compare is needed.
    bne fill2_step
    lda #1
    sta.z dir
  fill2_step:
    // sqr2_lo++
    inc.z sqr2_lo
    bne !+
    inc.z sqr2_lo+1
  !:
    jmp fill2_test

  fill1_body:
    // if((++c&1)==0)
    inc.z c
    lda #1
    and.z c
    // AND has already set Z from (c & 1), so no explicit compare is needed.
    bne fill1_store
    // x_2++;
    inx
    // sqr++;
    inc.z sqr
    bne !+
    inc.z sqr+1
  !:
  fill1_store:
    // *sqr1_lo = BYTE0(sqr)
    lda.z sqr
    ldy #0
    sta (sqr1_lo),y
    // *sqr1_hi++ = BYTE1(sqr)
    lda.z sqr+1
    sta (sqr1_hi),y
    inc.z sqr1_hi
    bne !+
    inc.z sqr1_hi+1
  !:
    // sqr = sqr + x_2
    txa
    clc
    adc.z sqr
    sta.z sqr
    bcc !+
    inc.z sqr+1
  !:
    // sqr1_lo++
    inc.z sqr1_lo
    bne !+
    inc.z sqr1_lo+1
  !:
    jmp fill1_test
}
// Fast multiply two unsigned words to a double word result
// Done in assembler to utilize fast addition A+X
// __zp($a) unsigned long mulf16u(unsigned int a, unsigned int b)
//
// In this build both operands are the constants main.a and main.b, loaded
// with immediate addressing; the routine reads no caller-supplied input.
// Result: 32-bit product, little-endian, in zero page $0a-$0d (return).
// Clobbers A, X, flags and zero page $f8-$ff.
//
// Method: every 8x8 product is formed as f(p+q) - f(p-q) with f(x)=int(x*x/4),
// using the tables built by mulf_init (mulf_sqr2 holds g(x) = f(x-255)).
// One operand byte is patched into the low address byte of the table operands
// (self-modifying code) and the other is used as the X index, so the table
// lookup itself performs the addition. This depends on the tables being
// aligned to $100, as they are in the Data segment.
// The four partial products are then summed into memR.
mulf16u: {
    // First operand (16 bit)
    .label memA = $f8
    // Second operand (16 bit)
    .label memB = $fa
    // Working copy of the 32-bit result
    .label memR = $fc
    // Returned 32-bit result
    .label return = $a

    // *memA = a
    lda #<main.a
    sta.z memA
    lda #>main.a
    sta.z memA+1
    // *memB = b
    lda #<main.b
    sta.z memB
    lda #>main.b
    sta.z memB+1

    // asm
    // Patch the low byte of memA into the f() lookups of the "a" groups ...
    lda memA
    sta sm1a+1
    sta sm3a+1
    sta sm5a+1
    sta sm7a+1
    // ... and its complement (255 - value) into the matching g() lookups.
    eor #$ff
    sta sm2a+1
    sta sm4a+1
    sta sm6a+1
    sta sm8a+1
    // Same for the high byte of memA and the "b" groups.
    lda memA+1
    sta sm1b+1
    sta sm3b+1
    sta sm5b+1
    sta sm7b+1
    eor #$ff
    sta sm2b+1
    sta sm4b+1
    sta sm6b+1
    sta sm8b+1

    // --- X = low byte of memB ---
    ldx memB
    // <memA * <memB : low byte -> memR+0, high byte -> operand of _AA
    sec
  sm1a:
    lda mulf_sqr1_lo,x
  sm2a:
    sbc mulf_sqr2_lo,x
    sta memR+0
  sm3a:
    lda mulf_sqr1_hi,x
  sm4a:
    sbc mulf_sqr2_hi,x
    sta _AA+1
    // >memA * <memB : low byte -> operand of _cc, high byte -> operand of _CC
    sec
  sm1b:
    lda mulf_sqr1_lo,x
  sm2b:
    sbc mulf_sqr2_lo,x
    sta _cc+1
  sm3b:
    lda mulf_sqr1_hi,x
  sm4b:
    sbc mulf_sqr2_hi,x
    sta _CC+1

    // --- X = high byte of memB ---
    ldx memB+1
    // <memA * >memB : low byte -> operand of _bb, high byte -> operand of _BB
    sec
  sm5a:
    lda mulf_sqr1_lo,x
  sm6a:
    sbc mulf_sqr2_lo,x
    sta _bb+1
  sm7a:
    lda mulf_sqr1_hi,x
  sm8a:
    sbc mulf_sqr2_hi,x
    sta _BB+1
    // >memA * >memB : low byte -> operand of _dd, high byte -> memR+3
    sec
  sm5b:
    lda mulf_sqr1_lo,x
  sm6b:
    sbc mulf_sqr2_lo,x
    sta _dd+1
  sm7b:
    lda mulf_sqr1_hi,x
  sm8b:
    sbc mulf_sqr2_hi,x
    sta memR+3

    // --- Sum the partial products ---
    // The #0 operands below were overwritten by the stores above.
    // memR+1 = AA + bb
    clc
  _AA:
    lda #0
  _bb:
    adc #0
    sta memR+1
    // memR+2 = BB + CC + carry; a carry out goes into memR+3
  _BB:
    lda #0
  _CC:
    adc #0
    sta memR+2
    bcc !+
    inc memR+3
    clc
  !:
    // memR+1 += cc
  _cc:
    lda #0
    adc memR+1
    sta memR+1
    // memR+2 += dd + carry; a carry out goes into memR+3
  _dd:
    lda #0
    adc memR+2
    sta memR+2
    bcc !+
    inc memR+3
  !:

    // return *memR;
    // Unrolled at assembly time into four lda.z/sta.z pairs, bytes 0 to 3.
    .for (var i = 0; i < 4; i++) {
        lda.z memR+i
        sta.z return+i
    }
    // }
    rts
}
// Print an unsigned long as HEX
// void print_ulong(__zp($a) unsigned long dw)
//
// In:  dw, 32 bits little-endian in zero page $0a-$0d.
// Prints the high word first, then the low word, each through print_uint,
// giving 8 hex digits at print_char_cursor.
// Overwrites print_uint.w with each word before the call.
print_ulong: {
    .label dw = $a
    // print_uint(WORD1(dw))
    lda.z dw+3
    sta.z print_uint.w+1
    lda.z dw+2
    sta.z print_uint.w
    jsr print_uint
    // print_uint(WORD0(dw))
    lda.z dw+1
    sta.z print_uint.w+1
    lda.z dw
    sta.z print_uint.w
    jsr print_uint
    // }
    rts
}
// Print an unsigned int as HEX
// void print_uint(__zp(4) unsigned int w)
//
// In:  w, 16 bits little-endian in zero page $04-$05.
// Prints the high byte, then the low byte, each through print_uchar
// (which takes its argument in X), giving 4 hex digits.
print_uint: {
    .label w = 4
    // print_uchar(BYTE1(w))
    ldx.z w+1
    jsr print_uchar
    // print_uchar(BYTE0(w))
    ldx.z w
    jsr print_uchar
    // }
    rts
}
// Print a char as HEX
// void print_uchar(__register(X) char b)
//
// In:  X = byte to print.
// Prints two characters taken from print_hextab: first the one for the high
// nibble, then the one for the low nibble.
// X must survive the first print_char call; print_char only uses A and Y.
print_uchar: {
    // b>>4
    txa
    lsr
    lsr
    lsr
    lsr
    // print_char(print_hextab[b>>4])
    tay
    lda print_hextab,y
    jsr print_char
    // b&0xf
    // axs is an undocumented NMOS 6502 opcode (also known as SBX):
    // X = (A & X) - immediate. With A = $0f and immediate 0 it leaves
    // X = b & $0f without going through A.
    lda #$f
    axs #0
    // print_char(print_hextab[b&0xf])
    lda print_hextab,x
    jsr print_char
    // }
    rts
}
// Print a single char
// void print_char(__register(A) char ch)
//
// In:  A = value to store.
// Stores A at the address held in print_char_cursor and then advances that
// 16-bit pointer by one. Sets Y to 0; A and X are left unchanged.
print_char: {
    // *(print_char_cursor++) = ch
    ldy #0
    sta (print_char_cursor),y
    // 16-bit increment: bump the high byte only when the low byte wraps to 0
    inc.z print_char_cursor
    bne !+
    inc.z print_char_cursor+1
  !:
    // }
    rts
}
.segment Data
// Table of hexadecimal digits, indexed by nibble value in print_uchar
print_hextab: .text "0123456789abcdef"

// mulf_sqr tables will contain f(x)=int(x*x/4) and g(x) = f(x-255).
// All four tables are 512 bytes, assembled as zeroes and filled at run time
// by mulf_init. Each one is aligned to $100: mulf16u patches only the low
// byte of the table addresses in its lookup instructions.

// <f(x) = <(( x * x )/4)
.align $100
mulf_sqr1_lo: .fill 512, 0

// >f(x) = >(( x * x )/4)
.align $100
mulf_sqr1_hi: .fill 512, 0

// <g(x) = <((( x - 255) * ( x - 255 ))/4)
.align $100
mulf_sqr2_lo: .fill 512, 0

// >g(x) = >((( x - 255) * ( x - 255 ))/4)
.align $100
mulf_sqr2_hi: .fill 512, 0