mirror of
https://gitlab.com/camelot/kickc.git
synced 2024-10-21 17:24:39 +00:00
1125 lines
17 KiB
NASM
1125 lines
17 KiB
NASM
// Test the fast multiplication library
|
|
.pc = $801 "Basic"
|
|
:BasicUpstart(main)
|
|
.pc = $80d "Program"
|
|
.label BGCOL = $d021
|
|
.label print_char_cursor = $f
|
|
.label print_line_cursor = 7
|
|
main: {
|
|
lda #5
|
|
sta BGCOL
|
|
jsr print_cls
|
|
jsr mulf_init
|
|
jsr mul16u_compare
|
|
jsr mul16s_compare
|
|
rts
|
|
}
|
|
// Perform many possible word multiplications (slow and fast) and compare the results
|
|
mul16s_compare: {
|
|
.label a = 3
|
|
.label b = 5
|
|
.label ms = $b
|
|
.label mn = $19
|
|
.label mf = $11
|
|
.label i = 2
|
|
lda print_line_cursor
|
|
sta print_char_cursor
|
|
lda print_line_cursor+1
|
|
sta print_char_cursor+1
|
|
lda #0
|
|
sta i
|
|
lda #<-$7fff
|
|
sta b
|
|
lda #>-$7fff
|
|
sta b+1
|
|
lda #<-$7fff
|
|
sta a
|
|
lda #>-$7fff
|
|
sta a+1
|
|
b1:
|
|
lda #<str
|
|
sta print_str.str
|
|
lda #>str
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
ldy #0
|
|
b2:
|
|
clc
|
|
lda a
|
|
adc #<$d2b
|
|
sta a
|
|
lda a+1
|
|
adc #>$d2b
|
|
sta a+1
|
|
clc
|
|
lda b
|
|
adc #<$ffd
|
|
sta b
|
|
lda b+1
|
|
adc #>$ffd
|
|
sta b+1
|
|
jsr muls16s
|
|
jsr mul16s
|
|
jsr mulf16s
|
|
lda ms
|
|
cmp mf
|
|
bne !+
|
|
lda ms+1
|
|
cmp mf+1
|
|
bne !+
|
|
lda ms+2
|
|
cmp mf+2
|
|
bne !+
|
|
lda ms+3
|
|
cmp mf+3
|
|
beq b6
|
|
!:
|
|
ldx #0
|
|
jmp b3
|
|
b6:
|
|
ldx #1
|
|
b3:
|
|
lda ms
|
|
cmp mn
|
|
bne !+
|
|
lda ms+1
|
|
cmp mn+1
|
|
bne !+
|
|
lda ms+2
|
|
cmp mn+2
|
|
bne !+
|
|
lda ms+3
|
|
cmp mn+3
|
|
beq b4
|
|
!:
|
|
ldx #0
|
|
b4:
|
|
cpx #0
|
|
bne b5
|
|
lda #2
|
|
sta BGCOL
|
|
jsr mul16s_error
|
|
rts
|
|
b5:
|
|
iny
|
|
cpy #$10
|
|
bne b2
|
|
inc i
|
|
lda #$10
|
|
cmp i
|
|
beq !b1+
|
|
jmp b1
|
|
!b1:
|
|
jsr print_ln
|
|
lda print_line_cursor
|
|
sta print_char_cursor
|
|
lda print_line_cursor+1
|
|
sta print_char_cursor+1
|
|
lda #<str1
|
|
sta print_str.str
|
|
lda #>str1
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
jsr print_ln
|
|
rts
|
|
str1: .text "signed word multiply results match!@"
|
|
}
|
|
// Print a newline
|
|
print_ln: {
|
|
b1:
|
|
lda #$28
|
|
clc
|
|
adc print_line_cursor
|
|
sta print_line_cursor
|
|
bcc !+
|
|
inc print_line_cursor+1
|
|
!:
|
|
lda print_line_cursor+1
|
|
cmp print_char_cursor+1
|
|
bcc b1
|
|
bne !+
|
|
lda print_line_cursor
|
|
cmp print_char_cursor
|
|
bcc b1
|
|
!:
|
|
rts
|
|
}
|
|
// Print a zero-terminated string
|
|
// print_str(byte* zeropage(9) str)
|
|
print_str: {
|
|
.label str = 9
|
|
b1:
|
|
ldy #0
|
|
lda (str),y
|
|
cmp #'@'
|
|
bne b2
|
|
rts
|
|
b2:
|
|
ldy #0
|
|
lda (str),y
|
|
sta (print_char_cursor),y
|
|
inc print_char_cursor
|
|
bne !+
|
|
inc print_char_cursor+1
|
|
!:
|
|
inc str
|
|
bne !+
|
|
inc str+1
|
|
!:
|
|
jmp b1
|
|
}
|
|
// mul16s_error(signed word zeropage(3) a, signed word zeropage(5) b, signed dword zeropage($b) ms, signed dword zeropage($19) mn, signed dword zeropage($11) mf)
|
|
mul16s_error: {
|
|
.label a = 3
|
|
.label b = 5
|
|
.label ms = $b
|
|
.label mn = $19
|
|
.label mf = $11
|
|
lda #<str
|
|
sta print_str.str
|
|
lda #>str
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
jsr print_sword
|
|
lda #<str1
|
|
sta print_str.str
|
|
lda #>str1
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
lda b
|
|
sta print_sword.w
|
|
lda b+1
|
|
sta print_sword.w+1
|
|
jsr print_sword
|
|
lda #<str2
|
|
sta print_str.str
|
|
lda #>str2
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
jsr print_sdword
|
|
lda #<str3
|
|
sta print_str.str
|
|
lda #>str3
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
lda mn
|
|
sta print_sdword.dw
|
|
lda mn+1
|
|
sta print_sdword.dw+1
|
|
lda mn+2
|
|
sta print_sdword.dw+2
|
|
lda mn+3
|
|
sta print_sdword.dw+3
|
|
jsr print_sdword
|
|
lda #<str4
|
|
sta print_str.str
|
|
lda #>str4
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
lda mf
|
|
sta print_sdword.dw
|
|
lda mf+1
|
|
sta print_sdword.dw+1
|
|
lda mf+2
|
|
sta print_sdword.dw+2
|
|
lda mf+3
|
|
sta print_sdword.dw+3
|
|
jsr print_sdword
|
|
jsr print_ln
|
|
rts
|
|
str: .text "signed word multiply mismatch @"
|
|
}
|
|
// Print a signed dword as HEX
|
|
// print_sdword(signed dword zeropage($b) dw)
|
|
print_sdword: {
|
|
.label dw = $b
|
|
lda dw+3
|
|
bpl b1
|
|
lda #'-'
|
|
jsr print_char
|
|
sec
|
|
lda dw
|
|
eor #$ff
|
|
adc #0
|
|
sta dw
|
|
lda dw+1
|
|
eor #$ff
|
|
adc #0
|
|
sta dw+1
|
|
lda dw+2
|
|
eor #$ff
|
|
adc #0
|
|
sta dw+2
|
|
lda dw+3
|
|
eor #$ff
|
|
adc #0
|
|
sta dw+3
|
|
b1:
|
|
jsr print_dword
|
|
rts
|
|
}
|
|
// Print a dword as HEX
|
|
// print_dword(dword zeropage($b) dw)
|
|
print_dword: {
|
|
.label dw = $b
|
|
lda dw+2
|
|
sta print_word.w
|
|
lda dw+3
|
|
sta print_word.w+1
|
|
jsr print_word
|
|
lda dw
|
|
sta print_word.w
|
|
lda dw+1
|
|
sta print_word.w+1
|
|
jsr print_word
|
|
rts
|
|
}
|
|
// Print a word as HEX
|
|
// print_word(word zeropage(3) w)
|
|
print_word: {
|
|
.label w = 3
|
|
lda w+1
|
|
tax
|
|
jsr print_byte
|
|
lda w
|
|
tax
|
|
jsr print_byte
|
|
rts
|
|
}
|
|
// Print a byte as HEX
|
|
// print_byte(byte register(X) b)
|
|
print_byte: {
|
|
txa
|
|
lsr
|
|
lsr
|
|
lsr
|
|
lsr
|
|
tay
|
|
lda print_hextab,y
|
|
jsr print_char
|
|
lda #$f
|
|
axs #0
|
|
lda print_hextab,x
|
|
jsr print_char
|
|
rts
|
|
}
|
|
// Print a single char
|
|
// print_char(byte register(A) ch)
|
|
print_char: {
|
|
ldy #0
|
|
sta (print_char_cursor),y
|
|
inc print_char_cursor
|
|
bne !+
|
|
inc print_char_cursor+1
|
|
!:
|
|
rts
|
|
}
|
|
// Print a signed word as HEX
|
|
// print_sword(signed word zeropage(3) w)
|
|
print_sword: {
|
|
.label w = 3
|
|
lda w+1
|
|
bpl b1
|
|
lda #'-'
|
|
jsr print_char
|
|
sec
|
|
lda w
|
|
eor #$ff
|
|
adc #0
|
|
sta w
|
|
lda w+1
|
|
eor #$ff
|
|
adc #0
|
|
sta w+1
|
|
b1:
|
|
jsr print_word
|
|
rts
|
|
}
|
|
// Fast multiply two signed words to a signed double word result
|
|
// Fixes offsets introduced by using unsigned multiplication
|
|
// mulf16s(signed word zeropage(3) a, signed word zeropage(5) b)
|
|
mulf16s: {
|
|
.label _9 = $38
|
|
.label _13 = $3a
|
|
.label _16 = $38
|
|
.label _17 = $3a
|
|
.label m = $11
|
|
.label return = $11
|
|
.label a = 3
|
|
.label b = 5
|
|
lda a
|
|
sta mulf16u.a
|
|
lda a+1
|
|
sta mulf16u.a+1
|
|
lda b
|
|
sta mulf16u.b
|
|
lda b+1
|
|
sta mulf16u.b+1
|
|
jsr mulf16u
|
|
lda a+1
|
|
bpl b1
|
|
lda m+2
|
|
sta _9
|
|
lda m+3
|
|
sta _9+1
|
|
lda _16
|
|
sec
|
|
sbc b
|
|
sta _16
|
|
lda _16+1
|
|
sbc b+1
|
|
sta _16+1
|
|
lda _16
|
|
sta m+2
|
|
lda _16+1
|
|
sta m+3
|
|
b1:
|
|
lda b+1
|
|
bpl b2
|
|
lda m+2
|
|
sta _13
|
|
lda m+3
|
|
sta _13+1
|
|
lda _17
|
|
sec
|
|
sbc a
|
|
sta _17
|
|
lda _17+1
|
|
sbc a+1
|
|
sta _17+1
|
|
lda _17
|
|
sta m+2
|
|
lda _17+1
|
|
sta m+3
|
|
b2:
|
|
rts
|
|
}
|
|
// Fast multiply two unsigned words to a double word result
|
|
// Done in assembler to utilize fast addition A+X
|
|
// mulf16u(word zeropage($15) a, word zeropage($17) b)
|
|
mulf16u: {
|
|
.label memA = $f8
|
|
.label memB = $fa
|
|
.label memR = $fc
|
|
.label return = $11
|
|
.label a = $15
|
|
.label b = $17
|
|
lda a
|
|
sta memA
|
|
lda a+1
|
|
sta memA+1
|
|
lda b
|
|
sta memB
|
|
lda b+1
|
|
sta memB+1
|
|
lda memA
|
|
sta sm1a+1
|
|
sta sm3a+1
|
|
sta sm5a+1
|
|
sta sm7a+1
|
|
eor #$ff
|
|
sta sm2a+1
|
|
sta sm4a+1
|
|
sta sm6a+1
|
|
sta sm8a+1
|
|
lda memA+1
|
|
sta sm1b+1
|
|
sta sm3b+1
|
|
sta sm5b+1
|
|
sta sm7b+1
|
|
eor #$ff
|
|
sta sm2b+1
|
|
sta sm4b+1
|
|
sta sm6b+1
|
|
sta sm8b+1
|
|
ldx memB
|
|
sec
|
|
sm1a:
|
|
lda mulf_sqr1_lo,x
|
|
sm2a:
|
|
sbc mulf_sqr2_lo,x
|
|
sta memR+0
|
|
sm3a:
|
|
lda mulf_sqr1_hi,x
|
|
sm4a:
|
|
sbc mulf_sqr2_hi,x
|
|
sta _AA+1
|
|
sec
|
|
sm1b:
|
|
lda mulf_sqr1_lo,x
|
|
sm2b:
|
|
sbc mulf_sqr2_lo,x
|
|
sta _cc+1
|
|
sm3b:
|
|
lda mulf_sqr1_hi,x
|
|
sm4b:
|
|
sbc mulf_sqr2_hi,x
|
|
sta _CC+1
|
|
ldx memB+1
|
|
sec
|
|
sm5a:
|
|
lda mulf_sqr1_lo,x
|
|
sm6a:
|
|
sbc mulf_sqr2_lo,x
|
|
sta _bb+1
|
|
sm7a:
|
|
lda mulf_sqr1_hi,x
|
|
sm8a:
|
|
sbc mulf_sqr2_hi,x
|
|
sta _BB+1
|
|
sec
|
|
sm5b:
|
|
lda mulf_sqr1_lo,x
|
|
sm6b:
|
|
sbc mulf_sqr2_lo,x
|
|
sta _dd+1
|
|
sm7b:
|
|
lda mulf_sqr1_hi,x
|
|
sm8b:
|
|
sbc mulf_sqr2_hi,x
|
|
sta memR+3
|
|
clc
|
|
_AA:
|
|
lda #0
|
|
_bb:
|
|
adc #0
|
|
sta memR+1
|
|
_BB:
|
|
lda #0
|
|
_CC:
|
|
adc #0
|
|
sta memR+2
|
|
bcc !+
|
|
inc memR+3
|
|
clc
|
|
!:
|
|
_cc:
|
|
lda #0
|
|
adc memR+1
|
|
sta memR+1
|
|
_dd:
|
|
lda #0
|
|
adc memR+2
|
|
sta memR+2
|
|
bcc !+
|
|
inc memR+3
|
|
!:
|
|
lda memR
|
|
sta return
|
|
lda memR+1
|
|
sta return+1
|
|
lda memR+2
|
|
sta return+2
|
|
lda memR+3
|
|
sta return+3
|
|
rts
|
|
}
|
|
// Multiply of two signed words to a signed double word
|
|
// Fixes offsets introduced by using unsigned multiplication
|
|
// mul16s(signed word zeropage(3) a, signed word zeropage(5) b)
|
|
mul16s: {
|
|
.label _9 = $3c
|
|
.label _13 = $3e
|
|
.label _16 = $3c
|
|
.label _17 = $3e
|
|
.label m = $19
|
|
.label return = $19
|
|
.label a = 3
|
|
.label b = 5
|
|
lda b
|
|
sta mul16u.mb
|
|
lda b+1
|
|
sta mul16u.mb+1
|
|
lda #0
|
|
sta mul16u.mb+2
|
|
sta mul16u.mb+3
|
|
lda a
|
|
sta mul16u.a
|
|
lda a+1
|
|
sta mul16u.a+1
|
|
jsr mul16u
|
|
lda a+1
|
|
bpl b1
|
|
lda m+2
|
|
sta _9
|
|
lda m+3
|
|
sta _9+1
|
|
lda _16
|
|
sec
|
|
sbc b
|
|
sta _16
|
|
lda _16+1
|
|
sbc b+1
|
|
sta _16+1
|
|
lda _16
|
|
sta m+2
|
|
lda _16+1
|
|
sta m+3
|
|
b1:
|
|
lda b+1
|
|
bpl b2
|
|
lda m+2
|
|
sta _13
|
|
lda m+3
|
|
sta _13+1
|
|
lda _17
|
|
sec
|
|
sbc a
|
|
sta _17
|
|
lda _17+1
|
|
sbc a+1
|
|
sta _17+1
|
|
lda _17
|
|
sta m+2
|
|
lda _17+1
|
|
sta m+3
|
|
b2:
|
|
rts
|
|
}
|
|
// Perform binary multiplication of two unsigned 16-bit words into a 32-bit unsigned double word
|
|
// mul16u(word zeropage($1d) a, word zeropage($17) b)
|
|
mul16u: {
|
|
.label mb = $1f
|
|
.label a = $1d
|
|
.label res = $19
|
|
.label return = $19
|
|
.label b = $17
|
|
lda #0
|
|
sta res
|
|
sta res+1
|
|
sta res+2
|
|
sta res+3
|
|
b1:
|
|
lda a
|
|
bne b2
|
|
lda a+1
|
|
bne b2
|
|
rts
|
|
b2:
|
|
lda a
|
|
and #1
|
|
cmp #0
|
|
beq b3
|
|
lda res
|
|
clc
|
|
adc mb
|
|
sta res
|
|
lda res+1
|
|
adc mb+1
|
|
sta res+1
|
|
lda res+2
|
|
adc mb+2
|
|
sta res+2
|
|
lda res+3
|
|
adc mb+3
|
|
sta res+3
|
|
b3:
|
|
clc
|
|
ror a+1
|
|
ror a
|
|
asl mb
|
|
rol mb+1
|
|
rol mb+2
|
|
rol mb+3
|
|
jmp b1
|
|
}
|
|
// Slow multiplication of signed words
|
|
// Perform a signed multiplication by repeated addition/subtraction
|
|
// muls16s(signed word zeropage(3) a, signed word zeropage(5) b)
|
|
muls16s: {
|
|
.label m = $b
|
|
.label j = $23
|
|
.label return = $b
|
|
.label i = $25
|
|
.label a = 3
|
|
.label b = 5
|
|
lda a+1
|
|
bmi b6
|
|
bmi b5
|
|
bne !+
|
|
lda a
|
|
beq b5
|
|
!:
|
|
lda #0
|
|
sta j
|
|
sta j+1
|
|
sta m
|
|
sta m+1
|
|
sta m+2
|
|
sta m+3
|
|
b3:
|
|
lda b+1
|
|
ora #$7f
|
|
bmi !+
|
|
lda #0
|
|
!:
|
|
sta $ff
|
|
lda m
|
|
clc
|
|
adc b
|
|
sta m
|
|
lda m+1
|
|
adc b+1
|
|
sta m+1
|
|
lda m+2
|
|
adc $ff
|
|
sta m+2
|
|
lda m+3
|
|
adc $ff
|
|
sta m+3
|
|
inc j
|
|
bne !+
|
|
inc j+1
|
|
!:
|
|
lda j+1
|
|
cmp a+1
|
|
bne b3
|
|
lda j
|
|
cmp a
|
|
bne b3
|
|
rts
|
|
b5:
|
|
lda #0
|
|
sta return
|
|
sta return+1
|
|
sta return+2
|
|
sta return+3
|
|
rts
|
|
b6:
|
|
lda #0
|
|
sta i
|
|
sta i+1
|
|
sta m
|
|
sta m+1
|
|
sta m+2
|
|
sta m+3
|
|
b4:
|
|
lda b+1
|
|
ora #$7f
|
|
bmi !+
|
|
lda #0
|
|
!:
|
|
sta $ff
|
|
sec
|
|
lda m
|
|
sbc b
|
|
sta m
|
|
lda m+1
|
|
sbc b+1
|
|
sta m+1
|
|
lda m+2
|
|
sbc $ff
|
|
sta m+2
|
|
lda m+3
|
|
sbc $ff
|
|
sta m+3
|
|
lda i
|
|
bne !+
|
|
dec i+1
|
|
!:
|
|
dec i
|
|
lda i+1
|
|
cmp a+1
|
|
bne b4
|
|
lda i
|
|
cmp a
|
|
bne b4
|
|
rts
|
|
}
|
|
// Perform many possible word multiplications (slow and fast) and compare the results
|
|
mul16u_compare: {
|
|
.label a = $15
|
|
.label b = $17
|
|
.label ms = $b
|
|
.label mn = $19
|
|
.label mf = $11
|
|
.label i = $27
|
|
lda #0
|
|
sta i
|
|
sta b
|
|
sta b+1
|
|
sta a
|
|
sta a+1
|
|
lda #<$400
|
|
sta print_char_cursor
|
|
lda #>$400
|
|
sta print_char_cursor+1
|
|
b1:
|
|
lda #<str
|
|
sta print_str.str
|
|
lda #>str
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
ldy #0
|
|
b2:
|
|
clc
|
|
lda a
|
|
adc #<$d2b
|
|
sta a
|
|
lda a+1
|
|
adc #>$d2b
|
|
sta a+1
|
|
clc
|
|
lda b
|
|
adc #<$ffd
|
|
sta b
|
|
lda b+1
|
|
adc #>$ffd
|
|
sta b+1
|
|
jsr muls16u
|
|
lda a
|
|
sta mul16u.a
|
|
lda a+1
|
|
sta mul16u.a+1
|
|
lda mul16u.b
|
|
sta mul16u.mb
|
|
lda mul16u.b+1
|
|
sta mul16u.mb+1
|
|
lda #0
|
|
sta mul16u.mb+2
|
|
sta mul16u.mb+3
|
|
jsr mul16u
|
|
jsr mulf16u
|
|
lda ms
|
|
cmp mf
|
|
bne !+
|
|
lda ms+1
|
|
cmp mf+1
|
|
bne !+
|
|
lda ms+2
|
|
cmp mf+2
|
|
bne !+
|
|
lda ms+3
|
|
cmp mf+3
|
|
beq b6
|
|
!:
|
|
ldx #0
|
|
jmp b3
|
|
b6:
|
|
ldx #1
|
|
b3:
|
|
lda ms
|
|
cmp mn
|
|
bne !+
|
|
lda ms+1
|
|
cmp mn+1
|
|
bne !+
|
|
lda ms+2
|
|
cmp mn+2
|
|
bne !+
|
|
lda ms+3
|
|
cmp mn+3
|
|
beq b4
|
|
!:
|
|
ldx #0
|
|
b4:
|
|
cpx #0
|
|
bne b5
|
|
lda #2
|
|
sta BGCOL
|
|
lda a
|
|
sta mul16u_error.a
|
|
lda a+1
|
|
sta mul16u_error.a+1
|
|
jsr mul16u_error
|
|
rts
|
|
b5:
|
|
iny
|
|
cpy #$10
|
|
beq !b2+
|
|
jmp b2
|
|
!b2:
|
|
inc i
|
|
lda #$10
|
|
cmp i
|
|
beq !b1+
|
|
jmp b1
|
|
!b1:
|
|
lda #<$400
|
|
sta print_line_cursor
|
|
lda #>$400
|
|
sta print_line_cursor+1
|
|
jsr print_ln
|
|
lda print_line_cursor
|
|
sta print_char_cursor
|
|
lda print_line_cursor+1
|
|
sta print_char_cursor+1
|
|
lda #<str1
|
|
sta print_str.str
|
|
lda #>str1
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
jsr print_ln
|
|
rts
|
|
str1: .text "word multiply results match!@"
|
|
}
|
|
// mul16u_error(word zeropage(3) a, word zeropage($17) b, dword zeropage($b) ms, dword zeropage($19) mn, dword zeropage($11) mf)
|
|
mul16u_error: {
|
|
.label a = 3
|
|
.label b = $17
|
|
.label ms = $b
|
|
.label mn = $19
|
|
.label mf = $11
|
|
lda #<str
|
|
sta print_str.str
|
|
lda #>str
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
jsr print_word
|
|
lda #<str1
|
|
sta print_str.str
|
|
lda #>str1
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
lda b
|
|
sta print_word.w
|
|
lda b+1
|
|
sta print_word.w+1
|
|
jsr print_word
|
|
lda #<str2
|
|
sta print_str.str
|
|
lda #>str2
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
jsr print_dword
|
|
lda #<str3
|
|
sta print_str.str
|
|
lda #>str3
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
lda mn
|
|
sta print_dword.dw
|
|
lda mn+1
|
|
sta print_dword.dw+1
|
|
lda mn+2
|
|
sta print_dword.dw+2
|
|
lda mn+3
|
|
sta print_dword.dw+3
|
|
jsr print_dword
|
|
lda #<str4
|
|
sta print_str.str
|
|
lda #>str4
|
|
sta print_str.str+1
|
|
jsr print_str
|
|
lda mf
|
|
sta print_dword.dw
|
|
lda mf+1
|
|
sta print_dword.dw+1
|
|
lda mf+2
|
|
sta print_dword.dw+2
|
|
lda mf+3
|
|
sta print_dword.dw+3
|
|
jsr print_dword
|
|
lda #<$400
|
|
sta print_line_cursor
|
|
lda #>$400
|
|
sta print_line_cursor+1
|
|
jsr print_ln
|
|
rts
|
|
str: .text "multiply mismatch @"
|
|
}
|
|
// Slow multiplication of unsigned words
|
|
// Calculate an unsigned multiplication by repeated addition
|
|
// muls16u(word zeropage($15) a, word zeropage($17) b)
|
|
muls16u: {
|
|
.label return = $b
|
|
.label m = $b
|
|
.label i = $28
|
|
.label a = $15
|
|
.label b = $17
|
|
lda a
|
|
bne !+
|
|
lda a+1
|
|
beq b3
|
|
!:
|
|
lda #0
|
|
sta i
|
|
sta i+1
|
|
sta m
|
|
sta m+1
|
|
sta m+2
|
|
sta m+3
|
|
b2:
|
|
lda m
|
|
clc
|
|
adc b
|
|
sta m
|
|
lda m+1
|
|
adc b+1
|
|
sta m+1
|
|
lda m+2
|
|
adc #0
|
|
sta m+2
|
|
lda m+3
|
|
adc #0
|
|
sta m+3
|
|
inc i
|
|
bne !+
|
|
inc i+1
|
|
!:
|
|
lda i+1
|
|
cmp a+1
|
|
bne b2
|
|
lda i
|
|
cmp a
|
|
bne b2
|
|
rts
|
|
b3:
|
|
lda #0
|
|
sta return
|
|
sta return+1
|
|
sta return+2
|
|
sta return+3
|
|
rts
|
|
}
|
|
// Initialize the mulf_sqr multiplication tables with f(x)=int(x*x/4)
|
|
mulf_init: {
|
|
.label sqr1_hi = $2c
|
|
.label sqr = $2f
|
|
.label sqr1_lo = $2a
|
|
.label x_2 = $2e
|
|
.label sqr2_hi = $33
|
|
.label sqr2_lo = $31
|
|
.label dir = $35
|
|
lda #0
|
|
sta x_2
|
|
lda #<mulf_sqr1_hi+1
|
|
sta sqr1_hi
|
|
lda #>mulf_sqr1_hi+1
|
|
sta sqr1_hi+1
|
|
lda #<mulf_sqr1_lo+1
|
|
sta sqr1_lo
|
|
lda #>mulf_sqr1_lo+1
|
|
sta sqr1_lo+1
|
|
lda #0
|
|
sta sqr
|
|
sta sqr+1
|
|
tax
|
|
b1:
|
|
inx
|
|
txa
|
|
and #1
|
|
cmp #0
|
|
bne b2
|
|
inc x_2
|
|
inc sqr
|
|
bne !+
|
|
inc sqr+1
|
|
!:
|
|
b2:
|
|
lda sqr
|
|
ldy #0
|
|
sta (sqr1_lo),y
|
|
lda sqr+1
|
|
sta (sqr1_hi),y
|
|
inc sqr1_hi
|
|
bne !+
|
|
inc sqr1_hi+1
|
|
!:
|
|
lda x_2
|
|
clc
|
|
adc sqr
|
|
sta sqr
|
|
bcc !+
|
|
inc sqr+1
|
|
!:
|
|
inc sqr1_lo
|
|
bne !+
|
|
inc sqr1_lo+1
|
|
!:
|
|
lda sqr1_lo+1
|
|
cmp #>mulf_sqr1_lo+$200
|
|
bne b1
|
|
lda sqr1_lo
|
|
cmp #<mulf_sqr1_lo+$200
|
|
bne b1
|
|
lda #$ff
|
|
sta dir
|
|
lda #<mulf_sqr2_hi
|
|
sta sqr2_hi
|
|
lda #>mulf_sqr2_hi
|
|
sta sqr2_hi+1
|
|
lda #<mulf_sqr2_lo
|
|
sta sqr2_lo
|
|
lda #>mulf_sqr2_lo
|
|
sta sqr2_lo+1
|
|
ldx #-1
|
|
b4:
|
|
lda mulf_sqr1_lo,x
|
|
ldy #0
|
|
sta (sqr2_lo),y
|
|
lda mulf_sqr1_hi,x
|
|
sta (sqr2_hi),y
|
|
inc sqr2_hi
|
|
bne !+
|
|
inc sqr2_hi+1
|
|
!:
|
|
txa
|
|
clc
|
|
adc dir
|
|
tax
|
|
cpx #0
|
|
bne b5
|
|
lda #1
|
|
sta dir
|
|
b5:
|
|
inc sqr2_lo
|
|
bne !+
|
|
inc sqr2_lo+1
|
|
!:
|
|
lda sqr2_lo+1
|
|
cmp #>mulf_sqr2_lo+$1ff
|
|
bne b4
|
|
lda sqr2_lo
|
|
cmp #<mulf_sqr2_lo+$1ff
|
|
bne b4
|
|
// Set the very last value g(511) = f(256)
|
|
lda mulf_sqr1_lo+$100
|
|
sta mulf_sqr2_lo+$1ff
|
|
lda mulf_sqr1_hi+$100
|
|
sta mulf_sqr2_hi+$1ff
|
|
rts
|
|
}
|
|
// Clear the screen. Also resets current line/char cursor.
|
|
print_cls: {
|
|
.label sc = $36
|
|
lda #<$400
|
|
sta sc
|
|
lda #>$400
|
|
sta sc+1
|
|
b1:
|
|
lda #' '
|
|
ldy #0
|
|
sta (sc),y
|
|
inc sc
|
|
bne !+
|
|
inc sc+1
|
|
!:
|
|
lda sc+1
|
|
cmp #>$400+$3e8
|
|
bne b1
|
|
lda sc
|
|
cmp #<$400+$3e8
|
|
bne b1
|
|
rts
|
|
}
|
|
print_hextab: .text "0123456789abcdef"
|
|
// mulf_sqr tables will contain f(x)=int(x*x/4) and g(x) = f(x-255).
|
|
// <f(x) = <(( x * x )/4)
|
|
.align $100
|
|
mulf_sqr1_lo: .fill $200, 0
|
|
// >f(x) = >(( x * x )/4)
|
|
.align $100
|
|
mulf_sqr1_hi: .fill $200, 0
|
|
// <g(x) = <((( x - 255) * ( x - 255 ))/4)
|
|
.align $100
|
|
mulf_sqr2_lo: .fill $200, 0
|
|
// >g(x) = >((( x - 255) * ( x - 255 ))/4)
|
|
.align $100
|
|
mulf_sqr2_hi: .fill $200, 0
|
|
str: .text ".@"
|
|
str1: .text "*@"
|
|
str2: .text " slow:@"
|
|
str3: .text " / normal:@"
|
|
str4: .text " / fast:@"
|