// Source: mirror of https://gitlab.com/camelot/kickc.git
// (hosting page listing: 331 lines, 6.3 KiB, tagged "NASM"; the content below is KickAssembler 6502 syntax)
// Seriously fast multiply 8-bit version (8bit*8bit=8bit)
|
|
// Multiplies two signed 8-bit numbers and results in an 8-bit number
|
|
// C=A*B, A in [-64;64], B in [-96;95], C in [-96;95] - 64 acts as 1 (X*64=X)
|
|
// Uses the formula a*b = (a+b)^2/4 - (a-b)^2/4
|
|
// See the following for information about the method
|
|
// - http://codebase64.org/doku.php?id=base:seriously_fast_multiplication
|
|
// - http://codebase64.org/doku.php?id=magazines:chacking16
|
|
// Commodore 64 PRG executable file
|
|
// Output: a single PRG file built from the "Program" segment
.file [name="fastmultiply8.prg", type="prg", segments="Program"]
// "Program" is made of the three segments below, in this order
.segmentdef Program [segments="Basic, Code, Data"]
// BASIC stub that starts the machine code
.segmentdef Basic [start=$0801]
// Machine code
.segmentdef Code [start=$080d]
// Constants and lookup tables, placed directly after the code
.segmentdef Data [startAfter="Code"]
.segment Basic
// BASIC upstart that transfers control to main
:BasicUpstart(main)
// Start of the screen memory cleared by memset
.label print_screen = $400
.segment Code
// Prints a multiplication table of vals[] x vals[] using fmul8():
// first a header line with every operand, then one line per operand holding
// the operand itself followed by its product with every entry of vals.
main: {
    // Number of entries in vals
    .const COUNT = 9
    // Distance in bytes between two table cells on a line
    .const CELL_STEP = 4
    // Distance in bytes between two screen lines (40)
    .const LINE_STEP = $28
    // Position of the next header cell
    .label head_at = $11
    // Header loop index
    .label col = $10
    // Start of the current table line
    .label line_at = $d
    // Position of the current product cell
    .label cell_at = 8
    // Column (second operand) index
    .label j = $a
    // Line (first operand) index
    .label i = $f
    // init_screen()
    jsr init_screen
    // The header starts one cell in; the first cell of each line is used for the line's operand
    lda #<print_screen+CELL_STEP
    sta.z head_at
    lda #>print_screen+CELL_STEP
    sta.z head_at+1
    lda #0
    sta.z col
  header_loop:
    // print_schar_at(vals[col], head_at)
    ldy.z col
    lda vals,y
    sta.z print_schar_at.b
    lda.z head_at
    sta.z print_schar_at.at
    lda.z head_at+1
    sta.z print_schar_at.at+1
    jsr print_schar_at
    // head_at += 4
    clc
    lda.z head_at
    adc #CELL_STEP
    sta.z head_at
    bcc !+
    inc.z head_at+1
  !:
    // for(char col: 0..8)
    inc.z col
    lda.z col
    cmp #COUNT
    bne header_loop
    lda #0
    sta.z i
    lda #<print_screen
    sta.z line_at
    lda #>print_screen
    sta.z line_at+1
  line_loop:
    // line_at += 40 - advanced before printing, so the first table line is the one after the header
    clc
    lda.z line_at
    adc #LINE_STEP
    sta.z line_at
    bcc !+
    inc.z line_at+1
  !:
    // print_schar_at(vals[i], line_at)
    ldy.z i
    lda vals,y
    sta.z print_schar_at.b
    lda.z line_at
    sta.z print_schar_at.at
    lda.z line_at+1
    sta.z print_schar_at.at+1
    jsr print_schar_at
    // cell_at = line_at
    lda.z line_at
    sta.z cell_at
    lda.z line_at+1
    sta.z cell_at+1
    lda #0
    sta.z j
  cell_loop:
    // cell_at += 4
    clc
    lda.z cell_at
    adc #CELL_STEP
    sta.z cell_at
    bcc !+
    inc.z cell_at+1
  !:
    // signed char r = fmul8(vals[i], vals[j])
    ldy.z i
    lda vals,y
    sta.z fmul8.aa
    ldy.z j
    lda vals,y
    sta.z fmul8.bb
    jsr fmul8
    // print_schar_at(r, cell_at) - fmul8 returns r in A
    sta.z print_schar_at.b
    lda.z cell_at
    sta.z print_schar_at.at
    lda.z cell_at+1
    sta.z print_schar_at.at+1
    jsr print_schar_at
    // for(char j: 0..8)
    inc.z j
    lda.z j
    cmp #COUNT
    bne cell_loop
    // for(char i: 0..8)
    inc.z i
    lda.z i
    cmp #COUNT
    bne line_loop
    // }
    rts
}
// Clears the screen, then stores WHITE into the first 40 bytes at $d800 and
// into the first 4 bytes of each of 25 consecutive 40-byte lines from $d800.
init_screen: {
    .const WHITE = 1
    // Base address of the colour bytes written below
    .const COLOR_BASE = $d800
    // Bytes per line (40)
    .const LINE_LEN = $28
    // Number of lines (25)
    .const LINE_COUNT = $19
    // Pointer to the start of the line being coloured
    .label COLS = 2
    // print_cls()
    jsr print_cls
    // A is not modified inside the first loop, so WHITE is loaded once
    lda #WHITE
    ldx #0
  top_line:
    // COLS[l] = WHITE
    sta COLOR_BASE,x
    // for(char l: 0..39)
    inx
    cpx #LINE_LEN
    bne top_line
    lda #<COLOR_BASE
    sta.z COLS
    lda #>COLOR_BASE
    sta.z COLS+1
    ldx #0
  left_cells:
    lda #WHITE
    // COLS[0] = WHITE
    ldy #0
    sta (COLS),y
    // COLS[1] = WHITE
    iny
    sta (COLS),y
    // COLS[2] = WHITE
    iny
    sta (COLS),y
    // COLS[3] = WHITE
    iny
    sta (COLS),y
    // COLS += 40
    clc
    lda.z COLS
    adc #LINE_LEN
    sta.z COLS
    bcc !+
    inc.z COLS+1
  !:
    // for(char m: 0..24)
    inx
    cpx #LINE_COUNT
    bne left_cells
    // }
    rts
}
// Print a signed char at a specific position as a sign character
// ('-' for negative values, ' ' otherwise) followed by two hex digits of the magnitude.
// Both b and at are modified by the call.
// void print_schar_at(__zp(6) signed char b, __zp(2) char *at)
print_schar_at: {
    .label b = 6
    .label at = 2
    // if(b<0)
    lda.z b
    bpl non_negative
    // print_char_at('-', at)
    ldx #'-'
    jsr print_char_at
    // b = -b  (computed as 0 - b)
    sec
    lda #0
    sbc.z b
    sta.z b
    jmp magnitude
  non_negative:
    // print_char_at(' ', at)
    ldx #' '
    jsr print_char_at
  magnitude:
    // print_uchar_at((char)b, at+1) - both arguments are already in the callee's zero-page locations
    inc.z print_uchar_at.at
    bne !+
    inc.z print_uchar_at.at+1
  !:
    jsr print_uchar_at
    // }
    rts
}
// Table-driven 8-bit multiply: returns mulf_sqr1[aa+bb] - mulf_sqr2[(aa^$ff)+bb].
// aa and (aa^$ff) are written into the low address byte of the two table
// instructions (self-modifying code) and bb is the X index, so this depends on
// both tables starting on a page boundary (.align $100).
// The result is stored in cc and is also left in A. Clobbers X.
// __register(A) signed char fmul8(__zp($c) volatile signed char aa, __zp($b) volatile signed char bb)
fmul8: {
    .label aa = $c
    .label bb = $b
    .label cc = 7
    // signed char cc
    lda #0
    sta.z cc
    // asm
    ldx bb
    lda aa
    // low byte of the mulf_sqr1 operand = aa
    sta sum_sqr+1
    eor #$ff
    // low byte of the mulf_sqr2 operand = aa ^ $ff
    sta diff_sqr+1
    // no borrow going into the subtraction
    sec
  sum_sqr:
    lda mulf_sqr1,x
  diff_sqr:
    sbc mulf_sqr2,x
    sta cc
    // return cc;  (A still holds cc)
    // }
    rts
}
// Clear the screen by filling it with spaces.
// All of the work is done by memset, so it is entered with a jump instead of
// jsr/rts: memset's own rts then returns directly to the caller of print_cls.
print_cls: {
    // memset(print_screen, ' ', 1000)
    jmp memset
    // }
}
// Store the char ch at the address at.
// Clobbers A and Y (Y is 0 on return); X and at are left unchanged.
// void print_char_at(__register(X) char ch, __zp(2) char *at)
print_char_at: {
    .label at = 2
    // *(at) = ch
    // the store has to go through A with Y as a zero index
    txa
    ldy #0
    sta (at),y
    // }
    rts
}
// Print a char as two hexadecimal digits (high nibble first) at a specific position.
// at is left pointing at the second digit. Clobbers A, X and Y.
// void print_uchar_at(__zp(6) char b, __zp(2) char *at)
print_uchar_at: {
    .label b = 6
    .label at = 2
    // print_char_at(print_hextab[b>>4], at)
    lda.z b
    lsr
    lsr
    lsr
    lsr
    tay
    ldx print_hextab,y
    jsr print_char_at
    // at+1 - the pointer is shared with print_char_at, so it is advanced in place
    inc.z print_char_at.at
    bne !+
    inc.z print_char_at.at+1
  !:
    // print_char_at(print_hextab[b&0xf], at+1)
    lda.z b
    and #$f
    tay
    ldx print_hextab,y
    jsr print_char_at
    // }
    rts
}
// Copies the character c (an unsigned char) to the first num characters of the object pointed to by the argument str.
// This version is specialised: str, c and num are assembly-time constants
// (print_screen, ' ' and 1000), so the routine takes no run-time arguments.
// Clobbers A and Y and uses the zero-page pointer dst.
// void * memset(void *str, char c, unsigned int num)
memset: {
    .const c = ' '
    .const num = $3e8
    .label str = print_screen
    .label end = str+num
    .label dst = 4
    lda #<str
    sta.z dst
    lda #>str
    sta.z dst+1
  check:
    // for(char* dst = str; dst!=end; dst++)
    // the pointer is compared high byte first, then low byte
    lda.z dst+1
    cmp #>end
    bne fill
    lda.z dst
    cmp #<end
    beq done
  fill:
    // *dst = c
    lda #c
    ldy #0
    sta (dst),y
    // dst++
    inc.z dst
    bne check
    inc.z dst+1
    jmp check
  done:
    // }
    rts
}
.segment Data
// Table of hexadecimal digits
print_hextab: .text "0123456789abcdef"
// Operands used by main for the multiplication table
vals: .byte -$5f, -$40, -$20, -$10, 0, $10, $20, $40, $5f
// Square tables used by fmul8. Both are $200 bytes long and page aligned
// (fmul8 patches the low byte of their addresses).
// Writing f(x) = round(x*x/256):
//   mulf_sqr1[n] = f(n)     for n in 0..159
//                  f(n-256) for n in 160..351
//                  f(512-n) for n in 352..511
.align $100
mulf_sqr1:
.for(var n=0;n<$200;n++) {
    .if(n<160) { .byte round((n*n)/256) }
    .if(n>=160 && n<352) { .byte round(((n-256)*(n-256))/256) }
    .if(n>=352) { .byte round(((512-n)*(512-n))/256) }
}
//   mulf_sqr2[n] = f(n+1)   for n in 0..159
//                  f(255-n) for n in 160..351
//                  f(n-511) for n in 352..511
.align $100
mulf_sqr2:
.for(var n=0;n<$200;n++) {
    .if(n<160) { .byte round(((n+1)*(n+1))/256) }
    .if(n>=160 && n<352) { .byte round(((255-n)*(255-n))/256) }
    .if(n>=352) { .byte round(((n-511)*(n-511))/256) }
}