1
0
mirror of https://gitlab.com/camelot/kickc.git synced 2024-09-08 17:54:40 +00:00
kickc/src/test/ref/sinusgen16b.asm

848 lines
15 KiB
NASM

// Generates a 16-bit signed sinus
// main builds two tables (sin16s_gen -> main.sintab1, sin16s_genb -> main.sintab2)
// and prints the difference between corresponding entries as signed hex words.
// BASIC stub segment; BasicUpstart is the KickAssembler macro that starts the program at main
.pc = $801 "Basic"
:BasicUpstart(main)
.pc = $80d "Program"
// Size in bytes of one table entry; used to advance the table pointers
.const SIZEOF_SIGNED_WORD = 2
// PI*2 in u[4.28] format
.const PI2_u4f28 = $6487ed51
// PI in u[4.28] format
.const PI_u4f28 = $3243f6a9
// PI/2 in u[4.28] format
.const PI_HALF_u4f28 = $1921fb54
// PI in u[4.12] format
.const PI_u4f12 = $3244
// PI/2 in u[4.12] format
.const PI_HALF_u4f12 = $1922
// Start of the area cleared by print_cls/memset; also the initial value of print_char_cursor
.label print_line_cursor = $400
// Zero-page word for the division remainder (same address as divr16u.rem)
.label rem16u = 6
// Zero-page pointer to the next character position written by print_char / print_str
.label print_char_cursor = $1f
// Program entry point.
// Builds both sinus tables, clears the screen and then prints, for each of the
// wavelength entries, the signed 16-bit difference sintab1[i] - sintab2[i] in hex.
// X is the entry counter; none of the print routines modify X.
main: {
    .label wavelength = $78
    .label sw = $1d
    .label st1 = 6
    .label st2 = $e
    jsr sin16s_gen
    jsr sin16s_genb
    jsr print_cls
    // st1 / st2 walk the two tables in lock-step
    lda #<sintab1
    sta.z st1
    lda #>sintab1
    sta.z st1+1
    lda #<sintab2
    sta.z st2
    lda #>sintab2
    sta.z st2+1
    // printing starts at print_line_cursor
    lda #<print_line_cursor
    sta.z print_char_cursor
    lda #>print_line_cursor
    sta.z print_char_cursor+1
    ldx #0
  next_entry:
    // sw = *st1 - *st2 (16-bit subtract, low byte first)
    ldy #0
    sec
    lda (st1),y
    sbc (st2),y
    sta.z sw
    iny
    lda (st1),y
    sbc (st2),y
    sta.z sw+1
    // N still reflects the high-byte subtraction: negative values skip str1
    bmi print_diff
    lda #<str1
    sta.z print_str.str
    lda #>str1
    sta.z print_str.str+1
    jsr print_str
  print_diff:
    jsr print_sword
    lda #<str
    sta.z print_str.str
    lda #>str
    sta.z print_str.str+1
    jsr print_str
    // st1 += SIZEOF_SIGNED_WORD
    clc
    lda.z st1
    adc #SIZEOF_SIGNED_WORD
    sta.z st1
    bcc !+
    inc.z st1+1
  !:
    // st2 += SIZEOF_SIGNED_WORD
    clc
    lda.z st2
    adc #SIZEOF_SIGNED_WORD
    sta.z st2
    bcc !+
    inc.z st2+1
  !:
    inx
    cpx #wavelength
    bne next_entry
    rts
    // Separator printed after every value
    str: .text " "
    .byte 0
    // Padding printed before non-negative values
    str1: .text " "
    .byte 0
    // Table filled by sin16s_gen
    sintab1: .fill 2*$78, 0
    // Table filled by sin16s_genb
    sintab2: .fill 2*$78, 0
}
// Write a zero-terminated string to memory at print_char_cursor.
// Both str and print_char_cursor are advanced one byte per character written;
// the terminating zero byte itself is not written.
// print_str(byte* zeropage($15) str)
print_str: {
    .label str = $15
  next:
    ldy #0
    lda (str),y
    cmp #0
    beq done
    // A still holds the character and Y is still 0
    sta (print_char_cursor),y
    inc.z print_char_cursor
    bne !+
    inc.z print_char_cursor+1
  !:
    inc.z str
    bne !+
    inc.z str+1
  !:
    jmp next
  done:
    rts
}
// Print a signed word as a sign character followed by four hex digits.
// Negative values print '-' and are negated in place (w is overwritten with -w);
// non-negative values print ' '.
// print_sword(signed word zeropage($1d) w)
print_sword: {
    .label w = $1d
    lda.z w+1
    bpl positive
    lda #'-'
    jsr print_char
    // w = 0 - w
    sec
    lda #0
    sbc.z w
    sta.z w
    lda #0
    sbc.z w+1
    sta.z w+1
    jmp digits
  positive:
    lda #' '
    jsr print_char
  digits:
    // print_word reads the same zero-page word ($1d)
    jsr print_word
    rts
}
// Store the character in A at print_char_cursor and advance the cursor by one.
// Y is set to 0; A is left unchanged.
// print_char(byte register(A) ch)
print_char: {
    ldy #0
    sta (print_char_cursor),y
    inc.z print_char_cursor
    beq next_page
    rts
  next_page:
    // low byte wrapped to 0: carry into the high byte
    inc.z print_char_cursor+1
    rts
}
// Print a word as four hex digits, high byte first.
// print_word(word zeropage($1d) w)
print_word: {
    .label w = $1d
    lda.z w+1
    sta.z print_byte.b
    jsr print_byte
    lda.z w
    sta.z print_byte.b
    // tail call: print_byte's rts returns to our caller
    jmp print_byte
}
// Print a byte as two hex digits (high nibble first), using print_hextab.
// print_byte(byte zeropage($10) b)
print_byte: {
    .label b = $10
    // high nibble -> table index
    lda.z b
    lsr
    lsr
    lsr
    lsr
    tay
    lda print_hextab,y
    jsr print_char
    // low nibble -> table index
    lda.z b
    and #$f
    tay
    lda print_hextab,y
    // tail call: print_char's rts returns to our caller
    jmp print_char
}
// Clear the screen by delegating to memset.
// (The cursor itself is set by the caller: main reloads print_char_cursor afterwards.)
print_cls: {
    // tail call: memset's rts returns to our caller
    jmp memset
}
// Fill the num bytes starting at str with the character c.
// All three are assemble-time constants here: str = print_line_cursor, num = $3e8, c = ' '.
// dst is the running write pointer (zero page $06).
memset: {
    .const c = ' '
    .const num = $3e8
    .label str = print_line_cursor
    .label end = str+num
    .label dst = 6
    lda #<str
    sta.z dst
    lda #>str
    sta.z dst+1
  check:
    // stop once dst == end (high byte compared first)
    lda.z dst+1
    cmp #>end
    bne store
    lda.z dst
    cmp #<end
    beq done
  store:
    // *dst++ = c
    lda #c
    ldy #0
    sta (dst),y
    inc.z dst
    bne check
    inc.z dst+1
    jmp check
  done:
    rts
}
// Generate signed (large) word sinus table - on the full -$7fff - $7fff range
// Fills main.sintab2 with main.wavelength entries computed by sin16sb.
// The angle x is a 32-bit u[4.28] value that grows by step per entry, where step is the
// result div32u16u leaves at $17 (PI2_u4f28 divided by main.wavelength).
// Only the upper word of x is handed to sin16sb.
// sin16s_genb(signed word* zeropage($15) sintab)
sin16s_genb: {
    .label _3 = $1f
    .label step = $17
    .label sintab = $15
    .label x = $a
    .label i = $e
    jsr div32u16u
    // i = 0, x = 0
    lda #0
    sta.z i
    sta.z i+1
    sta.z x
    sta.z x+1
    sta.z x+2
    sta.z x+3
    lda #<main.sintab2
    sta.z sintab
    lda #>main.sintab2
    sta.z sintab+1
  loop:
    // while (i < main.wavelength), 16-bit unsigned compare
    lda.z i+1
    cmp #>main.wavelength
    bcc body
    bne done
    lda.z i
    cmp #<main.wavelength
    bcc body
  done:
    rts
  body:
    // *sintab = sin16sb(upper word of x)
    lda.z x+2
    sta.z sin16sb.x
    lda.z x+3
    sta.z sin16sb.x+1
    jsr sin16sb
    ldy #0
    lda.z _3
    sta (sintab),y
    iny
    lda.z _3+1
    sta (sintab),y
    // sintab += SIZEOF_SIGNED_WORD
    clc
    lda.z sintab
    adc #SIZEOF_SIGNED_WORD
    sta.z sintab
    bcc !+
    inc.z sintab+1
  !:
    // x += step (32-bit)
    clc
    lda.z x
    adc.z step
    sta.z x
    lda.z x+1
    adc.z step+1
    sta.z x+1
    lda.z x+2
    adc.z step+2
    sta.z x+2
    lda.z x+3
    adc.z step+3
    sta.z x+3
    // i++
    inc.z i
    bne loop
    inc.z i+1
    jmp loop
}
// Calculate signed word sinus sin(x) from a 16-bit angle
// x: unsigned word input u[4.12] (it is compared against PI_u4f12 / PI_HALF_u4f12 below);
//    sin16s_genb passes the upper word of its u[4.28] angle
// result: signed word sin(x) s[0.15] - using the full range -$7fff - $7fff
// Y holds the "x was >= PI" flag across the mulu16_sel calls (neither mulu16_sel nor mul16u touches Y).
// x is modified in place; x1 shares its zero-page word ($1d).
// sin16sb(word zeropage($1d) x)
sin16sb: {
.label x = $1d
.label return = $1f
.label x1 = $1d
.label x2 = 6
.label x3 = 6
.label x3_6 = $1b
.label usinx = $1f
.label x4 = 6
.label x5 = $1b
.label x5_128 = $1b
.label sinx = $1f
// if (x >= PI_u4f12) { x -= PI_u4f12; Y = 1 } else { Y = 0 }
lda.z x+1
cmp #>PI_u4f12
bcc b4
bne !+
lda.z x
cmp #<PI_u4f12
bcc b4
!:
lda.z x
sec
sbc #<PI_u4f12
sta.z x
lda.z x+1
sbc #>PI_u4f12
sta.z x+1
ldy #1
jmp b1
b4:
ldy #0
b1:
// if (x >= PI_HALF_u4f12) x = PI_u4f12 - x
lda.z x+1
cmp #>PI_HALF_u4f12
bcc b2
bne !+
lda.z x
cmp #<PI_HALF_u4f12
bcc b2
!:
sec
lda #<PI_u4f12
sbc.z x
sta.z x
lda #>PI_u4f12
sbc.z x+1
sta.z x+1
b2:
// x1 = x << 3 (in place)
asl.z x1
rol.z x1+1
asl.z x1
rol.z x1+1
asl.z x1
rol.z x1+1
// x2 = mulu16_sel(x1, x1, 0)
lda.z x1
sta.z mulu16_sel.v1
lda.z x1+1
sta.z mulu16_sel.v1+1
lda.z x1
sta.z mulu16_sel.v2
lda.z x1+1
sta.z mulu16_sel.v2+1
ldx #0
jsr mulu16_sel
// copy the result word ($1b) into x2 ($06), which is also mulu16_sel.v1 for the next call
lda.z mulu16_sel.return_18
sta.z x2
lda.z mulu16_sel.return_18+1
sta.z x2+1
// x3 = mulu16_sel(x2, x1, 1)
lda.z x1
sta.z mulu16_sel.v2
lda.z x1+1
sta.z mulu16_sel.v2+1
ldx #1
jsr mulu16_sel
lda.z mulu16_sel.return_17
sta.z mulu16_sel.return
lda.z mulu16_sel.return_17+1
sta.z mulu16_sel.return+1
// x3_6 = mulu16_sel(x3, $10000/6, 1); the result stays in $1b
ldx #1
lda #<$10000/6
sta.z mulu16_sel.v2
lda #>$10000/6
sta.z mulu16_sel.v2+1
jsr mulu16_sel
// usinx = x1 - x3_6
lda.z x1
sec
sbc.z x3_6
sta.z usinx
lda.z x1+1
sbc.z x3_6+1
sta.z usinx+1
// x4 = mulu16_sel(x3, x1, 0); v1 at $06 still holds x3 because mulu16_sel writes its result to $1b
lda.z x1
sta.z mulu16_sel.v2
lda.z x1+1
sta.z mulu16_sel.v2+1
ldx #0
jsr mulu16_sel
lda.z mulu16_sel.return_17
sta.z mulu16_sel.return
lda.z mulu16_sel.return_17+1
sta.z mulu16_sel.return+1
// x5 = mulu16_sel(x4, x1, 0); the result stays in $1b
lda.z x1
sta.z mulu16_sel.v2
lda.z x1+1
sta.z mulu16_sel.v2+1
ldx #0
jsr mulu16_sel
// x5_128 = x5 >> 4 (in place)
lsr.z x5_128+1
ror.z x5_128
lsr.z x5_128+1
ror.z x5_128
lsr.z x5_128+1
ror.z x5_128
lsr.z x5_128+1
ror.z x5_128
// usinx += x5_128
lda.z usinx
clc
adc.z x5_128
sta.z usinx
lda.z usinx+1
adc.z x5_128+1
sta.z usinx+1
// negate the result when the flag in Y is set (sinx shares $1f with usinx and return)
cpy #0
beq b3
sec
lda #0
sbc.z sinx
sta.z sinx
lda #0
sbc.z sinx+1
sta.z sinx+1
b3:
rts
}
// Multiply two unsigned words and return 16 selected bits of the 32-bit product.
// select (X) is the number of top product bits to skip: the product is shifted left
// select times and its upper word is returned.
// The result is written to $1b (return_17 and its aliases), not to return ($06);
// callers that want it at $06 copy it themselves. v1 ($06) and v2 ($08) are left untouched.
// mulu16_sel(word zeropage(6) v1, word zeropage(8) v2, byte register(X) select)
mulu16_sel: {
    .label _0 = $11
    .label _1 = $11
    .label v1 = 6
    .label v2 = 8
    .label return = 6
    .label return_11 = $1b
    .label return_14 = $1b
    .label return_16 = $1b
    .label return_17 = $1b
    .label return_18 = $1b
    .label return_20 = $1b
    // mul16u takes its first operand in a ($1b) and reads v2 directly as b ($08)
    lda.z v1+1
    sta.z mul16u.a+1
    lda.z v1
    sta.z mul16u.a
    jsr mul16u
    // product << select
    cpx #0
    beq take_high
  shift:
    asl.z _1
    rol.z _1+1
    rol.z _1+2
    rol.z _1+3
    dex
    bne shift
  take_high:
    // result = upper word of the (shifted) product
    lda.z _1+2
    sta.z return_17
    lda.z _1+3
    sta.z return_17+1
    rts
}
// Shift-and-add multiplication of two unsigned 16-bit words into a 32-bit product.
// a ($1b) is consumed (shifted right until it is zero), mb is b widened to 32 bits and
// shifted left once per step, res ($11-$14) accumulates the product.
// mul16u(word zeropage($1b) a, word zeropage(8) b)
mul16u: {
    .label a = $1b
    .label mb = 2
    .label res = $11
    .label b = 8
    .label return = $11
    // res = 0, mb = (dword)b
    lda #0
    sta.z res
    sta.z res+1
    sta.z res+2
    sta.z res+3
    sta.z mb+2
    sta.z mb+3
    lda.z b
    sta.z mb
    lda.z b+1
    sta.z mb+1
  loop:
    // done when no multiplier bits are left
    lda.z a
    ora.z a+1
    beq done
    // a >>= 1; the bit shifted out decides whether mb is added
    lsr.z a+1
    ror.z a
    bcc skip_add
    // res += mb
    clc
    lda.z res
    adc.z mb
    sta.z res
    lda.z res+1
    adc.z mb+1
    sta.z res+1
    lda.z res+2
    adc.z mb+2
    sta.z res+2
    lda.z res+3
    adc.z mb+3
    sta.z res+3
  skip_add:
    // mb <<= 1
    asl.z mb
    rol.z mb+1
    rol.z mb+2
    rol.z mb+3
    jmp loop
  done:
    rts
}
// Divide the 32-bit constant PI2_u4f28 by a 16-bit divisor using two 16-bit divisions.
// The divisor is the one built into divr16u (main.wavelength).
// The high word is divided first with remainder 0; its remainder is left in divr16u.rem
// and carried into the division of the low word.
// The 32-bit quotient is returned at $17-$1a; the final remainder stays in rem16u.
div32u16u: {
    .label quotient_hi = $1d
    .label quotient_lo = $1b
    .label return = $17
    // first pass: rem = 0, dividend = high word
    lda #0
    sta.z divr16u.rem
    sta.z divr16u.rem+1
    lda #<PI2_u4f28>>$10
    sta.z divr16u.dividend
    lda #>PI2_u4f28>>$10
    sta.z divr16u.dividend+1
    jsr divr16u
    // save the high quotient: the second pass reuses divr16u.return ($1b)
    lda.z divr16u.return
    sta.z quotient_hi
    lda.z divr16u.return+1
    sta.z quotient_hi+1
    // second pass: dividend = low word, rem carried over from the first pass
    lda #<PI2_u4f28&$ffff
    sta.z divr16u.dividend
    lda #>PI2_u4f28&$ffff
    sta.z divr16u.dividend+1
    jsr divr16u
    // return = quotient_hi:quotient_lo
    lda.z quotient_hi
    sta.z return+2
    lda.z quotient_hi+1
    sta.z return+3
    lda.z quotient_lo
    sta.z return
    lda.z quotient_lo+1
    sta.z return+1
    rts
}
// 16-step binary long division of rem:dividend by the constant main.wavelength.
// Each step shifts the top bit of dividend into rem, shifts the quotient left, and
// subtracts the divisor from rem (setting the new quotient bit) when rem >= divisor.
// Returns the quotient at $1b; the final remainder is left in rem ($06, the global rem16u).
// dividend is shifted out completely.
// divr16u(word zeropage(8) dividend, word zeropage(6) rem)
divr16u: {
    .label rem = 6
    .label dividend = 8
    .label quotient = $1b
    .label return = $1b
    // quotient = 0, X = step counter
    ldx #0
    stx.z quotient
    stx.z quotient+1
  next_bit:
    // rem = (rem << 1) | top bit of dividend; dividend <<= 1
    asl.z dividend
    rol.z dividend+1
    rol.z rem
    rol.z rem+1
    // quotient <<= 1
    asl.z quotient
    rol.z quotient+1
    // if (rem >= main.wavelength) { quotient++; rem -= main.wavelength }
    lda.z rem+1
    cmp #>main.wavelength
    bcc skip
    bne subtract
    lda.z rem
    cmp #<main.wavelength
    bcc skip
  subtract:
    inc.z quotient
    bne !+
    inc.z quotient+1
  !:
    sec
    lda.z rem
    sbc #<main.wavelength
    sta.z rem
    lda.z rem+1
    sbc #>main.wavelength
    sta.z rem+1
  skip:
    inx
    cpx #$10
    bne next_bit
    rts
}
// Generate signed (large) word sinus table - on the full -$7fff - $7fff range
// Fills main.sintab1 with main.wavelength entries computed by sin16s.
// The angle x is a 32-bit u[4.28] value that grows by step per entry, where step is the
// result div32u16u leaves at $17 (PI2_u4f28 divided by main.wavelength).
// The full 32-bit x is handed to sin16s.
// sin16s_gen(signed word* zeropage($e) sintab)
sin16s_gen: {
    .label _2 = $15
    .label step = $17
    .label sintab = $e
    .label x = $a
    .label i = $1d
    jsr div32u16u
    // i = 0, x = 0
    lda #0
    sta.z i
    sta.z i+1
    sta.z x
    sta.z x+1
    sta.z x+2
    sta.z x+3
    lda #<main.sintab1
    sta.z sintab
    lda #>main.sintab1
    sta.z sintab+1
  loop:
    // while (i < main.wavelength), 16-bit unsigned compare
    lda.z i+1
    cmp #>main.wavelength
    bcc body
    bne done
    lda.z i
    cmp #<main.wavelength
    bcc body
  done:
    rts
  body:
    // *sintab = sin16s(x); sin16s works on its own copy of x
    lda.z x
    sta.z sin16s.x
    lda.z x+1
    sta.z sin16s.x+1
    lda.z x+2
    sta.z sin16s.x+2
    lda.z x+3
    sta.z sin16s.x+3
    jsr sin16s
    ldy #0
    lda.z _2
    sta (sintab),y
    iny
    lda.z _2+1
    sta (sintab),y
    // sintab += SIZEOF_SIGNED_WORD
    clc
    lda.z sintab
    adc #SIZEOF_SIGNED_WORD
    sta.z sintab
    bcc !+
    inc.z sintab+1
  !:
    // x += step (32-bit)
    clc
    lda.z x
    adc.z step
    sta.z x
    lda.z x+1
    adc.z step+1
    sta.z x+1
    lda.z x+2
    adc.z step+2
    sta.z x+2
    lda.z x+3
    adc.z step+3
    sta.z x+3
    // i++
    inc.z i
    bne loop
    inc.z i+1
    jmp loop
}
// Calculate signed word sinus sin(x)
// x: unsigned dword input u[4.28] in the interval $00000000 - PI2_u4f28
// result: signed word sin(x) s[0.15] - using the full range -$7fff - $7fff
// x is modified in place. It shares $11-$14 with mul16u.res, so its upper word is
// copied to x1 ($1f) before the first multiplication.
// isUpper ($10) records whether x was >= PI; Y is only used as the shift counter.
// sin16s(dword zeropage($11) x)
sin16s: {
.label _4 = $11
.label x = $11
.label return = $15
.label x1 = $1f
.label x2 = 6
.label x3 = 6
.label x3_6 = $1b
.label usinx = $15
.label x4 = 6
.label x5 = $1b
.label x5_128 = $1b
.label sinx = $15
.label isUpper = $10
// if (x >= PI_u4f28) { x -= PI_u4f28; isUpper = 1 } else { isUpper = 0 }
// 32-bit unsigned compare, most significant byte first
lda.z x+3
cmp #>PI_u4f28>>$10
bcc b4
bne !+
lda.z x+2
cmp #<PI_u4f28>>$10
bcc b4
bne !+
lda.z x+1
cmp #>PI_u4f28
bcc b4
bne !+
lda.z x
cmp #<PI_u4f28
bcc b4
!:
lda.z x
sec
sbc #<PI_u4f28
sta.z x
lda.z x+1
sbc #>PI_u4f28
sta.z x+1
lda.z x+2
sbc #<PI_u4f28>>$10
sta.z x+2
lda.z x+3
sbc #>PI_u4f28>>$10
sta.z x+3
lda #1
sta.z isUpper
jmp b1
b4:
lda #0
sta.z isUpper
b1:
// if (x >= PI_HALF_u4f28) x = PI_u4f28 - x
lda.z x+3
cmp #>PI_HALF_u4f28>>$10
bcc b2
bne !+
lda.z x+2
cmp #<PI_HALF_u4f28>>$10
bcc b2
bne !+
lda.z x+1
cmp #>PI_HALF_u4f28
bcc b2
bne !+
lda.z x
cmp #<PI_HALF_u4f28
bcc b2
!:
lda #<PI_u4f28
sec
sbc.z x
sta.z x
lda #>PI_u4f28
sbc.z x+1
sta.z x+1
lda #<PI_u4f28>>$10
sbc.z x+2
sta.z x+2
lda #>PI_u4f28>>$10
sbc.z x+3
sta.z x+3
b2:
// _4 = x << 3 (in place, Y counts the three shifts)
ldy #3
!:
asl.z _4
rol.z _4+1
rol.z _4+2
rol.z _4+3
dey
bne !-
// x1 = upper word of _4
lda.z _4+2
sta.z x1
lda.z _4+3
sta.z x1+1
// x2 = mulu16_sel(x1, x1, 0)
lda.z x1
sta.z mulu16_sel.v1
lda.z x1+1
sta.z mulu16_sel.v1+1
lda.z x1
sta.z mulu16_sel.v2
lda.z x1+1
sta.z mulu16_sel.v2+1
ldx #0
jsr mulu16_sel
// copy the result word ($1b) to $06, which is x2 and also mulu16_sel.v1 for the next call
lda.z mulu16_sel.return_17
sta.z mulu16_sel.return
lda.z mulu16_sel.return_17+1
sta.z mulu16_sel.return+1
// x3 = mulu16_sel(x2, x1, 1)
lda.z x1
sta.z mulu16_sel.v2
lda.z x1+1
sta.z mulu16_sel.v2+1
ldx #1
jsr mulu16_sel
lda.z mulu16_sel.return_17
sta.z mulu16_sel.return
lda.z mulu16_sel.return_17+1
sta.z mulu16_sel.return+1
// x3_6 = mulu16_sel(x3, $10000/6, 1); the result stays in $1b
ldx #1
lda #<$10000/6
sta.z mulu16_sel.v2
lda #>$10000/6
sta.z mulu16_sel.v2+1
jsr mulu16_sel
// usinx = x1 - x3_6
lda.z x1
sec
sbc.z x3_6
sta.z usinx
lda.z x1+1
sbc.z x3_6+1
sta.z usinx+1
// x4 = mulu16_sel(x3, x1, 0); v1 at $06 still holds x3 because mulu16_sel writes its result to $1b
lda.z x1
sta.z mulu16_sel.v2
lda.z x1+1
sta.z mulu16_sel.v2+1
ldx #0
jsr mulu16_sel
lda.z mulu16_sel.return_17
sta.z mulu16_sel.return
lda.z mulu16_sel.return_17+1
sta.z mulu16_sel.return+1
// x5 = mulu16_sel(x4, x1, 0); the result stays in $1b
lda.z x1
sta.z mulu16_sel.v2
lda.z x1+1
sta.z mulu16_sel.v2+1
ldx #0
jsr mulu16_sel
// x5_128 = x5 >> 4 (in place)
lsr.z x5_128+1
ror.z x5_128
lsr.z x5_128+1
ror.z x5_128
lsr.z x5_128+1
ror.z x5_128
lsr.z x5_128+1
ror.z x5_128
// usinx += x5_128
lda.z usinx
clc
adc.z x5_128
sta.z usinx
lda.z usinx+1
adc.z x5_128+1
sta.z usinx+1
// negate the result when isUpper is set (sinx shares $15 with usinx and return)
lda.z isUpper
cmp #0
beq b3
sec
lda #0
sbc.z sinx
sta.z sinx
lda #0
sbc.z sinx+1
sta.z sinx+1
b3:
rts
}
// Hex digit characters indexed by nibble value 0-15; read by print_byte
print_hextab: .text "0123456789abcdef"