mirror of
https://github.com/irmen/prog8.git
synced 2024-11-23 22:33:12 +00:00
111 lines
4.2 KiB
Lua
111 lines
4.2 KiB
Lua
; Experimental Vera FX support.
|
|
; Docs:
|
|
; https://github.com/X16Community/x16-docs/blob/master/VERA%20FX%20Reference.md
|
|
; https://docs.google.com/document/d/1q34uWOiM3Be2pnaHRVgSdHySI-qsiQWPTo_gfE54PTg/edit
|
|
|
|
verafx {
|
|
|
|
sub clear(ubyte vbank, uword vaddr, ubyte data, uword amountof32bits) {
|
|
; use cached 4-byte write to quickly clear a portion of the video memory to a given byte value
|
|
; this routine is around 3 times faster as gfx2.clear_screen()
|
|
cx16.VERA_CTRL = 0
|
|
cx16.VERA_ADDR_H = vbank | %00110000 ; 4-byte increment
|
|
cx16.VERA_ADDR_M = msb(vaddr)
|
|
cx16.VERA_ADDR_L = lsb(vaddr)
|
|
cx16.VERA_CTRL = 6<<1 ; dcsel = 6, fill the 32 bits cache
|
|
cx16.VERA_FX_CACHE_L = data
|
|
cx16.VERA_FX_CACHE_M = data
|
|
cx16.VERA_FX_CACHE_H = data
|
|
cx16.VERA_FX_CACHE_U = data
|
|
cx16.VERA_CTRL = 2<<1 ; dcsel = 2
|
|
cx16.VERA_FX_MULT = 0
|
|
cx16.VERA_FX_CTRL = %01000000 ; cache write enable
|
|
|
|
if (amountof32bits & %1111110000000011) == 0 {
|
|
repeat lsb(amountof32bits >> 2)
|
|
unroll 4 cx16.VERA_DATA0=0 ; write 4 bytes at a time, unrolled
|
|
}
|
|
else if (amountof32bits & %1111111000000001) == 0 {
|
|
repeat lsb(amountof32bits >> 1)
|
|
unroll 2 cx16.VERA_DATA0=0 ; write 4 bytes at a time, unrolled
|
|
}
|
|
else if (lsb(amountof32bits) & 3) == 0 {
|
|
repeat amountof32bits >> 2
|
|
unroll 4 cx16.VERA_DATA0=0 ; write 4 bytes at a time, unrolled
|
|
}
|
|
else if (lsb(amountof32bits) & 1) == 0 {
|
|
repeat amountof32bits >> 1
|
|
unroll 2 cx16.VERA_DATA0=0 ; write 4 bytes at a time, unrolled
|
|
}
|
|
else {
|
|
repeat amountof32bits
|
|
cx16.VERA_DATA0=0 ; write 4 bytes at a time
|
|
}
|
|
|
|
cx16.VERA_FX_CTRL = 0 ; cache write disable
|
|
cx16.VERA_CTRL = 0
|
|
}
|
|
|
|
; unsigned multiplication just passes the values as signed to muls
|
|
; if you do this yourself in your call to muls, it will save a few instructions.
|
|
sub mult(uword value1, uword value2) -> uword {
|
|
return muls(value1 as word, value2 as word) as uword
|
|
}
|
|
|
|
asmsub muls(word value1 @R0, word value2 @R1) -> word @AY {
|
|
%asm {{
|
|
lda #(2 << 1)
|
|
sta cx16.VERA_CTRL ; $9F25
|
|
stz cx16.VERA_FX_CTRL ; $9F29 (mainly to reset Addr1 Mode to 0)
|
|
lda #%00010000
|
|
sta cx16.VERA_FX_MULT ; $9F2C
|
|
lda #(6 << 1)
|
|
sta cx16.VERA_CTRL ; $9F25
|
|
lda cx16.r0
|
|
ldy cx16.r0+1
|
|
sta cx16.VERA_FX_CACHE_L ; $9F29
|
|
sty cx16.VERA_FX_CACHE_M ; $9F2A
|
|
lda cx16.r1
|
|
ldy cx16.r1+1
|
|
sta cx16.VERA_FX_CACHE_H ; $9F2B
|
|
sty cx16.VERA_FX_CACHE_U ; $9F2C
|
|
lda cx16.VERA_FX_ACCUM_RESET ; $9F29 (DCSEL=6)
|
|
|
|
; Set the ADDR0 pointer to $1f9bc and write our multiplication result there
|
|
; (these are the 4 bytes just before the PSG registers start)
|
|
lda #(2 << 1)
|
|
sta cx16.VERA_CTRL
|
|
lda #%01000000 ; Cache Write Enable
|
|
sta cx16.VERA_FX_CTRL
|
|
lda #$bc
|
|
sta cx16.VERA_ADDR_L
|
|
lda #$f9
|
|
sta cx16.VERA_ADDR_M
|
|
lda #$01
|
|
sta cx16.VERA_ADDR_H ; no increment
|
|
stz cx16.VERA_DATA0 ; multiply and write out result
|
|
lda #%00010001 ; $01 with Increment 1
|
|
sta cx16.VERA_ADDR_H ; so we can read out the result
|
|
lda cx16.VERA_DATA0
|
|
ldy cx16.VERA_DATA0
|
|
stz cx16.VERA_FX_CTRL ; Cache write disable
|
|
stz cx16.VERA_CTRL ; reset DCSEL
|
|
rts
|
|
; we skip the upper 16 bits of the result:
|
|
; lda cx16.VERA_DATA0
|
|
; sta $0402
|
|
; lda cx16.VERA_DATA0
|
|
; sta $0403
|
|
}}
|
|
}
|
|
|
|
sub transparency(bool enable) {
|
|
cx16.VERA_CTRL = 2<<1 ; dcsel = 2
|
|
if enable
|
|
cx16.VERA_FX_CTRL |= %10000000
|
|
else
|
|
cx16.VERA_FX_CTRL &= %01111111
|
|
cx16.VERA_CTRL = 0
|
|
}
|
|
}
|