mirror of
https://github.com/irmen/prog8.git
synced 2024-11-26 11:49:22 +00:00
added cx16 verafx library module
This commit is contained in:
parent
55646edc3e
commit
390263a34e
@ -267,6 +267,17 @@ cx16 {
|
||||
&ubyte VERA_AUDIO_DATA = VERA_BASE + $001D
|
||||
&ubyte VERA_SPI_DATA = VERA_BASE + $001E
|
||||
&ubyte VERA_SPI_CTRL = VERA_BASE + $001F
|
||||
|
||||
; experimental Vera FX registers: (depends on particular value set in VERA_CTRL!!!)
|
||||
&ubyte VERA_FX_CTRL = VERA_BASE + $0009
|
||||
&ubyte VERA_FX_MULT = VERA_BASE + $000C
|
||||
&ubyte VERA_FX_CACHE_L = VERA_BASE + $0009
|
||||
&ubyte VERA_FX_CACHE_M = VERA_BASE + $000A
|
||||
&ubyte VERA_FX_CACHE_H = VERA_BASE + $000B
|
||||
&ubyte VERA_FX_CACHE_U = VERA_BASE + $000C
|
||||
&ubyte VERA_FX_ACCUM_RESET = VERA_BASE + $0009 ; (DCSEL=6)
|
||||
|
||||
|
||||
; VERA_PSG_BASE = $1F9C0
|
||||
; VERA_PALETTE_BASE = $1FA00
|
||||
; VERA_SPRITES_BASE = $1FC00
|
||||
|
59
compiler/res/prog8lib/cx16/verafx.p8
Normal file
59
compiler/res/prog8lib/cx16/verafx.p8
Normal file
@ -0,0 +1,59 @@
|
||||
; Experimental Vera FX support.
|
||||
; Docs:
|
||||
; https://github.com/X16Community/x16-docs/blob/master/VERA%20FX%20Reference.md
|
||||
; https://docs.google.com/document/d/1q34uWOiM3Be2pnaHRVgSdHySI-qsiQWPTo_gfE54PTg/edit
|
||||
|
||||
verafx {

    ; Unsigned 16*16 multiplication using the Vera FX hardware multiplier.
    ; Returns the lower 16 bits of the product of value1 and value2.
    ; This just passes the values as signed words to muls: the lower 16 bits of a
    ; product are the same for signed and unsigned (two's complement) operands.
    ; If you do the casts yourself in your call to muls, it will save a few instructions.
    sub mult(uword value1, uword value2) -> uword {
        return muls(value1 as word, value2 as word) as uword
    }

    ; Signed 16*16 multiplication using the Vera FX hardware multiplier.
    ; Inputs:  value1 in cx16.r0, value2 in cx16.r1.
    ; Returns: the lower 16 bits of the product in AY (A = low byte, Y = high byte).
    ; Side effects: overwrites the 4 bytes of VRAM at $1f9bc (just before the PSG registers),
    ; changes the Vera ADDR0 address registers, and leaves VERA_CTRL and the FX control /
    ; multiplier registers cleared to 0 on return.
    asmsub muls(word value1 @R0, word value2 @R1) -> word @AY {
        %asm {{
            lda  #(2 << 1)                  ; DCSEL=2: FX_CTRL / FX_MULT are visible at $9F29 / $9F2C
            sta  cx16.VERA_CTRL             ; $9F25
            stz  cx16.VERA_FX_CTRL          ; $9F29 (mainly to reset Addr1 Mode to 0)
            lda  #%00010000                 ; Multiplier Enable (bit 4, per the Vera FX reference)
            sta  cx16.VERA_FX_MULT          ; $9F2C
            lda  #(6 << 1)                  ; DCSEL=6: the 32 bit cache is visible at $9F29-$9F2C
            sta  cx16.VERA_CTRL             ; $9F25
            lda  cx16.r0                    ; value1 goes into the lower 16 bits of the cache
            ldy  cx16.r0+1
            sta  cx16.VERA_FX_CACHE_L       ; $9F29
            sty  cx16.VERA_FX_CACHE_M       ; $9F2A
            lda  cx16.r1                    ; value2 goes into the upper 16 bits of the cache
            ldy  cx16.r1+1
            sta  cx16.VERA_FX_CACHE_H       ; $9F2B
            sty  cx16.VERA_FX_CACHE_U       ; $9F2C
            lda  cx16.VERA_FX_ACCUM_RESET   ; $9F29 (DCSEL=6) reading this resets the accumulator

            ; Set the ADDR0 pointer to $1f9bc and write our multiplication result there
            ; (these are the 4 bytes just before the PSG registers start)
            lda  #(2 << 1)
            sta  cx16.VERA_CTRL
            lda  #%01000000                 ; Cache Write Enable
            sta  cx16.VERA_FX_CTRL
            lda  #$bc
            sta  cx16.VERA_ADDR_L
            lda  #$f9
            sta  cx16.VERA_ADDR_M
            lda  #$01
            sta  cx16.VERA_ADDR_H           ; no increment
            stz  cx16.VERA_DATA0            ; multiply and write out result
            lda  #%00010001                 ; $01 with Increment 1
            sta  cx16.VERA_ADDR_H           ; so we can read out the result
            stz  cx16.VERA_FX_CTRL          ; Cache write disable
            lda  cx16.VERA_DATA0            ; result bits 0-7
            ldy  cx16.VERA_DATA0            ; result bits 8-15
            ; we skip the upper 16 bits of the result:
            ;    lda  cx16.VERA_DATA0
            ;    sta  $0402
            ;    lda  cx16.VERA_DATA0
            ;    sta  $0403

            ; Leave the Vera in its default state so that other code is not affected:
            ; DCSEL is still 2 here, so $9F2C is FX_MULT. (stz leaves A, Y and the flags intact.)
            stz  cx16.VERA_FX_MULT          ; $9F2C multiplier disable
            stz  cx16.VERA_CTRL             ; $9F25 back to DCSEL=0, ADDRSEL=0
            rts
        }}
    }
}
|
@ -64,6 +64,7 @@ multiply_words .proc
|
||||
; but there currently is no way to use 4 consecutive bytes in ZP (without disabling irq and saving/restoring them)...
|
||||
|
||||
; mult62.a
|
||||
; from: https://github.com/TobyLobster/multiply_test/blob/main/tests/mult62.a
|
||||
; based on Dr Jefyll, http://forum.6502.org/viewtopic.php?f=9&t=689&start=0#p19958
|
||||
; - adjusted to use fixed zero page addresses
|
||||
; - removed 'decrement to avoid clc' as this is slower on average
|
||||
|
@ -485,3 +485,17 @@ See the examples/cx16/sprites/dragon.p8 and dragons.p8 programs for ideas how to
|
||||
|
||||
Read the `source code <https://github.com/irmen/prog8/tree/master/compiler/res/prog8lib/cx16/sprites.p8>`_
|
||||
to see what's in there.
|
||||
|
||||
|
||||
verafx (cx16 only)
|
||||
-------------------
|
||||
Available for the Cx16 target.
|
||||
Experimental routines that use the new Vera FX logic (hopefully coming in the Vera in new X16 boards,
|
||||
the emulators already support it).
|
||||
|
||||
For now, the hardware 16*16 multiplier is exposed via ``mult`` and ``muls`` routines.
|
||||
They are about 4 to 5 times faster than the default 6502 cpu routine for word multiplication.
|
||||
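But they depend on the Vera FX hardware being available, they modify several Vera registers (such as VERA_CTRL and the ADDR0 address), and they use the 4 bytes of VRAM just before the PSG registers ($1F9BC-$1F9BF) as scratch storage.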
|
||||
|
||||
Read the `source code <https://github.com/irmen/prog8/tree/master/compiler/res/prog8lib/cx16/verafx.p8>`_
|
||||
to see what's in there.
|
||||
|
@ -1,26 +1,39 @@
|
||||
%import textio
|
||||
%import floats
|
||||
%import verafx
|
||||
%zeropage basicsafe
|
||||
%option no_sysinit
|
||||
|
||||
main {
|
||||
sub start() {
|
||||
ubyte from = 10
|
||||
ubyte compare=9
|
||||
if from==compare
|
||||
goto equal
|
||||
|
||||
txt.print("from is not compare\n")
|
||||
equal:
|
||||
const word MULTIPLIER = 431
|
||||
|
||||
ubyte end = 15
|
||||
ubyte xx
|
||||
for xx in from to end {
|
||||
txt.print_ub(xx)
|
||||
txt.spc()
|
||||
; verify results:
|
||||
for value in -50 to 50 {
|
||||
if value*MULTIPLIER != verafx.muls(value, MULTIPLIER) {
|
||||
txt.print("verafx muls error\n")
|
||||
sys.exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
word value
|
||||
txt.print("verafx muls...")
|
||||
cbm.SETTIM(0,0,0)
|
||||
for value in -50 to 50 {
|
||||
repeat 250 void verafx.muls(value, MULTIPLIER)
|
||||
}
|
||||
txt.print_uw(cbm.RDTIM16())
|
||||
txt.nl()
|
||||
|
||||
txt.print("6502 muls...")
|
||||
cbm.SETTIM(0,0,0)
|
||||
for value in -50 to 50 {
|
||||
repeat 250 cx16.r0s = value*MULTIPLIER
|
||||
}
|
||||
txt.print_uw(cbm.RDTIM16())
|
||||
txt.nl()
|
||||
|
||||
ubyte ten=9
|
||||
if from!=ten
|
||||
txt.print("from is not 10\n")
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user