added cx16 verafx library module

This commit is contained in:
Irmen de Jong 2023-09-24 23:00:40 +02:00
parent 55646edc3e
commit 390263a34e
5 changed files with 113 additions and 15 deletions

View File

@ -267,6 +267,17 @@ cx16 {
&ubyte VERA_AUDIO_DATA = VERA_BASE + $001D
&ubyte VERA_SPI_DATA = VERA_BASE + $001E
&ubyte VERA_SPI_CTRL = VERA_BASE + $001F
; experimental Vera FX registers: (depends on particular value set in VERA_CTRL!!!)
; These names alias the same addresses ($9F29..$9F2C); which register is actually
; reached depends on the DCSEL value that was written to VERA_CTRL beforehand.
; verafx.muls selects DCSEL=2 before touching FX_CTRL / FX_MULT,
; and DCSEL=6 before touching the FX_CACHE_* and FX_ACCUM_RESET registers.
&ubyte VERA_FX_CTRL = VERA_BASE + $0009     ; (used with DCSEL=2)
&ubyte VERA_FX_MULT = VERA_BASE + $000C     ; (used with DCSEL=2)
&ubyte VERA_FX_CACHE_L = VERA_BASE + $0009  ; (used with DCSEL=6) cache byte 0
&ubyte VERA_FX_CACHE_M = VERA_BASE + $000A  ; (used with DCSEL=6) cache byte 1
&ubyte VERA_FX_CACHE_H = VERA_BASE + $000B  ; (used with DCSEL=6) cache byte 2
&ubyte VERA_FX_CACHE_U = VERA_BASE + $000C  ; (used with DCSEL=6) cache byte 3
&ubyte VERA_FX_ACCUM_RESET = VERA_BASE + $0009 ; (DCSEL=6)  same address as FX_CACHE_L; verafx.muls only ever reads it
; VERA_PSG_BASE = $1F9C0
; VERA_PALETTE_BASE = $1FA00
; VERA_SPRITES_BASE = $1FC00

View File

@ -0,0 +1,59 @@
; Experimental Vera FX support.
; Docs:
; https://github.com/X16Community/x16-docs/blob/master/VERA%20FX%20Reference.md
; https://docs.google.com/document/d/1q34uWOiM3Be2pnaHRVgSdHySI-qsiQWPTo_gfE54PTg/edit
verafx {

    ; Unsigned 16*16 multiplication using the Vera FX hardware multiplier.
    ; Returns the lower 16 bits of the product.
    ; The lower 16 bits of a product are the same for signed and unsigned operands,
    ; so this just passes the values as signed words to muls and casts the result back.
    ; If you do this cast yourself in your call to muls, it will save a few instructions.
    sub mult(uword value1, uword value2) -> uword {
        return muls(value1 as word, value2 as word) as uword
    }

    ; Signed 16*16 multiplication using the Vera FX hardware multiplier.
    ; Arguments are passed in cx16.r0 (value1) and cx16.r1 (value2).
    ; Returns the lower 16 bits of the product in AY (A = low byte, Y = high byte).
    ; Side effects:
    ;  - the Vera ADDR0 pointer is overwritten (left pointing just past the 2 bytes read, increment 1)
    ;  - the 4 bytes of VRAM at $1f9bc (just before the PSG registers) are used as scratch storage
    ;  - on return FX_CTRL, FX_MULT and VERA_CTRL are all reset to 0, so the multiplier is
    ;    switched off again and DCSEL/ADDRSEL are back at 0 for code that runs afterwards.
    asmsub muls(word value1 @R0, word value2 @R1) -> word @AY {
        %asm {{
            lda  #(2 << 1)
            sta  cx16.VERA_CTRL             ; $9F25  select DCSEL=2 (ADDRSEL=0)
            stz  cx16.VERA_FX_CTRL          ; $9F29 (mainly to reset Addr1 Mode to 0)
            lda  #%00010000                 ; Multiplier Enable (see the VERA FX reference)
            sta  cx16.VERA_FX_MULT          ; $9F2C
            lda  #(6 << 1)
            sta  cx16.VERA_CTRL             ; $9F25  select DCSEL=6 to reach the 32 bit cache
            ; the two 16 bit operands go into the lower and upper halves of the cache
            lda  cx16.r0
            ldy  cx16.r0+1
            sta  cx16.VERA_FX_CACHE_L       ; $9F29
            sty  cx16.VERA_FX_CACHE_M       ; $9F2A
            lda  cx16.r1
            ldy  cx16.r1+1
            sta  cx16.VERA_FX_CACHE_H       ; $9F2B
            sty  cx16.VERA_FX_CACHE_U       ; $9F2C
            lda  cx16.VERA_FX_ACCUM_RESET   ; $9F29 (DCSEL=6)  value is ignored, the read itself is what matters

            ; Set the ADDR0 pointer to $1f9bc and write our multiplication result there
            ; (these are the 4 bytes just before the PSG registers start)
            lda  #(2 << 1)
            sta  cx16.VERA_CTRL
            lda  #%01000000                 ; Cache Write Enable
            sta  cx16.VERA_FX_CTRL
            lda  #$bc
            sta  cx16.VERA_ADDR_L
            lda  #$f9
            sta  cx16.VERA_ADDR_M
            lda  #$01
            sta  cx16.VERA_ADDR_H           ; no increment
            stz  cx16.VERA_DATA0            ; multiply and write out result
            lda  #%00010001                 ; $01 with Increment 1
            sta  cx16.VERA_ADDR_H           ; so we can read out the result
            stz  cx16.VERA_FX_CTRL          ; Cache write disable
            lda  cx16.VERA_DATA0            ; result bits 0-7
            ldy  cx16.VERA_DATA0            ; result bits 8-15
            ; we skip the upper 16 bits of the result (the next two bytes readable via VERA_DATA0).

            ; Leave the Vera in a clean state: DCSEL=2 is still selected here so FX_MULT is reachable.
            ; stz doesn't touch A or Y, so the result stays intact.
            stz  cx16.VERA_FX_MULT          ; multiplier off again
            stz  cx16.VERA_CTRL             ; back to DCSEL=0, ADDRSEL=0
            rts
        }}
    }
}

View File

@ -64,6 +64,7 @@ multiply_words .proc
; but there currently is no way to use 4 consecutive bytes in ZP (without disabling irq and saving/restoring them)...
; mult62.a
; from: https://github.com/TobyLobster/multiply_test/blob/main/tests/mult62.a
; based on Dr Jefyll, http://forum.6502.org/viewtopic.php?f=9&t=689&start=0#p19958
; - adjusted to use fixed zero page addresses
; - removed 'decrement to avoid clc' as this is slower on average

View File

@ -485,3 +485,17 @@ See the examples/cx16/sprites/dragon.p8 and dragons.p8 programs for ideas how to
Read the `source code <https://github.com/irmen/prog8/tree/master/compiler/res/prog8lib/cx16/sprites.p8>`_
to see what's in there.
verafx (cx16 only)
-------------------
Available for the Cx16 target.
Experimental routines that use the new Vera FX logic (hopefully coming in the Vera in new X16 boards,
the emulators already support it).
For now, the hardware 16*16 multiplier is exposed via ``mult`` and ``muls`` routines.
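They are about 4 to 5 times faster than the default 6502 cpu routine for word multiplication.
But they depend on some Vera register manipulation (VERA_CTRL and the ADDR0 pointer are overwritten)
and on 4 bytes of VRAM just before the PSG registers (at $1f9bc) that are used as scratch storage for the result.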
Read the `source code <https://github.com/irmen/prog8/tree/master/compiler/res/prog8lib/cx16/verafx.p8>`_
to see what's in there.

View File

@ -1,26 +1,39 @@
%import textio
%import floats
%import verafx
%zeropage basicsafe
%option no_sysinit
; Scratch test program: checks verafx.muls against the regular word multiplication
; and then prints timer readings for both variants.
; NOTE(review): this listing looks like a diff hunk with old and new lines interleaved
; (braces don't balance as shown) -- confirm against the actual file before relying on it.
main {
sub start() {
ubyte from = 10
ubyte compare=9
if from==compare
goto equal
txt.print("from is not compare\n")
equal:
; signed multiplier used for both the verification and the timing loops below
const word MULTIPLIER = 431
ubyte end = 15
ubyte xx
for xx in from to end {
txt.print_ub(xx)
txt.spc()
; verify results:
; compare the compiler's own multiplication with verafx.muls for every value in -50..50,
; and exit with status 1 on the first mismatch
for value in -50 to 50 {
if value*MULTIPLIER != verafx.muls(value, MULTIPLIER) {
txt.print("verafx muls error\n")
sys.exit(1)
}
}
word value
; timing of the Vera FX routine: 250 calls per value, result discarded (void).
; cbm.SETTIM(0,0,0) is called before the loop and cbm.RDTIM16() is printed after it
; (presumably the elapsed jiffies -- confirm against the cbm module)
txt.print("verafx muls...")
cbm.SETTIM(0,0,0)
for value in -50 to 50 {
repeat 250 void verafx.muls(value, MULTIPLIER)
}
txt.print_uw(cbm.RDTIM16())
txt.nl()
; same measurement for the regular multiplication; the result is stored in cx16.r0s
txt.print("6502 muls...")
cbm.SETTIM(0,0,0)
for value in -50 to 50 {
repeat 250 cx16.r0s = value*MULTIPLIER
}
txt.print_uw(cbm.RDTIM16())
txt.nl()
ubyte ten=9
if from!=ten
txt.print("from is not 10\n")
}
}