mirror of
https://github.com/irmen/prog8.git
synced 2024-11-26 11:49:22 +00:00
added verafx.clear()
This commit is contained in:
parent
70ee2026ff
commit
c3f1f09ad1
@ -6,15 +6,45 @@
|
||||
verafx {
|
||||
%option no_symbol_prefixing
|
||||
|
||||
sub fill(ubyte vbank, uword vaddr, ubyte data, uword numlongs) {
|
||||
; TODO use vera fx cache write
|
||||
cx16.vaddr(vbank, vaddr, 0, true)
|
||||
repeat numlongs {
|
||||
cx16.VERA_DATA0 = data
|
||||
cx16.VERA_DATA0 = data
|
||||
cx16.VERA_DATA0 = data
|
||||
cx16.VERA_DATA0 = data
|
||||
sub clear(ubyte vbank, uword vaddr, ubyte data, uword amountof32bits) {
    ; Quickly set a portion of video memory to one byte value by using
    ; Vera FX cached 4-byte writes.
    ;   vbank, vaddr   : start address in video memory
    ;   data           : the byte value to fill with
    ;   amountof32bits : how many 4-byte groups to write
    ; This is around 3 times faster than gfx2.clear_screen().

    ; program the start address with a 4-byte increment per write
    cx16.VERA_CTRL = 0
    cx16.VERA_ADDR_H = vbank | $30
    cx16.VERA_ADDR_M = msb(vaddr)
    cx16.VERA_ADDR_L = lsb(vaddr)

    ; dcsel = 6: load all four bytes of the 32 bits cache with the fill value
    cx16.VERA_CTRL = 6 << 1
    cx16.VERA_FX_CACHE_L = data
    cx16.VERA_FX_CACHE_M = data
    cx16.VERA_FX_CACHE_H = data
    cx16.VERA_FX_CACHE_U = data

    ; dcsel = 2: switch on cache writes
    cx16.VERA_CTRL = 2 << 1
    cx16.VERA_FX_MULT = 0
    cx16.VERA_FX_CTRL = $40

    ; Every store to VERA_DATA0 below writes 4 bytes at a time.
    ; Pick the loop shape from the count: unroll by 4 or 2 when it divides evenly,
    ; and use a byte-sized repeat counter when the shifted count fits in 8 bits.
    if (amountof32bits & $fc03) == 0 {
        ; multiple of 4 and below 1024
        repeat lsb(amountof32bits >> 2) {
            unroll 4 {
                cx16.VERA_DATA0 = 0
            }
        }
    }
    else if (amountof32bits & $fe01) == 0 {
        ; multiple of 2 and below 512
        repeat lsb(amountof32bits >> 1) {
            unroll 2 {
                cx16.VERA_DATA0 = 0
            }
        }
    }
    else if (lsb(amountof32bits) & 3) == 0 {
        ; multiple of 4, word-sized counter
        repeat amountof32bits >> 2 {
            unroll 4 {
                cx16.VERA_DATA0 = 0
            }
        }
    }
    else if (lsb(amountof32bits) & 1) == 0 {
        ; multiple of 2, word-sized counter
        repeat amountof32bits >> 1 {
            unroll 2 {
                cx16.VERA_DATA0 = 0
            }
        }
    }
    else {
        ; odd count: one write per iteration
        repeat amountof32bits {
            cx16.VERA_DATA0 = 0
        }
    }

    ; switch cache writes off again and reset VERA_CTRL
    cx16.VERA_FX_CTRL = 0
    cx16.VERA_CTRL = 0
}
|
||||
|
||||
; unsigned multiplication just passes the values as signed to muls
|
||||
|
@ -493,9 +493,14 @@ Available for the Cx16 target.
|
||||
Experimental routines that use the new Vera FX logic (hopefully coming in the Vera in new X16 boards,
|
||||
the emulators already support it).
|
||||
|
||||
For now, the hardware 16*16 multiplier is exposed via ``mult`` and ``muls`` routines.
|
||||
They are about 4 to 5 times faster than the default 6502 cpu routine for word multiplication.
|
||||
But they depend on
|
||||
``mult`` , ``muls``
|
||||
For now, the hardware 16*16 multiplier is exposed via ``mult`` and ``muls`` routines (unsigned and signed respectively).
|
||||
They are about 4 to 5 times faster than the default 6502 cpu routine for word multiplication.
|
||||
But they depend on some Vera manipulation and 4 bytes in vram just below the PSG registers for storage.
|
||||
|
||||
``clear``
|
||||
There's also a ``clear`` routine here to very quickly clear a piece of vram to a given byte value (it writes 4 bytes at a time).
|
||||
The routine is around 3 times faster than a regular unrolled loop to clear vram.
|
||||
|
||||
Read the `source code <https://github.com/irmen/prog8/tree/master/compiler/res/prog8lib/cx16/verafx.p8>`_
|
||||
to see what's in there.
|
||||
|
@ -1,6 +1,8 @@
|
||||
TODO
|
||||
====
|
||||
|
||||
- '>>=' can be used as an operator in an expression?? should only be augmented assignment!
|
||||
|
||||
- [on branch: shortcircuit] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
|
||||
- [on branch: ir-less-branch-opcodes] IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction
|
||||
- IR: reduce amount of CMP/CMPI after instructions that set the status bits correctly (LOADs? INC? etc), but only after setting the status bits is verified!
|
||||
|
@ -37,8 +37,24 @@ main {
|
||||
txt.nl()
|
||||
|
||||
gfx2.screen_mode(1)
|
||||
verafx.fill(0, 0, %10101010, 1200) ; should fill top half of the screen
|
||||
verafx.fill(0, 4800, %11111111, 1200) ; should fill bottom half of the screen
|
||||
|
||||
cbm.SETTIM(0,0,0)
|
||||
repeat 255 {
|
||||
gfx2.clear_screen()
|
||||
}
|
||||
uword time1 = cbm.RDTIM16()
|
||||
|
||||
cbm.SETTIM(0,0,0)
|
||||
repeat 255 {
|
||||
verafx.clear(0, 0, %10101010, 2400)
|
||||
}
|
||||
uword time2 = cbm.RDTIM16()
|
||||
|
||||
gfx2.screen_mode(0)
|
||||
txt.print_uw(time1)
|
||||
txt.spc()
|
||||
txt.print_uw(time2)
|
||||
txt.nl()
|
||||
|
||||
|
||||
; txt.print_uw(math.mul16_last_upper())
|
||||
|
Loading…
Reference in New Issue
Block a user