Add clear_screen and set_screen_mode to gfx_lores. Fix boolean draw vs color param in some monogfx routines. Elaborate some docs.

This commit is contained in:
Irmen de Jong 2024-09-17 21:01:01 +02:00
parent cac4c1eb1e
commit 2954f5f04d
12 changed files with 92 additions and 56 deletions

View File

@ -82,7 +82,7 @@ internal class FunctionCallAsmGen(private val program: PtProgram, private val as
is PtAddressOf -> false
is PtIdentifier -> false
is PtIrRegister -> false
is PtMemoryByte -> true // TODO might not actually need extra registers if the value has to end up in A
is PtMemoryByte -> arg.address !is PtNumber && arg.address !is PtIdentifier
is PtNumber -> false
is PtBool -> false
else -> true

View File

@ -1,10 +1,31 @@
; optimized graphics routines for just a single screen mode: lores 320*240, 256c (8bpp)
; optimized graphics routines for just the single screen mode: lores 320*240, 256c (8bpp)
; bitmap image needs to start at VRAM address $00000.
; This is compatible with the CX16's screen mode 128. (void cx16.set_screen_mode(128))
gfx_lores {
sub set_screen_mode() {
; Switch the display to the single screen mode this module supports
; (lores 320*240, 256 colors, 8bpp) by programming the VERA display
; registers directly, and then clear the bitmap to color 0.
; NOTE(review): VERA_CTRL=0 presumably selects the default register bank (DCSEL=0)
; so that the DC_* writes below hit the intended registers - confirm against the VERA reference.
cx16.VERA_CTRL=0
cx16.VERA_DC_VIDEO = (cx16.VERA_DC_VIDEO & %11001111) | %00100000 ; enable only layer 1
; NOTE(review): a scale value of 64 presumably means 2x pixel doubling (640*480 -> 320*240) - confirm.
cx16.VERA_DC_HSCALE = 64
cx16.VERA_DC_VSCALE = 64
; NOTE(review): %00000111 presumably selects bitmap mode with 8bpp color depth on layer 1 - confirm.
cx16.VERA_L1_CONFIG = %00000111
cx16.VERA_L1_MAPBASE = 0
; tile base 0: the bitmap image has to start at VRAM address $00000 (see the note at the top of this file)
cx16.VERA_L1_TILEBASE = 0
clear_screen(0)
}
sub clear_screen(ubyte color) {
; Fill the entire 320*240 bitmap with the given color.
; color: the byte value that is written to every pixel.
; The fill starts at VRAM address 0 and writes 240 lines of 320 bytes each
; via cs_innerloop320 (which stores to VERA_DATA0).
cx16.VERA_CTRL=0
cx16.VERA_ADDR=0
cx16.VERA_ADDR_H = 1<<4 ; 1 pixel auto increment
repeat 240
cs_innerloop320(color)
; afterwards, reset the VERA address to 0 again, without auto increment
cx16.VERA_ADDR=0
cx16.VERA_ADDR_H = 0
}
sub line(uword x1, ubyte y1, uword x2, ubyte y2, ubyte color) {
; Bresenham algorithm.
; This code special-cases various quadrant loops to allow simple ++ and -- operations.
@ -141,7 +162,6 @@ times320_hi .byte `times320
}}
}
sub horizontal_line(uword xx, ubyte yy, uword length, ubyte color) {
if length==0
return
@ -183,6 +203,26 @@ times320_hi .byte `times320
}}
}
asmsub cs_innerloop320(ubyte color @A) clobbers(Y) {
; Store the color byte (passed in A) into VERA_DATA0 320 times:
; 40 loop iterations (counted down in Y) of 8 unrolled stores each.
; The caller is responsible for setting up the VERA address and auto increment
; beforehand (clear_screen does this), so that every store lands on the next pixel.
; using verafx 32 bits writes here would make this faster but it's safer to
; use verafx only explicitly when you know what you're doing.
%asm {{
ldy #40
- sta cx16.VERA_DATA0
sta cx16.VERA_DATA0
sta cx16.VERA_DATA0
sta cx16.VERA_DATA0
sta cx16.VERA_DATA0
sta cx16.VERA_DATA0
sta cx16.VERA_DATA0
sta cx16.VERA_DATA0
dey
bne -
rts
}}
}
inline asmsub vera_setaddr(uword xx @R0, ubyte yy @R1) {
; set the correct vera start address (no auto increment yet!)
%asm {{

View File

@ -32,7 +32,7 @@ monogfx {
width = 320
height = 240
mode = MODE_NORMAL
clear_screen(0)
clear_screen(false)
}
sub hires() {
@ -47,7 +47,7 @@ monogfx {
width = 640
height = 480
mode = MODE_NORMAL
clear_screen(0)
clear_screen(false)
}
sub textmode() {
@ -61,16 +61,16 @@ monogfx {
mode = dm
}
sub clear_screen(ubyte color) {
sub clear_screen(bool draw) {
position(0, 0)
when width {
320 -> {
repeat 240/2/8
cs_innerloop640(color)
cs_innerloop640(draw)
}
640 -> {
repeat 480/8
cs_innerloop640(color)
cs_innerloop640(draw)
}
}
position(0, 0)
@ -90,8 +90,8 @@ monogfx {
}
sub fillrect(uword xx, uword yy, uword rwidth, uword rheight, bool draw) {
; Draw a filled rectangle of the given size and color.
; To fill the whole screen, use clear_screen(color) instead - it is much faster.
; Draw a filled rectangle of the given size.
; To fill the whole screen, use clear_screen(draw) instead - it is much faster.
if rwidth==0
return
repeat rheight {
@ -1000,7 +1000,7 @@ cdraw_mod2 ora cx16.VERA_DATA1
}
}
asmsub cs_innerloop640(ubyte color @A) clobbers(Y) {
asmsub cs_innerloop640(bool draw @A) clobbers(Y) {
; using verafx 32 bits writes here would make this faster but it's safer to
; use verafx only explicitly when you know what you're doing.
%asm {{

View File

@ -23,7 +23,7 @@ monogfx {
width = 320
height = 240
mode = MODE_NORMAL
clear_screen(0)
clear_screen(false)
}
sub hires() {
@ -32,7 +32,7 @@ monogfx {
width = 640
height = 480
mode = MODE_NORMAL
clear_screen(0)
clear_screen(false)
}
sub textmode() {
@ -43,9 +43,10 @@ monogfx {
mode = dm
}
sub clear_screen(ubyte color) {
if color!=0
color=255
sub clear_screen(bool draw) {
ubyte color = 0
if draw
color = 255
sys.gfx_clear(color)
}
@ -64,7 +65,7 @@ monogfx {
sub fillrect(uword xx, uword yy, uword rwidth, uword rheight, bool draw) {
; Draw a filled rectangle of the given size.
; To fill the whole screen, use clear_screen(color) instead - it is much faster.
; To fill the whole screen, use clear_screen(draw) instead - it is much faster.
if rwidth==0
return
repeat rheight {

View File

@ -903,7 +903,7 @@ to see what's in there.
gfx2 (cx16 only)
-----------------
Full-screen multicolor bitmap graphics routines, available on the Cx16 machine only.
Same interface as monogfx, but for color screens. For 1 bpp monochrome screens, use monogfx.
Same interface as monogfx, but for color screens. For 1 bpp monochrome screens: use monogfx.
- multiple full-screen bitmap color resolutions
- clearing screen, switching screen mode, also back to text mode
@ -916,6 +916,15 @@ Read the `gfx2 source code <https://github.com/irmen/prog8/tree/master/compiler/
to see what's in there.
gfx_lores (cx16 only)
----------------------
Heavily optimized graphics routines for just the single screen mode: lores 320*240, 256c (8bpp).
This is screen mode 1 from the gfx2 module (and also compatible with X16's BASIC screen mode 128).
Read the `gfx_lores source code <https://github.com/irmen/prog8/tree/master/compiler/res/prog8lib/cx16/gfx_lores.p8>`_
to see what's in there.
palette (cx16 only)
--------------------
Available for the Cx16 target. Various routines to set the display color palette.

View File

@ -1,25 +1,27 @@
TODO
====
Regenerate skeletons in doc.
Improve register load order in subroutine call args assignments:
in certain situations, the "wrong" order of evaluation of function call arguments is done which results
in overwriting registers that already got their value, which requires a lot of stack juggling (especially on plain 6502 cpu!)
Maybe this routine can be made more intelligent. See usesOtherRegistersWhileEvaluating() and argumentsViaRegisters().
Regenerate skeletons in doc.
Future Things and Ideas
^^^^^^^^^^^^^^^^^^^^^^^
Compiler:
- Some facility to use add-with-carry and sub-with-carry (so we can chain additions/subtractions without clc/sec inserted every time)
Note: +/- 0 can't be optimized away anymore in this case!
Note2: may need to preserve carry flag during evaluation of the operands!
Note3: only available for bytes? (or does it work on words automatically?), and perhaps restrict operand to a simple expression?
- Can we support signed % (remainder) somehow?
- Don't add "random" rts to %asm blocks but instead give a warning about it? (but this breaks existing behavior that others already depend on... command line switch?)
- IR: implement missing operators in AssignmentGen (array shifts etc)
- IR: CMPI+BSTEQ --> new BEQ reg,value,label instruction (like BGT etc)
- expand the kata encoding to somehow translate normal katakana to half-widths? (see comment in KatakanaEncoding)
- instead of copy-pasting inline asmsubs, make them into a 64tass macro and use that instead.
that will allow them to be reused from custom user written assembly code as well.
- Multidimensional arrays and chained indexing, purely as syntactic sugar over regular arrays.

View File

@ -65,10 +65,10 @@ adpcm {
}
sub decode_nibble(ubyte @zp nibble) {
; Decoder for nibbles for the first channel.
; this is the hotspot of the decoder algorithm!
; Decoder for a single nibble for the first channel. (value of 'nibble' needs to be strictly 0-15 !)
; This is the hotspot of the decoder algorithm!
; Note that the generated assembly from this is pretty efficient,
; rewriting it by hand in asm seems to improve it only 5-10%
; rewriting it by hand in asm seems to improve it only ~10%.
cx16.r0s = 0 ; difference
if nibble & %0100 !=0
cx16.r0s += pstep

View File

@ -153,10 +153,10 @@ mono {
repeat 252/2 {
unroll 2 {
nibble = @(main.nibblesptr)
adpcm.decode_nibble(nibble & 15) ; first word
adpcm.decode_nibble(nibble & 15) ; first word (note: upper nibble needs to be zero!)
cx16.VERA_AUDIO_DATA = lsb(adpcm.predict)
cx16.VERA_AUDIO_DATA = msb(adpcm.predict)
adpcm.decode_nibble(nibble>>4) ; second word
adpcm.decode_nibble(nibble>>4) ; second word (note: upper nibble is zero, after the shifts.)
cx16.VERA_AUDIO_DATA = lsb(adpcm.predict)
cx16.VERA_AUDIO_DATA = msb(adpcm.predict)
main.nibblesptr++
@ -219,6 +219,7 @@ stereo {
sub decode_nibbles_unrolled() {
; decode 4 left channel nibbles
; note: when calling decode_nibble(), the upper nibble in the argument needs to be zero
uword[8] left
uword[8] right
ubyte @requirezp nibble = @(main.nibblesptr)

View File

@ -304,6 +304,7 @@ _lp2 lda $ffff,y
repeat 252/2 {
unroll 2 {
nibble = @(nibblesptr)
; note: when calling decode_nibble(), the upper nibble in the argument needs to be zero
adpcm.decode_nibble(nibble & 15) ; first word
cx16.VERA_AUDIO_DATA = lsb(adpcm.predict)
cx16.VERA_AUDIO_DATA = msb(adpcm.predict)
@ -330,6 +331,7 @@ _lp2 lda $ffff,y
sub decode_nibbles_unrolled() {
; decode 4 left channel nibbles
; note: when calling decode_nibble(), the upper nibble in the argument needs to be zero
uword[8] left
uword[8] right
ubyte @requirezp nibble = @(nibblesptr)

View File

@ -195,8 +195,8 @@ main {
}
sys.wait(60)
monogfx.clear_screen(1)
monogfx.clear_screen(0)
monogfx.clear_screen(true)
monogfx.clear_screen(false)
ubyte radius

View File

@ -1,27 +1,11 @@
%option no_sysinit
%import gfx_lores
%zeropage basicsafe
main {
sub start() {
; nothing!
}
}
derp {
asmsub f_tell() -> uword @R0, uword @R1, uword @R2, uword @R3 {
%asm {{
jmp p8s_internal_f_tell
}}
}
sub internal_f_tell() {
cx16.r1 = read4hex()
sub read4hex() -> uword {
str @shared hex = "0000000000000000000000000000000000000000000"
cx16.r0++
return cx16.r0
}
gfx_lores.set_screen_mode()
gfx_lores.clear_screen(0)
gfx_lores.line(0,0,319,239,5)
}
}

View File

@ -1,25 +1,22 @@
#!/usr/bin/env python
"""
program_description = """
This is a simple run-time profiler tool for X16 assembly programs.
It takes an assembly list file (as produced by 64tass/turbo assembler) and
a memory access statistics dump file (produced by the emulator's -memorystats option)
a memory access statistics dump file (produced by the X16 emulator's -memorystats option)
and prints out what assembly lines and variables were read from and written to the most.
These may indicate hot paths or even bottlenecks in your program,
and what variables in system ram might be better placed in Zeropage.
The -memorystats option in the emulator is work in progress at the time of writing.
"""
import sys
import argparse
import operator
from typing import Tuple
class AsmList:
"""parses a l64tass Turbo Assembler Macro listing file"""
"""parses a 64tass Turbo Assembler Macro listing file"""
def __init__(self, filename: str) -> None:
self.lines = []
@ -170,7 +167,7 @@ def profile(number_of_lines: int, asmlist: str, memstats: str) -> None:
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="simple X16 assembly run time profiler")
parser = argparse.ArgumentParser(description=program_description)
parser.add_argument("-n", dest="number", type=int, default=20, help="amount of reads and writes to print (default 20)")
parser.add_argument("asmlistfile", type=str, help="the 64tass/turbo assembler listing file to read")
parser.add_argument("memorystatsfile", type=str, help="the X16 emulator memstats dump file to read")