Add clear_screen and set_screen_mode to gfx_lores. Fix boolean draw vs color param in some monogfx routines. Elaborate some docs.

2025-08-10 10:25:31 +00:00 · 2024-09-17 21:01:01 +02:00
parent cac4c1eb1e
commit 2954f5f04d
12 changed files with 92 additions and 56 deletions
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/FunctionCallAsmGen.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/FunctionCallAsmGen.kt
@@ -82,7 +82,7 @@ internal class FunctionCallAsmGen(private val program: PtProgram, private val as
            is PtAddressOf -> false
            is PtIdentifier -> false
            is PtIrRegister -> false
-            is PtMemoryByte -> true     // TODO might not actually need extra registers if the value has to end up in A
+            is PtMemoryByte -> arg.address !is PtNumber && arg.address !is PtIdentifier
            is PtNumber -> false
            is PtBool -> false
            else -> true
--- a/compiler/res/prog8lib/cx16/gfx_lores.p8
+++ b/compiler/res/prog8lib/cx16/gfx_lores.p8
@@ -1,10 +1,31 @@
-; optimized graphics routines for just a single screen mode:  lores 320*240, 256c  (8bpp)
+; optimized graphics routines for just the single screen mode: lores 320*240, 256c  (8bpp)
 ; bitmap image needs to start at VRAM address $00000.
 ; This is compatible with the CX16's screen mode 128.  (void cx16.set_screen_mode(128))


 gfx_lores {

+    sub set_screen_mode() {      ; sets up the VERA display registers for this module's screen mode (lores 320*240, 8bpp per the file header) and clears the screen
+        cx16.VERA_CTRL=0
+        cx16.VERA_DC_VIDEO = (cx16.VERA_DC_VIDEO & %11001111) | %00100000      ; enable only layer 1
+        cx16.VERA_DC_HSCALE = 64      ; presumably 2x horizontal scaling (320 pixels wide) - confirm against the VERA docs
+        cx16.VERA_DC_VSCALE = 64      ; presumably 2x vertical scaling (240 pixels high) - confirm against the VERA docs
+        cx16.VERA_L1_CONFIG = %00000111      ; presumably selects 8bpp bitmap mode for layer 1 - confirm against the VERA docs
+        cx16.VERA_L1_MAPBASE = 0
+        cx16.VERA_L1_TILEBASE = 0      ; presumably places the bitmap at VRAM address $00000, as the file header requires - confirm
+        clear_screen(0)      ; start out with the whole screen filled with color 0
+    }
+
+    sub clear_screen(ubyte color) {      ; fills the whole 320*240 bitmap with the given color
+        cx16.VERA_CTRL=0
+        cx16.VERA_ADDR=0      ; start writing at the beginning of the bitmap
+        cx16.VERA_ADDR_H = 1<<4    ; 1 pixel auto increment
+        repeat 240
+            cs_innerloop320(color)      ; each call stores 320 bytes, so 240 calls cover 240 rows of 320 pixels
+        cx16.VERA_ADDR=0      ; leave the VERA address back at 0 ...
+        cx16.VERA_ADDR_H = 0      ; ... and clear the value written to VERA_ADDR_H above (the auto increment setting)
+    }
+
    sub line(uword x1, ubyte y1, uword x2, ubyte y2, ubyte color) {
        ; Bresenham algorithm.
        ; This code special-cases various quadrant loops to allow simple ++ and -- operations.
@@ -141,7 +162,6 @@ times320_hi     .byte `times320
            }}
    }

-
    sub horizontal_line(uword xx, ubyte yy, uword length, ubyte color) {
        if length==0
            return
@@ -183,6 +203,26 @@ times320_hi     .byte `times320
        }}
    }

+
+    asmsub cs_innerloop320(ubyte color @A) clobbers(Y) {
+        ; Stores the color value in A to VERA_DATA0 320 times (40 iterations of 8 unrolled stores).
+        ; Using verafx 32-bit writes here would be faster, but it's safer to use verafx only explicitly when you know what you're doing.
+        %asm {{
+            ldy  #40      ; 40 loop iterations * 8 stores = 320 bytes
+-           sta  cx16.VERA_DATA0
+            sta  cx16.VERA_DATA0
+            sta  cx16.VERA_DATA0
+            sta  cx16.VERA_DATA0
+            sta  cx16.VERA_DATA0
+            sta  cx16.VERA_DATA0
+            sta  cx16.VERA_DATA0
+            sta  cx16.VERA_DATA0
+            dey
+            bne  -      ; loop back to the anonymous label until Y reaches zero
+            rts
+        }}
+    }
+
    inline asmsub vera_setaddr(uword xx @R0, ubyte yy @R1) {
        ; set the correct vera start address (no auto increment yet!)
        %asm {{
--- a/compiler/res/prog8lib/cx16/monogfx.p8
+++ b/compiler/res/prog8lib/cx16/monogfx.p8
@@ -32,7 +32,7 @@ monogfx {
        width = 320
        height = 240
        mode = MODE_NORMAL
-        clear_screen(0)
+        clear_screen(false)
    }

    sub hires() {
@@ -47,7 +47,7 @@ monogfx {
        width = 640
        height = 480
        mode = MODE_NORMAL
-        clear_screen(0)
+        clear_screen(false)
    }

    sub textmode() {
@@ -61,16 +61,16 @@ monogfx {
        mode = dm
    }

-    sub clear_screen(ubyte color) {
+    sub clear_screen(bool draw) {
        position(0, 0)
        when width {
            320 -> {
                repeat 240/2/8
-                    cs_innerloop640(color)
+                    cs_innerloop640(draw)
            }
            640 -> {
                repeat 480/8
-                    cs_innerloop640(color)
+                    cs_innerloop640(draw)
            }
        }
        position(0, 0)
@@ -90,8 +90,8 @@ monogfx {
    }

    sub fillrect(uword xx, uword yy, uword rwidth, uword rheight, bool draw) {
-        ; Draw a filled rectangle of the given size and color.
-        ; To fill the whole screen, use clear_screen(color) instead - it is much faster.
+        ; Draw a filled rectangle of the given size.
+        ; To fill the whole screen, use clear_screen(draw) instead - it is much faster.
        if rwidth==0
            return
        repeat rheight {
@@ -1000,7 +1000,7 @@ cdraw_mod2              ora  cx16.VERA_DATA1
        }
    }

-    asmsub cs_innerloop640(ubyte color @A) clobbers(Y) {
+    asmsub cs_innerloop640(bool draw @A) clobbers(Y) {
        ; using verafx 32 bits writes here would make this faster but it's safer to
        ; use verafx only explicitly when you know what you're doing.
        %asm {{
--- a/compiler/res/prog8lib/virtual/monogfx.p8
+++ b/compiler/res/prog8lib/virtual/monogfx.p8
@@ -23,7 +23,7 @@ monogfx {
        width = 320
        height = 240
        mode = MODE_NORMAL
-        clear_screen(0)
+        clear_screen(false)
    }

    sub hires() {
@@ -32,7 +32,7 @@ monogfx {
        width = 640
        height = 480
        mode = MODE_NORMAL
-        clear_screen(0)
+        clear_screen(false)
    }

    sub textmode() {
@@ -43,9 +43,10 @@ monogfx {
        mode = dm
    }

-    sub clear_screen(ubyte color) {
-        if color!=0
-            color=255
+    sub clear_screen(bool draw) {
+        ubyte color = 0
+        if draw
+            color = 255
        sys.gfx_clear(color)
    }

@@ -64,7 +65,7 @@ monogfx {

    sub fillrect(uword xx, uword yy, uword rwidth, uword rheight, bool draw) {
        ; Draw a filled rectangle of the given size.
-        ; To fill the whole screen, use clear_screen(color) instead - it is much faster.
+        ; To fill the whole screen, use clear_screen(draw) instead - it is much faster.
        if rwidth==0
            return
        repeat rheight {
--- a/docs/source/libraries.rst
+++ b/docs/source/libraries.rst
@@ -903,7 +903,7 @@ to see what's in there.
 gfx2  (cx16 only)
 -----------------
 Full-screen multicolor bitmap graphics routines, available on the Cx16 machine only.
-Same interface as monogfx, but for color screens. For 1 bpp monochrome screens, use monogfx.
+Same interface as monogfx, but for color screens. For 1 bpp monochrome screens: use monogfx.

 - multiple full-screen bitmap color resolutions
 - clearing screen, switching screen mode, also back to text mode
@@ -916,6 +916,15 @@ Read the `gfx2 source code <https://github.com/irmen/prog8/tree/master/compiler/
 to see what's in there.


+gfx_lores  (cx16 only)
+----------------------
+Heavily optimized graphics routines for just the single screen mode: lores 320*240, 256c  (8bpp).
+This is screen mode 1 from the gfx2 module (and also compatible with X16's BASIC screen mode 128).
+
+Read the `gfx_lores source code <https://github.com/irmen/prog8/tree/master/compiler/res/prog8lib/cx16/gfx_lores.p8>`_
+to see what's in there.
+
+
 palette  (cx16 only)
 --------------------
 Available for the Cx16 target. Various routines to set the display color palette.
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -1,25 +1,27 @@
 TODO
 ====

+Regenerate skeletons in doc.
+
+
 Improve register load order in subroutine call args assignments:
 in certain situations, the "wrong" order of evaluation of function call arguments is done which results
 in overwriting registers that already got their value, which requires a lot of stack juggling (especially on plain 6502 cpu!)
 Maybe this routine can be made more intelligent.  See usesOtherRegistersWhileEvaluating() and argumentsViaRegisters().


-Regenerate skeletons in doc.
-
-
 Future Things and Ideas
 ^^^^^^^^^^^^^^^^^^^^^^^
 Compiler:

 - Some facility to use add-with-carry and sub-with-carry (so we can chain additions/subtractions without clc/sec inserted every time)
+  Note: +/- 0  can't be optimized away anymore in this case!
+  Note2: may need to preserve carry flag during evaluation of the operands!
+  Note3: only available for bytes? (or does it work on words automatically?), and perhaps restrict operand to a simple expression?
 - Can we support signed % (remainder) somehow?
 - Don't add "random" rts to %asm blocks but instead give a warning about it? (but this breaks existing behavior that others already depend on... command line switch?)
 - IR: implement missing operators in AssignmentGen  (array shifts etc)
 - IR: CMPI+BSTEQ --> new BEQ reg,value,label instruction (like BGT etc)
- expand the kata encoding to somehow translate normal katakana to half-widths?  (see comment in KatakanaEncoding)
 - instead of copy-pasting inline asmsubs, make them into a 64tass macro and use that instead.
  that will allow them to be reused from custom user written assembly code as well.
 - Multidimensional arrays and chained indexing, purely as syntactic sugar over regular arrays.
--- a/examples/cx16/pcmaudio/adpcm.p8
+++ b/examples/cx16/pcmaudio/adpcm.p8
@@ -65,10 +65,10 @@ adpcm {
    }

    sub decode_nibble(ubyte @zp nibble) {
-        ; Decoder for nibbles for the first channel.
-        ; this is the hotspot of the decoder algorithm!
+        ; Decoder for a single nibble for the first channel. (The value of 'nibble' must be strictly 0-15!)
+        ; This is the hotspot of the decoder algorithm!
        ; Note that the generated assembly from this is pretty efficient,
-        ; rewriting it by hand in asm seems to improve it only 5-10%
+        ; rewriting it by hand in asm seems to improve it by only ~10%.
        cx16.r0s = 0                ; difference
        if nibble & %0100 !=0
            cx16.r0s += pstep
--- a/examples/cx16/pcmaudio/play-adpcm.p8
+++ b/examples/cx16/pcmaudio/play-adpcm.p8
@@ -153,10 +153,10 @@ mono {
        repeat 252/2 {
            unroll 2 {
                nibble = @(main.nibblesptr)
-                adpcm.decode_nibble(nibble & 15)     ; first word
+                adpcm.decode_nibble(nibble & 15)     ; first word  (note: upper nibble needs to be zero!)
                cx16.VERA_AUDIO_DATA = lsb(adpcm.predict)
                cx16.VERA_AUDIO_DATA = msb(adpcm.predict)
-                adpcm.decode_nibble(nibble>>4)       ; second word
+                adpcm.decode_nibble(nibble>>4)       ; second word  (note: upper nibble is zero, after the shifts.)
                cx16.VERA_AUDIO_DATA = lsb(adpcm.predict)
                cx16.VERA_AUDIO_DATA = msb(adpcm.predict)
                main.nibblesptr++
@@ -219,6 +219,7 @@ stereo {

    sub decode_nibbles_unrolled() {
        ; decode 4 left channel nibbles
+        ; note: when calling decode_nibble(), the upper nibble in the argument needs to be zero
        uword[8] left
        uword[8] right
        ubyte @requirezp nibble = @(main.nibblesptr)
--- a/examples/cx16/pcmaudio/stream-wav.p8
+++ b/examples/cx16/pcmaudio/stream-wav.p8
@@ -304,6 +304,7 @@ _lp2        lda  $ffff,y
        repeat 252/2 {
            unroll 2 {
                nibble = @(nibblesptr)
+                ; note: when calling decode_nibble(), the upper nibble in the argument needs to be zero
                adpcm.decode_nibble(nibble & 15)     ; first word
                cx16.VERA_AUDIO_DATA = lsb(adpcm.predict)
                cx16.VERA_AUDIO_DATA = msb(adpcm.predict)
@@ -330,6 +331,7 @@ _lp2        lda  $ffff,y

    sub decode_nibbles_unrolled() {
        ; decode 4 left channel nibbles
+        ; note: when calling decode_nibble(), the upper nibble in the argument needs to be zero
        uword[8] left
        uword[8] right
        ubyte @requirezp nibble = @(nibblesptr)
--- a/examples/cx16/testmonogfx.p8
+++ b/examples/cx16/testmonogfx.p8
@@ -195,8 +195,8 @@ main {
        }

        sys.wait(60)
-        monogfx.clear_screen(1)
-        monogfx.clear_screen(0)
+        monogfx.clear_screen(true)
+        monogfx.clear_screen(false)

        ubyte radius

--- a/examples/test.p8
+++ b/examples/test.p8
@@ -1,27 +1,11 @@
 %option no_sysinit
+%import gfx_lores
 %zeropage basicsafe

 main {
    sub start() {
-        ; nothing!
-    }
-}
-
-
-derp {
-    asmsub f_tell() -> uword @R0, uword @R1, uword @R2, uword @R3 {
-        %asm {{
-            jmp  p8s_internal_f_tell
-        }}
-    }
-
-    sub internal_f_tell() {
-        cx16.r1 = read4hex()
-
-        sub read4hex() -> uword {
-            str @shared hex = "0000000000000000000000000000000000000000000"
-            cx16.r0++
-            return cx16.r0
-        }
+        gfx_lores.set_screen_mode()
+        gfx_lores.clear_screen(0)
+        gfx_lores.line(0,0,319,239,5)
    }
 }
--- a/scripts/profiler.py
+++ b/scripts/profiler.py
@@ -1,25 +1,22 @@
 #!/usr/bin/env python

-"""
+program_description = """
 This is a simple run-time profiler tool for X16 assembly programs.
 It takes an assembly list file (as produced by 64tass/turbo assembler) and
-a memory access statistics dump file (produced by the emulator's -memorystats option)
+a memory access statistics dump file (produced by the X16 emulator's -memorystats option)
 and prints out what assembly lines and variables were read from and written to the most.
 These may indicate hot paths or even bottlenecks in your program,
 and what variables in system ram might be better placed in Zeropage.
-
-The -memorystats option in the emulator is work in progress at the time of writing.
 """


-import sys
 import argparse
 import operator
 from typing import Tuple


 class AsmList:
-    """parses a l64tass Turbo Assembler Macro listing file"""
+    """parses a 64tass Turbo Assembler Macro listing file"""

    def __init__(self, filename: str) -> None:
        self.lines = []
@@ -170,7 +167,7 @@ def profile(number_of_lines: int, asmlist: str, memstats: str) -> None:


 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="simple X16 assembly run time profiler")
+    parser = argparse.ArgumentParser(description=program_description)
    parser.add_argument("-n", dest="number", type=int, default=20, help="amount of reads and writes to print (default 20)")
    parser.add_argument("asmlistfile", type=str, help="the 64tass/turbo assembler listing file to read")
    parser.add_argument("memorystatsfile", type=str, help="the X16 emulator memstats dump file to read")