diff --git a/compiler/res/prog8lib/cx16/syslib.p8 b/compiler/res/prog8lib/cx16/syslib.p8 index 89c17a2db..2af57e13a 100644 --- a/compiler/res/prog8lib/cx16/syslib.p8 +++ b/compiler/res/prog8lib/cx16/syslib.p8 @@ -549,6 +549,7 @@ const ubyte EXTAPI_ps2data_raw = $09 const ubyte EXTAPI_cursor_blink = $0A const ubyte EXTAPI_led_update = $0B const ubyte EXTAPI_mouse_set_position = $0C +const ubyte EXTAPI_scnsiz = $0D ; rom R48+ ; extapi16 call numbers const ubyte EXTAPI16_test = $00 @@ -642,6 +643,15 @@ asmsub iso_cursor_char(ubyte character @X) clobbers(A,X,Y) { }} } +asmsub scnsiz(ubyte width @X, ubyte height @Y) clobbers(A,X,Y) { + ; -- sets the screen editor size dimensions to the given width (in X) and height (in Y), without changing the graphical screen mode itself + ; (rom R48+) + %asm {{ + lda #EXTAPI_scnsiz + jmp cx16.extapi + }} +} + ; TODO : implement shims for the remaining extapi calls. diff --git a/docs/source/technical.rst b/docs/source/technical.rst index 739d275b4..afeff7db0 100644 --- a/docs/source/technical.rst +++ b/docs/source/technical.rst @@ -134,10 +134,48 @@ Some notes and references into the compiler's source code modules: to convert the Ast into IR first. The VM target uses this, but the 6502 codegen doesn't right now. -Upgrading from version 8 ------------------------- -Version 9 introduced several large, incompatible changes. If you still have programs -written for Prog8 version 8 or earlier, it is likely that you'll have to modify them -to be able to compile with version 9 or newer. +Run-time memory profiling with the X16emulator +---------------------------------------------- +The X16 emulator has a ``-memorystats`` option that enables it to keep track of memory access count statistics, +and write the accumulated counts to a file on exit. +Prog8 includes a Python script ``profiler.py`` (find it in the "scripts" subdirectory of the source code distribution) +that can cross-reference that file with an assembly listing produced by the compiler with the ``-asmlist`` option. 
+It then prints the top N lines in your (assembly) program source that perform the most reads and writes, +which you can use to identify possible hot spots/bottlenecks/variables that should be better placed in zeropage etc. +Note that the profiler only counts the number of accesses to memory locations; this is *not* the same +as where the most run time is spent (cpu instruction cycle times aren't taken into account at all). +Here is an example of the output it generates:: -Information about this can be found in `older Prog8 documentation `_ . + $ scripts/profiler.py -n 10 cobramk3-gfx.list memstats.txt + + number of actual lines in the assembly listing: 2134 + number of distinct addresses read from : 22006 + number of distinct addresses written to : 8179 + total number of reads : 375106285 (375M) + total number of writes : 63601962 (63M) + + top 10 most reads: + $007f (7198687) : $007e 'P8ZP_SCRATCH_W2' (line 13), $007e 'remainder' (line 1855) + $007e (6990527) : $007e 'P8ZP_SCRATCH_W2' (line 13), $007e 'remainder' (line 1855) + $0265 (5029230) : unknown + $007c (4455140) : $007c 'P8ZP_SCRATCH_W1' (line 12), $007c 'dividend' (line 1854), $007c 'result' (line 1856) + $007d (4275195) : $007c 'P8ZP_SCRATCH_W1' (line 12), $007c 'dividend' (line 1854), $007c 'result' (line 1856) + $0076 (3374800) : $0076 'label_asm_35_counter' (line 2082) + $15d7 (3374800) : $15d7 '9c 23 9f stz cx16.VERA_DATA0' (line 2022), $15d7 'label_asm_34_repeat' (line 2021) + $15d8 (3374800) : $15d7 '9c 23 9f stz cx16.VERA_DATA0' (line 2022), $15d7 'label_asm_34_repeat' (line 2021) + $15d9 (3374800) : $15da '9c 23 9f stz cx16.VERA_DATA0' (line 2023) + $15da (3374800) : $15da '9c 23 9f stz cx16.VERA_DATA0' (line 2023) + + top 10 most writes: + $9f23 (14748104) : $9f23 'VERA_DATA0' (line 1451) + $0265 (5657743) : unknown + $007e (4464393) : $007e 'P8ZP_SCRATCH_W2' (line 13), $007e 'remainder' (line 1855) + $007f (4464393) : $007e 'P8ZP_SCRATCH_W2' (line 13), $007e 'remainder' (line 1855) + $007c (4416537) 
: $007c 'P8ZP_SCRATCH_W1' (line 12), $007c 'dividend' (line 1854), $007c 'result' (line 1856) + $007d (3820272) : $007c 'P8ZP_SCRATCH_W1' (line 12), $007c 'dividend' (line 1854), $007c 'result' (line 1856) + $0076 (3375568) : $0076 'label_asm_35_counter' (line 2082) + $01e8 (1310425) : cpu stack + $01e7 (1280140) : cpu stack + $0264 (1258159) : unknown + +Apparently the most cpu activity while running this program is spent in a division routine. diff --git a/docs/source/todo.rst b/docs/source/todo.rst index 3ee5a5508..428fcb638 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -1,10 +1,7 @@ TODO ==== -See https://github.com/irmen/prog8/issues/134 -+ any other issues that got reported. - -Document scripts/profiler.py in manual? +https://github.com/irmen/prog8/issues/136 (string.find register order issue) ... @@ -13,8 +10,9 @@ Future Things and Ideas ^^^^^^^^^^^^^^^^^^^^^^^ Compiler: +- Relax newline / bracket in parser so that you can put open and close brackets on the same line or on the next line if you so wish. For example be able to write a true one liner? +- Can we support signed % (remainder) somehow? - IR: implement missing operators in AssignmentGen (array shifts etc) -- can we support signed % (remainder) somehow? - instead of copy-pasting inline asmsubs, make them into a 64tass macro and use that instead. that will allow them to be reused from custom user written assembly code as well. - Multidimensional arrays and chained indexing, purely as syntactic sugar over regular arrays. 
diff --git a/examples/test.p8 b/examples/test.p8 index 51ee38b14..6cd45f85c 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -1,10 +1,8 @@ %zeropage basicsafe +%option no_sysinit main { sub start() { - uword @shared curr_sequence - ubyte @shared sequence_curr_step - - uword @shared sequence_offset = &curr_sequence[sequence_curr_step] + cx16.scnsiz(20,8) } } diff --git a/scripts/profiler.py b/scripts/profiler.py index 65c9ab835..86ff31cf3 100755 --- a/scripts/profiler.py +++ b/scripts/profiler.py @@ -129,6 +129,17 @@ def profile(number_of_lines: int, asmlist: str, memstats: str) -> None: stats = MemoryStats(memstats) asm.print_info() stats.print_info() + + def print_unknown(address: int) -> None: + """Print a coarse memory-region label for the given address.""" + if address < 0x100: + region = "unknown zp" + elif address < 0x200: + region = "cpu stack" + else: + region = "io" if address in range(0x9f00, 0xa000) else "unknown" + print(region) + print(f"\ntop {number_of_lines} most reads:") for (bank, address), count in stats.reads[:number_of_lines]: print(f"${address:04x} ({count}) : ", end="") @@ -138,7 +149,7 @@ def profile(number_of_lines: int, asmlist: str, memstats: str) -> None: lines = [f"${address:04x} '{line}' (line {line_number})" for address, line, line_number in result] print(", ".join(lines)) else: - print("unknown") + print_unknown(address) else: print(f"banked memory: {bank:02x}:{address:04x}") print(f"\ntop {number_of_lines} most writes:") @@ -150,14 +161,14 @@ def profile(number_of_lines: int, asmlist: str, memstats: str) -> None: lines = [f"${address:04x} '{line}' (line {line_number})" for address, line, line_number in result] print(", ".join(lines)) else: - print("unknown") + print_unknown(address) else: print(f"banked memory: {bank:02x}:{address:04x}") if __name__ == "__main__": parser = argparse.ArgumentParser(description="simple X16 assembly run time profiler") - parser.add_argument("-n", dest="number", type=int, default=20, help="amount of reads and writes to print") + parser.add_argument("-n", dest="number", 
type=int, default=20, help="amount of reads and writes to print (default 20)") parser.add_argument("asmlistfile", type=str, help="the 64tass/turbo assembler listing file to read") parser.add_argument("memorystatsfile", type=str, help="the X16 emulator memstats dump file to read") args = parser.parse_args()