From b7afda781ac96809ced5e8a68603a06ef4b32168 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Mon, 9 Dec 2024 04:29:04 +0100 Subject: [PATCH] Optimize 6502 bitwise operations on word values where only the msb or lsb is touched --- .../src/prog8/codegen/cpu6502/AsmGen.kt | 16 +++++ .../cpu6502/assignment/AssignmentAsmGen.kt | 31 +++++++-- .../assignment/AugmentableAssignmentAsmGen.kt | 24 ++----- compiler/res/prog8lib/cx16/palette.p8 | 1 + docs/source/libraries.rst | 6 +- docs/source/todo.rst | 17 ----- examples/test.p8 | 68 +++---------------- 7 files changed, 61 insertions(+), 102 deletions(-) diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt index dc4cb71ee..c7556b624 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt @@ -1433,6 +1433,22 @@ $repeatLabel""") } } + internal fun immediateAndInplace(name: String, value: Int) { + if(isTargetCpu(CpuType.CPU65c02)) { + out(" lda #${value xor 255} | trb $name") // reset bit + } else { + out(" lda $name | and #$value | sta $name") + } + } + + internal fun immediateOrInplace(name: String, value: Int) { + if(isTargetCpu(CpuType.CPU65c02)) { + out(" lda #$value | tsb $name") // set bit + } else { + out(" lda $name | ora #$value | sta $name") + } + } + internal fun assignConditionValueToRegisterAndTest(condition: PtExpression) { assignExpressionToRegister(condition, RegisterOrPair.A, false) when(condition) { diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt index e01b63255..379c71c9f 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt @@ -1743,11 +1743,34 @@ internal class AssignmentAsmGen( assignExpressionToRegister(left, RegisterOrPair.AY, false) when(right) { is 
PtNumber -> { - val number = right.number.toHex() + val value = right.number.toInt() when (operator) { - "&" -> asmgen.out(" and #<$number | tax | tya | and #>$number | tay | txa") - "|" -> asmgen.out(" ora #<$number | tax | tya | ora #>$number | tay | txa") - "^" -> asmgen.out(" eor #<$number | tax | tya | eor #>$number | tay | txa") + "&" -> { + when { + value == 0 -> asmgen.out(" lda #0 | tay") + value == 0x00ff -> asmgen.out(" ldy #0") /* keep lsb in A, clear msb in Y */ + value == 0xff00 -> asmgen.out(" lda #0") /* keep msb in Y, clear lsb in A */ + value and 255 == 0 -> asmgen.out(" tya | and #>$value | tay | lda #0") + value < 0x0100 -> asmgen.out(" and #<$value | ldy #0") + else -> asmgen.out(" and #<$value | tax | tya | and #>$value | tay | txa") + } + } + "|" -> { + when { + value == 0 -> {} + value and 255 == 0 -> asmgen.out(" tax | tya | ora #>$value | tay | txa") + value < 0x0100 -> asmgen.out(" ora #$value") + else -> asmgen.out(" ora #<$value | tax | tya | ora #>$value | tay | txa") + } + } + "^" -> { + when { + value == 0 -> {} + value and 255 == 0 -> asmgen.out(" tax | tya | eor #>$value | tay | txa") + value < 0x0100 -> asmgen.out(" eor #$value") + else -> asmgen.out(" eor #<$value | tax | tya | eor #>$value | tay | txa") + } + } else -> throw AssemblyError("invalid bitwise operator") } } diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt index 3b7ecb32c..5b345223b 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt @@ -1538,8 +1538,8 @@ $shortcutLabel:""") } } } - "&", "and" -> immediateAndInplace(name, value) - "|", "or" -> immediateOrInplace(name, value) + "&", "and" -> asmgen.immediateAndInplace(name, value) + "|", "or" -> asmgen.immediateOrInplace(name, value) "^", "xor" -> asmgen.out(" lda $name | eor #$value | sta $name") "==" -> { asmgen.out(""" @@ 
-1660,22 +1660,6 @@ $shortcutLabel:""") } } - private fun immediateAndInplace(name: String, value: Int) { - if(asmgen.isTargetCpu(CpuType.CPU65c02)) { - asmgen.out(" lda #${value xor 255} | trb $name") // reset bit - } else { - asmgen.out(" lda $name | and #$value | sta $name") - } - } - - private fun immediateOrInplace(name: String, value: Int) { - if(asmgen.isTargetCpu(CpuType.CPU65c02)) { - asmgen.out(" lda #$value | tsb $name") // set bit - } else { - asmgen.out(" lda $name | ora #$value | sta $name") - } - } - private fun inplacemodificationWordWithMemread(name: String, dt: DataType, operator: String, memread: PtMemoryByte) { require(dt.isInteger) when (operator) { @@ -1998,7 +1982,7 @@ $shortcutLabel:""") asmgen.out(" lda $msb | and #>$value | sta $msb") } value < 0x0100 -> { - immediateAndInplace(lsb, value) + asmgen.immediateAndInplace(lsb, value) if(asmgen.isTargetCpu(CpuType.CPU65c02)) asmgen.out(" stz $msb") else @@ -2011,7 +1995,7 @@ $shortcutLabel:""") when { value == 0 -> {} value and 255 == 0 -> asmgen.out(" lda $msb | ora #>$value | sta $msb") - value < 0x0100 -> immediateOrInplace(lsb, value) + value < 0x0100 -> asmgen.immediateOrInplace(lsb, value) else -> asmgen.out(" lda $lsb | ora #<$value | sta $lsb | lda $msb | ora #>$value | sta $msb") } } diff --git a/compiler/res/prog8lib/cx16/palette.p8 b/compiler/res/prog8lib/cx16/palette.p8 index b5fe7bb44..8eee5a33d 100644 --- a/compiler/res/prog8lib/cx16/palette.p8 +++ b/compiler/res/prog8lib/cx16/palette.p8 @@ -1,6 +1,7 @@ ; Manipulate the Commander X16's display color palette. ; Should you want to restore the full default palette, you can call cbm.CINT() ; The first 16 colors can be restored to their default with set_default16() +; NOTE: assume R0, R1 and R2 are clobbered when using routines in this library! 
palette { %option ignore_unused diff --git a/docs/source/libraries.rst b/docs/source/libraries.rst index 5c124e313..091d5dcb6 100644 --- a/docs/source/libraries.rst +++ b/docs/source/libraries.rst @@ -185,7 +185,8 @@ sizeof (name) ; sizeof (number) memory (name, size, alignment) Returns the address of the first location of a statically "reserved" block of memory of the given size in bytes, - with the given name. The block is uninitialized memory, it is *not* set to zero! + with the given name. The block is *uninitialized memory*; unlike other variables in Prog8 it is *not* set to zero at the start of the program! + (if that is required, you can do so yourself using ``memset``). If you specify an alignment value >1, it means the block of memory will be aligned to such a dividable address in memory, for instance an alignment of $100 means the memory block is aligned on a page boundary, and $2 means word aligned (even addresses). @@ -195,8 +196,7 @@ memory (name, size, alignment) otherwise you'll get a compilation error. This routine can be used to "reserve" parts of the memory where a normal byte array variable would not suffice; for instance if you need more than 256 consecutive bytes. - The return value is just a simple uword address so it cannot be used as an array in your program. - You can only treat it as a pointer or use it in inline assembly. + The return value is an uword address, and you can use that like a pointer to the memory buffer. call (address) -> uword Calls a subroutine given by its memory address. You cannot pass arguments directly, diff --git a/docs/source/todo.rst b/docs/source/todo.rst index 29eb70191..67382db5b 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -1,15 +1,6 @@ TODO ==== -Halloween is stuck on a black screen - - -Optimize 6502 bitwise operations on word values where only the msb or lsb is touched: (already done for simple augmented expressions!) 
- cx16.r0 = (cx16.r0 & $a000) | $0055 - cx16.r0 = (cx16.r0 | $a000) ^ $0055 - cx16.r0 = (cx16.r0 ^ $a000) & $0055 - - update zsmkit to newest version that includes the on_deck routines when stabilized @@ -38,14 +29,6 @@ Future Things and Ideas - make a form of "manual generics" possible like: varsub routine(T arg)->T where T is expanded to a specific type (this is already done hardcoded for several of the builtin functions) - [much work:] more support for (64tass) SEGMENTS ? - - (What, how, isn't current BSS support enough?) - - Add a mechanism to allocate variables into golden ram (or segments really) (see GoldenRam class) - - maybe treat block "golden" in a special way: can only contain vars, every var will be allocated in the Golden ram area? - - maybe or may not needed: the variables can NOT have initialization values, they will all be set to zero on startup (simple memset) - just initialize them yourself in start() if you need a non-zero value . - - OR.... do all this automatically if 'golden' is enabled as a compiler option? So compiler allocates in ZP first, then Golden Ram, then regular ram - - OR.... make all this more generic and use some %segment option to create real segments for 64tass? - - (need separate step in codegen and IR to write the "golden" variables) - [problematic due to using 64tass:] better support for building library programs, where unused .proc are NOT deleted from the assembly. Perhaps replace all uses of .proc/.pend/.endproc by .block/.bend will fix that with a compiler flag? But all library code written in asm uses .proc already..... (textual search/replace when writing the actual asm?) 
diff --git a/examples/test.p8 b/examples/test.p8 index dec36ba29..3d4754d00 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -4,66 +4,18 @@ main { sub start() { + cx16.r0 = (cx16.r0 & $a000) | $0055 + cx16.r0 = (cx16.r0 | $a000) ^ $0055 + cx16.r0 = (cx16.r0 ^ $a000) & $0055 - cx16.r0L = %11111110 - while cx16.r0L &32 == 32 { - cx16.r0L <<= 1 - txt.print_ubbin(cx16.r0L, true) - txt.nl() - } - txt.nl() + cx16.r0 = (cx16.r1 & $a000) + cx16.r0 = (cx16.r1 | $a000) + cx16.r0 = (cx16.r1 ^ $a000) - cx16.r0L = %11111110 - while cx16.r0L &32 != 0 { - cx16.r0L <<= 1 - txt.print_ubbin(cx16.r0L, true) - txt.nl() - } - txt.nl() - - ; this one must not be changed and stop after 3 iterations instead of 5! - cx16.r0L = %11111110 - while cx16.r0L &40 == 40 { - cx16.r0L <<= 1 - txt.print_ubbin(cx16.r0L, true) - txt.nl() - } - txt.nl() - txt.nl() - - cx16.r0L = %11111110 - do { - cx16.r0L <<= 1 - txt.print_ubbin(cx16.r0L, true) - txt.nl() - } until cx16.r0L &32 != 32 - txt.nl() - - cx16.r0L = %11111110 - do { - cx16.r0L <<= 1 - txt.print_ubbin(cx16.r0L, true) - txt.nl() - } until cx16.r0L &32 == 0 - txt.nl() - - ; this one must not be changed and stop after 3 iterations instead of 5! - cx16.r0L = %11111110 - do { - cx16.r0L <<= 1 - txt.print_ubbin(cx16.r0L, true) - txt.nl() - } until cx16.r0L &40 != 40 - txt.nl() - - -; while cx16.r0L & cx16.r1L == 0 { -; cx16.r0L++ -; } -; -; while cx16.r0L & cx16.r1L == cx16.r1L { -; cx16.r0L++ -; } + ; these are optimized already: + cx16.r0 = (cx16.r0 & $a000) + cx16.r0 = (cx16.r0 | $a000) + cx16.r0 = (cx16.r0 ^ $a000) /* sys.set_irqd()