Optimize 6502 bitwise operations on word values where only the msb or lsb is touched

This commit is contained in:
Irmen de Jong 2024-12-09 04:29:04 +01:00
parent 535ec13072
commit b7afda781a
7 changed files with 61 additions and 102 deletions

View File

@ -1433,6 +1433,22 @@ $repeatLabel""")
} }
} }
/**
 * Emits assembly that ANDs the memory operand [name] in place with the constant [value].
 *
 * When targeting the 65C02, the complemented mask is loaded into A and `trb` is used on
 * the operand; on other CPUs a load / `and` / store sequence is emitted instead.
 */
internal fun immediateAndInplace(name: String, value: Int) {
    val asm = when {
        isTargetCpu(CpuType.CPU65c02) -> " lda #${value xor 255} | trb $name" // reset bit
        else -> " lda $name | and #$value | sta $name"
    }
    out(asm)
}
/**
 * Emits assembly that ORs the constant [value] in place into the memory operand [name].
 *
 * When targeting the 65C02, the mask is loaded into A and `tsb` is used on the operand;
 * on other CPUs a load / `ora` / store sequence is emitted instead.
 */
internal fun immediateOrInplace(name: String, value: Int) {
    val useTsb = isTargetCpu(CpuType.CPU65c02)
    out(
        if (useTsb) " lda #$value | tsb $name" // set bit
        else " lda $name | ora #$value | sta $name"
    )
}
internal fun assignConditionValueToRegisterAndTest(condition: PtExpression) { internal fun assignConditionValueToRegisterAndTest(condition: PtExpression) {
assignExpressionToRegister(condition, RegisterOrPair.A, false) assignExpressionToRegister(condition, RegisterOrPair.A, false)
when(condition) { when(condition) {

View File

@ -1743,11 +1743,34 @@ internal class AssignmentAsmGen(
assignExpressionToRegister(left, RegisterOrPair.AY, false) assignExpressionToRegister(left, RegisterOrPair.AY, false)
when(right) { when(right) {
is PtNumber -> { is PtNumber -> {
val number = right.number.toHex() val value = right.number.toInt()
when (operator) { when (operator) {
"&" -> asmgen.out(" and #<$number | tax | tya | and #>$number | tay | txa") "&" -> {
"|" -> asmgen.out(" ora #<$number | tax | tya | ora #>$number | tay | txa") when {
"^" -> asmgen.out(" eor #<$number | tax | tya | eor #>$number | tay | txa") value == 0 -> asmgen.out(" lda #0 | tay")
// The word is in AY: A holds the lsb and Y the msb (the sibling cases below clear
// A when the mask's low byte is zero and clear Y when the mask fits in one byte).
// So masking with $00ff must only clear the msb, and $ff00 must only clear the lsb.
value == 0x00ff -> asmgen.out(" ldy #0")    // keep lsb in A, zero the msb
value == 0xff00 -> asmgen.out(" lda #0")    // keep msb in Y, zero the lsb
value and 255 == 0 -> asmgen.out(" tya | and #>$value | tay | lda #0")
value < 0x0100 -> asmgen.out(" and #<$value | ldy #0")
else -> asmgen.out(" and #<$value | tax | tya | and #>$value | tay | txa")
}
}
"|" -> {
when {
value == 0 -> {}
value and 255 == 0 -> asmgen.out(" tax | tya | ora #>$value | tay | txa")
value < 0x0100 -> asmgen.out(" ora #$value")
else -> asmgen.out(" ora #<$value | tax | tya | ora #>$value | tay | txa")
}
}
"^" -> {
when {
value == 0 -> {}
value and 255 == 0 -> asmgen.out(" tax | tya | eor #>$value | tay | txa")
value < 0x0100 -> asmgen.out(" eor #$value")
else -> asmgen.out(" eor #<$value | tax | tya | eor #>$value | tay | txa")
}
}
else -> throw AssemblyError("invalid bitwise operator") else -> throw AssemblyError("invalid bitwise operator")
} }
} }

View File

@ -1538,8 +1538,8 @@ $shortcutLabel:""")
} }
} }
} }
"&", "and" -> immediateAndInplace(name, value) "&", "and" -> asmgen.immediateAndInplace(name, value)
"|", "or" -> immediateOrInplace(name, value) "|", "or" -> asmgen.immediateOrInplace(name, value)
"^", "xor" -> asmgen.out(" lda $name | eor #$value | sta $name") "^", "xor" -> asmgen.out(" lda $name | eor #$value | sta $name")
"==" -> { "==" -> {
asmgen.out(""" asmgen.out("""
@ -1660,22 +1660,6 @@ $shortcutLabel:""")
} }
} }
/**
 * Emits assembly that ANDs the memory operand [name] in place with the constant [value].
 *
 * Uses the complemented mask with `trb` when the target CPU is a 65C02; otherwise falls
 * back to a load / `and` / store sequence.
 */
private fun immediateAndInplace(name: String, value: Int) {
    if (!asmgen.isTargetCpu(CpuType.CPU65c02)) {
        asmgen.out(" lda $name | and #$value | sta $name")
        return
    }
    asmgen.out(" lda #${value xor 255} | trb $name") // reset bit
}
/**
 * Emits assembly that ORs the constant [value] in place into the memory operand [name].
 *
 * Uses `tsb` when the target CPU is a 65C02; otherwise falls back to a
 * load / `ora` / store sequence.
 */
private fun immediateOrInplace(name: String, value: Int) {
    if (!asmgen.isTargetCpu(CpuType.CPU65c02)) {
        asmgen.out(" lda $name | ora #$value | sta $name")
        return
    }
    asmgen.out(" lda #$value | tsb $name") // set bit
}
private fun inplacemodificationWordWithMemread(name: String, dt: DataType, operator: String, memread: PtMemoryByte) { private fun inplacemodificationWordWithMemread(name: String, dt: DataType, operator: String, memread: PtMemoryByte) {
require(dt.isInteger) require(dt.isInteger)
when (operator) { when (operator) {
@ -1998,7 +1982,7 @@ $shortcutLabel:""")
asmgen.out(" lda $msb | and #>$value | sta $msb") asmgen.out(" lda $msb | and #>$value | sta $msb")
} }
value < 0x0100 -> { value < 0x0100 -> {
immediateAndInplace(lsb, value) asmgen.immediateAndInplace(lsb, value)
if(asmgen.isTargetCpu(CpuType.CPU65c02)) if(asmgen.isTargetCpu(CpuType.CPU65c02))
asmgen.out(" stz $msb") asmgen.out(" stz $msb")
else else
@ -2011,7 +1995,7 @@ $shortcutLabel:""")
when { when {
value == 0 -> {} value == 0 -> {}
value and 255 == 0 -> asmgen.out(" lda $msb | ora #>$value | sta $msb") value and 255 == 0 -> asmgen.out(" lda $msb | ora #>$value | sta $msb")
value < 0x0100 -> immediateOrInplace(lsb, value) value < 0x0100 -> asmgen.immediateOrInplace(lsb, value)
else -> asmgen.out(" lda $lsb | ora #<$value | sta $lsb | lda $msb | ora #>$value | sta $msb") else -> asmgen.out(" lda $lsb | ora #<$value | sta $lsb | lda $msb | ora #>$value | sta $msb")
} }
} }

View File

@ -1,6 +1,7 @@
; Manipulate the Commander X16's display color palette. ; Manipulate the Commander X16's display color palette.
; Should you want to restore the full default palette, you can call cbm.CINT() ; Should you want to restore the full default palette, you can call cbm.CINT()
; The first 16 colors can be restored to their default with set_default16() ; The first 16 colors can be restored to their default with set_default16()
; NOTE: assume R0, R1 and R2 are clobbered when using routines in this library!
palette { palette {
%option ignore_unused %option ignore_unused

View File

@ -185,7 +185,8 @@ sizeof (name) ; sizeof (number)
memory (name, size, alignment) memory (name, size, alignment)
Returns the address of the first location of a statically "reserved" block of memory of the given size in bytes, Returns the address of the first location of a statically "reserved" block of memory of the given size in bytes,
with the given name. The block is uninitialized memory, it is *not* set to zero! with the given name. The block is *uninitialized memory*; unlike other variables in Prog8 it is *not* set to zero at the start of the program!
(If you need the block zeroed, clear it yourself using ``memset``.)
If you specify an alignment value >1, it means the block of memory will If you specify an alignment value >1, it means the block of memory will
be aligned to such a dividable address in memory, for instance an alignment of $100 means the be aligned to such a dividable address in memory, for instance an alignment of $100 means the
memory block is aligned on a page boundary, and $2 means word aligned (even addresses). memory block is aligned on a page boundary, and $2 means word aligned (even addresses).
@ -195,8 +196,7 @@ memory (name, size, alignment)
otherwise you'll get a compilation error. otherwise you'll get a compilation error.
This routine can be used to "reserve" parts of the memory where a normal byte array variable would This routine can be used to "reserve" parts of the memory where a normal byte array variable would
not suffice; for instance if you need more than 256 consecutive bytes. not suffice; for instance if you need more than 256 consecutive bytes.
The return value is just a simple uword address so it cannot be used as an array in your program. The return value is an uword address, and you can use that like a pointer to the memory buffer.
You can only treat it as a pointer or use it in inline assembly.
call (address) -> uword call (address) -> uword
Calls a subroutine given by its memory address. You cannot pass arguments directly, Calls a subroutine given by its memory address. You cannot pass arguments directly,

View File

@ -1,15 +1,6 @@
TODO TODO
==== ====
Halloween is stuck on a black screen
Optimize 6502 bitwise operations on word values where only the msb or lsb is touched: (already done for simple augmented expressions!)
cx16.r0 = (cx16.r0 & $a000) | $0055
cx16.r0 = (cx16.r0 | $a000) ^ $0055
cx16.r0 = (cx16.r0 ^ $a000) & $0055
update zsmkit to newest version that includes the on_deck routines when stabilized update zsmkit to newest version that includes the on_deck routines when stabilized
@ -38,14 +29,6 @@ Future Things and Ideas
- make a form of "manual generics" possible like: varsub routine(T arg)->T where T is expanded to a specific type - make a form of "manual generics" possible like: varsub routine(T arg)->T where T is expanded to a specific type
(this is already done hardcoded for several of the builtin functions) (this is already done hardcoded for several of the builtin functions)
- [much work:] more support for (64tass) SEGMENTS ? - [much work:] more support for (64tass) SEGMENTS ?
- (What, how, isn't current BSS support enough?)
- Add a mechanism to allocate variables into golden ram (or segments really) (see GoldenRam class)
- maybe treat block "golden" in a special way: can only contain vars, every var will be allocated in the Golden ram area?
- maybe or may not needed: the variables can NOT have initialization values, they will all be set to zero on startup (simple memset)
just initialize them yourself in start() if you need a non-zero value .
- OR.... do all this automatically if 'golden' is enabled as a compiler option? So compiler allocates in ZP first, then Golden Ram, then regular ram
- OR.... make all this more generic and use some %segment option to create real segments for 64tass?
- (need separate step in codegen and IR to write the "golden" variables)
- [problematic due to using 64tass:] better support for building library programs, where unused .proc are NOT deleted from the assembly. - [problematic due to using 64tass:] better support for building library programs, where unused .proc are NOT deleted from the assembly.
Perhaps replace all uses of .proc/.pend/.endproc by .block/.bend will fix that with a compiler flag? Perhaps replace all uses of .proc/.pend/.endproc by .block/.bend will fix that with a compiler flag?
But all library code written in asm uses .proc already..... (textual search/replace when writing the actual asm?) But all library code written in asm uses .proc already..... (textual search/replace when writing the actual asm?)

View File

@ -4,66 +4,18 @@
main { main {
sub start() { sub start() {
cx16.r0 = (cx16.r0 & $a000) | $0055
cx16.r0 = (cx16.r0 | $a000) ^ $0055
cx16.r0 = (cx16.r0 ^ $a000) & $0055
cx16.r0L = %11111110 cx16.r0 = (cx16.r1 & $a000)
while cx16.r0L &32 == 32 { cx16.r0 = (cx16.r1 | $a000)
cx16.r0L <<= 1 cx16.r0 = (cx16.r1 ^ $a000)
txt.print_ubbin(cx16.r0L, true)
txt.nl()
}
txt.nl()
cx16.r0L = %11111110 ; these are optimized already:
while cx16.r0L &32 != 0 { cx16.r0 = (cx16.r0 & $a000)
cx16.r0L <<= 1 cx16.r0 = (cx16.r0 | $a000)
txt.print_ubbin(cx16.r0L, true) cx16.r0 = (cx16.r0 ^ $a000)
txt.nl()
}
txt.nl()
; this one must not be changed and stop after 3 iterations instead of 5!
cx16.r0L = %11111110
while cx16.r0L &40 == 40 {
cx16.r0L <<= 1
txt.print_ubbin(cx16.r0L, true)
txt.nl()
}
txt.nl()
txt.nl()
cx16.r0L = %11111110
do {
cx16.r0L <<= 1
txt.print_ubbin(cx16.r0L, true)
txt.nl()
} until cx16.r0L &32 != 32
txt.nl()
cx16.r0L = %11111110
do {
cx16.r0L <<= 1
txt.print_ubbin(cx16.r0L, true)
txt.nl()
} until cx16.r0L &32 == 0
txt.nl()
; this one must not be changed and stop after 3 iterations instead of 5!
cx16.r0L = %11111110
do {
cx16.r0L <<= 1
txt.print_ubbin(cx16.r0L, true)
txt.nl()
} until cx16.r0L &40 != 40
txt.nl()
; while cx16.r0L & cx16.r1L == 0 {
; cx16.r0L++
; }
;
; while cx16.r0L & cx16.r1L == cx16.r1L {
; cx16.r0L++
; }
/* /*
sys.set_irqd() sys.set_irqd()