Optimize 6502 bitwise operations on word values where only the msb or lsb is touched

This commit is contained in:
Irmen de Jong
2024-12-09 04:29:04 +01:00
parent 535ec13072
commit b7afda781a
7 changed files with 61 additions and 102 deletions

View File

@@ -1433,6 +1433,22 @@ $repeatLabel""")
}
}
/**
 * Emits assembly that ANDs the byte stored at [name] with the constant [value]
 * and writes the result back to [name].
 *
 * When the target CPU is a 65C02 the mask is inverted (`value xor 255`) and applied
 * with `trb`; in that path the accumulator ends up holding the inverted mask, not the result.
 * On other CPUs a plain load / `and` / store sequence is emitted.
 */
internal fun immediateAndInplace(name: String, value: Int) {
    if(!isTargetCpu(CpuType.CPU65c02)) {
        out(" lda $name | and #$value | sta $name")
        return
    }
    // trb resets the bits that are set in A, so feed it the bits that must be cleared
    val bitsToClear = value xor 255
    out(" lda #$bitsToClear | trb $name") // reset bit
}
/**
 * Emits assembly that ORs the constant [value] into the byte stored at [name],
 * writing the result back to [name].
 *
 * When the target CPU is a 65C02 this is done with `tsb` (the accumulator then holds
 * the mask rather than the result); otherwise a load / `ora` / store sequence is emitted.
 */
internal fun immediateOrInplace(name: String, value: Int) {
    val asm = if(isTargetCpu(CpuType.CPU65c02))
        " lda #$value | tsb $name" // set bit
    else
        " lda $name | ora #$value | sta $name"
    out(asm)
}
internal fun assignConditionValueToRegisterAndTest(condition: PtExpression) {
assignExpressionToRegister(condition, RegisterOrPair.A, false)
when(condition) {

View File

@@ -1743,11 +1743,34 @@ internal class AssignmentAsmGen(
assignExpressionToRegister(left, RegisterOrPair.AY, false)
when(right) {
is PtNumber -> {
val number = right.number.toHex()
val value = right.number.toInt()
when (operator) {
"&" -> asmgen.out(" and #<$number | tax | tya | and #>$number | tay | txa")
"|" -> asmgen.out(" ora #<$number | tax | tya | ora #>$number | tay | txa")
"^" -> asmgen.out(" eor #<$number | tax | tya | eor #>$number | tay | txa")
"&" -> {
// The word operand lives in the AY register pair: A = lsb, Y = msb
// (the general case at the bottom masks A with #<value and Y with #>value).
when {
    // everything is masked away
    value == 0 -> asmgen.out(" lda #0 | tay")
    // mask $00ff: the lsb (A) is kept as-is, only the msb (Y) is cleared
    value == 0x00ff -> asmgen.out(" ldy #0")
    // mask $ff00: the msb (Y) is kept as-is, only the lsb (A) is cleared
    value == 0xff00 -> asmgen.out(" lda #0")
    // low byte of the mask is zero: AND only the msb, the lsb becomes 0
    value and 255 == 0 -> asmgen.out(" tya | and #>$value | tay | lda #0")
    // mask fits in one byte: AND only the lsb, the msb becomes 0
    // NOTE(review): a negative value would also match this branch; presumably the constant is always unsigned here - confirm
    value < 0x0100 -> asmgen.out(" and #<$value | ldy #0")
    // general case: AND both bytes, X is used to park the lsb result
    else -> asmgen.out(" and #<$value | tax | tya | and #>$value | tay | txa")
}
}
"|" -> {
when {
value == 0 -> {}
value and 255 == 0 -> asmgen.out(" tax | tya | ora #>$value | tay | txa")
value < 0x0100 -> asmgen.out(" ora #$value")
else -> asmgen.out(" ora #<$value | tax | tya | ora #>$value | tay | txa")
}
}
"^" -> {
when {
value == 0 -> {}
value and 255 == 0 -> asmgen.out(" tax | tya | eor #>$value | tay | txa")
value < 0x0100 -> asmgen.out(" eor #$value")
else -> asmgen.out(" eor #<$value | tax | tya | eor #>$value | tay | txa")
}
}
else -> throw AssemblyError("invalid bitwise operator")
}
}

View File

@@ -1538,8 +1538,8 @@ $shortcutLabel:""")
}
}
}
"&", "and" -> immediateAndInplace(name, value)
"|", "or" -> immediateOrInplace(name, value)
"&", "and" -> asmgen.immediateAndInplace(name, value)
"|", "or" -> asmgen.immediateOrInplace(name, value)
"^", "xor" -> asmgen.out(" lda $name | eor #$value | sta $name")
"==" -> {
asmgen.out("""
@@ -1660,22 +1660,6 @@ $shortcutLabel:""")
}
}
/**
 * Emits assembly (via [asmgen]) that ANDs the byte at [name] with the constant [value] in place.
 *
 * For a 65C02 target the inverted mask (`value xor 255`) is loaded and applied with `trb`;
 * for other targets a load / `and` / store sequence is used.
 */
private fun immediateAndInplace(name: String, value: Int) {
    val code = when {
        asmgen.isTargetCpu(CpuType.CPU65c02) -> " lda #${value xor 255} | trb $name" // reset bit
        else -> " lda $name | and #$value | sta $name"
    }
    asmgen.out(code)
}
/**
 * Emits assembly (via [asmgen]) that ORs the constant [value] into the byte at [name] in place.
 *
 * For a 65C02 target the mask is loaded and applied with `tsb`;
 * for other targets a load / `ora` / store sequence is used.
 */
private fun immediateOrInplace(name: String, value: Int) {
    if(!asmgen.isTargetCpu(CpuType.CPU65c02)) {
        asmgen.out(" lda $name | ora #$value | sta $name")
        return
    }
    asmgen.out(" lda #$value | tsb $name") // set bit
}
private fun inplacemodificationWordWithMemread(name: String, dt: DataType, operator: String, memread: PtMemoryByte) {
require(dt.isInteger)
when (operator) {
@@ -1998,7 +1982,7 @@ $shortcutLabel:""")
asmgen.out(" lda $msb | and #>$value | sta $msb")
}
value < 0x0100 -> {
immediateAndInplace(lsb, value)
asmgen.immediateAndInplace(lsb, value)
if(asmgen.isTargetCpu(CpuType.CPU65c02))
asmgen.out(" stz $msb")
else
@@ -2011,7 +1995,7 @@ $shortcutLabel:""")
when {
value == 0 -> {}
value and 255 == 0 -> asmgen.out(" lda $msb | ora #>$value | sta $msb")
value < 0x0100 -> immediateOrInplace(lsb, value)
value < 0x0100 -> asmgen.immediateOrInplace(lsb, value)
else -> asmgen.out(" lda $lsb | ora #<$value | sta $lsb | lda $msb | ora #>$value | sta $msb")
}
}

View File

@@ -1,6 +1,7 @@
; Manipulate the Commander X16's display color palette.
; Should you want to restore the full default palette, you can call cbm.CINT()
; The first 16 colors can be restored to their default with set_default16()
; NOTE: assume R0, R1 and R2 are clobbered when using routines in this library!
palette {
%option ignore_unused

View File

@@ -185,7 +185,8 @@ sizeof (name) ; sizeof (number)
memory (name, size, alignment)
Returns the address of the first location of a statically "reserved" block of memory of the given size in bytes,
with the given name. The block is uninitialized memory, it is *not* set to zero!
with the given name. The block is *uninitialized memory*; unlike other variables in Prog8 it is *not* set to zero at the start of the program!
(if that is required, you can do so yourself using ``memset``).
If you specify an alignment value >1, the block of memory will be placed at an
address that is a multiple of that value; for instance an alignment of $100 means the
memory block is aligned on a page boundary, and $2 means word aligned (even addresses).
@@ -195,8 +196,7 @@ memory (name, size, alignment)
otherwise you'll get a compilation error.
This routine can be used to "reserve" parts of the memory where a normal byte array variable would
not suffice; for instance if you need more than 256 consecutive bytes.
The return value is just a simple uword address so it cannot be used as an array in your program.
You can only treat it as a pointer or use it in inline assembly.
The return value is a uword address, and you can use that like a pointer to the memory buffer.
call (address) -> uword
Calls a subroutine given by its memory address. You cannot pass arguments directly,

View File

@@ -1,15 +1,6 @@
TODO
====
Halloween is stuck on a black screen
Optimize 6502 bitwise operations on word values where only the msb or lsb is touched: (already done for simple augmented expressions!)
cx16.r0 = (cx16.r0 & $a000) | $0055
cx16.r0 = (cx16.r0 | $a000) ^ $0055
cx16.r0 = (cx16.r0 ^ $a000) & $0055
update zsmkit to newest version that includes the on_deck routines when stabilized
@@ -38,14 +29,6 @@ Future Things and Ideas
- make a form of "manual generics" possible like: varsub routine(T arg)->T where T is expanded to a specific type
(this is already done hardcoded for several of the builtin functions)
- [much work:] more support for (64tass) SEGMENTS ?
- (What, how, isn't current BSS support enough?)
- Add a mechanism to allocate variables into golden ram (or segments really) (see GoldenRam class)
- maybe treat block "golden" in a special way: can only contain vars, every var will be allocated in the Golden ram area?
- maybe or may not needed: the variables can NOT have initialization values, they will all be set to zero on startup (simple memset)
just initialize them yourself in start() if you need a non-zero value .
- OR.... do all this automatically if 'golden' is enabled as a compiler option? So compiler allocates in ZP first, then Golden Ram, then regular ram
- OR.... make all this more generic and use some %segment option to create real segments for 64tass?
- (need separate step in codegen and IR to write the "golden" variables)
- [problematic due to using 64tass:] better support for building library programs, where unused .proc are NOT deleted from the assembly.
Perhaps replace all uses of .proc/.pend/.endproc by .block/.bend will fix that with a compiler flag?
But all library code written in asm uses .proc already..... (textual search/replace when writing the actual asm?)

View File

@@ -4,66 +4,18 @@
main {
sub start() {
cx16.r0 = (cx16.r0 & $a000) | $0055
cx16.r0 = (cx16.r0 | $a000) ^ $0055
cx16.r0 = (cx16.r0 ^ $a000) & $0055
cx16.r0L = %11111110
while cx16.r0L &32 == 32 {
cx16.r0L <<= 1
txt.print_ubbin(cx16.r0L, true)
txt.nl()
}
txt.nl()
cx16.r0 = (cx16.r1 & $a000)
cx16.r0 = (cx16.r1 | $a000)
cx16.r0 = (cx16.r1 ^ $a000)
cx16.r0L = %11111110
while cx16.r0L &32 != 0 {
cx16.r0L <<= 1
txt.print_ubbin(cx16.r0L, true)
txt.nl()
}
txt.nl()
; this one must not be changed and stop after 3 iterations instead of 5!
cx16.r0L = %11111110
while cx16.r0L &40 == 40 {
cx16.r0L <<= 1
txt.print_ubbin(cx16.r0L, true)
txt.nl()
}
txt.nl()
txt.nl()
cx16.r0L = %11111110
do {
cx16.r0L <<= 1
txt.print_ubbin(cx16.r0L, true)
txt.nl()
} until cx16.r0L &32 != 32
txt.nl()
cx16.r0L = %11111110
do {
cx16.r0L <<= 1
txt.print_ubbin(cx16.r0L, true)
txt.nl()
} until cx16.r0L &32 == 0
txt.nl()
; this one must not be changed and stop after 3 iterations instead of 5!
cx16.r0L = %11111110
do {
cx16.r0L <<= 1
txt.print_ubbin(cx16.r0L, true)
txt.nl()
} until cx16.r0L &40 != 40
txt.nl()
; while cx16.r0L & cx16.r1L == 0 {
; cx16.r0L++
; }
;
; while cx16.r0L & cx16.r1L == cx16.r1L {
; cx16.r0L++
; }
; these are optimized already:
cx16.r0 = (cx16.r0 & $a000)
cx16.r0 = (cx16.r0 | $a000)
cx16.r0 = (cx16.r0 ^ $a000)
/*
sys.set_irqd()