don't optimize seemingly redundant assembly instructions away that manipulate IO memory space

This commit is contained in:
Irmen de Jong 2021-11-21 03:24:03 +01:00
parent b292124f3c
commit 3c39baf1d6
4 changed files with 113 additions and 66 deletions

View File

@ -87,7 +87,7 @@ class AsmGen(private val program: Program,
assemblyLines.addAll(outputFile.readLines()) assemblyLines.addAll(outputFile.readLines())
var optimizationsDone = 1 var optimizationsDone = 1
while (optimizationsDone > 0) { while (optimizationsDone > 0) {
optimizationsDone = optimizeAssembly(assemblyLines) optimizationsDone = optimizeAssembly(assemblyLines, options.compTarget.machine)
} }
outputFile.printWriter().use { outputFile.printWriter().use {
for (line in assemblyLines) { it.println(line) } for (line in assemblyLines) { it.println(line) }

View File

@ -1,10 +1,12 @@
package prog8.compiler.target.cpu6502.codegen package prog8.compiler.target.cpu6502.codegen
import prog8.compilerinterface.IMachineDefinition
// note: see https://wiki.nesdev.org/w/index.php/6502_assembly_optimisations // note: see https://wiki.nesdev.org/w/index.php/6502_assembly_optimisations
fun optimizeAssembly(lines: MutableList<String>): Int { fun optimizeAssembly(lines: MutableList<String>, machine: IMachineDefinition): Int {
var numberOfOptimizations = 0 var numberOfOptimizations = 0
@ -31,7 +33,7 @@ fun optimizeAssembly(lines: MutableList<String>): Int {
numberOfOptimizations++ numberOfOptimizations++
} }
mods = optimizeStoreLoadSame(linesByFour) mods = optimizeStoreLoadSame(linesByFour, machine)
if(mods.isNotEmpty()) { if(mods.isNotEmpty()) {
apply(mods, lines) apply(mods, lines)
linesByFour = getLinesBy(lines, 4) linesByFour = getLinesBy(lines, 4)
@ -46,7 +48,7 @@ fun optimizeAssembly(lines: MutableList<String>): Int {
} }
var linesByFourteen = getLinesBy(lines, 14) var linesByFourteen = getLinesBy(lines, 14)
mods = optimizeSameAssignments(linesByFourteen) mods = optimizeSameAssignments(linesByFourteen, machine)
if(mods.isNotEmpty()) { if(mods.isNotEmpty()) {
apply(mods, lines) apply(mods, lines)
linesByFourteen = getLinesBy(lines, 14) linesByFourteen = getLinesBy(lines, 14)
@ -111,9 +113,9 @@ private fun optimizeUselessStackByteWrites(linesByFour: List<List<IndexedValue<S
return mods return mods
} }
private fun optimizeSameAssignments(linesByFourteen: List<List<IndexedValue<String>>>): List<Modification> { private fun optimizeSameAssignments(linesByFourteen: List<List<IndexedValue<String>>>, machine: IMachineDefinition): List<Modification> {
// Optimize sequential assignments of the isSameAs value to various targets (bytes, words, floats) // Optimize sequential assignments of the same value to various targets (bytes, words, floats)
// the float one is the one that requires 2*7=14 lines of code to check... // the float one is the one that requires 2*7=14 lines of code to check...
// The better place to do this is in the Compiler instead and never create these types of assembly, but hey // The better place to do this is in the Compiler instead and never create these types of assembly, but hey
@ -135,9 +137,13 @@ private fun optimizeSameAssignments(linesByFourteen: List<List<IndexedValue<Stri
val thirdvalue = fifth.substring(4) val thirdvalue = fifth.substring(4)
val fourthvalue = sixth.substring(4) val fourthvalue = sixth.substring(4)
if(firstvalue==thirdvalue && secondvalue==fourthvalue) { if(firstvalue==thirdvalue && secondvalue==fourthvalue) {
// lda/ldy sta/sty twice the isSameAs word --> remove second lda/ldy pair (fifth and sixth lines) // lda/ldy sta/sty twice the same word --> remove second lda/ldy pair (fifth and sixth lines)
mods.add(Modification(lines[4].index, true, null)) val address1 = getAddressArg(first)
mods.add(Modification(lines[5].index, true, null)) val address2 = getAddressArg(second)
if(address1==null || address2==null || (!machine.isIOAddress(address1) && !machine.isIOAddress(address2))) {
mods.add(Modification(lines[4].index, true, null))
mods.add(Modification(lines[5].index, true, null))
}
} }
} }
@ -145,8 +151,10 @@ private fun optimizeSameAssignments(linesByFourteen: List<List<IndexedValue<Stri
val firstvalue = first.substring(4) val firstvalue = first.substring(4)
val secondvalue = third.substring(4) val secondvalue = third.substring(4)
if(firstvalue==secondvalue) { if(firstvalue==secondvalue) {
// lda value / sta ? / lda isSameAs-value / sta ? -> remove second lda (third line) // lda value / sta ? / lda same-value / sta ? -> remove second lda (third line)
mods.add(Modification(lines[2].index, true, null)) val address = getAddressArg(first)
if(address==null || !machine.isIOAddress(address))
mods.add(Modification(lines[2].index, true, null))
} }
} }
@ -227,10 +235,13 @@ private fun optimizeSameAssignments(linesByFourteen: List<List<IndexedValue<Stri
val thirdvalue = third.substring(4) val thirdvalue = third.substring(4)
val fourthvalue = fourth.substring(4) val fourthvalue = fourth.substring(4)
if(firstvalue==thirdvalue && secondvalue == fourthvalue) { if(firstvalue==thirdvalue && secondvalue == fourthvalue) {
overlappingMods = true val address = getAddressArg(first)
mods.add(Modification(lines[2].index, true, null)) if(address==null || !machine.isIOAddress(address)) {
if(!fifth.startsWith('b')) overlappingMods = true
mods.add(Modification(lines[3].index, true, null)) mods.add(Modification(lines[2].index, true, null))
if (!fifth.startsWith('b'))
mods.add(Modification(lines[3].index, true, null))
}
} }
} }
} }
@ -247,8 +258,11 @@ private fun optimizeSameAssignments(linesByFourteen: List<List<IndexedValue<Stri
val firstvalue = first.substring(4) val firstvalue = first.substring(4)
val thirdvalue = third.substring(4) val thirdvalue = third.substring(4)
if(firstvalue==thirdvalue) { if(firstvalue==thirdvalue) {
overlappingMods = true val address = getAddressArg(first)
mods.add(Modification(lines[2].index, true, null)) if(address==null || !machine.isIOAddress(address)) {
overlappingMods = true
mods.add(Modification(lines[2].index, true, null))
}
} }
} }
} }
@ -264,10 +278,31 @@ private fun optimizeSameAssignments(linesByFourteen: List<List<IndexedValue<Stri
val secondvalue = second.substring(4) val secondvalue = second.substring(4)
val thirdvalue = third.substring(4) val thirdvalue = third.substring(4)
if(firstvalue==secondvalue && firstvalue==thirdvalue) { if(firstvalue==secondvalue && firstvalue==thirdvalue) {
overlappingMods = true val address = getAddressArg(first)
val reg2 = second[2] if(address==null || !machine.isIOAddress(address)) {
mods.add(Modification(lines[1].index, false, " ta$reg2")) overlappingMods = true
mods.add(Modification(lines[2].index, true, null)) val reg2 = second[2]
mods.add(Modification(lines[1].index, false, " ta$reg2"))
mods.add(Modification(lines[2].index, true, null))
}
}
}
/*
sta A
sta A
*/
if(!overlappingMods && first.startsWith("st") && second.startsWith("st")) {
if(first[2]==second[2]) {
val firstvalue = first.substring(4)
val secondvalue = second.substring(4)
if(firstvalue==secondvalue) {
val address = getAddressArg(first)
if(address==null || !machine.isIOAddress(address)) {
overlappingMods = true
mods.add(Modification(lines[1].index, true, null))
}
}
} }
} }
} }
@ -275,10 +310,8 @@ private fun optimizeSameAssignments(linesByFourteen: List<List<IndexedValue<Stri
return mods return mods
} }
private fun optimizeStoreLoadSame(linesByFour: List<List<IndexedValue<String>>>): List<Modification> { private fun optimizeStoreLoadSame(linesByFour: List<List<IndexedValue<String>>>, machine: IMachineDefinition): List<Modification> {
// sta X + lda X, sty X + ldy X, stx X + ldx X -> the second instruction can OFTEN be eliminated // sta X + lda X, sty X + ldy X, stx X + ldx X -> the second instruction can OFTEN be eliminated
// TODO this is not true if X is not a regular RAM memory address (but instead mapped I/O or ROM) but how does this code know?
// should this optimization be removed???? or teach it about the InRegularRAM ?
val mods = mutableListOf<Modification>() val mods = mutableListOf<Modification>()
for (lines in linesByFour) { for (lines in linesByFour) {
val first = lines[1].value.trimStart() val first = lines[1].value.trimStart()
@ -305,7 +338,8 @@ private fun optimizeStoreLoadSame(linesByFour: List<List<IndexedValue<String>>>)
} }
else { else {
// no branch instruction follows, we can remove the load instruction // no branch instruction follows, we can remove the load instruction
true val address = getAddressArg(lines[2].value)
address==null || !machine.isIOAddress(address)
} }
if(attemptRemove) { if(attemptRemove) {
@ -319,6 +353,17 @@ private fun optimizeStoreLoadSame(linesByFour: List<List<IndexedValue<String>>>)
return mods return mods
} }
/**
 * Extracts the numeric address operand from a single assembly instruction line.
 *
 * The line is expected to have the shape `<3-character mnemonic> <operand>`, optionally
 * preceded by whitespace. Recognised operand notations are hexadecimal (`$` prefix),
 * binary (`%` prefix) and plain decimal (no prefix).
 *
 * @param line one line of assembly source text
 * @return the parsed address, or null when no plain numeric address can be determined:
 *         the operand is immediate (`#...`), indirect (`(...)`), missing, or is not a bare
 *         number (for instance a symbol name, or a number followed by extra text such as
 *         an index suffix).
 */
private fun getAddressArg(line: String): UInt? {
    // drop() rather than substring(): a line shorter than the mnemonic yields an empty operand instead of throwing
    val operand = line.trimStart().drop(3).trim()
    return when {
        operand.isEmpty() -> null
        operand.startsWith('$') -> operand.substring(1).toUIntOrNull(16)
        operand.startsWith('%') -> operand.substring(1).toUIntOrNull(2)
        operand.startsWith('#') -> null
        operand.startsWith('(') -> null
        // decimal notation has no prefix character, so the whole operand must be parsed
        else -> operand.toUIntOrNull()
    }
}
private fun optimizeIncDec(linesByFour: List<List<IndexedValue<String>>>): List<Modification> { private fun optimizeIncDec(linesByFour: List<List<IndexedValue<String>>>): List<Modification> {
// sometimes, iny+dey / inx+dex / dey+iny / dex+inx sequences are generated, these can be eliminated. // sometimes, iny+dey / inx+dex / dey+iny / dex+inx sequences are generated, these can be eliminated.
val mods = mutableListOf<Modification>() val mods = mutableListOf<Modification>()
@ -327,8 +372,12 @@ private fun optimizeIncDec(linesByFour: List<List<IndexedValue<String>>>): List<
val second = lines[1].value val second = lines[1].value
if ((" iny" in first || "\tiny" in first) && (" dey" in second || "\tdey" in second) if ((" iny" in first || "\tiny" in first) && (" dey" in second || "\tdey" in second)
|| (" inx" in first || "\tinx" in first) && (" dex" in second || "\tdex" in second) || (" inx" in first || "\tinx" in first) && (" dex" in second || "\tdex" in second)
|| (" ina" in first || "\tina" in first) && (" dea" in second || "\tdea" in second)
|| (" inc a" in first || "\tinc a" in first) && (" dec a" in second || "\tdec a" in second)
|| (" dey" in first || "\tdey" in first) && (" iny" in second || "\tiny" in second) || (" dey" in first || "\tdey" in first) && (" iny" in second || "\tiny" in second)
|| (" dex" in first || "\tdex" in first) && (" inx" in second || "\tinx" in second)) { || (" dex" in first || "\tdex" in first) && (" inx" in second || "\tinx" in second)
|| (" dea" in first || "\tdea" in first) && (" ina" in second || "\tina" in second)
|| (" dec a" in first || "\tdec a" in first) && (" inc a" in second || "\tinc a" in second)) {
mods.add(Modification(lines[0].index, true, null)) mods.add(Modification(lines[0].index, true, null))
mods.add(Modification(lines[1].index, true, null)) mods.add(Modification(lines[1].index, true, null))
} }

View File

@ -3,9 +3,7 @@ TODO
For next compiler release (7.4) For next compiler release (7.4)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
optimize: there is an optimization in AsmOptimizer that can only be done correctly ...
if it knows about regular ram vs io space ram distinction.
Blocked by an official Commander-x16 v39 release Blocked by an official Commander-x16 v39 release

View File

@ -4,48 +4,48 @@ main {
sub start() { sub start() {
ubyte xx = 1 %asm {{
ubyte yy = 2
byte b1
byte b2=10
; result should be: 29 42 40 87 75 35 lda $d020
ldy $d021
sta $d020
sty $d021
lda $d020
ldy $d021
sta $d020
sty $d021
xx=6
yy=8
yy = (xx+5)+(yy+10)
txt.print_ub(yy) ; 29
txt.nl()
xx=6 lda $d020
yy=8 sta $d020
yy = (xx*3)+(yy*3) lda $d020
txt.print_ub(yy) ; 42 sta $d020
txt.nl() lda $d020
sta $d020
lda $d020
sta $d020
sta $d020
sta $d020
sta $d020
sta $d020
sta $d020
sta $d020
b1=13 lda $c020
b2=5 sta $c020
b2 = (b1*5)-(b2*5) lda $c020
txt.print_b(b2) ; 40 sta $c020
txt.nl() lda $c020
sta $c020
lda $c020
sta $c020
sta $c020
sta $c020
sta $c020
sta $c020
sta $c020
b1=100 }}
b2=8
b2 = (b1+5)-(b2+10)
txt.print_b(b2) ; 87
txt.nl()
b1=50
b2=40
b2 = (b1-5)+(b2-10)
txt.print_b(b2) ; 75
txt.nl()
b1=50
b2=20
b2 = (b1-5)-(b2-10)
txt.print_b(b2) ; 35
txt.nl()
repeat { repeat {
} }