From bda1c1c1ebc6dda3689026e747e0db84e524d40b Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Wed, 30 Sep 2020 19:57:16 +0200 Subject: [PATCH] reduce slow estack usage by splitting up simple binary expressions --- .../compiler/target/IMachineDefinition.kt | 1 + .../target/c64/C64MachineDefinition.kt | 2 ++ .../target/cx16/CX16MachineDefinition.kt | 4 +++ .../src/prog8/optimizer/StatementOptimizer.kt | 32 +++++++++++++++---- .../src/prog8/optimizer/UnusedCodeRemover.kt | 3 ++ examples/plasma.p8 | 11 +++++-- examples/test.p8 | 12 +++++++ 7 files changed, 56 insertions(+), 9 deletions(-) diff --git a/compiler/src/prog8/compiler/target/IMachineDefinition.kt b/compiler/src/prog8/compiler/target/IMachineDefinition.kt index 2aa93c582..fe86770cc 100644 --- a/compiler/src/prog8/compiler/target/IMachineDefinition.kt +++ b/compiler/src/prog8/compiler/target/IMachineDefinition.kt @@ -35,4 +35,5 @@ internal interface IMachineDefinition { fun getFloatRomConst(number: Double): String? fun importLibs(compilerOptions: CompilationOptions, importer: ModuleImporter, program: Program) fun launchEmulator(programName: String) + fun isRAMaddress(address: Int): Boolean } diff --git a/compiler/src/prog8/compiler/target/c64/C64MachineDefinition.kt b/compiler/src/prog8/compiler/target/c64/C64MachineDefinition.kt index 3180f8c4d..9bd841912 100644 --- a/compiler/src/prog8/compiler/target/c64/C64MachineDefinition.kt +++ b/compiler/src/prog8/compiler/target/c64/C64MachineDefinition.kt @@ -89,6 +89,8 @@ internal object C64MachineDefinition: IMachineDefinition { } } + override fun isRAMaddress(address: Int): Boolean = (address<0xa000) || (address in 0xc000..0xcfff) // upper bound is inclusive: $d000 starts the I/O area and must not count as RAM + override fun initializeZeropage(compilerOptions: CompilationOptions) { zeropage = C64Zeropage(compilerOptions) } diff --git a/compiler/src/prog8/compiler/target/cx16/CX16MachineDefinition.kt b/compiler/src/prog8/compiler/target/cx16/CX16MachineDefinition.kt index 094c502fb..916f0faf2 100644 --- 
a/compiler/src/prog8/compiler/target/cx16/CX16MachineDefinition.kt +++ b/compiler/src/prog8/compiler/target/cx16/CX16MachineDefinition.kt @@ -52,6 +52,10 @@ internal object CX16MachineDefinition: IMachineDefinition { } } + override fun isRAMaddress(address: Int): Boolean { + return address < 0x9000 // TODO put correct Cx16 mem ranges here + } + override fun initializeZeropage(compilerOptions: CompilationOptions) { zeropage = CX16Zeropage(compilerOptions) } diff --git a/compiler/src/prog8/optimizer/StatementOptimizer.kt b/compiler/src/prog8/optimizer/StatementOptimizer.kt index 515f2c74a..088abbed2 100644 --- a/compiler/src/prog8/optimizer/StatementOptimizer.kt +++ b/compiler/src/prog8/optimizer/StatementOptimizer.kt @@ -449,12 +449,34 @@ internal class StatementOptimizer(private val program: Program, fun isSimpleTarget(target: AssignTarget): Boolean { return when { - target.identifier!=null -> true + target.identifier!=null -> { + val decl = target.identifier!!.targetVarDecl(program.namespace)!! + return if(decl.type==VarDeclType.MEMORY) { // memory-mapped variable: simple only when its literal address is known to be RAM + if(decl.value is NumericLiteralValue) { + CompilationTarget.instance.machine.isRAMaddress((decl.value as NumericLiteralValue).number.toInt()) + } else { + false + } + } else true // ordinary (non-memory-mapped) variable: always a simple target + } target.memoryAddress!=null -> { - target.memoryAddress.addressExpression is NumericLiteralValue || target.memoryAddress.addressExpression is IdentifierReference + return when (target.memoryAddress.addressExpression) { + is NumericLiteralValue -> { + CompilationTarget.instance.machine.isRAMaddress((target.memoryAddress.addressExpression as NumericLiteralValue).number.toInt()) + } + is IdentifierReference -> { + val decl = (target.memoryAddress.addressExpression as IdentifierReference).targetVarDecl(program.namespace)!! 
+ if(decl.value is NumericLiteralValue) { + CompilationTarget.instance.machine.isRAMaddress((decl.value as NumericLiteralValue).number.toInt()) + } else { + false + } + } + else -> false + } } target.arrayindexed!=null -> { - target.arrayindexed!!.arrayspec.index is NumericLiteralValue || target.arrayindexed!!.arrayspec.index is IdentifierReference + target.arrayindexed!!.arrayspec.index is NumericLiteralValue } else -> false } @@ -465,16 +487,14 @@ internal class StatementOptimizer(private val program: Program, // X = // or X = // split that into X = ; X = X - if(!assignment.isAugmentable && isSimpleTarget(assignment.target)) { + if(bexpr.operator !in comparisonOperators && !assignment.isAugmentable && isSimpleTarget(assignment.target)) { if (bexpr.right !is BinaryExpression) { - println("SPLIT RIGHT ${bexpr.left}\n ${bexpr.operator}\n ${bexpr.right}") val firstAssign = Assignment(assignment.target, bexpr.left, assignment.position) val augExpr = BinaryExpression(assignment.target.toExpression(), bexpr.operator, bexpr.right, bexpr.position) return listOf( IAstModification.InsertBefore(assignment, firstAssign, parent), IAstModification.ReplaceNode(assignment.value, augExpr, assignment)) } else if (bexpr.left !is BinaryExpression && bexpr.operator in associativeOperators) { - println("SPLIT LEFT ${bexpr.left}\n ${bexpr.operator}\n ${bexpr.right}") val firstAssign = Assignment(assignment.target, bexpr.right, assignment.position) val augExpr = BinaryExpression(assignment.target.toExpression(), bexpr.operator, bexpr.left, bexpr.position) return listOf( diff --git a/compiler/src/prog8/optimizer/UnusedCodeRemover.kt b/compiler/src/prog8/optimizer/UnusedCodeRemover.kt index e57a16e19..d7845138f 100644 --- a/compiler/src/prog8/optimizer/UnusedCodeRemover.kt +++ b/compiler/src/prog8/optimizer/UnusedCodeRemover.kt @@ -8,6 +8,9 @@ import prog8.ast.processing.AstWalker import prog8.ast.processing.IAstModification import prog8.ast.statements.* + +// TODO remove unneeded 
assignments such as: cc = 0 ; cc= xbuf ; ... the first can be removed (unless target is not RAM) + internal class UnusedCodeRemover(private val errors: ErrorReporter): AstWalker() { override fun before(program: Program, parent: Node): Iterable { diff --git a/examples/plasma.p8 b/examples/plasma.p8 index ec41ebd9e..adc55b327 100644 --- a/examples/plasma.p8 +++ b/examples/plasma.p8 @@ -74,13 +74,18 @@ main { } c2A += 2 c2B -= 3 + ubyte cc + for y in 24 downto 0 { for x in 39 downto 0 { - @(screen) = xbuf[x] + ybuf[y] + ; using a temp var here to enable expression optimization that can't be done on a 'problematic' ROM/RAM memory location + ubyte cc = xbuf[x] + ybuf[y] ; TODO should be split!! + @(screen) = cc +; this is the fastest way to do this inner part: ; %asm {{ -; ldy x +; ldy i ; lda xbuf,y -; ldy y +; ldy ii ; clc ; adc ybuf,y ; ldy #0 diff --git a/examples/test.p8 b/examples/test.p8 index 299144779..7e3bb0208 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -40,6 +40,18 @@ _saveX .byte 0 float ff1 = 1000 float ff2 = -1000 + + ubyte[10] xbuf + ubyte[10] ybuf + ubyte x + ubyte y + + ubyte cc = xbuf[x] + ybuf[y] ; TODO should be split!! + ubyte cc2 + cc2 = xbuf[x] + ybuf[y] ; will be split correctly? + + return + ff1 = 1+((-ff1) *3) floats.print_f(ff1) floats.print_f(1+((-1000) *3))