From aac5a4c27fc526ab826fb8cb48dd8ff992df4013 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Sun, 2 Jul 2023 03:57:42 +0200 Subject: [PATCH] optimize word repeat loop codegen --- .../src/prog8/codegen/cpu6502/AsmGen.kt | 77 ++++++++----------- compiler/test/vm/TestCompilerVirtual.kt | 69 ++++++++++++++++- examples/test.p8 | 41 ++++++++-- 3 files changed, 131 insertions(+), 56 deletions(-) diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt index 3a8a35349..4486b2ada 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmGen.kt @@ -641,65 +641,48 @@ class AsmGen6502Internal ( private fun repeatWordCount(count: Int, stmt: PtRepeatLoop) { require(count in 257..65535) { "invalid repeat count ${stmt.position}" } val repeatLabel = makeLabel("repeat") - if(isTargetCpu(CpuType.CPU65c02)) { - val counterVar = createRepeatCounterVar(DataType.UWORD, true, stmt) - out(""" - lda #<$count - ldy #>$count - sta $counterVar - sty $counterVar+1 + val counterVar = createRepeatCounterVar(DataType.UWORD, isTargetCpu(CpuType.CPU65c02), stmt) + // the iny + double dec is microoptimization of the 16 bit loop + out(""" + ldy #>$count + lda #<$count + beq + + iny ++ sta $counterVar + sty $counterVar+1 $repeatLabel""") - translate(stmt.statements) - out(""" - lda $counterVar - bne + - dec $counterVar+1 -+ dec $counterVar - lda $counterVar - ora $counterVar+1 - bne $repeatLabel""") - } else { - val counterVar = createRepeatCounterVar(DataType.UWORD, false, stmt) - out(""" - lda #<$count - ldy #>$count - sta $counterVar - sty $counterVar+1 -$repeatLabel""") - translate(stmt.statements) - out(""" - lda $counterVar - bne + - dec $counterVar+1 -+ dec $counterVar - lda $counterVar - ora $counterVar+1 - bne $repeatLabel""") - } + translate(stmt.statements) + out(""" + dec $counterVar + bne $repeatLabel + dec $counterVar+1 + bne $repeatLabel""") } private fun repeatWordCountInAY(endLabel: String, stmt: PtRepeatLoop) { // note: A/Y must have been loaded with the number of iterations! - // no need to explicitly test for 0 iterations as this is done in the countdown logic below + // the iny + double dec is microoptimization of the 16 bit loop val repeatLabel = makeLabel("repeat") val counterVar = createRepeatCounterVar(DataType.UWORD, false, stmt) out(""" - sta $counterVar - sty $counterVar+1 -$repeatLabel lda $counterVar - bne + - lda $counterVar+1 - beq $endLabel - lda $counterVar - bne + - dec $counterVar+1 -+ dec $counterVar -""") + cmp #0 + beq + + iny ++ sta $counterVar + sty $counterVar+1 + ora $counterVar+1 + beq $endLabel +$repeatLabel""") translate(stmt.statements) - jmp(repeatLabel) + out(""" + dec $counterVar + bne $repeatLabel + dec $counterVar+1 + bne $repeatLabel""") out(endLabel) } + private fun repeatByteCount(count: Int, stmt: PtRepeatLoop) { require(count in 2..256) { "invalid repeat count ${stmt.position}" } val repeatLabel = makeLabel("repeat") diff --git a/compiler/test/vm/TestCompilerVirtual.kt b/compiler/test/vm/TestCompilerVirtual.kt index cd2d01d70..7822674bd 100644 --- a/compiler/test/vm/TestCompilerVirtual.kt +++ b/compiler/test/vm/TestCompilerVirtual.kt @@ -364,7 +364,7 @@ main { compileText(VMTarget(), false, text, writeAssembly = true) shouldNotBe null } - test("repeat counts") { + test("repeat counts (const)") { val src=""" main { sub start() { @@ -387,14 +387,77 @@ main { repeat 1025 { cx16.r0++ } + repeat 65534 { + cx16.r0++ + } + repeat 65535 { + cx16.r0++ + } + repeat 0 { + cx16.r0++ + } } }""" val result = compileText(VMTarget(), false, src, writeAssembly = true)!! val start = result.codegenAst!!.entrypoint()!! - start.children.size shouldBe 8 + start.children.size shouldBe 11 val virtfile = result.compilationOptions.outputDir.resolve(result.compilerAst.name + ".p8ir") VmRunner().runAndTestProgram(virtfile.readText()) { vm -> - vm.memory.getUW(vm.cx16virtualregsBaseAddress) shouldBe 3840u + vm.memory.getUW(vm.cx16virtualregsBaseAddress) shouldBe 3837u } } + + test("repeat counts (variable)") { + val src=""" +main { + sub start() { + uword count + cx16.r0 = 0 + count=255 + repeat count { + cx16.r0++ + } + count=256 + repeat count { + cx16.r0++ + } + count=257 + repeat count { + cx16.r0++ + } + count=1023 + repeat count { + cx16.r0++ + } + count=1024 + repeat count { + cx16.r0++ + } + count=1025 + repeat count { + cx16.r0++ + } + count=65534 + repeat count { + cx16.r0++ + } + count=65535 + repeat count { + cx16.r0++ + } + count=0 + repeat count { + cx16.r0++ + } + } +}""" + val result = compileText(VMTarget(), false, src, writeAssembly = true)!! + val start = result.codegenAst!!.entrypoint()!! + start.children.size shouldBe 22 + val virtfile = result.compilationOptions.outputDir.resolve(result.compilerAst.name + ".p8ir") + VmRunner().runAndTestProgram(virtfile.readText()) { vm -> + vm.memory.getUW(vm.cx16virtualregsBaseAddress) shouldBe 3837u + } + } + }) \ No newline at end of file diff --git a/examples/test.p8 b/examples/test.p8 index c7332d7e4..7f806aa71 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -3,8 +3,9 @@ main { sub start() { + uword count = 255 cx16.r0 = 0 - repeat 255 { + repeat count { cx16.r0++ } txt.print_uw(255) @@ -12,40 +13,68 @@ main { txt.print_uw(cx16.r0) txt.nl() - repeat 256 { + count=256 + repeat count { cx16.r0++ } txt.print_uw(255+256) txt.spc() txt.print_uw(cx16.r0) txt.nl() - repeat 257 { + count = 257 + repeat count { cx16.r0++ } txt.print_uw(255+256+257) txt.spc() txt.print_uw(cx16.r0) txt.nl() - repeat 1023 { + count=1023 + repeat count { cx16.r0++ } txt.print_uw(255+256+257+1023) txt.spc() txt.print_uw(cx16.r0) txt.nl() - repeat 1024 { + count=1024 + repeat count { cx16.r0++ } txt.print_uw(255+256+257+1023+1024) txt.spc() txt.print_uw(cx16.r0) txt.nl() - repeat 1025 { + count = 1025 + repeat count { cx16.r0++ } txt.print_uw(255+256+257+1023+1024+1025) txt.spc() txt.print_uw(cx16.r0) txt.nl() + count = 65534 + repeat count { + cx16.r0++ + } + txt.print_uw(3838) + txt.spc() + txt.print_uw(cx16.r0) + txt.nl() + count = 65535 + repeat count { + cx16.r0++ + } + count=0 + repeat count { + cx16.r0++ + } + repeat 0 { + cx16.r0++ + } + txt.print_uw(3837) + txt.spc() + txt.print_uw(cx16.r0) + txt.nl() } }