From 9feda54d92ad63f4510bed0b625a111e377000b8 Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Wed, 13 Jan 2021 14:31:20 +0100 Subject: [PATCH] =?UTF-8?q?Optimize=20word=20shifts=20by=207=E2=80=9312=20?= =?UTF-8?q?bits?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../compiler/m6809/M6809Buitins.scala | 22 +++- .../compiler/mos/PseudoregisterBuiltIns.scala | 101 ++++++++++++++++-- .../millfork/compiler/z80/Z80Shifting.scala | 28 +++++ src/main/scala/millfork/env/Constant.scala | 1 + src/test/scala/millfork/test/ShiftSuite.scala | 39 ++++++- 5 files changed, 177 insertions(+), 14 deletions(-) diff --git a/src/main/scala/millfork/compiler/m6809/M6809Buitins.scala b/src/main/scala/millfork/compiler/m6809/M6809Buitins.scala index bd319795..4adb7ef1 100644 --- a/src/main/scala/millfork/compiler/m6809/M6809Buitins.scala +++ b/src/main/scala/millfork/compiler/m6809/M6809Buitins.scala @@ -392,9 +392,25 @@ object M6809Buitins { val op = if (left) List(MLine.inherentB(ASL), MLine.inherentA(ROL)) else List(MLine.inherentA(LSR), MLine.inherentB(ROR)) ctx.env.eval(rhs) match { case Some(NumericConstant(0, _)) => Nil - case Some(NumericConstant(8, _)) => - if (left) List(MLine.tfr(M6809Register.B, M6809Register.A), MLine.immediate(LDB, 0)) - else List(MLine.tfr(M6809Register.A, M6809Register.B), MLine.immediate(LDA, 0)) + case Some(NumericConstant(n, _)) if n >= 8 && n <= 12=> + if (left) List(MLine.tfr(M6809Register.B, M6809Register.A), MLine.immediate(LDB, 0)) ++ List.fill(n.toInt - 8)(MLine.inherentA(ASL)) + else List(MLine.tfr(M6809Register.A, M6809Register.B), MLine.immediate(LDA, 0)) ++ List.fill(n.toInt - 8)(MLine.inherentB(LSR)) + case Some(NumericConstant(7, _)) => + if (left) { + List( + MLine.inherentA(LSR), + MLine.inherentB(ROR), + MLine.tfr(M6809Register.B, M6809Register.A), + MLine.immediate(LDB, 0), + MLine.inherentB(ROR)) + } else { + List( + MLine.inherentB(ASL), + MLine.inherentA(ROL), + 
MLine.tfr(M6809Register.A, M6809Register.B), + MLine.immediate(LDA, 0), + MLine.inherentA(ROL)) + } case Some(NumericConstant(n, _)) => List.fill(n.toInt)(op).flatten case _ => val loop = ctx.nextLabel("sr") diff --git a/src/main/scala/millfork/compiler/mos/PseudoregisterBuiltIns.scala b/src/main/scala/millfork/compiler/mos/PseudoregisterBuiltIns.scala index df637af2..77c04ff8 100644 --- a/src/main/scala/millfork/compiler/mos/PseudoregisterBuiltIns.scala +++ b/src/main/scala/millfork/compiler/mos/PseudoregisterBuiltIns.scala @@ -1,6 +1,7 @@ package millfork.compiler.mos import millfork.CompilationFlag +import millfork.assembly.Elidability import millfork.assembly.mos.AddrMode._ import millfork.assembly.mos.Opcode._ import millfork.assembly.mos._ @@ -440,15 +441,12 @@ object PseudoregisterBuiltIns { val firstParamCompiled = MosExpressionCompiler.compile(ctx, l, Some(MosExpressionCompiler.getExpressionType(ctx, l) -> reg), NoBranching) ctx.env.eval(r) match { case Some(NumericConstant(0, _)) => - List(AssemblyLine.zeropage(LDA, reg), AssemblyLine.zeropage(LDX, reg, 1)) - case Some(NumericConstant(1, _)) if (firstParamCompiled match { - case List( - AssemblyLine0(LDA, ZeroPage | Absolute | Immediate, _), - AssemblyLine0(STA, ZeroPage, _), - AssemblyLine0(LDA, ZeroPage | Absolute | Immediate, _), - AssemblyLine0(STA, ZeroPage, _)) => true - case _ => false - }) => + if (isTrivialZpRegAssignment(firstParamCompiled)) { + List(firstParamCompiled(0), firstParamCompiled(2).copy(opcode = LDX)) + } else { + firstParamCompiled ++ List(AssemblyLine.zeropage(LDA, reg), AssemblyLine.zeropage(LDX, reg, 1)) + } + case Some(NumericConstant(1, _)) if (isTrivialZpRegAssignment(firstParamCompiled)) => if (left) { List( firstParamCompiled(0), @@ -466,6 +464,79 @@ object PseudoregisterBuiltIns { firstParamCompiled(0), AssemblyLine.implied(ROR)) } + case Some(NumericConstant(n, _)) if n >= 8 && n <= 12 && !ctx.options.flag(CompilationFlag.EmitNative65816Opcodes) => + val shifts = 
List.fill(n.toInt - 8)(if (left)AssemblyLine.implied(ASL) else AssemblyLine.implied(LSR)) + if (left) { + if (n != 8) { + if (isTrivialZpRegAssignment(firstParamCompiled) && firstParamCompiled(2).elidability == Elidability.Elidable) { + List(firstParamCompiled(0)) ++ shifts ++ List(AssemblyLine.implied(TAX), AssemblyLine.immediate(LDA, 0)) + } else { + firstParamCompiled ++ List(AssemblyLine.zeropage(LDA, reg)) ++ shifts ++ List(AssemblyLine.implied(TAX), AssemblyLine.immediate(LDA, 0)) + } + } else { + if (isTrivialZpRegAssignment(firstParamCompiled) && firstParamCompiled(2).elidability == Elidability.Elidable) { + List(AssemblyLine.immediate(LDA, 0), firstParamCompiled(0).copy(opcode = LDX)) + } else { + firstParamCompiled ++ List(AssemblyLine.immediate(LDA, 0), AssemblyLine.zeropage(LDX, reg)) + } + } + } else { + if (isTrivialZpRegAssignment(firstParamCompiled) && firstParamCompiled(0).elidability == Elidability.Elidable) { + List(firstParamCompiled(2))++shifts++List(AssemblyLine.immediate(LDX, 0)) + } else { + firstParamCompiled ++ List(AssemblyLine.zeropage(LDA, reg, 1))++shifts++List(AssemblyLine.immediate(LDX, 0)) + } + } + case Some(NumericConstant(7, _)) if !ctx.options.flag(CompilationFlag.EmitNative65816Opcodes) => + if (left) { + if (isTrivialZpRegAssignment(firstParamCompiled)) { + List( + firstParamCompiled(2), + AssemblyLine.implied(LSR), + firstParamCompiled(0), /* load the low byte from its source: this branch never emits the stores to reg */ + AssemblyLine.implied(ROR), + AssemblyLine.implied(TAX), + AssemblyLine.immediate(LDA, 0), /* low result byte is just the carry rotated into bit 7 */ + AssemblyLine.implied(ROR) + ) + } else { + firstParamCompiled ++ List( + AssemblyLine.zeropage(LDA, reg, 1), + AssemblyLine.implied(LSR), + AssemblyLine.zeropage(LDA, reg), + AssemblyLine.implied(ROR), + AssemblyLine.implied(TAX), + AssemblyLine.immediate(LDA, 0), + AssemblyLine.implied(ROR) + ) + } + } else { + if (isTrivialZpRegAssignment(firstParamCompiled)) { + List( + firstParamCompiled(0), + AssemblyLine.implied(ASL), + firstParamCompiled(2), + AssemblyLine.implied(ROL), 
AssemblyLine.implied(PHA), + AssemblyLine.immediate(LDA, 0), + AssemblyLine.implied(ROL), + AssemblyLine.implied(TAX), + AssemblyLine.implied(PLA) + ) + } else { + firstParamCompiled ++ List( + AssemblyLine.zeropage(LDA, reg), + AssemblyLine.implied(ASL), + AssemblyLine.zeropage(LDA, reg, 1), + AssemblyLine.implied(ROL), + AssemblyLine.implied(PHA), + AssemblyLine.immediate(LDA, 0), + AssemblyLine.implied(ROL), + AssemblyLine.implied(TAX), + AssemblyLine.implied(PLA) + ) + } + } case Some(NumericConstant(v, _)) if v > 0 && unrollShift(ctx, v, 2, 4) => if (ctx.options.flag(CompilationFlag.EmitNative65816Opcodes)) { firstParamCompiled ++ @@ -520,6 +591,18 @@ object PseudoregisterBuiltIns { } } + /** Returns true when firstParamCompiled is exactly LDA, STA, LDA, STA (loads in ZeroPage, Absolute or Immediate mode, stores in ZeroPage mode), no line has Elidability.Fixed, and the second store address minus the first has a lowest byte always equal to 1. */ private def isTrivialZpRegAssignment(firstParamCompiled: List[AssemblyLine]) : Boolean = { + firstParamCompiled match { + case List( + l0@AssemblyLine0(LDA, ZeroPage | Absolute | Immediate, _), + AssemblyLine0(STA, ZeroPage, zp0), + l1@AssemblyLine0(LDA, ZeroPage | Absolute | Immediate, _), + AssemblyLine0(STA, ZeroPage, zp1)) => + firstParamCompiled.forall(l => l.elidability != Elidability.Fixed) && (zp1 - zp0).quickSimplify.isLowestByteAlwaysEqual(1) + case _ => false + } + } + def compileByteMultiplication(ctx: CompilationContext, param1OrRegister: Option[Expression], param2: Expression, storeInRegLo: Boolean): List[AssemblyLine] = compileByteMultiplicationOrDivision(ctx, param1OrRegister, param2, storeInRegLo, "__mul_u8u8u8", commutative = true) def compileUnsignedByteDivision(ctx: CompilationContext, param1OrRegister: Option[Expression], param2: Expression, storeInRegLo: Boolean): List[AssemblyLine] = diff --git a/src/main/scala/millfork/compiler/z80/Z80Shifting.scala b/src/main/scala/millfork/compiler/z80/Z80Shifting.scala index dbb29478..a75b8b99 100644 --- a/src/main/scala/millfork/compiler/z80/Z80Shifting.scala +++ b/src/main/scala/millfork/compiler/z80/Z80Shifting.scala @@ -140,6 +140,34 @@ object Z80Shifting { case Some(NumericConstant(i, _)) => if (i <= 0) { l + } 
else if (i == 8) { + if (left) { + l ++ List(ZLine.ld8(ZRegister.H, ZRegister.L), ZLine.ldImm8(ZRegister.L, 0)) + } else { + l ++ List(ZLine.ld8(ZRegister.L, ZRegister.H), ZLine.ldImm8(ZRegister.H, 0)) + } + } else if (i >= 9 && i <= 12 && extendedOps) { + if (left) { + l ++ List(ZLine.ld8(ZRegister.H, ZRegister.L), ZLine.ldImm8(ZRegister.L, 0)) ++ List.fill(i.toInt - 8)(ZLine.register(ZOpcode.SLA, ZRegister.H)) + } else { + l ++ List(ZLine.ld8(ZRegister.L, ZRegister.H), ZLine.ldImm8(ZRegister.H, 0)) ++ List.fill(i.toInt - 8)(ZLine.register(ZOpcode.SRL, ZRegister.L)) + } + } else if (i == 7 && extendedOps) { + if (left) { + l ++ List( + ZLine.register(ZOpcode.SRL, ZRegister.H), + ZLine.register(ZOpcode.RR, ZRegister.L), + ZLine.ld8(ZRegister.H, ZRegister.L), + ZLine.ldImm8(ZRegister.L, 0), + ZLine.register(ZOpcode.RR, ZRegister.L)) + } else { + l ++ List( + ZLine.register(ZOpcode.SLA, ZRegister.L), + ZLine.register(ZOpcode.RL, ZRegister.H), + ZLine.ld8(ZRegister.L, ZRegister.H), + ZLine.ldImm8(ZRegister.H, 0), + ZLine.register(ZOpcode.RL, ZRegister.H)) + } } else if (i >= 16) { l :+ ZLine.ldImm16(ZRegister.HL, 0) // } else if (i > 8) { // TODO: optimize shifts larger than 8 diff --git a/src/main/scala/millfork/env/Constant.scala b/src/main/scala/millfork/env/Constant.scala index d90086f3..cec4aa53 100644 --- a/src/main/scala/millfork/env/Constant.scala +++ b/src/main/scala/millfork/env/Constant.scala @@ -464,6 +464,7 @@ case class CompoundConstant(operator: MathOperator.Value, lhs: Constant, rhs: Co val r = rhs.quickSimplify (l, r) match { case (MemoryAddressConstant(lt), MemoryAddressConstant(rt)) if operator == MathOperator.Minus && lt == rt => Constant.Zero + case (CompoundConstant(MathOperator.Plus, MemoryAddressConstant(lt), c), MemoryAddressConstant(rt)) if operator == MathOperator.Minus && lt == rt => c case (CompoundConstant(MathOperator.Plus, a, ll@NumericConstant(lv, _)), rr@NumericConstant(rv, _)) if operator == MathOperator.Plus => 
CompoundConstant(MathOperator.Plus, a, ll + rr).quickSimplify case (CompoundConstant(MathOperator.Minus, a, ll@NumericConstant(lv, _)), rr@NumericConstant(rv, _)) if operator == MathOperator.Minus => diff --git a/src/test/scala/millfork/test/ShiftSuite.scala b/src/test/scala/millfork/test/ShiftSuite.scala index 1ef7531b..ba0fa148 100644 --- a/src/test/scala/millfork/test/ShiftSuite.scala +++ b/src/test/scala/millfork/test/ShiftSuite.scala @@ -1,12 +1,12 @@ package millfork.test import millfork.Cpu import millfork.test.emu._ -import org.scalatest.{FunSuite, Matchers} +import org.scalatest.{AppendedClues, FunSuite, Matchers} /** * @author Karol Stasiak */ -class ShiftSuite extends FunSuite with Matchers { +class ShiftSuite extends FunSuite with Matchers with AppendedClues { test("In-place shifting") { EmuUnoptimizedCrossPlatformRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Intel8086, Cpu.Motorola6809)(""" @@ -42,6 +42,41 @@ class ShiftSuite extends FunSuite with Matchers { """.stripMargin)(_.readWord(0xc000) should equal(128 * 3)) } + test("Word shifting by certain interesting constants") { + for (w <- Seq(0, 1, 128, 6253, 65222, 0xffff)) { + EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Motorola6809)(s""" + | word output7l @$$c000 + | word output7r @$$c002 + | word output8l @$$c004 + | word output8r @$$c006 + | word output9l @$$c008 + | word output9r @$$c00a + | word outputal @$$c00c + | word outputar @$$c00e + | void main () { + | output7l = id($w) << 7 + | output7r = id($w) >> 7 + | output8l = id($w) << 8 + | output8r = id($w) >> 8 + | output9l = id($w) << 9 + | output9r = id($w) >> 9 + | outputal = id($w) << 10 + | outputar = id($w) >> 10 + | } + | noinline word id(word w) = w + """.stripMargin) { m => + m.readWord(0xc000) should equal((w << 7) & 0xffff) withClue s" = $w << 7" + m.readWord(0xc002) should equal((w >> 7) & 0xffff) withClue s" = $w >> 7" + m.readWord(0xc004) should equal((w << 8) & 0xffff) withClue s" = $w << 8" + m.readWord(0xc006) 
should equal((w >> 8) & 0xffff) withClue s" = $w >> 8" + m.readWord(0xc008) should equal((w << 9) & 0xffff) withClue s" = $w << 9" + m.readWord(0xc00a) should equal((w >> 9) & 0xffff) withClue s" = $w >> 9" + m.readWord(0xc00c) should equal((w << 10) & 0xffff) withClue s" = $w << 10" + m.readWord(0xc00e) should equal((w >> 10) & 0xffff) withClue s" = $w >> 10" + } + } + } + test("Long shifting left") { EmuUltraBenchmarkRun(""" | long output @$c000