1
0
mirror of https://github.com/KarolS/millfork.git synced 2024-09-30 00:56:56 +00:00

6502: Fix some cases of word additions; optimize bitmasking

This commit is contained in:
Karol Stasiak 2019-07-30 14:47:52 +02:00
parent 40b2e393da
commit 943a56a30f
6 changed files with 227 additions and 8 deletions

View File

@ -32,6 +32,7 @@ object OptimizationPresets {
AlwaysGoodOptimizations.IdempotentDuplicateRemoval,
AlwaysGoodOptimizations.BranchInPlaceRemoval,
UnusedLabelRemoval,
AlwaysGoodOptimizations.OptimizableMasking,
AlwaysGoodOptimizations.UnconditionalJumpRemoval,
UnusedLabelRemoval,
AlwaysGoodOptimizations.RearrangeMath,
@ -212,6 +213,7 @@ object OptimizationPresets {
AlwaysGoodOptimizations.NonetAddition,
AlwaysGoodOptimizations.NonetBitOp,
AlwaysGoodOptimizations.OperationsAroundShifting,
AlwaysGoodOptimizations.OptimizableMasking,
AlwaysGoodOptimizations.OptimizeZeroComparisons,
AlwaysGoodOptimizations.PoinlessFlagChange,
AlwaysGoodOptimizations.PointlessAccumulatorShifting,

View File

@ -2849,7 +2849,7 @@ object AlwaysGoodOptimizations {
},
)
val ConstantInlinedShifting = new RuleBasedAssemblyOptimization("Constant inlined shifting",
lazy val ConstantInlinedShifting = new RuleBasedAssemblyOptimization("Constant inlined shifting",
needsFlowInfo = FlowInfoRequirement.BothFlows,
// TODO: set limits on the loop iteration to avoid huge unrolled code
@ -2874,7 +2874,7 @@ object AlwaysGoodOptimizations {
)
val SimplifiableComparison = new RuleBasedAssemblyOptimization("Simplifiable comparison",
lazy val SimplifiableComparison = new RuleBasedAssemblyOptimization("Simplifiable comparison",
needsFlowInfo = FlowInfoRequirement.BackwardFlow,
(Elidable & HasOpcode(LDA)) ~
@ -2909,4 +2909,147 @@ object AlwaysGoodOptimizations {
},
)
// Pairs of (single-bit mask, index of that bit) for bits 0 through 6:
// 1 -> 0, 2 -> 1, 4 -> 2, 8 -> 3, 16 -> 4, 32 -> 5, 64 -> 6.
private val powersOf2: List[(Int, Int)] =
  List.tabulate(7)(bitIndex => (1 << bitIndex) -> bitIndex)
/**
  * Replaces a branching "test one bit, then load either a single-bit constant or zero"
  * sequence with straight-line bit arithmetic on the accumulator.
  *
  * Every rule matches this shape (the parameters of the two labels are captured as 10 and 11):
  * {{{
  *   AND #sourceMask
  *   BEQ/BNE skip
  *   LDr #first
  *   <always-taken jump> end
  * skip:
  *   LDr #second
  * end:
  * }}}
  * where `LDr` is LDA, LDY or LDX and exactly one of `first`/`second` is `targetMask`
  * while the other is 0. The whole sequence is replaced with `AND #sourceMask`,
  * an optional `EOR #sourceMask` (when `targetMask` is selected by a *clear* bit),
  * zero or one `ASL`/`LSR` to move the bit from the source position to the target position,
  * and, for the index-register variants, a final `TAY`/`TAX`.
  *
  * The replacement leaves different N/Z/C flags than the original (and, for the LDY/LDX
  * variants, a different accumulator value), hence the `DoesntMatterWhatItDoesWith` guard
  * on the final label.
  */
lazy val OptimizableMasking = new RuleBasedAssemblyOptimization("Simplifiable masking",
  needsFlowInfo = FlowInfoRequirement.BackwardFlow,
  MultipleAssemblyRules((for{
    (sourceMask, sourceShift) <- powersOf2
    (targetMask, targetShift) <- powersOf2
    shiftOp = if (sourceShift > targetShift) LSR else ASL
    shift = if (sourceShift > targetShift) sourceShift - targetShift else targetShift - sourceShift
    // only handle masks at most one bit position apart, so the replacement needs at most one shift
    if shift < 2
  } yield {
    // The jump between the two loads must be one that is ALWAYS taken, and for conditional
    // branches that depends on the flags set by the load directly before it:
    //  - after loading targetMask (non-zero, at most 64): Z = 0 and N = 0, so BNE and BPL are always taken;
    //  - after loading 0 (or TAY/TAX of a zero accumulator): Z = 1 and N = 0, so BEQ and BPL are always taken.
    // Accepting the opposite branch (BEQ after a non-zero load, BNE after a zero load) would match
    // code that actually falls through into the second load, and rewriting it would change its result.
    val jumpAfterNonZeroLoad = HasOpcodeIn(JMP, BNE, BRA, BPL)
    val jumpAfterZeroLoad = HasOpcodeIn(JMP, BEQ, BRA, BPL)
    List(
      // LDA:
      // bit set -> targetMask, bit clear -> 0
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDA) & HasImmediate(targetMask)) ~
        (Elidable & jumpAfterNonZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDA) & HasImmediate(0)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C)) ~~>
        (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source))),
      // bit clear -> targetMask, bit set -> 0 (needs the EOR to invert the tested bit)
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDA) & HasImmediate(targetMask)) ~
        (Elidable & jumpAfterNonZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDA) & HasImmediate(0)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C)) ~~>
        (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source))),
      // bit set -> 0, bit clear -> targetMask (needs the EOR to invert the tested bit)
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDA) & HasImmediate(0)) ~
        (Elidable & jumpAfterZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDA) & HasImmediate(targetMask)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C)) ~~>
        (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source))),
      // bit clear -> 0, bit set -> targetMask
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDA) & HasImmediate(0)) ~
        (Elidable & jumpAfterZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDA) & HasImmediate(targetMask)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C)) ~~>
        (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source))),
      // LDY
      // On the path where the accumulator is known to be zero, TAY is accepted as an alternative to LDY #0.
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDY) & HasImmediate(targetMask)) ~
        (Elidable & jumpAfterNonZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & (HasOpcode(LDY) & HasImmediate(0) | HasOpcode(TAY))) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
        (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAY))),
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDY) & HasImmediate(targetMask)) ~
        (Elidable & jumpAfterNonZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDY) & HasImmediate(0)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
        (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAY))),
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDY) & HasImmediate(0)) ~
        (Elidable & jumpAfterZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDY) & HasImmediate(targetMask)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
        (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAY))),
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
        (Elidable & (HasOpcode(LDY) & HasImmediate(0) | HasOpcode(TAY))) ~
        (Elidable & jumpAfterZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDY) & HasImmediate(targetMask)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
        (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAY))),
      // LDX
      // Same four shapes as for LDY, with TAX in place of TAY.
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDX) & HasImmediate(targetMask)) ~
        (Elidable & jumpAfterNonZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & (HasOpcode(LDX) & HasImmediate(0) | HasOpcode(TAX))) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
        (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAX))),
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDX) & HasImmediate(targetMask)) ~
        (Elidable & jumpAfterNonZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDX) & HasImmediate(0)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
        (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAX))),
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BEQ) & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDX) & HasImmediate(0)) ~
        (Elidable & jumpAfterZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDX) & HasImmediate(targetMask)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
        (c => List(c.head, AssemblyLine.immediate(EOR, sourceMask).pos(c(1).source)) ++ List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAX))),
      (HasOpcode(AND) & HasImmediate(sourceMask)) ~
        (Elidable & HasOpcode(BNE) & MatchParameter(10)) ~
        (Elidable & (HasOpcode(LDX) & HasImmediate(0) | HasOpcode(TAX))) ~
        (Elidable & jumpAfterZeroLoad & MatchParameter(11)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(10)) ~
        (Elidable & HasOpcode(LDX) & HasImmediate(targetMask)) ~
        (Elidable & HasOpcode(LABEL) & IsNotALabelUsedManyTimes & MatchParameter(11) & DoesntMatterWhatItDoesWith(State.N, State.Z, State.C, State.A)) ~~>
        (c => c.head :: List.fill(shift)(AssemblyLine.implied(shiftOp).pos(c(2).source)) ++ List(AssemblyLine.implied(TAX)))
    )
  }).flatten)
)
}

View File

@ -80,6 +80,8 @@ object PseudoregisterBuiltIns {
niceReads.prepend(List(AssemblyLine.immediate(LDA, constant.loByte)) -> List(AssemblyLine.immediate(LDA, constant.hiByte)))
}
}
if (doesMemoryAccessOverlap(niceReads.flatMap(_._1).toList, stores(1))) return None
if (doesMemoryAccessOverlap(niceReads.flatMap(_._2).toList, stores(0))) return None
for (b <- 0 to 1) {
for (read <- niceReads) {
if (b == 0) result ++= read._1
@ -122,11 +124,15 @@ object PseudoregisterBuiltIns {
return List(AssemblyLine.immediate(LDA, 0), AssemblyLine.immediate(LDX, 0))
}
val reg = ctx.env.get[VariableInMemory]("__reg.loword")
val head = params.head match {
val addToRegs = params.map{ case (sub, param) => addToReg(ctx, param, sub, decimal) }
val newHead = params.indices.find{ i =>
!params(i)._1 && addToRegs(i).exists(l => l.opcode == PHA || l.opcode == PHA_W)
}.getOrElse(0)
val head = params(newHead) match {
case (false, e) => MosExpressionCompiler.compile(ctx, e, Some(MosExpressionCompiler.getExpressionType(ctx, e) -> reg), BranchSpec.None)
case (true, e) => ???
}
params.tail.foldLeft[List[AssemblyLine]](head){case (code, (sub, param)) => code ++ addToReg(ctx, param, sub, decimal)} ++ List(
params.indices.filter(_ != newHead).foldLeft[List[AssemblyLine]](head){case (code, index) => code ++ addToRegs(index)} ++ List(
AssemblyLine.zeropage(LDA, reg),
AssemblyLine.zeropage(LDX, reg, 1),
)
@ -152,11 +158,15 @@ object PseudoregisterBuiltIns {
return List(AssemblyLine.accu16, AssemblyLine.immediate(LDA_W, 0), AssemblyLine.accu8)
}
val reg = ctx.env.get[VariableInMemory]("__reg.loword")
val head = params.head match {
val addToRegs = params.map{ case (sub, param) => addToReg(ctx, param, sub, decimal) }
val newHead = params.indices.find{ i =>
!params(i)._1 && addToRegs(i).exists(l => l.opcode == PHA || l.opcode == PHA_W)
}.getOrElse(0)
val head = params(newHead) match {
case (false, e) => MosExpressionCompiler.compile(ctx, e, Some(MosExpressionCompiler.getExpressionType(ctx, e) -> reg), BranchSpec.None)
case (true, e) => ???
}
params.tail.foldLeft[List[AssemblyLine]](head){case (code, (sub, param)) => code ++ addToReg(ctx, param, sub, decimal)} ++ List(
params.indices.filter(_ != newHead).foldLeft[List[AssemblyLine]](head){case (code, index) => code ++ addToRegs(index)} ++ List(
AssemblyLine.accu16,
AssemblyLine.zeropage(LDA_W, reg),
AssemblyLine.accu8
@ -238,8 +248,7 @@ object PseudoregisterBuiltIns {
AssemblyLine.zeropage(STA, reg),
AssemblyLine.zeropage(LDA, reg, 1),
h.copy(opcode = op),
AssemblyLine.zeropage(STA, reg, 1),
AssemblyLine.zeropage(LDA, reg)))
AssemblyLine.zeropage(STA, reg, 1)))
case List(
AssemblyLine0(REP, Immediate, NumericConstant(0x20, _)),
l@AssemblyLine0(LDA_W, addrMode, _),
@ -675,4 +684,18 @@ object PseudoregisterBuiltIns {
case AssemblyLine0(_, _, CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(2, _))) if th.name == "__reg" => true
case _ => false
}
/**
  * Checks whether the two instruction sequences touch the same operand in a conflicting way.
  *
  * Lines using the Immediate or Implied addressing mode are ignored. Among the remaining lines,
  * a conflict is a pair (one line from each list) where exactly one of the two is an STA
  * and both have equal parameters. Only STA is treated as a store here; pairs where both
  * lines are STA, or neither is, are not considered conflicts.
  *
  * @param l1 first instruction sequence
  * @param l2 second instruction sequence
  * @return true if at least one conflicting pair exists, false otherwise (including when either list is empty)
  */
def doesMemoryAccessOverlap(l1: List[AssemblyLine], l2: List[AssemblyLine]): Boolean = {
  def hasMemoryOperand(line: AssemblyLine): Boolean =
    line.addrMode != Immediate && line.addrMode != Implied

  // filter the second list once instead of once per line of the first list
  val candidates2 = l2.filter(hasMemoryOperand)
  l1.filter(hasMemoryOperand).exists { a1 =>
    candidates2.exists { a2 =>
      (a1.opcode == STA) != (a2.opcode == STA) && a1.parameter == a2.parameter
    }
  }
}
}

View File

@ -72,4 +72,21 @@ class BitOpSuite extends FunSuite with Matchers {
| noinline void barrier(){}
""".stripMargin)(_.readWord(0xc000) should equal(0xc00c))
}
test("Bit testing optimizations") {
  // f() yields 5, which has bit 2 set, so `x & 4 == 0` is false and the else branch must store 2.
  EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80)("""
| byte output @$c000
| noinline byte f() = 5
| noinline bool g(byte x) = x & 1 == 0
| void main () {
| byte x
| x = f()
|
| if x & 4 == 0 { x = 0 } else { x = 2 }
| output = x
| }
|
""".stripMargin) { memory =>
    memory.readByte(0xc000) should equal(2)
  }
}
}

View File

@ -138,4 +138,21 @@ class BooleanSuite extends FunSuite with Matchers {
""".stripMargin
EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80)(code)(_.readByte(0xc000) should equal(code.sliding(4).count(_ == "pass")))
}
test("Fat boolean optimization") {
  // Starting from 5, only f(3) (odd) and g(2) (even) hold, each adding 1;
  // a wrong result for f(2) or g(3) would add 100 and be immediately visible.
  EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80)("""
| byte output @$c000
| noinline bool f(byte x) = x & 1 != 0
| noinline bool g(byte x) = x & 1 == 0
| void main () {
| output = 5
| if f(3) { output += 1 }
| if g(2) { output += 1 }
| if f(2) { output += 100 }
| if g(3) { output += 100 }
| }
|
""".stripMargin) { memory =>
    memory.readByte(0xc000) should equal(7)
  }
}
}

View File

@ -236,6 +236,23 @@ class WordMathSuite extends FunSuite with Matchers with AppendedClues {
}
}
test("Word addition 5") {
  // output starts as $102; (output.lo:output.hi) swaps its bytes to $201, and $201 + $308 = $509.
  // The sum is written back to the same word that both operand bytes are read from.
  val source = """
| word output @$c000
| void main () {
| word v
| word u
| v = $308
| output = $102
| barrier()
| output = (output.lo:output.hi) + v
| }
| noinline void barrier() { }
""".stripMargin
  EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Sixteen, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Intel8086)(source)(_.readWord(0xc000) should equal(0x509))
}
}
test("Word bit ops 2") {
EmuCrossPlatformBenchmarkRun(Cpu.Sixteen, Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Intel8086)("""
| word output @$c000