1
0
mirror of https://github.com/KarolS/millfork.git synced 2025-01-01 06:29:53 +00:00

Division optimizations

This commit is contained in:
Karol Stasiak 2019-06-24 00:13:16 +02:00
parent fbe20a240b
commit 869211658a
5 changed files with 241 additions and 0 deletions

View File

@ -552,6 +552,35 @@ case class MatchA(i: Int) extends AssemblyLinePattern {
override def hitRate: Double = 0.42
}
/**
  * Matches a register store instruction (STA, STX or STY) whose source register holds
  * a single known value immediately before the instruction, and hands that value to
  * the matching context under index `i`.
  *
  * Any other opcode, or a store of a register whose status is not a `SingleStatus`,
  * does not match.
  *
  * @param i index under which the stored value is added to the matching context
  */
case class MatchStoredRegister(i: Int) extends AssemblyLinePattern {

  // This pattern reads `flowInfo.statusBefore`, hence the forward-flow assertion.
  override def validate(needsFlowInfo: FlowInfoRequirement.Value): Unit =
    FlowInfoRequirement.assertForward(needsFlowInfo)

  override def matchLineTo(ctx: AssemblyMatchingContext, flowInfo: FlowInfo, line: AssemblyLine): Boolean = {
    import Opcode._
    // Select the status of the register that this store writes out, if it is a store at all.
    val storedRegisterStatus = line.opcode match {
      case STA => Some(flowInfo.statusBefore.a)
      case STX => Some(flowInfo.statusBefore.x)
      case STY => Some(flowInfo.statusBefore.y)
      case _ => None
    }
    // The match result is whatever the context reports when the value is added.
    storedRegisterStatus match {
      case Some(SingleStatus(value)) => ctx.addObject(i, value)
      case _ => false
    }
  }

  override def hitRate: Double = 0.42
}
case class MatchX(i: Int) extends AssemblyLinePattern {
override def validate(needsFlowInfo: FlowInfoRequirement.Value): Unit =
FlowInfoRequirement.assertForward(needsFlowInfo)

View File

@ -5,6 +5,7 @@ import millfork.assembly.mos.AddrMode._
import millfork.assembly.AssemblyOptimization
import millfork.assembly.mos.{AssemblyLine, AssemblyLine0, Opcode, State}
import millfork.DecimalUtils.asDecimal
import millfork.assembly.z80.opt.HasRegister
import millfork.error.FatalErrorReporting
/**
* @author Karol Stasiak
@ -115,6 +116,76 @@ object ZeropageRegisterOptimizations {
},
)
/**
  * Replaces calls to the `__div_*` / `__mod_*` runtime routines with inline shifts or masks
  * when the value most recently stored to the relevant `__reg` byte is a known power of two.
  *
  * Every rule keeps all matched code except the final JSR (`code.init`) and appends the
  * inline replacement in its place.
  */
val ConstantDivision = {
  // TODO: constants other than power of 2:
  def isPowerOfTwo(n: Int): Boolean = n != 0 && (n & (n - 1)) == 0

  // A store of a known register value (captured as object 2) referring to `__reg` with the
  // given offset; the store's addressing mode and parameter are captured as 0 and 1.
  // NOTE(review): the stored value is presumably the divisor expected by the routine - confirm.
  def knownStoreToReg(offset: Int) =
    HasOpcodeIn(STA, STX, STY) & RefersTo("__reg", offset) & MatchStoredRegister(2) & MatchAddrMode(0) & MatchParameter(1)

  // Any run of linear code that does not change the memory captured by the store above.
  def codeLeavingStoreIntact = (Linear & DoesntChangeMemoryAt(0, 1)).*

  new RuleBasedAssemblyOptimization("Constant division",
    needsFlowInfo = FlowInfoRequirement.BothFlows,

    // __div_u8u8u8u8 by 2^k: load the first __reg byte and shift it right k times.
    knownStoreToReg(1) ~
      Where(ctx => isPowerOfTwo(ctx.get[Int](2))) ~
      codeLeavingStoreIntact ~
      (Elidable & HasOpcode(JSR) & RefersTo("__div_u8u8u8u8", 0)
        & DoesntMatterWhatItDoesWith(State.C, State.Z, State.N, State.V) // everything else (including Y) should be preserved
        & DoesntMatterWhatItDoesWithReg(0)
        & DoesntMatterWhatItDoesWithReg(1)) ~~> { (code, ctx) =>
      val shifts = Integer.numberOfTrailingZeros(ctx.get[Int](2))
      val zreg = ctx.zeropageRegister.get
      val shiftRight = List.fill(shifts)(AssemblyLine.implied(LSR))
      code.init ++ (AssemblyLine.zeropage(LDA, zreg) :: shiftRight)
    },

    // __mod_u8u8u8u8 by 2^k: load the first __reg byte and mask it with 2^k - 1.
    knownStoreToReg(1) ~
      Where(ctx => isPowerOfTwo(ctx.get[Int](2))) ~
      codeLeavingStoreIntact ~
      (Elidable & HasOpcode(JSR) & RefersTo("__mod_u8u8u8u8", 0)
        & DoesntMatterWhatItDoesWith(State.C, State.Z, State.N, State.V, State.X) // everything else (including Y) should be preserved
        & DoesntMatterWhatItDoesWithReg(0)
        & DoesntMatterWhatItDoesWithReg(1)) ~~> { (code, ctx) =>
      val mask = ctx.get[Int](2) - 1
      val zreg = ctx.zeropageRegister.get
      code.init ++ List(AssemblyLine.zeropage(LDA, zreg), AssemblyLine.immediate(AND, mask))
    },

    // __div_u16u8u16u8 by 2^k: shift the 16-bit value in __reg+0/+1 right in place k times,
    // then load the low byte into A and the high byte into X.
    knownStoreToReg(2) ~
      Where(ctx => isPowerOfTwo(ctx.get[Int](2))) ~
      codeLeavingStoreIntact ~
      (Elidable & HasOpcode(JSR) & RefersTo("__div_u16u8u16u8", 0)
        & DoesntMatterWhatItDoesWith(State.C, State.Z, State.N, State.V) // everything else (including Y) should be preserved
        & DoesntMatterWhatItDoesWithReg(0)
        & DoesntMatterWhatItDoesWithReg(1)
        & DoesntMatterWhatItDoesWithReg(2)) ~~> { (code, ctx) =>
      val shifts = Integer.numberOfTrailingZeros(ctx.get[Int](2))
      val zreg = ctx.zeropageRegister.get
      val shiftWordRight = (0 until shifts).toList.flatMap { _ =>
        List(AssemblyLine.zeropage(LSR, zreg, 1), AssemblyLine.zeropage(ROR, zreg))
      }
      val loadResult = List(AssemblyLine.zeropage(LDA, zreg), AssemblyLine.zeropage(LDX, zreg, 1))
      code.init ++ shiftWordRight ++ loadResult
    },

    // __mod_u16u8u16u8 by 2^k (k <= 7): mask the low byte with 2^k - 1 and load 0 into X.
    knownStoreToReg(2) ~
      Where(ctx => {
        val divisor = ctx.get[Int](2)
        isPowerOfTwo(divisor) && divisor <= 128
      }) ~
      codeLeavingStoreIntact ~
      (Elidable & HasOpcode(JSR) & RefersTo("__mod_u16u8u16u8", 0)
        & DoesntMatterWhatItDoesWith(State.C, State.Z, State.N, State.V, State.X) // everything else (including Y) should be preserved
        & DoesntMatterWhatItDoesWithReg(0)
        & DoesntMatterWhatItDoesWithReg(1)
        & DoesntMatterWhatItDoesWithReg(2)) ~~> { (code, ctx) =>
      val mask = ctx.get[Int](2) - 1
      val zreg = ctx.zeropageRegister.get
      code.init ++ List(
        AssemblyLine.zeropage(LDA, zreg),
        AssemblyLine.immediate(AND, mask),
        AssemblyLine.immediate(LDX, 0))
    },
  )
}
val ConstantDecimalMath = new RuleBasedAssemblyOptimization("Constant decimal math",
needsFlowInfo = FlowInfoRequirement.BothFlows,
@ -400,6 +471,7 @@ object ZeropageRegisterOptimizations {
val All: List[AssemblyOptimization[AssemblyLine]] = List(
ConstantDecimalMath,
ConstantDivision,
ConstantMultiplication,
ConstantInlinedMultiplication,
LoadingKnownValue,

View File

@ -6,6 +6,7 @@ import millfork.assembly.z80.ZOpcode._
import millfork.env.{CompoundConstant, Constant, InitializedArray, MathOperator, MemoryAddressConstant, NumericConstant}
import millfork.node.{LiteralExpression, ZRegister}
import ZRegister._
import millfork.CompilationFlag
import millfork.DecimalUtils._
import millfork.error.FatalErrorReporting
@ -1309,6 +1310,51 @@ object AlwaysGoodI80Optimizations {
)
/**
  * Removes calls to `__divmod_u16u8u16u8` when the divisor in D is known.
  *
  * The replacements leave the quotient in HL and the remainder in A.
  * Both rules require that flags and the registers B, C, D and E do not matter after the call.
  */
val ConstantDivision = new RuleBasedAssemblyOptimization("Constant division",
  needsFlowInfo = FlowInfoRequirement.BothFlows,

  // H, L and D all known: compute quotient and remainder at compile time.
  (Elidable & HasOpcode(CALL)
    & IsUnconditional
    & RefersTo("__divmod_u16u8u16u8", 0)
    & MatchRegister(ZRegister.H, 4)
    & MatchRegister(ZRegister.L, 5)
    & MatchRegister(ZRegister.D, 6)
    & DoesntMatterWhatItDoesWithFlags
    & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C, ZRegister.B)) ~~> { (_, ctx) =>
    // NOTE(review): only L is masked to 8 bits here; H is used as matched - confirm it is always 0..255.
    val dividend = ctx.get[Int](4) * 256 + ctx.get[Int](5).&(0xff)
    val divisor = ctx.get[Int](6)
    divisor match {
      case 0 =>
        // Division by zero is treated as undefined behaviour: the call is simply dropped.
        Nil
      case _ =>
        List(
          ZLine.ldImm16(ZRegister.HL, dividend / divisor),
          ZLine.ldImm8(ZRegister.A, dividend % divisor))
    }
  },

  // Only D known, and it is a power of two: shift HL right and mask the old L for the remainder.
  (Elidable & HasOpcode(CALL)
    & IsUnconditional
    & RefersTo("__divmod_u16u8u16u8", 0)
    & MatchRegister(ZRegister.D, 6)
    & DoesntMatterWhatItDoesWithFlags
    & DoesntMatterWhatItDoesWith(ZRegister.D, ZRegister.E, ZRegister.C, ZRegister.B)) ~
    Where(ctx => {
      val divisor = ctx.get[Int](6)
      divisor != 0 && (divisor & (divisor - 1)) == 0
    }) ~~> { (_, ctx) =>
    val divisor = ctx.get[Int](6)
    val shifts = Integer.numberOfTrailingZeros(divisor)
    val remainderMask = divisor - 1
    if (ctx.compilationOptions.flag(CompilationFlag.EmitExtended80Opcodes)) {
      // Take the remainder from L first, then shift HL right with SRL/RR.
      val takeRemainder = List(ZLine.ld8(ZRegister.A, ZRegister.L), ZLine.imm8(ZOpcode.AND, remainderMask))
      val shiftHL = List.fill(shifts)(List(
        ZLine.register(ZOpcode.SRL, ZRegister.H),
        ZLine.register(ZOpcode.RR, ZRegister.L)
      )).flatten
      takeRemainder ++ shiftHL
    } else {
      // Without the extended opcodes each shift step goes through A: OR A is followed by
      // RRA on H and then RRA on L. The original L is parked in D, which the pattern
      // guarantees is free to overwrite.
      val saveLow = List(ZLine.ld8(ZRegister.D, ZRegister.L))
      val shiftHL = List.fill(shifts)(List(
        ZLine.ld8(ZRegister.A, ZRegister.H),
        ZLine.register(ZOpcode.OR, ZRegister.A),
        ZLine.implied(ZOpcode.RRA),
        ZLine.ld8(ZRegister.H, ZRegister.A),
        ZLine.ld8(ZRegister.A, ZRegister.L),
        ZLine.implied(ZOpcode.RRA),
        ZLine.ld8(ZRegister.L, ZRegister.A)
      )).flatten
      val takeRemainder = List(ZLine.ld8(ZRegister.A, ZRegister.D), ZLine.imm8(ZOpcode.AND, remainderMask))
      saveLow ++ shiftHL ++ takeRemainder
    }
  },
)
private def compileMultiply[T](multiplicand: Int, add1:List[T], asl: List[T]): List[T] = {
if (multiplicand == 0) FatalErrorReporting.reportFlyingPig("Trying to optimize multiplication by 0 in a wrong way!")
def impl(m: Int): List[List[T]] = {
@ -1537,6 +1583,7 @@ object AlwaysGoodI80Optimizations {
val All: List[AssemblyOptimization[ZLine]] = List[AssemblyOptimization[ZLine]](
BranchInPlaceRemoval,
ConstantDivision,
ConstantMultiplication,
ConstantInlinedShifting,
FreeHL,

View File

@ -390,4 +390,48 @@ class ByteMathSuite extends FunSuite with Matchers with AppendedClues {
divisionCase1(42, 128)
divisionCase1(142, 128)
}
test("Byte division 4") {
  // (dividend, divisor) pairs; every divisor is a power of two.
  val cases = List(
    0 -> 2,
    1 -> 2,
    2 -> 2,
    250 -> 128,
    0 -> 4,
    0 -> 8,
    1 -> 4,
    6 -> 8,
    73 -> 16,
    75 -> 128,
    42 -> 128,
    142 -> 128)
  cases.foreach { case (dividend, divisor) => divisionCase4(dividend, divisor) }
}
/**
  * Compiles and runs a program that computes `x / y` and `x %% y` on bytes in two ways:
  * with a literal dividend (`output_q1`, `output_m1`) and with the dividend returned by a
  * `noinline` function (`output_q2`, `output_m2`). The divisor always comes from `f()`,
  * which is not marked `noinline`.
  *
  * @param x dividend
  * @param y divisor
  */
private def divisionCase4(x: Int, y: Int): Unit = {
  val program =
    s"""
       | import zp_reg
       | byte output_q1 @$$c000
       | byte output_m1 @$$c001
       | byte output_q2 @$$c002
       | byte output_m2 @$$c003
       | void main () {
       | byte a
       | output_q2 = g()
       | output_m2 = g()
       | a = f()
       | output_q1 = $x / a
       | output_m1 = $x %% a
       | output_q2 /= a
       | output_m2 %%= a
       | }
       | byte f() {return $y}
       | noinline byte g() {return $x}
       |""".stripMargin
  EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Intel8086)(program) { memory =>
    val quotient = x / y
    val remainder = x % y
    memory.readByte(0xc000) should equal(quotient) withClue s"$x / $y"
    memory.readByte(0xc001) should equal(remainder) withClue s"$x %% $y"
    memory.readByte(0xc002) should equal(quotient) withClue s"$x / $y"
    memory.readByte(0xc003) should equal(remainder) withClue s"$x %% $y"
  }
}
}

View File

@ -538,4 +538,53 @@ class WordMathSuite extends FunSuite with Matchers with AppendedClues {
m.readWord(0xc006) should equal(x % y) withClue s"= $x %% $y"
}
}
test("Word division 4") {
  // (dividend, divisor) pairs that fit in a byte, run through divisionCase4.
  val byteSizedCases = List(
    0 -> 2,
    1 -> 2,
    2 -> 2,
    250 -> 128,
    0 -> 4,
    0 -> 8,
    1 -> 4,
    6 -> 8,
    73 -> 16,
    75 -> 128,
    42 -> 128,
    142 -> 128)
  byteSizedCases.foreach { case (dividend, divisor) => divisionCase4(dividend, divisor) }

  // Dividends wider than a byte, run through divisionCase2.
  val wordSizedCases = List(
    2534 -> 2,
    2534 -> 32,
    35000 -> 2,
    51462 -> 4,
    51462 -> 1)
  wordSizedCases.foreach { case (dividend, divisor) => divisionCase2(dividend, divisor) }
}
/**
  * Compiles and runs a program that divides a word by a byte in two ways: with a literal
  * dividend (`output_q1`, `output_m1`) and with the dividend returned by a `noinline`
  * function (`output_q2`, `output_m2`). The divisor always comes from `f()`, which is not
  * marked `noinline`.
  *
  * `output_m1` is declared as a byte and is checked as a byte. `output_m2` is declared as a
  * word, so it is checked as a full word: a wrong high byte must fail the test too.
  *
  * @param x dividend
  * @param y divisor
  */
private def divisionCase4(x: Int, y: Int): Unit = {
  val program =
    s"""
       | import zp_reg
       | word output_q1 @$$c000
       | byte output_m1 @$$c002
       | word output_q2 @$$c004
       | word output_m2 @$$c006
       | void main () {
       | byte a
       | output_q2 = g()
       | output_m2 = g()
       | a = f()
       | output_q1 = $x / a
       | output_m1 = $x %% a
       | output_q2 /= a
       | output_m2 %%= a
       | }
       | byte f() {return $y}
       | noinline word g() {return $x}
       |""".stripMargin
  EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Intel8086)(program) { m =>
    m.readWord(0xc000) should equal(x / y) withClue s"$x / $y"
    m.readByte(0xc002) should equal(x % y) withClue s"$x %% $y"
    m.readWord(0xc004) should equal(x / y) withClue s"$x / $y"
    // output_m2 is a word: read both bytes so that a stale or garbage high byte is detected.
    m.readWord(0xc006) should equal(x % y) withClue s"$x %% $y"
  }
}
}