From 8dfb223a8c47e5538feb7c3c4704e4b0db08d398 Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Tue, 7 Aug 2018 17:37:09 +0200 Subject: [PATCH] Fast memset for Z80 and 6502 --- docs/abi/generated-labels.md | 2 + .../mos/MosBulkMemoryOperations.scala | 105 ++++++++++++++++++ .../compiler/mos/MosExpressionCompiler.scala | 29 +++-- .../compiler/mos/MosStatementCompiler.scala | 2 + .../z80/Z80BulkMemoryOperations.scala | 47 ++++++++ src/main/scala/millfork/env/Environment.scala | 4 +- .../scala/millfork/test/ForLoopSuite.scala | 34 ++++++ 7 files changed, 210 insertions(+), 13 deletions(-) create mode 100644 src/main/scala/millfork/compiler/mos/MosBulkMemoryOperations.scala diff --git a/docs/abi/generated-labels.md b/docs/abi/generated-labels.md index 07cf993a..186635c5 100644 --- a/docs/abi/generated-labels.md +++ b/docs/abi/generated-labels.md @@ -42,6 +42,8 @@ where `11111` is a sequential number and `xx` is the type: * `me` – start of a `for` loop doing bulk memory operations +* `ms` – bulk memory operations + * `no` – nonet to word extension caused by the `nonet` operator * `od` – end of a `do-while` statement diff --git a/src/main/scala/millfork/compiler/mos/MosBulkMemoryOperations.scala b/src/main/scala/millfork/compiler/mos/MosBulkMemoryOperations.scala new file mode 100644 index 00000000..c8ac9de0 --- /dev/null +++ b/src/main/scala/millfork/compiler/mos/MosBulkMemoryOperations.scala @@ -0,0 +1,105 @@ +package millfork.compiler.mos + +import millfork.assembly.mos.AssemblyLine +import millfork.compiler.{BranchSpec, CompilationContext} +import millfork.env.{Label, NumericConstant, Type, VariableInMemory} +import millfork.node._ +import millfork.assembly.mos.Opcode._ + +/** + * @author Karol Stasiak + */ +object MosBulkMemoryOperations { + + def compileMemset(ctx: CompilationContext, target: IndexedExpression, source: Expression, f: ForStatement): List[AssemblyLine] = { + if (ctx.options.zpRegisterSize < 2 || + target.name != f.variable || + target.index.containsVariable(f.variable) || + !target.index.isPure || + f.direction == ForDirection.DownTo) return MosStatementCompiler.compileForStatement(ctx, f) + val sizeExpr = f.direction match { + case ForDirection.DownTo => + SumExpression(List(false -> f.start, true -> f.end, false -> LiteralExpression(1, 1)), decimal = false) + case ForDirection.To | ForDirection.ParallelTo => + SumExpression(List(false -> f.end, true -> f.start, false -> LiteralExpression(1, 1)), decimal = false) + case ForDirection.Until | ForDirection.ParallelUntil => + SumExpression(List(false -> f.end, true -> f.start), decimal = false) + } + val reg = ctx.env.get[VariableInMemory]("__reg.loword") + val w = ctx.env.get[Type]("word") + val size = ctx.env.eval(sizeExpr) match { + case Some(c) => c.quickSimplify + case _ => return MosStatementCompiler.compileForStatement(ctx, f) + } + val loadReg = MosExpressionCompiler.compile(ctx, SumExpression(List(false -> f.start, false -> target.index), decimal = false), Some(w -> reg), BranchSpec.None) + val loadSource = MosExpressionCompiler.compileToA(ctx, source) + val loadAll = if (MosExpressionCompiler.changesZpreg(loadSource, 0) || MosExpressionCompiler.changesZpreg(loadSource, 1)) { + loadSource ++ MosExpressionCompiler.preserveRegisterIfNeeded(ctx, MosRegister.A, loadReg) + } else { + loadReg ++ loadSource + } + val wholePageCount = size.hiByte + val setWholePages = wholePageCount match { + case NumericConstant(0, _) => Nil + case NumericConstant(1, _) => + val label = ctx.nextLabel("ms") + List( + AssemblyLine.immediate(LDY, 0), + AssemblyLine.label(label), + AssemblyLine.indexedY(STA, reg), + AssemblyLine.implied(INY), + AssemblyLine.relative(BNE, label)) + case _ => + val labelX = ctx.nextLabel("ms") + val labelXSkip = ctx.nextLabel("ms") + val labelY = ctx.nextLabel("ms") + List( + AssemblyLine.immediate(LDX, wholePageCount), + AssemblyLine.relative(BEQ, labelXSkip), + AssemblyLine.label(labelX), + AssemblyLine.immediate(LDY, 0), + AssemblyLine.label(labelY), + AssemblyLine.indexedY(STA, reg), + AssemblyLine.implied(INY), + AssemblyLine.relative(BNE, labelY), + AssemblyLine.zeropage(INC, reg, 1), + AssemblyLine.implied(DEX), + AssemblyLine.relative(BNE, labelX), + AssemblyLine.label(labelXSkip)) + } + val restSize = size.loByte + val setRest = restSize match { + case NumericConstant(0, _) => Nil + case NumericConstant(1, _) => + List(AssemblyLine.indexedY(STA, reg)) + case NumericConstant(2, _) => List( + AssemblyLine.indexedY(STA, reg), + AssemblyLine.implied(INY), + AssemblyLine.indexedY(STA, reg)) + case _ => + val label = ctx.nextLabel("ms") + val labelSkip = ctx.nextLabel("ms") + if (f.direction == ForDirection.ParallelUntil) { + List( + AssemblyLine.immediate(LDY, restSize), + AssemblyLine.relative(BEQ, labelSkip), + AssemblyLine.label(label), + AssemblyLine.implied(DEY), + AssemblyLine.indexedY(STA, reg), + AssemblyLine.relative(BNE, label), + AssemblyLine.label(labelSkip)) + } else { + List( + AssemblyLine.immediate(LDY, 0), + AssemblyLine.label(label), + AssemblyLine.immediate(CPY, restSize), + AssemblyLine.relative(BCS, labelSkip), + AssemblyLine.indexedY(STA, reg), + AssemblyLine.implied(INY), + AssemblyLine.relative(BNE, label), + AssemblyLine.label(labelSkip)) + } + } + loadAll ++ setWholePages ++ setRest + } +} diff --git a/src/main/scala/millfork/compiler/mos/MosExpressionCompiler.scala b/src/main/scala/millfork/compiler/mos/MosExpressionCompiler.scala index 04cb86ff..7cd19ff9 100644 --- a/src/main/scala/millfork/compiler/mos/MosExpressionCompiler.scala +++ b/src/main/scala/millfork/compiler/mos/MosExpressionCompiler.scala @@ -81,18 +81,7 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] { } def preserveZpregIfNeededDestroyingAAndX(ctx: CompilationContext, Offset: Int, code: List[AssemblyLine]): List[AssemblyLine] = { - if (code.exists{ - case AssemblyLine(op, - AddrMode.ZeroPage | AddrMode.Absolute | AddrMode.LongAbsolute, - CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(Offset, _)), - _) if th.name =="__reg" && OpcodeClasses.ChangesMemoryAlways(op) || OpcodeClasses.ChangesMemoryIfNotImplied(op) => true - case AssemblyLine(op, - AddrMode.ZeroPage | AddrMode.Absolute | AddrMode.LongAbsolute, - MemoryAddressConstant(th), - _) if th.name =="__reg" && Offset == 0 && OpcodeClasses.ChangesMemoryAlways(op) || OpcodeClasses.ChangesMemoryIfNotImplied(op) => true - case AssemblyLine(JSR | BYTE | BSR, _, _, _) => true - case _ => false - }) { + if (changesZpreg(code, Offset)) { List(AssemblyLine.zeropage(LDA, ctx.env.get[VariableInMemory]("__reg"), Offset), AssemblyLine.implied(PHA)) ++ code ++ List( @@ -102,6 +91,22 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] { AssemblyLine.implied(TXA)) } else code } + + def changesZpreg(code: List[AssemblyLine], Offset: Int): Boolean = { + code.exists { + case AssemblyLine(op, + AddrMode.ZeroPage | AddrMode.Absolute | AddrMode.LongAbsolute, + CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(Offset, _)), + _) if th.name == "__reg" && OpcodeClasses.ChangesMemoryAlways(op) || OpcodeClasses.ChangesMemoryIfNotImplied(op) => true + case AssemblyLine(op, + AddrMode.ZeroPage | AddrMode.Absolute | AddrMode.LongAbsolute, + MemoryAddressConstant(th), + _) if th.name == "__reg" && Offset == 0 && OpcodeClasses.ChangesMemoryAlways(op) || OpcodeClasses.ChangesMemoryIfNotImplied(op) => true + case AssemblyLine(JSR | BYTE | BSR, _, _, _) => true + case _ => false + } + } + def preserveCarryIfNeeded(ctx: CompilationContext, code: List[AssemblyLine]): List[AssemblyLine] = { if (code.exists { case AssemblyLine(JSR | BSR, Absolute | LongAbsolute, MemoryAddressConstant(th), _) => true diff --git a/src/main/scala/millfork/compiler/mos/MosStatementCompiler.scala b/src/main/scala/millfork/compiler/mos/MosStatementCompiler.scala index bef8f073..48561af2 100644 --- a/src/main/scala/millfork/compiler/mos/MosStatementCompiler.scala +++ b/src/main/scala/millfork/compiler/mos/MosStatementCompiler.scala @@ -264,6 +264,8 @@ object MosStatementCompiler extends AbstractStatementCompiler[AssemblyLine] { compileWhileStatement(ctx, s) case s: DoWhileStatement => compileDoWhileStatement(ctx, s) + case f@ForStatement(variable, _, _, _, List(Assignment(target: IndexedExpression, source: Expression))) if !source.containsVariable(variable) => + MosBulkMemoryOperations.compileMemset(ctx, target, source, f) case f:ForStatement => compileForStatement(ctx,f) case s:BreakStatement => diff --git a/src/main/scala/millfork/compiler/z80/Z80BulkMemoryOperations.scala b/src/main/scala/millfork/compiler/z80/Z80BulkMemoryOperations.scala index 0fb70b6c..27e85991 100644 --- a/src/main/scala/millfork/compiler/z80/Z80BulkMemoryOperations.scala +++ b/src/main/scala/millfork/compiler/z80/Z80BulkMemoryOperations.scala @@ -43,6 +43,53 @@ object Z80BulkMemoryOperations { */ def compileMemset(ctx: CompilationContext, target: IndexedExpression, source: Expression, f: ForStatement): List[ZLine] = { val loadA = Z80ExpressionCompiler.stashHLIfChanged(ctx, Z80ExpressionCompiler.compileToA(ctx, source)) :+ ZLine.ld8(ZRegister.MEM_HL, ZRegister.A) + + def compileForZ80(targetOffset: Expression): List[ZLine] = { + val targetIndexExpression = f.direction match { + case ForDirection.DownTo => SumExpression(List(false -> targetOffset, false -> f.end), decimal = false) + case _ => SumExpression(List(false -> targetOffset, false -> f.start), decimal = false) + } + val array = if (target.name != f.variable) target.name else "$0000" + val calculateAddress = Z80ExpressionCompiler.calculateAddressToHL(ctx, IndexedExpression(array, targetIndexExpression)) + val calculateSize = f.direction match { + case ForDirection.DownTo => + Z80ExpressionCompiler.stashHLIfChanged(ctx, Z80ExpressionCompiler.compileToBC(ctx, SumExpression(List(false -> f.start, true -> f.end, false -> LiteralExpression(1, 1)), decimal = false))) + case ForDirection.To | ForDirection.ParallelTo => + Z80ExpressionCompiler.stashHLIfChanged(ctx, Z80ExpressionCompiler.compileToBC(ctx, SumExpression(List(false -> f.end, true -> f.start, false -> LiteralExpression(1, 1)), decimal = false))) + case ForDirection.Until | ForDirection.ParallelUntil => + Z80ExpressionCompiler.stashHLIfChanged(ctx, Z80ExpressionCompiler.compileToBC(ctx, SumExpression(List(false -> f.end, true -> f.start), decimal = false))) + } + val (incOp, ldOp) = f.direction match { + case ForDirection.DownTo => DEC_16 -> LDDR + case _ => INC_16 -> LDIR + } + val loadFirstValue = ctx.env.eval(source) match { + case Some(c) => List(ZLine.ldImm8(ZRegister.MEM_HL, c)) + case _ => Z80ExpressionCompiler.stashBCIfChanged(ctx, loadA) + } + val loadDE = calculateAddress match { + case List(ZLine(ZOpcode.LD_16, TwoRegisters(ZRegister.HL, ZRegister.IMM_16), c, _)) => + if (incOp == DEC_16) List(ZLine.ldImm16(ZRegister.DE, (c - 1).quickSimplify)) + else List(ZLine.ldImm16(ZRegister.DE, (c + 1).quickSimplify)) + case _ => List( + ZLine.ld8(ZRegister.D, ZRegister.H), + ZLine.ld8(ZRegister.E, ZRegister.L), + ZLine.register(incOp, ZRegister.DE)) + } + calculateAddress ++ calculateSize ++ loadFirstValue ++ loadDE :+ ZLine.implied(ldOp) + } + + if (ctx.options.flag(CompilationFlag.EmitZ80Opcodes)) { + removeVariableOnce(f.variable, target.index) match { + case Some(targetOffset) if targetOffset.isPure => + return compileForZ80(targetOffset) + case _ => + } + if (target.isPure && target.name == f.variable && !target.index.containsVariable(f.variable)) { + return compileForZ80(target.index) + } + } + compileMemoryBulk(ctx, target, f, useDEForTarget = false, preferDecreasing = false, diff --git a/src/main/scala/millfork/env/Environment.scala b/src/main/scala/millfork/env/Environment.scala index 1bc9d556..2e32cca8 100644 --- a/src/main/scala/millfork/env/Environment.scala +++ b/src/main/scala/millfork/env/Environment.scala @@ -340,7 +340,8 @@ class Environment(val parent: Option[Environment], val prefix: String, val cpuFa addThing(BasicPlainType("int112", 14), None) addThing(BasicPlainType("int120", 15), None) addThing(BasicPlainType("int128", 16), None) - addThing(DerivedPlainType("pointer", w, isSigned = false), None) + val p = DerivedPlainType("pointer", w, isSigned = false) + addThing(p, None) // addThing(DerivedPlainType("farpointer", get[PlainType]("farword"), isSigned = false), None) addThing(DerivedPlainType("ubyte", b, isSigned = false), None) addThing(DerivedPlainType("sbyte", b, isSigned = true), None) @@ -354,6 +355,7 @@ class Environment(val parent: Option[Environment], val prefix: String, val cpuFa addThing(ConstantThing("false", NumericConstant(0, 0), falseType), None) addThing(ConstantThing("__zeropage_usage", UnexpandedConstant("__zeropage_usage", 1), b), None) addThing(ConstantThing("__heap_start", UnexpandedConstant("__heap_start", 1), b), None) + addThing(ConstantThing("$0000", NumericConstant(0, 2), p), None) addThing(FlagBooleanType("set_carry", BranchingOpcodeMapping(Opcode.BCS, IfFlagSet(ZFlag.C)), BranchingOpcodeMapping(Opcode.BCC, IfFlagClear(ZFlag.C))), diff --git a/src/test/scala/millfork/test/ForLoopSuite.scala b/src/test/scala/millfork/test/ForLoopSuite.scala index 3979c421..c09350c0 100644 --- a/src/test/scala/millfork/test/ForLoopSuite.scala +++ b/src/test/scala/millfork/test/ForLoopSuite.scala @@ -174,6 +174,40 @@ class ForLoopSuite extends FunSuite with Matchers { } } + test("Memset with index") { + EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp)( + """ + | array output[5]@$c001 + | void main () { + | byte i + | for i,0,until,output.length { + | output[i] = 22 + | } + | } + | void _panic(){while(true){}} + """.stripMargin){ m=> + m.readByte(0xc001) should equal (22) + m.readByte(0xc005) should equal (22) + } + } + + test("Memset with pointer") { + EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp)( + """ + | array output[5]@$c001 + | void main () { + | pointer p + | for p,output.addr,until,output.addr+output.length { + | p[0] = 22 + | } + | } + | void _panic(){while(true){}} + """.stripMargin){ m=> + m.readByte(0xc001) should equal (22) + m.readByte(0xc005) should equal (22) + } + } + test("Screen fill") { EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp)( """