Fast memset for Z80 and 6502
@ -42,6 +42,8 @@ where `11111` is a sequential number and `xx` is the type:
* `me` – start of a `for` loop doing bulk memory operations
* `ms` – bulk memory operations
* `no` – nonet to word extension caused by the `nonet` operator
* `od` – end of a `do-while` statement
@ -0,0 +1,105 @@
package millfork.compiler.mos
import millfork.assembly.mos.AssemblyLine
import millfork.compiler.{BranchSpec, CompilationContext}
import millfork.env.{Label, NumericConstant, Type, VariableInMemory}
import millfork.node._
import millfork.assembly.mos.Opcode._
* @author Karol Stasiak
// Bulk memory operations for the 6502 (MOS) backend.
object MosBulkMemoryOperations {
// Compiles a `for` loop whose whole body is `target = source` into a memory fill
// that stores A through the zero-page pointer `__reg.loword` using `STA (reg),Y`.
//
// ctx    - compilation context (options, environment, label generator)
// target - the indexed expression being assigned to
// source - the expression whose value is stored
// f      - the `for` statement being compiled
//
// Returns the generated assembly. Falls back to
// MosStatementCompiler.compileForStatement when the loop does not match the
// supported pattern or when its iteration count is not a compile-time constant.
def compileMemset(ctx: CompilationContext, target: IndexedExpression, source: Expression, f: ForStatement): List[AssemblyLine] = {
// Fast path requirements: at least 2 bytes of zero-page pseudoregister, the indexed
// name is the loop variable itself, the index is pure and does not mention the loop
// variable, and the loop is not a DownTo loop. Anything else uses the generic compiler.
if (ctx.options.zpRegisterSize < 2 ||
target.name != f.variable ||
target.index.containsVariable(f.variable) ||
!target.index.isPure ||
f.direction == ForDirection.DownTo) return MosStatementCompiler.compileForStatement(ctx, f)
// Number of iterations as an expression; inclusive directions add 1.
// The boolean in each pair presumably marks a subtracted term - confirm against SumExpression.
val sizeExpr = f.direction match {
// NOTE(review): unreachable here, DownTo loops were already rejected by the guard above.
case ForDirection.DownTo =>
SumExpression(List(false -> f.start, true -> f.end, false -> LiteralExpression(1, 1)), decimal = false)
// Inclusive upper bound: end - start + 1.
case ForDirection.To | ForDirection.ParallelTo =>
SumExpression(List(false -> f.end, true -> f.start, false -> LiteralExpression(1, 1)), decimal = false)
// Exclusive upper bound: end - start.
case ForDirection.Until | ForDirection.ParallelUntil =>
SumExpression(List(false -> f.end, true -> f.start), decimal = false)
// Zero-page word used as the destination pointer, and the word type used to load it.
val reg = ctx.env.get[VariableInMemory]("__reg.loword")
val w = ctx.env.get[Type]("word")
// The size must evaluate to a constant; otherwise give up on the fast path.
val size = ctx.env.eval(sizeExpr) match {
case Some(c) => c.quickSimplify
case _ => return MosStatementCompiler.compileForStatement(ctx, f)
// Code that stores the first destination address (f.start + target.index) into reg as a word.
val loadReg = MosExpressionCompiler.compile(ctx, SumExpression(List(false -> f.start, false -> target.index), decimal = false), Some(w -> reg), BranchSpec.None)
// Code that evaluates the fill value into A.
val loadSource = MosExpressionCompiler.compileToA(ctx, source)
// If evaluating the value may overwrite byte 0 or 1 of the pseudoregister, evaluate it
// first and then load the pointer while preserving A; otherwise load the pointer first.
val loadAll = if (MosExpressionCompiler.changesZpreg(loadSource, 0) || MosExpressionCompiler.changesZpreg(loadSource, 1)) {
loadSource ++ MosExpressionCompiler.preserveRegisterIfNeeded(ctx, MosRegister.A, loadReg)
} else {
loadReg ++ loadSource
// High byte of the constant size, treated as the number of whole pages to fill.
val wholePageCount = size.hiByte
val setWholePages = wholePageCount match {
// No whole pages: emit nothing.
case NumericConstant(0, _) => Nil
// One page: LDY #0, then STA (reg),Y repeated through BNE back to `label`.
case NumericConstant(1, _) =>
val label = ctx.nextLabel("ms")
AssemblyLine.immediate(LDY, 0),
AssemblyLine.indexedY(STA, reg),
AssemblyLine.relative(BNE, label))
// Any other page count: X is loaded with the page count (BEQ skips the fill when it
// is zero), each page is filled through Y, and the byte at offset 1 of the pointer
// (its high byte) is incremented to advance to the next page.
case _ =>
val labelX = ctx.nextLabel("ms")
val labelXSkip = ctx.nextLabel("ms")
val labelY = ctx.nextLabel("ms")
AssemblyLine.immediate(LDX, wholePageCount),
AssemblyLine.relative(BEQ, labelXSkip),
AssemblyLine.immediate(LDY, 0),
AssemblyLine.indexedY(STA, reg),
AssemblyLine.relative(BNE, labelY),
AssemblyLine.zeropage(INC, reg, 1),
AssemblyLine.relative(BNE, labelX),
// Low byte of the constant size: the bytes left over after the whole pages.
val restSize = size.loByte
val setRest = restSize match {
// Nothing left over.
case NumericConstant(0, _) => Nil
// One or two leftover bytes are written with straight-line stores instead of a loop.
case NumericConstant(1, _) =>
List(AssemblyLine.indexedY(STA, reg))
case NumericConstant(2, _) => List(
AssemblyLine.indexedY(STA, reg),
AssemblyLine.indexedY(STA, reg))
// General remainder: a loop over Y.
case _ =>
val label = ctx.nextLabel("ms")
val labelSkip = ctx.nextLabel("ms")
// ParallelUntil: Y starts at restSize (BEQ skips the loop when it is zero) and the
// loop repeats while BNE is taken - presumably counting Y down, confirm.
if (f.direction == ForDirection.ParallelUntil) {
AssemblyLine.immediate(LDY, restSize),
AssemblyLine.relative(BEQ, labelSkip),
AssemblyLine.indexedY(STA, reg),
AssemblyLine.relative(BNE, label),
// Other directions: Y starts at 0 and is compared with restSize; BCS skips the
// store once Y >= restSize (CPY sets carry in that case).
} else {
AssemblyLine.immediate(LDY, 0),
AssemblyLine.immediate(CPY, restSize),
AssemblyLine.relative(BCS, labelSkip),
AssemblyLine.indexedY(STA, reg),
AssemblyLine.relative(BNE, label),
// Final code: load pointer and value, fill the whole pages, then the remainder.
loadAll ++ setWholePages ++ setRest
@ -81,18 +81,7 @@ object MosExpressionCompiler extends AbstractExpressionCompiler[AssemblyLine] {
// Returns `code` unchanged unless it may modify the byte at offset `Offset` of the
// `__reg` pseudoregister; in that case `code` is prefixed with LDA of that byte
// followed by PHA, so the old value is saved on the stack (this clobbers A).
// Whatever is appended after `code` is presumably the matching restore - confirm.
def preserveZpregIfNeededDestroyingAAndX(ctx: CompilationContext, Offset: Int, code: List[AssemblyLine]): List[AssemblyLine] = {
// NOTE(review): this inline `code.exists` test repeats the patterns used by
// `changesZpreg`, which is also called right below; one of the two checks is
// presumably redundant - confirm which one is meant to stay.
if (code.exists{
case AssemblyLine(op,
AddrMode.ZeroPage | AddrMode.Absolute | AddrMode.LongAbsolute,
CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(Offset, _)),
_) if th.name =="__reg" && OpcodeClasses.ChangesMemoryAlways(op) || OpcodeClasses.ChangesMemoryIfNotImplied(op) => true
case AssemblyLine(op,
AddrMode.ZeroPage | AddrMode.Absolute | AddrMode.LongAbsolute,
_) if th.name =="__reg" && Offset == 0 && OpcodeClasses.ChangesMemoryAlways(op) || OpcodeClasses.ChangesMemoryIfNotImplied(op) => true
case AssemblyLine(JSR | BYTE | BSR, _, _, _) => true
case _ => false
}) {
// Only wrap the code when it may change the byte being preserved.
if (changesZpreg(code, Offset)) {
// Save the current byte: load it into A and push it before running `code`.
List(AssemblyLine.zeropage(LDA, ctx.env.get[VariableInMemory]("__reg"), Offset), AssemblyLine.implied(PHA)) ++
code ++
// Nothing to preserve: emit the code as is.
} else code
} else code
// Tells whether any line of `code` may modify the byte at offset `Offset` of the
// `__reg` pseudoregister.
//
// code   - the assembly lines to inspect
// Offset - byte offset within `__reg`; capitalised so that it can be used as a
//          stable identifier inside the patterns below
//
// Returns true as soon as one line matches one of the cases below.
def changesZpreg(code: List[AssemblyLine], Offset: Int): Boolean = {
code.exists {
// A memory-changing opcode whose operand is `__reg + Offset`, addressed as
// zero page, absolute or long absolute.
// NOTE(review): `&&` binds tighter than `||`, so the guard reads as
// (name check && ChangesMemoryAlways) || ChangesMemoryIfNotImplied; the second
// opcode class therefore matches regardless of `th.name`. That errs on the side of
// reporting a change, but is probably not what was intended - confirm.
case AssemblyLine(op,
AddrMode.ZeroPage | AddrMode.Absolute | AddrMode.LongAbsolute,
CompoundConstant(MathOperator.Plus, MemoryAddressConstant(th), NumericConstant(Offset, _)),
_) if th.name == "__reg" && OpcodeClasses.ChangesMemoryAlways(op) || OpcodeClasses.ChangesMemoryIfNotImplied(op) => true
// Same check for an operand that is `__reg` itself, which only concerns offset 0.
// The same operator-precedence remark applies to this guard.
case AssemblyLine(op,
AddrMode.ZeroPage | AddrMode.Absolute | AddrMode.LongAbsolute,
_) if th.name == "__reg" && Offset == 0 && OpcodeClasses.ChangesMemoryAlways(op) || OpcodeClasses.ChangesMemoryIfNotImplied(op) => true
// Subroutine calls and raw bytes are always treated as changing the register,
// presumably because their effect cannot be determined from the line itself.
case AssemblyLine(JSR | BYTE | BSR, _, _, _) => true
case _ => false
def preserveCarryIfNeeded(ctx: CompilationContext, code: List[AssemblyLine]): List[AssemblyLine] = {
if (code.exists {
case AssemblyLine(JSR | BSR, Absolute | LongAbsolute, MemoryAddressConstant(th), _) => true
@ -264,6 +264,8 @@ object MosStatementCompiler extends AbstractStatementCompiler[AssemblyLine] {
compileWhileStatement(ctx, s)
case s: DoWhileStatement =>
compileDoWhileStatement(ctx, s)
case f@ForStatement(variable, _, _, _, List(Assignment(target: IndexedExpression, source: Expression))) if !source.containsVariable(variable) =>
MosBulkMemoryOperations.compileMemset(ctx, target, source, f)
case f:ForStatement =>
case s:BreakStatement =>
@ -43,6 +43,53 @@ object Z80BulkMemoryOperations {
// Compiles a `for` loop whose whole body is `target = source` into a memory fill.
// When Z80 opcodes may be emitted and the target has a supported shape, the fill is
// done by storing the first byte and then propagating it with LDIR or LDDR;
// otherwise the work is delegated to compileMemoryBulk.
//
// ctx    - compilation context
// target - the indexed expression being assigned to
// source - the expression whose value is stored
// f      - the `for` statement being compiled
def compileMemset(ctx: CompilationContext, target: IndexedExpression, source: Expression, f: ForStatement): List[ZLine] = {
// Evaluate the fill value into A (stashHLIfChanged presumably keeps HL intact around
// that code - confirm) and store A at the address held in HL.
val loadA = Z80ExpressionCompiler.stashHLIfChanged(ctx, Z80ExpressionCompiler.compileToA(ctx, source)) :+ ZLine.ld8(ZRegister.MEM_HL, ZRegister.A)
// Builds the block-transfer based fill; `targetOffset` is the index expression with
// the loop variable taken out.
def compileForZ80(targetOffset: Expression): List[ZLine] = {
// Index of the first byte to write: offset + end for DownTo, offset + start otherwise.
// NOTE(review): the DownTo size below is start - end + 1, which implies start >= end,
// so offset + end is the lowest address while DE is decremented and LDDR walks
// downwards - confirm that f.end is really the intended starting point.
val targetIndexExpression = f.direction match {
case ForDirection.DownTo => SumExpression(List(false -> targetOffset, false -> f.end), decimal = false)
case _ => SumExpression(List(false -> targetOffset, false -> f.start), decimal = false)
// When the indexed name is the loop variable itself, index the `$0000` constant
// instead, presumably so that the index expression alone forms the address.
val array = if (target.name != f.variable) target.name else "$0000"
// Code that leaves the first destination address in HL.
val calculateAddress = Z80ExpressionCompiler.calculateAddressToHL(ctx, IndexedExpression(array, targetIndexExpression))
// Code that leaves the byte count in BC: start - end + 1 for DownTo,
// end - start + 1 for To/ParallelTo, end - start for Until/ParallelUntil.
// NOTE(review): the first byte is stored separately (loadFirstValue) and LDIR/LDDR
// then transfers BC further bytes, so BC presumably should be one less than the
// iteration count - verify that this does not write one byte too many.
val calculateSize = f.direction match {
case ForDirection.DownTo =>
Z80ExpressionCompiler.stashHLIfChanged(ctx, Z80ExpressionCompiler.compileToBC(ctx, SumExpression(List(false -> f.start, true -> f.end, false -> LiteralExpression(1, 1)), decimal = false)))
case ForDirection.To | ForDirection.ParallelTo =>
Z80ExpressionCompiler.stashHLIfChanged(ctx, Z80ExpressionCompiler.compileToBC(ctx, SumExpression(List(false -> f.end, true -> f.start, false -> LiteralExpression(1, 1)), decimal = false)))
case ForDirection.Until | ForDirection.ParallelUntil =>
Z80ExpressionCompiler.stashHLIfChanged(ctx, Z80ExpressionCompiler.compileToBC(ctx, SumExpression(List(false -> f.end, true -> f.start), decimal = false)))
// Direction-dependent pair: how DE is offset from HL, and which block transfer is used.
val (incOp, ldOp) = f.direction match {
case ForDirection.DownTo => DEC_16 -> LDDR
case _ => INC_16 -> LDIR
// Store the first byte: an immediate store when the value is a compile-time
// constant, otherwise evaluate it through A with BC stashed if that code changes it.
val loadFirstValue = ctx.env.eval(source) match {
case Some(c) => List(ZLine.ldImm8(ZRegister.MEM_HL, c))
case _ => Z80ExpressionCompiler.stashBCIfChanged(ctx, loadA)
// DE must point one byte past HL in the direction of travel.
val loadDE = calculateAddress match {
// The address is a single immediate load into HL: load DE with that constant +/- 1.
case List(ZLine(ZOpcode.LD_16, TwoRegisters(ZRegister.HL, ZRegister.IMM_16), c, _)) =>
if (incOp == DEC_16) List(ZLine.ldImm16(ZRegister.DE, (c - 1).quickSimplify))
else List(ZLine.ldImm16(ZRegister.DE, (c + 1).quickSimplify))
// Otherwise copy HL into DE and step DE once with the chosen 16-bit op.
case _ => List(
ZLine.ld8(ZRegister.D, ZRegister.H),
ZLine.ld8(ZRegister.E, ZRegister.L),
ZLine.register(incOp, ZRegister.DE))
// Address into HL, count into BC, first byte stored, DE set up, then the block transfer.
calculateAddress ++ calculateSize ++ loadFirstValue ++ loadDE :+ ZLine.implied(ldOp)
// The block-transfer path is only taken when Z80 opcodes may be emitted.
if (ctx.options.flag(CompilationFlag.EmitZ80Opcodes)) {
// Array-style target: the index mentions the loop variable; removeVariableOnce
// presumably returns the index with one occurrence of it removed - confirm.
removeVariableOnce(f.variable, target.index) match {
case Some(targetOffset) if targetOffset.isPure =>
return compileForZ80(targetOffset)
case _ =>
// Pointer-style target: the indexed name is the loop variable itself and the index
// does not depend on it.
if (target.isPure && target.name == f.variable && !target.index.containsVariable(f.variable)) {
return compileForZ80(target.index)
// Generic fallback for every other case.
compileMemoryBulk(ctx, target, f,
useDEForTarget = false,
preferDecreasing = false,
@ -340,7 +340,8 @@ class Environment(val parent: Option[Environment], val prefix: String, val cpuFa
addThing(BasicPlainType("int112", 14), None)
addThing(BasicPlainType("int120", 15), None)
addThing(BasicPlainType("int128", 16), None)
addThing(DerivedPlainType("pointer", w, isSigned = false), None)
val p = DerivedPlainType("pointer", w, isSigned = false)
addThing(p, None)
// addThing(DerivedPlainType("farpointer", get[PlainType]("farword"), isSigned = false), None)
addThing(DerivedPlainType("ubyte", b, isSigned = false), None)
addThing(DerivedPlainType("sbyte", b, isSigned = true), None)
@ -354,6 +355,7 @@ class Environment(val parent: Option[Environment], val prefix: String, val cpuFa
addThing(ConstantThing("false", NumericConstant(0, 0), falseType), None)
addThing(ConstantThing("__zeropage_usage", UnexpandedConstant("__zeropage_usage", 1), b), None)
addThing(ConstantThing("__heap_start", UnexpandedConstant("__heap_start", 1), b), None)
addThing(ConstantThing("$0000", NumericConstant(0, 2), p), None)
BranchingOpcodeMapping(Opcode.BCS, IfFlagSet(ZFlag.C)),
BranchingOpcodeMapping(Opcode.BCC, IfFlagClear(ZFlag.C))),
test("Memset with index") {
// Fills the 5-byte array `output` located at $c001 with 22 through an index loop
// (`for i,0,until,output.length`) on the MOS, Z80, Intel 8080 and Sharp targets.
// The visible assertions check only the first ($c001) and the last ($c005) byte;
// NOTE(review): a write past the end of the array would not be detected by them.
test("Memset with index") {
EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp)(
| array output[5]@$c001
| void main () {
| byte i
| for i,0,until,output.length {
| output[i] = 22
| }
| }
| void _panic(){while(true){}}
""".stripMargin){ m=>
m.readByte(0xc001) should equal (22)
m.readByte(0xc005) should equal (22)
// Same fill as the index-based test, but the loop variable is a pointer `p` that runs
// over the addresses of `output` (from output.addr until output.addr+output.length)
// and the store is written as `p[0] = 22`. Runs on the MOS, Z80, Intel 8080 and Sharp
// targets; the visible assertions check only the bytes at $c001 and $c005.
test("Memset with pointer") {
EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp)(
| array output[5]@$c001
| void main () {
| pointer p
| for p,output.addr,until,output.addr+output.length {
| p[0] = 22
| }
| }
| void _panic(){while(true){}}
""".stripMargin){ m=>
m.readByte(0xc001) should equal (22)
m.readByte(0xc005) should equal (22)
test("Screen fill") {
EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp)(
