From 166acf2b187f68819f7288d3cb993953a67939fc Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Sat, 18 Sep 2021 00:36:16 +0200 Subject: [PATCH] R800 support --- docs/api/custom-platform.md | 4 + include/i80/i80_math.mfk | 27 ++++++- .../scala/millfork/CompilationOptions.scala | 24 ++++-- src/main/scala/millfork/Context.scala | 18 ++++- src/main/scala/millfork/Main.scala | 4 +- .../scala/millfork/assembly/z80/ZLine.scala | 14 ++++ .../scala/millfork/assembly/z80/ZOpcode.scala | 7 ++ .../z80/opt/AlwaysGoodR800Optimizations.scala | 24 ++++++ .../assembly/z80/opt/CoarseFlowAnalyzer.scala | 32 ++++++++ .../z80/opt/Z80OptimizationPresets.scala | 15 ++++ .../scala/millfork/output/Z80Assembler.scala | 75 +++++++++++++++++-- .../scala/millfork/parser/Z80Parser.scala | 17 ++++- .../scala/millfork/test/ByteMathSuite.scala | 2 +- .../scala/millfork/test/StructSuite.scala | 2 + .../millfork/test/Z80AssemblySuite.scala | 40 +++++++++- .../millfork/test/emu/EmuBenchmarkRun.scala | 28 +++++++ .../test/emu/EmuOptimizedInlinedRun.scala | 4 + .../millfork/test/emu/EmuOptimizedRun.scala | 2 + .../millfork/test/emu/EmuUnoptimizedRun.scala | 2 + .../scala/millfork/test/emu/EmuZ80Run.scala | 48 +++++++----- 20 files changed, 353 insertions(+), 36 deletions(-) create mode 100644 src/main/scala/millfork/assembly/z80/opt/AlwaysGoodR800Optimizations.scala diff --git a/docs/api/custom-platform.md b/docs/api/custom-platform.md index 4ee5a58c..d451cc58 100644 --- a/docs/api/custom-platform.md +++ b/docs/api/custom-platform.md @@ -37,6 +37,8 @@ if a line ends with a backslash character, the value continues to the next line. * `z80` (Zilog Z80) * `strictz80` (Z80 without illegal instructions) + + * `r800` (R800) * `z80next` (Z80 core from ZX Spectrum Next) Note: Millfork version 0.3.18 and earlier uses the name `zx80next` for this architecture. @@ -80,6 +82,8 @@ This list cannot contain module template instantiations. 
* `emit_x80` – whether the compiler should emit instructions present on Sharp LR35902 and Z80, but absent on Intel 8080, default is `true` on compatible processors and `false` elsewhere * `emit_z80` – whether the compiler should emit Zilog Z80 instructions not covered by `emit_x80`, default is `true` on compatible processors and `false` elsewhere + + * `emit_r800` – whether the compiler should emit R800 instructions, default is `true` on compatible processors and `false` elsewhere * `prevent_jmp_indirect_bug` – whether the compiler should try to avoid the indirect JMP bug, default is `false` on 65C02-compatible or non-6502 processors and `true` elsewhere diff --git a/include/i80/i80_math.mfk b/include/i80/i80_math.mfk index 2edf6acd..511841e7 100644 --- a/include/i80/i80_math.mfk +++ b/include/i80/i80_math.mfk @@ -11,6 +11,12 @@ inline asm byte __mul_u8u8u8() { ? LD A, E ? RET } +#elseif CPUFEATURE_R800 +inline asm byte __mul_u8u8u8() { + ? MULUB A,D + ? LD A,L + ? RET +} #elseif CPUFEATURE_Z80 || CPUFEATURE_GAMEBOY //A = A * D noinline asm byte __mul_u8u8u8() { @@ -89,6 +95,16 @@ __divmod_u16u8u16u8_skip: ? RET } + +#if CPUFEATURE_R800 +inline asm word __mul_u16u8u16() { + ? LD L,A + ? LD H,0 + ? MULUW HL,DE + ? RET +} +#else +// HL=A*DE noinline asm word __mul_u16u8u16() { ? LD HL,0 ? LD B,8 @@ -113,8 +129,17 @@ __mul_u16u8u16_skip: #endif ? RET } +#endif -#if CPUFEATURE_Z80 || CPUFEATURE_GAMEBOY + +#if CPUFEATURE_R800 +inline asm word __mul_u16u16u16() { + ? EX DE,HL + ? MULUW HL,BC + ? 
RET +} +#elseif CPUFEATURE_Z80 || CPUFEATURE_GAMEBOY +// HL=BC*DE noinline asm word __mul_u16u16u16() { LD HL,0 LD A,16 diff --git a/src/main/scala/millfork/CompilationOptions.scala b/src/main/scala/millfork/CompilationOptions.scala index 72ca7290..1f3dd325 100644 --- a/src/main/scala/millfork/CompilationOptions.scala +++ b/src/main/scala/millfork/CompilationOptions.scala @@ -45,7 +45,7 @@ case class CompilationOptions(platform: Platform, EmitIntel8085Opcodes, EmitIntel8080Opcodes, UseIxForStack, UseIntelSyntaxForInput, UseIntelSyntaxForOutput) if (CpuFamily.forType(platform.cpu) != CpuFamily.I80) invalids ++= Set( - EmitExtended80Opcodes, EmitZ80Opcodes, EmitSharpOpcodes, EmitEZ80Opcodes, EmitZ80NextOpcodes, + EmitExtended80Opcodes, EmitZ80Opcodes, EmitSharpOpcodes, EmitEZ80Opcodes, EmitZ80NextOpcodes, EmitR800Opcodes, UseIyForStack, UseIxForScratch, UseIyForScratch, UseShadowRegistersForInterrupts) if (CpuFamily.forType(platform.cpu) != CpuFamily.M6809) invalids ++= Set( @@ -172,6 +172,11 @@ case class CompilationOptions(platform: Platform, log.error("Extended 8080-like opcodes enabled for architecture that doesn't support them") } } + if (flags(EmitR800Opcodes)) { + if (platform.cpu != R800) { + log.error("R800 opcodes enabled for architecture that doesn't support them") + } + } if (flags(EmitIntel8080Opcodes)) { if (!Intel8080Compatible(platform.cpu)) { log.error("Intel 8080 opcodes enabled for architecture that doesn't support them") @@ -220,6 +225,7 @@ case class CompilationOptions(platform: Platform, "OPTIMIZE_IPO" -> toLong(flag(CompilationFlag.InterproceduralOptimization)), "CPUFEATURE_DECIMAL_MODE" -> toLong(flag(CompilationFlag.DecimalMode)), "CPUFEATURE_Z80" -> toLong(flag(CompilationFlag.EmitZ80Opcodes)), + "CPUFEATURE_R800" -> toLong(flag(CompilationFlag.EmitR800Opcodes)), "CPUFEATURE_EZ80" -> toLong(flag(CompilationFlag.EmitEZ80Opcodes)), "CPUFEATURE_8080" -> toLong(flag(CompilationFlag.EmitIntel8080Opcodes)), "CPUFEATURE_8085" -> 
toLong(flag(CompilationFlag.EmitIntel8085Opcodes)), @@ -290,7 +296,7 @@ object CpuFamily extends Enumeration { import Cpu._ cpu match { case Mos | StrictMos | Ricoh | StrictRicoh | Cmos | SC02 | Rockwell | Wdc | HuC6280 | CE02 | Sixteen => M6502 - case Intel8080 | Intel8085 | StrictIntel8085 | Sharp | Z80 | StrictZ80 | EZ80 | Z80Next => I80 + case Intel8080 | Intel8085 | StrictIntel8085 | Sharp | Z80 | StrictZ80 | R800 | EZ80 | Z80Next => I80 case Intel8086 | Intel80186 => I86 case Cpu.Motorola6809 => M6809 } @@ -368,6 +374,10 @@ object Cpu extends Enumeration { * The Zilog Z80 processor, without illegal instructions */ val StrictZ80: Cpu.Value = Value + /** + * The R800 CPU (used in MSX Turbo-R) + */ + val R800: Cpu.Value = Value /** * The Zilog eZ80 processor */ @@ -400,11 +410,11 @@ object Cpu extends Enumeration { /** * Processors that can run code for Zilog Z80 */ - val Z80Compatible: Set[Cpu.Value] = Set(Z80, StrictZ80, EZ80, Z80Next) + val Z80Compatible: Set[Cpu.Value] = Set(Z80, StrictZ80, R800, EZ80, Z80Next) /** * Processors that can run code for Intel 8080 */ - val Intel8080Compatible: Set[Cpu.Value] = Set(Intel8080, Intel8085, StrictIntel8085, Z80, StrictZ80, EZ80, Z80Next) + val Intel8080Compatible: Set[Cpu.Value] = Set(Intel8080, Intel8085, StrictIntel8085, Z80, StrictZ80, R800, EZ80, Z80Next) /** * Processors that can run code for Intel 8085 */ @@ -468,6 +478,8 @@ object Cpu extends Enumeration { i80AlwaysDefaultFlags ++ Set(EmitIntel8080Opcodes, EmitIntel8085Opcodes, UseIntelSyntaxForInput, UseIntelSyntaxForOutput) case StrictZ80 | Z80 => i80AlwaysDefaultFlags ++ Set(EmitIntel8080Opcodes, EmitExtended80Opcodes, EmitZ80Opcodes, UseIxForStack, UseShadowRegistersForInterrupts) + case R800 => + i80AlwaysDefaultFlags ++ Set(EmitIntel8080Opcodes, EmitExtended80Opcodes, EmitZ80Opcodes, UseIxForStack, UseShadowRegistersForInterrupts, EmitR800Opcodes) case Z80Next => i80AlwaysDefaultFlags ++ Set(EmitIntel8080Opcodes, EmitExtended80Opcodes, EmitZ80Opcodes, 
UseIxForStack, UseShadowRegistersForInterrupts, EmitIllegals, EmitZ80NextOpcodes) case EZ80 => @@ -514,6 +526,7 @@ object Cpu extends Enumeration { case "strict2a07" => StrictRicoh case "z80" => Z80 case "strictz80" => Z80 + case "r800" => R800 case "zx80next" => Z80Next case "z80next" => Z80Next // disabled for now: @@ -564,7 +577,7 @@ object CompilationFlag extends Enumeration { EmitCmosOpcodes, EmitCmosNopOpcodes, EmitSC02Opcodes, EmitRockwellOpcodes, EmitWdcOpcodes, EmitHudsonOpcodes, Emit65CE02Opcodes, EmitEmulation65816Opcodes, EmitNative65816Opcodes, PreventJmpIndirectBug, LargeCode, ReturnWordsViaAccumulator, SoftwareStack, // compilation options for I80 - EmitIntel8080Opcodes, EmitIntel8085Opcodes, EmitExtended80Opcodes, EmitZ80Opcodes, EmitEZ80Opcodes, EmitSharpOpcodes, EmitZ80NextOpcodes, + EmitIntel8080Opcodes, EmitIntel8085Opcodes, EmitExtended80Opcodes, EmitZ80Opcodes, EmitR800Opcodes, EmitEZ80Opcodes, EmitSharpOpcodes, EmitZ80NextOpcodes, UseShadowRegistersForInterrupts, UseIxForStack, UseIyForStack, UseIxForScratch, UseIyForScratch, @@ -628,6 +641,7 @@ object CompilationFlag extends Enumeration { "emit_65ce02" -> Emit65CE02Opcodes, "emit_huc6280" -> EmitHudsonOpcodes, "emit_z80" -> EmitZ80Opcodes, + "emit_r800" -> EmitR800Opcodes, "emit_ez80" -> EmitEZ80Opcodes, "emit_x80" -> EmitExtended80Opcodes, "emit_8080" -> EmitIntel8080Opcodes, diff --git a/src/main/scala/millfork/Context.scala b/src/main/scala/millfork/Context.scala index 18a637f6..3bcd3183 100644 --- a/src/main/scala/millfork/Context.scala +++ b/src/main/scala/millfork/Context.scala @@ -46,10 +46,24 @@ case class Context(errorReporting: Logger, if (isFlagSet(CompilationFlag.EmitEZ80Opcodes)) { addons += CompilationFlag.EmitZ80Opcodes -> true } - if (isFlagSet(CompilationFlag.EmitZ80Opcodes) || isFlagSet(CompilationFlag.EmitSharpOpcodes)) { + if (isFlagSet(CompilationFlag.EmitZ80NextOpcodes)) { + addons += CompilationFlag.EmitZ80Opcodes -> true + } + if 
(isFlagSet(CompilationFlag.EmitR800Opcodes)) { + addons += CompilationFlag.EmitZ80Opcodes -> true + } + if (isFlagSet(CompilationFlag.EmitEZ80Opcodes) + || isFlagSet(CompilationFlag.EmitZ80NextOpcodes) + || isFlagSet(CompilationFlag.EmitR800Opcodes) + || isFlagSet(CompilationFlag.EmitZ80Opcodes) + || isFlagSet(CompilationFlag.EmitSharpOpcodes)) { addons += CompilationFlag.EmitExtended80Opcodes -> true } - if (isFlagSet(CompilationFlag.EmitZ80Opcodes) || isFlagSet(CompilationFlag.EmitIntel8085Opcodes)) { + if (isFlagSet(CompilationFlag.EmitEZ80Opcodes) + || isFlagSet(CompilationFlag.EmitZ80NextOpcodes) + || isFlagSet(CompilationFlag.EmitR800Opcodes) + || isFlagSet(CompilationFlag.EmitZ80Opcodes) + || isFlagSet(CompilationFlag.EmitIntel8085Opcodes)) { addons += CompilationFlag.EmitIntel8080Opcodes -> true } if (isFlagSet(CompilationFlag.OptimizeForSpeed)) { diff --git a/src/main/scala/millfork/Main.scala b/src/main/scala/millfork/Main.scala index 4499beda..4b427218 100644 --- a/src/main/scala/millfork/Main.scala +++ b/src/main/scala/millfork/Main.scala @@ -321,7 +321,9 @@ object Main { val assemblyOptimizations = optLevel match { case 0 => Nil case _ => - if (options.flag(CompilationFlag.EmitZ80Opcodes)) + if (options.flag(CompilationFlag.EmitR800Opcodes)) + Z80OptimizationPresets.GoodForR800 + else if (options.flag(CompilationFlag.EmitZ80Opcodes)) Z80OptimizationPresets.GoodForZ80 else if (options.flag(CompilationFlag.EmitIntel8080Opcodes)) Z80OptimizationPresets.GoodForIntel8080 diff --git a/src/main/scala/millfork/assembly/z80/ZLine.scala b/src/main/scala/millfork/assembly/z80/ZLine.scala index 4adfec44..5f87ff51 100644 --- a/src/main/scala/millfork/assembly/z80/ZLine.scala +++ b/src/main/scala/millfork/assembly/z80/ZLine.scala @@ -1067,6 +1067,17 @@ case class ZLine(opcode: ZOpcode.Value, registers: ZRegisters, parameter: Consta case LHLX | RLDE => r == D || r == E case RRHL => r == H || r == L + case MULUB => r == A || (registers match { + case TwoRegisters(p, 
q) => r == q || r == p + case _ => true + }) + case MULUW => r == H || r == L || (registers match { + case TwoRegisters(_, BC) => r == B || r == C + case TwoRegisters(_, DE) => r == D || r == E + case TwoRegisters(_, SP) => r == SP + case _ => true + }) + case _ => true // TODO } } @@ -1230,6 +1241,9 @@ case class ZLine(opcode: ZOpcode.Value, registers: ZRegisters, parameter: Consta case LHLX | RRHL | DSUB => r == H || r == L case SHLX => false + case MULUB => r == H || r == L + case MULUW => r == H || r == L || r == D || r == E + case _ => true // TODO } } diff --git a/src/main/scala/millfork/assembly/z80/ZOpcode.scala b/src/main/scala/millfork/assembly/z80/ZOpcode.scala index 805360cf..b8e6cc90 100644 --- a/src/main/scala/millfork/assembly/z80/ZOpcode.scala +++ b/src/main/scala/millfork/assembly/z80/ZOpcode.scala @@ -29,6 +29,8 @@ object ZOpcode extends Enumeration { LD_DESP, LD_DEHL, RRHL, RLDE, DSUB, RSTV, LHLX, SHLX, //sharp: LD_AHLI, LD_AHLD, LD_HLIA, LD_HLDA, SWAP, LDH_DA, LDH_AD, LDH_CA, LDH_AC, LD_HLSP, ADD_SP, STOP, + // R800: + MULUB, MULUW, // next: LDIX, LDWS, LDIRX, LDDX, LDDRX, LDPIRX, OUTINB, MUL, SWAPNIB, MIRROR, NEXTREG, PIXELDN, PIXELAD, SETAE, TEST, DISCARD_A, DISCARD_F, DISCARD_HL, DISCARD_BC, DISCARD_DE, DISCARD_IX, DISCARD_IY, CHANGED_MEM, @@ -55,6 +57,7 @@ object ZOpcodeClasses { val CbInstructions: Set[ZOpcode.Value] = Set(SLA, SRA, SRL, SLL, RLC, RRC, RL, RR) ++ BIT ++ RES ++ SET val EdInstructions: Set[ZOpcode.Value] = Set(NEG, RETN, RETI, IM, RRD, RLD, + MULUB, MULUW, INI, INIR, OUTI, OUTIR, IND, INDR, OUTD, OUTDR, LDI, LDIR, LDD, LDDR, CPI, CPIR, CPD, CPDR) ++ BIT ++ RES ++ SET @@ -63,6 +66,7 @@ object ZOpcodeClasses { val ChangesAFAlways: Set[ZOpcode.Value] = Set( // TODO: ! 
DAA, ADD, ADC, SUB, SBC, XOR, OR, AND, INC, DEC, SCF, CCF, NEG, RIM, + MULUB, MULUW, LDH_AC, LDH_AD, LD_AHLI, LD_AHLD, ADD_16, ADC_16, SBC_16, INC_16, DEC_16, INI, INIR, OUTI, OUTIR, IND, INDR, OUTD, OUTDR, @@ -74,6 +78,7 @@ object ZOpcodeClasses { LDIX, LDIRX, LDDX, LDDRX, LDPIRX, EXX, CALL, JR, JP, LABEL, DJNZ) val ChangesHLAlways: Set[ZOpcode.Value] = Set( + MULUB, MULUW, INI, INIR, OUTI, OUTIR, IND, INDR, OUTD, OUTDR, LDI, LDIR, LDD, LDDR, CPI, CPIR, CPD, CPDR, LD_AHLI, LD_AHLD, LD_HLIA, LD_HLDA, LD_HLSP, DSUB, @@ -81,6 +86,7 @@ object ZOpcodeClasses { LDWS, LDIX, LDIRX, LDDX, LDDRX, LDPIRX, PIXELAD, PIXELDN, OUTINB, EXX, EX_DE_HL, CALL, JR, JP, LABEL) val ChangesDEAlways: Set[ZOpcode.Value] = Set( + MULUW, LDI, LDIR, LDD, LDDR, LD_DESP, LD_DEHL, RLDE, LDWS, LDIX, LDIRX, LDDX, LDDRX, LDPIRX, MUL, @@ -88,6 +94,7 @@ object ZOpcodeClasses { val ChangesOnlyRegister: Set[ZOpcode.Value] = Set(INC, DEC, INC_16, DEC_16, POP, EX_SP, IN_C, IN_IMM, RL, RR, RLC, RRC, SLA, SRA, SRL, SLL) ++ SET ++ RES val ChangesFirstRegister: Set[ZOpcode.Value] = Set(LD, LD_16, ADD_16, SBC_16) val ChangesAAlways: Set[ZOpcode.Value] = Set( + MULUB, DAA, ADD, ADC, SUB, SBC, XOR, OR, AND, LD_AHLI, LD_AHLD, RIM, MIRROR, SETAE, ) diff --git a/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodR800Optimizations.scala b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodR800Optimizations.scala new file mode 100644 index 00000000..c2fbaf55 --- /dev/null +++ b/src/main/scala/millfork/assembly/z80/opt/AlwaysGoodR800Optimizations.scala @@ -0,0 +1,24 @@ +package millfork.assembly.z80.opt + +import millfork.assembly.AssemblyOptimization +import millfork.assembly.z80.ZLine +import millfork.assembly.z80.ZOpcode.MULUB +import millfork.assembly.z80.ZOpcode.MULUW +import millfork.node.ZRegister + +/** + * Optimizations valid for R800 + * @author Karol Stasiak + */ +object AlwaysGoodR800Optimizations { + + val UnusedR800Instructions = new RuleBasedAssemblyOptimization("Simplifiable maths (R800)", + 
needsFlowInfo = FlowInfoRequirement.BackwardFlow, + (Elidable & HasOpcode(MULUB) & DoesntMatterWhatItDoesWith(ZRegister.H, ZRegister.L) & DoesntMatterWhatItDoesWithFlags) ~~> (_ => Nil), + (Elidable & HasOpcode(MULUW) & DoesntMatterWhatItDoesWith(ZRegister.H, ZRegister.L, ZRegister.D, ZRegister.E) & DoesntMatterWhatItDoesWithFlags) ~~> (_ => Nil), + ) + + val All: List[AssemblyOptimization[ZLine]] = List[AssemblyOptimization[ZLine]]( + UnusedR800Instructions, + ) +} diff --git a/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala b/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala index 20ee650a..912c54a0 100644 --- a/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala +++ b/src/main/scala/millfork/assembly/z80/opt/CoarseFlowAnalyzer.scala @@ -361,6 +361,38 @@ object CoarseFlowAnalyzer { case ZLine0(RIM, _, _) => currentStatus = currentStatus.copy(a = AnyStatus) + case ZLine0(MULUB, TwoRegisters(ZRegister.A, r@(ZRegister.B | ZRegister.C | ZRegister.D | ZRegister.E)), _) => + val hl = (currentStatus.a<*>currentStatus.getRegister(r, 0)){(a,b) => (a*b)&0xffff} + currentStatus = currentStatus.copy( + h = hl.hi, + l = hl.lo, + hl = hl.map(NumericConstant(_, 2)), + cf = AnyStatus, + nf = AnyStatus, + hf = AnyStatus, + zf = AnyStatus, + sf = AnyStatus, + pf = AnyStatus + ) + case ZLine0(MULUW, TwoRegisters(ZRegister.HL, ZRegister.BC), _) => + val hl = (currentStatus.h<*>currentStatus.l)(currentStatus.mergeBytes) + val bc = (currentStatus.b<*>currentStatus.c)(currentStatus.mergeBytes) + val hi = (hl<*>bc){(a,b) => (a*b).>>(16).&(0xffff)} + val lo = (hl<*>bc){(a,b) => (a*b).&(0xffff)} + currentStatus = currentStatus.copy( + d = hi.hi, + e = hi.lo, + h = lo.hi, + l = lo.lo, + hl = lo.map(NumericConstant(_, 2)), + cf = AnyStatus, + nf = AnyStatus, + hf = AnyStatus, + zf = AnyStatus, + sf = AnyStatus, + pf = AnyStatus + ) + case ZLine0(opcode, registers, _) => currentStatus = currentStatus.copy(cf = AnyStatus, zf = AnyStatus, sf = 
AnyStatus, pf = AnyStatus, hf = AnyStatus) if (ZOpcodeClasses.ChangesAAlways(opcode)) currentStatus = currentStatus.copy(a = AnyStatus) diff --git a/src/main/scala/millfork/assembly/z80/opt/Z80OptimizationPresets.scala b/src/main/scala/millfork/assembly/z80/opt/Z80OptimizationPresets.scala index 64d77a16..7d0c06ed 100644 --- a/src/main/scala/millfork/assembly/z80/opt/Z80OptimizationPresets.scala +++ b/src/main/scala/millfork/assembly/z80/opt/Z80OptimizationPresets.scala @@ -22,6 +22,21 @@ object Z80OptimizationPresets { ).flatten } + val GoodForR800: List[AssemblyOptimization[ZLine]] = { + List.fill(5)( + List.fill(5)( + AlwaysGoodI80Optimizations.All ++ + AlwaysGoodZ80Optimizations.All ++ + AlwaysGoodR800Optimizations.All ++ + List( + EmptyParameterStoreRemoval, + EmptyMemoryStoreRemoval) + ).flatten ++ + List(ChangeRegisterPairPreferringDE, WordVariableToRegisterOptimization, ByteVariableToRegisterOptimization, ChangeRegisterPairPreferringBC, CompactStackFrame) ++ + LaterIntel8080Optimizations.All ++ LaterI80Optimizations.All + ).flatten + } + val GoodForIntel8080: List[AssemblyOptimization[ZLine]] = { List.fill(5)( List.fill(5)( diff --git a/src/main/scala/millfork/output/Z80Assembler.scala b/src/main/scala/millfork/output/Z80Assembler.scala index d869e932..975340a4 100644 --- a/src/main/scala/millfork/output/Z80Assembler.scala +++ b/src/main/scala/millfork/output/Z80Assembler.scala @@ -67,6 +67,14 @@ class Z80Assembler(program: Program, def requireZ80Illegals(): Unit = if (!options.flag(EmitZ80Opcodes) || !options.flag(EmitIllegals)) log.error("Unsupported instruction: " + instr) + def requireR800(): Unit = if (!options.flag(EmitR800Opcodes)) log.error("Unsupported instruction: " + instr) + + def requireNoR800(): Unit = if (options.flag(EmitR800Opcodes)) log.error("Unsupported instruction: " + instr) + + def requireR800OrIllegals(): Unit = if (!options.flag(EmitR800Opcodes) && !options.flag(EmitIllegals)) log.error("Unsupported instruction: " + instr) + + def 
requireR800Illegals(): Unit = if (!options.flag(EmitR800Opcodes) || !options.flag(EmitIllegals)) log.error("Unsupported instruction: " + instr) + def requireExtended80(): Unit = if (!options.flag(EmitExtended80Opcodes)) log.error("Unsupported instruction: " + instr) def requireSharp(): Unit = if (!options.flag(EmitSharpOpcodes)) log.error("Unsupported instruction: " + instr) @@ -326,7 +334,7 @@ class Z80Assembler(program: Program, writeByte(bank, index + 2, instr.parameter) index + 3 case ZLine0(op, OneRegister(ix@(IXH | IYH | IXL | IYL)), _) if oneRegister.contains(op) => - requireZ80Illegals() + requireR800OrIllegals() val o = oneRegister(op) writeByte(bank, index, prefixByte(ix)) writeByte(bank, index + 1, o.opcode + internalRegisterIndex(ix) * o.multiplier) @@ -337,6 +345,7 @@ class Z80Assembler(program: Program, index + 1 case ZLine0(SLL, OneRegister(reg), _) => requireZ80Illegals() + requireNoR800() writeByte(bank, index, 0xcb) writeByte(bank, index + 1, 0x30 + internalRegisterIndex(reg)) index + 2 @@ -347,6 +356,7 @@ class Z80Assembler(program: Program, index + 2 case ZLine0(SLL, OneRegisterOffset(ix@(ZRegister.MEM_IX_D | ZRegister.MEM_IY_D), offset), _) => requireZ80Illegals() + requireNoR800() writeByte(bank, index, prefixByte(ix)) writeByte(bank, index + 1, 0xcb) writeByte(bank, index + 2, offset) @@ -443,22 +453,22 @@ class Z80Assembler(program: Program, writeByte(bank, index + 2, offset) index + 3 case TwoRegisters(target@(IXH | IYH | IXL | IYL), source@(A | B | C | D | E)) => - requireZ80Illegals() + requireR800OrIllegals() writeByte(bank, index, prefixByte(target)) writeByte(bank, index, 0x40 + internalRegisterIndex(source) + internalRegisterIndex(target) * 8) index + 2 case TwoRegisters(target@(A | B | C | D | E), source@(IXH | IYH | IXL | IYL)) => - requireZ80Illegals() + requireR800OrIllegals() writeByte(bank, index, prefixByte(source)) writeByte(bank, index, 0x40 + internalRegisterIndex(source) + internalRegisterIndex(target) * 8) index + 2 case 
TwoRegisters(target@(IXH | IXL), source@(IXH | IXL)) => - requireZ80Illegals() + requireR800OrIllegals() writeByte(bank, index, prefixByte(source)) writeByte(bank, index, 0x40 + internalRegisterIndex(source) + internalRegisterIndex(target) * 8) index + 2 case TwoRegisters(target@(IYH | IYL), source@(IYH | IYL)) => - requireZ80Illegals() + requireR800OrIllegals() writeByte(bank, index, prefixByte(source)) writeByte(bank, index, 0x40 + internalRegisterIndex(source) + internalRegisterIndex(target) * 8) index + 2 @@ -771,6 +781,61 @@ class Z80Assembler(program: Program, requireIntel8085Illegals() writeByte(bank, index, 0x10) index + 1 + case ZLine0(MULUB, TwoRegisters(A, A), _) => + requireR800Illegals() + writeByte(bank, index, 0xED) + writeByte(bank, index + 1, 0xF9) + index + 2 + case ZLine0(MULUB, TwoRegisters(A, B), _) => + requireR800() + writeByte(bank, index, 0xED) + writeByte(bank, index + 1, 0xC1) + index + 2 + case ZLine0(MULUB, TwoRegisters(A, C), _) => + requireR800() + writeByte(bank, index, 0xED) + writeByte(bank, index + 1, 0xC9) + index + 2 + case ZLine0(MULUB, TwoRegisters(A, D), _) => + requireR800() + writeByte(bank, index, 0xED) + writeByte(bank, index + 1, 0xD1) + index + 2 + case ZLine0(MULUB, TwoRegisters(A, E), _) => + requireR800() + writeByte(bank, index, 0xED) + writeByte(bank, index + 1, 0xD9) + index + 2 + case ZLine0(MULUB, TwoRegisters(A, H), _) => + requireR800Illegals() + writeByte(bank, index, 0xED) + writeByte(bank, index + 1, 0xE1) + index + 2 + case ZLine0(MULUB, TwoRegisters(A, L), _) => + requireR800Illegals() + writeByte(bank, index, 0xED) + writeByte(bank, index + 1, 0xE9) + index + 2 + case ZLine0(MULUW, TwoRegisters(HL, BC), _) => + requireR800() + writeByte(bank, index, 0xED) + writeByte(bank, index + 1, 0xC3) + index + 2 + case ZLine0(MULUW, TwoRegisters(HL, DE), _) => + requireR800Illegals() + writeByte(bank, index, 0xED) + writeByte(bank, index + 1, 0xD3) + index + 2 + case ZLine0(MULUW, TwoRegisters(HL, HL), _) => + 
requireR800Illegals() + writeByte(bank, index, 0xED) + writeByte(bank, index + 1, 0xE3) + index + 2 + case ZLine0(MULUW, TwoRegisters(HL, SP), _) => + requireR800() + writeByte(bank, index, 0xED) + writeByte(bank, index + 1, 0xF3) + index + 2 case _ => log.fatal("Cannot assemble " + instr) index diff --git a/src/main/scala/millfork/parser/Z80Parser.scala b/src/main/scala/millfork/parser/Z80Parser.scala index 2e8b90b4..4226ce1b 100644 --- a/src/main/scala/millfork/parser/Z80Parser.scala +++ b/src/main/scala/millfork/parser/Z80Parser.scala @@ -109,7 +109,7 @@ case class Z80Parser(filename: String, case (VariableExpression(r), false) if toRegister.contains(r)=> (toRegister(r), None) case (VariableExpression(r), false) if options.flag(CompilationFlag.EmitZ80Opcodes) && - options.flag(CompilationFlag.EmitIllegals) && + (options.flag(CompilationFlag.EmitIllegals) || options.flag(CompilationFlag.EmitR800Opcodes)) && toIndexHalf.contains(r)=> (toIndexHalf(r), None) case (SumExpression(List( (false, LiteralExpression(0xff00, _)), @@ -534,7 +534,7 @@ case class Z80Parser(filename: String, case "PIXELAD" => imm(PIXELAD) case "SETAE" => imm(SETAE) case "MUL" => (("D"|"d") ~ HWS ~ "," ~/ HWS ~ ("E" | "e")).?.map { _ => (MUL, NoRegisters, None, zero)} - case "MIRROR" => ("A"|"a").?.map { _ => (MUL, NoRegisters, None, zero)} + case "MIRROR" => ("A"|"a").?.map { _ => (MIRROR, NoRegisters, None, zero)} case "NEXTREG" =>(param(allowAbsolute = false) ~ HWS ~ position("comma").map(_ => ()) ~ "," ~/ HWS ~ param(allowAbsolute = false)).map { case (ZRegister.IMM_8, Some(n), (ZRegister.A, None)) => (NEXTREG, TwoRegisters(ZRegister.IMM_8, ZRegister.A), None, n) case (ZRegister.IMM_8, Some(n), (ZRegister.IMM_8, Some(v))) => (NEXTREG, TwoRegisters(ZRegister.IMM_8, ZRegister.IMM_8), None, SeparateBytesExpression(v, n)) @@ -544,6 +544,19 @@ case class Z80Parser(filename: String, } case "TEST" => one8Register(TEST) + case "MULUB" => (param(allowAbsolute = false) ~ HWS ~ "," ~/ HWS ~ 
param(allowAbsolute = false)).map { + case (ZRegister.A, None, (r, None)) => (MULUB, TwoRegisters(ZRegister.A, r), None, zero) + case _ => + log.error("Invalid parameters for MULUB", Some(pos)) + (NOP, NoRegisters, None, zero) + } + case "MULUW" => (param(allowAbsolute = false) ~ HWS ~ "," ~/ HWS ~ param(allowAbsolute = false)).map { + case (ZRegister.HL, None, (r, None)) => (MULUW, TwoRegisters(ZRegister.HL, r), None, zero) + case _ => + log.error("Invalid parameters for MULUW", Some(pos)) + (NOP, NoRegisters, None, zero) + } + case _ => log.error("Unsupported opcode " + opcode, Some(pos)) imm(NOP) diff --git a/src/test/scala/millfork/test/ByteMathSuite.scala b/src/test/scala/millfork/test/ByteMathSuite.scala index 767ad0d1..9677d95f 100644 --- a/src/test/scala/millfork/test/ByteMathSuite.scala +++ b/src/test/scala/millfork/test/ByteMathSuite.scala @@ -193,7 +193,7 @@ class ByteMathSuite extends FunSuite with Matchers with AppendedClues { } test("Byte multiplication 2") { - EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.Motorola6809)( + EmuCrossPlatformBenchmarkRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8080, Cpu.Sharp, Cpu.R800, Cpu.Motorola6809)( """ | import zp_reg | byte output1 @$c001, output2 @$c002, output3 @$c003 diff --git a/src/test/scala/millfork/test/StructSuite.scala b/src/test/scala/millfork/test/StructSuite.scala index 427fbffc..f9a4d749 100644 --- a/src/test/scala/millfork/test/StructSuite.scala +++ b/src/test/scala/millfork/test/StructSuite.scala @@ -249,6 +249,7 @@ class StructSuite extends FunSuite with Matchers { | p->tmp[1] = 77 | outputAlias[0].tmp[id(3)] = 3 | outputAlias[id(0)].tmp[5] = 55 + | output.tmp[6] = lo(output.tmp - output.addr) |} |""".stripMargin EmuUnoptimizedCrossPlatformRun(Cpu.Mos, Cpu.Z80, Cpu.Intel8086, Cpu.Motorola6809)(code){ m => @@ -257,6 +258,7 @@ class StructSuite extends FunSuite with Matchers { m.readByte(0xc003) should equal(3) m.readByte(0xc004) should equal(4) m.readByte(0xc005) should equal(55) 
+ m.readByte(0xc006) should equal(0) } } diff --git a/src/test/scala/millfork/test/Z80AssemblySuite.scala b/src/test/scala/millfork/test/Z80AssemblySuite.scala index 9c9967bc..939ea154 100644 --- a/src/test/scala/millfork/test/Z80AssemblySuite.scala +++ b/src/test/scala/millfork/test/Z80AssemblySuite.scala @@ -1,7 +1,7 @@ package millfork.test import millfork.Cpu -import millfork.test.emu.{EmuUnoptimizedCrossPlatformRun, EmuUnoptimizedIntel8080Run, EmuUnoptimizedIntel8085Run, EmuUnoptimizedSharpRun, EmuUnoptimizedZ80NextRun, EmuUnoptimizedZ80Run} +import millfork.test.emu.{EmuUnoptimizedCrossPlatformRun, EmuUnoptimizedIntel8080Run, EmuUnoptimizedIntel8085Run, EmuUnoptimizedR800Run, EmuUnoptimizedSharpRun, EmuUnoptimizedZ80NextRun, EmuUnoptimizedZ80Run} import org.scalatest.{FunSuite, Matchers} /** @@ -1340,4 +1340,42 @@ class Z80AssemblySuite extends FunSuite with Matchers { """.stripMargin) } + test("R800 stuff") { + EmuUnoptimizedR800Run( + """ + | #pragma zilog_syntax + | asm void main () { + | ret + | mulub a,b + | mulub a,c + | mulub a,d + | mulub a,e + | muluw hl,bc + | muluw hl,sp + | inc ixh + | inc ixl + | inc iyh + | inc iyl + | dec ixh + | dec ixl + | dec iyh + | dec iyl + | ld a,ixh + | ld a,ixl + | ld iyh,a + | ld iyl,a + | add a,iyl + | adc a,iyl + | sub iyl + | sbc a,iyl + | or iyl + | xor iyl + | and iyl + | cp iyl + | ld ixh,0 + | ret + | } + """.stripMargin) + } + } diff --git a/src/test/scala/millfork/test/emu/EmuBenchmarkRun.scala b/src/test/scala/millfork/test/emu/EmuBenchmarkRun.scala index cee461d6..82e71aa5 100644 --- a/src/test/scala/millfork/test/emu/EmuBenchmarkRun.scala +++ b/src/test/scala/millfork/test/emu/EmuBenchmarkRun.scala @@ -106,6 +106,31 @@ object EmuSharpBenchmarkRun { } } +object EmuR800BenchmarkRun { + def apply(source: String)(verifier: MemoryBank => Unit): Unit = { + val (Timings(t0, _), m0) = EmuUnoptimizedR800Run.apply2(source) + val (Timings(t1, _), m1) = EmuOptimizedR800Run.apply2(source) + val (Timings(t2, _), m2) = 
EmuOptimizedInlinedR800Run.apply2(source) + if (t0 > 0)println(f"Before optimization: $t0%7d") + if (t1 > 0)println(f"After optimization: $t1%7d") + if (t2 > 0)println(f"After inlining: $t2%7d") + if (t0 > 0 && t1 > 0) println(f"Gain: ${(100L * (t0 - t1) / t0.toDouble).round}%7d%%") + if (t0 > 0 && t2 > 0) println(f"Gain with inlining: ${(100L * (t0 - t2) / t0.toDouble).round}%7d%%") + if (t0 > 0) { + println(f"Running R800 unoptimized") + verifier(m0) + } + if (t1 > 0) { + println(f"Running R800 optimized") + verifier(m1) + } + if (t2 > 0) { + println(f"Running R800 optimized inlined") + verifier(m2) + } + } +} + object EmuIntel8086BenchmarkRun { def apply(source: String)(verifier: MemoryBank => Unit): Unit = { val (Timings(t0, _), m0) = EmuUnoptimizedIntel8086Run.apply2(source) @@ -170,6 +195,9 @@ object EmuCrossPlatformBenchmarkRun { if (Settings.enableIntel8080Tests && platforms.contains(millfork.Cpu.Intel8080)) { EmuIntel8080BenchmarkRun.apply(source)(verifier) } + if (Settings.enableZ80Tests && platforms.contains(millfork.Cpu.R800)) { + EmuR800BenchmarkRun.apply(source)(verifier) + } if (Settings.enableUnemulatedTests && platforms.contains(millfork.Cpu.Intel8085)) { EmuUnoptimizedIntel8085Run.apply(source) } diff --git a/src/test/scala/millfork/test/emu/EmuOptimizedInlinedRun.scala b/src/test/scala/millfork/test/emu/EmuOptimizedInlinedRun.scala index 0145e341..18760dbf 100644 --- a/src/test/scala/millfork/test/emu/EmuOptimizedInlinedRun.scala +++ b/src/test/scala/millfork/test/emu/EmuOptimizedInlinedRun.scala @@ -50,6 +50,10 @@ object EmuOptimizedInlinedSharpRun extends EmuZ80Run(Cpu.Sharp, OptimizationPres override def inline: Boolean = true } +object EmuOptimizedInlinedR800Run extends EmuZ80Run(Cpu.R800, OptimizationPresets.NodeOpt, Z80OptimizationPresets.GoodForR800) { + override def inline: Boolean = true +} + object EmuOptimizedInlinedM6809Run extends EmuM6809Run(Cpu.Motorola6809, OptimizationPresets.NodeOpt, M6809OptimizationPresets.Default) { override 
def inline: Boolean = true } diff --git a/src/test/scala/millfork/test/emu/EmuOptimizedRun.scala b/src/test/scala/millfork/test/emu/EmuOptimizedRun.scala index 1c772961..263519cf 100644 --- a/src/test/scala/millfork/test/emu/EmuOptimizedRun.scala +++ b/src/test/scala/millfork/test/emu/EmuOptimizedRun.scala @@ -95,4 +95,6 @@ object EmuSizeOptimizedIntel8080Run extends EmuZ80Run(Cpu.Intel8080, Optimizatio object EmuOptimizedSharpRun extends EmuZ80Run(Cpu.Sharp, OptimizationPresets.NodeOpt, Z80OptimizationPresets.GoodForSharp) +object EmuOptimizedR800Run extends EmuZ80Run(Cpu.R800, OptimizationPresets.NodeOpt, Z80OptimizationPresets.GoodForR800) + object EmuOptimizedM6809Run extends EmuM6809Run(Cpu.Motorola6809, OptimizationPresets.NodeOpt, M6809OptimizationPresets.Default) diff --git a/src/test/scala/millfork/test/emu/EmuUnoptimizedRun.scala b/src/test/scala/millfork/test/emu/EmuUnoptimizedRun.scala index c88f0fb0..adff1ac6 100644 --- a/src/test/scala/millfork/test/emu/EmuUnoptimizedRun.scala +++ b/src/test/scala/millfork/test/emu/EmuUnoptimizedRun.scala @@ -28,6 +28,8 @@ object EmuUnoptimizedIntel8085Run extends EmuZ80Run(Cpu.Intel8085, Nil, Nil) object EmuUnoptimizedZ80NextRun extends EmuZ80Run(Cpu.Z80Next, Nil, Nil) +object EmuUnoptimizedR800Run extends EmuZ80Run(Cpu.R800, Nil, Nil) + object EmuUnoptimizedIntel8086Run extends EmuI86Run(Nil, Nil) object EmuUnoptimizedSharpRun extends EmuZ80Run(Cpu.Sharp, Nil, Nil) diff --git a/src/test/scala/millfork/test/emu/EmuZ80Run.scala b/src/test/scala/millfork/test/emu/EmuZ80Run.scala index 8e77c0b0..9ddd1cd7 100644 --- a/src/test/scala/millfork/test/emu/EmuZ80Run.scala +++ b/src/test/scala/millfork/test/emu/EmuZ80Run.scala @@ -30,6 +30,8 @@ import scala.collection.mutable */ object EmuZ80Run { + val secondBytesOfMulOnR800: Set[Int] = Set(0xf9, 0xc1, 0xc9, 0xd1, 0xd9, 0xe1, 0xe9, 0xc3, 0xd3, 0xe3, 0xf3) + private def preload(cpu: millfork.Cpu.Value, filename: String): Option[Program] = { TestErrorReporting.log.info(s"Loading 
$filename for $cpu") val source = Files.readAllLines(Paths.get(filename), StandardCharsets.US_ASCII).asScala.mkString("\n") @@ -87,6 +89,7 @@ class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimizatio CompilationFlag.SubroutineExtraction -> optimizeForSize, CompilationFlag.EmitIllegals -> (cpu == millfork.Cpu.Z80 || cpu == millfork.Cpu.Intel8085 || cpu == millfork.Cpu.Z80Next), CompilationFlag.EmitZ80NextOpcodes -> (cpu == millfork.Cpu.Z80Next), + CompilationFlag.EmitR800Opcodes -> (cpu == millfork.Cpu.R800), CompilationFlag.LenientTextEncoding -> true) if (source.contains("intel_syntax")) { extraFlags += CompilationFlag.UseIntelSyntaxForOutput -> true @@ -199,26 +202,35 @@ class EmuZ80Run(cpu: millfork.Cpu.Value, nodeOptimizations: List[NodeOptimizatio method } val timings = platform.cpu match { - case millfork.Cpu.Z80 | millfork.Cpu.Intel8080 => - val cpu = new Z80Core(Z80Memory(memoryBank), DummyIO) - cpu.reset() - cpu.setProgramCounter(0x1ed) - cpu.resetTStates() - while (!cpu.getHalt) { - cpu.executeOneInstruction() - if (resetN) { - resetNMethod.invoke(cpu) + case millfork.Cpu.Z80 | millfork.Cpu.Intel8080 | millfork.Cpu.R800 => + val hasMultiplications = (platform.cpu == millfork.Cpu.R800) && ((0x200 to 0xfffe).exists { addr => + val b0 = memoryBank.output(addr) + val b1 = memoryBank.output(addr + 1) + b0.&(0xff) == 0xED && EmuZ80Run.secondBytesOfMulOnR800(b1.&(0xff)) + }) + if (hasMultiplications) { + Timings(-1, -1) -> memoryBank + } else { + val cpu = new Z80Core(Z80Memory(memoryBank), DummyIO) + cpu.reset() + cpu.setProgramCounter(0x1ed) + cpu.resetTStates() + while (!cpu.getHalt) { + cpu.executeOneInstruction() + if (resetN) { + resetNMethod.invoke(cpu) + } + if (cpu.getSP.&(0xffff) < 0xd002) { + log.debug("stack dump:") + (0xD000 until 0xD0FF).map(memoryBank.output).grouped(16).map(_.map(i => f"$i%02x").mkString(" ")).foreach(log.debug(_)) + throw new IllegalStateException("stack overflow") + } + // dump(cpu) + cpu.getTStates 
should be < TooManyCycles } - if (cpu.getSP.&(0xffff) < 0xd002) { - log.debug("stack dump:") - (0xD000 until 0xD0FF).map(memoryBank.output).grouped(16).map(_.map(i => f"$i%02x").mkString(" ")).foreach(log.debug(_)) - throw new IllegalStateException("stack overflow") - } -// dump(cpu) - cpu.getTStates should be < TooManyCycles + val tStates = cpu.getTStates + Timings(tStates, tStates) -> memoryBank } - val tStates = cpu.getTStates - Timings(tStates, tStates) -> memoryBank case millfork.Cpu.Sharp => var ticks = 0L val cpu = GameboyStubs(memoryBank).cpu