fix optimized multi-value call result assignment

This commit is contained in:
Irmen de Jong
2025-02-18 22:04:36 +01:00
parent ebdea9cf76
commit bc550a4549
10 changed files with 173 additions and 98 deletions
@@ -6,6 +6,41 @@ import prog8.code.core.*
sealed interface IPtSubroutine {
    val name: String
    val scopedName: String

    /**
     * Lists, per return value of this subroutine, the register (or status flag) that carries it
     * together with the datatype of that value.
     *
     * - [PtAsmSub]: its own `returns` list is handed back unchanged.
     * - [PtSub]: registers are derived from the return types. A single value goes in A (byte/bool),
     *   AY (word, and any other non-float type) or FAC1 (float). With multiple values the first one
     *   is placed the same way, and the remaining ones are paired with the virtual registers
     *   taken in reverse order of [Cx16VirtualRegisters].
     */
    fun returnsWhatWhere(): List<Pair<RegisterOrStatusflag, DataType>> = when(this) {
        is PtAsmSub -> returns
        is PtSub -> {
            // for non-asm subroutines the cpu register follows from the datatype of the returned value
            fun cpuRegisterFor(dt: DataType): RegisterOrStatusflag {
                val cpuReg = when {
                    dt.isByteOrBool -> RegisterOrPair.A
                    dt.isWord -> RegisterOrPair.AY
                    dt.isFloat -> RegisterOrPair.FAC1
                    else -> RegisterOrPair.AY
                }
                return RegisterOrStatusflag(cpuReg, null)
            }

            when(returns.size) {
                0 -> emptyList()
                1 -> returns.single().let { dt -> listOf(Pair(cpuRegisterFor(dt), dt)) }
                else -> {
                    // multi-value result: first value in a cpu register, the others in virtual registers (reversed order)
                    val head = returns.first()
                    val viaVirtualRegisters = returns.drop(1)
                        .zip(Cx16VirtualRegisters.reversed()) { dt, reg -> RegisterOrStatusflag(reg, null) to dt }
                    listOf(cpuRegisterFor(head) to head) + viaVirtualRegisters
                }
            }
        }
    }
}
class PtAsmSub(
@@ -687,8 +687,8 @@ class AsmGen6502Internal (
RegisterOrPair.Y -> assignmentAsmGen.assignRegisterByte(target, reg.asCpuRegister(), target.datatype.isSigned, true)
RegisterOrPair.AX,
RegisterOrPair.AY,
RegisterOrPair.XY,
in Cx16VirtualRegisters -> assignmentAsmGen.assignRegisterpairWord(target, reg)
RegisterOrPair.XY -> assignmentAsmGen.assignRegisterpairWord(target, reg)
in Cx16VirtualRegisters -> assignmentAsmGen.assignVirtualRegister(target, reg)
RegisterOrPair.FAC1 -> assignmentAsmGen.assignFAC1float(target)
RegisterOrPair.FAC2 -> assignmentAsmGen.assignFAC2float(target)
else -> throw AssemblyError("invalid register")
@@ -1,45 +0,0 @@
package prog8.codegen.cpu6502
import prog8.code.ast.IPtSubroutine
import prog8.code.ast.PtAsmSub
import prog8.code.ast.PtSub
import prog8.code.core.Cx16VirtualRegisters
import prog8.code.core.DataType
import prog8.code.core.RegisterOrPair
import prog8.code.core.RegisterOrStatusflag
/**
 * Lists, per return value of the subroutine, the register (or status flag) that carries it
 * together with the datatype of that value.
 *
 * A [PtAsmSub] simply yields its own `returns` list. For a [PtSub] the registers are derived from
 * the return types: the first (or only) value uses A, AY or FAC1, and any further values are paired
 * with the virtual registers taken in reverse order of [Cx16VirtualRegisters].
 */
internal fun IPtSubroutine.returnsWhatWhere(): List<Pair<RegisterOrStatusflag, DataType>> = when(this) {
    is PtAsmSub -> returns
    is PtSub -> {
        // for non-asm subroutines the cpu register follows from the datatype of the returned value
        fun cpuRegisterFor(dt: DataType): RegisterOrStatusflag {
            val cpuReg = when {
                dt.isByteOrBool -> RegisterOrPair.A
                dt.isWord -> RegisterOrPair.AY
                dt.isFloat -> RegisterOrPair.FAC1
                else -> RegisterOrPair.AY
            }
            return RegisterOrStatusflag(cpuReg, null)
        }

        when(returns.size) {
            0 -> emptyList()
            1 -> returns.single().let { dt -> listOf(Pair(cpuRegisterFor(dt), dt)) }
            else -> {
                // multi-value result: first value in a cpu register, the others in virtual registers (reversed order)
                val head = returns.first()
                val viaVirtualRegisters = returns.drop(1)
                    .zip(Cx16VirtualRegisters.reversed()) { dt, reg -> RegisterOrStatusflag(reg, null) to dt }
                listOf(cpuRegisterFor(head) to head) + viaVirtualRegisters
            }
        }
    }
}
@@ -3,7 +3,6 @@ package prog8.codegen.cpu6502.assignment
import prog8.code.ast.*
import prog8.code.core.*
import prog8.codegen.cpu6502.AsmGen6502Internal
import prog8.codegen.cpu6502.returnsWhatWhere
internal enum class TargetStorageKind {
@@ -9,7 +9,6 @@ import prog8.code.ast.*
import prog8.code.core.*
import prog8.codegen.cpu6502.AsmGen6502Internal
import prog8.codegen.cpu6502.VariableAllocator
import prog8.codegen.cpu6502.returnsWhatWhere
internal class AssignmentAsmGen(
@@ -702,7 +701,10 @@ internal class AssignmentAsmGen(
}
}
private fun assignVirtualRegister(target: AsmAssignTarget, register: RegisterOrPair) {
internal fun assignVirtualRegister(target: AsmAssignTarget, register: RegisterOrPair) {
// Note: while the virtual register R0-R15 can hold a word value,
// the actual datatype that gets assigned is determined by the assignment target.
// This can be a single byte!
when {
target.datatype.isByteOrBool -> {
if(register in Cx16VirtualRegisters) {
@@ -43,15 +43,14 @@ internal class AssignmentGen(private val codeGen: IRCodeGen, private val express
if (normalsub != null) {
// note: multi-value returns are passed through A or AY (for the first value) then cx16.R15 down to R0
// (this allows unencumbered use of many Rx registers if you don't return that many values)
TODO("fix A/AY for first value")
val registersReverseOrder = Cx16VirtualRegisters.reversed()
normalsub.returns.zip(assignmentTargets).zip(registersReverseOrder).forEach {
val returnregs = (normalsub.astNode!! as IPtSubroutine).returnsWhatWhere()
normalsub.returns.zip(assignmentTargets).zip(returnregs).forEach {
val target = it.first.second as PtAssignTarget
if(!target.void) {
val assignSingle = PtAssignment(assignment.position, assignment.isVarInitializer)
assignSingle.add(target)
assignSingle.add(PtIdentifier("cx16.${it.second.toString().lowercase()}", it.first.first, assignment.position))
result += translateRegularAssign(assignSingle)
val reg = it.second.first
val regnum = codeGen.registers.next(irType(it.second.second))
val p = StExtSubParameter(reg, it.second.second)
result += assignCpuRegister(p, regnum, target)
}
}
}
@@ -76,7 +75,7 @@ internal class AssignmentGen(private val codeGen: IRCodeGen, private val express
RegisterOrPair.AX -> IRInstruction(Opcode.LOADHAX, IRDataType.WORD, reg1=regNum)
RegisterOrPair.AY -> IRInstruction(Opcode.LOADHAY, IRDataType.WORD, reg1=regNum)
RegisterOrPair.XY -> IRInstruction(Opcode.LOADHXY, IRDataType.WORD, reg1=regNum)
in Cx16VirtualRegisters -> IRInstruction(Opcode.LOADM, IRDataType.WORD, reg1=regNum, labelSymbol = "cx16.${returns.register.registerOrPair.toString().lowercase()}")
in Cx16VirtualRegisters -> IRInstruction(Opcode.LOADM, irType(returns.type), reg1=regNum, labelSymbol = "cx16.${returns.register.registerOrPair.toString().lowercase()}")
null -> {
TODO("assign CPU status flag ${returns.register.statusflag!!}")
}
@@ -671,25 +671,7 @@ internal class ExpressionGen(private val codeGen: IRCodeGen) {
else
argRegisters.add(FunctionCallArgs.ArgumentSpec("", null, FunctionCallArgs.RegSpec(paramDt, tr.resultReg, parameter.register)))
result += tr.chunks
when(parameter.register.registerOrPair) {
RegisterOrPair.A -> addInstr(result, IRInstruction(Opcode.STOREHA, IRDataType.BYTE, reg1=tr.resultReg), null)
RegisterOrPair.X -> addInstr(result, IRInstruction(Opcode.STOREHX, IRDataType.BYTE, reg1=tr.resultReg), null)
RegisterOrPair.Y -> addInstr(result, IRInstruction(Opcode.STOREHY, IRDataType.BYTE, reg1=tr.resultReg), null)
RegisterOrPair.AX -> addInstr(result, IRInstruction(Opcode.STOREHAX, IRDataType.WORD, reg1=tr.resultReg), null)
RegisterOrPair.AY -> addInstr(result, IRInstruction(Opcode.STOREHAY, IRDataType.WORD, reg1=tr.resultReg), null)
RegisterOrPair.XY -> addInstr(result, IRInstruction(Opcode.STOREHXY, IRDataType.WORD, reg1=tr.resultReg), null)
RegisterOrPair.FAC1 -> addInstr(result, IRInstruction(Opcode.STOREHFACZERO, IRDataType.FLOAT, fpReg1 = tr.resultFpReg), null)
RegisterOrPair.FAC2 -> addInstr(result, IRInstruction(Opcode.STOREHFACONE, IRDataType.FLOAT, fpReg1 = tr.resultFpReg), null)
in Cx16VirtualRegisters -> {
addInstr(result, IRInstruction(Opcode.STOREM, paramDt, reg1=tr.resultReg, labelSymbol = "cx16.${parameter.register.registerOrPair.toString().lowercase()}"), null)
}
null -> when(parameter.register.statusflag) {
// TODO: do the statusflag argument as last
Statusflag.Pc -> addInstr(result, IRInstruction(Opcode.LSR, paramDt, reg1=tr.resultReg), null)
else -> throw AssemblyError("weird statusflag as param")
}
else -> throw AssemblyError("unsupported register arg")
}
result += codeGen.setCpuRegister(parameter.register, paramDt, tr.resultReg, tr.resultFpReg)
}
if(callTarget.returns.size>1)
@@ -1761,12 +1761,18 @@ class IRCodeGen(
if(ret.children.size>1) {
// note: multi-value returns are passed through A or AY (for the first value) then cx16.R15 down to R0
// (this allows unencumbered use of many Rx registers if you don't return that many values)
TODO("fix A/AY for first value")
val registersReverseOrder = Cx16VirtualRegisters.reversed()
for ((value, register) in ret.children.zip(registersReverseOrder)) {
// make sure to assign the first value as the last in the sequence, to avoid clobbering the AY registers afterwards
val returnRegs = ret.definingISub()!!.returnsWhatWhere()
val values = ret.children.zip(returnRegs)
for ((value, register) in values.drop(1)) {
val tr = expressionEval.translateExpression(value as PtExpression)
addToResult(result, tr, tr.resultReg, -1)
addInstr(result, IRInstruction(Opcode.STOREM, tr.dt, reg1=tr.resultReg, labelSymbol = "cx16.${register.toString().lowercase()}"), null)
result += setCpuRegister(register.first, irType(register.second), tr.resultReg, -1)
}
values.first().also { (value, register) ->
val tr = expressionEval.translateExpression(value as PtExpression)
addToResult(result, tr, tr.resultReg, -1)
result += setCpuRegister(register.first, irType(register.second), tr.resultReg, -1)
}
addInstr(result, IRInstruction(Opcode.RETURN), null)
return result
@@ -1915,4 +1921,28 @@ class IRCodeGen(
}
fun registerTypes(): Map<Int, IRDataType> = registers.getTypes()
/**
 * Produces a new code chunk holding the single IR instruction that transfers a value
 * into the given register or status flag.
 *
 * @param registerOrFlag the destination: a cpu register (pair), FAC1/FAC2, a cx16 virtual register, or a status flag
 * @param paramDt IR datatype used for the virtual-register store and for the status flag instruction
 * @param resultReg IR register that holds the integer value
 * @param resultFpReg IR floating point register that holds the value for the FAC1/FAC2 destinations
 * @throws AssemblyError for a status flag other than Pc, or for a register that is not handled here
 */
fun setCpuRegister(registerOrFlag: RegisterOrStatusflag, paramDt: IRDataType, resultReg: Int, resultFpReg: Int): IRCodeChunk {
    val chunk = IRCodeChunk(null, null)
    val destination = registerOrFlag.registerOrPair
    val instruction = when(destination) {
        RegisterOrPair.A -> IRInstruction(Opcode.STOREHA, IRDataType.BYTE, reg1=resultReg)
        RegisterOrPair.X -> IRInstruction(Opcode.STOREHX, IRDataType.BYTE, reg1=resultReg)
        RegisterOrPair.Y -> IRInstruction(Opcode.STOREHY, IRDataType.BYTE, reg1=resultReg)
        RegisterOrPair.AX -> IRInstruction(Opcode.STOREHAX, IRDataType.WORD, reg1=resultReg)
        RegisterOrPair.AY -> IRInstruction(Opcode.STOREHAY, IRDataType.WORD, reg1=resultReg)
        RegisterOrPair.XY -> IRInstruction(Opcode.STOREHXY, IRDataType.WORD, reg1=resultReg)
        RegisterOrPair.FAC1 -> IRInstruction(Opcode.STOREHFACZERO, IRDataType.FLOAT, fpReg1 = resultFpReg)
        RegisterOrPair.FAC2 -> IRInstruction(Opcode.STOREHFACONE, IRDataType.FLOAT, fpReg1 = resultFpReg)
        // virtual registers are written as a store to the "cx16.rN" symbol
        in Cx16VirtualRegisters -> IRInstruction(Opcode.STOREM, paramDt, reg1=resultReg, labelSymbol = "cx16.${destination.toString().lowercase()}")
        // no register given: the destination is a status flag
        null -> when(registerOrFlag.statusflag) {
            // TODO: do the statusflag argument as last
            Statusflag.Pc -> IRInstruction(Opcode.LSR, paramDt, reg1=resultReg)
            else -> throw AssemblyError("weird statusflag as param")
        }
        else -> throw AssemblyError("unsupported register arg")
    }
    chunk += instruction
    return chunk
}
}
+4 -1
View File
@@ -1,7 +1,9 @@
TODO
====
- IR: Multi-value returns of normal subroutines: use cpu register A or AY for the first one and only start using virtual registers for the rest. see TODO("fix A/AY for first value")
- IR: call main.two():r4.w,r5.w the registers mentioned after the call are wrong/unused in case of multi-value returns. Better to clear this to avoid confusion? (they ARE correct for single value returns!)
- update docs about call convention for multi-value results (first is in A or AY, then R15...R0)
...
@@ -38,6 +40,7 @@ Future Things and Ideas
IR/VM
-----
- Split the simplified AST and Symboltable from codeCore. VirtualMachine and Intermediate should not need those. (maybe others too?)
- getting it in shape for code generation...: the IR file should be able to encode every detail about a prog8 program (the VM doesn't have to actually be able to run all of it though!)
- fix call() return value handling
- proper code gen for the CALLI instruction and that it (optionally) returns a word value that needs to be assigned to a reg
+86 -16
View File
@@ -57,6 +57,10 @@ class VirtualMachine(irProgram: IRProgram) {
var statusZero = false
var statusNegative = false
var statusOverflow = false
var hardwareRegisterA: UByte = 0u
var hardwareRegisterX: UByte = 0u
var hardwareRegisterY: UByte = 0u
internal var randomGenerator = Random(0xa55a7653)
internal var randomGeneratorFloats = Random(0xc0d3dbad)
internal var mul16LastUpper = 0u
@@ -184,14 +188,14 @@ class VirtualMachine(irProgram: IRProgram) {
Opcode.LOADI -> InsLOADI(ins)
Opcode.LOADIX -> InsLOADIX(ins)
Opcode.LOADR -> InsLOADR(ins)
Opcode.LOADHA,
Opcode.LOADHX,
Opcode.LOADHY,
Opcode.LOADHAX,
Opcode.LOADHAY,
Opcode.LOADHXY,
Opcode.LOADHFACZERO,
Opcode.LOADHFACONE -> throw IllegalArgumentException("VM cannot access actual CPU hardware register")
Opcode.LOADHA -> InsLOADHA(ins)
Opcode.LOADHX -> InsLOADHX(ins)
Opcode.LOADHY -> InsLOADHY(ins)
Opcode.LOADHAX -> InsLOADHAX(ins)
Opcode.LOADHAY -> InsLOADHAY(ins)
Opcode.LOADHXY -> InsLOADHXY(ins)
Opcode.LOADHFACZERO -> TODO("read cpu reg FAC0")
Opcode.LOADHFACONE -> TODO("read cpu reg FAC1")
Opcode.STOREM -> InsSTOREM(ins)
Opcode.STOREX -> InsSTOREX(ins)
Opcode.STOREIX -> InsSTOREIX(ins)
@@ -199,14 +203,14 @@ class VirtualMachine(irProgram: IRProgram) {
Opcode.STOREZM -> InsSTOREZM(ins)
Opcode.STOREZX -> InsSTOREZX(ins)
Opcode.STOREZI -> InsSTOREZI(ins)
Opcode.STOREHA,
Opcode.STOREHX,
Opcode.STOREHY,
Opcode.STOREHAX,
Opcode.STOREHAY,
Opcode.STOREHXY,
Opcode.STOREHFACZERO,
Opcode.STOREHFACONE-> throw IllegalArgumentException("VM cannot access actual CPU hardware register")
Opcode.STOREHA -> InsSTOREHA(ins)
Opcode.STOREHX -> InsSTOREHX(ins)
Opcode.STOREHY -> InsSTOREHY(ins)
Opcode.STOREHAX -> InsSTOREHAX(ins)
Opcode.STOREHAY -> InsSTOREHAY(ins)
Opcode.STOREHXY -> InsSTOREHXY(ins)
Opcode.STOREHFACZERO -> TODO("store cpu reg FAC0")
Opcode.STOREHFACONE-> TODO("store cpu reg FAC1")
Opcode.JUMP -> InsJUMP(ins)
Opcode.JUMPI -> InsJUMPI(ins)
Opcode.PREPARECALL -> nextPc()
@@ -2322,6 +2326,72 @@ class VirtualMachine(irProgram: IRProgram) {
nextPc()
}
/** LOADHA: copies the emulated hardware register A into IR register reg1 (unsigned byte), then calls nextPc(). */
private fun InsLOADHA(i: IRInstruction) {
    val destination = i.reg1!!
    registers.setUB(destination, hardwareRegisterA)
    nextPc()
}
/** LOADHX: copies the emulated hardware register X into IR register reg1 (unsigned byte), then calls nextPc(). */
private fun InsLOADHX(i: IRInstruction) {
    val destination = i.reg1!!
    registers.setUB(destination, hardwareRegisterX)
    nextPc()
}
/** LOADHY: copies the emulated hardware register Y into IR register reg1 (unsigned byte), then calls nextPc(). */
private fun InsLOADHY(i: IRInstruction) {
    val destination = i.reg1!!
    registers.setUB(destination, hardwareRegisterY)
    nextPc()
}
/** LOADHAX: combines emulated registers A (low byte) and X (high byte) into a word and puts it in IR register reg1. */
private fun InsLOADHAX(i: IRInstruction) {
    val destination = i.reg1!!
    val lowByte = hardwareRegisterA.toUInt()
    val highByte = hardwareRegisterX.toUInt()
    registers.setUW(destination, ((highByte shl 8) or lowByte).toUShort())
    nextPc()
}
/** LOADHAY: combines emulated registers A (low byte) and Y (high byte) into a word and puts it in IR register reg1. */
private fun InsLOADHAY(i: IRInstruction) {
    val destination = i.reg1!!
    val lowByte = hardwareRegisterA.toUInt()
    val highByte = hardwareRegisterY.toUInt()
    registers.setUW(destination, ((highByte shl 8) or lowByte).toUShort())
    nextPc()
}
/** LOADHXY: combines emulated registers X (low byte) and Y (high byte) into a word and puts it in IR register reg1. */
private fun InsLOADHXY(i: IRInstruction) {
    val destination = i.reg1!!
    val lowByte = hardwareRegisterX.toUInt()
    val highByte = hardwareRegisterY.toUInt()
    registers.setUW(destination, ((highByte shl 8) or lowByte).toUShort())
    nextPc()
}
/** STOREHA: copies the unsigned byte in IR register reg1 into the emulated hardware register A, then calls nextPc(). */
private fun InsSTOREHA(i: IRInstruction) {
    val source = i.reg1!!
    hardwareRegisterA = registers.getUB(source)
    nextPc()
}
/** STOREHX: copies the unsigned byte in IR register reg1 into the emulated hardware register X, then calls nextPc(). */
private fun InsSTOREHX(i: IRInstruction) {
    val source = i.reg1!!
    hardwareRegisterX = registers.getUB(source)
    nextPc()
}
/** STOREHY: copies the unsigned byte in IR register reg1 into the emulated hardware register Y, then calls nextPc(). */
private fun InsSTOREHY(i: IRInstruction) {
    val source = i.reg1!!
    hardwareRegisterY = registers.getUB(source)
    nextPc()
}
/** STOREHAX: splits the word in IR register reg1 over the emulated registers A (low byte) and X (high byte). */
private fun InsSTOREHAX(i: IRInstruction) {
    val value = registers.getUW(i.reg1!!).toUInt()
    // toUByte() keeps only the least significant 8 bits
    hardwareRegisterA = value.toUByte()
    hardwareRegisterX = (value shr 8).toUByte()
    nextPc()
}
/** STOREHAY: splits the word in IR register reg1 over the emulated registers A (low byte) and Y (high byte). */
private fun InsSTOREHAY(i: IRInstruction) {
    val value = registers.getUW(i.reg1!!).toUInt()
    // toUByte() keeps only the least significant 8 bits
    hardwareRegisterA = value.toUByte()
    hardwareRegisterY = (value shr 8).toUByte()
    nextPc()
}
/** STOREHXY: splits the word in IR register reg1 over the emulated registers X (low byte) and Y (high byte). */
private fun InsSTOREHXY(i: IRInstruction) {
    val value = registers.getUW(i.reg1!!).toUInt()
    // toUByte() keeps only the least significant 8 bits
    hardwareRegisterX = value.toUByte()
    hardwareRegisterY = (value shr 8).toUByte()
    nextPc()
}
private fun getBranchOperands(i: IRInstruction): Pair<Int, Int> {
return when(i.type) {
IRDataType.BYTE -> Pair(registers.getSB(i.reg1!!).toInt(), registers.getSB(i.reg2!!).toInt())