consolidate IR line parse function

This commit is contained in:
Irmen de Jong 2022-09-27 17:45:26 +02:00
parent 06cf2e0bd7
commit 1d2ce2cbeb
10 changed files with 202 additions and 392 deletions

View File

@ -243,7 +243,7 @@ class IRCodeGen(
val data = node.file.readBytes()
.drop(node.offset?.toInt() ?: 0)
.take(node.length?.toInt() ?: Int.MAX_VALUE)
chunk += IRCodeInlineBinary(data.toByteArray())
chunk += IRCodeInlineBinary(data.map { it.toUByte() })
return chunk
}
is PtAddressOf,

View File

@ -1 +1 @@
8.6.1
8.7-dev

View File

@ -3,7 +3,7 @@ TODO
For next release
^^^^^^^^^^^^^^^^
- vm: replace addAssemblyToProgram() by call to IRFileLoader's logic, instead of duplicating it.
- replace throw IllegalArgumentException() by require()?
...
@ -19,7 +19,6 @@ Future Things and Ideas
^^^^^^^^^^^^^^^^^^^^^^^
Compiler:
- replace throw IllegalArgumentException() by require()?
- vm/ir: put variables and arrays in BSS section (unless -noreinit is specified)
- vm: Jumps go to a code block rather than a specific address(label) -> also helps future dead code elimination?
- vm: the above means that every label introduces a new code block. This eliminates the use of actual labels altogether.

View File

@ -1,69 +1,5 @@
%import textio
%zeropage basicsafe
main {
sub start() {
ubyte v1 = 1
uword v2 = 1
ubyte counterb
uword counter
repeat v1-1 {
txt.print("!")
}
repeat v2-1 {
txt.print("?")
}
for counterb in 0 to v1 {
txt.print("y1")
}
for counter in 0 to v2 {
txt.print("y2")
}
repeat v1 {
txt.print("ok1")
}
repeat v2 {
txt.print("ok2")
}
repeat v1-1 {
txt.print("!")
}
repeat v2-1 {
txt.print("?")
}
while v1-1 {
txt.print("%")
}
while v2-1 {
txt.print("*")
}
for counterb in 0 to v1-1 {
txt.print("@")
}
for counter in 0 to v2-1 {
txt.print("y#")
}
repeat 0 {
txt.print("zero1")
}
repeat $0000 {
txt.print("zero2")
}
%asmbinary "bsieve.prg", 10, 200
}
}

View File

@ -254,8 +254,6 @@ class IRFileReader {
private val asmsubPattern = Regex("<ASMSUB NAME=(.+) ADDRESS=(.+) CLOBBERS=(.*) RETURNS=(.*) POS=(.+)>")
private val subPattern = Regex("<SUB NAME=(.+) RETURNTYPE=(.+) POS=(.+)>")
private val posPattern = Regex("\\[(.+): line (.+) col (.+)-(.+)\\]")
private val instructionPattern = Regex("""([a-z]+)(\.b|\.w|\.f)?(.*)""", RegexOption.IGNORE_CASE)
private val labelPattern = Regex("""_([a-zA-Z\d\._]+):""")
private fun parseBlock(startline: String, lines: Iterator<String>, variables: List<StStaticVariable>): IRBlock {
var line = startline
@ -397,174 +395,21 @@ class IRFileReader {
if (line.isBlank() || line.startsWith(';'))
continue
if(line=="<BYTES>") {
val bytes = mutableListOf<Byte>()
val bytes = mutableListOf<UByte>()
line = lines.next()
while(line!="</BYTES>") {
line.trimEnd().windowed(size=2, step=2) {
bytes.add(it.toString().toByte(16))
bytes.add(it.toString().toUByte(16))
}
line = lines.next()
}
chunk += IRCodeInlineBinary(bytes.toByteArray())
chunk += IRCodeInlineBinary(bytes)
} else {
chunk += parseCodeLine(line)
chunk += parseIRCodeLine(line, 0, mutableMapOf())
}
}
}
/**
 * Parses one line of IR code text into an [IRCodeLine].
 *
 * The trimmed line is first matched against the instruction pattern; when that fails it must
 * match the label pattern instead, and an [IRCodeLabel] is returned.
 * For an instruction, the opcode, optional type suffix and up to four comma separated operands
 * are parsed and validated against the instruction format of the opcode.
 *
 * @param line the text line to parse
 * @return an [IRCodeLabel] or an [IRInstruction]
 * @throws IRParseException if the line is neither a valid label nor a valid instruction,
 *         or if its operands do not fit the instruction format
 */
private fun parseCodeLine(line: String): IRCodeLine {
val match = instructionPattern.matchEntire(line.trim())
if(match==null) {
// it's a label.
val labelmatch = labelPattern.matchEntire(line.trim()) ?: throw IRParseException("invalid label")
return IRCodeLabel(labelmatch.groupValues[1])
}
// it's an instruction.
// groupValues[0] is the whole match, so it is skipped in the destructuring.
val (_, instr, typestr, rest) = match.groupValues
val opcode = try {
Opcode.valueOf(instr.uppercase())
} catch (ax: IllegalArgumentException) {
throw IRParseException("invalid vmasm instruction: $instr")
}
var type: VmDataType? = convertIRType(typestr)
val formats = instructionFormats.getValue(opcode)
val format: InstructionFormat
// When the parsed type has no format for this opcode, BYTE is tried as the type,
// and if that has no format either, the format stored under the null key is used.
if(type !in formats) {
type = VmDataType.BYTE
format = if(type !in formats)
formats.getValue(null)
else
formats.getValue(type)
} else {
format = formats.getValue(type)
}
// parse the operands
// Operands are matched on the lowercased text; label names are re-read from 'rest' to keep their case.
val operands = rest.lowercase().split(",").toMutableList()
var reg1: Int? = null
var reg2: Int? = null
var reg3: Int? = null
var fpReg1: Int? = null
var fpReg2: Int? = null
var fpReg3: Int? = null
var value: Float? = null
var operand: String?
var labelSymbol: String? = null
// Each operand is either a register (r<n>), a float register (fr<n>), a label (_name) or a value.
// A label or value ends the operand list: the remaining operands are discarded.
if(operands.isNotEmpty() && operands[0].isNotEmpty()) {
operand = operands.removeFirst().trim()
if(operand[0]=='r')
reg1 = operand.substring(1).toInt()
else if(operand[0]=='f' && operand[1]=='r')
fpReg1 = operand.substring(2).toInt()
else {
if(operand.startsWith('_')) {
// it's a label
labelSymbol = rest.split(",")[0].trim().substring(1) // keep the original case
value = null
} else {
value = parseIRValue(operand)
}
operands.clear()
}
if(operands.isNotEmpty()) {
operand = operands.removeFirst().trim()
if(operand[0]=='r')
reg2 = operand.substring(1).toInt()
else if(operand[0]=='f' && operand[1]=='r')
fpReg2 = operand.substring(2).toInt()
else {
if(operand.startsWith('_')) {
// it's a label
labelSymbol = rest.split(",")[1].trim().substring(1) // keep the original case
value = null
} else {
value = parseIRValue(operand)
}
operands.clear()
}
if(operands.isNotEmpty()) {
operand = operands.removeFirst().trim()
if(operand[0]=='r')
reg3 = operand.substring(1).toInt()
else if(operand[0]=='f' && operand[1]=='r')
fpReg3 = operand.substring(2).toInt()
else {
if(operand.startsWith('_')) {
// it's a label
labelSymbol = rest.split(",")[2].trim().substring(1) // keep the original case
value = null
} else {
value = parseIRValue(operand)
}
operands.clear()
}
// A fourth operand is never parsed as a register: only a label or a value is accepted here.
if(operands.isNotEmpty()) {
operand = operands.removeFirst().trim()
if(operand.startsWith('_')) {
// it's a label
labelSymbol = rest.split(",")[3].trim().substring(1) // keep the original case
value = null
} else {
value = parseIRValue(operand)
}
}
}
}
}
// shift the operands back into place
// (a register found in the second position moves to the first when the first one is unset)
while(reg1==null && reg2!=null) {
reg1 = reg2
reg2 = reg3
reg3 = null
}
while(fpReg1==null && fpReg2!=null) {
fpReg1 = fpReg2
fpReg2 = fpReg3
fpReg3 = null
}
// Validate what was parsed against the instruction format.
if(reg3!=null)
throw IRParseException("too many reg arguments $line")
if(fpReg3!=null)
throw IRParseException("too many fpreg arguments $line")
if(type!=null && type !in formats)
throw IRParseException("invalid type code for $line")
if(format.reg1 && reg1==null)
throw IRParseException("needs reg1 for $line")
if(format.reg2 && reg2==null)
throw IRParseException("needs reg2 for $line")
if(format.value && value==null && labelSymbol==null)
throw IRParseException("needs value or label for $line")
if(!format.reg1 && reg1!=null)
throw IRParseException("invalid reg1 for $line")
if(!format.reg2 && reg2!=null)
throw IRParseException("invalid reg2 for $line")
// The range check is skipped for opcodes listed in OpcodesWithAddress.
if(value!=null && opcode !in OpcodesWithAddress) {
when (type) {
VmDataType.BYTE -> {
if (value < -128 || value > 255)
throw IRParseException("value out of range for byte: $value")
}
VmDataType.WORD -> {
if (value < -32768 || value > 65535)
throw IRParseException("value out of range for word: $value")
}
VmDataType.FLOAT -> {}
null -> {}
}
}
// The parsed number is passed on as an int and/or a float, depending on what the format declares.
var floatValue: Float? = null
var intValue: Int? = null
if(format.value && value!=null)
intValue = value.toInt()
if(format.fpValue && value!=null)
floatValue = value
return IRInstruction(opcode, type, reg1, reg2, fpReg1, fpReg2, intValue, floatValue, labelSymbol)
}
private fun parseRegisterOrStatusflag(regs: String): RegisterOrStatusflag {
var reg: RegisterOrPair? = null
var sf: Statusflag? = null

View File

@ -426,7 +426,7 @@ data class IRInstruction(
val value: Int?=null, // 0-$ffff
val fpValue: Float?=null,
val labelSymbol: String?=null, // symbolic label name as alternative to value (so only for Branch/jump/call Instructions!)
val binaryData: ByteArray?=null
val binaryData: Collection<UByte>?=null
): IRCodeLine() {
// reg1 and fpreg1 can be IN/OUT/INOUT (all others are readonly INPUT)
// This knowledge is useful in IL assembly optimizers to see how registers are used.

View File

@ -130,7 +130,7 @@ class IRCodeLabel(val name: String): IRCodeLine()
class IRCodeComment(val comment: String): IRCodeLine()
class IRCodeInlineBinary(val data: ByteArray): IRCodeLine()
class IRCodeInlineBinary(val data: Collection<UByte>): IRCodeLine()
abstract class IRCodeChunkBase(val position: Position) {
val lines = mutableListOf<IRCodeLine>()

View File

@ -79,4 +79,169 @@ fun parseIRValue(value: String): Float {
throw IRParseException("address-of should be done with normal LOAD <symbol>")
else
return value.toFloat()
}
}
private val instructionPattern = Regex("""([a-z]+)(\.b|\.w|\.f)?(.*)""", RegexOption.IGNORE_CASE)
private val labelPattern = Regex("""_([a-zA-Z\d\._]+):""")
/**
 * Parses a single line of IR / vmasm text into an [IRCodeLine].
 *
 * Leading and trailing whitespace is ignored. A line that matches the label pattern yields an
 * [IRCodeLabel]; any other line must match the instruction pattern and yields an [IRInstruction].
 * Operands are comma separated and each one is a register (`r<number>`), a float register
 * (`fr<number>`), a symbol (optionally prefixed with `_`) or a numeric value. A symbol or value
 * ends the operand list.
 *
 * @param line the text line to parse
 * @param pc the key under which a symbolic operand is recorded in [placeholders]
 * @param placeholders receives `pc -> symbol name` for every operand that is a symbol rather than a number
 * @return the parsed label or instruction
 * @throws IRParseException if the line is not a valid label or instruction, or if the operands
 *         do not fit the instruction format of the opcode
 */
fun parseIRCodeLine(line: String, pc: Int, placeholders: MutableMap<Int, String>): IRCodeLine {
    // Note: this function is used from multiple places:
    // the IR File Reader but also the VirtualMachine itself to make sense of any inline vmasm blocks.

    // Trim once, so that the label check and the instruction check look at the same text.
    // (Matching the instruction pattern on the untrimmed line rejects any line with surrounding whitespace.)
    val trimmed = line.trim()
    val labelmatch = labelPattern.matchEntire(trimmed)
    if(labelmatch!=null)
        return IRCodeLabel(labelmatch.groupValues[1])

    val match = instructionPattern.matchEntire(trimmed)
        ?: throw IRParseException("invalid IR instruction: $line")
    val (instr, typestr, rest) = match.destructured
    val opcode = try {
        Opcode.valueOf(instr.uppercase())
    } catch (ax: IllegalArgumentException) {
        throw IRParseException("invalid vmasm instruction: $instr")
    }

    // When the parsed type has no format for this opcode, BYTE is tried as the type,
    // and if that has no format either, the format stored under the null key is used.
    var type: VmDataType? = convertIRType(typestr)
    val formats = instructionFormats.getValue(opcode)
    val format: InstructionFormat
    if(type !in formats) {
        type = VmDataType.BYTE
        format = if(type !in formats)
            formats.getValue(null)
        else
            formats.getValue(type)
    } else {
        format = formats.getValue(type)
    }

    // parse the operands
    // Matching is done on the lowercased text; symbol names are re-read from 'rest' to keep their case.
    val operands = rest.lowercase().split(",").toMutableList()
    var reg1: Int? = null
    var reg2: Int? = null
    var reg3: Int? = null
    var fpReg1: Int? = null
    var fpReg2: Int? = null
    var fpReg3: Int? = null
    var value: Float? = null
    var labelSymbol: String? = null

    // Returns the register number when the operand is exactly <prefix><number>, otherwise null.
    // Symbols that merely start with the same letters (such as "reset" or "f") are not registers.
    fun registerNumber(operand: String, prefix: String): Int? =
        if(operand.startsWith(prefix)) operand.substring(prefix.length).toIntOrNull() else null

    // Parses a non-register operand: a symbol is stored in labelSymbol and in the placeholders
    // (the result is null in that case), anything else is parsed as a number.
    fun parseValueOrPlaceholder(operand: String, restIndex: Int): Float? {
        if(operand.isEmpty())
            throw IRParseException("missing operand in $line")
        return if(operand.startsWith('_')) {
            val symbol = rest.split(",")[restIndex].trim().drop(1)
            labelSymbol = symbol
            placeholders[pc] = symbol
            null
        } else if(operand[0].isLetter()) {
            val symbol = rest.split(",")[restIndex].trim()
            labelSymbol = symbol
            placeholders[pc] = symbol
            null
        } else {
            parseIRValue(operand)
        }
    }

    if(operands.isNotEmpty() && operands[0].isNotEmpty()) {
        val first = operands.removeFirst().trim()
        val firstReg = registerNumber(first, "r")
        val firstFpReg = registerNumber(first, "fr")
        if(firstReg!=null)
            reg1 = firstReg
        else if(firstFpReg!=null)
            fpReg1 = firstFpReg
        else {
            value = parseValueOrPlaceholder(first, 0)
            operands.clear()    // a value or symbol ends the operand list
        }
        if(operands.isNotEmpty()) {
            val second = operands.removeFirst().trim()
            val secondReg = registerNumber(second, "r")
            val secondFpReg = registerNumber(second, "fr")
            if(secondReg!=null)
                reg2 = secondReg
            else if(secondFpReg!=null)
                fpReg2 = secondFpReg
            else {
                value = parseValueOrPlaceholder(second, 1)
                operands.clear()
            }
            if(operands.isNotEmpty()) {
                val third = operands.removeFirst().trim()
                val thirdReg = registerNumber(third, "r")
                val thirdFpReg = registerNumber(third, "fr")
                if(thirdReg!=null)
                    reg3 = thirdReg
                else if(thirdFpReg!=null)
                    fpReg3 = thirdFpReg
                else {
                    value = parseValueOrPlaceholder(third, 2)
                    operands.clear()
                }
                if(operands.isNotEmpty()) {
                    TODO("placeholder symbol? $operands rest=$rest'")
                    // operands.clear()
                }
            }
        }
    }

    // shift the operands back into place
    // (a register found in the second position moves to the first when the first one is unset)
    while(reg1==null && reg2!=null) {
        reg1 = reg2
        reg2 = reg3
        reg3 = null
    }
    while(fpReg1==null && fpReg2!=null) {
        fpReg1 = fpReg2
        fpReg2 = fpReg3
        fpReg3 = null
    }

    // Validate what was parsed against the instruction format.
    if(reg3!=null)
        throw IRParseException("too many reg arguments $line")
    if(fpReg3!=null)
        throw IRParseException("too many fpreg arguments $line")
    if(type!=null && type !in formats)
        throw IRParseException("invalid type code for $line")
    if(format.reg1 && reg1==null)
        throw IRParseException("needs reg1 for $line")
    if(format.reg2 && reg2==null)
        throw IRParseException("needs reg2 for $line")
    if(format.value && value==null && labelSymbol==null)
        throw IRParseException("needs value or symbol for $line")
    if(!format.reg1 && reg1!=null)
        throw IRParseException("invalid reg1 for $line")
    if(!format.reg2 && reg2!=null)
        throw IRParseException("invalid reg2 for $line")

    // The range check is skipped for opcodes listed in OpcodesWithAddress.
    if(value!=null && opcode !in OpcodesWithAddress) {
        when (type) {
            VmDataType.BYTE -> {
                if (value < -128 || value > 255)
                    throw IRParseException("value out of range for byte: $value")
            }
            VmDataType.WORD -> {
                if (value < -32768 || value > 65535)
                    throw IRParseException("value out of range for word: $value")
            }
            VmDataType.FLOAT -> {}
            null -> {}
        }
    }

    // The parsed number is passed on as an int and/or a float, depending on what the format declares.
    val intValue: Int? = if(format.value) value?.toInt() else null
    val floatValue: Float? = if(format.fpValue) value else null

    if(opcode in OpcodesForCpuRegisters) {
        // For these opcodes the last operand names a cpu register, which is passed along in labelSymbol.
        val regStr = rest.split(',').last().lowercase().trim()
        val reg = if(regStr.startsWith('_')) regStr.substring(1) else regStr
        if(reg !in setOf(
                "a", "x", "y",
                "ax", "ay", "xy",
                "r0", "r1", "r2", "r3",
                "r4", "r5", "r6", "r7",
                "r8", "r9", "r10","r11",
                "r12", "r13", "r14", "r15",
                "pc", "pz", "pv","pn"))
            throw IRParseException("invalid cpu reg: $reg")
        return IRInstruction(opcode, type, reg1, labelSymbol = reg)
    }

    return IRInstruction(opcode, type, reg1, reg2, fpReg1, fpReg2, intValue, floatValue, labelSymbol = labelSymbol)
}

View File

@ -2,7 +2,6 @@ package prog8.vm
import prog8.code.core.DataType
import prog8.intermediate.*
import kotlin.IllegalArgumentException
class VmProgramLoader {
@ -24,7 +23,7 @@ class VmProgramLoader {
// make sure that if there is a "main.start" entrypoint, we jump to it
irProgram.blocks.firstOrNull()?.let {
if(it.subroutines.any { sub -> sub.name=="main.start" }) {
rememberPlaceholder("main.start", program.size)
placeholders[program.size] = "main.start"
program += IRInstruction(Opcode.JUMP, labelSymbol = "main.start")
}
}
@ -33,12 +32,12 @@ class VmProgramLoader {
if(block.address!=null)
throw IRParseException("blocks cannot have a load address for vm: ${block.name}")
block.inlineAssembly.forEach { addAssemblyToProgram(it, program) }
block.inlineAssembly.forEach { addAssemblyToProgram(it, program, symbolAddresses) }
block.subroutines.forEach {
symbolAddresses[it.name] = program.size
it.chunks.forEach { chunk ->
if(chunk is IRInlineAsmChunk)
addAssemblyToProgram(chunk, program)
addAssemblyToProgram(chunk, program, symbolAddresses)
else
addToProgram(chunk.lines, program, symbolAddresses)
}
@ -158,10 +157,6 @@ class VmProgramLoader {
}
}
private fun rememberPlaceholder(symbol: String, pc: Int) {
placeholders[pc] = symbol
}
private fun pass2replaceLabelsByProgIndex(
program: MutableList<IRInstruction>,
symbolAddresses: MutableMap<String, Int>
@ -192,7 +187,7 @@ class VmProgramLoader {
lines.map {
when(it) {
is IRInstruction -> {
it.labelSymbol?.let { symbol -> rememberPlaceholder(symbol, program.size)}
it.labelSymbol?.let { symbol -> placeholders[program.size]=symbol }
if(it.opcode==Opcode.SYSCALL) {
// convert IR Syscall to VM Syscall
val vmSyscall = when(it.value!!) {
@ -226,160 +221,14 @@ class VmProgramLoader {
private fun addAssemblyToProgram(
asmChunk: IRInlineAsmChunk,
program: MutableList<IRInstruction>,
symbolAddresses: MutableMap<String, Int>,
) {
// TODO use IRFileReader.parseCodeLine instead of duplicating everything here
val instructionPattern = Regex("""([a-z]+)(\.b|\.w|\.f)?(.*)""", RegexOption.IGNORE_CASE)
asmChunk.assembly.lineSequence().forEach {
val line = it.trim()
val match = instructionPattern.matchEntire(line)
?: throw IRParseException("invalid IR instruction: $line in ${asmChunk.position}")
val (instr, typestr, rest) = match.destructured
val opcode = try {
Opcode.valueOf(instr.uppercase())
} catch (ax: IllegalArgumentException) {
throw IRParseException("invalid vmasm instruction: $instr")
}
var type: VmDataType? = convertIRType(typestr)
val formats = instructionFormats.getValue(opcode)
val format: InstructionFormat
if(type !in formats) {
type = VmDataType.BYTE
format = if(type !in formats)
formats.getValue(null)
else
formats.getValue(type)
} else {
format = formats.getValue(type)
}
// parse the operands
val operands = rest.lowercase().split(",").toMutableList()
var reg1: Int? = null
var reg2: Int? = null
var reg3: Int? = null
var fpReg1: Int? = null
var fpReg2: Int? = null
var fpReg3: Int? = null
var value: Float? = null
var operand: String?
fun parseValueOrPlaceholder(operand: String, pc: Int, rest: String, restIndex: Int): Float {
return if(operand.startsWith('_')) {
rememberPlaceholder(rest.split(",")[restIndex].trim().drop(1), pc)
0f
} else if(operand[0].isLetter()) {
rememberPlaceholder(rest.split(",")[restIndex].trim(), pc)
0f
} else
parseIRValue(operand)
}
if(operands.isNotEmpty() && operands[0].isNotEmpty()) {
operand = operands.removeFirst().trim()
if(operand[0]=='r')
reg1 = operand.substring(1).toInt()
else if(operand[0]=='f' && operand[1]=='r')
fpReg1 = operand.substring(2).toInt()
else {
value = parseValueOrPlaceholder(operand, program.size, rest, 0)
operands.clear()
}
if(operands.isNotEmpty()) {
operand = operands.removeFirst().trim()
if(operand[0]=='r')
reg2 = operand.substring(1).toInt()
else if(operand[0]=='f' && operand[1]=='r')
fpReg2 = operand.substring(2).toInt()
else {
value = parseValueOrPlaceholder(operand, program.size, rest, 1)
operands.clear()
}
if(operands.isNotEmpty()) {
operand = operands.removeFirst().trim()
if(operand[0]=='r')
reg3 = operand.substring(1).toInt()
else if(operand[0]=='f' && operand[1]=='r')
fpReg3 = operand.substring(2).toInt()
else {
value = parseValueOrPlaceholder(operand, program.size, rest, 2)
operands.clear()
}
if(operands.isNotEmpty()) {
TODO("placeholder symbol? $operands rest=$rest'")
// operands.clear()
}
}
}
}
// shift the operands back into place
while(reg1==null && reg2!=null) {
reg1 = reg2
reg2 = reg3
reg3 = null
}
while(fpReg1==null && fpReg2!=null) {
fpReg1 = fpReg2
fpReg2 = fpReg3
fpReg3 = null
}
if(reg3!=null)
throw IRParseException("too many reg arguments $line")
if(fpReg3!=null)
throw IRParseException("too many fpreg arguments $line")
if(type!=null && type !in formats)
throw IRParseException("invalid type code for $line")
if(format.reg1 && reg1==null)
throw IRParseException("needs reg1 for $line")
if(format.reg2 && reg2==null)
throw IRParseException("needs reg2 for $line")
if(format.value && value==null)
throw IRParseException("needs value for $line")
if(!format.reg1 && reg1!=null)
throw IRParseException("invalid reg1 for $line")
if(!format.reg2 && reg2!=null)
throw IRParseException("invalid reg2 for $line")
if(value!=null && opcode !in OpcodesWithAddress) {
when (type) {
VmDataType.BYTE -> {
if (value < -128 || value > 255)
throw IRParseException("value out of range for byte: $value")
}
VmDataType.WORD -> {
if (value < -32768 || value > 65535)
throw IRParseException("value out of range for word: $value")
}
VmDataType.FLOAT -> {}
null -> {}
}
}
var floatValue: Float? = null
var intValue: Int? = null
if(format.value)
intValue = value!!.toInt()
if(format.fpValue)
floatValue = value!!
if(opcode in OpcodesForCpuRegisters) {
val regStr = rest.split(',').last().lowercase().trim()
val reg = if(regStr.startsWith('_')) regStr.substring(1) else regStr
if(reg !in setOf(
"a", "x", "y",
"ax", "ay", "xy",
"r0", "r1", "r2", "r3",
"r4", "r5", "r6", "r7",
"r8", "r9", "r10","r11",
"r12", "r13", "r14", "r15",
"pc", "pz", "pv","pn"))
throw IRParseException("invalid cpu reg: $reg")
program += IRInstruction(opcode, type, reg1, labelSymbol = reg)
} else {
program += IRInstruction(opcode, type, reg1, reg2, fpReg1, fpReg2, intValue, floatValue)
}
val parsed = parseIRCodeLine(it.trim(), program.size, placeholders)
if(parsed is IRInstruction)
program += parsed
else if(parsed is IRCodeLabel)
symbolAddresses[parsed.name] = program.size
}
}
}

View File

@ -66,6 +66,22 @@ class TestVm: FunSpec( {
}
test("vm asmsub not supported") {
val program = IRProgram("test", IRSymbolTable(null), getTestOptions(), VMTarget())
val block = IRBlock("testmain", null, IRBlock.BlockAlignment.NONE, Position.DUMMY)
val startSub = IRSubroutine("testmain.testsub", emptyList(), null, Position.DUMMY)
val code = IRCodeChunk(Position.DUMMY)
code += IRInstruction(Opcode.BINARYDATA, binaryData = listOf(1u,2u,3u))
code += IRInstruction(Opcode.RETURN)
startSub += code
block += startSub
program.addBlock(block)
val vm = VirtualMachine(program)
shouldThrowWithMessage<NotImplementedError>("An operation is not implemented: BINARYDATA not yet supported in VM") {
vm.run()
}
}
test("vm asmbinary not supported") {
val program = IRProgram("test", IRSymbolTable(null), getTestOptions(), VMTarget())
val block = IRBlock("main", null, IRBlock.BlockAlignment.NONE, Position.DUMMY)
val startSub = IRAsmSubroutine("main.asmstart", Position.DUMMY, 0x2000u, emptySet(), emptyList(), emptyList(), "inlined asm here")