moving string escaping out of antlr project

This commit is contained in:
Irmen de Jong 2022-04-10 15:09:12 +02:00
parent 207a7e5160
commit b6eb343234
18 changed files with 181 additions and 185 deletions

View File

@ -24,7 +24,7 @@ compileTestKotlin {
}
dependencies {
implementation project(':virtualmachine')
// should have no dependencies to other modules
implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk8"
implementation "com.michael-bull.kotlin-result:kotlin-result-jvm:1.1.14"
}

View File

@ -10,6 +10,5 @@
<orderEntry type="sourceFolder" forTests="false" />
<orderEntry type="library" name="KotlinJavaRuntime" level="project" />
<orderEntry type="library" name="michael.bull.kotlin.result.jvm" level="project" />
<orderEntry type="module" module-name="virtualmachine" />
</component>
</module>

View File

@ -0,0 +1,89 @@
package prog8.code.core
import kotlin.math.abs
fun Number.toHex(): String {
// 0..15 -> "0".."15"
// 16..255 -> "$10".."$ff"
// 256..65536 -> "$0100".."$ffff"
// negative values are prefixed with '-'.
val integer = this.toInt()
if(integer<0)
return '-' + abs(integer).toHex()
return when (integer) {
in 0 until 16 -> integer.toString()
in 0 until 0x100 -> "$"+integer.toString(16).padStart(2,'0')
in 0 until 0x10000 -> "$"+integer.toString(16).padStart(4,'0')
else -> throw IllegalArgumentException("number too large for 16 bits $this")
}
}
fun UInt.toHex(): String {
// 0..15 -> "0".."15"
// 16..255 -> "$10".."$ff"
// 256..65536 -> "$0100".."$ffff"
return when (this) {
in 0u until 16u -> this.toString()
in 0u until 0x100u -> "$"+this.toString(16).padStart(2,'0')
in 0u until 0x10000u -> "$"+this.toString(16).padStart(4,'0')
else -> throw IllegalArgumentException("number too large for 16 bits $this")
}
}
fun Char.escape(): Char = this.toString().escape()[0]
fun String.escape(): String {
val es = this.map {
when(it) {
'\t' -> "\\t"
'\n' -> "\\n"
'\r' -> "\\r"
'"' -> "\\\""
in '\u8000'..'\u80ff' -> "\\x" + (it.code - 0x8000).toString(16).padStart(2, '0') // 'ugly' passthrough hack
in '\u0000'..'\u00ff' -> it.toString()
else -> "\\u" + it.code.toString(16).padStart(4, '0')
}
}
return es.joinToString("")
}
fun String.unescape(): String {
val result = mutableListOf<Char>()
val iter = this.iterator()
while(iter.hasNext()) {
val c = iter.nextChar()
if(c=='\\') {
val ec = iter.nextChar()
result.add(when(ec) {
'\\' -> '\\'
'n' -> '\n'
'r' -> '\r'
'"' -> '"'
'\'' -> '\''
'u' -> {
try {
"${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}".toInt(16).toChar()
} catch (sb: StringIndexOutOfBoundsException) {
throw IllegalArgumentException("invalid \\u escape sequence")
} catch (nf: NumberFormatException) {
throw IllegalArgumentException("invalid \\u escape sequence")
}
}
'x' -> {
try {
val hex = ("" + iter.nextChar() + iter.nextChar()).toInt(16)
(0x8000 + hex).toChar() // 'ugly' pass-through hack
} catch (sb: StringIndexOutOfBoundsException) {
throw IllegalArgumentException("invalid \\x escape sequence")
} catch (nf: NumberFormatException) {
throw IllegalArgumentException("invalid \\x escape sequence")
}
}
else -> throw IllegalArgumentException("invalid escape char in string: \\$ec")
})
} else {
result.add(c)
}
}
return result.joinToString("")
}

View File

@ -1,31 +0,0 @@
package prog8.code.core
import kotlin.math.abs
fun Number.toHex(): String {
// 0..15 -> "0".."15"
// 16..255 -> "$10".."$ff"
// 256..65536 -> "$0100".."$ffff"
// negative values are prefixed with '-'.
val integer = this.toInt()
if(integer<0)
return '-' + abs(integer).toHex()
return when (integer) {
in 0 until 16 -> integer.toString()
in 0 until 0x100 -> "$"+integer.toString(16).padStart(2,'0')
in 0 until 0x10000 -> "$"+integer.toString(16).padStart(4,'0')
else -> throw IllegalArgumentException("number too large for 16 bits $this")
}
}
fun UInt.toHex(): String {
// 0..15 -> "0".."15"
// 16..255 -> "$10".."$ff"
// 256..65536 -> "$0100".."$ffff"
return when (this) {
in 0u until 16u -> this.toString()
in 0u until 0x100u -> "$"+this.toString(16).padStart(2,'0')
in 0u until 0x10000u -> "$"+this.toString(16).padStart(4,'0')
else -> throw IllegalArgumentException("number too large for 16 bits $this")
}
}

View File

@ -4,9 +4,6 @@ import prog8.code.core.CompilationOptions
import prog8.code.core.CpuType
import prog8.code.core.IMachineDefinition
import prog8.code.core.Zeropage
import prog8.vm.Assembler
import prog8.vm.Memory
import prog8.vm.VirtualMachine
import java.io.File
import java.nio.file.Path
@ -33,14 +30,10 @@ class VirtualMachineDefinition: IMachineDefinition {
override fun launchEmulator(selectedEmulator: Int, programNameWithPath: Path) {
println("\nStarting Virtual Machine...")
// to not have external module dependencies we launch the virtual machine via reflection
val vm = Class.forName("prog8.vm.VmRunner").getDeclaredConstructor().newInstance() as IVirtualMachineRunner
val source = File("$programNameWithPath.p8virt").readText()
val (memsrc, programsrc) = source.split("------PROGRAM------".toRegex(), 2)
val memory = Memory()
val assembler = Assembler()
assembler.initializeMemory(memsrc, memory)
val program = assembler.assembleProgram(programsrc)
val vm = VirtualMachine(memory, program)
vm.run(throttle = true)
vm.runProgram(source, true)
}
override fun isIOAddress(address: UInt): Boolean = false
@ -49,3 +42,7 @@ class VirtualMachineDefinition: IMachineDefinition {
override val opcodeNames = emptySet<String>()
}
interface IVirtualMachineRunner {
fun runProgram(program: String, throttle: Boolean)
}

View File

@ -1,7 +1,6 @@
package prog8.codegen.cpu6502
import prog8.ast.Program
import prog8.ast.antlr.escape
import prog8.ast.statements.*
import prog8.code.*
import prog8.code.core.*
@ -572,7 +571,7 @@ internal class ProgramAndVarsGen(
}
private fun outputStringvar(varname: String, encoding: Encoding, value: String) {
asmgen.out("$varname\t; $encoding:\"${escape(value).replace("\u0000", "<NULL>")}\"")
asmgen.out("$varname\t; $encoding:\"${value.escape().replace("\u0000", "<NULL>")}\"")
val bytes = compTarget.encodeString(value, encoding).plus(0.toUByte())
val outputBytes = bytes.map { "$" + it.toString(16).padStart(2, '0') }
for (chunk in outputBytes.chunked(16))

View File

@ -8,9 +8,8 @@ import io.kotest.core.spec.style.FunSpec
import io.kotest.matchers.shouldBe
import io.kotest.matchers.shouldNotBe
import io.kotest.matchers.string.shouldContain
import prog8.ast.antlr.unescape
import prog8.code.core.Encoding
import prog8.code.core.Position
import prog8.code.core.unescape
import prog8.code.target.C64Target
import prog8.code.target.Cx16Target
import prog8.code.target.Encoder
@ -216,7 +215,7 @@ class TestStringEncodings: FunSpec({
test("special pass-through") {
val passthroughEscaped= """\x00\x1b\x99\xff"""
val passthrough = unescape(passthroughEscaped, Position.DUMMY)
val passthrough = passthroughEscaped.unescape()
passthrough.length shouldBe 4
passthrough[0] shouldBe '\u8000'
passthrough[1] shouldBe '\u801b'

View File

@ -1,6 +1,5 @@
package prog8.ast
import prog8.ast.antlr.escape
import prog8.ast.expressions.*
import prog8.ast.statements.*
import prog8.ast.walk.IAstVisitor
@ -288,16 +287,16 @@ class AstToSourceTextConverter(val output: (text: String) -> Unit, val program:
override fun visit(char: CharLiteral) {
if(char.encoding==Encoding.DEFAULT)
output("'${escape(char.value.toString())}'")
output("'${char.value.escape()}'")
else
output("${char.encoding.prefix}:'${escape(char.value.toString())}'")
output("${char.encoding.prefix}:'${char.value.escape()}'")
}
override fun visit(string: StringLiteral) {
if(string.encoding==Encoding.DEFAULT)
output("\"${escape(string.value)}\"")
output("\"${string.value.escape()}\"")
else
output("${string.encoding.prefix}:\"${escape(string.value)}\"")
output("${string.encoding.prefix}:\"${string.value.escape()}\"")
}
override fun visit(array: ArrayLiteral) {

View File

@ -458,7 +458,12 @@ private fun Prog8ANTLRParser.CharliteralContext.toAst(): CharLiteral {
?: throw SyntaxError("invalid encoding", toPosition())
else
Encoding.DEFAULT
return CharLiteral(unescape(text.substring(1, text.length-1), toPosition())[0], encoding, toPosition())
val raw = text.substring(1, text.length - 1)
try {
return CharLiteral.fromEscaped(raw, encoding, toPosition())
} catch(ex: IllegalArgumentException) {
throw SyntaxError(ex.message!!, toPosition())
}
}
private fun Prog8ANTLRParser.StringliteralContext.toAst(): StringLiteral {
@ -470,7 +475,12 @@ private fun Prog8ANTLRParser.StringliteralContext.toAst(): StringLiteral {
?: throw SyntaxError("invalid encoding", toPosition())
else
Encoding.DEFAULT
return StringLiteral(unescape(text.substring(1, text.length-1), toPosition()), encoding, toPosition())
val raw = text.substring(1, text.length-1)
try {
return StringLiteral.fromEscaped(raw, encoding, toPosition())
} catch(ex: IllegalArgumentException) {
throw SyntaxError(ex.message!!, toPosition())
}
}
private fun Prog8ANTLRParser.ArrayindexedContext.toAst(): ArrayIndexedExpression {

View File

@ -1,61 +0,0 @@
package prog8.ast.antlr
import prog8.ast.base.SyntaxError
import prog8.code.core.Position
fun escape(str: String): String {
val es = str.map {
when(it) {
'\t' -> "\\t"
'\n' -> "\\n"
'\r' -> "\\r"
'"' -> "\\\""
in '\u8000'..'\u80ff' -> "\\x" + (it.code - 0x8000).toString(16).padStart(2, '0')
in '\u0000'..'\u00ff' -> it.toString()
else -> "\\u" + it.code.toString(16).padStart(4, '0')
}
}
return es.joinToString("")
}
fun unescape(str: String, position: Position): String {
val result = mutableListOf<Char>()
val iter = str.iterator()
while(iter.hasNext()) {
val c = iter.nextChar()
if(c=='\\') {
val ec = iter.nextChar()
result.add(when(ec) {
'\\' -> '\\'
'n' -> '\n'
'r' -> '\r'
'"' -> '"'
'\'' -> '\''
'u' -> {
try {
"${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}".toInt(16).toChar()
} catch (sb: StringIndexOutOfBoundsException) {
throw SyntaxError("invalid \\u escape sequence", position)
} catch (nf: NumberFormatException) {
throw SyntaxError("invalid \\u escape sequence", position)
}
}
'x' -> {
// special hack 0x8000..0x80ff will be outputted verbatim without encoding
try {
val hex = ("" + iter.nextChar() + iter.nextChar()).toInt(16)
(0x8000 + hex).toChar()
} catch (sb: StringIndexOutOfBoundsException) {
throw SyntaxError("invalid \\x escape sequence", position)
} catch (nf: NumberFormatException) {
throw SyntaxError("invalid \\x escape sequence", position)
}
}
else -> throw SyntaxError("invalid escape char in string: \\$ec", position)
})
} else {
result.add(c)
}
}
return result.joinToString("")
}

View File

@ -1,7 +1,6 @@
package prog8.ast.expressions
import prog8.ast.*
import prog8.ast.antlr.escape
import prog8.ast.base.ExpressionError
import prog8.ast.base.FatalAstException
import prog8.ast.base.UndefinedSymbolError
@ -597,6 +596,13 @@ class CharLiteral(val value: Char,
this.parent = parent
}
companion object {
fun fromEscaped(raw: String, encoding: Encoding, position: Position): CharLiteral {
val unescaped = raw.unescape()
return CharLiteral(unescaped[0], encoding, position)
}
}
override val isSimple = true
override fun replaceChildNode(node: Node, replacement: Node) {
@ -613,7 +619,7 @@ class CharLiteral(val value: Char,
override fun accept(visitor: IAstVisitor) = visitor.visit(this)
override fun accept(visitor: AstWalker, parent: Node) = visitor.visit(this, parent)
override fun toString(): String = "'${escape(value.toString())}'"
override fun toString(): String = "'${value.escape()}'"
override fun inferType(program: Program) = InferredTypes.knownFor(DataType.UBYTE)
operator fun compareTo(other: CharLiteral): Int = value.compareTo(other.value)
override fun hashCode(): Int = Objects.hash(value, encoding)
@ -633,6 +639,13 @@ class StringLiteral(val value: String,
this.parent = parent
}
companion object {
fun fromEscaped(raw: String, encoding: Encoding, position: Position): StringLiteral {
val unescaped = raw.unescape()
return StringLiteral(unescaped, encoding, position)
}
}
override val isSimple = true
override fun copy() = StringLiteral(value, encoding, position)
@ -645,7 +658,7 @@ class StringLiteral(val value: String,
override fun accept(visitor: IAstVisitor) = visitor.visit(this)
override fun accept(visitor: AstWalker, parent: Node)= visitor.visit(this, parent)
override fun toString(): String = "'${escape(value)}'"
override fun toString(): String = "'${value.escape()}'"
override fun inferType(program: Program) = InferredTypes.knownFor(DataType.STR)
operator fun compareTo(other: StringLiteral): Int = value.compareTo(other.value)
override fun hashCode(): Int = Objects.hash(value, encoding)

View File

@ -7,24 +7,30 @@
main {
sub start() {
txt.chrout('\x40')
txt.chrout('\x41')
txt.chrout('\x42')
txt.chrout('\n')
txt.print("Hello\n\"quotes\"\n")
; a "pixelshader":
void syscall1(8, 0) ; enable lo res creen
ubyte shifter
shifter >>= 1
repeat {
uword xx
uword yy = 0
repeat 240 {
xx = 0
repeat 320 {
syscall3(10, xx, yy, xx*yy + shifter) ; plot pixel
xx++
}
yy++
}
shifter+=4
}
; void syscall1(8, 0) ; enable lo res creen
; ubyte shifter
;
; shifter >>= 1
;
; repeat {
; uword xx
; uword yy = 0
; repeat 240 {
; xx = 0
; repeat 320 {
; syscall3(10, xx, yy, xx*yy + shifter) ; plot pixel
; xx++
; }
; yy++
; }
; shifter+=4
; }
}
}

View File

@ -4,4 +4,4 @@ org.gradle.parallel=true
org.gradle.daemon=true
kotlin.code.style=official
javaVersion=11
kotlinVersion=1.6.10
kotlinVersion=1.6.20

View File

@ -25,6 +25,7 @@ compileTestKotlin {
}
dependencies {
implementation project(':codeCore')
implementation "org.jetbrains.kotlin:kotlin-stdlib-jdk8"
implementation "com.michael-bull.kotlin-result:kotlin-result-jvm:1.1.14"
testImplementation 'io.kotest:kotest-runner-junit5-jvm:5.1.0'

View File

@ -1,5 +1,7 @@
package prog8.vm
import prog8.code.core.unescape
class Assembler {
private val labels = mutableMapOf<String, Int>()
@ -22,11 +24,11 @@ class Assembler {
var address = parseValue(addr, 0)
when(what) {
"str" -> {
val string = unescape(values.trim('"'))
val string = values.trim('"').unescape()
memory.setString(address, string, false)
}
"strz" -> {
val string = unescape(values.trim('"'))
val string = values.trim('"').unescape()
memory.setString(address, string, true)
}
"ubyte", "byte" -> {
@ -185,45 +187,4 @@ class Assembler {
else -> throw IllegalArgumentException("invalid type $typestr")
}
}
private fun unescape(str: String): String {
val result = mutableListOf<Char>()
val iter = str.iterator()
while(iter.hasNext()) {
val c = iter.nextChar()
if(c=='\\') {
val ec = iter.nextChar()
result.add(when(ec) {
'\\' -> '\\'
'n' -> '\n'
'r' -> '\r'
'"' -> '"'
'\'' -> '\''
'u' -> {
try {
"${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}".toInt(16).toChar()
} catch (sb: StringIndexOutOfBoundsException) {
throw IllegalArgumentException("invalid \\u escape sequence")
} catch (nf: NumberFormatException) {
throw IllegalArgumentException("invalid \\u escape sequence")
}
}
'x' -> {
try {
val hex = ("" + iter.nextChar() + iter.nextChar()).toInt(16)
hex.toChar()
} catch (sb: StringIndexOutOfBoundsException) {
throw IllegalArgumentException("invalid \\x escape sequence")
} catch (nf: NumberFormatException) {
throw IllegalArgumentException("invalid \\x escape sequence")
}
}
else -> throw IllegalArgumentException("invalid escape char in string: \\$ec")
})
} else {
result.add(c)
}
}
return result.joinToString("")
}
}

View File

@ -1,5 +1,6 @@
package prog8.vm
import prog8.code.target.virtual.IVirtualMachineRunner
import java.awt.Toolkit
import java.util.*
import kotlin.math.roundToInt
@ -904,3 +905,16 @@ class VirtualMachine(val memory: Memory, program: List<Instruction>) {
Toolkit.getDefaultToolkit().sync() // not really the same as wait on vsync, but there's noting else
}
}
class VmRunner(): IVirtualMachineRunner {
override fun runProgram(source: String, throttle: Boolean) {
val (memsrc, programsrc) = source.split("------PROGRAM------".toRegex(), 2)
val memory = Memory()
val assembler = Assembler()
assembler.initializeMemory(memsrc, memory)
val program = assembler.assembleProgram(programsrc)
val vm = VirtualMachine(memory, program)
vm.run(throttle = true)
}
}

View File

@ -35,15 +35,15 @@ class TestInstructions: FunSpec({
}
test("with label") {
val ins = Instruction(Opcode.BZ, VmDataType.WORD, reg1=11, reg2=22, reg3=33, symbol = listOf("a","b","c"))
val ins = Instruction(Opcode.BZ, VmDataType.WORD, reg1=11, symbol = listOf("a","b","c"))
ins.opcode shouldBe Opcode.BZ
ins.type shouldBe VmDataType.WORD
ins.reg1 shouldBe 11
ins.reg2 shouldBe 22
ins.reg3 shouldBe 33
ins.reg2 shouldBe null
ins.reg3 shouldBe null
ins.value shouldBe null
ins.symbol shouldBe listOf("a","b","c")
ins.toString() shouldBe "bz.w r11,r22,r33,_a.b.c"
ins.toString() shouldBe "bz.w r11,_a.b.c"
}
test("missing type should fail") {

View File

@ -12,5 +12,6 @@
<orderEntry type="library" name="KotlinJavaRuntime" level="project" />
<orderEntry type="library" name="io.kotest.assertions.core.jvm" level="project" />
<orderEntry type="library" name="io.kotest.runner.junit5.jvm" level="project" />
<orderEntry type="module" module-name="codeCore" />
</component>
</module>