now checks for invalid text encodings for given compilation target

This commit is contained in:
Irmen de Jong 2022-02-15 01:39:12 +01:00
parent 73fc18099e
commit 4d16e1e14a
14 changed files with 121 additions and 43 deletions

View File

@ -7,6 +7,7 @@ import prog8.codegen.target.c128.C128MachineDefinition
import prog8.codegen.target.cbm.CbmMemorySizer
import prog8.codegen.target.cbm.asmsub6502ArgsEvalOrder
import prog8.codegen.target.cbm.asmsub6502ArgsHaveRegisterClobberRisk
import prog8.compilerinterface.Encoding
import prog8.compilerinterface.ICompilationTarget
import prog8.compilerinterface.IMemSizer
import prog8.compilerinterface.IStringEncoding
@ -15,6 +16,7 @@ import prog8.compilerinterface.IStringEncoding
class C128Target: ICompilationTarget, IStringEncoding by Encoder, IMemSizer by CbmMemorySizer {
override val name = NAME
override val machine = C128MachineDefinition()
override val supportedEncodings = setOf(Encoding.PETSCII, Encoding.SCREENCODES)
companion object {
const val NAME = "c128"

View File

@ -7,6 +7,7 @@ import prog8.codegen.target.c64.C64MachineDefinition
import prog8.codegen.target.cbm.CbmMemorySizer
import prog8.codegen.target.cbm.asmsub6502ArgsEvalOrder
import prog8.codegen.target.cbm.asmsub6502ArgsHaveRegisterClobberRisk
import prog8.compilerinterface.Encoding
import prog8.compilerinterface.ICompilationTarget
import prog8.compilerinterface.IMemSizer
import prog8.compilerinterface.IStringEncoding
@ -15,6 +16,7 @@ import prog8.compilerinterface.IStringEncoding
class C64Target: ICompilationTarget, IStringEncoding by Encoder, IMemSizer by CbmMemorySizer {
override val name = NAME
override val machine = C64MachineDefinition()
override val supportedEncodings = setOf(Encoding.PETSCII, Encoding.SCREENCODES)
companion object {
const val NAME = "c64"

View File

@ -7,6 +7,7 @@ import prog8.codegen.target.cbm.CbmMemorySizer
import prog8.codegen.target.cbm.asmsub6502ArgsEvalOrder
import prog8.codegen.target.cbm.asmsub6502ArgsHaveRegisterClobberRisk
import prog8.codegen.target.cx16.CX16MachineDefinition
import prog8.compilerinterface.Encoding
import prog8.compilerinterface.ICompilationTarget
import prog8.compilerinterface.IMemSizer
import prog8.compilerinterface.IStringEncoding
@ -15,6 +16,7 @@ import prog8.compilerinterface.IStringEncoding
class Cx16Target: ICompilationTarget, IStringEncoding by Encoder, IMemSizer by CbmMemorySizer {
override val name = NAME
override val machine = CX16MachineDefinition()
override val supportedEncodings = setOf(Encoding.PETSCII, Encoding.SCREENCODES, Encoding.ISO)
companion object {
const val NAME = "cx16"

View File

@ -8,7 +8,7 @@ import prog8.compilerinterface.Encoding
import prog8.compilerinterface.IStringEncoding
internal object Encoder: IStringEncoding {
override fun encodeString(str: String, encoding: Encoding): List<UByte> { // TODO use Result
override fun encodeString(str: String, encoding: Encoding): List<UByte> {
val coded = when(encoding) {
Encoding.PETSCII -> PetsciiEncoding.encodePetscii(str, true)
Encoding.SCREENCODES -> PetsciiEncoding.encodeScreencode(str, true)
@ -20,7 +20,7 @@ internal object Encoder: IStringEncoding {
success = { it }
)
}
override fun decodeString(bytes: List<UByte>, encoding: Encoding): String { // TODO use Result
override fun decodeString(bytes: List<UByte>, encoding: Encoding): String {
val decoded = when(encoding) {
Encoding.PETSCII -> PetsciiEncoding.decodePetscii(bytes, true)
Encoding.SCREENCODES -> PetsciiEncoding.decodeScreencode(bytes, true)

View File

@ -184,7 +184,7 @@ fun parseImports(filepath: Path,
errors: IErrorReporter,
compTarget: ICompilationTarget,
sourceDirs: List<String>): Triple<Program, CompilationOptions, List<Path>> {
println("Compiler target: ${compTarget.name}")
println("Compilation target: ${compTarget.name}")
val bf = BuiltinFunctionsFacade(BuiltinFunctions)
val program = Program(filepath.nameWithoutExtension, bf, compTarget, compTarget)
bf.program = program
@ -284,7 +284,8 @@ private fun processAst(program: Program, errors: IErrorReporter, compilerOptions
// ...but what do we gain from this? We can leave it as it is now: where a char literal is no more than syntactic sugar for an UBYTE value.
// By introducing a CHAR dt, we will also lose the opportunity to do constant-folding on any expression containing a char literal.
// Yes this is different from strings that are only encoded in the code gen phase.
program.charLiteralsToUByteLiterals(compilerOptions.compTarget)
program.charLiteralsToUByteLiterals(compilerOptions.compTarget, errors)
errors.report()
program.constantFold(errors, compilerOptions.compTarget)
errors.report()
program.desugaring(errors)

View File

@ -10,10 +10,7 @@ import prog8.ast.statements.Directive
import prog8.ast.statements.VarDeclOrigin
import prog8.ast.walk.AstWalker
import prog8.ast.walk.IAstModification
import prog8.compilerinterface.CompilationOptions
import prog8.compilerinterface.IErrorReporter
import prog8.compilerinterface.IStringEncoding
import prog8.compilerinterface.IVariablesAndConsts
import prog8.compilerinterface.*
internal fun Program.checkValid(errors: IErrorReporter, compilerOptions: CompilationOptions) {
@ -48,12 +45,16 @@ internal fun Program.reorderStatements(errors: IErrorReporter, options: Compilat
}
}
internal fun Program.charLiteralsToUByteLiterals(enc: IStringEncoding) {
internal fun Program.charLiteralsToUByteLiterals(target: ICompilationTarget, errors: IErrorReporter) {
val walker = object : AstWalker() {
override fun after(char: CharLiteral, parent: Node): Iterable<IAstModification> {
if(char.encoding !in target.supportedEncodings) {
errors.err("compilation target doesn't support this text encoding", char.position)
return noModifications
}
return listOf(IAstModification.ReplaceNode(
char,
NumericLiteral(DataType.UBYTE, enc.encodeString(char.value.toString(), char.encoding)[0].toDouble(), char.position),
NumericLiteral(DataType.UBYTE, target.encodeString(char.value.toString(), char.encoding)[0].toDouble(), char.position),
parent
))
}
@ -96,7 +97,7 @@ internal fun Program.checkIdentifiers(errors: IErrorReporter, options: Compilati
val transforms = AstVariousTransforms(this)
transforms.visit(this)
transforms.applyModifications()
val lit2decl = LiteralsToAutoVars(this)
val lit2decl = LiteralsToAutoVars(this, options.compTarget, errors)
lit2decl.visit(this)
if(errors.noErrors())
lit2decl.applyModifications()

View File

@ -12,11 +12,19 @@ import prog8.ast.statements.VarDecl
import prog8.ast.statements.WhenChoice
import prog8.ast.walk.AstWalker
import prog8.ast.walk.IAstModification
import prog8.compilerinterface.ICompilationTarget
import prog8.compilerinterface.IErrorReporter
internal class LiteralsToAutoVars(private val program: Program) : AstWalker() {
internal class LiteralsToAutoVars(private val program: Program,
private val target: ICompilationTarget,
private val errors: IErrorReporter) : AstWalker() {
override fun after(string: StringLiteral, parent: Node): Iterable<IAstModification> {
if(string.encoding !in target.supportedEncodings) {
errors.err("compilation target doesn't support this text encoding", string.position)
return noModifications
}
if(string.parent !is VarDecl
&& string.parent !is WhenChoice
&& (string.parent !is ContainmentCheck || string.value.length>ContainmentCheck.max_inlined_string_length)) {

View File

@ -6,8 +6,15 @@ import com.github.michaelbull.result.getOrElse
import io.kotest.assertions.withClue
import io.kotest.core.spec.style.FunSpec
import io.kotest.matchers.shouldBe
import io.kotest.matchers.string.shouldContain
import prog8.codegen.target.C64Target
import prog8.codegen.target.Cx16Target
import prog8.codegen.target.cbm.IsoEncoding
import prog8.codegen.target.cbm.PetsciiEncoding
import prog8tests.helpers.ErrorReporterForTests
import prog8tests.helpers.assertFailure
import prog8tests.helpers.assertSuccess
import prog8tests.helpers.compileText
class TestStringEncodings: FunSpec({
@ -188,4 +195,69 @@ class TestStringEncodings: FunSpec({
result.expectError { "should not encode" }
}
}
// Compiles a program whose string and char literals use an unknown encoding prefix ("unicorns:")
// for the C64 target and expects the compilation to fail.
test("invalid encoding immediately errors the parser") {
val source="""
main {
str string5 = unicorns:"wrong"
ubyte char5 = unicorns:'?'
sub start() {
}
}"""
val errors = ErrorReporterForTests()
compileText(C64Target(), false, source, errors, false).assertFailure()
// The reporter is expected to stay empty even though compilation failed.
// NOTE(review): presumably the parser rejects the unknown prefix before anything is reported here -- confirm against compileText.
errors.errors.size shouldBe 0
}
// Compiles string declarations using the default, sc:, iso: and petscii: encodings for the C64 target.
// Expects the compilation to fail with exactly one reported error that mentions "text encoding".
test("unsupported string encoding iso for C64 compilationtarget") {
val source="""
main {
str string1 = "default"
str string2 = sc:"screencodes"
str string3 = iso:"iso"
str string4 = petscii:"petscii"
sub start() {
}
}"""
val errors = ErrorReporterForTests()
compileText(C64Target(), false, source, errors, writeAssembly = false).assertFailure()
// Only one of the four declarations should be rejected (per the test name: the iso: one).
errors.errors.size shouldBe 1
errors.errors[0] shouldContain "text encoding"
}
// Same check as the string variant, but for character literals:
// compiles ubyte declarations using the default, sc:, iso: and petscii: encodings for the C64 target
// and expects the compilation to fail with exactly one reported error that mentions "text encoding".
test("unsupported char encoding iso for C64 compilationtarget") {
val source="""
main {
ubyte char1 = 'd'
ubyte char2 = sc:'s'
ubyte char3 = iso:'i'
ubyte char4 = petscii:'p'
sub start() {
}
}"""
val errors = ErrorReporterForTests()
compileText(C64Target(), false, source, errors, writeAssembly = false).assertFailure()
// Only one of the four declarations should be rejected (per the test name: the iso: one).
errors.errors.size shouldBe 1
errors.errors[0] shouldContain "text encoding"
}
// Compiles string and char declarations in the default, sc:, iso: and petscii: encodings
// for the Cx16 target and expects the compilation to succeed.
test("all encodings supported for Cx16 target") {
val source="""
main {
str string1 = "default"
str string2 = sc:"screencodes"
str string3 = iso:"iso"
str string4 = petscii:"petscii"
ubyte char1 = 'd'
ubyte char2 = sc:'s'
ubyte char3 = iso:'i'
ubyte char4 = petscii:'p'
sub start() {
}
}"""
// No error reporter is passed here; only the overall success of the compilation is asserted.
compileText(Cx16Target(), false, source, writeAssembly = false).assertSuccess()
}
})

View File

@ -51,6 +51,7 @@ internal object DummyCompilationTarget : ICompilationTarget {
override val name: String = "dummy"
override val machine: IMachineDefinition
get() = throw NotImplementedError("dummy")
override val supportedEncodings = setOf(Encoding.PETSCII, Encoding.SCREENCODES, Encoding.ISO)
override fun encodeString(str: String, encoding: Encoding): List<UByte> {
throw NotImplementedError("dummy")

View File

@ -5,13 +5,13 @@ import prog8.ast.statements.RegisterOrStatusflag
import prog8.ast.statements.Subroutine
// TODO list of supported string encodings
interface ICompilationTarget: IStringEncoding, IMemSizer {
val name: String
val machine: IMachineDefinition
override fun encodeString(str: String, encoding: Encoding): List<UByte> // TODO use Result
override fun decodeString(bytes: List<UByte>, encoding: Encoding): String // TODO use Result
val supportedEncodings: Set<Encoding>
override fun encodeString(str: String, encoding: Encoding): List<UByte>
override fun decodeString(bytes: List<UByte>, encoding: Encoding): String
fun asmsubArgsEvalOrder(sub: Subroutine): List<Int>
fun asmsubArgsHaveRegisterClobberRisk(args: List<Expression>,

View File

@ -67,7 +67,7 @@ Language features
- Many built-in functions, such as ``sin``, ``cos``, ``rnd``, ``abs``, ``min``, ``max``, ``sqrt``, ``msb``, ``rol``, ``ror``, ``swap``, ``sort`` and ``reverse``
- Programs can be run multiple times without reloading because of automatic variable (re)initializations.
- Supports the sixteen 'virtual' 16-bit registers R0 .. R15 from the Commander X16, also on the other machines.
- If you only use standard kernal and core prog8 library routines, it is possible to compile the *exact same program* for different machines (just change the compiler target flag)!
- If you only use standard kernal and core prog8 library routines, it is possible to compile the *exact same program* for different machines (just change the compilation target flag)!
Code example

View File

@ -191,7 +191,7 @@ Values will usually be part of an expression or assignment statement::
$aa43 ; hex integer number
%100101 ; binary integer number (% is also remainder operator so be careful)
-33.456e52 ; floating point number
"Hi, I am a string" ; text string, encoded with compiler target default encoding
"Hi, I am a string" ; text string, encoded with default encoding
'a' ; byte value (ubyte) for the letter a
sc:"Alternate" ; text string, encoded with c64 screencode encoding
sc:'a' ; byte value of the letter a in c64 screencode encoding
@ -985,7 +985,7 @@ memory(name, size, alignment)
The return value is just a simple uword address so it cannot be used as an array in your program.
You can only treat it as a pointer or use it in inline assembly.
callfar(bank, address, argumentaddress) ; NOTE: specific to cx16 compiler target for now
callfar(bank, address, argumentaddress) ; NOTE: specific to cx16 target for now
Calls an assembly routine in another ram-bank on the CommanderX16 (using the ``jsrfar`` routine)
The banked RAM is located in the address range $A000-$BFFF (8 kilobytes).
Notice that bank $00 is used by the Kernal and should not be used by user code.
@ -996,7 +996,7 @@ callfar(bank, address, argumentaddress) ; NOTE: specific to cx16 compiler t
If the routine requires different arguments or return values, ``callfar`` cannot be used
and you'll have to set up a call to ``jsrfar`` yourself to process this.
callrom(bank, address, argumentaddress) ; NOTE: specific to cx16 compiler target for now
callrom(bank, address, argumentaddress) ; NOTE: specific to cx16 target for now
Calls an assembly routine in another rom-bank on the CommanderX16
The banked ROM is located in the address range $C000-$FFFF (16 kilobytes).
There are 32 banks (0 to 31).

View File

@ -9,7 +9,7 @@ Prog8 targets the following hardware:
- optional use of memory mapped I/O registers
- optional use of system ROM routines
Currently these machines can be selected as a compiler target (via the ``-target`` compiler argument):
Currently these machines can be selected as a compilation target (via the ``-target`` compiler argument):
- 'c64': the Commodore 64
- 'c128': the Commodore 128 (*limited support only for now*)
@ -19,7 +19,7 @@ This chapter explains some relevant system details of the c64 and cx16 machines.
.. hint::
If you only use standard kernal and prog8 library routines,
it is possible to compile the *exact same program* for both machines (just change the compiler target flag)!
it is possible to compile the *exact same program* for both machines (just change the compilation target flag)!
Memory Model

View File

@ -1,25 +1,14 @@
%import textio
main {
str myBar = "main.bar"
str string1 = "default"
str string2 = sc:"screencodes"
str string3 = iso:"iso"
str string4 = petscii:"petscii"
foo_bar:
ubyte char1 = 'd'
ubyte char2 = sc:'s'
ubyte char3 = iso:'i'
ubyte char4 = petscii:'p'
sub start() {
txt.print(myBar)
txt.print(&foo_bar)
return
quert:
quert:
quert:
quert:
}
sub start() {
}
foo_bar:
foo_bar:
foo_bar:
foo_bar:
sub start() {
}
}