From 8e4319cd5ad597ef422390877b830cd79f96f12a Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Wed, 6 Dec 2023 23:41:19 +0100 Subject: [PATCH] module directive %encoding to set the text encoding for that whole file (iso, petscii, etc.) --- .../src/prog8/code/core/ICompilationTarget.kt | 1 - codeCore/src/prog8/code/core/IStringEncoding.kt | 2 ++ codeCore/src/prog8/code/target/Encoder.kt | 2 ++ codeGenCpu6502/test/Dummies.kt | 2 ++ codeGenIntermediate/test/Dummies.kt | 2 ++ .../prog8/compiler/astprocessing/AstChecker.kt | 9 ++++++++- .../astprocessing/AstIdentifiersChecker.kt | 3 ++- .../compiler/astprocessing/AstPreprocessor.kt | 4 ++-- .../astprocessing/StatementReorderer.kt | 2 +- compiler/test/helpers/Dummies.kt | 4 ++++ compilerAst/src/prog8/ast/AstToplevel.kt | 9 +++++++++ docs/source/syntaxreference.rst | 17 +++++++++++++---- docs/source/todo.rst | 1 - parser/antlr/Prog8ANTLR.g4 | 2 +- 14 files changed, 48 insertions(+), 12 deletions(-) diff --git a/codeCore/src/prog8/code/core/ICompilationTarget.kt b/codeCore/src/prog8/code/core/ICompilationTarget.kt index ea1ad660d..37355e8bf 100644 --- a/codeCore/src/prog8/code/core/ICompilationTarget.kt +++ b/codeCore/src/prog8/code/core/ICompilationTarget.kt @@ -3,7 +3,6 @@ package prog8.code.core interface ICompilationTarget: IStringEncoding, IMemSizer { val name: String val machine: IMachineDefinition - val defaultEncoding: Encoding override fun encodeString(str: String, encoding: Encoding): List override fun decodeString(bytes: Iterable, encoding: Encoding): String diff --git a/codeCore/src/prog8/code/core/IStringEncoding.kt b/codeCore/src/prog8/code/core/IStringEncoding.kt index c10393ef0..efbc11475 100644 --- a/codeCore/src/prog8/code/core/IStringEncoding.kt +++ b/codeCore/src/prog8/code/core/IStringEncoding.kt @@ -9,6 +9,8 @@ enum class Encoding(val prefix: String) { } interface IStringEncoding { + val defaultEncoding: Encoding + fun encodeString(str: String, encoding: Encoding): List fun decodeString(bytes: Iterable, encoding: Encoding): String } diff --git a/codeCore/src/prog8/code/target/Encoder.kt b/codeCore/src/prog8/code/target/Encoder.kt index 5de4f0920..16a8703fa 100644 --- a/codeCore/src/prog8/code/target/Encoder.kt +++ b/codeCore/src/prog8/code/target/Encoder.kt @@ -10,6 +10,8 @@ import prog8.code.target.cbm.PetsciiEncoding object Encoder: IStringEncoding { + override val defaultEncoding: Encoding = Encoding.ISO + override fun encodeString(str: String, encoding: Encoding): List { val coded = when(encoding) { Encoding.PETSCII -> PetsciiEncoding.encodePetscii(str, true) diff --git a/codeGenCpu6502/test/Dummies.kt b/codeGenCpu6502/test/Dummies.kt index b2f3bc8ee..0e75d5c23 100644 --- a/codeGenCpu6502/test/Dummies.kt +++ b/codeGenCpu6502/test/Dummies.kt @@ -18,6 +18,8 @@ internal object DummyMemsizer : IMemSizer { } internal object DummyStringEncoder : IStringEncoding { + override val defaultEncoding: Encoding = Encoding.ISO + override fun encodeString(str: String, encoding: Encoding): List { return emptyList() } diff --git a/codeGenIntermediate/test/Dummies.kt b/codeGenIntermediate/test/Dummies.kt index afe25b36f..848d1ec28 100644 --- a/codeGenIntermediate/test/Dummies.kt +++ b/codeGenIntermediate/test/Dummies.kt @@ -16,6 +16,8 @@ internal object DummyMemsizer : IMemSizer { } internal object DummyStringEncoder : IStringEncoding { + override val defaultEncoding: Encoding = Encoding.ISO + override fun encodeString(str: String, encoding: Encoding): List { return emptyList() } diff --git a/compiler/src/prog8/compiler/astprocessing/AstChecker.kt b/compiler/src/prog8/compiler/astprocessing/AstChecker.kt index 0f4abc776..7fa423753 100644 --- a/compiler/src/prog8/compiler/astprocessing/AstChecker.kt +++ b/compiler/src/prog8/compiler/astprocessing/AstChecker.kt @@ -55,7 +55,7 @@ internal class AstChecker(private val program: Program, val directives = module.statements.filterIsInstance().groupBy { it.directive } directives.filter { it.value.size > 1 }.forEach{ entry -> when(entry.key) { - "%output", "%launcher", "%zeropage", "%address" -> + "%output", "%launcher", "%zeropage", "%address", "%encoding" -> entry.value.forEach { errors.err("directive can just occur once", it.position) } } } @@ -850,6 +850,13 @@ internal class AstChecker(private val program: Program, if(directive.args.any { it.name=="verafxmuls" } && compilerOptions.compTarget.name != Cx16Target.NAME) err("verafx option is only valid on cx16 target") } + "%encoding" -> { + if(directive.parent !is Module) + err("this directive may only occur at module level") + val allowedEncodings = Encoding.entries.map {it.prefix} + if(directive.args.size!=1 || directive.args[0].name !in allowedEncodings) + err("invalid encoding directive, expected one of ${allowedEncodings}") + } else -> throw SyntaxError("invalid directive ${directive.directive}", directive.position) } super.visit(directive) diff --git a/compiler/src/prog8/compiler/astprocessing/AstIdentifiersChecker.kt b/compiler/src/prog8/compiler/astprocessing/AstIdentifiersChecker.kt index 04ebcce75..34c2e7d56 100644 --- a/compiler/src/prog8/compiler/astprocessing/AstIdentifiersChecker.kt +++ b/compiler/src/prog8/compiler/astprocessing/AstIdentifiersChecker.kt @@ -177,7 +177,8 @@ internal class AstIdentifiersChecker(private val errors: IErrorReporter, else '_' }.joinToString("") - call.args[0] = StringLiteral(processed, compTarget.defaultEncoding, name.position) + val textEncoding = (call as Node).definingModule.textEncoding + call.args[0] = StringLiteral(processed, textEncoding, name.position) call.args[0].linkParents(call as Node) } } diff --git a/compiler/src/prog8/compiler/astprocessing/AstPreprocessor.kt b/compiler/src/prog8/compiler/astprocessing/AstPreprocessor.kt index 038463b5f..94fe06a4a 100644 --- a/compiler/src/prog8/compiler/astprocessing/AstPreprocessor.kt +++ b/compiler/src/prog8/compiler/astprocessing/AstPreprocessor.kt @@ -58,13 +58,13 @@ class AstPreprocessor(val program: Program, override fun before(char: CharLiteral, parent: Node): Iterable { if(char.encoding== Encoding.DEFAULT) - char.encoding = options.compTarget.defaultEncoding + char.encoding = char.definingModule.textEncoding return noModifications } override fun before(string: StringLiteral, parent: Node): Iterable { if(string.encoding==Encoding.DEFAULT) - string.encoding = options.compTarget.defaultEncoding + string.encoding = string.definingModule.textEncoding return super.before(string, parent) } diff --git a/compiler/src/prog8/compiler/astprocessing/StatementReorderer.kt b/compiler/src/prog8/compiler/astprocessing/StatementReorderer.kt index 2fb9b3035..0b72c34b9 100644 --- a/compiler/src/prog8/compiler/astprocessing/StatementReorderer.kt +++ b/compiler/src/prog8/compiler/astprocessing/StatementReorderer.kt @@ -22,7 +22,7 @@ internal class StatementReorderer( // - sorts the choices in when statement. // - insert AddressOf (&) expression where required (string params to a UWORD function param etc.). - private val directivesToMove = setOf("%output", "%launcher", "%zeropage", "%zpreserved", "%zpallowed", "%address", "%option") + private val directivesToMove = setOf("%output", "%launcher", "%zeropage", "%zpreserved", "%zpallowed", "%address", "%option", "%encoding") override fun after(module: Module, parent: Node): Iterable { val (blocks, other) = module.statements.partition { it is Block } diff --git a/compiler/test/helpers/Dummies.kt b/compiler/test/helpers/Dummies.kt index b0973de40..f19d4f5b4 100644 --- a/compiler/test/helpers/Dummies.kt +++ b/compiler/test/helpers/Dummies.kt @@ -35,6 +35,8 @@ internal object DummyMemsizer : IMemSizer { } internal object DummyStringEncoder : IStringEncoding { + override val defaultEncoding: Encoding = Encoding.ISO + override fun encodeString(str: String, encoding: Encoding): List { return emptyList() } @@ -45,6 +47,8 @@ internal object DummyStringEncoder : IStringEncoding { } internal object AsciiStringEncoder : IStringEncoding { + override val defaultEncoding: Encoding = Encoding.ISO + override fun encodeString(str: String, encoding: Encoding): List = str.map { it.code.toUByte() } override fun decodeString(bytes: Iterable, encoding: Encoding): String { diff --git a/compilerAst/src/prog8/ast/AstToplevel.kt b/compilerAst/src/prog8/ast/AstToplevel.kt index 0ff197b12..8a03cfd6b 100644 --- a/compilerAst/src/prog8/ast/AstToplevel.kt +++ b/compilerAst/src/prog8/ast/AstToplevel.kt @@ -8,6 +8,7 @@ import prog8.ast.statements.* import prog8.ast.walk.AstWalker import prog8.ast.walk.IAstVisitor import prog8.code.core.DataType +import prog8.code.core.Encoding import prog8.code.core.Position import prog8.code.core.SourceCode @@ -317,6 +318,14 @@ open class Module(final override var statements: MutableList, fun accept(visitor: IAstVisitor) = visitor.visit(this) fun accept(visitor: AstWalker, parent: Node) = visitor.visit(this, parent) + val textEncoding: Encoding by lazy { + val encoding = (statements.singleOrNull { it is Directive && it.directive == "%encoding" } as? Directive) + if(encoding!=null) + Encoding.entries.first { it.prefix==encoding.args[0].name } + else + program.encoding.defaultEncoding + } + val isLibrary get() = source.isFromResources } diff --git a/docs/source/syntaxreference.rst b/docs/source/syntaxreference.rst index f0d72ed32..59def1856 100644 --- a/docs/source/syntaxreference.rst +++ b/docs/source/syntaxreference.rst @@ -149,6 +149,12 @@ Directives Only use this if you know what you're doing because it could result in invalid assembly code being generated. - ``verafxmuls`` (block, cx16 target only) uses Vera FX hardware word multiplication on the CommanderX16 for all word multiplications in this block. Warning: this may interfere with IRQs and other Vera operations, so use this only when you know what you're doing. It's safer to explicitly use ``verafx.muls()``. +.. data:: %encoding + + Overrides, in the module file it occurs in, + the default text encoding to use for strings and characters that have no explicit encoding prefix. + You can use one of the recognised encoding names, see :ref:`encodings`. + .. data:: %asmbinary "" [, [, ]] Level: not at module scope. @@ -501,6 +507,8 @@ Note: you can also use array indexing on a 'pointer variable', which is basicall containing a memory address. Currently this is equivalent to directly referencing the bytes in memory at the given index (and allows index values of word size). See :ref:`pointervars` +.. _encodings: + String ^^^^^^ A string literal can occur with or without an encoding prefix (encoding followed by ':' followed by the string itself). @@ -509,10 +517,11 @@ You can choose to store the string in other encodings such as ``sc`` (screencode String length is limited to 255 characters. Here are several examples: - - ``"hello"`` a string translated into the default character encoding (PETSCII) - - ``petscii:"hello"`` same as the above, on CBM machines. - - ``sc:"my name is Alice"`` string with screencode encoding (new syntax) - - ``iso:"Ich heiße François"`` string in iso encoding + - ``"hello"`` a string translated into the default character encoding (PETSCII on the CBM machines) + - ``petscii:"hello"`` string in CBM PETSCII encoding + - ``sc:"my name is Alice"`` string in CBM screencode encoding + - ``iso:"Ich heiße François"`` string in iso-8859-15 encoding + - ``atascii:"I am Atari!"`` string in "atascii" encoding (Atari 8-bit) There are several escape sequences available to put special characters into your string value: diff --git a/docs/source/todo.rst b/docs/source/todo.rst index 3e4cab3e3..886aff09b 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -78,7 +78,6 @@ What if we were to re-introduce Structs in prog8? Some thoughts: Other language/syntax features to think about --------------------------------------------- -- module directive to set the text encoding for that whole file (iso, petscii, etc.) - chained assignments `x=y=z=99` - declare multiple variables `ubyte x,y,z` (if init value present, all get that init value) - chained comparisons `10