module directive %encoding to set the text encoding for that whole file (iso, petscii, etc.)

This commit is contained in:
Irmen de Jong 2023-12-06 23:41:19 +01:00
parent 5a776dd690
commit 8e4319cd5a
14 changed files with 48 additions and 12 deletions

View File

@ -3,7 +3,6 @@ package prog8.code.core
interface ICompilationTarget: IStringEncoding, IMemSizer {
val name: String
val machine: IMachineDefinition
val defaultEncoding: Encoding
override fun encodeString(str: String, encoding: Encoding): List<UByte>
override fun decodeString(bytes: Iterable<UByte>, encoding: Encoding): String

View File

@ -9,6 +9,8 @@ enum class Encoding(val prefix: String) {
}
/**
 * Contract for converting text to encoded byte values and back again.
 *
 * Implementations also expose the [Encoding] they designate as their default.
 */
interface IStringEncoding {
    /** The encoding this implementation designates as its default. */
    val defaultEncoding: Encoding

    /** Encodes [str] with the given [encoding] and returns the resulting byte values. */
    fun encodeString(str: String, encoding: Encoding): List<UByte>

    /** Decodes [bytes] with the given [encoding] and returns the resulting text. */
    fun decodeString(bytes: Iterable<UByte>, encoding: Encoding): String
}

View File

@ -10,6 +10,8 @@ import prog8.code.target.cbm.PetsciiEncoding
object Encoder: IStringEncoding {
override val defaultEncoding: Encoding = Encoding.ISO
override fun encodeString(str: String, encoding: Encoding): List<UByte> {
val coded = when(encoding) {
Encoding.PETSCII -> PetsciiEncoding.encodePetscii(str, true)

View File

@ -18,6 +18,8 @@ internal object DummyMemsizer : IMemSizer {
}
internal object DummyStringEncoder : IStringEncoding {
override val defaultEncoding: Encoding = Encoding.ISO
override fun encodeString(str: String, encoding: Encoding): List<UByte> {
return emptyList()
}

View File

@ -16,6 +16,8 @@ internal object DummyMemsizer : IMemSizer {
}
internal object DummyStringEncoder : IStringEncoding {
override val defaultEncoding: Encoding = Encoding.ISO
override fun encodeString(str: String, encoding: Encoding): List<UByte> {
return emptyList()
}

View File

@ -55,7 +55,7 @@ internal class AstChecker(private val program: Program,
val directives = module.statements.filterIsInstance<Directive>().groupBy { it.directive }
directives.filter { it.value.size > 1 }.forEach{ entry ->
when(entry.key) {
"%output", "%launcher", "%zeropage", "%address" ->
"%output", "%launcher", "%zeropage", "%address", "%encoding" ->
entry.value.forEach { errors.err("directive can just occur once", it.position) }
}
}
@ -850,6 +850,13 @@ internal class AstChecker(private val program: Program,
if(directive.args.any { it.name=="verafxmuls" } && compilerOptions.compTarget.name != Cx16Target.NAME)
err("verafx option is only valid on cx16 target")
}
"%encoding" -> {
if(directive.parent !is Module)
err("this directive may only occur at module level")
val allowedEncodings = Encoding.entries.map {it.prefix}
if(directive.args.size!=1 || directive.args[0].name !in allowedEncodings)
err("invalid encoding directive, expected one of ${allowedEncodings}")
}
else -> throw SyntaxError("invalid directive ${directive.directive}", directive.position)
}
super.visit(directive)

View File

@ -177,7 +177,8 @@ internal class AstIdentifiersChecker(private val errors: IErrorReporter,
else
'_'
}.joinToString("")
call.args[0] = StringLiteral(processed, compTarget.defaultEncoding, name.position)
val textEncoding = (call as Node).definingModule.textEncoding
call.args[0] = StringLiteral(processed, textEncoding, name.position)
call.args[0].linkParents(call as Node)
}
}

View File

@ -58,13 +58,13 @@ class AstPreprocessor(val program: Program,
override fun before(char: CharLiteral, parent: Node): Iterable<IAstModification> {
if(char.encoding== Encoding.DEFAULT)
char.encoding = options.compTarget.defaultEncoding
char.encoding = char.definingModule.textEncoding
return noModifications
}
override fun before(string: StringLiteral, parent: Node): Iterable<IAstModification> {
if(string.encoding==Encoding.DEFAULT)
string.encoding = options.compTarget.defaultEncoding
string.encoding = string.definingModule.textEncoding
return super.before(string, parent)
}

View File

@ -22,7 +22,7 @@ internal class StatementReorderer(
// - sorts the choices in when statement.
// - insert AddressOf (&) expression where required (string params to a UWORD function param etc.).
private val directivesToMove = setOf("%output", "%launcher", "%zeropage", "%zpreserved", "%zpallowed", "%address", "%option")
private val directivesToMove = setOf("%output", "%launcher", "%zeropage", "%zpreserved", "%zpallowed", "%address", "%option", "%encoding")
override fun after(module: Module, parent: Node): Iterable<IAstModification> {
val (blocks, other) = module.statements.partition { it is Block }

View File

@ -35,6 +35,8 @@ internal object DummyMemsizer : IMemSizer {
}
internal object DummyStringEncoder : IStringEncoding {
override val defaultEncoding: Encoding = Encoding.ISO
override fun encodeString(str: String, encoding: Encoding): List<UByte> {
return emptyList()
}
@ -45,6 +47,8 @@ internal object DummyStringEncoder : IStringEncoding {
}
internal object AsciiStringEncoder : IStringEncoding {
override val defaultEncoding: Encoding = Encoding.ISO
override fun encodeString(str: String, encoding: Encoding): List<UByte> = str.map { it.code.toUByte() }
override fun decodeString(bytes: Iterable<UByte>, encoding: Encoding): String {

View File

@ -8,6 +8,7 @@ import prog8.ast.statements.*
import prog8.ast.walk.AstWalker
import prog8.ast.walk.IAstVisitor
import prog8.code.core.DataType
import prog8.code.core.Encoding
import prog8.code.core.Position
import prog8.code.core.SourceCode
@ -317,6 +318,14 @@ open class Module(final override var statements: MutableList<Statement>,
fun accept(visitor: IAstVisitor) = visitor.visit(this)
fun accept(visitor: AstWalker, parent: Node) = visitor.visit(this, parent)
/**
 * The text encoding selected for this module.
 *
 * If the module's statements contain exactly one `%encoding` directive, the name of its
 * first argument is matched against the [Encoding.prefix] of every known encoding.
 * In every other case (no directive, more than one directive, a directive without
 * arguments, or an unrecognised encoding name) the program-wide default encoding is used.
 *
 * Malformed directives deliberately fall back to the default instead of throwing, so that
 * reading this property never aborts with an [IndexOutOfBoundsException] or
 * [NoSuchElementException]; reporting the invalid directive to the user is left to the
 * directive validation done elsewhere (NOTE(review): the AST checker's "%encoding" case).
 *
 * The value is computed on first access and then cached by the `lazy` delegate.
 */
val textEncoding: Encoding by lazy {
    // singleOrNull yields null for both "absent" and "occurs more than once".
    val encodingDirective = statements
        .filterIsInstance<Directive>()
        .singleOrNull { it.directive == "%encoding" }

    // The argument list may be empty, so never index into it blindly.
    val requestedName = encodingDirective?.args?.firstOrNull()?.name

    val requestedEncoding = requestedName?.let { wanted ->
        Encoding.entries.firstOrNull { it.prefix == wanted }
    }

    requestedEncoding ?: program.encoding.defaultEncoding
}
val isLibrary get() = source.isFromResources
}

View File

@ -149,6 +149,12 @@ Directives
Only use this if you know what you're doing because it could result in invalid assembly code being generated.
- ``verafxmuls`` (block, cx16 target only) uses Vera FX hardware word multiplication on the CommanderX16 for all word multiplications in this block. Warning: this may interfere with IRQs and other Vera operations, so use this only when you know what you're doing. It's safer to explicitly use ``verafx.muls()``.
.. data:: %encoding <encodingname>
Level: module.
Overrides, for the module file in which it occurs, the default text encoding that is used for
strings and characters that have no explicit encoding prefix. This directive can occur only once per module.
You can use one of the recognised encoding names, see :ref:`encodings`.
.. data:: %asmbinary "<filename>" [, <offset>[, <length>]]
Level: not at module scope.
@ -501,6 +507,8 @@ Note: you can also use array indexing on a 'pointer variable', which is basicall
containing a memory address. Currently this is equivalent to directly referencing the bytes in
memory at the given index (and allows index values of word size). See :ref:`pointervars`
.. _encodings:
String
^^^^^^
A string literal can occur with or without an encoding prefix (encoding followed by ':' followed by the string itself).
@ -509,10 +517,11 @@ You can choose to store the string in other encodings such as ``sc`` (screencode
String length is limited to 255 characters.
Here are several examples:
- ``"hello"`` a string translated into the default character encoding (PETSCII)
- ``petscii:"hello"`` same as the above, on CBM machines.
- ``sc:"my name is Alice"`` string with screencode encoding (new syntax)
- ``iso:"Ich heiße François"`` string in iso encoding
- ``"hello"`` a string translated into the default character encoding (PETSCII on the CBM machines)
- ``petscii:"hello"`` string in CBM PETSCII encoding
- ``sc:"my name is Alice"`` string in CBM screencode encoding
- ``iso:"Ich heiße François"`` string in iso-8859-15 encoding
- ``atascii:"I am Atari!"`` string in "atascii" encoding (Atari 8-bit)
There are several escape sequences available to put special characters into your string value:

View File

@ -78,7 +78,6 @@ What if we were to re-introduce Structs in prog8? Some thoughts:
Other language/syntax features to think about
---------------------------------------------
- module directive to set the text encoding for that whole file (iso, petscii, etc.)
- chained assignments `x=y=z=99`
- declare multiple variables `ubyte x,y,z` (if init value present, all get that init value)
- chained comparisons `10<x<20` , `x==y==z` (desugars to `10<x and x<20`, `x==y and y==z`)

View File

@ -126,7 +126,7 @@ unconditionaljump : 'goto' (integerliteral | scoped_identifier) ;
directive :
directivename=('%output' | '%launcher' | '%zeropage' | '%zpreserved' | '%zpallowed' | '%address' | '%import' |
'%breakpoint' | '%asminclude' | '%asmbinary' | '%option' )
'%breakpoint' | '%asminclude' | '%asmbinary' | '%option' | '%encoding' )
(directivearg? | directivearg (',' directivearg)*)
;