mirror of
synced 2025-03-24 16:29:30 +00:00
added '@' alternative string/char encoding
This commit is contained in:
@ -55,8 +55,12 @@ private fun compileMain(args: Array<String>) {
with(CompilationTarget) {
name = "c64"
machine = C64MachineDefinition
encodeString = { str -> Petscii.encodePetscii(str, true) }
decodeString = { bytes -> Petscii.decodePetscii(bytes, true) }
encodeString = { str, altEncoding ->
if(altEncoding) Petscii.encodeScreencode(str, true) else Petscii.encodePetscii(str, true)
decodeString = { bytes, altEncoding ->
if(altEncoding) Petscii.decodeScreencode(bytes, true) else Petscii.decodePetscii(bytes, true)
asmGenerator = ::AsmGen
@ -380,8 +380,13 @@ private fun prog8Parser.DirectiveContext.toAst() : Directive =
Directive(directivename.text, directivearg().map { it.toAst() }, toPosition())
private fun prog8Parser.DirectiveargContext.toAst() : DirectiveArg =
DirectiveArg(stringliteral()?.text, identifier()?.text, integerliteral()?.toAst()?.number?.toInt(), toPosition())
private fun prog8Parser.DirectiveargContext.toAst() : DirectiveArg {
val str = stringliteral()
if(str?.ALT_STRING_ENCODING() != null)
throw AstException("${toPosition()} can't use alternate string encodings for directive arguments")
return DirectiveArg(stringliteral()?.text, identifier()?.text, integerliteral()?.toAst()?.number?.toInt(), toPosition())
private fun prog8Parser.IntegerliteralContext.toAst(): NumericLiteral {
@ -456,11 +461,13 @@ private fun prog8Parser.ExpressionContext.toAst() : Expression {
else -> throw FatalAstException("invalid datatype for numeric literal")
litval.floatliteral()!=null -> NumericLiteralValue(DataType.FLOAT, litval.floatliteral().toAst(), litval.toPosition())
litval.stringliteral()!=null -> StringLiteralValue(unescape(litval.stringliteral().text, litval.toPosition()), litval.toPosition())
litval.stringliteral()!=null -> litval.stringliteral().toAst()
litval.charliteral()!=null -> {
try {
val cc=litval.charliteral()
NumericLiteralValue(DataType.UBYTE, CompilationTarget.encodeString(
unescape(litval.charliteral().text, litval.toPosition()))[0], litval.toPosition())
unescape(litval.charliteral().SINGLECHAR().text, litval.toPosition()),
litval.charliteral().ALT_STRING_ENCODING()!=null)[0], litval.toPosition())
} catch (ce: CharConversionException) {
throw SyntaxError(ce.message ?: ce.toString(), litval.toPosition())
@ -519,6 +526,10 @@ private fun prog8Parser.ExpressionContext.toAst() : Expression {
private fun prog8Parser.StringliteralContext.toAst(): StringLiteralValue =
StringLiteralValue(unescape(this.STRING().text, toPosition()), ALT_STRING_ENCODING()!=null, toPosition())
private fun prog8Parser.ArrayindexedContext.toAst(): ArrayIndexedExpression {
return ArrayIndexedExpression(scoped_identifier().toAst(),
@ -430,6 +430,7 @@ class StructLiteralValue(var values: List<Expression>,
private var heapIdSequence = 0 // unique ids for strings and arrays "on the heap"
class StringLiteralValue(val value: String,
val altEncoding: Boolean, // such as: screencodes instead of Petscii for the C64
override val position: Position) : Expression() {
override lateinit var parent: Node
@ -445,11 +446,11 @@ class StringLiteralValue(val value: String,
override fun toString(): String = "'${escape(value)}'"
override fun inferType(program: Program): InferredTypes.InferredType = InferredTypes.knownFor(DataType.STR)
operator fun compareTo(other: StringLiteralValue): Int = value.compareTo(other.value)
override fun hashCode(): Int = value.hashCode()
override fun hashCode(): Int = Objects.hash(value, altEncoding)
override fun equals(other: Any?): Boolean {
if(other==null || other !is StringLiteralValue)
return false
return value==other.value
return value==other.value && altEncoding == other.altEncoding
@ -552,9 +553,9 @@ class RangeExpr(var from: Expression,
val fromString = from as? StringLiteralValue
val toString = to as? StringLiteralValue
if(fromString!=null && toString!=null ) {
// string range -> int range over petscii values
fromVal = CompilationTarget.encodeString(fromString.value)[0].toInt()
toVal = CompilationTarget.encodeString(toString.value)[0].toInt()
// string range -> int range over character values
// (each endpoint is encoded using the altEncoding flag of its own string literal)
fromVal = CompilationTarget.encodeString(fromString.value, fromString.altEncoding)[0].toInt()
toVal = CompilationTarget.encodeString(toString.value, toString.altEncoding)[0].toInt()
} else {
val fromLv = from as? NumericLiteralValue
val toLv = to as? NumericLiteralValue
@ -326,12 +326,12 @@ internal class AstIdentifiersChecker(private val program: Program) : IAstModifyi
if(constvalue!=null) {
if (expr.operator == "*") {
// repeat a string a number of times
return StringLiteralValue(string.value.repeat(constvalue.number.toInt()), expr.position)
return StringLiteralValue(string.value.repeat(constvalue.number.toInt()), string.altEncoding, expr.position)
if(expr.operator == "+" && operand is StringLiteralValue) {
// concatenate two strings
return StringLiteralValue("${string.value}${operand.value}", expr.position)
return StringLiteralValue("${string.value}${operand.value}", string.altEncoding, expr.position)
return expr
@ -9,8 +9,8 @@ internal interface CompilationTarget {
companion object {
lateinit var name: String
lateinit var machine: IMachineDefinition
lateinit var encodeString: (str: String) -> List<Short>
lateinit var decodeString: (bytes: List<Short>) -> String
lateinit var encodeString: (str: String, altEncoding: Boolean) -> List<Short>
lateinit var decodeString: (bytes: List<Short>, altEncoding: Boolean) -> String
lateinit var asmGenerator: (Program, Zeropage, CompilationOptions, Path) -> IAssemblyGenerator
@ -206,8 +206,8 @@ internal class AsmGen(private val program: Program,
return "$b0, $b1, $b2, $b3, $b4"
private fun petscii(str: String): List<Short> {
val bytes = Petscii.encodePetscii(str, true)
private fun encode(str: String, altEncoding: Boolean): List<Short> {
val bytes = if(altEncoding) Petscii.encodeScreencode(str, true) else Petscii.encodePetscii(str, true)
return bytes.plus(0)
@ -246,8 +246,8 @@ internal class AsmGen(private val program: Program,
DataType.FLOAT -> out("${decl.name}\t.byte 0,0,0,0,0 ; float")
DataType.STRUCT -> {} // is flattened
DataType.STR -> {
val string = (decl.value as StringLiteralValue).value
outputStringvar(decl, petscii(string))
val str = decl.value as StringLiteralValue
outputStringvar(decl, encode(str.value, str.altEncoding))
DataType.ARRAY_UB -> {
val data = makeArrayFillDataUnsigned(decl)
@ -331,7 +331,10 @@ internal class AsmGen(private val program: Program,
// special treatment for string types: merge strings that are identical
val encodedstringVars = normalVars
.filter {it.datatype == DataType.STR }
.map { it to petscii((it.value as StringLiteralValue).value) }
.map {
val str = it.value as StringLiteralValue
it to encode(str.value, str.altEncoding)
.groupBy({it.second}, {it.first})
for((encoded, variables) in encodedstringVars) {
variables.dropLast(1).forEach { out(it.name) }
@ -180,7 +180,7 @@ internal class StatementOptimizer(private val program: Program) : IAstModifyingV
val vardecl = stringVar.targetVarDecl(program.namespace)!!
val string = vardecl.value!! as StringLiteralValue
if(string.value.length==1) {
val firstCharEncoded = CompilationTarget.encodeString(string.value)[0]
val firstCharEncoded = CompilationTarget.encodeString(string.value, string.altEncoding)[0]
functionCallStatement.args.add(NumericLiteralValue.optimalInteger(firstCharEncoded.toInt(), functionCallStatement.position))
functionCallStatement.target = IdentifierReference(listOf("c64", "CHROUT"), functionCallStatement.target.position)
@ -188,7 +188,7 @@ internal class StatementOptimizer(private val program: Program) : IAstModifyingV
return functionCallStatement
} else if(string.value.length==2) {
val firstTwoCharsEncoded = CompilationTarget.encodeString(string.value.take(2))
val firstTwoCharsEncoded = CompilationTarget.encodeString(string.value.take(2), string.altEncoding)
val scope = AnonymousScope(mutableListOf(), functionCallStatement.position)
scope.statements.add(FunctionCallStatement(IdentifierReference(listOf("c64", "CHROUT"), functionCallStatement.target.position),
mutableListOf(NumericLiteralValue.optimalInteger(firstTwoCharsEncoded[0].toInt(), functionCallStatement.position)),
@ -570,10 +570,10 @@ class RuntimeValueNumeric(type: DataType, num: Number): RuntimeValueBase(type) {
class RuntimeValueString(val str: String, val heapId: Int?): RuntimeValueBase(DataType.STR) {
class RuntimeValueString(val str: String, val altEncoding: Boolean, val heapId: Int?): RuntimeValueBase(DataType.STR) {
companion object {
fun fromLv(string: StringLiteralValue): RuntimeValueString {
return RuntimeValueString(string.value, string.heapId)
return RuntimeValueString(string.value, string.altEncoding, string.heapId)
@ -575,7 +575,7 @@ class AstVm(val program: Program, compilationTarget: String) {
DataType.UWORD -> mem.setUWord(address, (value as RuntimeValueNumeric).wordval!!)
DataType.WORD -> mem.setSWord(address, (value as RuntimeValueNumeric).wordval!!)
DataType.FLOAT -> mem.setFloat(address, (value as RuntimeValueNumeric).floatval!!)
DataType.STR -> mem.setString(address, (value as RuntimeValueString).str)
DataType.STR -> mem.setString(address, (value as RuntimeValueString).str, value.altEncoding)
else -> throw VmExecutionException("weird memaddress type $decl")
} else
@ -626,7 +626,7 @@ class AstVm(val program: Program, compilationTarget: String) {
val ident = contextStmt.definingScope().lookup(targetArrayIndexed.identifier.nameInSource, contextStmt) as? VarDecl
?: throw VmExecutionException("can't find assignment target ${target.identifier}")
val identScope = ident.definingScope()
runtimeVariables.set(identScope, ident.name, RuntimeValueString(newstr, array.heapId))
runtimeVariables.set(identScope, ident.name, RuntimeValueString(newstr, false, array.heapId))
else {
@ -124,7 +124,7 @@ fun evaluate(expr: Expression, ctx: EvalContext): RuntimeValueBase {
DataType.UWORD -> RuntimeValueNumeric(DataType.UWORD, ctx.mem.getUWord(address))
DataType.WORD -> RuntimeValueNumeric(DataType.WORD, ctx.mem.getSWord(address))
DataType.FLOAT -> RuntimeValueNumeric(DataType.FLOAT, ctx.mem.getFloat(address))
DataType.STR -> RuntimeValueString(ctx.mem.getString(address), null)
DataType.STR -> RuntimeValueString(ctx.mem.getString(address, false), false, null)
else -> throw VmExecutionException("unexpected datatype $variable")
@ -93,14 +93,14 @@ class Memory(private val readObserver: (address: Int, value: Short) -> Short,
getUByte(address + 3), getUByte(address + 4)).toDouble()
fun setString(address: Int, str: String) {
val encoded = CompilationTarget.encodeString(str)
fun setString(address: Int, str: String, altEncoding: Boolean) {
val encoded = CompilationTarget.encodeString(str, altEncoding)
var addr = address
for (c in encoded) setUByte(addr++, c)
setUByte(addr, 0)
fun getString(strAddress: Int): String {
fun getString(strAddress: Int, altEncoding: Boolean): String {
val encoded = mutableListOf<Short>()
var addr = strAddress
while(true) {
@ -108,7 +108,7 @@ class Memory(private val readObserver: (address: Int, value: Short) -> Short,
if(byte==0.toShort()) break
return CompilationTarget.decodeString(encoded)
return CompilationTarget.decodeString(encoded, altEncoding)
fun clear() {
@ -83,8 +83,11 @@ class TestParserNumericLiteralValue {
fun testEqualsRef() {
assertEquals(StringLiteralValue("hello", dummyPos), StringLiteralValue("hello", dummyPos))
assertNotEquals(StringLiteralValue("hello", dummyPos), StringLiteralValue("bye", dummyPos))
assertEquals(StringLiteralValue("hello", false, dummyPos), StringLiteralValue("hello", false, dummyPos))
assertNotEquals(StringLiteralValue("hello", false, dummyPos), StringLiteralValue("bye", false, dummyPos))
assertEquals(StringLiteralValue("hello", true, dummyPos), StringLiteralValue("hello", true, dummyPos))
assertNotEquals(StringLiteralValue("hello", true, dummyPos), StringLiteralValue("bye", true, dummyPos))
assertNotEquals(StringLiteralValue("hello", true, dummyPos), StringLiteralValue("hello", false, dummyPos))
val lvOne = NumericLiteralValue(DataType.UBYTE, 1, dummyPos)
val lvTwo = NumericLiteralValue(DataType.UBYTE, 2, dummyPos)
@ -369,8 +369,8 @@ class TestPetscii {
assertTrue(ten <= ten)
assertFalse(ten < ten)
val abc = StringLiteralValue("abc", Position("", 0, 0, 0))
val abd = StringLiteralValue("abd", Position("", 0, 0, 0))
val abc = StringLiteralValue("abc", false, Position("", 0, 0, 0))
val abd = StringLiteralValue("abd", false, Position("", 0, 0, 0))
assertEquals(abc, abc)
@ -188,9 +188,11 @@ Values will usually be part of an expression or assignment statement::
12345 ; integer number
$aa43 ; hex integer number
%100101 ; binary integer number (% is also remainder operator so be careful)
"Hi, I am a string" ; text string
'a' ; petscii value (byte) for the letter a
-33.456e52 ; floating point number
"Hi, I am a string" ; text string, encoded with compiler target default encoding
'a' ; byte value (ubyte) for the letter a
@"Alternate" ; text string, encoded with alternate encoding
@'a' ; byte value of the letter a, using alternate encoding
byte counter = 42 ; variable of size 8 bits, with initial value 42
@ -271,8 +273,12 @@ Strings
Strings are a sequence of characters enclosed in ``"`` quotes. The length is limited to 255 characters.
They're stored and treated much the same as a byte array,
but they have some special properties because they are considered to be *text*.
Strings in your source code files will be encoded (translated from ASCII/UTF-8) into the byte-encoding
that is used on the target platform. For the C-64, this is CBM PETSCII.
Strings in your source code files will be encoded (translated from ASCII/UTF-8) into bytes via the
default encoding that is used on the target platform. For the C-64, this is CBM PETSCII.
Alternate-encoding strings (prefixed with ``@``) will be encoded via the alternate encoding for the
platform (if defined). For the C-64, that is SCREEN CODES (also known as POKE codes).
This @-prefix can also be used for character byte values.
You can concatenate two string literals using '+' (not very useful though) or repeat
a string literal a given number of times using '*'::
@ -230,6 +230,7 @@ Various examples::
byte age = 2018 - 1974
float wallet = 55.25
str name = "my name is Irmen"
str name = @"my name is Irmen" ; string with alternative byte encoding
uword address = &counter
byte[] values = [11, 22, 33, 44, 55]
byte[5] values ; array of 5 bytes, initially set to zero
@ -248,7 +249,7 @@ Prog8 supports the following data types:
type identifier type storage size example var declaration and literal value
=============== ======================= ================= =========================================
``byte`` signed byte 1 byte = 8 bits ``byte myvar = -22``
``ubyte`` unsigned byte 1 byte = 8 bits ``ubyte myvar = $8f``
``ubyte`` unsigned byte 1 byte = 8 bits ``ubyte myvar = $8f``, ``ubyte c = 'a'``, ``ubyte c2 = @'a'``
-- boolean 1 byte = 8 bits ``byte myvar = true`` or ``byte myvar = false``
The true and false are actually just aliases
for the byte values 1 and 0.
@ -9,19 +9,29 @@ main {
c64.VMCSB |= 2 ; switch to lowercase charset
str s1 = "HELLO hello 1234 @[/]\n" ; regular strings have default encoding (petscii on c64)
str s2 = @"HELLO hello 1234 @[/]\n" ; TODO @-strings for alternate encoding (screencode on c64)
str s1 = "HELLO hello 1234 @[/]" ; regular strings have default encoding (petscii on c64)
str s2 = @"HELLO hello 1234 @[/]" ; alternative encoding (screencodes on c64)
c64scr.print("\n\n\n\nString output via print:\n")
c64scr.print("petscii-str: ")
c64scr.print("\nscrcode-str: ")
c64scr.print("\nThe top two screen lines are set via screencodes.\n")
c64scr.print("\n\nThe top two screen lines are set via screencodes.\n")
ubyte i
for i in 0 to len(s1)-1
@($0400+i) = s1[i]
for i in 0 to len(s2)-1
@($0400+40+i) = s2[i]
ubyte c1 = 'z'
ubyte c2 = @'z'
c64scr.print("\npetscii z=")
c64scr.print("\nscreencode z=")
@ -25,6 +25,7 @@ DEC_INTEGER : ('0'..'9') | (('1'..'9')('0'..'9')+);
HEX_INTEGER : '$' (('a'..'f') | ('A'..'F') | ('0'..'9'))+ ;
BIN_INTEGER : '%' ('0' | '1')+ ;
FLOAT_NUMBER : FNUMBER (('E'|'e') ('+' | '-')? FNUMBER)? ; // sign comes later from unary expression
fragment FNUMBER : ('0' .. '9') + ('.' ('0' .. '9') +)? ;
@ -219,9 +220,9 @@ arrayliteral : '[' EOL? expression (',' EOL? expression)* EOL? ']' ; // y
structliteral : '{' EOL? expression (',' EOL? expression)* EOL? '}' ; // you can split the values over several lines
stringliteral : STRING ;
charliteral : SINGLECHAR ;
floatliteral : FLOAT_NUMBER ;
@ -34,10 +34,10 @@ public class prog8Lexer extends Lexer {
T__87=88, T__88=89, T__89=90, T__90=91, T__91=92, T__92=93, T__93=94,
T__94=95, T__95=96, T__96=97, T__97=98, T__98=99, T__99=100, T__100=101,
T__101=102, T__102=103, T__103=104, T__104=105, T__105=106, T__106=107,
T__107=108, T__108=109, T__109=110, LINECOMMENT=111, COMMENT=112, WS=113,
T__107=108, T__108=109, LINECOMMENT=110, COMMENT=111, WS=112, EOL=113,
public static String[] channelNames = {
@ -61,10 +61,10 @@ public class prog8Lexer extends Lexer {
"T__81", "T__82", "T__83", "T__84", "T__85", "T__86", "T__87", "T__88",
"T__89", "T__90", "T__91", "T__92", "T__93", "T__94", "T__95", "T__96",
"T__97", "T__98", "T__99", "T__100", "T__101", "T__102", "T__103", "T__104",
"T__105", "T__106", "T__107", "T__108", "T__109", "LINECOMMENT", "COMMENT",
"T__105", "T__106", "T__107", "T__108", "LINECOMMENT", "COMMENT", "WS",
public static final String[] ruleNames = makeRuleNames();
@ -79,13 +79,13 @@ public class prog8Lexer extends Lexer {
"'>>='", "'++'", "'--'", "'+'", "'-'", "'~'", "'**'", "'*'", "'/'", "'%'",
"'<<'", "'>>'", "'<'", "'>'", "'<='", "'>='", "'=='", "'!='", "'^'",
"'|'", "'to'", "'step'", "'and'", "'or'", "'xor'", "'not'", "'('", "')'",
"'as'", "'@'", "'return'", "'break'", "'continue'", "'.'", "'A'", "'X'",
"'Y'", "'AX'", "'AY'", "'XY'", "'Pc'", "'Pz'", "'Pn'", "'Pv'", "'.w'",
"'true'", "'false'", "'%asm'", "'sub'", "'->'", "'asmsub'", "'romsub'",
"'stack'", "'clobbers'", "'if'", "'else'", "'if_cs'", "'if_cc'", "'if_eq'",
"'if_z'", "'if_ne'", "'if_nz'", "'if_pl'", "'if_pos'", "'if_mi'", "'if_neg'",
"'if_vs'", "'if_vc'", "'for'", "'in'", "'while'", "'repeat'", "'until'",
"'when'", null, null, null, null, "'void'", null, null, null, null, "'&'",
"'as'", "'return'", "'break'", "'continue'", "'.'", "'A'", "'X'", "'Y'",
"'AX'", "'AY'", "'XY'", "'Pc'", "'Pz'", "'Pn'", "'Pv'", "'.w'", "'true'",
"'false'", "'%asm'", "'sub'", "'->'", "'asmsub'", "'romsub'", "'stack'",
"'clobbers'", "'if'", "'else'", "'if_cs'", "'if_cc'", "'if_eq'", "'if_z'",
"'if_ne'", "'if_nz'", "'if_pl'", "'if_pos'", "'if_mi'", "'if_neg'", "'if_vs'",
"'if_vc'", "'for'", "'in'", "'while'", "'repeat'", "'until'", "'when'",
null, null, null, null, "'void'", null, null, null, null, "'&'", "'@'",
null, null, null, null, "'@zp'", "'[]'"
@ -101,8 +101,8 @@ public class prog8Lexer extends Lexer {
null, null, null, null, null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null, null, null, null, null,
null, null, null, "LINECOMMENT", "COMMENT", "WS", "EOL", "VOID", "NAME",
@ -243,50 +243,50 @@ public class prog8Lexer extends Lexer {
"\33\65\34\67\359\36;\37= ?!A\"C#E$G%I&K\'M(O)Q*S+U,W-Y.[/]\60_\61a\62"+
"\61\32\63\33\65\34\67\359\36;\37= ?!A\"C#E$G%I&K\'M(O)Q*S+U,W-Y.[/]\60"+
@ -313,22 +313,22 @@ public class prog8Lexer extends Lexer {
@ -402,87 +402,87 @@ public class prog8Lexer extends Lexer {
@ -491,7 +491,7 @@ public class prog8Lexer extends Lexer {
@ -504,7 +504,7 @@ public class prog8Lexer extends Lexer {
public static final ATN _ATN =
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user