improved assignment of single character values and strings

This commit is contained in:
Irmen de Jong 2018-11-22 00:58:21 +01:00
parent 48a6a05109
commit 069f6ea372
11 changed files with 225 additions and 220 deletions

View File

@ -48,6 +48,33 @@ sub start() {
str_p stringp = "hello"
; some string assignments
str stringvar = "??????????\n\n\nnext line\r\r\rnext line after carriagereturn"
ubyte secretnumber = 0
memory uword freadstr_arg = $22 ; argument for FREADSTR
uword testword
ubyte char1 = "@"
ubyte char2 = "\n"
ubyte char3 = "\r"
ubyte char1b = '@'
ubyte char2b = '\n'
ubyte char3b = '\r'
testword = '@'
testword = '\n'
freadstr_arg = '@'
freadstr_arg = '\n'
secretnumber = '@'
secretnumber = '\r'
testword = stringvar ; @todo fix str address assignment
testword = "stringstring" ; @todo fix str address assignment
freadstr_arg = stringvar ; @todo fix str address assignment
freadstr_arg = "stringstring" ; @todo fix str address assignment
secretnumber = "stringstring2222" ; @todo fix str address assignment
; all possible assignments to a BYTE VARIABLE (not array)

View File

@ -16,25 +16,30 @@ sub start() {
byte b1
str stringvar = "??????????"
str stringvar = "??????????\n\n\nnext line\r\r\rnext line after carriagereturn"
ubyte secretnumber = 0
memory uword freadstr_arg = $22 ; argument for FREADSTR
uword testword
ubyte char1 = "@"
ubyte char2 = "\n" ; @todo escapechar
ubyte char3 = "\t" ; @todo escapechar
ubyte char2 = "\n"
ubyte char3 = "\r"
ubyte char1b = '@'
ubyte char2b = '\n'
ubyte char3b = '\r'
testword = '@'
testword = '\n'
freadstr_arg = '@'
freadstr_arg = '\n'
secretnumber = '@'
secretnumber = '\r'
testword = stringvar ; @todo fix str address assignment
testword = "stringstring" ; @todo fix str address assignment
freadstr_arg = stringvar ; @todo fix str address assignment
freadstr_arg = "stringstring" ; @todo fix str address assignment
secretnumber = "stringstring2222" ; @todo fix str address assignment
;testword = stringvar ; @todo fix str address
;testword = "sadfsafsdf" ; @todo fix str address
testword = "@" ; @todo fix argument conversion to UBYTE
testword = "\n" ; @todo fix argument conversion to UBYTE (escapechar)
;freadstr_arg = stringvar
;freadstr_arg = "asdfasdfasdfasdf"
freadstr_arg = "@" ; @todo fix argument conversion to UBYTE
freadstr_arg = "\n" ; @todo fix argument conversion to UBYTE (escapechar)
secretnumber = "@" ; @todo fix argument conversion to UBYTE
secretnumber = "\n" ; @todo fix argument conversion to UBYTE (escapechar)
;secretnumber = "asdfsdf"
address =c64.MEMBOT(1, 40000.w) ; ok!

View File

@ -4,7 +4,6 @@ import org.antlr.v4.runtime.ParserRuleContext
import org.antlr.v4.runtime.tree.TerminalNode
import prog8.compiler.HeapValues
import prog8.compiler.target.c64.Petscii
import prog8.compiler.unescape
import prog8.functions.BuiltinFunctions
import prog8.functions.NotConstArgumentException
import prog8.functions.builtinFunctionReturnType
@ -1959,8 +1958,8 @@ private fun prog8Parser.ExpressionContext.toAst() : IExpression {
else -> throw FatalAstException("invalid datatype for numeric literal")
}
litval.floatliteral()!=null -> LiteralValue(DataType.FLOAT, floatvalue = litval.floatliteral().toAst(), position = litval.toPosition())
litval.stringliteral()!=null -> LiteralValue(DataType.STR, strvalue = litval.stringliteral().text, position = litval.toPosition())
litval.charliteral()!=null -> LiteralValue(DataType.UBYTE, bytevalue = Petscii.encodePetscii(litval.charliteral().text.unescape(), true)[0], position = litval.toPosition())
litval.stringliteral()!=null -> LiteralValue(DataType.STR, strvalue = unescape(litval.stringliteral().text, litval.toPosition()), position = litval.toPosition())
litval.charliteral()!=null -> LiteralValue(DataType.UBYTE, bytevalue = Petscii.encodePetscii(unescape(litval.charliteral().text, litval.toPosition()), true)[0], position = litval.toPosition())
litval.arrayliteral()!=null -> {
val array = litval.arrayliteral()?.toAst()
// the actual type of the arrayspec can not yet be determined here (missing namespace & heap)
@ -2004,6 +2003,7 @@ private fun prog8Parser.ExpressionContext.toAst() : IExpression {
throw FatalAstException(text)
}
private fun prog8Parser.ArrayindexedContext.toAst(): ArrayIndexedExpression {
return ArrayIndexedExpression(identifier()?.toAst() ?: scoped_identifier()?.toAst(),
arrayspec().toAst(),
@ -2116,3 +2116,29 @@ internal fun registerSet(asmReturnvaluesRegisters: Iterable<RegisterOrStatusflag
}
return resultRegisters
}
/**
 * Returns [str] with every tab, newline and carriage-return character replaced by
 * its two-character backslash notation (`\t`, `\n`, `\r`).
 * All other characters, including backslashes, are copied unchanged.
 */
internal fun escape(str: String): String = buildString {
    for (ch in str) {
        when (ch) {
            '\t' -> append("\\t")
            '\n' -> append("\\n")
            '\r' -> append("\\r")
            else -> append(ch)
        }
    }
}
/**
 * Converts the backslash escape sequences in [str] to the characters they denote.
 *
 * Recognised sequences: `\\` (backslash), `\n` (newline), `\r` (carriage return) and
 * `\uXXXX` (the character with the given four-digit hexadecimal code).
 *
 * @param str the text to process
 * @param position source position, used as a prefix in error messages
 * @return the text with all escape sequences replaced
 * @throws AstException for an unknown escape character, a backslash at the very end
 *         of the text, or a `\u` that is not followed by four hexadecimal digits
 */
internal fun unescape(str: String, position: Position): String {
    val result = StringBuilder(str.length)
    var index = 0
    while (index < str.length) {
        val c = str[index++]
        if (c != '\\') {
            result.append(c)
            continue
        }
        // A backslash must be followed by at least one more character.
        if (index >= str.length)
            throw AstException("$position invalid escape sequence in string: incomplete escape at end of string")
        val ec = str[index++]
        result.append(when (ec) {
            '\\' -> '\\'
            'n' -> '\n'
            'r' -> '\r'
            'u' -> {
                // Exactly four hex digits are required; report anything else as a
                // source error rather than letting an index/number-format exception escape.
                if (index + 4 > str.length)
                    throw AstException("$position invalid escape sequence in string: incomplete \\u escape")
                val hex = str.substring(index, index + 4)
                if (!hex.all { Character.digit(it, 16) >= 0 })
                    throw AstException("$position invalid escape sequence in string: \\u$hex")
                index += 4
                hex.toInt(16).toChar()
            }
            else -> throw AstException("$position invalid escape char in string: \\$ec")
        })
    }
    return result.toString()
}

View File

@ -6,7 +6,6 @@ import prog8.compiler.intermediate.IntermediateProgram
import prog8.compiler.intermediate.Opcode
import prog8.compiler.intermediate.Value
import prog8.stackvm.Syscall
import prog8.stackvm.VmExecutionException
import java.util.*
import kotlin.math.abs
@ -31,32 +30,6 @@ fun Number.toHex(): String {
}
/**
 * Returns this string with its backslash escape sequences replaced by the
 * characters they denote.
 *
 * Recognised sequences: `\\`, `\b`, `\n`, `\r`, `\t` and `\uXXXX` (the character with
 * the given four-digit hexadecimal code).
 *
 * @throws VmExecutionException for an unknown escape character, a backslash at the
 *         very end of the string, or a `\u` not followed by four hexadecimal digits
 */
fun String.unescape(): String {
    val result = StringBuilder(length)
    var index = 0
    while (index < length) {
        val c = this[index++]
        if (c != '\\') {
            result.append(c)
            continue
        }
        // A backslash must be followed by at least one more character.
        if (index >= length)
            throw VmExecutionException("invalid escape sequence: incomplete escape at end of string")
        val ec = this[index++]
        result.append(when (ec) {
            '\\' -> '\\'
            'b' -> '\b'
            'n' -> '\n'
            'r' -> '\r'
            't' -> '\t'
            'u' -> {
                // Exactly four hex digits are required; anything else is reported
                // as a VmExecutionException instead of an index/number-format error.
                if (index + 4 > length)
                    throw VmExecutionException("invalid escape sequence: incomplete \\u escape")
                val hex = substring(index, index + 4)
                if (!hex.all { Character.digit(it, 16) >= 0 })
                    throw VmExecutionException("invalid escape sequence: \\u$hex")
                index += 4
                hex.toInt(16).toChar()
            }
            else -> throw VmExecutionException("invalid escape char: $ec")
        })
    }
    return result.toString()
}
class HeapValues {
data class HeapValue(val type: DataType, val str: String?, val array: IntArray?, val doubleArray: DoubleArray?) {
override fun equals(other: Any?): Boolean {
@ -181,7 +154,7 @@ private class StatementTranslator(private val prog: IntermediateProgram,
override fun process(block: Block): IStatement {
prog.newBlock(block.scopedname, block.name, block.address)
processVariables(block) // @todo optimize initializations with same value: load the value only once
processVariables(block) // @todo optimize initializations with same value: load the value only once (sort on initalization value, datatype ?)
prog.label(block.scopedname)
prog.line(block.position)
translate(block.statements)

View File

@ -313,7 +313,7 @@ class IntermediateProgram(val name: String, var loadAddress: Int, val heap: Heap
out.println("%end_memory")
out.println("%heap")
heap.allStrings().forEach {
out.println("${it.index} ${it.value.type.toString().toLowerCase()} \"${it.value.str}\"")
out.println("${it.index} ${it.value.type.toString().toLowerCase()} \"${escape(it.value.str!!)}\"")
}
heap.allArrays().forEach {
out.println("${it.index} ${it.value.type.toString().toLowerCase()} ${it.value.array!!.toList()}")

View File

@ -5,7 +5,7 @@ package prog8.compiler.target.c64
import prog8.ast.DataType
import prog8.ast.Register
import prog8.ast.escape
import prog8.compiler.*
import prog8.compiler.intermediate.*
import prog8.stackvm.Syscall
@ -235,7 +235,7 @@ class AsmGen(val options: CompilationOptions, val program: IntermediateProgram,
DataType.STR_PS -> {
val rawStr = heap.get(v.second.heapId).str!!
val bytes = encodeStr(rawStr, v.second.type).map { "$" + it.toString(16).padStart(2, '0') }
out("${v.first}\t; ${v.second.type} \"$rawStr\"")
out("${v.first}\t; ${v.second.type} \"${escape(rawStr)}\"")
for (chunk in bytes.chunked(16))
out("\t.byte " + chunk.joinToString())
}

View File

@ -46,7 +46,7 @@ class ConstantFolding(private val namespace: INameScope, private val heap: HeapV
// vardecl: for byte/word vars, convert char/string of length 1 initialization values to ubyte integer
val literal = decl.value as? LiteralValue
if (literal != null && literal.isString && literal.strvalue(heap).length == 1) {
val petscii = Petscii.encodePetscii(literal.strvalue(heap))[0]
val petscii = Petscii.encodePetscii(literal.strvalue(heap), true)[0]
val newValue = LiteralValue(DataType.UBYTE, bytevalue = petscii, position = literal.position)
decl.value = newValue
}
@ -541,7 +541,6 @@ class ConstantFolding(private val namespace: INameScope, private val heap: HeapV
when(targetDt) {
DataType.UWORD -> {
// we can convert to UWORD: any UBYTE, BYTE/WORD that are >=0, FLOAT that's an integer 0..65535,
// STR of length 1 (take the character's byte value)
if(lv.type==DataType.UBYTE)
assignment.value = LiteralValue(DataType.UWORD, wordvalue = lv.asIntegerValue, position=lv.position)
else if(lv.type==DataType.BYTE && lv.bytevalue!!>=0)
@ -553,17 +552,9 @@ class ConstantFolding(private val namespace: INameScope, private val heap: HeapV
if(floor(d)==d && d>=0 && d<=65535)
assignment.value = LiteralValue(DataType.UWORD, wordvalue=floor(d).toInt(), position=lv.position)
}
else if(lv.type in StringDatatypes) {
val str = lv.strvalue(heap)
if(str.length==1) {
val petscii = Petscii.encodePetscii(str)[0]
assignment.value = LiteralValue(DataType.UWORD, wordvalue = petscii.toInt(), position = lv.position)
}
}
}
DataType.UBYTE -> {
// we can convert to UBYTE: UWORD <=255, BYTE >=0, FLOAT that's an integer 0..255,
// STR of length 1 (take the character's byte value)
if(lv.type==DataType.UWORD && lv.wordvalue!! <= 255)
assignment.value = LiteralValue(DataType.UBYTE, lv.wordvalue.toShort(), position=lv.position)
else if(lv.type==DataType.BYTE && lv.bytevalue!! >=0)
@ -573,13 +564,6 @@ class ConstantFolding(private val namespace: INameScope, private val heap: HeapV
if(floor(d)==d && d >=0 && d<=255)
assignment.value = LiteralValue(DataType.UBYTE, floor(d).toShort(), position=lv.position)
}
else if(lv.type in StringDatatypes) {
val str = lv.strvalue(heap)
if(str.length==1) {
val petscii = Petscii.encodePetscii(str)[0]
assignment.value = LiteralValue(DataType.UBYTE, bytevalue = petscii, position = lv.position)
}
}
}
DataType.BYTE -> {
// we can convert to BYTE: UWORD/UBYTE <= 127, FLOAT that's an integer 0..127
@ -595,7 +579,6 @@ class ConstantFolding(private val namespace: INameScope, private val heap: HeapV
}
DataType.WORD -> {
// we can convert to WORD: any UBYTE/BYTE, UWORD <= 32767, FLOAT that's an integer -32768..32767,
// STR of length 1 (take the character's byte value)
if(lv.type==DataType.UBYTE || lv.type==DataType.BYTE)
assignment.value = LiteralValue(DataType.WORD, wordvalue=lv.bytevalue!!.toInt(), position=lv.position)
else if(lv.type==DataType.UWORD && lv.wordvalue!! <= 32767)
@ -605,13 +588,6 @@ class ConstantFolding(private val namespace: INameScope, private val heap: HeapV
if(floor(d)==d && d>=-32768 && d<=32767)
assignment.value = LiteralValue(DataType.BYTE, floor(d).toShort(), position=lv.position)
}
else if(lv.type in StringDatatypes) {
val str = lv.strvalue(heap)
if(str.length==1) {
val petscii = Petscii.encodePetscii(str)[0]
assignment.value = LiteralValue(DataType.WORD, wordvalue= petscii.toInt(), position = lv.position)
}
}
}
DataType.FLOAT -> {
if(lv.isNumeric)

View File

@ -1,13 +1,12 @@
// Generated from /home/irmen/Projects/prog8/compiler/antlr/prog8.g4 by ANTLR 4.7
package prog8.parser;
import org.antlr.v4.runtime.Lexer;
import org.antlr.v4.runtime.CharStream;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNDeserializer;
import org.antlr.v4.runtime.atn.LexerATNSimulator;
import org.antlr.v4.runtime.atn.PredictionContextCache;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.misc.*;
@SuppressWarnings({"all", "warnings", "unchecked", "unused", "cast"})
public class prog8Lexer extends Lexer {

View File

@ -1,13 +1,15 @@
// Generated from /home/irmen/Projects/prog8/compiler/antlr/prog8.g4 by ANTLR 4.7
package prog8.parser;
import org.antlr.v4.runtime.atn.*;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.*;
import org.antlr.v4.runtime.misc.*;
import org.antlr.v4.runtime.tree.*;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNDeserializer;
import org.antlr.v4.runtime.atn.ParserATNSimulator;
import org.antlr.v4.runtime.atn.PredictionContextCache;
import org.antlr.v4.runtime.dfa.DFA;
import org.antlr.v4.runtime.tree.TerminalNode;
import java.util.List;
import java.util.Iterator;
import java.util.ArrayList;
@SuppressWarnings({"all", "warnings", "unchecked", "unused", "cast"})
public class prog8Parser extends Parser {

View File

@ -3,7 +3,6 @@ package prog8.stackvm
import prog8.ast.DataType
import prog8.compiler.HeapValues
import prog8.compiler.intermediate.*
import prog8.compiler.unescape
import java.io.File
import java.util.*
import java.util.regex.Pattern
@ -85,7 +84,7 @@ class Program (val name: String,
DataType.STR,
DataType.STR_P,
DataType.STR_S,
DataType.STR_PS -> heap.add(it.second, it.third.substring(1, it.third.length-1).unescape())
DataType.STR_PS -> heap.add(it.second, it.third.substring(1, it.third.length-1))
DataType.ARRAY_UB, DataType.ARRAY_B,
DataType.ARRAY_UW, DataType.ARRAY_W -> {
val numbers = it.third.substring(1, it.third.length-1).split(',')

View File

@ -273,8 +273,6 @@ but they have some special properties because they are considered to be *text*.
Strings in your source code files will be encoded (translated from ASCII/UTF-8) into either CBM PETSCII or C-64 screencodes.
PETSCII is the default choice. If you need screencodes (also called 'poke' codes) instead,
you have to use the ``str_s`` variants of the string type identifier.
If you assign a string literal of length 1 to a non-string variable, it is treated as an *unsigned byte* value instead,
which has the PETSCII value of that single character.
.. caution::
It's probably best that you don't change strings after they're created.