diff --git a/compiler/res/version.txt b/compiler/res/version.txt index 515be8f91..234f330ff 100644 --- a/compiler/res/version.txt +++ b/compiler/res/version.txt @@ -1 +1 @@ -4.4 +4.5-SNAPSHOT diff --git a/compiler/src/prog8/ast/antlr/Antr2Kotlin.kt b/compiler/src/prog8/ast/antlr/Antr2Kotlin.kt index 942556244..620f850d6 100644 --- a/compiler/src/prog8/ast/antlr/Antr2Kotlin.kt +++ b/compiler/src/prog8/ast/antlr/Antr2Kotlin.kt @@ -664,6 +664,11 @@ internal fun unescape(str: String, position: Position): String { 'u' -> { "${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}".toInt(16).toChar() } + '$' -> { + // special hack 0x8000..0x80ff will be outputted verbatim without encoding + val hex = ("" + iter.nextChar() + iter.nextChar()).toInt(16) + (0x8000 + hex).toChar() + } else -> throw SyntaxError("invalid escape char in string: \\$ec", position) }) } else { diff --git a/compiler/src/prog8/compiler/target/c64/Petscii.kt b/compiler/src/prog8/compiler/target/c64/Petscii.kt index f569c314f..2b92af0d0 100644 --- a/compiler/src/prog8/compiler/target/c64/Petscii.kt +++ b/compiler/src/prog8/compiler/target/c64/Petscii.kt @@ -1056,6 +1056,10 @@ object Petscii { val petscii = lookup[it] petscii?.toShort() ?: if(it=='\u0000') 0.toShort() + else if(it in '\u8000'..'\u80ff') { + // special case: take the lower 8 bit hex value directly + (it.toInt() - 0x8000).toShort() + } else { val case = if (lowercase) "lower" else "upper" throw CharConversionException("no ${case}case Petscii character for '$it' (${it.toShort()})") @@ -1074,6 +1078,10 @@ object Petscii { val screencode = lookup[it] screencode?.toShort() ?: if(it=='\u0000') 0.toShort() + else if(it in '\u8000'..'\u80ff') { + // special case: take the lower 8 bit hex value directly + (it.toInt() - 0x8000).toShort() + } else { val case = if (lowercase) "lower" else "upper" throw CharConversionException("no ${case}Screencode character for '$it' (${it.toShort()})") diff --git a/docs/source/programming.rst 
b/docs/source/programming.rst index 0870cecb3..4aa8c46e7 100644 --- a/docs/source/programming.rst +++ b/docs/source/programming.rst @@ -287,7 +287,7 @@ This @-prefix can also be used for character byte values. You can concatenate two string literals using '+' (not very useful though) or repeat a string literal a given number of times using '*'. You can also assign a new string value to another string. No bounds check is done so be sure the destination string is -large enough to contain the new value:: +large enough to contain the new value (it is overwritten in memory):: str string1 = "first part" + "second part" str string2 = "hello!" * 10 @@ -296,6 +296,12 @@ large enough to contain the new value:: string1 = "new value" +There are several 'escape sequences' to help you put special characters into strings, such +as newlines, quote characters themselves, and so on. The ones used most often are +``\\``, ``\"``, ``\n``, ``\r``. For a detailed description of all of them and what they mean, +read the syntax reference on strings. + + .. info:: Strings and uwords (=memory address) can often be interchanged. An array of strings is actually an array of uwords where every element is the memory diff --git a/docs/source/syntaxreference.rst b/docs/source/syntaxreference.rst index dda688e4f..fabba7db6 100644 --- a/docs/source/syntaxreference.rst +++ b/docs/source/syntaxreference.rst @@ -78,7 +78,7 @@ Directives - style ``full`` -- claim the whole ZP for variables for the program, overwriting everything, except the few addresses mentioned above that are used by the system's IRQ routine. Even though the default IRQ routine is still active, it is impossible to use most BASIC and KERNAL ROM routines. - This includes many floating point operations and several utility routines that do I/O, such as ``print_string``. + This includes many floating point operations and several utility routines that do I/O, such as ``print``. 
This option makes programs smaller and faster because even more variables can be stored in the ZP (which allows for more efficient assembly code). It's not possible to return cleanly to BASIC when the program exits. The only choice is @@ -402,6 +402,23 @@ Struct variables can be assigned a struct literal value (also in their declarati Color rgb = [255, 100, 0] ; note that the value is an array +String +^^^^^^ + +``"hello"`` is a string translated into the default character encoding (PETSCII) + +``@"hello"`` is a string translated into the alternate character encoding (Screencodes/pokes) + +There are several escape sequences available to put special characters into your string value: + +- ``\\`` - the backslash itself (it has to be escaped because it is the escape symbol) +- ``\n`` - newline character (move cursor down and to beginning of next line) +- ``\r`` - carriage return character (more or less the same as newline if printing to the screen) +- ``\"`` - quote character (otherwise it would terminate the string) +- ``\uHHHH`` - a Unicode codepoint \u0000 - \uffff (16-bit hexadecimal) +- ``\$HH`` - 8-bit hex value that will be copied verbatim *without encoding* + + Operators --------- diff --git a/examples/test.p8 b/examples/test.p8 index 656ef2307..8380f1fd7 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -20,23 +20,24 @@ main { sub start() { - txt.print_ub(c1.red) - txt.chrout('\n') - txt.print_ub(c1.green) - txt.chrout('\n') - txt.print_ub(c1.blue) + str s1 = "a\nb\nc\nd\n" + str s2 = "a\rb\rc\rd\n" + + txt.print(s2) + txt.print(s2) + + ubyte cc + for cc in s1 { + txt.print_ubhex(cc, false) + txt.chrout(' ') + } txt.chrout('\n') + for cc in s2 { + txt.print_ubhex(cc, false) + txt.chrout(' ') + } txt.chrout('\n') - c1 = [99,88,77] - - txt.print_ub(c1.red) - txt.chrout('\n') - txt.print_ub(c1.green) - txt.chrout('\n') - txt.print_ub(c1.blue) - txt.chrout('\n') - testX() } asmsub testX() { @@ -55,3 +56,6 @@ _saveX .byte 0 } } + + +