added \$HH escape character to strings

This commit is contained in:
Irmen de Jong 2020-10-03 15:11:09 +02:00
parent 22031f39b0
commit a6427e0949
6 changed files with 57 additions and 17 deletions

View File

@ -1 +1 @@
4.4 4.5-SNAPSHOT

View File

@ -664,6 +664,11 @@ internal fun unescape(str: String, position: Position): String {
'u' -> { 'u' -> {
"${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}".toInt(16).toChar() "${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}${iter.nextChar()}".toInt(16).toChar()
} }
'$' -> {
// special hack 0x8000..0x80ff will be outputted verbatim without encoding
val hex = ("" + iter.nextChar() + iter.nextChar()).toInt(16)
(0x8000 + hex).toChar()
}
else -> throw SyntaxError("invalid escape char in string: \\$ec", position) else -> throw SyntaxError("invalid escape char in string: \\$ec", position)
}) })
} else { } else {

View File

@ -1056,6 +1056,10 @@ object Petscii {
val petscii = lookup[it] val petscii = lookup[it]
petscii?.toShort() ?: if(it=='\u0000') petscii?.toShort() ?: if(it=='\u0000')
0.toShort() 0.toShort()
else if(it in '\u8000'..'\u80ff') {
// special case: take the lower 8 bit hex value directly
(it.toInt() - 0x8000).toShort()
}
else { else {
val case = if (lowercase) "lower" else "upper" val case = if (lowercase) "lower" else "upper"
throw CharConversionException("no ${case}case Petscii character for '$it' (${it.toShort()})") throw CharConversionException("no ${case}case Petscii character for '$it' (${it.toShort()})")
@ -1074,6 +1078,10 @@ object Petscii {
val screencode = lookup[it] val screencode = lookup[it]
screencode?.toShort() ?: if(it=='\u0000') screencode?.toShort() ?: if(it=='\u0000')
0.toShort() 0.toShort()
else if(it in '\u8000'..'\u80ff') {
// special case: take the lower 8 bit hex value directly
(it.toInt() - 0x8000).toShort()
}
else { else {
val case = if (lowercase) "lower" else "upper" val case = if (lowercase) "lower" else "upper"
throw CharConversionException("no ${case}Screencode character for '$it' (${it.toShort()})") throw CharConversionException("no ${case}Screencode character for '$it' (${it.toShort()})")

View File

@ -287,7 +287,7 @@ This @-prefix can also be used for character byte values.
You can concatenate two string literals using '+' (not very useful though) or repeat You can concatenate two string literals using '+' (not very useful though) or repeat
a string literal a given number of times using '*'. You can also assign a new string a string literal a given number of times using '*'. You can also assign a new string
value to another string. No bounds check is done so be sure the destination string is value to another string. No bounds check is done so be sure the destination string is
large enough to contain the new value:: large enough to contain the new value (it is overwritten in memory)::
str string1 = "first part" + "second part" str string1 = "first part" + "second part"
str string2 = "hello!" * 10 str string2 = "hello!" * 10
@ -296,6 +296,12 @@ large enough to contain the new value::
string1 = "new value" string1 = "new value"
There are several 'escape sequences' to help you put special characters into strings, such
as newlines, the quote character itself, and so on. The ones used most often are
``\\``, ``\"``, ``\n`` and ``\r``. For a detailed description of all of them and what they mean,
read the syntax reference on strings.
.. info:: .. info::
Strings and uwords (=memory address) can often be interchanged. Strings and uwords (=memory address) can often be interchanged.
An array of strings is actually an array of uwords where every element is the memory An array of strings is actually an array of uwords where every element is the memory

View File

@ -78,7 +78,7 @@ Directives
- style ``full`` -- claim the whole ZP for variables for the program, overwriting everything, - style ``full`` -- claim the whole ZP for variables for the program, overwriting everything,
except the few addresses mentioned above that are used by the system's IRQ routine. except the few addresses mentioned above that are used by the system's IRQ routine.
Even though the default IRQ routine is still active, it is impossible to use most BASIC and KERNAL ROM routines. Even though the default IRQ routine is still active, it is impossible to use most BASIC and KERNAL ROM routines.
This includes many floating point operations and several utility routines that do I/O, such as ``print_string``. This includes many floating point operations and several utility routines that do I/O, such as ``print``.
This option makes programs smaller and faster because even more variables can This option makes programs smaller and faster because even more variables can
be stored in the ZP (which allows for more efficient assembly code). be stored in the ZP (which allows for more efficient assembly code).
It's not possible to return cleanly to BASIC when the program exits. The only choice is It's not possible to return cleanly to BASIC when the program exits. The only choice is
@ -402,6 +402,23 @@ Struct variables can be assigned a struct literal value (also in their declarati
Color rgb = [255, 100, 0] ; note that the value is an array Color rgb = [255, 100, 0] ; note that the value is an array
String
^^^^^^
``"hello"`` is a string translated into the default character encoding (PETSCII).
``@"hello"`` is a string translated into the alternate character encoding (Screencodes/pokes).
There are several escape sequences available to put special characters into your string value:
- ``\\`` - the backslash itself (it has to be escaped because the backslash is the escape symbol)
- ``\n`` - newline character (move cursor down and to beginning of next line)
- ``\r`` - carriage return character (more or less the same as newline if printing to the screen)
- ``\"`` - quote character (otherwise it would terminate the string)
- ``\uHHHH`` - a Unicode codepoint in the range ``\u0000`` - ``\uffff`` (16-bit hexadecimal)
- ``\$HH`` - 8-bit hex value that will be copied verbatim *without encoding*
Operators Operators
--------- ---------

View File

@ -20,23 +20,24 @@ main {
sub start() { sub start() {
txt.print_ub(c1.red) str s1 = "a\nb\nc\nd\n"
txt.chrout('\n') str s2 = "a\rb\rc\rd\n"
txt.print_ub(c1.green)
txt.chrout('\n') txt.print(s2)
txt.print_ub(c1.blue) txt.print(s2)
ubyte cc
for cc in s1 {
txt.print_ubhex(cc, false)
txt.chrout(' ')
}
txt.chrout('\n') txt.chrout('\n')
for cc in s2 {
txt.print_ubhex(cc, false)
txt.chrout(' ')
}
txt.chrout('\n') txt.chrout('\n')
c1 = [99,88,77]
txt.print_ub(c1.red)
txt.chrout('\n')
txt.print_ub(c1.green)
txt.chrout('\n')
txt.print_ub(c1.blue)
txt.chrout('\n')
testX()
} }
asmsub testX() { asmsub testX() {
@ -55,3 +56,6 @@ _saveX .byte 0
} }
} }