mirror of
https://github.com/irmen/prog8.git
synced 2024-11-26 11:49:22 +00:00
document new string encoding syntax
This commit is contained in:
parent
674295e800
commit
9ed7587e3e
@ -4,11 +4,14 @@ import com.github.michaelbull.result.Result
|
||||
import com.github.michaelbull.result.Ok
|
||||
import com.github.michaelbull.result.Err
|
||||
import java.io.CharConversionException
|
||||
import java.nio.charset.Charset
|
||||
|
||||
object IsoEncoding {
|
||||
val charset: Charset = Charset.forName("ISO-8859-15")
|
||||
|
||||
fun encode(str: String): Result<List<UByte>, CharConversionException> {
|
||||
return try {
|
||||
Ok(str.toByteArray(Charsets.ISO_8859_1).map { it.toUByte() })
|
||||
Ok(str.toByteArray(charset).map { it.toUByte() })
|
||||
} catch (ce: CharConversionException) {
|
||||
Err(ce)
|
||||
}
|
||||
@ -16,7 +19,7 @@ object IsoEncoding {
|
||||
|
||||
fun decode(bytes: List<UByte>): Result<String, CharConversionException> {
|
||||
return try {
|
||||
Ok(String(bytes.map { it.toByte() }.toByteArray(), Charsets.ISO_8859_1))
|
||||
Ok(String(bytes.map { it.toByte() }.toByteArray(), charset))
|
||||
} catch (ce: CharConversionException) {
|
||||
Err(ce)
|
||||
}
|
||||
|
@ -36,10 +36,10 @@ RAM, ROM, I/O
|
||||
#. what part(s) of the address space is memory mapped I/O registers?
|
||||
#. is there a banking system? How does it work (how do you select RAM/ROM banks)? How is the default bank configuration set?
|
||||
|
||||
Screen and Character encodings
|
||||
------------------------------
|
||||
Character encodings
|
||||
-------------------
|
||||
#. provide the primary character encoding table that the system uses (i.e. how is text represented in memory)
|
||||
#. provide alternate character encoding table (if any)
|
||||
#. provide alternate character encodings (if any)
|
||||
#. what are the system's character screen dimensions?
|
||||
#. is there a screen matrix directly accessible in RAM? Provide addresses of the character matrix and color attributes matrix, if any.
|
||||
|
||||
|
@ -193,8 +193,9 @@ Values will usually be part of an expression or assignment statement::
|
||||
-33.456e52 ; floating point number
|
||||
"Hi, I am a string" ; text string, encoded with compiler target default encoding
|
||||
'a' ; byte value (ubyte) for the letter a
|
||||
@"Alternate" ; text string, encoded with alternate encoding
|
||||
@'a' ; byte value of the letter a, using alternate encoding
|
||||
@"Alternate" ; text string, encoded with alternate encoding (old deprecated syntax)
|
||||
sc:"Alternate" ; text string, encoded with c64 screencode encoding (current syntax)
|
||||
sc:'a' ; byte value of the letter a in c64 screencode encoding
|
||||
|
||||
byte counter = 42 ; variable of size 8 bits, with initial value 42
|
||||
|
||||
@ -314,12 +315,28 @@ Strings
|
||||
Strings are a sequence of characters enclosed in ``"`` quotes. The length is limited to 255 characters.
|
||||
They're stored and treated much the same as a byte array,
|
||||
but they have some special properties because they are considered to be *text*.
|
||||
Strings in your source code files will be encoded (translated from ASCII/UTF-8) into bytes via the
|
||||
default encoding that is used on the target platform. For the C-64, this is CBM PETSCII.
|
||||
Alternate-encoding strings (prefixed with ``@``) will be encoded via the alternate encoding for the
|
||||
platform (if defined). For the C-64, that is SCREEN CODES (also known as POKE codes).
|
||||
This @-prefix can also be used for character byte values.
|
||||
Strings (without encoding prefix) will be encoded (translated from ASCII/UTF-8) into bytes via the
|
||||
*default encoding* for the target platform. On the CBM machines, this is CBM PETSCII.
|
||||
|
||||
.. sidebar::
|
||||
Deprecated ``@`` prefix
|
||||
|
||||
In older versions of the language, the ``@`` prefix was used to specify the
|
||||
CBM screencode encoding. This syntax is still supported for now, but will be removed
|
||||
in a future language version.
|
||||
|
||||
Alternative encodings can be specified with an ``encodingname:`` prefix to the string or character literal.
|
||||
The following encodings are currently recognised:
|
||||
|
||||
- ``petscii`` Petscii, the default encoding on CBM machines (c64, c128, cx16)
|
||||
- ``sc`` CBM-screencodes aka 'poke' codes (c64, c128, cx16)
|
||||
- ``iso`` iso-8859-15 text (supported on cx16)
|
||||
|
||||
So the following is a string literal that will be encoded into memory bytes using the iso encoding.
|
||||
It can be correctly displayed on the screen only if an iso-8859-15 charset has been activated first
|
||||
(the Commander X16 has this feature built in)::
|
||||
|
||||
iso:"Käse, Straße"
|
||||
|
||||
You can concatenate two string literals using '+', which can be useful to
|
||||
split long strings over separate lines. But remember that the length
|
||||
|
@ -282,8 +282,7 @@ Various examples::
|
||||
byte counter = len([1, 2, 3]) * 20
|
||||
byte age = 2018 - 1974
|
||||
float wallet = 55.25
|
||||
str name = "my name is Irmen"
|
||||
str name = @"my name is Irmen" ; string with alternative byte encoding
|
||||
str name = "my name is Alice"
|
||||
uword address = &counter
|
||||
byte[] values = [11, 22, 33, 44, 55]
|
||||
byte[5] values ; array of 5 bytes, initially set to zero
|
||||
@ -432,10 +431,23 @@ memory at the given index. See :ref:`pointervars`
|
||||
|
||||
String
|
||||
^^^^^^
|
||||
.. sidebar::
|
||||
Deprecated ``@`` prefix
|
||||
|
||||
``"hello"`` is a string translated into the default character encoding (PETSCII)
|
||||
In older versions of the language, the ``@`` prefix was used to specify the
|
||||
CBM screencode encoding. This syntax is still supported for now, but will be removed
|
||||
in a future language version.
|
||||
|
||||
A string literal can occur with or without an encoding prefix (encoding followed by ':' followed by the string itself).
|
||||
When this is omitted, the string is stored in the machine's default character encoding (which is PETSCII on the CBM machines).
|
||||
You can choose to store the string in other encodings such as ``sc`` (screencodes) or ``iso`` (iso-8859-15).
|
||||
Here are several examples:
|
||||
|
||||
- ``"hello"`` a string translated into the default character encoding (PETSCII)
|
||||
- ``petscii:"hello"`` same as the above, on CBM machines.
|
||||
- ``sc:"my name is Alice"`` string with screencode encoding (new syntax)
|
||||
- ``iso:"Ich heiße François"`` string in iso encoding
|
||||
|
||||
``@"hello"`` is a string translated into the alternate character encoding (Screencodes/pokes)
|
||||
|
||||
There are several escape sequences available to put special characters into your string value:
|
||||
|
||||
|
@ -3,7 +3,7 @@ TODO
|
||||
|
||||
For next compiler release (7.7)
|
||||
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
- document new string encoding syntax
|
||||
- make 'petscii' not hardcoded default but specified in machinedefinition
|
||||
|
||||
|
||||
Need help with
|
||||
|
@ -4,10 +4,12 @@
|
||||
main {
|
||||
str s1 = "Irmen_"
|
||||
str s2 = @"IRMEN_"
|
||||
;str s3 = iso:"Irmen_~"
|
||||
str s3 = sc:"IRMEN_"
|
||||
str s4 = iso:"Käse, Straße"
|
||||
|
||||
sub start() {
|
||||
txt.lowercase()
|
||||
txt.iso()
|
||||
; txt.lowercase()
|
||||
txt.nl()
|
||||
txt.nl()
|
||||
txt.nl()
|
||||
@ -17,12 +19,15 @@ main {
|
||||
txt.nl()
|
||||
txt.print(s2)
|
||||
txt.nl()
|
||||
; txt.print(s3)
|
||||
; txt.nl()
|
||||
txt.print(s3)
|
||||
txt.nl()
|
||||
txt.print(s4)
|
||||
txt.nl()
|
||||
|
||||
sc(1, s1)
|
||||
sc(2, s2)
|
||||
; sc(3, s3)
|
||||
sc(3, s3)
|
||||
sc(4, s4)
|
||||
}
|
||||
|
||||
sub sc(ubyte row, str text) {
|
||||
|
Loading…
Reference in New Issue
Block a user