From 05c017e375a8806d4cde507d060673be32a94755 Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Wed, 31 Jul 2019 00:20:18 +0200 Subject: [PATCH] New encodings for MSX --- CHANGELOG.md | 2 + docs/lang/text.md | 31 ++++--- examples/README.md | 5 +- examples/msx/encoding_test.mfk | 19 ++++ include/msx_crt.ini | 5 +- .../scala/millfork/parser/TextCodec.scala | 87 ++++++++++++++++++- 6 files changed, 132 insertions(+), 17 deletions(-) create mode 100644 examples/msx/encoding_test.mfk diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ff346cd..5d81e15b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,8 @@ This matches both the CC65 behaviour and the return values from `readkey()`. * Added `.length` for large arrays and `.lastindex` for numerically indexed arrays. +* New text encodings: `petjp`, `petscrjp`, `msx_intl`, `msx_jp`, `msx_ru`. + * Improved passing of register parameters to assembly functions. * Enabled declaring multiple variables in one line. diff --git a/docs/lang/text.md b/docs/lang/text.md index 5074b55c..e14a306e 100644 --- a/docs/lang/text.md +++ b/docs/lang/text.md @@ -34,6 +34,10 @@ * `iso_de`, `iso_no`, `iso_se`, `iso_yu` – various variants of ISO/IEC-646 * `iso_dk`, `iso_fi` – aliases for `iso_no` and `iso_se` respectively + +* `msx_intl`, `msx_jp`, `msx_ru` – MSX character encoding, International, Japanese and Russian respectively + +* `msx_us`, `msx_uk`, `msx_fr`, `msx_de`, `msx_es` – aliases for `msx_intl` * `atascii` or `atari` – ATASCII as seen on Atari 8-bit computers @@ -73,19 +77,22 @@ control codes for changing the text background color ##### Character availability -Encoding | lowercase letters | backslash | pound | yen and katakana | card suits +Encoding | lowercase letters | backslash | pound | yen | katakana | card suits --|--|--|--|--|--|-- -`pet`, `origpet` | yes¹ | no | no | no | yes¹ -`oldpet` | yes² | no | no | no | yes² -`petscr` | yes¹ | no | yes | no | yes¹ -`petjp` | no | no | no | yes³ | yes³ -`petscrjp` | no | no | no | yes³ | 
yes³ -`sinclair`, `bbc` | yes | yes | yes | no | no -`apple2` | no | yes | no | no | no -`atascii` | yes | yes | no | no | yes -`atasciiscr` | yes | yes | no | no | yes -`jis` | yes | no | no | yes | no -all the rest | yes | yes | no | no | no +`pet`, `origpet` | yes¹ | no | no | no | no | yes¹ +`oldpet` | yes² | no | no | no | no | yes² +`petscr` | yes¹ | no | yes | no | no | yes¹ +`petjp` | no | no | no | yes | yes³ | yes³ +`petscrjp` | no | no | no | yes | yes³ | yes³ +`sinclair`, `bbc` | yes | yes | yes | no | no | no +`apple2` | no | yes | no | no | no | no +`atascii` | yes | yes | no | no | no | yes +`atasciiscr` | yes | yes | no | no | no | yes +`jis` | yes | no | no | yes | yes | no +`msx_intl` | yes | yes | yes | yes | no | yes +`msx_jp` | yes | no | no | yes | yes | yes +`msx_ru` | yes | yes | no | no | no | yes +all the rest | yes | yes | no | no | no | no 1. `pet`, `origpet` and petscr` cannot display card suit symbols and lowercase letters at the same time. Card suit symbols are only available in graphics mode, diff --git a/examples/README.md b/examples/README.md index 196852ff..72f194f7 100644 --- a/examples/README.md +++ b/examples/README.md @@ -57,4 +57,7 @@ how to create a program made of multiple files loaded on demand * [Colors](vcs/colors.mfk) – simple static rasterbars -## PC-88 examples +## MSX examples + +* [Encoding test](msx/encoding_test.mfk) – text encoding test; displays three lines of text in three different languages, +no more than one of which will display correctly depending on the default font of your computer. 
diff --git a/examples/msx/encoding_test.mfk b/examples/msx/encoding_test.mfk new file mode 100644 index 00000000..22d896de --- /dev/null +++ b/examples/msx/encoding_test.mfk @@ -0,0 +1,19 @@ +import stdio + +void main() { + putstrz("This is an encoding test."z) + new_line() + putstrz("This should be in Spanish: "z) + new_line() + putstrz("¡Hola compañeros~♥!"msx_intlz) + new_line() + putstrz("This should be in Russian: "z) + new_line() + putstrz("Привет друзья~♥!"msx_ruz) + new_line() + putstrz("This should be in Japanese: "z) + new_line() + putstrz("ともだち、おはよう~♥!"msx_jpz) + new_line() + while true {} +} diff --git a/include/msx_crt.ini b/include/msx_crt.ini index 83d31a8b..5a915e2b 100644 --- a/include/msx_crt.ini +++ b/include/msx_crt.ini @@ -3,8 +3,9 @@ [compilation] arch=z80 -encoding=ascii -screen_encoding=ascii +encoding=msx_intl +; TODO: won't handle extended characters well: +screen_encoding=msx_intl modules=stdlib,msx,default_panic,msx_crt,default_readword diff --git a/src/main/scala/millfork/parser/TextCodec.scala b/src/main/scala/millfork/parser/TextCodec.scala index 35d19a1c..01be18cf 100644 --- a/src/main/scala/millfork/parser/TextCodec.scala +++ b/src/main/scala/millfork/parser/TextCodec.scala @@ -58,7 +58,7 @@ class TextCodec(val name: String, } private def encodeChar(log: Logger, position: Option[Position], c: Char, lenient: Boolean): Option[List[Int]] = { if (decompositions.contains(c)) { - Some(decompositions(c).toList.flatMap(x => encodeChar(log, position, x, lenient).getOrElse(Nil))) + Some(decompositions(c).toList.flatMap(x => encodeChar(log, position, x, lenient).getOrElse(List(x.toInt)))) } else if (extra.contains(c)) Some(List(extra(c))) else { val index = map.indexOf(c) if (index >= 0) { @@ -156,6 +156,14 @@ object TextCodec { case (_, "iso_se") => TextCodec.IsoIec646Se case (_, "iso_fi") => TextCodec.IsoIec646Se case (_, "iso_yu") => TextCodec.IsoIec646Yu + case (_, "msx_intl") => TextCodec.MsxWest + case (_, "msx_us") => TextCodec.MsxWest 
+ case (_, "msx_uk") => TextCodec.MsxWest + case (_, "msx_de") => TextCodec.MsxWest + case (_, "msx_fr") => TextCodec.MsxWest + case (_, "msx_es") => TextCodec.MsxWest + case (_, "msx_ru") => TextCodec.MsxRu + case (_, "msx_jp") => TextCodec.MsxJp case (p, _) => log.error(s"Unknown string encoding: `$name`", p) TextCodec.Ascii @@ -194,6 +202,11 @@ object TextCodec { "ガギグゲゴザジズゼゾダヂヅデドバビブベボ").map { case (u, v) => v -> (u + "゛") }.toMap ++ "ハヒフヘホ".zip("パピプペポ").map { case (h, p) => p -> (h + "゜") }.toMap } + private val StandardHiraganaDecompositions: Map[Char, String] = { + (("かきくけこさしすせそたちつてとはひふへほ")).zip( + "がぎぐげござじずぜぞだぢづでどばびぶべぼ").map { case (u, v) => v -> (u + "゛") }.toMap ++ + "はひふへほ".zip("ぱぴぷぺぽ").map { case (h, p) => p -> (h + "゜") }.toMap + } val Ascii = new TextCodec("ASCII", 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty, Map.empty, AsciiEscapeSequences) @@ -521,10 +534,80 @@ object TextCodec { "円年月日時分秒" + "\ufffd" * 3 + "\\", Map('¯' -> '~'.toInt, '‾' -> '~'.toInt, '♥' -> 0xE9) ++ - 1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA1)).toMap, + 1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap, StandardKatakanaDecompositions, MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10)) ) + val MsxWest = new TextCodec("MSX-International", + "\ufffd" * 32 + + (0x20 to 0x7e).map(_.toChar).mkString("") + + "\ufffd" + + "ÇüéâäàåçêëèïîìÄÅ" + + "ÉæÆôöòûùÿÖÜ¢£¥₧ƒ" + + "áíóúñѪº¿⌐¬½¼¡«»" + + "ÃãĨĩÕõŨũIJij¾\ufffd\ufffd‰¶§" + + "\ufffd" * 24 + + "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" + + "αβΓΠΣσµγΦθΩδ∞∅∈∩" + + "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²", + Map('ß' -> 0xE1, '¦' -> 0x7C), + Map('♥' -> "\u0001C", '♡' -> "\u0001C", '♢' -> "\u0001D", '♢' -> "\u0001D", '♣' -> "\u0001E", '♠' -> "\u0001F", '·' -> "\u0001G") , + MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10)) + ) + + val MsxRu = new TextCodec("MSX-RU", + "\ufffd" * 32 + + (0x20 to 0x7e).map(_.toChar).mkString("") + + "\ufffd" + + "\ufffd" * 16 + + "\ufffd" 
* 8 + + "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" + + "αβΓΠΣσµγΦθΩδ∞∅∈∩" + + "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²\ufffd\ufffd" + + "юабцдефгхийклмнопярстужвьызшэщчъ" + + "ЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩ", + Map('ß' -> 0xA1, '¦' -> 0x7C), + Map('♥' -> "\u0001C", '♡' -> "\u0001C", '♢' -> "\u0001D", '♢' -> "\u0001D", '♣' -> "\u0001E", '♠' -> "\u0001F", '·' -> "\u0001G"), + MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10)) + ) + + val MsxJp = new TextCodec("MSX-JP", + "\ufffd" * 32 + + (0x20 to 0x7e).map(c => if (c == 0x5c) '¥' else c.toChar).mkString("") + + "\ufffd" + + "♠♡♣♢\uffdd·をぁぃぅぇぉゃゅょっ" + + " あいうえおかきくけこさしすせそ" + + jisHalfwidthKatakanaOrder + + "たちつてとなにぬねのはひふへほま" + + "みむめもやゆよらりるれろわん" + + "" + + "", + Map('♥' -> 0x81, '¦' -> 0x7C) ++ + 1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap, + Map( + '月' -> "\u0001A", + '火' -> "\u0001B", + '水' -> "\u0001C", + '木' -> "\u0001D", + '金' -> "\u0001E", + '土' -> "\u0001F", + '日' -> "\u0001G", + '年' -> "\u0001H", + '円' -> "\u0001I", + '時' -> "\u0001J", + '分' -> "\u0001K", + '秒' -> "\u0001L", + '百' -> "\u0001M", + '千' -> "\u0001N", + '万' -> "\u0001O", + '大' -> "\u0001]", + '中' -> "\u0001^", + '小' -> "\u0001_" + ) ++ + StandardHiraganaDecompositions ++ StandardKatakanaDecompositions, + MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10)) + ) + val lossyAlternatives: Map[Char, List[String]] = { val allowLowercase: Map[Char, List[String]] = ('A' to 'Z').map(c => c -> List(c.toString.toLowerCase(Locale.ROOT))).toMap val allowUppercase: Map[Char, List[String]] = ('a' to 'z').map(c => c -> List(c.toString.toUpperCase(Locale.ROOT))).toMap