New encodings for MSX

This commit is contained in:
Karol Stasiak 2019-07-31 00:20:18 +02:00
parent 4b98f334fc
commit 05c017e375
6 changed files with 132 additions and 17 deletions

View File

@ -20,6 +20,8 @@ This matches both the CC65 behaviour and the return values from `readkey()`.
* Added `.length` for large arrays and `.lastindex` for numerically indexed arrays.
* New text encodings: `petjp`, `petscrjp`, `msx_intl`, `msx_jp`, `msx_ru`.
* Improved passing of register parameters to assembly functions.
* Enabled declaring multiple variables in one line.

View File

@ -34,6 +34,10 @@
* `iso_de`, `iso_no`, `iso_se`, `iso_yu` various variants of ISO/IEC-646
* `iso_dk`, `iso_fi` aliases for `iso_no` and `iso_se` respectively
* `msx_intl`, `msx_jp`, `msx_ru` MSX character encoding, International, Japanese and Russian respectively
* `msx_us`, `msx_uk`, `msx_fr`, `msx_de`, `msx_es` aliases for `msx_intl`
* `atascii` or `atari` ATASCII as seen on Atari 8-bit computers
@ -73,19 +77,22 @@ control codes for changing the text background color
##### Character availability
Encoding | lowercase letters | backslash | pound | yen and katakana | card suits
Encoding | lowercase letters | backslash | pound | yen | katakana | card suits
--|--|--|--|--|--|--
`pet`, `origpet` | yes¹ | no | no | no | yes¹
`oldpet` | yes² | no | no | no | yes²
`petscr` | yes¹ | no | yes | no | yes¹
`petjp` | no | no | no | yes³ | yes³
`petscrjp` | no | no | no | yes³ | yes³
`sinclair`, `bbc` | yes | yes | yes | no | no
`apple2` | no | yes | no | no | no
`atascii` | yes | yes | no | no | yes
`atasciiscr` | yes | yes | no | no | yes
`jis` | yes | no | no | yes | no
all the rest | yes | yes | no | no | no
`pet`, `origpet` | yes¹ | no | no | no | no | yes¹
`oldpet` | yes² | no | no | no | no | yes²
`petscr` | yes¹ | no | yes | no | no | yes¹
`petjp` | no | no | no | yes | yes³ | yes³
`petscrjp` | no | no | no | yes | yes³ | yes³
`sinclair`, `bbc` | yes | yes | yes | no | no | no
`apple2` | no | yes | no | no | no | no
`atascii` | yes | yes | no | no | no | yes
`atasciiscr` | yes | yes | no | no | no | yes
`jis` | yes | no | no | yes | yes | no
`msx_intl` | yes | yes | yes | yes | no | yes
`msx_jp` | yes | no | no | yes | yes | yes
`msx_ru` | yes | yes | no | no | no | yes
all the rest | yes | yes | no | no | no | no
1. `pet`, `origpet` and `petscr` cannot display card suit symbols and lowercase letters at the same time.
Card suit symbols are only available in graphics mode,

View File

@ -57,4 +57,7 @@ how to create a program made of multiple files loaded on demand
* [Colors](vcs/colors.mfk) simple static rasterbars
## PC-88 examples
## MSX examples
* [Encoding test](msx/encoding_test.mfk) text encoding test; displays three lines of text in three different languages,
no more than one of which will display correctly depending on the default font of your computer.

View File

@ -0,0 +1,19 @@
import stdio
// Text encoding test: prints one sample line per MSX encoding (international,
// Russian, Japanese), each preceded by a plain caption naming the expected language.
// Each sample literal carries its own encoding suffix (msx_intlz, msx_ruz, msx_jpz);
// the trailing z presumably marks the literal as zero-terminated, as putstrz expects.
void main() {
putstrz("This is an encoding test."z)
new_line()
// Sample encoded with msx_intl
putstrz("This should be in Spanish: "z)
new_line()
putstrz("¡Hola compañeros~♥!"msx_intlz)
new_line()
// Sample encoded with msx_ru
putstrz("This should be in Russian: "z)
new_line()
putstrz("Привет друзья~♥!"msx_ruz)
new_line()
// Sample encoded with msx_jp
putstrz("This should be in Japanese: "z)
new_line()
putstrz("ともだち、おはよう~♥!"msx_jpz)
new_line()
// Loop forever so the output stays on screen
while true {}
}

View File

@ -3,8 +3,9 @@
[compilation]
arch=z80
encoding=ascii
screen_encoding=ascii
encoding=msx_intl
; TODO: won't handle extended characters well:
screen_encoding=msx_intl
modules=stdlib,msx,default_panic,msx_crt,default_readword

View File

@ -58,7 +58,7 @@ class TextCodec(val name: String,
}
private def encodeChar(log: Logger, position: Option[Position], c: Char, lenient: Boolean): Option[List[Int]] = {
if (decompositions.contains(c)) {
Some(decompositions(c).toList.flatMap(x => encodeChar(log, position, x, lenient).getOrElse(Nil)))
Some(decompositions(c).toList.flatMap(x => encodeChar(log, position, x, lenient).getOrElse(List(x.toInt))))
} else if (extra.contains(c)) Some(List(extra(c))) else {
val index = map.indexOf(c)
if (index >= 0) {
@ -156,6 +156,14 @@ object TextCodec {
case (_, "iso_se") => TextCodec.IsoIec646Se
case (_, "iso_fi") => TextCodec.IsoIec646Se
case (_, "iso_yu") => TextCodec.IsoIec646Yu
case (_, "msx_intl") => TextCodec.MsxWest
case (_, "msx_us") => TextCodec.MsxWest
case (_, "msx_uk") => TextCodec.MsxWest
case (_, "msx_de") => TextCodec.MsxWest
case (_, "msx_fr") => TextCodec.MsxWest
case (_, "msx_es") => TextCodec.MsxWest
case (_, "msx_ru") => TextCodec.MsxRu
case (_, "msx_jp") => TextCodec.MsxJp
case (p, _) =>
log.error(s"Unknown string encoding: `$name`", p)
TextCodec.Ascii
@ -194,6 +202,11 @@ object TextCodec {
"ガギグゲゴザジズゼゾダヂヅデドバビブベボ").map { case (u, v) => v -> (u + "゛") }.toMap ++
"ハヒフヘホ".zip("パピプペポ").map { case (h, p) => p -> (h + "゜") }.toMap
}
// Maps every voiced (dakuten) and semi-voiced (handakuten) hiragana to its
// unvoiced base kana followed by the standalone mark ("゛" or "゜").
private val StandardHiraganaDecompositions: Map[Char, String] = {
  val unvoiced = "かきくけこさしすせそたちつてとはひふへほ"
  val voiced = "がぎぐげござじずぜぞだぢづでどばびぶべぼ"
  val hRow = "はひふへほ"
  val pRow = "ぱぴぷぺぽ"
  val withDakuten = unvoiced.zip(voiced).map { pair => pair._2 -> (pair._1.toString + "゛") }
  val withHandakuten = hRow.zip(pRow).map { pair => pair._2 -> (pair._1.toString + "゜") }
  withDakuten.toMap ++ withHandakuten.toMap
}
val Ascii = new TextCodec("ASCII", 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty, Map.empty, AsciiEscapeSequences)
@ -521,10 +534,80 @@ object TextCodec {
"円年月日時分秒" +
"\ufffd" * 3 + "\\",
Map('¯' -> '~'.toInt, '‾' -> '~'.toInt, '♥' -> 0xE9) ++
1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA1)).toMap,
1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap,
StandardKatakanaDecompositions, MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
)
/**
  * MSX international character set (selected by `msx_intl` and its aliases).
  *
  * The character table is indexed by byte value; U+FFFD marks positions with no
  * directly mapped character. The third argument maps additional characters onto
  * existing byte values, and the fourth maps characters onto multi-character
  * replacement strings that are then encoded character by character.
  */
val MsxWest = new TextCodec("MSX-International",
  // 0x00-0x1F: no directly mapped characters
  "\ufffd" * 32 +
    // 0x20-0x7E: printable ASCII
    (0x20 to 0x7e).map(_.toChar).mkString("") +
    // 0x7F: unmapped
    "\ufffd" +
    // 0x80-0xBF: accented Latin letters, currency signs, fractions and punctuation
    "ÇüéâäàåçêëèïîìÄÅ" +
    "ÉæÆôöòûùÿÖÜ¢£¥₧ƒ" +
    "áíóúñѪº¿⌐¬½¼¡«»" +
    "ÃãĨĩÕõŨũIJij¾\ufffd\ufffd‰¶§" +
    // 0xC0-0xD7: unmapped
    "\ufffd" * 24 +
    // 0xD8-0xDF
    "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" +
    // 0xE0-0xFD: Greek letters and mathematical symbols
    "αβΓΠΣσµγΦθΩδ∞∅∈∩" +
    "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²",
  // ß shares the byte of β (0xE1); ¦ shares the byte of | (0x7C)
  Map('ß' -> 0xE1, '¦' -> 0x7C),
  // Card suits and the middle dot become a two-character sequence starting with U+0001
  // (presumably the MSX extended-graphics prefix byte -- TODO confirm).
  // Both the filled and the outlined heart and diamond are accepted; the filled
  // diamond '♦' was previously missing because '♢' was listed twice.
  Map(
    '♥' -> "\u0001C", '♡' -> "\u0001C",
    '♦' -> "\u0001D", '♢' -> "\u0001D",
    '♣' -> "\u0001E",
    '♠' -> "\u0001F",
    '·' -> "\u0001G"),
  MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
)
/**
  * MSX Russian character set (selected by `msx_ru`).
  *
  * The character table is indexed by byte value; U+FFFD marks positions with no
  * directly mapped character. The third argument maps additional characters onto
  * existing byte values, and the fourth maps characters onto multi-character
  * replacement strings that are then encoded character by character.
  */
val MsxRu = new TextCodec("MSX-RU",
  // 0x00-0x1F: no directly mapped characters
  "\ufffd" * 32 +
    // 0x20-0x7E: printable ASCII
    (0x20 to 0x7e).map(_.toChar).mkString("") +
    // 0x7F: unmapped
    "\ufffd" +
    // 0x80-0x97: unmapped
    "\ufffd" * 16 +
    "\ufffd" * 8 +
    // 0x98-0x9F
    "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" +
    // 0xA0-0xBF: Greek letters and mathematical symbols
    "αβΓΠΣσµγΦθΩδ∞∅∈∩" +
    "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²\ufffd\ufffd" +
    // 0xC0-0xDF: lowercase Cyrillic
    "юабцдефгхийклмнопярстужвьызшэщчъ" +
    // 0xE0-0xFD: uppercase Cyrillic.
    // NOTE(review): this row has 30 letters versus 32 in the lowercase row, so
    // uppercase 'Ч' and 'Ъ' cannot be encoded -- verify against the MSX Russian
    // character table whether 'Ч' belongs at 0xFE.
    "ЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩ",
  // ß shares the byte of β (0xA1); ¦ shares the byte of | (0x7C)
  Map('ß' -> 0xA1, '¦' -> 0x7C),
  // Card suits and the middle dot become a two-character sequence starting with U+0001
  // (presumably the MSX extended-graphics prefix byte -- TODO confirm).
  // Both the filled and the outlined heart and diamond are accepted; the filled
  // diamond '♦' was previously missing because '♢' was listed twice.
  Map(
    '♥' -> "\u0001C", '♡' -> "\u0001C",
    '♦' -> "\u0001D", '♢' -> "\u0001D",
    '♣' -> "\u0001E",
    '♠' -> "\u0001F",
    '·' -> "\u0001G"),
  MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
)
/**
  * MSX Japanese character set (selected by `msx_jp`).
  *
  * The character table is indexed by byte value; U+FFFD marks positions with no
  * directly mapped character. The third argument maps additional characters onto
  * existing byte values, and the fourth maps characters onto multi-character
  * replacement strings that are then encoded character by character.
  */
val MsxJp = new TextCodec("MSX-JP",
  // 0x00-0x1F: no directly mapped characters
  "\ufffd" * 32 +
    // 0x20-0x7E: printable ASCII, except that 0x5C is the yen sign instead of the backslash
    (0x20 to 0x7e).map(c => if (c == 0x5c) '¥' else c.toChar).mkString("") +
    // 0x7F: unmapped
    "\ufffd" +
    // 0x80 onwards: card suits and hiragana. 0x84 has no mapped character, so it uses
    // the same U+FFFD placeholder as the other unmapped positions in this table
    // (it was previously "\uffdd", an unassigned code point, most likely a typo).
    "♠♡♣♢\ufffd·をぁぃぅぇぉゃゅょっ" +
    " あいうえおかきくけこさしすせそ" +
    // Katakana block; jisHalfwidthKatakanaOrder is defined elsewhere in this file.
    jisHalfwidthKatakanaOrder +
    "たちつてとなにぬねのはひふへほま" +
    "みむめもやゆよらりるれろわん",
  // Filled heart and diamond share the bytes of their outlined counterparts (0x81, 0x83);
  // ¦ shares the byte of | (0x7C). U+FF61..U+FF9F map to bytes 0xA1..0xDF.
  Map('♥' -> 0x81, '♦' -> 0x83, '¦' -> 0x7C) ++
    1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap,
  // Kanji become a two-character sequence starting with U+0001
  // (presumably the MSX extended-graphics prefix byte -- TODO confirm).
  Map(
    '月' -> "\u0001A",
    '火' -> "\u0001B",
    '水' -> "\u0001C",
    '木' -> "\u0001D",
    '金' -> "\u0001E",
    '土' -> "\u0001F",
    '日' -> "\u0001G",
    '年' -> "\u0001H",
    '円' -> "\u0001I",
    '時' -> "\u0001J",
    '分' -> "\u0001K",
    '秒' -> "\u0001L",
    '百' -> "\u0001M",
    '千' -> "\u0001N",
    '万' -> "\u0001O",
    '大' -> "\u0001]",
    '中' -> "\u0001^",
    '小' -> "\u0001_"
  ) ++
    // Voiced and semi-voiced kana are split into base kana plus a standalone mark
    StandardHiraganaDecompositions ++ StandardKatakanaDecompositions,
  MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
)
val lossyAlternatives: Map[Char, List[String]] = {
val allowLowercase: Map[Char, List[String]] = ('A' to 'Z').map(c => c -> List(c.toString.toLowerCase(Locale.ROOT))).toMap
val allowUppercase: Map[Char, List[String]] = ('a' to 'z').map(c => c -> List(c.toString.toUpperCase(Locale.ROOT))).toMap