1
0
mirror of https://github.com/KarolS/millfork.git synced 2024-06-26 11:29:28 +00:00

New encodings for MSX

This commit is contained in:
Karol Stasiak 2019-07-31 00:20:18 +02:00
parent 4b98f334fc
commit 05c017e375
6 changed files with 132 additions and 17 deletions

View File

@ -20,6 +20,8 @@ This matches both the CC65 behaviour and the return values from `readkey()`.
* Added `.length` for large arrays and `.lastindex` for numerically indexed arrays. * Added `.length` for large arrays and `.lastindex` for numerically indexed arrays.
* New text encodings: `petjp`, `petscrjp`, `msx_intl`, `msx_jp`, `msx_ru`.
* Improved passing of register parameters to assembly functions. * Improved passing of register parameters to assembly functions.
* Enabled declaring multiple variables in one line. * Enabled declaring multiple variables in one line.

View File

@ -35,6 +35,10 @@
* `iso_dk`, `iso_fi` aliases for `iso_no` and `iso_se` respectively * `iso_dk`, `iso_fi` aliases for `iso_no` and `iso_se` respectively
* `msx_intl`, `msx_jp`, `msx_ru` MSX character encoding, International, Japanese and Russian respectively
* `msx_us`, `msx_uk`, `msx_fr`, `msx_de`, `msx_es` aliases for `msx_intl`
* `atascii` or `atari` ATASCII as seen on Atari 8-bit computers * `atascii` or `atari` ATASCII as seen on Atari 8-bit computers
* `atasciiscr` or `atariscr` screencodes used by Atari 8-bit computers * `atasciiscr` or `atariscr` screencodes used by Atari 8-bit computers
@ -73,19 +77,22 @@ control codes for changing the text background color
##### Character availability ##### Character availability
Encoding | lowercase letters | backslash | pound | yen and katakana | card suits Encoding | lowercase letters | backslash | pound | yen | katakana | card suits
--|--|--|--|--|--|-- --|--|--|--|--|--|--
`pet`, `origpet` | yes¹ | no | no | no | yes¹ `pet`, `origpet` | yes¹ | no | no | no | no | yes¹
`oldpet` | yes² | no | no | no | yes² `oldpet` | yes² | no | no | no | no | yes²
`petscr` | yes¹ | no | yes | no | yes¹ `petscr` | yes¹ | no | yes | no | no | yes¹
`petjp` | no | no | no | yes³ | yes³ `petjp` | no | no | no | yes | yes³ | yes³
`petscrjp` | no | no | no | yes³ | yes³ `petscrjp` | no | no | no | yes | yes³ | yes³
`sinclair`, `bbc` | yes | yes | yes | no | no `sinclair`, `bbc` | yes | yes | yes | no | no | no
`apple2` | no | yes | no | no | no `apple2` | no | yes | no | no | no | no
`atascii` | yes | yes | no | no | yes `atascii` | yes | yes | no | no | no | yes
`atasciiscr` | yes | yes | no | no | yes `atasciiscr` | yes | yes | no | no | no | yes
`jis` | yes | no | no | yes | no `jis` | yes | no | no | yes | yes | no
all the rest | yes | yes | no | no | no `msx_intl` | yes | yes | yes | yes | no | yes
`msx_jp` | yes | no | no | yes | yes | yes
`msx_ru` | yes | yes | no | no | no | yes
all the rest | yes | yes | no | no | no | no
1. `pet`, `origpet` and `petscr` cannot display card suit symbols and lowercase letters at the same time. 1. `pet`, `origpet` and `petscr` cannot display card suit symbols and lowercase letters at the same time.
Card suit symbols are only available in graphics mode, Card suit symbols are only available in graphics mode,

View File

@ -57,4 +57,7 @@ how to create a program made of multiple files loaded on demand
* [Colors](vcs/colors.mfk) simple static rasterbars * [Colors](vcs/colors.mfk) simple static rasterbars
## PC-88 examples ## MSX examples
* [Encoding test](msx/encoding_test.mfk) text encoding test; displays three lines of text in three different languages,
no more than one of which will display correctly depending on the default font of your computer.

View File

@ -0,0 +1,19 @@
import stdio
// MSX text encoding demo.
// Prints a header line in the default encoding, then three captioned sample lines,
// each of which is tagged with a different MSX encoding: msx_intl, msx_ru and msx_jp.
// NOTE(review): a machine has a single built-in font, so presumably at most one of the
// three sample lines renders correctly on any given computer - confirm on real hardware.
void main() {
putstrz("This is an encoding test."z)
new_line()
// Sample 1: Latin text with accented letters, encoded as msx_intl.
putstrz("This should be in Spanish: "z)
new_line()
putstrz("¡Hola compañeros~♥!"msx_intlz)
new_line()
// Sample 2: Cyrillic text, encoded as msx_ru.
putstrz("This should be in Russian: "z)
new_line()
putstrz("Привет друзья~♥!"msx_ruz)
new_line()
// Sample 3: hiragana text, encoded as msx_jp.
putstrz("This should be in Japanese: "z)
new_line()
putstrz("ともだち、おはよう~♥!"msx_jpz)
new_line()
// Empty infinite loop; presumably keeps the program from returning so the output stays visible.
while true {}
}

View File

@ -3,8 +3,9 @@
[compilation] [compilation]
arch=z80 arch=z80
encoding=ascii encoding=msx_intl
screen_encoding=ascii ; TODO: won't handle extended characters well:
screen_encoding=msx_intl
modules=stdlib,msx,default_panic,msx_crt,default_readword modules=stdlib,msx,default_panic,msx_crt,default_readword

View File

@ -58,7 +58,7 @@ class TextCodec(val name: String,
} }
private def encodeChar(log: Logger, position: Option[Position], c: Char, lenient: Boolean): Option[List[Int]] = { private def encodeChar(log: Logger, position: Option[Position], c: Char, lenient: Boolean): Option[List[Int]] = {
if (decompositions.contains(c)) { if (decompositions.contains(c)) {
Some(decompositions(c).toList.flatMap(x => encodeChar(log, position, x, lenient).getOrElse(Nil))) Some(decompositions(c).toList.flatMap(x => encodeChar(log, position, x, lenient).getOrElse(List(x.toInt))))
} else if (extra.contains(c)) Some(List(extra(c))) else { } else if (extra.contains(c)) Some(List(extra(c))) else {
val index = map.indexOf(c) val index = map.indexOf(c)
if (index >= 0) { if (index >= 0) {
@ -156,6 +156,14 @@ object TextCodec {
case (_, "iso_se") => TextCodec.IsoIec646Se case (_, "iso_se") => TextCodec.IsoIec646Se
case (_, "iso_fi") => TextCodec.IsoIec646Se case (_, "iso_fi") => TextCodec.IsoIec646Se
case (_, "iso_yu") => TextCodec.IsoIec646Yu case (_, "iso_yu") => TextCodec.IsoIec646Yu
case (_, "msx_intl") => TextCodec.MsxWest
case (_, "msx_us") => TextCodec.MsxWest
case (_, "msx_uk") => TextCodec.MsxWest
case (_, "msx_de") => TextCodec.MsxWest
case (_, "msx_fr") => TextCodec.MsxWest
case (_, "msx_es") => TextCodec.MsxWest
case (_, "msx_ru") => TextCodec.MsxRu
case (_, "msx_jp") => TextCodec.MsxJp
case (p, _) => case (p, _) =>
log.error(s"Unknown string encoding: `$name`", p) log.error(s"Unknown string encoding: `$name`", p)
TextCodec.Ascii TextCodec.Ascii
@ -194,6 +202,11 @@ object TextCodec {
"ガギグゲゴザジズゼゾダヂヅデドバビブベボ").map { case (u, v) => v -> (u + "゛") }.toMap ++ "ガギグゲゴザジズゼゾダヂヅデドバビブベボ").map { case (u, v) => v -> (u + "゛") }.toMap ++
"ハヒフヘホ".zip("パピプペポ").map { case (h, p) => p -> (h + "゜") }.toMap "ハヒフヘホ".zip("パピプペポ").map { case (h, p) => p -> (h + "゜") }.toMap
} }
// Decomposition table for hiragana carrying a voicing mark:
// every voiced or semi-voiced kana maps to its plain kana followed by the standalone mark character.
private val StandardHiraganaDecompositions: Map[Char, String] = {
  val plainKana = "かきくけこさしすせそたちつてとはひふへほ"
  val voicedKana = "がぎぐげござじずぜぞだぢづでどばびぶべぼ"
  val plainHaRow = "はひふへほ"
  val semiVoicedKana = "ぱぴぷぺぽ"
  // voiced kana -> plain kana + voicing mark
  val voicedEntries = plainKana.zip(voicedKana).map { case (base, marked) =>
    marked -> (base.toString + "゛")
  }
  // semi-voiced kana -> plain kana + semi-voicing mark
  val semiVoicedEntries = plainHaRow.zip(semiVoicedKana).map { case (base, marked) =>
    marked -> (base.toString + "゜")
  }
  voicedEntries.toMap ++ semiVoicedEntries.toMap
}
val Ascii = new TextCodec("ASCII", 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty, Map.empty, AsciiEscapeSequences) val Ascii = new TextCodec("ASCII", 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty, Map.empty, AsciiEscapeSequences)
@ -521,10 +534,80 @@ object TextCodec {
"円年月日時分秒" + "円年月日時分秒" +
"\ufffd" * 3 + "\\", "\ufffd" * 3 + "\\",
Map('¯' -> '~'.toInt, '‾' -> '~'.toInt, '♥' -> 0xE9) ++ Map('¯' -> '~'.toInt, '‾' -> '~'.toInt, '♥' -> 0xE9) ++
1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA1)).toMap, 1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap,
StandardKatakanaDecompositions, MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10)) StandardKatakanaDecompositions, MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
) )
// Codec for the international (Western) MSX character set.
// Table layout, one character per code starting at 0x00:
//   0x00-0x1F unmapped, 0x20-0x7E plain ASCII, 0x7F unmapped,
//   0x80-0xBF accented Latin letters and symbols, 0xC0-0xD7 unmapped,
//   0xD8-0xFD Greek letters and mathematical symbols.
// Characters missing from the table (card suits, middle dot) are decomposed into
// U+0001 followed by a letter.
val MsxWest = new TextCodec("MSX-International",
  "\ufffd" * 32 +
    (0x20 to 0x7e).map(_.toChar).mkString("") +
    "\ufffd" +
    "ÇüéâäàåçêëèïîìÄÅ" +
    "ÉæÆôöòûùÿÖÜ¢£¥₧ƒ" +
    "áíóúñѪº¿⌐¬½¼¡«»" +
    // \u0132 and \u0133 are the IJ/ij ligatures; they are written as escapes so that they
    // cannot be mistaken for the ASCII pairs "IJ"/"ij", which would make this row 18
    // characters long and shift every later table entry by two.
    "ÃãĨĩÕõŨũ\u0132\u0133¾\ufffd\ufffd‰¶§" +
    "\ufffd" * 24 +
    "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" +
    "αβΓΠΣσµγΦθΩδ∞∅∈∩" +
    "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²",
  // Lookalike aliases: sharp s reuses the beta slot, broken bar reuses the vertical bar.
  Map('ß' -> 0xE1, '¦' -> 0x7C),
  // Both the filled and the hollow variant of the red suits map to the same sequence.
  // The original listed the hollow diamond twice, which left the filled diamond unmapped.
  Map(
    '♥' -> "\u0001C",
    '♡' -> "\u0001C",
    '♦' -> "\u0001D",
    '♢' -> "\u0001D",
    '♣' -> "\u0001E",
    '♠' -> "\u0001F",
    '·' -> "\u0001G"),
  MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
)
// Codec for the Russian MSX character set.
// Table layout, one character per code starting at 0x00:
//   0x00-0x1F unmapped, 0x20-0x7E plain ASCII, 0x7F-0x97 unmapped,
//   0x98-0xBD Greek letters and mathematical symbols, 0xBE-0xBF unmapped,
//   0xC0-0xDF lowercase Cyrillic, 0xE0-0xFE uppercase Cyrillic.
// Characters missing from the table (card suits, middle dot) are decomposed into
// U+0001 followed by a letter.
val MsxRu = new TextCodec("MSX-RU",
  "\ufffd" * 32 +
    (0x20 to 0x7e).map(_.toChar).mkString("") +
    "\ufffd" +
    "\ufffd" * 16 +
    "\ufffd" * 8 +
    "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" +
    "αβΓΠΣσµγΦθΩδ∞∅∈∩" +
    "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²\ufffd\ufffd" +
    "юабцдефгхийклмнопярстужвьызшэщчъ" +
    // The uppercase row mirrors the lowercase row; the original stopped at 0xFD and so
    // could not encode uppercase Ч (0xFE). 0xFF is deliberately left out of the table.
    // NOTE(review): 0xFE = Ч follows the KOI-8 letter order - confirm against an MSX-RU font dump.
    "ЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩЧ",
  // Lookalike aliases: sharp s reuses the beta slot, broken bar reuses the vertical bar.
  Map('ß' -> 0xA1, '¦' -> 0x7C),
  // Both the filled and the hollow variant of the red suits map to the same sequence.
  // The original listed the hollow diamond twice, which left the filled diamond unmapped.
  Map(
    '♥' -> "\u0001C",
    '♡' -> "\u0001C",
    '♦' -> "\u0001D",
    '♢' -> "\u0001D",
    '♣' -> "\u0001E",
    '♠' -> "\u0001F",
    '·' -> "\u0001G"),
  MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
)
// Codec for the Japanese MSX character set.
// Table layout, one character per code starting at 0x00:
//   0x00-0x1F unmapped, 0x20-0x7E ASCII with the yen sign in place of backslash (0x5C),
//   0x7F unmapped, 0x80-0x9F card suits and hiragana, then jisHalfwidthKatakanaOrder,
//   then the remaining hiragana.
// Kanji for weekdays, units and sizes are decomposed into U+0001 followed by another character;
// voiced kana are decomposed into the plain kana plus a voicing mark.
val MsxJp = new TextCodec("MSX-JP",
  "\ufffd" * 32 +
    (0x20 to 0x7e).map(c => if (c == 0x5c) '¥' else c.toChar).mkString("") +
    "\ufffd" +
    // 0x84 uses the regular unmapped marker \ufffd; the original had \uffdd here,
    // an unassigned code point that looks like a typo for the marker.
    "♠♡♣♢\ufffd·をぁぃぅぇぉゃゅょっ" +
    " あいうえおかきくけこさしすせそ" +
    jisHalfwidthKatakanaOrder +
    "たちつてとなにぬねのはひふへほま" +
    "みむめもやゆよらりるれろわん" +
    "" +
    "",
  // The table holds the hollow red suits; alias the filled ones to the same codes.
  // The original aliased only the heart, leaving the filled diamond unencodable.
  Map('♥' -> 0x81, '♦' -> 0x83, '¦' -> 0x7C) ++
    // Halfwidth forms U+FF61..U+FF9F map onto codes 0xA1..0xDF.
    1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap,
  Map(
    '月' -> "\u0001A",
    '火' -> "\u0001B",
    '水' -> "\u0001C",
    '木' -> "\u0001D",
    '金' -> "\u0001E",
    '土' -> "\u0001F",
    '日' -> "\u0001G",
    '年' -> "\u0001H",
    '円' -> "\u0001I",
    '時' -> "\u0001J",
    '分' -> "\u0001K",
    '秒' -> "\u0001L",
    '百' -> "\u0001M",
    '千' -> "\u0001N",
    '万' -> "\u0001O",
    '大' -> "\u0001]",
    '中' -> "\u0001^",
    '小' -> "\u0001_"
  ) ++
    StandardHiraganaDecompositions ++ StandardKatakanaDecompositions,
  MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
)
val lossyAlternatives: Map[Char, List[String]] = { val lossyAlternatives: Map[Char, List[String]] = {
val allowLowercase: Map[Char, List[String]] = ('A' to 'Z').map(c => c -> List(c.toString.toLowerCase(Locale.ROOT))).toMap val allowLowercase: Map[Char, List[String]] = ('A' to 'Z').map(c => c -> List(c.toString.toLowerCase(Locale.ROOT))).toMap
val allowUppercase: Map[Char, List[String]] = ('a' to 'z').map(c => c -> List(c.toString.toUpperCase(Locale.ROOT))).toMap val allowUppercase: Map[Char, List[String]] = ('a' to 'z').map(c => c -> List(c.toString.toUpperCase(Locale.ROOT))).toMap