mirror of
https://github.com/KarolS/millfork.git
synced 2025-04-04 22:29:32 +00:00
Add KOI-7 N2 and MSX-BR encodings. Some encoding enhancements.
This commit is contained in:
parent
08ef0beeb7
commit
c45cf7d51d
@ -76,6 +76,8 @@ For example, if `-flenient-encoding` is enabled, then a literal `"£¥↑ž©ß"
|
||||
|
||||
* `"?¥^z(C)ss"` if the default encoding is `jisx`
|
||||
|
||||
* `"£¥^z(C)β"` if the default encoding is `msx_intl`
|
||||
|
||||
Note that the final length of the string may vary.
|
||||
|
||||
## Character literals
|
||||
|
@ -35,7 +35,7 @@
|
||||
|
||||
* `iso_dk`, `iso_fi` – aliases for `iso_no` and `iso_se` respectively
|
||||
|
||||
* `msx_intl`, `msx_jp`, `msx_ru` – MSX character encoding, International, Japanese and Russian respectively
|
||||
* `msx_intl`, `msx_jp`, `msx_ru`, `msx_br` – MSX character encoding, International, Japanese, Russian and Brazilian respectively
|
||||
|
||||
* `msx_us`, `msx_uk`, `msx_fr`, `msx_de` – aliases for `msx_intl`
|
||||
|
||||
@ -43,6 +43,8 @@
|
||||
|
||||
* `atasciiscr` or `atariscr` – screencodes used by Atari 8-bit computers
|
||||
|
||||
* `koi7n2` or `short_koi` – KOI-7 N2
|
||||
|
||||
* `vectrex` – built-in Vectrex font
|
||||
|
||||
When programming for Commodore,
|
||||
@ -93,23 +95,24 @@ control codes for changing the text background color
|
||||
|
||||
##### Character availability
|
||||
|
||||
Encoding | lowercase letters | backslash | pound | yen | katakana | card suits
|
||||
--|--|--|--|--|--|--
|
||||
`pet`, `origpet` | yes¹ | no | no | no | no | yes¹
|
||||
`oldpet` | yes² | no | no | no | no | yes²
|
||||
`petscr` | yes¹ | no | yes | no | no | yes¹
|
||||
`petjp` | no | no | no | yes | yes³ | yes³
|
||||
`petscrjp` | no | no | no | yes | yes³ | yes³
|
||||
`sinclair`, `bbc` | yes | yes | yes | no | no | no
|
||||
`apple2` | no | yes | no | no | no | no
|
||||
`atascii` | yes | yes | no | no | no | yes
|
||||
`atasciiscr` | yes | yes | no | no | no | yes
|
||||
`jis` | yes | no | no | yes | yes | no
|
||||
`msx_intl` | yes | yes | yes | yes | no | yes
|
||||
`msx_jp` | yes | no | no | yes | yes | yes
|
||||
`msx_ru` | yes | yes | no | no | no | yes
|
||||
`vectrex` | no | yes | no | no | no | no
|
||||
all the rest | yes | yes | no | no | no | no
|
||||
Encoding | lowercase letters | backslash | pound | yen | intl | card suits
|
||||
---------|-------------------|-----------|-------|-----|------|-----------
|
||||
`pet`, `origpet` | yes¹ | no | no | no | none | yes¹
|
||||
`oldpet` | yes² | no | no | no | none | yes²
|
||||
`petscr` | yes¹ | no | yes | no | none | yes¹
|
||||
`petjp` | no | no | no | yes | katakana³ | yes³
|
||||
`petscrjp` | no | no | no | yes | katakana³ | yes³
|
||||
`sinclair`, `bbc` | yes | yes | yes | no | none | no
|
||||
`apple2` | no | yes | no | no | none | no
|
||||
`atascii` | yes | yes | no | no | none | yes
|
||||
`atasciiscr` | yes | yes | no | no | none | yes
|
||||
`jis` | yes | no | no | yes | both kana | no
|
||||
`msx_intl`,`msx_br` | yes | yes | yes | yes | Western | yes
|
||||
`msx_jp` | yes | no | no | yes | katakana | yes
|
||||
`msx_ru` | yes | yes | no | no | Russian⁴ | yes
|
||||
`koi7n2` | no | yes | no | no | Russian⁵ | no
|
||||
`vectrex` | no | yes | no | no | none | no
|
||||
all the rest | yes | yes | no | no | none | no
|
||||
|
||||
1. `pet`, `origpet` and `petscr` cannot display card suit symbols and lowercase letters at the same time.
|
||||
Card suit symbols are only available in graphics mode,
|
||||
@ -118,16 +121,23 @@ in which lowercase letters are displayed as uppercase and uppercase letters are
|
||||
2. `oldpet` cannot display card suit symbols and lowercase letters at the same time.
|
||||
Card suit symbols are only available in graphics mode, in which lowercase letters are displayed as symbols.
|
||||
|
||||
3. `petjp` and `petscrjp` cannot display card suit symbols and katakana at the same time
|
||||
3. `petjp` and `petscrjp` cannot display card suit symbols and katakana at the same time.
|
||||
Card suit symbols are only available in graphics mode, in which katakana is displayed as symbols.
|
||||
|
||||
If the encoding does not support lowercase letters (e.g. `apple2`, `petjp`, `petscrjp`),
|
||||
4. Letter **Ё** and uppercase **Ъ** are not available.
|
||||
|
||||
5. Only uppercase. Letters **Ё** and **Ъ** are not available.
|
||||
|
||||
If the encoding does not support lowercase letters (e.g. `apple2`, `petjp`, `petscrjp`, `koi7n2`, `vectrex`),
|
||||
then text and character literals containing lowercase letters are automatically converted to uppercase.
|
||||
Only unaccented Latin and Cyrillic letters will be converted as such.
|
||||
Accented Latin letters will not be converted and will fail to compile without `-flenient-encoding`.
|
||||
To detect if your default encoding does not support lowercase letters, test `'A' == 'a'`.
|
||||
|
||||
##### Escape sequence availability
|
||||
|
||||
Encoding | new line | braces | backspace | cursor movement | text colour | reverse | background colour
|
||||
--|--|--|--|--|--|--|--
|
||||
---------|----------|--------|-----------|-----------------|-------------|---------|------------------
|
||||
`pet`,`petjp` | yes | no | no | yes | yes | yes | no
|
||||
`origpet` | yes | no | no | yes | no | yes | no
|
||||
`oldpet` | yes | no | no | yes | no | yes | no
|
||||
@ -137,5 +147,7 @@ Encoding | new line | braces | backspace | cursor movement | text colour | rever
|
||||
`apple2` | no | yes | no | no | no | no | no
|
||||
`atascii` | yes | no | yes | yes | no | no | no
|
||||
`atasciiscr` | no | no | no | no | no | no | no
|
||||
`msx_*` | yes | yes | yes | yes | no | no | no
|
||||
`koi7n2` | yes | no | yes | no | no | no | no
|
||||
`vectrex` | no | no | no | no | no | no | no
|
||||
all the rest | yes | yes | no | no | no | no | no
|
||||
|
@ -181,7 +181,10 @@ object TextCodec {
|
||||
case (_, "msx_es") => TextCodec.MsxWest
|
||||
case (_, "msx_ru") => TextCodec.MsxRu
|
||||
case (_, "msx_jp") => TextCodec.MsxJp
|
||||
case (_, "msx_br") => TextCodec.MsxBr
|
||||
case (_, "vectrex") => TextCodec.Vectrex
|
||||
case (_, "koi7n2") => TextCodec.Koi7N2
|
||||
case (_, "short_koi") => TextCodec.Koi7N2
|
||||
case (p, _) =>
|
||||
log.error(s"Unknown string encoding: `$name`", p)
|
||||
TextCodec.Ascii
|
||||
@ -441,6 +444,25 @@ object TextCodec {
|
||||
)
|
||||
)
|
||||
|
||||
/**
 * Codec for the KOI-7 N2 ("short KOI") encoding: an uppercase-only character set
 * holding Latin capitals, digits and punctuation, followed by Cyrillic capitals.
 *
 * Lowercase Latin and lowercase Cyrillic letters are accepted as aliases for
 * their uppercase counterparts. 'ъ' gets no alias because 'Ъ' is absent from the table.
 */
val Koi7N2 = {
  // NOTE(review): assumes a character's position in this table is its encoded byte.
  // This is consistent with the explicit aliases below: '↑' -> 0x5E is the table
  // position of '^', and '$' -> 0x24 is the table position of '¤'.
  val table: String =
    "\ufffd" * 32 +
      " !\"#¤%&'()*+,-./" +
      "0123456789:;<=>?" +
      "@ABCDEFGHIJKLMNO" +
      "PQRSTUVWXYZ[\\]^_" +
      "ЮАБЦДЕФГХИЙКЛМНО" +
      "ПЯРСТУЖВЬЫЗШЭЩЧ"

  // For Latin letters the Unicode code point of the capital equals its table position.
  val latinLowercase: Map[Char, Int] =
    ('a' to 'z').map(l => l -> l.toUpper.toInt).toMap

  // Cyrillic capitals are NOT stored at their Unicode code points (0x410 and up),
  // so the byte has to be looked up in the table. Letters whose capital is missing
  // from the table (i.e. 'ъ') yield -1 and are dropped.
  val cyrillicLowercase: Map[Char, Int] =
    ('а' to 'я')
      .map(l => l -> table.indexOf(l.toUpper.toInt))
      .filter { case (_, code) => code >= 0 }
      .toMap

  new TextCodec("KOI-7 N2", 0,
    table,
    Map('↑' -> 0x5E, '$' -> 0x24) ++ latinLowercase ++ cyrillicLowercase,
    Map.empty,
    Map(
      "n" -> List(13), // TODO: ?
      "b" -> List(8), // TODO: ?
      "q" -> List('\"'.toInt),
      "apos" -> List('\''.toInt)
    )
  )
}
|
||||
|
||||
val OldPetscii = new TextCodec("Old PETSCII", 0,
|
||||
"\ufffd" * 32 +
|
||||
0x20.to(0x3f).map(_.toChar).mkString +
|
||||
@ -600,15 +622,46 @@ object TextCodec {
|
||||
"Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" +
|
||||
"αβΓΠΣσµγΦθΩδ∞∅∈∩" +
|
||||
"≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²",
|
||||
Map('ß' -> 0xE1, '¦' -> 0x7C),
|
||||
Map('ß' -> 0xE1, '¦' -> 0x7C, 'Ő' -> 0xB4, 'ő' -> 0xB5, 'Ű' -> 0xB6, 'ű' -> 0xB7),
|
||||
Map('♥' -> "\u0001C", '♡' -> "\u0001C", '♢' -> "\u0001D", '♢' -> "\u0001D", '♣' -> "\u0001E", '♠' -> "\u0001F", '·' -> "\u0001G") ,
|
||||
MinimalEscapeSequencesWithBraces ++ Map(
|
||||
"right" -> List(0x1c),
|
||||
"left" -> List(0x1d),
|
||||
"up" -> List(0x1e),
|
||||
"down" -> List(0x1f),
|
||||
"b" -> List(8),
|
||||
"n" -> List(13, 10),
|
||||
"pound" -> List(0x9c),
|
||||
"yen" -> List(0x9d),
|
||||
)
|
||||
)
|
||||
|
||||
/**
 * Codec for the Brazilian variant of the MSX character set.
 *
 * The table covers code positions from 0x00 upwards: the first 32 positions and 0x7F
 * are unassigned, 0x20-0x7E are taken verbatim from ASCII, and the upper half holds
 * accented Latin letters, currency signs, fractions and Greek/mathematical symbols.
 */
val MsxBr = new TextCodec("MSX-BR", 0,
  "\ufffd" * 32 +
    (0x20 to 0x7e).map(_.toChar).mkString("") +
    "\ufffd" +
    "ÇüéâÁà\ufffdçêÍÓÚÂÊÔÀ" +
    "ÉæÆôöòûùÿÖÜ¢£¥₧ƒ" +
    "áíóúñѪº¿⌐¬½¼¡«»" +
    "ÃãĨĩÕõŨũIJij¾\ufffd\ufffd‰¶§" +
    "\ufffd" * 24 +
    "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" +
    "αβΓΠΣσµγΦθΩδ∞∅∈∩" +
    "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²",
  // Single-byte aliases for characters that have a lookalike in the table:
  // 'ß' shares 0xE1 with 'β', '¦' shares 0x7C with '|', and the double-acute
  // letters share 0xB4-0xB7 with the tilde letters Õ/õ/Ũ/ũ.
  Map('ß' -> 0xE1, '¦' -> 0x7C, 'Ő' -> 0xB4, 'ő' -> 0xB5, 'Ű' -> 0xB6, 'ű' -> 0xB7),
  // Card suits and the middle dot expand to a two-character sequence: U+0001 followed
  // by a letter. Filled and outlined variants of a suit share the same sequence;
  // the filled diamond '♦' is listed alongside the outlined '♢' (previously the
  // outlined diamond was listed twice and '♦' was missing).
  Map(
    '♥' -> "\u0001C", '♡' -> "\u0001C",
    '♦' -> "\u0001D", '♢' -> "\u0001D",
    '♣' -> "\u0001E",
    '♠' -> "\u0001F",
    '·' -> "\u0001G"
  ),
  MinimalEscapeSequencesWithBraces ++ Map(
    // cursor movement
    "right" -> List(0x1c),
    "left" -> List(0x1d),
    "up" -> List(0x1e),
    "down" -> List(0x1f),
    // new line is emitted as the two bytes 13, 10
    "n" -> List(13, 10),
    "b" -> List(8),
    // currency signs, matching the positions of '£' and '¥' in the table above
    "pound" -> List(0x9c),
    "yen" -> List(0x9d)
  )
)
|
||||
|
||||
val MsxRu = new TextCodec("MSX-RU", 0,
|
||||
"\ufffd" * 32 +
|
||||
(0x20 to 0x7e).map(_.toChar).mkString("") +
|
||||
@ -617,12 +670,19 @@ object TextCodec {
|
||||
"\ufffd" * 8 +
|
||||
"Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" +
|
||||
"αβΓΠΣσµγΦθΩδ∞∅∈∩" +
|
||||
"≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²\ufffd\ufffd" +
|
||||
"≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²\ufffd¤" +
|
||||
"юабцдефгхийклмнопярстужвьызшэщчъ" +
|
||||
"ЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩ",
|
||||
Map('ß' -> 0xA1, '¦' -> 0x7C),
|
||||
Map('♥' -> "\u0001C", '♡' -> "\u0001C", '♢' -> "\u0001D", '♢' -> "\u0001D", '♣' -> "\u0001E", '♠' -> "\u0001F", '·' -> "\u0001G"),
|
||||
MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
|
||||
MinimalEscapeSequencesWithBraces ++ Map(
|
||||
"right" -> List(0x1c),
|
||||
"left" -> List(0x1d),
|
||||
"up" -> List(0x1e),
|
||||
"down" -> List(0x1f),
|
||||
"b" -> List(8),
|
||||
"n" -> List(13, 10)
|
||||
)
|
||||
)
|
||||
|
||||
val MsxJp = new TextCodec("MSX-JP", 0,
|
||||
@ -660,6 +720,11 @@ object TextCodec {
|
||||
) ++
|
||||
StandardHiraganaDecompositions ++ StandardKatakanaDecompositions,
|
||||
MinimalEscapeSequencesWithBraces ++ Map(
|
||||
"right" -> List(0x1c),
|
||||
"left" -> List(0x1d),
|
||||
"up" -> List(0x1e),
|
||||
"down" -> List(0x1f),
|
||||
"b" -> List(8),
|
||||
"n" -> List(13, 10),
|
||||
"yen" -> List(0x5c)
|
||||
)
|
||||
@ -668,19 +733,33 @@ object TextCodec {
|
||||
val lossyAlternatives: Map[Char, List[String]] = {
|
||||
val allowLowercase: Map[Char, List[String]] = ('A' to 'Z').map(c => c -> List(c.toString.toLowerCase(Locale.ROOT))).toMap
|
||||
val allowUppercase: Map[Char, List[String]] = ('a' to 'z').map(c => c -> List(c.toString.toUpperCase(Locale.ROOT))).toMap
|
||||
val allowLowercaseCyr: Map[Char, List[String]] = ('а' to 'я').map(c => c -> List(c.toString.toUpperCase(Locale.ROOT))).toMap
|
||||
val allowUppercaseCyr: Map[Char, List[String]] = ('а' to 'я').map(c => c -> List(c.toString.toUpperCase(Locale.ROOT))).toMap
|
||||
val ligaturesAndSymbols: Map[Char, List[String]] = Map(
|
||||
// commonly used alternative forms:
|
||||
'¦' -> List("|"),
|
||||
'|' -> List("¦"),
|
||||
// Eszett:
|
||||
'ß' -> List("ss", "SS"),
|
||||
'β' -> List("ß"),
|
||||
// various ligatures:
|
||||
'ff' -> List("ff", "FF"),
|
||||
'fl' -> List("fl", "FL"),
|
||||
'fi' -> List("fi", "FI"),
|
||||
'ffi' -> List("ffi", "FFI"),
|
||||
'ffl' -> List("ffl", "FFL"),
|
||||
'ij' -> List("ij", "IJ"),
|
||||
'IJ' -> List("IJ", "ij"),
|
||||
// fractions:
|
||||
'½' -> List("1/2"),
|
||||
'¼' -> List("1/4"),
|
||||
'¾' -> List("3/4"),
|
||||
// currencies:
|
||||
'₧' -> List("Pt", "PT"),
|
||||
'¢' -> List("c", "C"),
|
||||
'$' -> List("¤"),
|
||||
'¥' -> List("Y", "y"),
|
||||
// kanji:
|
||||
'円' -> List("¥", "Y", "y"),
|
||||
'年' -> List("Y", "y"),
|
||||
'月' -> List("M", "m"),
|
||||
@ -688,11 +767,13 @@ object TextCodec {
|
||||
'時' -> List("h", "H"),
|
||||
'分' -> List("m", "M"),
|
||||
'秒' -> List("s", "S"),
|
||||
// card suits:
|
||||
'♥' -> List("H", "h"),
|
||||
'♠' -> List("S", "s"),
|
||||
'♡' -> List("H", "h"),
|
||||
'♢' -> List("D", "d"),
|
||||
'♣' -> List("C", "c"),
|
||||
// Eastern punctuation:
|
||||
'。' -> List("."),
|
||||
'、' -> List(","),
|
||||
'・' -> List("-"),
|
||||
@ -701,33 +782,52 @@ object TextCodec {
|
||||
'」' -> List("]", ")"),
|
||||
'。' -> List("."),
|
||||
'。' -> List("."),
|
||||
// quote marks:
|
||||
'«' -> List("\""),
|
||||
'»' -> List("\""),
|
||||
'‟' -> List("\""),
|
||||
'”' -> List("\""),
|
||||
'„' -> List("\""),
|
||||
'’' -> List("\'"),
|
||||
'‘' -> List("\'"),
|
||||
// pi:
|
||||
'π' -> List("Π"),
|
||||
'Π' -> List("π"),
|
||||
// alternative symbols:
|
||||
'^' -> List("↑"),
|
||||
'↑' -> List("^"),
|
||||
'‾' -> List("~"),
|
||||
'¯' -> List("~"),
|
||||
'«' -> List("\""),
|
||||
'»' -> List("\""),
|
||||
'§' -> List("#"),
|
||||
'[' -> List("("),
|
||||
']' -> List(")"),
|
||||
'{' -> List("("),
|
||||
'}' -> List(")"),
|
||||
'§' -> List("#"),
|
||||
'§' -> List("#"),
|
||||
'©' -> List("(C)"),
|
||||
'İ' -> List("I", "i"),
|
||||
'©' -> List("(C)", "(c)"),
|
||||
'®' -> List("(R)", "(r)"),
|
||||
'‰' -> List("%."),
|
||||
'×' -> List("x"),
|
||||
'÷' -> List("/"),
|
||||
'ª' -> List("a", "A"),
|
||||
'º' -> List("o", "O"),
|
||||
'‰' -> List("%."),
|
||||
'÷' -> List("/"),
|
||||
'ij' -> List("ij", "IJ"),
|
||||
'IJ' -> List("IJ", "ij"),
|
||||
// Turkish I with dot:
|
||||
'İ' -> List("I", "i"),
|
||||
// partially supported Russian letters:
|
||||
'ё' -> List("е", "Ё", "Е"),
|
||||
'Ё' -> List("Е", "ё", "е"),
|
||||
'Ъ' -> List("ъ"),
|
||||
'ъ' -> List("Ъ"),
|
||||
// Latin lookalikes for Cyrillic:
|
||||
'і' -> List("i", "I"),
|
||||
'І' -> List("I", "i"),
|
||||
'ј' -> List("j", "J"),
|
||||
'Ј' -> List("J", "j"),
|
||||
)
|
||||
val accentedLetters: Map[Char, List[String]] = List(
|
||||
"áàäãåąāǎă" -> "a",
|
||||
"çčċćĉ" -> "c",
|
||||
"đď" -> "d",
|
||||
"ð" -> "dh",
|
||||
"ðď" -> "d",
|
||||
"đ" -> "dj",
|
||||
"éèêëęēėě" -> "e",
|
||||
"ğǧĝģġ" -> "g",
|
||||
"ħĥ" -> "h",
|
||||
@ -760,7 +860,7 @@ object TextCodec {
|
||||
else fw -> List(hw.toString)
|
||||
}.toMap
|
||||
val halfWidth = (0xff61 to 0xff9f).map{ c => c.toChar -> List(jisHalfwidthKatakanaOrder(c - 0xff60).toString)}.toMap
|
||||
allowLowercase ++ allowUppercase ++ ligaturesAndSymbols ++ accentedLetters ++ hiragana ++ fullWidth ++ halfWidth
|
||||
allowLowercase ++ allowUppercase ++ allowLowercaseCyr ++ allowUppercaseCyr ++ ligaturesAndSymbols ++ accentedLetters ++ hiragana ++ fullWidth ++ halfWidth
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user