mirror of
https://github.com/KarolS/millfork.git
synced 2024-12-22 16:31:02 +00:00
More text codecs
This commit is contained in:
parent
2ea964f35b
commit
95375378ed
@ -4,7 +4,7 @@
|
||||
|
||||
* Added array initialization syntax with `for` (not yet finalized).
|
||||
|
||||
* Added `atascii` text codec.
|
||||
* Added multiple new text codecs.
|
||||
|
||||
* Fixed several bugs, most importantly invalid offsets for branching instructions.
|
||||
|
||||
|
@ -21,6 +21,9 @@ String literals are surrounded with double quotes and followed by the name of th
|
||||
Characters between the quotes are interpreted literally,
|
||||
there is no way to escape special characters or quotes.
|
||||
|
||||
In some encodings, multiple characters are mapped to the same byte value,
|
||||
for compatibility with multiple variants.
|
||||
|
||||
Currently available encodings:
|
||||
|
||||
* `ascii` – standard ASCII
|
||||
@ -29,6 +32,16 @@ Currently available encodings:
|
||||
|
||||
* `scr` – Commodore screencodes
|
||||
|
||||
* `apple2` – Apple II charset ($A0–$FE)
|
||||
|
||||
* `bbc` – BBC Micro and ZX Spectrum character set
|
||||
|
||||
* `jis` – JIS X 0201
|
||||
|
||||
* `iso_de`, `iso_no`, `iso_se`, `iso_yu` – various variants of ISO/IEC-646
|
||||
|
||||
* `iso_dk`, `iso_fi` – aliases for `iso_no` and `iso_se` respectively
|
||||
|
||||
When programming for Commodore,
|
||||
use `pet` for strings you're printing using standard I/O routines
|
||||
and `scr` for strings you're copying to screen memory directly.
|
||||
|
@ -207,6 +207,16 @@ case class MfParser(filename: String, input: String, currentDirectory: String, o
|
||||
case (_, "scr") => TextCodec.CbmScreencodes
|
||||
case (_, "atascii") => TextCodec.Atascii
|
||||
case (_, "atari") => TextCodec.Atascii
|
||||
case (_, "bbc") => TextCodec.Bbc
|
||||
case (_, "apple2") => TextCodec.Apple2
|
||||
case (_, "jis") => TextCodec.Jis
|
||||
case (_, "jisx") => TextCodec.Jis
|
||||
case (_, "iso_de") => TextCodec.IsoIec646De
|
||||
case (_, "iso_no") => TextCodec.IsoIec646No
|
||||
case (_, "iso_dk") => TextCodec.IsoIec646No
|
||||
case (_, "iso_se") => TextCodec.IsoIec646Se
|
||||
case (_, "iso_fi") => TextCodec.IsoIec646Se
|
||||
case (_, "iso_yu") => TextCodec.IsoIec646Yu
|
||||
case (p, x) =>
|
||||
ErrorReporting.error(s"Unknown string encoding: `$x`", Some(p))
|
||||
TextCodec.Ascii
|
||||
@ -226,7 +236,7 @@ case class MfParser(filename: String, input: String, currentDirectory: String, o
|
||||
}
|
||||
|
||||
// Parses a double-quoted string followed by a codec name and yields the string's
// encoded bytes as literal array contents. A single source character may expand
// to several bytes, because the codec's `encode` returns a list per character.
// The literal's position is forwarded to the codec and attached to every
// resulting expression.
def arrayStringContents: P[ArrayContents] = P(position() ~ doubleQuotedString ~/ HWS ~ codec).map {
  case (p, s, co) =>
    LiteralContents(s.flatMap(c => co.encode(Some(p), c)).map(b => LiteralExpression(b, 1).pos(p)))
}
|
||||
|
||||
def arrayLoopContents: P[ArrayContents] = for {
|
||||
|
@ -6,36 +6,97 @@ import millfork.node.Position
|
||||
/**
|
||||
* @author Karol Stasiak
|
||||
*/
|
||||
/**
  * A single-byte text encoding used for string literals.
  *
  * @param name           human-readable name of the codec
  * @param map            lookup table; the character at index `i` is encoded as byte value `i`
  * @param extra          additional characters mapped directly to a byte value; consulted before `map`
  * @param decompositions characters that are encoded as a sequence of other characters;
  *                       consulted before `extra` and `map`
  */
class TextCodec(val name: String, private val map: String, private val extra: Map[Char, Int], private val decompositions: Map[Char, String]) {

  /**
    * Encodes a single source character.
    *
    * @param position source position of the literal, forwarded to the error reporter
    * @param c        the character to encode
    * @return the encoded byte values; more than one if `c` has a decomposition
    */
  def encode(position: Option[Position], c: Char): List[Int] =
    decompositions.get(c) match {
      case Some(parts) =>
        // Each component is encoded recursively with the same codec.
        parts.toList.flatMap(part => encode(position, part))
      case None =>
        extra.get(c).map(List(_)).getOrElse {
          // U+FFFD is the table's placeholder for unassigned bytes, so it must never match.
          val index = if (c == TextCodec.NotAChar) -1 else map.indexOf(c)
          if (index >= 0) List(index)
          else ErrorReporting.fatal(s"Invalid character `$c` in string", position)
        }
    }

  /**
    * Decodes a byte value back to a character.
    *
    * @param by the byte value; only the lowest 8 bits are used
    * @return the character at that index of the lookup table,
    *         or [[TextCodec.NotAChar]] if the index is past the end of the table
    */
  def decode(by: Int): Char = {
    val index = by & 0xff
    if (index < map.length) map(index) else TextCodec.NotAChar
  }
}
|
||||
|
||||
object TextCodec {
|
||||
// U+FFFD REPLACEMENT CHARACTER: fills the codec lookup tables at byte values that have
// no character assigned, and is what `decode` returns for indices past the end of a table.
val NotAChar = '\ufffd'
|
||||
|
||||
/** Standard ASCII: byte values 0x20–0x7E map to themselves; values below 0x20 are unassigned. */
val Ascii = new TextCodec("ASCII", 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty, Map.empty)
|
||||
|
||||
/** Apple II charset: byte values 0xA0–0xFE hold the characters 0x20–0x7E; everything below 0xA0 is unassigned. */
val Apple2 = new TextCodec(
  "APPLE-II",
  (0 until 255).map(code => if (code >= 160) (code - 128).toChar else NotAChar).mkString,
  Map.empty,
  Map.empty)
|
||||
|
||||
/** German variant of ISO/IEC 646. Byte values below 0x20 are unassigned. */
val IsoIec646De = new TextCodec("ISO-IEC-646-DE",
  "\ufffd" * 32 +
    // 0x26 is the ampersand; the circumflex belongs only at 0x5E in the next row.
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "§ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ^_" +
    "`abcdefghijklmnopqrstuvwxyzäöüß",
  Map.empty, Map.empty
)
|
||||
|
||||
/**
  * Swedish variant of ISO/IEC 646. Byte values below 0x20 are unassigned.
  * The extra map lets characters from related variants share the byte value
  * of the character that occupies the same position in this table.
  */
val IsoIec646Se = new TextCodec("ISO-IEC-646-SE",
  "\ufffd" * 32 +
    // 0x26 is the ampersand; the circumflex belongs only at 0x5E in the next row.
    " !\"#¤%&'()*+,-./0123456789:;<=>?" +
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÅ^_" +
    "`abcdefghijklmnopqrstuvwxyzäöå~",
  Map('¯' -> '~'.toInt,
    '‾' -> '~'.toInt,
    'É' -> '@'.toInt,
    'é' -> '`'.toInt,
    'Ü' -> '^'.toInt,
    'ü' -> '~'.toInt,
    // The dollar sign shares byte 0x24 with the currency sign; '¤'.toInt would be 0xA4.
    '$' -> '$'.toInt),
  Map.empty
)
|
||||
|
||||
/**
  * Norwegian variant of ISO/IEC 646. Byte values below 0x20 are unassigned.
  * The extra map lets characters from related variants share the byte value
  * of the character that occupies the same position in this table.
  */
val IsoIec646No = new TextCodec("ISO-IEC-646-NO",
  "\ufffd" * 32 +
    // 0x26 is the ampersand; the circumflex belongs only at 0x5E in the next row.
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZÆØÅ^_" +
    "`abcdefghijklmnopqrstuvwxyzæøå~",
  Map('¯' -> '~'.toInt,
    '‾' -> '~'.toInt,
    '|' -> '~'.toInt,
    '¤' -> '$'.toInt,
    'Ä' -> '@'.toInt,
    'ä' -> '`'.toInt,
    'Ü' -> '^'.toInt,
    'ü' -> '~'.toInt,
    '«' -> '"'.toInt,
    '»' -> '#'.toInt,
    '§' -> '#'.toInt),
  Map.empty
)
|
||||
|
||||
/** Yugoslav variant of ISO/IEC 646. Byte values below 0x20 are unassigned. */
val IsoIec646Yu = new TextCodec("ISO-IEC-646-YU",
  "\ufffd" * 32 +
    // 0x26 is the ampersand; in this variant 0x5E is occupied by Č.
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "ŽABCDEFGHIJKLMNOPQRSTUVWXYZŠĐĆČ_" +
    "žabcdefghijklmnopqrstuvwxyzšđćč",
  Map('Ë' -> '$'.toInt, 'ë' -> '_'.toInt),
  Map.empty
)
|
||||
|
||||
/**
  * Commodore screencodes, for text copied directly to screen memory.
  * Table layout: 0x00–0x1F `@`, lowercase letters and `[£]↑←`; 0x20–0x3F same as ASCII;
  * 0x40–0x5A `–` and uppercase letters; 0x5E `π`.
  */
val CbmScreencodes = new TextCodec("CBM-Screen",
  "@abcdefghijklmnopqrstuvwxyz[£]↑←" +
    0x20.to(0x3f).map(_.toChar).mkString +
    "–ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ",
  // `^` is an alias for the up arrow, which sits at 0x1E in this table (0x3E is `>`).
  Map('^' -> 0x1E, '♥' -> 0x53, '♡' -> 0x53, '♠' -> 0x41, '♣' -> 0x58, '♢' -> 0x5A, '•' -> 0x51), Map.empty
)
|
||||
|
||||
/**
  * PETSCII, for text printed through the standard Commodore I/O routines.
  * Table layout: 0x00–0x1F unassigned; 0x20–0x3F same as ASCII;
  * 0x40–0x5F `@`, lowercase letters and `[£]↑←`; 0x60–0x7A `–` and uppercase letters; 0x7E `π`.
  */
val Petscii = new TextCodec("PETSCII",
  "\ufffd" * 32 +
    0x20.to(0x3f).map(_.toChar).mkString +
    "@abcdefghijklmnopqrstuvwxyz[£]↑←" +
    "–ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ",
  // `^` is an alias for the up arrow at 0x5E.
  Map('^' -> 0x5E, '♥' -> 0x73, '♡' -> 0x73, '♠' -> 0x61, '♣' -> 0x78, '♢' -> 0x7A, '•' -> 0x71), Map.empty
)
|
||||
|
||||
val Atascii = new TextCodec("ATASCII",
|
||||
@ -45,7 +106,37 @@ object TextCodec {
|
||||
"\ufffd" * 11 +
|
||||
0x20.to(0x5f).map(_.toChar).mkString +
|
||||
"♢abcdefghijklmnopqrstuvwxyz♠|",
|
||||
Map('♥' -> 0, '·' -> 0x14)
|
||||
Map('♥' -> 0, '·' -> 0x14), Map.empty
|
||||
)
|
||||
|
||||
/**
  * BBC Micro character set: ASCII for 0x20–0x5F and 0x61–0x7E,
  * with `£` at 0x60 and `©` at 0x7F. Byte values below 0x20 are unassigned.
  */
val Bbc = {
  val unassigned = "\ufffd" * 32
  val upperRows = (0x20 to 0x5f).map(_.toChar).mkString
  val lowerRow = (0x61 to 0x7E).map(_.toChar).mkString
  // The up arrow is accepted as an alias for `^`.
  new TextCodec("BBC", unassigned + upperRows + "£" + lowerRow + "©", Map('↑' -> '^'.toInt), Map.empty)
}
|
||||
|
||||
/**
  * JIS X 0201 with some extra symbols in the 0xE8–0xFA range.
  *
  * Table layout: 0x20–0x7E as ASCII except `¥` at 0x5C; 0xA1–0xDF punctuation and katakana;
  * 0xE8–0xEB card suits; 0xF0–0xF6 date/time kanji; 0xFA backslash.
  *
  * Halfwidth forms U+FF61–U+FF9F are accepted as aliases for the same byte values.
  * Voiced and semi-voiced katakana are decomposed into the base kana followed by
  * a separate (han)dakuten mark.
  */
val Jis = new TextCodec("JIS-X-0201",
  "\ufffd" * 32 +
    ' '.to('Z').mkString +
    "[¥]^_" +
    "`" + 'a'.to('z').mkString + "{|}~\ufffd" +
    "\ufffd" * 32 +
    "\ufffd。「」、・ヲァィゥェォャュョッ" +
    "ーアイウエオカキクケコサシスセソ" +
    "タチツテトナニヌネノハヒフヘホマ" +
    "ミムメモヤユヨラリルレロワン゛゜" +
    "\ufffd" * 8 +
    "♠♡♢♣" +
    "\ufffd" * 4 +
    "円年月日時分秒" +
    "\ufffd" * 3 + "\\",
  Map('¯' -> '~'.toInt, '‾' -> '~'.toInt, '♥' -> 0xE9) ++
    // U+FF61 is the halfwidth `。`, which sits at 0xA1 in the table above,
    // so the offset from the Unicode block base to the byte value is 0xA0.
    1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap,
  "カキクケコサシスセソタチツテトハヒフヘホ".zip(
    "ガギグゲゴザジズゼゾダヂヅデドバビブベボ").map { case (u, v) => v -> (u.toString + "゛") }.toMap ++
    "ハヒフヘホ".zip("パピプペポ").map { case (h, p) => p -> (h.toString + "゜") }.toMap
)
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user