1
0
mirror of https://github.com/KarolS/millfork.git synced 2025-01-10 20:29:35 +00:00

More text codecs

This commit is contained in:
Karol Stasiak 2018-04-02 19:47:11 +02:00
parent 2ea964f35b
commit 95375378ed
4 changed files with 127 additions and 13 deletions

View File

@ -4,7 +4,7 @@
* Added array initialization syntax with `for` (not yet finalized).
* Added `atascii` text codec.
* Added multiple new text codecs.
* Fixed several bugs, most importantly invalid offsets for branching instructions.

View File

@ -21,6 +21,9 @@ String literals are surrounded with double quotes and followed by the name of th
Characters between the quotes are interpreted literally;
there is no way to escape special characters or quotes.
In some encodings, multiple characters are mapped to the same byte value,
for compatibility with multiple variants.
Currently available encodings:
* `ascii` standard ASCII
@ -29,6 +32,16 @@ Currently available encodings:
* `scr` Commodore screencodes
* `apple2` Apple II charset ($A0–$FE)
* `bbc` BBC Micro and ZX Spectrum character set
* `jis` JIS X 0201
* `iso_de`, `iso_no`, `iso_se`, `iso_yu` national variants of ISO/IEC 646
* `iso_dk`, `iso_fi` aliases for `iso_no` and `iso_se` respectively
When programming for Commodore,
use `pet` for strings you're printing using standard I/O routines
and `scr` for strings you're copying to screen memory directly.

View File

@ -207,6 +207,16 @@ case class MfParser(filename: String, input: String, currentDirectory: String, o
case (_, "scr") => TextCodec.CbmScreencodes
case (_, "atascii") => TextCodec.Atascii
case (_, "atari") => TextCodec.Atascii
case (_, "bbc") => TextCodec.Bbc
case (_, "apple2") => TextCodec.Apple2
case (_, "jis") => TextCodec.Jis
case (_, "jisx") => TextCodec.Jis
case (_, "iso_de") => TextCodec.IsoIec646De
case (_, "iso_no") => TextCodec.IsoIec646No
case (_, "iso_dk") => TextCodec.IsoIec646No
case (_, "iso_se") => TextCodec.IsoIec646Se
case (_, "iso_fi") => TextCodec.IsoIec646Se
case (_, "iso_yu") => TextCodec.IsoIec646Yu
case (p, x) =>
ErrorReporting.error(s"Unknown string encoding: `$x`", Some(p))
TextCodec.Ascii
@ -226,7 +236,7 @@ case class MfParser(filename: String, input: String, currentDirectory: String, o
}
def arrayStringContents: P[ArrayContents] = P(position() ~ doubleQuotedString ~/ HWS ~ codec).map {
case (p, s, co) => LiteralContents(s.map(c => LiteralExpression(co.decode(None, c), 1).pos(p)))
case (p, s, co) => LiteralContents(s.flatMap(c => co.encode(None, c)).map(c => LiteralExpression(c, 1).pos(p)))
}
def arrayLoopContents: P[ArrayContents] = for {

View File

@ -6,36 +6,97 @@ import millfork.node.Position
/**
* @author Karol Stasiak
*/
class TextCodec(val name: String, private val map: String, private val extra: Map[Char, Int]) {
def decode(position: Option[Position], c: Char): Int = {
if (extra.contains(c)) extra(c) else {
class TextCodec(val name: String, private val map: String, private val extra: Map[Char, Int], private val decompositions: Map[Char, String]) {
def encode(position: Option[Position], c: Char): List[Int] = {
if (decompositions.contains(c)) {
decompositions(c).toList.flatMap(x => encode(position, x))
} else if (extra.contains(c)) List(extra(c)) else {
val index = map.indexOf(c)
if (index >= 0) {
index
List(index)
} else {
ErrorReporting.fatal("Invalid character in string in ")
ErrorReporting.fatal("Invalid character in string")
}
}
}
/**
 * Translates a byte value back into the character stored at that position of the table.
 *
 * Only the lowest 8 bits of `by` are considered.
 *
 * @param by the encoded value
 * @return the character at that table position, or `TextCodec.NotAChar` if the table is shorter
 */
def decode(by: Int): Char = (by & 0xff) match {
  case slot if slot < map.length => map.charAt(slot)
  case _ => TextCodec.NotAChar
}
}
object TextCodec {
// U+FFFD: fills table positions that have no character and is returned by `decode`
// for byte values beyond the end of a codec's table.
val NotAChar = '\ufffd'
val Ascii = new TextCodec("ASCII", 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty)
val Ascii = new TextCodec("ASCII", 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty, Map.empty)
// Apple II charset: bytes 0xA0..0xFE hold the characters with codes 0x20..0x7E (byte = code + 0x80);
// every byte below 0xA0 is unmapped.
val Apple2 = {
  val unmapped = NotAChar.toString * 160
  val printable = (160 until 255).map(byte => (byte - 128).toChar).mkString
  new TextCodec("APPLE-II", unmapped + printable, Map.empty, Map.empty)
}
// German variant of ISO/IEC 646.
// The table string is indexed by byte value; the first 32 positions (control codes) are unmapped.
// Position 0x26 must hold '&': characters are looked up with indexOf, so a stray '^' there
// would make '&' unencodable and would make '^' encode to 0x26 instead of 0x5E.
val IsoIec646De = new TextCodec("ISO-IEC-646-DE",
  "\ufffd" * 32 +
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "§ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ^_" +
    "`abcdefghijklmnopqrstuvwxyzäöüß",
  Map.empty, Map.empty
)
// Swedish variant of ISO/IEC 646 (also used for the `iso_fi` alias).
// The table string is indexed by byte value; the first 32 positions (control codes) are unmapped.
// Position 0x26 must hold '&': characters are looked up with indexOf, so a stray '^' there
// would make '&' unencodable and would make '^' encode to 0x26 instead of 0x5E.
val IsoIec646Se = new TextCodec("ISO-IEC-646-SE",
  "\ufffd" * 32 +
    " !\"#¤%&'()*+,-./0123456789:;<=>?" +
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÅ^_" +
    "`abcdefghijklmnopqrstuvwxyzäöå~",
  // Extra characters that share a byte with a character already in the table.
  // Each value is the target byte, written as the ASCII character with the same code.
  Map('¯' -> '~'.toInt,
    '‾' -> '~'.toInt,
    'É' -> '@'.toInt,
    'é' -> '`'.toInt,
    'Ü' -> '^'.toInt,
    'ü' -> '~'.toInt,
    // '$' shares byte 0x24 with '¤'; note that '¤'.toInt would be 0xA4, outside the 7-bit table.
    '$' -> '$'.toInt),
  Map.empty
)
// Norwegian variant of ISO/IEC 646 (also used for the `iso_dk` alias).
// The table string is indexed by byte value; the first 32 positions (control codes) are unmapped.
// Position 0x26 must hold '&': characters are looked up with indexOf, so a stray '^' there
// would make '&' unencodable and would make '^' encode to 0x26 instead of 0x5E.
val IsoIec646No = new TextCodec("ISO-IEC-646-NO",
  "\ufffd" * 32 +
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZÆØÅ^_" +
    "`abcdefghijklmnopqrstuvwxyzæøå~",
  // Extra characters that share a byte with a character already in the table.
  // Each value is the target byte, written as the ASCII character with the same code.
  Map('¯' -> '~'.toInt,
    '‾' -> '~'.toInt,
    '|' -> '~'.toInt,
    '¤' -> '$'.toInt,
    'Ä' -> '@'.toInt,
    'ä' -> '`'.toInt,
    'Ü' -> '^'.toInt,
    'ü' -> '~'.toInt,
    '«' -> '"'.toInt,
    '»' -> '#'.toInt,
    '§' -> '#'.toInt),
  Map.empty
)
// Yugoslav variant of ISO/IEC 646.
// The table string is indexed by byte value; the first 32 positions (control codes) are unmapped.
// Position 0x26 must hold '&': characters are looked up with indexOf, so a stray '^' there
// would make '&' unencodable (the only '^' in this table was that stray one).
val IsoIec646Yu = new TextCodec("ISO-IEC-646-YU",
  "\ufffd" * 32 +
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "ŽABCDEFGHIJKLMNOPQRSTUVWXYZŠĐĆČ_" +
    "žabcdefghijklmnopqrstuvwxyzšđćč",
  // Extra characters sharing bytes 0x24 and 0x5F.
  // NOTE(review): the rationale for mapping Ë/ë here is not visible in this file — confirm.
  Map('Ë' -> '$'.toInt, 'ë' -> '_'.toInt),
  Map.empty
)
val CbmScreencodes = new TextCodec("CBM-Screen",
"@abcdefghijklmnopqrstuvwxyz[£]↑←" +
0x20.to(0x3f).map(_.toChar).mkString +
"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
Map('^' -> 0x3E, 'π' -> 0x5E, '♥' -> 0x53, '♡' -> 0x53, '♠' -> 0x41, '♣' -> 0x58, '♢' -> 0x5A, '•' -> 0x51))
"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ",
Map('^' -> 0x3E, '♥' -> 0x53, '♡' -> 0x53, '♠' -> 0x41, '♣' -> 0x58, '♢' -> 0x5A, '•' -> 0x51), Map.empty
)
val Petscii = new TextCodec("PETSCII",
"\ufffd" * 32 +
0x20.to(0x3f).map(_.toChar).mkString +
"@abcdefghijklmnopqrstuvwxyz[£]↑←" +
"ABCDEFGHIJKLMNOPQRSTUVWXYZ",
Map('^' -> 0x5E, 'π' -> 0x7E, '♥' -> 0x73, '♡' -> 0x73, '♠' -> 0x61, '♣' -> 0x78, '♢' -> 0x7A, '•' -> 0x71)
"ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ",
Map('^' -> 0x5E, '♥' -> 0x73, '♡' -> 0x73, '♠' -> 0x61, '♣' -> 0x78, '♢' -> 0x7A, '•' -> 0x71), Map.empty
)
val Atascii = new TextCodec("ATASCII",
@ -45,7 +106,37 @@ object TextCodec {
"\ufffd" * 11 +
0x20.to(0x5f).map(_.toChar).mkString +
"♢abcdefghijklmnopqrstuvwxyz♠|",
Map('♥' -> 0, '·' -> 0x14)
Map('♥' -> 0, '·' -> 0x14), Map.empty
)
// BBC character set: identical to ASCII for 0x20..0x5F and 0x61..0x7E,
// with '£' at 0x60 and '©' at 0x7F; '↑' is accepted as a synonym for the byte of '^'.
val Bbc = {
  val controlCodes = "\ufffd" * 32
  val lowerRows = (0x20 to 0x5f).map(_.toChar).mkString
  val upperRows = (0x61 to 0x7E).map(_.toChar).mkString
  val table = controlCodes + lowerRows + "£" + upperRows + "©"
  new TextCodec("BBC", table, Map('↑' -> '^'.toInt), Map.empty)
}
// JIS X 0201 codec.
// The table string is indexed by byte value: 0x20..0x7E is the Roman half (with '¥' at 0x5C),
// 0xA1..0xDF is the katakana half, and a few extra symbols are placed above 0xE0.
// Fullwidth katakana are stored in the table; their halfwidth forms are added as extras.
// Voiced and semi-voiced kana have no byte of their own, so they are decomposed into
// the base kana followed by the separate sound mark.
//noinspection ScalaUnnecessaryParentheses
val Jis = new TextCodec("JIS-X-0201",
  "\ufffd" * 32 +
    ' '.to('Z').mkString +
    "[¥]^_" +
    "`" + 'a'.to('z').mkString + "{|}~\ufffd" +
    "\ufffd" * 32 +
    "\ufffd。「」、・ヲァィゥェォャュョッ" +
    "ーアイウエオカキクケコサシスセソ" +
    "タチツテトナニヌネノハヒフヘホマ" +
    "ミムメモヤユヨラリルレロワン゛゜" +
    "\ufffd" * 8 +
    "♠♡♢♣" +
    "\ufffd" * 4 +
    "円年月日時分秒" +
    "\ufffd" * 3 + "\\",
  Map('¯' -> '~'.toInt, '‾' -> '~'.toInt, '♥' -> 0xE9) ++
    // Halfwidth forms U+FF61..U+FF9F correspond one-to-one to bytes 0xA1..0xDF,
    // so U+FF60 + i maps to 0xA0 + i (U+FF61 is the halfwidth form of '。' at 0xA1).
    1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap,
  // Voiced kana -> base kana + '゛'
  (("カキクケコサシスセソタチツテトハヒフヘホ")).zip(
    "ガギグゲゴザジズゼゾダヂヅデドバビブベボ").map { case (u, v) => v -> (u + "゛") }.toMap ++
    // Semi-voiced kana -> base kana + '゜'
    "ハヒフヘホ".zip("パピプペポ").map { case (h, p) => p -> (h + "゜") }.toMap
)
}