1
0
mirror of https://github.com/KarolS/millfork.git synced 2025-01-10 20:29:35 +00:00

More text codecs

This commit is contained in:
Karol Stasiak 2018-04-02 19:47:11 +02:00
parent 2ea964f35b
commit 95375378ed
4 changed files with 127 additions and 13 deletions

View File

@ -4,7 +4,7 @@
* Added array initialization syntax with `for` (not yet finalized). * Added array initialization syntax with `for` (not yet finalized).
* Added `atascii` text codec. * Added multiple new text codecs.
* Fixed several bugs, most importantly invalid offsets for branching instructions. * Fixed several bugs, most importantly invalid offsets for branching instructions.

View File

@ -21,6 +21,9 @@ String literals are surrounded with double quotes and followed by the name of th
Characters between the quotes are interpreted literally, Characters between the quotes are interpreted literally,
there are no ways to escape special characters or quotes. there are no ways to escape special characters or quotes.
In some encodings, multiple characters are mapped to the same byte value,
for compatibility with multiple variants.
Currently available encodings: Currently available encodings:
* `ascii` standard ASCII * `ascii` standard ASCII
@ -29,6 +32,16 @@ Currently available encodings:
* `scr` Commodore screencodes * `scr` Commodore screencodes
* `apple2` – Apple II charset ($A0–$FE)
* `bbc` – BBC Micro and ZX Spectrum character set
* `jis` or `jisx` – JIS X 0201
* `iso_de`, `iso_no`, `iso_se`, `iso_yu` – various variants of ISO/IEC-646
* `iso_dk`, `iso_fi` – aliases for `iso_no` and `iso_se` respectively
When programming for Commodore, When programming for Commodore,
use `pet` for strings you're printing using standard I/O routines use `pet` for strings you're printing using standard I/O routines
and `scr` for strings you're copying to screen memory directly. and `scr` for strings you're copying to screen memory directly.

View File

@ -207,6 +207,16 @@ case class MfParser(filename: String, input: String, currentDirectory: String, o
case (_, "scr") => TextCodec.CbmScreencodes case (_, "scr") => TextCodec.CbmScreencodes
case (_, "atascii") => TextCodec.Atascii case (_, "atascii") => TextCodec.Atascii
case (_, "atari") => TextCodec.Atascii case (_, "atari") => TextCodec.Atascii
case (_, "bbc") => TextCodec.Bbc
case (_, "apple2") => TextCodec.Apple2
case (_, "jis") => TextCodec.Jis
case (_, "jisx") => TextCodec.Jis
case (_, "iso_de") => TextCodec.IsoIec646De
case (_, "iso_no") => TextCodec.IsoIec646No
case (_, "iso_dk") => TextCodec.IsoIec646No
case (_, "iso_se") => TextCodec.IsoIec646Se
case (_, "iso_fi") => TextCodec.IsoIec646Se
case (_, "iso_yu") => TextCodec.IsoIec646Yu
case (p, x) => case (p, x) =>
ErrorReporting.error(s"Unknown string encoding: `$x`", Some(p)) ErrorReporting.error(s"Unknown string encoding: `$x`", Some(p))
TextCodec.Ascii TextCodec.Ascii
@ -226,7 +236,7 @@ case class MfParser(filename: String, input: String, currentDirectory: String, o
} }
/**
  * Parses a double-quoted string followed by an encoding name and turns it into
  * array contents: every character is encoded with the chosen codec and each
  * resulting byte becomes a one-byte literal positioned at the start of the string.
  * A single character may expand to several bytes.
  */
def arrayStringContents: P[ArrayContents] = P(position() ~ doubleQuotedString ~/ HWS ~ codec).map {
  case (p, s, co) =>
    // Hand the literal's position to the codec so that an unencodable character
    // can be reported against the string it occurs in.
    LiteralContents(s.flatMap(c => co.encode(Some(p), c)).map(c => LiteralExpression(c, 1).pos(p)))
}
def arrayLoopContents: P[ArrayContents] = for { def arrayLoopContents: P[ArrayContents] = for {

View File

@ -6,36 +6,97 @@ import millfork.node.Position
/** /**
* @author Karol Stasiak * @author Karol Stasiak
*/ */
/**
  * A single-byte text encoding described by a lookup table.
  *
  * @param name           display name of the encoding
  * @param map            table in which the character at index `i` is the one encoded as byte `i`;
  *                       slots without a character hold [[TextCodec.NotAChar]]
  * @param extra          additional characters mapped directly to a byte value, consulted before `map`
  * @param decompositions characters that are encoded as a sequence of other characters,
  *                       consulted before `extra` and `map`
  */
class TextCodec(val name: String, private val map: String, private val extra: Map[Char, Int], private val decompositions: Map[Char, String]) {

  /**
    * Encodes one character into the byte value(s) of this encoding.
    *
    * @param position source position used when reporting an unencodable character
    * @param c        the character to encode
    * @return the encoded bytes; more than one when `c` has a decomposition
    */
  def encode(position: Option[Position], c: Char): List[Int] =
    decompositions.get(c) match {
      case Some(parts) =>
        // Each part is encoded recursively, so a decomposition may itself use `extra` or `map`.
        parts.toList.flatMap(part => encode(position, part))
      case None =>
        extra.get(c) match {
          case Some(byte) => List(byte)
          case None =>
            val index = map.indexOf(c)
            // The placeholder marks unmapped slots in the table; it must never be
            // treated as an encodable character even though indexOf finds it.
            if (index >= 0 && c != TextCodec.NotAChar) {
              List(index)
            } else {
              ErrorReporting.fatal(s"Invalid character `$c` in string in $name encoding", position)
            }
        }
    }

  /**
    * Looks up the character stored in the table for a byte value.
    * Only the low 8 bits of `by` are used, and only `map` is consulted.
    *
    * @return the table character, or [[TextCodec.NotAChar]] when the byte lies beyond the table
    */
  def decode(by: Int): Char = {
    val index = by & 0xff
    if (index < map.length) map(index) else TextCodec.NotAChar
  }
}
object TextCodec { object TextCodec {
/** Placeholder stored in codec tables at byte values that have no character. */
val NotAChar = '\ufffd'
/** ASCII: bytes 0-31 are unmapped, bytes 32-126 map to the character with the same code. */
val Ascii = new TextCodec("ASCII", (0 until 127).map(code => if (code >= 32) code.toChar else NotAChar).mkString, Map.empty, Map.empty)
/** Apple II: bytes 0-159 are unmapped, bytes 160-254 hold the character whose code is the byte minus 128. */
val Apple2 = new TextCodec("APPLE-II", (0 until 255).map(code => if (code >= 160) (code - 128).toChar else NotAChar).mkString, Map.empty, Map.empty)
/**
  * German variant of ISO/IEC 646. Bytes 0-31 are unmapped; the table ends at 0x7E.
  * Byte 0x26 is `&` (it must not be `^`, which lives at 0x5E in this table).
  */
val IsoIec646De = new TextCodec("ISO-IEC-646-DE",
  "\ufffd" * 32 +
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "§ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ^_" +
    "`abcdefghijklmnopqrstuvwxyzäöüß",
  Map.empty, Map.empty
)
/**
  * Swedish variant of ISO/IEC 646 (also used for the `iso_fi` alias).
  * Bytes 0-31 are unmapped. The extra map lets characters from related variants
  * share a byte with the character this table shows at that position.
  */
val IsoIec646Se = new TextCodec("ISO-IEC-646-SE",
  "\ufffd" * 32 +
    // 0x24 is the currency sign in this variant; 0x26 is `&`.
    " !\"#¤%&'()*+,-./0123456789:;<=>?" +
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÅ^_" +
    "`abcdefghijklmnopqrstuvwxyzäöå~",
  Map('¯' -> '~'.toInt,
    '‾' -> '~'.toInt,
    'É' -> '@'.toInt,
    'é' -> '`'.toInt,
    'Ü' -> '^'.toInt,
    'ü' -> '~'.toInt,
    // `$` shares the byte of `¤`. The byte is 0x24; the Unicode code of `¤` (0xA4)
    // is not a valid value in this 7-bit table.
    '$' -> 0x24),
  Map.empty
)
/**
  * Norwegian variant of ISO/IEC 646 (also used for the `iso_dk` alias).
  * Bytes 0-31 are unmapped. The extra map lets characters from related variants
  * share a byte with the character this table shows at that position.
  * Byte 0x26 is `&` (it must not be `^`, which lives at 0x5E in this table).
  */
val IsoIec646No = new TextCodec("ISO-IEC-646-NO",
  "\ufffd" * 32 +
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZÆØÅ^_" +
    "`abcdefghijklmnopqrstuvwxyzæøå~",
  Map('¯' -> '~'.toInt,
    '‾' -> '~'.toInt,
    '|' -> '~'.toInt,
    '¤' -> '$'.toInt,
    'Ä' -> '@'.toInt,
    'ä' -> '`'.toInt,
    'Ü' -> '^'.toInt,
    'ü' -> '~'.toInt,
    '«' -> '"'.toInt,
    '»' -> '#'.toInt,
    '§' -> '#'.toInt),
  Map.empty
)
/**
  * Yugoslav variant of ISO/IEC 646. Bytes 0-31 are unmapped; the table ends at 0x7E.
  * Byte 0x26 is `&`. Byte 0x5E holds `Č` here, so `^` has no slot in this variant.
  */
val IsoIec646Yu = new TextCodec("ISO-IEC-646-YU",
  "\ufffd" * 32 +
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "ŽABCDEFGHIJKLMNOPQRSTUVWXYZŠĐĆČ_" +
    "žabcdefghijklmnopqrstuvwxyzšđćč",
  Map('Ë' -> '$'.toInt, 'ë' -> '_'.toInt),
  Map.empty
)
/**
  * Commodore screen codes. Lower-case letters occupy 0x01-0x1A and upper-case
  * letters 0x41-0x5A, with `π` at 0x5E.
  */
val CbmScreencodes = new TextCodec("CBM-Screen",
  "@abcdefghijklmnopqrstuvwxyz[£]↑←" +
    0x20.to(0x3f).map(_.toChar).mkString +
    // 0x40 is an unmapped slot: it keeps `A` at 0x41, consistent with the extra
    // entries below (e.g. ♠ shares the byte of `A`), and puts `π` at 0x5E.
    "\ufffdABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ",
  Map('^' -> 0x3E, '♥' -> 0x53, '♡' -> 0x53, '♠' -> 0x41, '♣' -> 0x58, '♢' -> 0x5A, '•' -> 0x51), Map.empty
)
/**
  * PETSCII. Bytes 0-31 are unmapped, 0x20-0x3F match ASCII, lower-case letters
  * occupy 0x41-0x5A and upper-case letters 0x61-0x7A, with `π` at 0x7E.
  */
val Petscii = new TextCodec("PETSCII",
  "\ufffd" * 32 +
    0x20.to(0x3f).map(_.toChar).mkString +
    "@abcdefghijklmnopqrstuvwxyz[£]↑←" +
    // 0x60 is an unmapped slot: it keeps `A` at 0x61, consistent with the extra
    // entries below (e.g. ♠ shares the byte of `A`), and puts `π` at 0x7E.
    "\ufffdABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ",
  Map('^' -> 0x5E, '♥' -> 0x73, '♡' -> 0x73, '♠' -> 0x61, '♣' -> 0x78, '♢' -> 0x7A, '•' -> 0x71), Map.empty
)
val Atascii = new TextCodec("ATASCII", val Atascii = new TextCodec("ATASCII",
@ -45,7 +106,37 @@ object TextCodec {
"\ufffd" * 11 + "\ufffd" * 11 +
0x20.to(0x5f).map(_.toChar).mkString + 0x20.to(0x5f).map(_.toChar).mkString +
"♢abcdefghijklmnopqrstuvwxyz♠|", "♢abcdefghijklmnopqrstuvwxyz♠|",
Map('♥' -> 0, '·' -> 0x14) Map('♥' -> 0, '·' -> 0x14), Map.empty
)
/**
  * Character set selected by the `bbc` encoding name. Bytes 0-31 are unmapped,
  * 0x20-0x5F and 0x61-0x7E match ASCII, 0x60 is `£` and 0x7F is `©`.
  * The up arrow shares the byte of `^`.
  */
val Bbc = new TextCodec("BBC",
  "\ufffd" * 32 +
    (' ' to '_').mkString +
    "£" + ('a' to '~').mkString + "©",
  Map('↑' -> 0x5E), Map.empty
)
/**
  * JIS X 0201. Bytes 0x20-0x7E follow ASCII except `¥` at 0x5C and the table's
  * own `~` slot at 0x7E; katakana and Japanese punctuation occupy 0xA1-0xDF.
  * The slots above 0xDF (card suits, date/time kanji, backslash) are as laid out below.
  *
  * Voiced and semi-voiced kana have no byte of their own: they are decomposed into
  * the base kana followed by the dakuten (゛) or handakuten (゜) byte.
  */
val Jis = new TextCodec("JIS-X-0201",
  "\ufffd" * 32 +
    ' '.to('Z').mkString +
    "[¥]^_" +
    "`" + 'a'.to('z').mkString + "{|}~\ufffd" +
    "\ufffd" * 32 +
    "\ufffd。「」、・ヲァィゥェォャュョッ" +
    "ーアイウエオカキクケコサシスセソ" +
    "タチツテトナニヌネノハヒフヘホマ" +
    "ミムメモヤユヨラリルレロワン゛゜" +
    "\ufffd" * 8 +
    "♠♡♢♣" +
    "\ufffd" * 4 +
    "円年月日時分秒" +
    "\ufffd" * 3 + "\\",
  Map('¯' -> '~'.toInt, '‾' -> '~'.toInt, '♥' -> 0xE9) ++
    // Half-width forms U+FF61..U+FF9F line up one-to-one with bytes 0xA1..0xDF,
    // i.e. U+FF60 + i encodes as 0xA0 + i (the table above has 。 at 0xA1).
    1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap,
  "カキクケコサシスセソタチツテトハヒフヘホ".zip(
    "ガギグゲゴザジズゼゾダヂヅデドバビブベボ").map { case (u, v) => v -> (u + "゛") }.toMap ++
    "ハヒフヘホ".zip("パピプペポ").map { case (h, p) => p -> (h + "゜") }.toMap
)
} }