mirror of
https://github.com/KarolS/millfork.git
synced 2025-01-10 20:29:35 +00:00
More text codecs
This commit is contained in:
parent
2ea964f35b
commit
95375378ed
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
* Added array initialization syntax with `for` (not yet finalized).
|
* Added array initialization syntax with `for` (not yet finalized).
|
||||||
|
|
||||||
* Added `atascii` text codec.
|
* Added multiple new text codecs.
|
||||||
|
|
||||||
* Fixed several bugs, most importantly invalid offsets for branching instructions.
|
* Fixed several bugs, most importantly invalid offsets for branching instructions.
|
||||||
|
|
||||||
|
@ -21,6 +21,9 @@ String literals are surrounded with double quotes and followed by the name of th
|
|||||||
Characters between the quotes are interpreted literally,
|
Characters between the quotes are interpreted literally,
|
||||||
there are no ways to escape special characters or quotes.
|
there are no ways to escape special characters or quotes.
|
||||||
|
|
||||||
|
In some encodings, multiple characters are mapped to the same byte value,
|
||||||
|
for compatibility with multiple variants.
|
||||||
|
|
||||||
Currently available encodings:
|
Currently available encodings:
|
||||||
|
|
||||||
* `ascii` – standard ASCII
|
* `ascii` – standard ASCII
|
||||||
@ -29,6 +32,16 @@ Currently available encodings:
|
|||||||
|
|
||||||
* `scr` – Commodore screencodes
|
* `scr` – Commodore screencodes
|
||||||
|
|
||||||
|
* `apple2` – Apple II charset ($A0–$FE)
|
||||||
|
|
||||||
|
* `bbc` – BBC Micro and ZX Spectrum character set
|
||||||
|
|
||||||
|
* `jis` – JIS X 0201
|
||||||
|
|
||||||
|
* `iso_de`, `iso_no`, `iso_se`, `iso_yu` – national variants of ISO/IEC 646 (German, Norwegian, Swedish, Yugoslav)
|
||||||
|
|
||||||
|
* `iso_dk`, `iso_fi` – aliases for `iso_no` and `iso_se` respectively
|
||||||
|
|
||||||
When programming for Commodore,
|
When programming for Commodore,
|
||||||
use `pet` for strings you're printing using standard I/O routines
|
use `pet` for strings you're printing using standard I/O routines
|
||||||
and `scr` for strings you're copying to screen memory directly.
|
and `scr` for strings you're copying to screen memory directly.
|
||||||
|
@ -207,6 +207,16 @@ case class MfParser(filename: String, input: String, currentDirectory: String, o
|
|||||||
case (_, "scr") => TextCodec.CbmScreencodes
|
case (_, "scr") => TextCodec.CbmScreencodes
|
||||||
case (_, "atascii") => TextCodec.Atascii
|
case (_, "atascii") => TextCodec.Atascii
|
||||||
case (_, "atari") => TextCodec.Atascii
|
case (_, "atari") => TextCodec.Atascii
|
||||||
|
case (_, "bbc") => TextCodec.Bbc
|
||||||
|
case (_, "apple2") => TextCodec.Apple2
|
||||||
|
case (_, "jis") => TextCodec.Jis
|
||||||
|
case (_, "jisx") => TextCodec.Jis
|
||||||
|
case (_, "iso_de") => TextCodec.IsoIec646De
|
||||||
|
case (_, "iso_no") => TextCodec.IsoIec646No
|
||||||
|
case (_, "iso_dk") => TextCodec.IsoIec646No
|
||||||
|
case (_, "iso_se") => TextCodec.IsoIec646Se
|
||||||
|
case (_, "iso_fi") => TextCodec.IsoIec646Se
|
||||||
|
case (_, "iso_yu") => TextCodec.IsoIec646Yu
|
||||||
case (p, x) =>
|
case (p, x) =>
|
||||||
ErrorReporting.error(s"Unknown string encoding: `$x`", Some(p))
|
ErrorReporting.error(s"Unknown string encoding: `$x`", Some(p))
|
||||||
TextCodec.Ascii
|
TextCodec.Ascii
|
||||||
@ -226,7 +236,7 @@ case class MfParser(filename: String, input: String, currentDirectory: String, o
|
|||||||
}
|
}
|
||||||
|
|
||||||
def arrayStringContents: P[ArrayContents] = P(position() ~ doubleQuotedString ~/ HWS ~ codec).map {
|
def arrayStringContents: P[ArrayContents] = P(position() ~ doubleQuotedString ~/ HWS ~ codec).map {
|
||||||
case (p, s, co) => LiteralContents(s.map(c => LiteralExpression(co.decode(None, c), 1).pos(p)))
|
case (p, s, co) => LiteralContents(s.flatMap(c => co.encode(None, c)).map(c => LiteralExpression(c, 1).pos(p)))
|
||||||
}
|
}
|
||||||
|
|
||||||
def arrayLoopContents: P[ArrayContents] = for {
|
def arrayLoopContents: P[ArrayContents] = for {
|
||||||
|
@ -6,36 +6,97 @@ import millfork.node.Position
|
|||||||
/**
|
/**
|
||||||
* @author Karol Stasiak
|
* @author Karol Stasiak
|
||||||
*/
|
*/
|
||||||
class TextCodec(val name: String, private val map: String, private val extra: Map[Char, Int]) {
|
class TextCodec(val name: String, private val map: String, private val extra: Map[Char, Int], private val decompositions: Map[Char, String]) {
|
||||||
def decode(position: Option[Position], c: Char): Int = {
|
def encode(position: Option[Position], c: Char): List[Int] = {
|
||||||
if (extra.contains(c)) extra(c) else {
|
if (decompositions.contains(c)) {
|
||||||
|
decompositions(c).toList.flatMap(x => encode(position, x))
|
||||||
|
} else if (extra.contains(c)) List(extra(c)) else {
|
||||||
val index = map.indexOf(c)
|
val index = map.indexOf(c)
|
||||||
if (index >= 0) {
|
if (index >= 0) {
|
||||||
index
|
List(index)
|
||||||
} else {
|
} else {
|
||||||
ErrorReporting.fatal("Invalid character in string in ")
|
ErrorReporting.fatal("Invalid character in string")
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
def decode(by: Int): Char = {
|
||||||
|
val index = by & 0xff
|
||||||
|
if (index < map.length) map(index) else TextCodec.NotAChar
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
object TextCodec {
|
object TextCodec {
|
||||||
val NotAChar = '\ufffd'
|
val NotAChar = '\ufffd'
|
||||||
|
|
||||||
val Ascii = new TextCodec("ASCII", 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty)
|
val Ascii = new TextCodec("ASCII", 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty, Map.empty)
|
||||||
|
|
||||||
|
/**
  * Apple II text codec.
  * Byte values 160–254 decode to the ASCII character with code (byte - 128);
  * every lower byte value is left unmapped.
  */
val Apple2 = new TextCodec("APPLE-II",
  (0 until 255).map(code => if (code >= 160) (code - 128).toChar else NotAChar).mkString,
  Map.empty,
  Map.empty)
|
||||||
|
|
||||||
|
/**
  * German variant of ISO/IEC 646 (7-bit).
  * Codes $00–$1F are unmapped. Compared to ASCII, the table holds § at $40,
  * Ä Ö Ü at $5B–$5D and ä ö ü ß at $7B–$7E.
  */
val IsoIec646De = new TextCodec("ISO-IEC-646-DE",
  "\ufffd" * 32 +
    // '&' sits at $26, as in ASCII; '^' must appear only once in the table (at $5E),
    // because characters are looked up by their first position in this string.
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "§ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ^_" +
    "`abcdefghijklmnopqrstuvwxyzäöüß",
  Map.empty, Map.empty
)
|
||||||
|
|
||||||
|
/**
  * Swedish variant of ISO/IEC 646 (7-bit); also used for the `iso_fi` alias.
  * Codes $00–$1F are unmapped. Compared to ASCII, the table holds ¤ at $24,
  * Ä Ö Å at $5B–$5D and ä ö å at $7B–$7D.
  * The extra map lets several look-alike or variant characters share a byte value.
  */
val IsoIec646Se = new TextCodec("ISO-IEC-646-SE",
  "\ufffd" * 32 +
    // '&' sits at $26, as in ASCII; '^' must appear only once in the table (at $5E),
    // because characters are looked up by their first position in this string.
    " !\"#¤%&'()*+,-./0123456789:;<=>?" +
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÅ^_" +
    "`abcdefghijklmnopqrstuvwxyzäöå~",
  Map('¯' -> '~'.toInt,
    '‾' -> '~'.toInt,
    'É' -> '@'.toInt,
    'é' -> '`'.toInt,
    'Ü' -> '^'.toInt,
    'ü' -> '~'.toInt,
    // '$' shares the byte value that ¤ occupies in the table ($24).
    // Note that '¤'.toInt would be the Unicode code point 0xA4, not the table position.
    '$' -> 0x24),
  Map.empty
)
|
||||||
|
|
||||||
|
/**
  * Norwegian variant of ISO/IEC 646 (7-bit); also used for the `iso_dk` alias.
  * Codes $00–$1F are unmapped. Compared to ASCII, the table holds
  * Æ Ø Å at $5B–$5D and æ ø å at $7B–$7D.
  * The extra map lets several look-alike or variant characters share a byte value.
  */
val IsoIec646No = new TextCodec("ISO-IEC-646-NO",
  "\ufffd" * 32 +
    // '&' sits at $26, as in ASCII; '^' must appear only once in the table (at $5E),
    // because characters are looked up by their first position in this string.
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZÆØÅ^_" +
    "`abcdefghijklmnopqrstuvwxyzæøå~",
  Map('¯' -> '~'.toInt,
    '‾' -> '~'.toInt,
    '|' -> '~'.toInt,
    '¤' -> '$'.toInt,
    'Ä' -> '@'.toInt,
    'ä' -> '`'.toInt,
    'Ü' -> '^'.toInt,
    'ü' -> '~'.toInt,
    '«' -> '"'.toInt,
    '»' -> '#'.toInt,
    '§' -> '#'.toInt),
  Map.empty
)
|
||||||
|
|
||||||
|
/**
  * Yugoslav variant of ISO/IEC 646 (7-bit).
  * Codes $00–$1F are unmapped. Compared to ASCII, the table holds Ž at $40,
  * Š Đ Ć Č at $5B–$5E, ž at $60 and š đ ć č at $7B–$7E.
  */
val IsoIec646Yu = new TextCodec("ISO-IEC-646-YU",
  "\ufffd" * 32 +
    // '&' sits at $26, as in ASCII (this variant has no '^' at all: $5E is Č).
    " !\"#$%&'()*+,-./0123456789:;<=>?" +
    "ŽABCDEFGHIJKLMNOPQRSTUVWXYZŠĐĆČ_" +
    "žabcdefghijklmnopqrstuvwxyzšđćč",
  // NOTE(review): Ë/ë reuse the byte values of '$' and '_'; the rationale is not visible here — confirm.
  Map('Ë' -> '$'.toInt, 'ë' -> '_'.toInt),
  Map.empty
)
|
||||||
|
|
||||||
val CbmScreencodes = new TextCodec("CBM-Screen",
|
val CbmScreencodes = new TextCodec("CBM-Screen",
|
||||||
"@abcdefghijklmnopqrstuvwxyz[£]↑←" +
|
"@abcdefghijklmnopqrstuvwxyz[£]↑←" +
|
||||||
0x20.to(0x3f).map(_.toChar).mkString +
|
0x20.to(0x3f).map(_.toChar).mkString +
|
||||||
"–ABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
"–ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ",
|
||||||
Map('^' -> 0x3E, 'π' -> 0x5E, '♥' -> 0x53, '♡' -> 0x53, '♠' -> 0x41, '♣' -> 0x58, '♢' -> 0x5A, '•' -> 0x51))
|
Map('^' -> 0x3E, '♥' -> 0x53, '♡' -> 0x53, '♠' -> 0x41, '♣' -> 0x58, '♢' -> 0x5A, '•' -> 0x51), Map.empty
|
||||||
|
)
|
||||||
|
|
||||||
val Petscii = new TextCodec("PETSCII",
|
val Petscii = new TextCodec("PETSCII",
|
||||||
"\ufffd" * 32 +
|
"\ufffd" * 32 +
|
||||||
0x20.to(0x3f).map(_.toChar).mkString +
|
0x20.to(0x3f).map(_.toChar).mkString +
|
||||||
"@abcdefghijklmnopqrstuvwxyz[£]↑←" +
|
"@abcdefghijklmnopqrstuvwxyz[£]↑←" +
|
||||||
"–ABCDEFGHIJKLMNOPQRSTUVWXYZ",
|
"–ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ",
|
||||||
Map('^' -> 0x5E, 'π' -> 0x7E, '♥' -> 0x73, '♡' -> 0x73, '♠' -> 0x61, '♣' -> 0x78, '♢' -> 0x7A, '•' -> 0x71)
|
Map('^' -> 0x5E, '♥' -> 0x73, '♡' -> 0x73, '♠' -> 0x61, '♣' -> 0x78, '♢' -> 0x7A, '•' -> 0x71), Map.empty
|
||||||
)
|
)
|
||||||
|
|
||||||
val Atascii = new TextCodec("ATASCII",
|
val Atascii = new TextCodec("ATASCII",
|
||||||
@ -45,7 +106,37 @@ object TextCodec {
|
|||||||
"\ufffd" * 11 +
|
"\ufffd" * 11 +
|
||||||
0x20.to(0x5f).map(_.toChar).mkString +
|
0x20.to(0x5f).map(_.toChar).mkString +
|
||||||
"♢abcdefghijklmnopqrstuvwxyz♠|",
|
"♢abcdefghijklmnopqrstuvwxyz♠|",
|
||||||
Map('♥' -> 0, '·' -> 0x14)
|
Map('♥' -> 0, '·' -> 0x14), Map.empty
|
||||||
|
)
|
||||||
|
|
||||||
|
/**
  * Codec registered under the name `bbc`.
  * Codes $00–$1F are unmapped; $20–$5F and $61–$7E match ASCII,
  * $60 is £ and $7F is ©. The arrow ↑ shares the byte value of '^'.
  */
val Bbc = new TextCodec("BBC",
  Seq(
    "\ufffd" * 32,
    (0x20 to 0x5f).map(_.toChar).mkString,
    "£",
    (0x61 to 0x7E).map(_.toChar).mkString,
    "©"
  ).mkString,
  Map('↑' -> '^'.toInt),
  Map.empty
)
|
||||||
|
|
||||||
|
/**
  * JIS X 0201 codec with some additional symbols in the $E8–$FA range.
  *
  * Table layout: $00–$1F unmapped, $20–$7E as ASCII except ¥ at $5C,
  * $A1–$DF katakana and Japanese punctuation, then card suits and kanji above $E0.
  *
  * The extra map accepts overline variants for '~', a filled heart for $E9, and
  * the Unicode halfwidth forms U+FF61–U+FF9F, which share the byte values $A1–$DF
  * of their fullwidth counterparts in the table.
  *
  * The decompositions split voiced and semi-voiced katakana into a base kana
  * followed by a separate ゛ or ゜ mark, so one such character encodes to two bytes.
  */
//noinspection ScalaUnnecessaryParentheses
val Jis = new TextCodec("JIS-X-0201",
  "\ufffd" * 32 +
    ' '.to('Z').mkString +
    "[¥]^_" +
    "`" + 'a'.to('z').mkString + "{|}~\ufffd" +
    "\ufffd" * 32 +
    "\ufffd。「」、・ヲァィゥェォャュョッ" +
    "ーアイウエオカキクケコサシスセソ" +
    "タチツテトナニヌネノハヒフヘホマ" +
    "ミムメモヤユヨラリルレロワン゛゜" +
    "\ufffd" * 8 +
    "♠♡♢♣" +
    "\ufffd" * 4 +
    "円年月日時分秒" +
    "\ufffd" * 3 + "\\",
  Map('¯' -> '~'.toInt, '‾' -> '~'.toInt, '♥' -> 0xE9) ++
    // U+FF61 (halfwidth 。) must land on $A1, where 。 sits in the table, so the offset is $A0;
    // the last halfwidth form, U+FF9F, then lands on $DF next to ゜.
    1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap,
  (("カキクケコサシスセソタチツテトハヒフヘホ")).zip(
    "ガギグゲゴザジズゼゾダヂヅデドバビブベボ").map { case (u, v) => v -> (u + "゛") }.toMap ++
    "ハヒフヘホ".zip("パピプペポ").map { case (h, p) => p -> (h + "゜") }.toMap
)
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user