// Mirror of https://github.com/KarolS/millfork.git (synced 2024-05-31 18:41:30 +00:00)
// 710 lines, 25 KiB, Scala
package millfork.parser
|
||
|
||
import java.util.Locale
|
||
|
||
import millfork.CompilationOptions
|
||
import millfork.error.{ConsoleLogger, Logger}
|
||
import millfork.node.Position
|
||
|
||
/**
  * A single-byte text encoding: turns characters and `{...}` escape sequences into byte codes
  * and byte codes back into characters.
  *
  * @param name            name of the encoding, used in diagnostics
  * @param map             characters ordered by code: the code of a character is its index in this string
  * @param extra           explicit character-to-code mappings, consulted before `map`
  * @param decompositions  characters that are encoded as the concatenated encodings of the characters of the given string
  * @param escapeSequences codes emitted for named escape sequences (the text between `{` and `}`)
  * @author Karol Stasiak
  */
class TextCodec(val name: String,
                private val map: String,
                private val extra: Map[Char, Int],
                private val decompositions: Map[Char, String],
                private val escapeSequences: Map[String, List[Int]]) {

  /** Tells whether `c` may be shown verbatim in a diagnostic: letters, digits, punctuation, symbols and plain spaces. */
  private def isPrintable(c: Char): Boolean = c.getType match {
    case Character.LOWERCASE_LETTER | Character.UPPERCASE_LETTER | Character.TITLECASE_LETTER |
         Character.OTHER_LETTER | Character.LETTER_NUMBER |
         Character.DECIMAL_DIGIT_NUMBER | Character.OTHER_NUMBER |
         Character.DASH_PUNCTUATION | Character.START_PUNCTUATION | Character.END_PUNCTUATION |
         Character.INITIAL_QUOTE_PUNCTUATION | Character.FINAL_QUOTE_PUNCTUATION | Character.OTHER_PUNCTUATION |
         Character.CURRENCY_SYMBOL | Character.OTHER_SYMBOL | Character.MATH_SYMBOL |
         Character.SPACE_SEPARATOR => true
    // line/paragraph separators, control characters, modifier symbols, surrogates, marks and everything else
    case _ => false
  }

  /** Renders a character for diagnostics: its code point, preceded by the character itself when printable. */
  private def format(c: Char): String = {
    val u = f"U+${c.toInt}%04X"
    if (isPrintable(c)) f"`$c%c` ($u%s)"
    else u
  }

  /** Renders a string for diagnostics: its code points, preceded by the string itself when fully printable. */
  private def format(s: String): String = {
    val u = s.map(c => f"U+${c.toInt}%04X").mkString(",")
    if (s.forall(isPrintable)) f"`$s%s` ($u%s)"
    else u
  }

  /**
    * Encodes a single character.
    *
    * Lookup order: `decompositions`, then `extra`, then the position in `map`.
    * In lenient mode an unencodable character is replaced (with a warning) by the first
    * fully encodable entry of [[TextCodec.lossyAlternatives]], then by `?`, then by nothing.
    *
    * @return the codes, or `None` if the character cannot be encoded and `lenient` is false
    */
  private def encodeChar(log: Logger, position: Option[Position], c: Char, lenient: Boolean): Option[List[Int]] = {
    if (decompositions.contains(c)) {
      // Components are always encoded strictly. A component that is not part of the encoding
      // (e.g. a raw prefix byte such as U+0001) is emitted as its own code; running it through
      // the lenient fallback would replace it with '?' and corrupt the sequence.
      Some(decompositions(c).toList.flatMap { part =>
        encodeChar(log, position, part, lenient = false).getOrElse(List(part.toInt))
      })
    } else if (extra.contains(c)) {
      Some(List(extra(c)))
    } else {
      // NOTE(review): U+FFFD, used as the "unused slot" filler in `map`, is itself found here
      // and encodes to the first unused slot — confirm whether it should be rejected instead.
      val index = map.indexOf(c)
      if (index >= 0) {
        Some(List(index))
      } else if (lenient) {
        def encodable(candidate: String): Boolean =
          candidate.forall(ch => encodeChar(log, position, ch, lenient = false).isDefined)
        val alternative = (TextCodec.lossyAlternatives.getOrElse(c, Nil) :+ "?").find(encodable).getOrElse("")
        log.warn(s"Cannot encode ${format(c)} in encoding `$name`, replaced it with ${format(alternative)}", position)
        // every character of `alternative` was just checked to be encodable
        Some(alternative.toList.flatMap(ch => encodeChar(log, position, ch, lenient = false).getOrElse(Nil)))
      } else {
        None
      }
    }
  }

  /**
    * Encodes a whole string.
    *
    * `{` starts an escape sequence that runs up to the next `}`. An unclosed escape sequence is
    * reported as an error and ends the encoding; an unencodable character is reported and skipped.
    * The input is processed iteratively, so the length of the literal is not limited by the stack depth.
    *
    * @param lenient whether unencodable characters and unknown escape sequences are replaced/skipped
    *                with a warning instead of being reported as errors
    * @return the encoded bytes, in order
    */
  def encode(log: Logger, position: Option[Position], s: List[Char], lenient: Boolean): List[Int] = {
    val out = List.newBuilder[Int]

    @scala.annotation.tailrec
    def loop(rest: List[Char]): Unit = rest match {
      case '{' :: tail =>
        val (escSeq, closingBrace) = tail.span(_ != '}')
        closingBrace match {
          case '}' :: xs =>
            out ++= encodeEscapeSequence(log, escSeq.mkString(""), position, lenient)
            loop(xs)
          case _ =>
            log.error(f"Unclosed escape sequence", position)
        }
      case head :: tail =>
        encodeChar(log, position, head, lenient) match {
          case Some(codes) => out ++= codes
          case None => log.error(f"Invalid character ${format(head)} in string", position)
        }
        loop(tail)
      case Nil =>
    }

    loop(s)
    out.result()
  }

  /**
    * Encodes the contents of a `{...}` escape sequence.
    *
    * `x`, `X` or `$` followed by exactly two hexadecimal digits yields that raw byte. Signs are not
    * accepted, so sequences like `{x-1}` can no longer produce a negative value.
    * Everything else is looked up in `escapeSequences`; an unknown sequence yields no bytes and is
    * reported as a warning (lenient) or an error.
    */
  private def encodeEscapeSequence(log: Logger, escSeq: String, position: Option[Position], lenient: Boolean): List[Int] = {
    def hex(ch: Char): Int = Character.digit(ch, 16)

    val rawByte: Option[Int] = escSeq.toList match {
      case ('X' | 'x' | '$') :: hi :: lo :: Nil if hex(hi) >= 0 && hex(lo) >= 0 => Some(hex(hi) * 16 + hex(lo))
      case _ => None
    }
    rawByte match {
      case Some(byte) => List(byte)
      case None =>
        escapeSequences.getOrElse(escSeq, {
          if (lenient) {
            log.warn(s"Cannot encode escape sequence {$escSeq} in encoding `$name`, skipped it", position)
          } else {
            log.error(s"Invalid escape sequence {$escSeq} for encoding `$name`", position)
          }
          Nil
        })
    }
  }

  /** Decodes the low 8 bits of `by`; codes beyond the end of `map` yield [[TextCodec.NotAChar]]. */
  def decode(by: Int): Char = {
    val index = by & 0xff
    if (index < map.length) map(index) else TextCodec.NotAChar
  }

  /** Prints the 256-entry code table to standard output, 32 characters per row, each row prefixed with its first code in hex. */
  def dump(): Unit = {
    (0 until 256).map(decode).zipWithIndex.grouped(32)
      .map(row => row.head._2.toHexString + "\t" + row.map(_._1).mkString(""))
      .foreach(println(_))
  }
}
|
||
|
||
object TextCodec {
|
||
|
||
/**
  * Resolves an encoding name to a codec.
  *
  * A trailing `z` in `name` is stripped before the lookup and reported through the second
  * element of the result. An unknown name is reported via `log.error` and falls back to ASCII.
  *
  * @return the codec and whether the name carried the `z` suffix
  */
def forName(name: String, position: Option[Position], log: Logger): (TextCodec, Boolean) = {
  val hasZSuffix = name.endsWith("z")
  val resolved = name.stripSuffix("z") match {
    case "ascii" => Ascii
    case "petscii" | "pet" => Petscii
    case "petsciijp" | "petjp" => PetsciiJp
    case "oldpetscii" | "oldpet" => OldPetscii
    case "origpetscii" | "origpet" => OriginalPetscii
    case "cbmscr" | "petscr" => CbmScreencodes
    case "cbmscrjp" | "petscrjp" => CbmScreencodesJp
    case "atascii" | "atari" => Atascii
    case "atasciiscr" | "atariscr" => AtasciiScreencodes
    case "bbc" => Bbc
    case "sinclair" => Sinclair
    case "apple2" => Apple2
    case "jis" | "jisx" => Jis
    case "iso_de" => IsoIec646De
    case "iso_no" | "iso_dk" => IsoIec646No
    case "iso_se" | "iso_fi" => IsoIec646Se
    case "iso_yu" => IsoIec646Yu
    case "msx_intl" | "msx_us" | "msx_uk" | "msx_de" | "msx_fr" | "msx_es" => MsxWest
    case "msx_ru" => MsxRu
    case "msx_jp" => MsxJp
    case _ =>
      log.error(s"Unknown string encoding: `$name`", position)
      Ascii
  }
  (resolved, hasZSuffix)
}
|
||
|
||
/** Placeholder for codes that have no character assigned (U+FFFD). */
val NotAChar = '\ufffd'

/** Maps each of U+2400..U+2420 to its offset from U+2400 (0..32), and U+2421 to 127. */
private val DefaultOverrides: Map[Char, Int] = {
  val pictures = (0 to 0x20).map(code => (0x2400 + code).toChar -> code)
  pictures.toMap + ('\u2421' -> 127)
}
|
||
|
||
/** Escape sequences for the two quote characters only. */
//noinspection ScalaUnusedSymbol
private val MinimalEscapeSequencesWithoutBraces: Map[String, List[Int]] = Map(
  "apos" -> List('\''.toInt),
  "q" -> List('\"'.toInt))

/** The quote escape sequences plus `{lbrace}` and `{rbrace}`. */
//noinspection ScalaUnusedSymbol
private val MinimalEscapeSequencesWithBraces: Map[String, List[Int]] =
  MinimalEscapeSequencesWithoutBraces ++ Map(
    "lbrace" -> List('{'.toInt),
    "rbrace" -> List('}'.toInt))

/** Quotes and braces plus `{n}` (13, 10), `{t}` (9) and `{b}` (8). */
//noinspection ScalaUnusedSymbol
private val AsciiEscapeSequences: Map[String, List[Int]] =
  MinimalEscapeSequencesWithBraces ++ Map(
    "n" -> List(13, 10),
    "t" -> List(9),
    "b" -> List(8))
|
||
|
||
/** Each listed voiced/semi-voiced katakana mapped to its base kana followed by `゛` or `゜`. */
private val StandardKatakanaDecompositions: Map[Char, String] = {
  val voiced = "ガギグゲゴザジズゼゾダヂヅデドバビブベボ".zip("カキクケコサシスセソタチツテトハヒフヘホ")
    .map { case (composed, base) => composed -> (base.toString + "゛") }
  val semiVoiced = "パピプペポ".zip("ハヒフヘホ")
    .map { case (composed, base) => composed -> (base.toString + "゜") }
  voiced.toMap ++ semiVoiced.toMap
}

/** Each listed voiced/semi-voiced hiragana mapped to its base kana followed by `゛` or `゜`. */
private val StandardHiraganaDecompositions: Map[Char, String] = {
  val voiced = "がぎぐげござじずぜぞだぢづでどばびぶべぼ".zip("かきくけこさしすせそたちつてとはひふへほ")
    .map { case (composed, base) => composed -> (base.toString + "゛") }
  val semiVoiced = "ぱぴぷぺぽ".zip("はひふへほ")
    .map { case (composed, base) => composed -> (base.toString + "゜") }
  voiced.toMap ++ semiVoiced.toMap
}
|
||
|
||
/** ASCII: codes 0x20..0x7E map to themselves; codes below 0x20 are unassigned. */
val Ascii = new TextCodec("ASCII",
  NotAChar.toString * 32 + (0x20 until 0x7f).map(_.toChar).mkString,
  Map.empty, Map.empty, AsciiEscapeSequences)
|
||
|
||
/**
  * Apple II: codes 0xA0..0xDF hold the characters U+0020..U+005F; all other slots are unassigned.
  * Lowercase Latin letters are encoded as 0xC1..0xDA.
  */
val Apple2 = new TextCodec("APPLE-II",
  NotAChar.toString * 0xa0 + // 00-9f
    (0x20 to 0x5f).map(_.toChar).mkString + // a0-df
    NotAChar.toString * (0xff - 0xe0), // e0-fe
  ('a' to 'z').map(letter => letter -> (0xC1 + (letter - 'a'))).toMap,
  Map.empty,
  MinimalEscapeSequencesWithBraces)
|
||
|
||
/**
  * ISO/IEC 646, German variant.
  *
  * Slot 0x26 holds `&`: the previous table had a second `^` there, which made `&` unencodable
  * and encoded `^` as 0x26 instead of 0x5E.
  * The umlaut escape sequences follow the table below: Ä/Ö/Ü occupy 0x5B/0x5C/0x5D and
  * ä/ö/ü occupy 0x7B/0x7C/0x7D, so `{AE}`/`{UE}` and `{ae}`/`{ue}` are no longer swapped.
  */
val IsoIec646De = new TextCodec("ISO-IEC-646-DE",
  "\ufffd" * 32 + // 00-1f
    " !\"#$%&'()*+,-./0123456789:;<=>?" + // 20-3f
    "§ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ^_" + // 40-5f
    "`abcdefghijklmnopqrstuvwxyzäöüß", // 60-7e
  DefaultOverrides, Map.empty, AsciiEscapeSequences ++ Map(
    "AE" -> List('['.toInt),
    "OE" -> List('\\'.toInt),
    "UE" -> List(']'.toInt),
    "ae" -> List('{'.toInt),
    "oe" -> List('|'.toInt),
    "ue" -> List('}'.toInt),
    "ss" -> List('~'.toInt)
  )
)
|
||
|
||
/**
  * ISO/IEC 646, Swedish variant.
  *
  * Slot 0x26 holds `&`: the previous table had a second `^` there, which made `&` unencodable
  * and encoded `^` as 0x26 instead of 0x5E.
  * `$` is encoded as 0x24, the slot occupied by `¤` in the table; the previous override used the
  * Unicode code point of `¤` (0xA4), which is not a code of this encoding.
  */
val IsoIec646Se = new TextCodec("ISO-IEC-646-SE",
  "\ufffd" * 32 + // 00-1f
    " !\"#¤%&'()*+,-./0123456789:;<=>?" + // 20-3f
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÅ^_" + // 40-5f
    "`abcdefghijklmnopqrstuvwxyzäöå~", // 60-7e
  Map('¯' -> '~'.toInt,
    '‾' -> '~'.toInt,
    'É' -> '@'.toInt,
    'é' -> '`'.toInt,
    'Ü' -> '^'.toInt,
    'ü' -> '~'.toInt,
    '$' -> 0x24),
  Map.empty, AsciiEscapeSequences ++ Map(
    "AE" -> List('['.toInt),
    "OE" -> List('\\'.toInt),
    "AA" -> List(']'.toInt),
    "ae" -> List('{'.toInt),
    "oe" -> List('|'.toInt),
    "aa" -> List('}'.toInt)
  )
)
|
||
|
||
/**
  * ISO/IEC 646, Norwegian/Danish variant.
  *
  * Slot 0x26 holds `&`: the previous table had a second `^` there, which made `&` unencodable
  * and encoded `^` as 0x26 instead of 0x5E.
  */
val IsoIec646No = new TextCodec("ISO-IEC-646-NO",
  "\ufffd" * 32 + // 00-1f
    " !\"#$%&'()*+,-./0123456789:;<=>?" + // 20-3f
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZÆØÅ^_" + // 40-5f
    "`abcdefghijklmnopqrstuvwxyzæøå~", // 60-7e
  Map('¯' -> '~'.toInt,
    '‾' -> '~'.toInt,
    '|' -> '~'.toInt,
    '¤' -> '$'.toInt,
    'Ä' -> '@'.toInt,
    'ä' -> '`'.toInt,
    'Ü' -> '^'.toInt,
    'ü' -> '~'.toInt,
    '«' -> '"'.toInt,
    '»' -> '"'.toInt,
    '§' -> '#'.toInt),
  Map.empty, AsciiEscapeSequences ++ Map(
    "AE" -> List('['.toInt),
    "OE" -> List('\\'.toInt),
    "AA" -> List(']'.toInt),
    "ae" -> List('{'.toInt),
    "oe" -> List('|'.toInt),
    "aa" -> List('}'.toInt)
  )
)
|
||
|
||
|
||
/**
  * ISO/IEC 646, Yugoslav variant.
  *
  * Slot 0x26 holds `&`: the previous table had `^` there, which made `&` unencodable and
  * encoded `^` (which has no slot in this table, 0x5E being `Č`) as 0x26.
  */
val IsoIec646Yu = new TextCodec("ISO-IEC-646-YU",
  "\ufffd" * 32 + // 00-1f
    " !\"#$%&'()*+,-./0123456789:;<=>?" + // 20-3f
    "ŽABCDEFGHIJKLMNOPQRSTUVWXYZŠĐĆČ_" + // 40-5f
    "žabcdefghijklmnopqrstuvwxyzšđćč", // 60-7e
  Map('Ë' -> '$'.toInt, 'ë' -> '_'.toInt),
  Map.empty, AsciiEscapeSequences)
|
||
|
||
/** Commodore screen codes: lowercase letters at 0x01..0x1A, uppercase letters at 0x41..0x5A. */
val CbmScreencodes = new TextCodec("CBM-Screen",
  "@abcdefghijklmnopqrstuvwxyz[£]↑←" + // 00-1f
    (0x20 to 0x3f).map(_.toChar).mkString + // 20-3f
    "–ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ", // 40-5e
  Map(
    '^' -> 0x1E,
    '♥' -> 0x53, '♡' -> 0x53,
    '♠' -> 0x41,
    '♣' -> 0x58,
    '♢' -> 0x5A,
    '•' -> 0x51),
  Map.empty,
  MinimalEscapeSequencesWithoutBraces
)
|
||
|
||
/**
  * Commodore screen codes, Japanese variant.
  *
  * Two fixes compared with the previous table:
  *  - the eight unassigned slots 0x68..0x6F were missing, so the row documented as 0x70..0x7F
  *    actually started at 0x68 and disagreed with the explicit overrides (e.g. `ァ` -> 0x71);
  *  - half-width katakana U+FF80..U+FF9F are mapped to 0x40..0x5F, the slots of their
  *    full-width counterparts, instead of 0x50..0x6F.
  */
val CbmScreencodesJp = new TextCodec("CBM-Screen-JP",
  "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]↑←" + // 00-1f
    0x20.to(0x3f).map(_.toChar).mkString + // 20-3f
    "タチツテトナニヌネノハヒフヘホマ" + // 40-4f
    "ミムメモヤユヨラリルレロワン゛゜" + // 50-5f
    "\ufffd円年月\ufffd\ufffdヲ\ufffd" + // 60-67
    "\ufffd" * 8 + // 68-6f
    "πアイウエオカキクケコサシスセソ", // 70-7f
  Map('^' -> 0x1E, '\\' -> 0x1C,
    '♥' -> 0x44, '♡' -> 0x44, '♠' -> 0x41, '♣' -> 0x7B, '♢' -> 0x42, '•' -> 0x5D,
    'ー' -> '-'.toInt, 0xff70.toChar -> '-'.toInt, 0xff66.toChar -> 0x66,
    'ヮ' -> 0x5C, 'ヵ' -> 0x76, 'ヶ' -> 0x79,
    // small kana (full-width and half-width) share the slot of the corresponding regular kana
    'ァ' -> 0x71, 0xff67.toChar -> 0x71,
    'ィ' -> 0x72, 0xff68.toChar -> 0x72,
    'ゥ' -> 0x73, 0xff69.toChar -> 0x73,
    'ェ' -> 0x74, 0xff6a.toChar -> 0x74,
    'ォ' -> 0x75, 0xff6b.toChar -> 0x75,
    'ャ' -> 0x54, 0xff6c.toChar -> 0x54,
    'ュ' -> 0x55, 0xff6d.toChar -> 0x55,
    'ョ' -> 0x56, 0xff6e.toChar -> 0x56,
    'ッ' -> 0x42, 0xff6f.toChar -> 0x42
  ) ++
    ('a' to 'z').map(l => l -> (l - 'a' + 1)) ++
    // half-width U+FF71..U+FF7F -> 0x71..0x7F
    (1 to 0xf).map(i => (i + 0xff70).toChar -> (i + 0x70)) ++
    // half-width U+FF80..U+FF9F -> 0x40..0x5F
    (0x10 to 0x2f).map(i => (i + 0xff70).toChar -> (i + 0x30)),
  StandardKatakanaDecompositions, MinimalEscapeSequencesWithoutBraces
)
|
||
|
||
/** PETSCII: lowercase letters at 0x41..0x5A, uppercase letters at 0xC1..0xDA, plus cursor and colour escape sequences. */
val Petscii = new TextCodec("PETSCII",
  "\ufffd" * 32 + // 00-1f
    (0x20 to 0x3f).map(_.toChar).mkString + // 20-3f
    "@abcdefghijklmnopqrstuvwxyz[£]↑←" + // 40-5f
    "\ufffd" * 32 + // 60-7f
    "\ufffd" * 32 + // 80-9f
    "\ufffd" * 32 + // a0-bf
    "–ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ", // c0-de
  Map(
    '^' -> 0x5E,
    '♥' -> 0xD3, '♡' -> 0xD3,
    '♠' -> 0xC1,
    '♣' -> 0xD8,
    '♢' -> 0xDA,
    '•' -> 0xD1),
  Map.empty,
  Map(
    // text
    "n" -> List(13),
    "q" -> List('\"'.toInt),
    "apos" -> List('\''.toInt),
    // cursor movement
    "up" -> List(0x91),
    "down" -> List(0x11),
    "left" -> List(0x9d),
    "right" -> List(0x1d),
    // colours
    "white" -> List(5),
    "black" -> List(0x90),
    "red" -> List(0x1c),
    "blue" -> List(0x1f),
    "green" -> List(0x1e),
    "cyan" -> List(0x9f),
    "purple" -> List(0x9c),
    "yellow" -> List(0x9e),
    // reverse video
    "reverse" -> List(0x12),
    "reverseoff" -> List(0x92)
  )
)
|
||
|
||
/**
  * PETSCII, Japanese variant: uppercase Latin letters at 0x41..0x5A (lowercase letters are encoded
  * as uppercase), katakana at 0xB1..0xDF.
  */
val PetsciiJp = new TextCodec("PETSCII-JP",
  "\ufffd" * 32 + // 00-1f
    (0x20 to 0x3f).map(_.toChar).mkString + // 20-3f
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]↑←" + // 40-5f
    "\ufffd" * 32 + // 60-7f
    "\ufffd" * 32 + // 80-9f
    "\ufffd円年月\ufffd\ufffdヲ\ufffd" + // a0-a7
    "\ufffd" * 8 + // a8-af
    "πアイウエオカキクケコサシスセソ" + // b0-bf
    "タチツテトナニヌネノハヒフヘホマ" + // c0-cf
    "ミムメモヤユヨラリルレロワン゛゜", // d0-df
  Map('^' -> 0x5E, '\\' -> 0x5C,
    // NOTE(review): '♡' maps to 0x73 while '♥' maps to 0xC4; the other codecs in this file
    // give both hearts the same code — confirm this is intended.
    '♥' -> 0xC4, '♡' -> 0x73, '♠' -> 0xC1, '♣' -> 0xBB, '♢' -> 0xC2, '•' -> 0xDD,
    'ー' -> '-'.toInt, 0xff70.toChar -> '-'.toInt, 0xff66.toChar -> 0xa6,
    'ヮ' -> 0xDC, 'ヵ' -> 0xB6, 'ヶ' -> 0xB9,
    // small kana (full-width and half-width) share the slot of the corresponding regular kana
    'ァ' -> 0xB1, 0xff67.toChar -> 0xB1,
    'ィ' -> 0xB2, 0xff68.toChar -> 0xB2,
    'ゥ' -> 0xB3, 0xff69.toChar -> 0xB3,
    'ェ' -> 0xB4, 0xff6a.toChar -> 0xB4,
    'ォ' -> 0xB5, 0xff6b.toChar -> 0xB5,
    'ャ' -> 0xD4, 0xff6c.toChar -> 0xD4,
    'ュ' -> 0xD5, 0xff6d.toChar -> 0xD5,
    'ョ' -> 0xD6, 0xff6e.toChar -> 0xD6,
    'ッ' -> 0xC2, 0xff6f.toChar -> 0xC2) ++
    ('a' to 'z').map(letter => letter -> letter.toUpper.toInt) ++
    // half-width U+FF71..U+FF9F -> 0xB1..0xDF
    (1 to 0x2f).map(offset => (offset + 0xff70).toChar -> (offset + 0xb0)),
  StandardKatakanaDecompositions,
  Map(
    // text
    "n" -> List(13),
    "q" -> List('\"'.toInt),
    "apos" -> List('\''.toInt),
    // cursor movement
    "up" -> List(0x91),
    "down" -> List(0x11),
    "left" -> List(0x9d),
    "right" -> List(0x1d),
    // colours
    "white" -> List(5),
    "black" -> List(0x90),
    "red" -> List(0x1c),
    "blue" -> List(0x1f),
    "green" -> List(0x1e),
    "cyan" -> List(0x9f),
    "purple" -> List(0x9c),
    "yellow" -> List(0x9e),
    // reverse video
    "reverse" -> List(0x12),
    "reverseoff" -> List(0x92)
  )
)
|
||
|
||
/**
  * Old PETSCII: same letter layout as PETSCII, with `\` instead of `£` at 0x5C and no colour
  * escape sequences.
  *
  * `♣` is encoded as 0xD8, matching the PETSCII codec (0xD8) and the CBM screen codes (0x58);
  * the previous value 0xC8 disagreed with both while every other suit symbol was identical.
  */
val OldPetscii = new TextCodec("Old PETSCII",
  "\ufffd" * 32 + // 00-1f
    0x20.to(0x3f).map(_.toChar).mkString + // 20-3f
    "@abcdefghijklmnopqrstuvwxyz[\\]↑←" + // 40-5f
    "\ufffd" * 32 + // 60-7f
    "\ufffd" * 32 + // 80-9f
    "\ufffd" * 32 + // a0-bf
    "–ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ", // c0-de
  Map('^' -> 0x5E, '♥' -> 0xD3, '♡' -> 0xD3, '♠' -> 0xC1, '♣' -> 0xD8, '♢' -> 0xDA, '•' -> 0xD1), Map.empty, Map(
    "n" -> List(13),
    "q" -> List('\"'.toInt),
    "apos" -> List('\''.toInt),
    "up" -> List(0x91),
    "down" -> List(0x11),
    "left" -> List(0x9d),
    "right" -> List(0x1d),
    "reverse" -> List(0x12),
    "reverseoff" -> List(0x92)
  )
)
|
||
|
||
/**
  * Original PETSCII: uppercase letters at 0x41..0x5A and lowercase letters at 0xC1..0xDA
  * (the reverse of the Old PETSCII layout), with no colour escape sequences.
  *
  * `♣` is encoded as 0xD8, matching the PETSCII codec (0xD8) and the CBM screen codes (0x58);
  * the previous value 0xC8 disagreed with both while every other suit symbol was identical.
  */
val OriginalPetscii = new TextCodec("Original PETSCII",
  "\ufffd" * 32 + // 00-1f
    0x20.to(0x3f).map(_.toChar).mkString + // 20-3f
    "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]↑←" + // 40-5f
    "\ufffd" * 32 + // 60-7f
    "\ufffd" * 32 + // 80-9f
    "\ufffd" * 32 + // a0-bf
    "–abcdefghijklmnopqrstuvwxyz\ufffd\ufffd\ufffdπ", // c0-de
  Map('^' -> 0x5E, '♥' -> 0xD3, '♡' -> 0xD3, '♠' -> 0xC1, '♣' -> 0xD8, '♢' -> 0xDA, '•' -> 0xD1), Map.empty, Map(
    "n" -> List(13),
    "q" -> List('\"'.toInt),
    "apos" -> List('\''.toInt),
    "up" -> List(0x91),
    "down" -> List(0x11),
    "left" -> List(0x9d),
    "right" -> List(0x1d),
    "reverse" -> List(0x12),
    "reverseoff" -> List(0x92)
  )
)
|
||
|
||
/** ATASCII: a few graphics characters below 0x20, U+0020..U+005F at their own codes, lowercase letters at 0x61..0x7A. */
val Atascii = new TextCodec("ATASCII",
  "♡" + // 00
    "\ufffd" * 15 + // 01-0f
    "♣\ufffd–\ufffd•" + // 10-14
    "\ufffd" * 11 + // 15-1f
    (0x20 to 0x5f).map(_.toChar).mkString + // 20-5f
    "♢abcdefghijklmnopqrstuvwxyz♠|", // 60-7c
  Map('♥' -> 0, '·' -> 0x14),
  Map.empty,
  MinimalEscapeSequencesWithoutBraces ++ Seq(
    "n" -> List(0x9b),
    "up" -> List(0x1c),
    "down" -> List(0x1d),
    "left" -> List(0x1e),
    "right" -> List(0x1f),
    "b" -> List(0x7e)
  )
)
|
||
|
||
/** Atari screen codes: U+0020..U+005F at 0x00..0x3F, graphics characters and arrows at 0x40..0x5F, lowercase letters at 0x61..0x7A. */
val AtasciiScreencodes = new TextCodec("ATASCII-Screen",
  (0x20 to 0x5f).map(_.toChar).mkString + // 00-3f
    "♡" + // 40
    "\ufffd" * 15 + // 41-4f
    "♣\ufffd–\ufffd•" + // 50-54
    "\ufffd" * 7 + // 55-5b
    "↑↓←→" + // 5c-5f
    "♢abcdefghijklmnopqrstuvwxyz♠|", // 60-7c
  Map('♥' -> 0x40, '·' -> 0x54),
  Map.empty,
  MinimalEscapeSequencesWithoutBraces
)
|
||
|
||
/** BBC: like ASCII in 0x20..0x7E except `£` at 0x60, plus `©` at 0x7F; `↑` is encoded as `^`. */
val Bbc = new TextCodec("BBC",
  "\ufffd" * 32 + // 00-1f
    (0x20 to 0x5f).map(_.toChar).mkString + // 20-5f
    "£" + // 60
    (0x61 to 0x7E).map(_.toChar).mkString + // 61-7e
    "©", // 7f
  Map('↑' -> '^'.toInt),
  Map.empty,
  MinimalEscapeSequencesWithBraces + ("n" -> List(13))
)
|
||
|
||
/**
  * Sinclair: like ASCII in 0x20..0x7E except `£` at 0x60, plus `©` at 0x7F; `↑` is encoded as `^`.
  * Colour and reverse-video escape sequences emit two bytes: a control code followed by its argument.
  */
val Sinclair = new TextCodec("Sinclair",
  "\ufffd" * 32 + // 00-1f
    (0x20 to 0x5f).map(_.toChar).mkString + // 20-5f
    "£" + // 60
    (0x61 to 0x7E).map(_.toChar).mkString + // 61-7e
    "©", // 7f
  Map('↑' -> '^'.toInt),
  Map.empty,
  Map(
    // text
    "n" -> List(13),
    "q" -> List('\"'.toInt),
    "apos" -> List('\''.toInt),
    "lbrace" -> List('{'.toInt),
    "rbrace" -> List('}'.toInt),
    // cursor movement
    "up" -> List(11),
    "down" -> List(10),
    "left" -> List(8),
    "right" -> List(9),
    // foreground colours: 0x10 followed by the colour number
    // NOTE(review): black uses colour number 8 here and in {bgblack}, while all other
    // colours use 1..7 — confirm that 8 (rather than 0) is intended.
    "white" -> List(0x10, 7),
    "black" -> List(0x10, 8),
    "red" -> List(0x10, 2),
    "blue" -> List(0x10, 1),
    "green" -> List(0x10, 4),
    "cyan" -> List(0x10, 5),
    "purple" -> List(0x10, 3),
    "yellow" -> List(0x10, 6),
    // background colours: 0x11 followed by the colour number
    "bgwhite" -> List(0x11, 7),
    "bgblack" -> List(0x11, 8),
    "bgred" -> List(0x11, 2),
    "bgblue" -> List(0x11, 1),
    "bggreen" -> List(0x11, 4),
    "bgcyan" -> List(0x11, 5),
    "bgpurple" -> List(0x11, 3),
    "bgyellow" -> List(0x11, 6),
    // reverse video: 0x14 followed by 1 (on) or 0 (off)
    "reverse" -> List(0x14, 1),
    "reverseoff" -> List(0x14, 0)
  )
)
|
||
|
||
/**
  * 64 characters in the order used for the half-width katakana block; index 0 is unassigned.
  * Used both as a slice of code tables and to translate half-width forms U+FF61..U+FF9F by index.
  */
private val jisHalfwidthKatakanaOrder: String = Seq(
  "\ufffd。「」、・ヲァィゥェォャュョッ",
  "ーアイウエオカキクケコサシスセソ",
  "タチツテトナニヌネノハヒフヘホマ",
  "ミムメモヤユヨラリルレロワン゛゜"
).mkString
|
||
|
||
/**
  * JIS X 0201: ASCII layout with `¥` at 0x5C, katakana at 0xA1..0xDF, and a handful of
  * symbols and kanji at 0xE8..0xFA. Half-width forms U+FF61..U+FF9F are encoded as 0xA1..0xDF.
  */
//noinspection ScalaUnnecessaryParentheses
val Jis = new TextCodec("JIS-X-0201",
  "\ufffd" * 32 + // 00-1f
    (' ' to 'Z').mkString + // 20-5a
    "[¥]^_" + // 5b-5f
    "`" + ('a' to 'z').mkString + "{|}~\ufffd" + // 60-7f
    "\ufffd" * 32 + // 80-9f
    jisHalfwidthKatakanaOrder + // a0-df
    "\ufffd" * 8 + // e0-e7
    "♠♡♢♣" + // e8-eb
    "\ufffd" * 4 + // ec-ef
    "円年月日時分秒" + // f0-f6
    "\ufffd" * 3 + // f7-f9
    "\\", // fa
  Map('¯' -> '~'.toInt, '‾' -> '~'.toInt, '♥' -> 0xE9) ++
    (1 to 0x3F).map(offset => (offset + 0xff60).toChar -> (offset + 0xA0)).toMap,
  StandardKatakanaDecompositions,
  MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
)
|
||
|
||
/**
  * MSX international character set.
  *
  * Suit symbols and the middle dot are encoded as two bytes: 0x01 followed by a letter.
  * The decomposition table previously listed `♢` twice; the redundant entry now covers the
  * black diamond `♦` (U+2666), mirroring the `♥`/`♡` pair.
  */
val MsxWest = new TextCodec("MSX-International",
  "\ufffd" * 32 + // 00-1f
    (0x20 to 0x7e).map(_.toChar).mkString("") + // 20-7e
    "\ufffd" + // 7f
    "ÇüéâäàåçêëèïîìÄÅ" + // 80-8f
    "ÉæÆôöòûùÿÖÜ¢£¥₧ƒ" + // 90-9f
    "áíóúñѪº¿⌐¬½¼¡«»" + // a0-af
    "ÃãĨĩÕõŨũ\u0132\u0133¾\ufffd\ufffd‰¶§" + // b0-bf; \u0132 and \u0133 are the IJ/ij ligatures
    "\ufffd" * 24 + // c0-d7
    "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" + // d8-df
    "αβΓΠΣσµγΦθΩδ∞∅∈∩" + // e0-ef
    "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²", // f0-fd
  Map('ß' -> 0xE1, '¦' -> 0x7C),
  Map(
    '♥' -> "\u0001C", '♡' -> "\u0001C",
    '♢' -> "\u0001D", '\u2666' -> "\u0001D",
    '♣' -> "\u0001E",
    '♠' -> "\u0001F",
    '·' -> "\u0001G"),
  MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
)
|
||
|
||
/**
  * MSX Russian character set: Cyrillic lowercase letters at 0xC0..0xDF, uppercase at 0xE0..0xFD.
  *
  * Suit symbols and the middle dot are encoded as two bytes: 0x01 followed by a letter.
  * The decomposition table previously listed `♢` twice; the redundant entry now covers the
  * black diamond `♦` (U+2666), mirroring the `♥`/`♡` pair.
  */
val MsxRu = new TextCodec("MSX-RU",
  "\ufffd" * 32 + // 00-1f
    (0x20 to 0x7e).map(_.toChar).mkString("") + // 20-7e
    "\ufffd" + // 7f
    "\ufffd" * 16 + // 80-8f
    "\ufffd" * 8 + // 90-97
    "Δ\ufffdω\ufffd\ufffd\ufffd\ufffd\ufffd" + // 98-9f
    "αβΓΠΣσµγΦθΩδ∞∅∈∩" + // a0-af
    "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²\ufffd\ufffd" + // b0-bf
    "юабцдефгхийклмнопярстужвьызшэщчъ" + // c0-df
    "ЮАБЦДЕФГХИЙКЛМНОПЯРСТУЖВЬЫЗШЭЩ", // e0-fd
  Map('ß' -> 0xA1, '¦' -> 0x7C),
  Map(
    '♥' -> "\u0001C", '♡' -> "\u0001C",
    '♢' -> "\u0001D", '\u2666' -> "\u0001D",
    '♣' -> "\u0001E",
    '♠' -> "\u0001F",
    '·' -> "\u0001G"),
  MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
)
|
||
|
||
/**
  * MSX Japanese character set: ASCII layout with `¥` at 0x5C, hiragana at 0x86..0x9F and
  * 0xE0..0xFD, katakana at 0xA1..0xDF. Listed kanji are encoded as two bytes: 0x01 followed
  * by another character.
  *
  * Slot 0x84 is unassigned: it now holds the regular U+FFFD placeholder; the previous table
  * had U+FFDD there, which looks like a typo for the placeholder used everywhere else.
  */
val MsxJp = new TextCodec("MSX-JP",
  "\ufffd" * 32 + // 00-1f
    (0x20 to 0x7e).map(c => if (c == 0x5c) '¥' else c.toChar).mkString("") + // 20-7e
    "\ufffd" + // 7f
    "♠♡♣♢\ufffd·をぁぃぅぇぉゃゅょっ" + // 80-8f
    " あいうえおかきくけこさしすせそ" + // 90-9f
    jisHalfwidthKatakanaOrder + // a0-df
    "たちつてとなにぬねのはひふへほま" + // e0-ef
    "みむめもやゆよらりるれろわん", // f0-fd
  Map('♥' -> 0x81, '¦' -> 0x7C) ++
    // half-width U+FF61..U+FF9F -> 0xA1..0xDF
    1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap,
  Map(
    '月' -> "\u0001A",
    '火' -> "\u0001B",
    '水' -> "\u0001C",
    '木' -> "\u0001D",
    '金' -> "\u0001E",
    '土' -> "\u0001F",
    '日' -> "\u0001G",
    '年' -> "\u0001H",
    '円' -> "\u0001I",
    '時' -> "\u0001J",
    '分' -> "\u0001K",
    '秒' -> "\u0001L",
    '百' -> "\u0001M",
    '千' -> "\u0001N",
    '万' -> "\u0001O",
    '大' -> "\u0001]",
    '中' -> "\u0001^",
    '小' -> "\u0001_"
  ) ++
    StandardHiraganaDecompositions ++ StandardKatakanaDecompositions,
  MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10))
)
|
||
|
||
/**
  * Replacement candidates for characters that cannot be encoded, in order of preference.
  * Built from several groups; when a character occurs in more than one group, the group
  * merged later wins.
  */
val lossyAlternatives: Map[Char, List[String]] = {
  // Latin letters may fall back to the other case
  val allowLowercase: Map[Char, List[String]] =
    ('A' to 'Z').map(upper => upper -> List(upper.toString.toLowerCase(Locale.ROOT))).toMap
  val allowUppercase: Map[Char, List[String]] =
    ('a' to 'z').map(lower => lower -> List(lower.toString.toUpperCase(Locale.ROOT))).toMap

  val ligaturesAndSymbols: Map[Char, List[String]] = Map(
    '¦' -> List("|"),
    '|' -> List("¦"),
    'ß' -> List("ss", "SS"),
    '\ufb00' -> List("ff", "FF"), // ff ligature
    '\ufb02' -> List("fl", "FL"), // fl ligature
    '\ufb01' -> List("fi", "FI"), // fi ligature
    '\ufb03' -> List("ffi", "FFI"), // ffi ligature
    '\ufb04' -> List("ffl", "FFL"), // ffl ligature
    '½' -> List("1/2"),
    '¼' -> List("1/4"),
    '¾' -> List("3/4"),
    '¥' -> List("Y", "y"),
    '円' -> List("¥", "Y", "y"),
    '年' -> List("Y", "y"),
    '月' -> List("M", "m"),
    '日' -> List("D", "d"),
    '時' -> List("h", "H"),
    '分' -> List("m", "M"),
    '秒' -> List("s", "S"),
    '♥' -> List("H", "h"),
    '♠' -> List("S", "s"),
    '♡' -> List("H", "h"),
    '♢' -> List("D", "d"),
    '♣' -> List("C", "c"),
    '。' -> List("."),
    '、' -> List(","),
    '・' -> List("-"),
    '•' -> List("・", "*"),
    '「' -> List("[", "("),
    '」' -> List("]", ")"),
    '^' -> List("↑"),
    '↑' -> List("^"),
    '‾' -> List("~"),
    '¯' -> List("~"),
    '«' -> List("\""),
    '»' -> List("\""),
    '§' -> List("#"),
    '[' -> List("("),
    ']' -> List(")"),
    '{' -> List("("),
    '}' -> List(")"),
    '©' -> List("(C)"),
    '\u0130' -> List("I", "i"), // capital I with dot above
    'ª' -> List("a", "A"),
    'º' -> List("o", "O"),
    '‰' -> List("%."),
    '÷' -> List("/"),
    '\u0133' -> List("ij", "IJ"), // ij ligature
    '\u0132' -> List("IJ", "ij") // IJ ligature
  )

  // each accented letter falls back to its plain spelling, preferring the matching case
  val accentedLetters: Map[Char, List[String]] = (for {
    (accented, plain) <- List(
      "áàäãåąāǎă" -> "a",
      "çčċćĉ" -> "c",
      "đď" -> "d",
      "ð" -> "dh",
      "éèêëęēėě" -> "e",
      "ğǧĝģġ" -> "g",
      "ħĥ" -> "h",
      "íıìîïįīǐĭĩ" -> "i",
      "ĵ" -> "j",
      "ķ" -> "k",
      "ĺľłļŀ" -> "l",
      "ñńňņŋ" -> "n",
      "óòöôőõøōǒ" -> "o",
      "řŗŕ" -> "r",
      "śšŝșşſ" -> "s",
      "ţțťŧ" -> "t",
      "þ" -> "th",
      "úùũûüűųūǔůǘǜǚǖ" -> "u",
      "ẃẁŵ" -> "w",
      "ýÿỳŷȳ" -> "y",
      "žźż" -> "z",
      "æ" -> "ae",
      "œ" -> "oe"
    )
    letter <- accented.toList
    entry <- List(
      letter -> List(plain, plain.toUpperCase(Locale.ROOT)),
      letter.toUpper -> List(plain.toUpperCase(Locale.ROOT), plain)
    )
  } yield entry).toMap

  // hiragana U+3041..U+3096 fall back to the character 0x60 code points higher
  val hiragana: Map[Char, List[String]] =
    (0x3041 to 0x3096).map(code => code.toChar -> List((code + 0x60).toChar.toString)).toMap

  // full-width forms U+FF01..U+FF5E fall back to the character 0xFEE0 code points lower,
  // and, for letters, then to its other case
  val fullWidth: Map[Char, List[String]] = (0xff01 to 0xff5e).map { code =>
    val wide = code.toChar
    val narrow = (code - 0xfee0).toChar
    val candidates =
      if (narrow.isUpper) List(narrow.toString, narrow.toString.toLowerCase(Locale.ROOT))
      else if (narrow.isLower) List(narrow.toString, narrow.toString.toUpperCase(Locale.ROOT))
      else List(narrow.toString)
    wide -> candidates
  }.toMap

  // half-width forms U+FF61..U+FF9F fall back to the character at the same index of jisHalfwidthKatakanaOrder
  val halfWidth = (0xff61 to 0xff9f).map { code =>
    code.toChar -> List(jisHalfwidthKatakanaOrder(code - 0xff60).toString)
  }.toMap

  allowLowercase ++ allowUppercase ++ ligaturesAndSymbols ++ accentedLetters ++ hiragana ++ fullWidth ++ halfWidth
}
|
||
|
||
}
|