From 960d16fa18b6463c79b64b352fdbdded65020f03 Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Fri, 16 Aug 2019 00:46:11 +0200 Subject: [PATCH] Add nullchar constant, NULLCHAR feature, and vectrex encoding --- CHANGELOG.md | 4 + docs/lang/literals.md | 4 +- docs/lang/text.md | 12 ++ include/stdio.mfk | 2 +- include/string.mfk | 2 +- include/string_fastindices.mfk | 6 +- include/string_fastpointers.mfk | 8 +- src/main/scala/millfork/env/Environment.scala | 3 + src/main/scala/millfork/parser/MfParser.scala | 8 +- .../scala/millfork/parser/TextCodec.scala | 154 ++++++++++++------ 10 files changed, 139 insertions(+), 64 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cfe9af9f..a899b764 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## Current version +* Added `nullchar` constant as the null terminator for strings and `NULLCHAR` feature to define its value. + +* Added `vectrex` text encoding. + ## 0.3.6 * **Breaking change!** diff --git a/docs/lang/literals.md b/docs/lang/literals.md index c8eb3c06..d14c38b4 100644 --- a/docs/lang/literals.md +++ b/docs/lang/literals.md @@ -36,7 +36,9 @@ Two encoding names are special and refer to platform-specific encodings: `default` and `scr`. You can also append `z` to the name of the encoding to make the string zero-terminated. -This means that the string will have one extra byte appended, equal to 0. +This means that the string will have one extra byte appended, equal to `nullchar`. +The exact value of `nullchar` is encoding-dependent: in the `vectrex` encoding it's $80, +in other encodings it's 0 (this might be subject to change in future versions). 
"this is a zero-terminated string" asciiz "this is also a zero-terminated string"z diff --git a/docs/lang/text.md b/docs/lang/text.md index 96e45add..73b163d1 100644 --- a/docs/lang/text.md +++ b/docs/lang/text.md @@ -43,6 +43,8 @@ * `atasciiscr` or `atariscr` – screencodes used by Atari 8-bit computers +* `vectrex` – built-in Vectrex font + When programming for Commodore, use `pet` for strings you're printing using standard I/O routines and `petscr` for strings you're copying to screen memory directly. @@ -61,6 +63,12 @@ Some escape sequences may expand to multiple characters. For example, in several * `{x00}`–`{xff}` – a character of the given hexadecimal value +* `{copyright_year}` – this expands to the current year in digits + +* `{program_name}` – this expands to the name of the output file without the file extension + +* `{program_name_upper}` – the same, but uppercased + ##### Available only in some encodings * `{n}` – new line @@ -79,6 +87,8 @@ control codes for changing the text background color * `{reverse}`, `{reverseoff}` – inverted mode on/off +* `{yen}`, `{pound}`, `{copy}` – yen symbol, pound symbol, copyright symbol + ##### Character availability Encoding | lowercase letters | backslash | pound | yen | katakana | card suits @@ -96,6 +106,7 @@ Encoding | lowercase letters | backslash | pound | yen | katakana | card suits `msx_intl` | yes | yes | yes | yes | no | yes `msx_jp` | yes | no | no | yes | yes | yes `msx_ru` | yes | yes | no | no | no | yes +`vectrex` | no | yes | no | no | no | no all the rest | yes | yes | no | no | no | no 1. `pet`, `origpet` and `petscr` cannot display card suit symbols and lowercase letters at the same time. 
@@ -124,4 +135,5 @@ Encoding | new line | braces | backspace | cursor movement | text colour | rever `apple2` | no | yes | no | no | no | no | no `atascii` | yes | no | yes | yes | no | no | no `atasciiscr` | no | no | no | no | no | no | no +`vectrex` | no | no | no | no | no | no | no all the rest | yes | yes | no | no | no | no | no diff --git a/include/stdio.mfk b/include/stdio.mfk index ce5639c7..6ac71e02 100644 --- a/include/stdio.mfk +++ b/include/stdio.mfk @@ -26,7 +26,7 @@ asm void putstrz(pointer hl) @$5550 extern void putstrz(pointer str) { byte index index = 0 - while str[index] != 0 { + while str[index] != nullchar { putchar(str[index]) index += 1 } diff --git a/include/string.mfk b/include/string.mfk index f330376d..b8f2fe7d 100644 --- a/include/string.mfk +++ b/include/string.mfk @@ -25,7 +25,7 @@ word strz2word(pointer str) { errno = err_ok while true { char = str[i] - if char == 0 { + if char == nullchar { if i == 0 { errno = err_numberformat } diff --git a/include/string_fastindices.mfk b/include/string_fastindices.mfk index 28bcba8f..6a5da45a 100644 --- a/include/string_fastindices.mfk +++ b/include/string_fastindices.mfk @@ -1,7 +1,7 @@ byte strzlen(pointer str) { byte index index = 0 - while str[index] != 0 { + while str[index] != nullchar { index += 1 } return index @@ -17,7 +17,7 @@ sbyte strzcmp(pointer str1, pointer str2) { if str1[i1] < str2[i2] { return -1 } return 1 } - if str1[i1] == 0 { + if str1[i1] == nullchar { return 0 } i1 += 1 @@ -33,5 +33,5 @@ void strzcopy(pointer dest, pointer src) { c = src[i] dest[i] = c i += 1 - } while c != 0 + } while c != nullchar } diff --git a/include/string_fastpointers.mfk b/include/string_fastpointers.mfk index 87047bd7..00dc4cea 100644 --- a/include/string_fastpointers.mfk +++ b/include/string_fastpointers.mfk @@ -3,7 +3,7 @@ byte strzlen(pointer str) { pointer end end = str - while end[0] != 0 { + while end[0] != nullchar { end += 1 } return lo(end - str) @@ -11,8 +11,8 @@ byte strzlen(pointer 
str) { sbyte strzcmp(pointer str1, pointer str2) { while true { - if str1[0] == 0 { - if str2[0] == 0 { + if str1[0] == nullchar { + if str2[0] == nullchar { return 0 } else { return -1 @@ -33,5 +33,5 @@ void strzcopy(pointer dest, pointer src) { dest[0] = c src += 1 dest += 1 - } while c != 0 + } while c != nullchar } diff --git a/src/main/scala/millfork/env/Environment.scala b/src/main/scala/millfork/env/Environment.scala index d664f403..fda129be 100644 --- a/src/main/scala/millfork/env/Environment.scala +++ b/src/main/scala/millfork/env/Environment.scala @@ -433,6 +433,9 @@ class Environment(val parent: Option[Environment], val prefix: String, val cpuFa addThing(ConstantThing("nullptr.raw", nullptrConstant, p), None) addThing(ConstantThing("nullptr.raw.hi", nullptrConstant.hiByte.quickSimplify, b), None) addThing(ConstantThing("nullptr.raw.lo", nullptrConstant.loByte.quickSimplify, b), None) + val nullcharValue = options.features.getOrElse("NULLCHAR", 0L) + val nullcharConstant = NumericConstant(nullcharValue, 1) + addThing(ConstantThing("nullchar", nullcharConstant, b), None) val __zeropage_usage = UnexpandedConstant("__zeropage_usage", 1) addThing(ConstantThing("__zeropage_usage", __zeropage_usage, b), None) def addUnexpandedWordConstant(name: String): Unit = { diff --git a/src/main/scala/millfork/parser/MfParser.scala b/src/main/scala/millfork/parser/MfParser.scala index 4b19a795..b60f9fb0 100644 --- a/src/main/scala/millfork/parser/MfParser.scala +++ b/src/main/scala/millfork/parser/MfParser.scala @@ -68,12 +68,12 @@ abstract class MfParser[T](fileId: String, input: String, currentDirectory: Stri if (zt) { log.error("Zero-terminated encoding is not a valid encoding for a character literal", Some(p)) } - co.encode(options.log, Some(p), c.toList, lenient = lenient) match { + co.encode(options.log, Some(p), c.toList, options, lenient = lenient) match { case List(value) => LiteralExpression(value, 1) case _ => log.error(s"Character `$c` cannot be encoded as one 
byte", Some(p)) - LiteralExpression(0, 1) + LiteralExpression(co.stringTerminator, 1) } } @@ -87,8 +87,8 @@ abstract class MfParser[T](fileId: String, input: String, currentDirectory: Stri val textLiteral: P[List[Expression]] = P(position() ~ doubleQuotedString ~/ HWS ~ codec).map { case (p, s, ((co, zt), lenient)) => - val characters = co.encode(options.log, None, s, lenient = lenient).map(c => LiteralExpression(c, 1).pos(p)) - if (zt) characters :+ LiteralExpression(0,1) + val characters = co.encode(options.log, None, s, options, lenient = lenient).map(c => LiteralExpression(c, 1).pos(p)) + if (zt) characters :+ LiteralExpression(co.stringTerminator, 1) else characters } diff --git a/src/main/scala/millfork/parser/TextCodec.scala b/src/main/scala/millfork/parser/TextCodec.scala index 01be18cf..8a93a275 100644 --- a/src/main/scala/millfork/parser/TextCodec.scala +++ b/src/main/scala/millfork/parser/TextCodec.scala @@ -1,8 +1,9 @@ package millfork.parser +import java.time.LocalDate import java.util.Locale -import millfork.CompilationOptions +import millfork.{CompilationFlag, CompilationOptions} import millfork.error.{ConsoleLogger, Logger} import millfork.node.Position @@ -10,6 +11,7 @@ import millfork.node.Position * @author Karol Stasiak */ class TextCodec(val name: String, + val stringTerminator: Int, private val map: String, private val extra: Map[Char, Int], private val decompositions: Map[Char, String], @@ -56,17 +58,17 @@ class TextCodec(val name: String, if (s.forall(isPrintable)) f"`$s%s` ($u%s)" else u } - private def encodeChar(log: Logger, position: Option[Position], c: Char, lenient: Boolean): Option[List[Int]] = { + private def encodeChar(log: Logger, position: Option[Position], c: Char, options: CompilationOptions, lenient: Boolean): Option[List[Int]] = { if (decompositions.contains(c)) { - Some(decompositions(c).toList.flatMap(x => encodeChar(log, position, x, lenient).getOrElse(List(x.toInt)))) + Some(decompositions(c).toList.flatMap(x => 
encodeChar(log, position, x, options, lenient).getOrElse(List(x.toInt)))) } else if (extra.contains(c)) Some(List(extra(c))) else { val index = map.indexOf(c) if (index >= 0) { Some(List(index)) } else if (lenient) { - val alternative = TextCodec.lossyAlternatives.getOrElse(c, Nil).:+("?").find(alts => alts.forall(alt => encodeChar(log, position, alt, lenient = false).isDefined)).getOrElse("") + val alternative = TextCodec.lossyAlternatives.getOrElse(c, Nil).:+("?").find(alts => alts.forall(alt => encodeChar(log, position, alt, options, lenient = false).isDefined)).getOrElse("") log.warn(s"Cannot encode ${format(c)} in encoding `$name`, replaced it with ${format(alternative)}", position) - Some(alternative.toList.flatMap(encodeChar(log, position, _, lenient = false).get)) + Some(alternative.toList.flatMap(encodeChar(log, position, _, options, lenient = false).get)) } else { None } @@ -74,27 +76,30 @@ class TextCodec(val name: String, } - def encode(log: Logger, position: Option[Position], s: List[Char], lenient: Boolean): List[Int] = s match { - case '{' :: tail => - val (escSeq, closingBrace) = tail.span(_ != '}') - closingBrace match { - case '}' :: xs => - encodeEscapeSequence(log, escSeq.mkString(""), position, lenient) ++ encode(log, position, xs, lenient) - case _ => - log.error(f"Unclosed escape sequence", position) - Nil - } - case head :: tail => - (encodeChar(log, position, head, lenient) match { - case Some(x) => x - case None => - log.error(f"Invalid character ${format(head)} in string", position) - Nil - }) ++ encode(log, position, tail, lenient) - case Nil => Nil + def encode(log: Logger, position: Option[Position], s: List[Char], options: CompilationOptions, lenient: Boolean): List[Int] = { + val lenient = options.flag(CompilationFlag.LenientTextEncoding) + s match { + case '{' :: tail => + val (escSeq, closingBrace) = tail.span(_ != '}') + closingBrace match { + case '}' :: xs => + encodeEscapeSequence(log, escSeq.mkString(""), position, options, 
lenient) ++ encode(log, position, xs, options, lenient) + case _ => + log.error(f"Unclosed escape sequence", position) + Nil + } + case head :: tail => + (encodeChar(log, position, head, options, lenient) match { + case Some(x) => x + case None => + log.error(f"Invalid character ${format(head)} in string", position) + Nil + }) ++ encode(log, position, tail, options, lenient) + case Nil => Nil + } } - private def encodeEscapeSequence(log: Logger, escSeq: String, position: Option[Position], lenient: Boolean): List[Int] = { + private def encodeEscapeSequence(log: Logger, escSeq: String, position: Option[Position], options: CompilationOptions, lenient: Boolean): List[Int] = { if (escSeq.length == 3 && (escSeq(0) == 'X' || escSeq(0) == 'x' || escSeq(0) == '$')){ try { return List(Integer.parseInt(escSeq.tail, 16)) @@ -102,6 +107,15 @@ class TextCodec(val name: String, case _: NumberFormatException => } } + if (escSeq == "program_name_upper") { + return encode(log, position, options.outputFileName.getOrElse("MILLFORK").toUpperCase(Locale.ROOT).toList, options, lenient) + } + if (escSeq == "program_name") { + return encode(log, position, options.outputFileName.getOrElse("MILLFORK").toList, options, lenient) + } + if (escSeq == "copyright_year") { + return encode(log, position, LocalDate.now.getYear.toString.toList, options, lenient) + } escapeSequences.getOrElse(escSeq, { if (lenient) { log.warn(s"Cannot encode escape sequence {$escSeq} in encoding `$name`, skipped it", position) @@ -164,6 +178,7 @@ object TextCodec { case (_, "msx_es") => TextCodec.MsxWest case (_, "msx_ru") => TextCodec.MsxRu case (_, "msx_jp") => TextCodec.MsxJp + case (_, "vectrex") => TextCodec.Vectrex case (p, _) => log.error(s"Unknown string encoding: `$name`", p) TextCodec.Ascii @@ -208,16 +223,16 @@ object TextCodec { "はひふへほ".zip("ぱぴぷぺぽ").map { case (h, p) => p -> (h + "゜") }.toMap } - val Ascii = new TextCodec("ASCII", 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, 
Map.empty, Map.empty, AsciiEscapeSequences) + val Ascii = new TextCodec("ASCII", 0, 0.until(127).map { i => if (i < 32) NotAChar else i.toChar }.mkString, Map.empty, Map.empty, AsciiEscapeSequences) - val Apple2 = new TextCodec("APPLE-II", 0.until(255).map { i => + val Apple2 = new TextCodec("APPLE-II", 0, 0.until(255).map { i => if (i < 0xa0) NotAChar else if (i < 0xe0) (i - 128).toChar else NotAChar }.mkString, ('a' to 'z').map(l => l -> (l - 'a' + 0xC1)).toMap, Map.empty, MinimalEscapeSequencesWithBraces) - val IsoIec646De = new TextCodec("ISO-IEC-646-DE", + val IsoIec646De = new TextCodec("ISO-IEC-646-DE", 0, "\ufffd" * 32 + " !\"#$%^'()*+,-./0123456789:;<=>?" + "§ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÜ^_" + @@ -233,7 +248,7 @@ object TextCodec { ) ) - val IsoIec646Se = new TextCodec("ISO-IEC-646-SE", + val IsoIec646Se = new TextCodec("ISO-IEC-646-SE", 0, "\ufffd" * 32 + " !\"#¤%^'()*+,-./0123456789:;<=>?" + "@ABCDEFGHIJKLMNOPQRSTUVWXYZÄÖÅ^_" + @@ -255,7 +270,7 @@ object TextCodec { ) ) - val IsoIec646No = new TextCodec("ISO-IEC-646-NO", + val IsoIec646No = new TextCodec("ISO-IEC-646-NO", 0, "\ufffd" * 32 + " !\"#$%^'()*+,-./0123456789:;<=>?" + "@ABCDEFGHIJKLMNOPQRSTUVWXYZÆØÅ^_" + @@ -282,7 +297,7 @@ object TextCodec { ) - val IsoIec646Yu = new TextCodec("ISO-IEC-646-YU", + val IsoIec646Yu = new TextCodec("ISO-IEC-646-YU", 0, "\ufffd" * 32 + " !\"#$%^'()*+,-./0123456789:;<=>?" 
+ "ŽABCDEFGHIJKLMNOPQRSTUVWXYZŠĐĆČ_" + @@ -290,15 +305,18 @@ object TextCodec { Map('Ë' -> '$'.toInt, 'ë' -> '_'.toInt), Map.empty, AsciiEscapeSequences) - val CbmScreencodes = new TextCodec("CBM-Screen", + val CbmScreencodes = new TextCodec("CBM-Screen", 0, "@abcdefghijklmnopqrstuvwxyz[£]↑←" + 0x20.to(0x3f).map(_.toChar).mkString + "–ABCDEFGHIJKLMNOPQRSTUVWXYZ\ufffd\ufffd\ufffdπ", Map('^' -> 0x1E, '♥' -> 0x53, '♡' -> 0x53, '♠' -> 0x41, '♣' -> 0x58, '♢' -> 0x5A, '•' -> 0x51), - Map.empty, MinimalEscapeSequencesWithoutBraces + Map.empty, MinimalEscapeSequencesWithoutBraces ++ Map( + "pound" -> List(0x1c), + "pi" -> List(0x5f), + ) ) - val CbmScreencodesJp = new TextCodec("CBM-Screen-JP", + val CbmScreencodesJp = new TextCodec("CBM-Screen-JP", 0, "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]↑←" + // 00-1f 0x20.to(0x3f).map(_.toChar).mkString + "タチツテトナニヌネノハヒフヘホマ" + // 40-4f @@ -323,10 +341,13 @@ object TextCodec { ('a' to 'z').map(l => l -> (l - 'a' + 1)) ++ (1 to 0xf).map(i => (i + 0xff70).toChar -> (i + 0x70)) ++ (0x10 to 0x2f).map(i => (i + 0xff70).toChar -> (i + 0x40)), - StandardKatakanaDecompositions, MinimalEscapeSequencesWithoutBraces + StandardKatakanaDecompositions, MinimalEscapeSequencesWithoutBraces ++ Map( + "pi" -> List(0x70), + "yen" -> List(0x1c), + ) ) - val Petscii = new TextCodec("PETSCII", + val Petscii = new TextCodec("PETSCII", 0, "\ufffd" * 32 + 0x20.to(0x3f).map(_.toChar).mkString + "@abcdefghijklmnopqrstuvwxyz[£]↑←" + @@ -337,6 +358,8 @@ object TextCodec { Map('^' -> 0x5E, '♥' -> 0xD3, '♡' -> 0xD3, '♠' -> 0xC1, '♣' -> 0xD8, '♢' -> 0xDA, '•' -> 0xD1), Map.empty, Map( "n" -> List(13), "q" -> List('\"'.toInt), + "pound" -> List(0x5c), + "pi" -> List(0xdf), "apos" -> List('\''.toInt), "up" -> List(0x91), "down" -> List(0x11), @@ -355,7 +378,7 @@ object TextCodec { ) ) - val PetsciiJp = new TextCodec("PETSCII-JP", + val PetsciiJp = new TextCodec("PETSCII-JP", 0, "\ufffd" * 32 + 0x20.to(0x3f).map(_.toChar).mkString + "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[¥]↑←" + @@ -385,6 
+408,8 @@ object TextCodec { "n" -> List(13), "q" -> List('\"'.toInt), "apos" -> List('\''.toInt), + "yen" -> List(0x5c), + "pi" -> List(0xb0), "up" -> List(0x91), "down" -> List(0x11), "left" -> List(0x9d), @@ -402,7 +427,18 @@ object TextCodec { ) ) - val OldPetscii = new TextCodec("Old PETSCII", + val Vectrex = new TextCodec("Vectrex", 0x80, + "\ufffd" * 32 + + 0x20.to(0x3f).map(_.toChar).mkString + + "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_" + + "\ufffd↑\ufffd↓\ufffd\ufffd\ufffd©\ufffd\ufffd\ufffd\ufffd∞", + ('a' to 'z').map(l => l -> l.toUpper.toInt).toMap, + Map.empty, Map( + "copy" -> List('g'.toInt) + ) + ) + + val OldPetscii = new TextCodec("Old PETSCII", 0, "\ufffd" * 32 + 0x20.to(0x3f).map(_.toChar).mkString + "@abcdefghijklmnopqrstuvwxyz[\\]↑←" + @@ -413,6 +449,7 @@ object TextCodec { Map('^' -> 0x5E, '♥' -> 0xD3, '♡' -> 0xD3, '♠' -> 0xC1, '♣' -> 0xC8, '♢' -> 0xDA, '•' -> 0xD1), Map.empty, Map( "n" -> List(13), "q" -> List('\"'.toInt), + "pi" -> List(0xdf), "apos" -> List('\''.toInt), "up" -> List(0x91), "down" -> List(0x11), @@ -423,7 +460,7 @@ object TextCodec { ) ) - val OriginalPetscii = new TextCodec("Original PETSCII", + val OriginalPetscii = new TextCodec("Original PETSCII", 0, "\ufffd" * 32 + 0x20.to(0x3f).map(_.toChar).mkString + "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]↑←" + @@ -435,6 +472,7 @@ object TextCodec { "n" -> List(13), "q" -> List('\"'.toInt), "apos" -> List('\''.toInt), + "pi" -> List(0xdf), "up" -> List(0x91), "down" -> List(0x11), "left" -> List(0x9d), @@ -444,7 +482,7 @@ object TextCodec { ) ) - val Atascii = new TextCodec("ATASCII", + val Atascii = new TextCodec("ATASCII", 0, "♡" + "\ufffd" * 15 + "♣\ufffd–\ufffd•" + @@ -461,7 +499,7 @@ object TextCodec { ) ) - val AtasciiScreencodes = new TextCodec("ATASCII-Screen", + val AtasciiScreencodes = new TextCodec("ATASCII-Screen", 0, 0x20.to(0x3f).map(_.toChar).mkString + 0x40.to(0x5f).map(_.toChar).mkString + "♡" + @@ -472,14 +510,18 @@ object TextCodec { Map('♥' -> 0x40, '·' -> 0x54), Map.empty, 
MinimalEscapeSequencesWithoutBraces ) - val Bbc = new TextCodec("BBC", + val Bbc = new TextCodec("BBC", 0, "\ufffd" * 32 + 0x20.to(0x5f).map(_.toChar).mkString + "£" + 0x61.to(0x7E).map(_.toChar).mkString + "©", - Map('↑' -> '^'.toInt), Map.empty, MinimalEscapeSequencesWithBraces + ("n" -> List(13)) + Map('↑' -> '^'.toInt), Map.empty, MinimalEscapeSequencesWithBraces ++ Map( + "n" -> List(13), + "pound" -> List(0x60), + "copy" -> List(0x7f), + ) ) - val Sinclair = new TextCodec("Sinclair", + val Sinclair = new TextCodec("Sinclair", 0, "\ufffd" * 32 + 0x20.to(0x5f).map(_.toChar).mkString + "£" + 0x61.to(0x7E).map(_.toChar).mkString + "©", @@ -487,6 +529,8 @@ object TextCodec { "n" -> List(13), "q" -> List('\"'.toInt), "apos" -> List('\''.toInt), + "pound" -> List(0x60), + "copy" -> List(0x7f), "lbrace" -> List('{'.toInt), "rbrace" -> List('}'.toInt), "up" -> List(11), @@ -521,7 +565,7 @@ object TextCodec { "ミムメモヤユヨラリルレロワン゛゜" //noinspection ScalaUnnecessaryParentheses - val Jis = new TextCodec("JIS-X-0201", + val Jis = new TextCodec("JIS-X-0201", 0, "\ufffd" * 32 + ' '.to('Z').mkString + "[¥]^_" + @@ -535,10 +579,13 @@ object TextCodec { "\ufffd" * 3 + "\\", Map('¯' -> '~'.toInt, '‾' -> '~'.toInt, '♥' -> 0xE9) ++ 1.to(0x3F).map(i => (i + 0xff60).toChar -> (i + 0xA0)).toMap, - StandardKatakanaDecompositions, MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10)) + StandardKatakanaDecompositions, MinimalEscapeSequencesWithBraces ++ Map( + "n" -> List(13, 10), + "yen" -> List(0x5c) + ) ) - val MsxWest = new TextCodec("MSX-International", + val MsxWest = new TextCodec("MSX-International", 0, "\ufffd" * 32 + (0x20 to 0x7e).map(_.toChar).mkString("") + "\ufffd" + @@ -552,10 +599,14 @@ object TextCodec { "≡±≥≤\ufffd\ufffd÷\ufffd\ufffd\ufffd\ufffd\ufffdⁿ²", Map('ß' -> 0xE1, '¦' -> 0x7C), Map('♥' -> "\u0001C", '♡' -> "\u0001C", '♢' -> "\u0001D", '♢' -> "\u0001D", '♣' -> "\u0001E", '♠' -> "\u0001F", '·' -> "\u0001G") , - MinimalEscapeSequencesWithBraces + ("n" -> List(13, 
10)) + MinimalEscapeSequencesWithBraces ++ Map( + "n" -> List(13, 10), + "pound" -> List(0x9c), + "yen" -> List(0x9d), + ) ) - val MsxRu = new TextCodec("MSX-RU", + val MsxRu = new TextCodec("MSX-RU", 0, "\ufffd" * 32 + (0x20 to 0x7e).map(_.toChar).mkString("") + "\ufffd" + @@ -571,7 +622,7 @@ object TextCodec { MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10)) ) - val MsxJp = new TextCodec("MSX-JP", + val MsxJp = new TextCodec("MSX-JP", 0, "\ufffd" * 32 + (0x20 to 0x7e).map(c => if (c == 0x5c) '¥' else c.toChar).mkString("") + "\ufffd" + @@ -605,7 +656,10 @@ object TextCodec { '小' -> "\u0001_" ) ++ StandardHiraganaDecompositions ++ StandardKatakanaDecompositions, - MinimalEscapeSequencesWithBraces + ("n" -> List(13, 10)) + MinimalEscapeSequencesWithBraces ++ Map( + "n" -> List(13, 10), + "yen" -> List(0x5c) + ) ) val lossyAlternatives: Map[Char, List[String]] = {