diff --git a/docs/lang/preprocessor.md b/docs/lang/preprocessor.md index d6711ff4..f643ae63 100644 --- a/docs/lang/preprocessor.md +++ b/docs/lang/preprocessor.md @@ -157,6 +157,28 @@ TODO The following Millfork operators and functions are not available in the preprocessor: `+'`, `-'`, `*'`, `<<'`, `>>'`, `:`, `>>>>`, `nonet`, all the assignment operators + +### Character literals + +The preprocessor supports character literals. By default, they are interpreted in the default encoding, +but you can suffix them with the name of another encoding. + + // usually prints 97: + #infoeval 'a' + // prints 97: + #infoeval 'a'ascii + +As a special case, you can suffix the character literal with `utf32`. +This gives the literal the value of the Unicode code point of the character: + + // may print 94, 96, 112, 173, 176, 184, 185, 222, 227, 234, 240, something else, or even fail to compile: + #infoeval 'π' + // prints 960: + #infoeval 'π'utf32 + +Escape sequences are supported, as defined by each encoding. The `utf32` pseudo-encoding supports the same escape sequences as `utf8`. + + ### `#template` Defines the source to be a module template. See [Modules](./modules.md) for more information. 
diff --git a/src/main/scala/millfork/parser/Preprocessor.scala b/src/main/scala/millfork/parser/Preprocessor.scala
index 30da6021..bda7a194 100644
--- a/src/main/scala/millfork/parser/Preprocessor.scala
+++ b/src/main/scala/millfork/parser/Preprocessor.scala
@@ -5,6 +5,8 @@ import millfork.{CompilationFlag, CompilationOptions, Platform, SeparatedList}
 import millfork.error.{ConsoleLogger, Logger}
 import millfork.node.Position
 
+import java.nio.charset.StandardCharsets
+import scala.collection.immutable.BitSet
 import scala.collection.mutable
 
 /**
@@ -230,7 +232,70 @@ class PreprocessorParser(options: CompilationOptions) {
   val alwaysNone: M => Option[Long] = (_: M) => None
   val log: Logger = options.log
 
-  val literalAtom: P[Q] = (MfParser.binaryAtom | MfParser.hexAtom | MfParser.octalAtom | MfParser.quaternaryAtom | MfParser.decimalAtom).map(l => _ => Some(l.value))
+  /**
+    * Unicode general categories, as reported by `Character.getType`,
+    * of characters that may not appear inside a character literal.
+    */
+  val invalidCharLiteralTypes: BitSet = BitSet(
+    Character.CONTROL,
+    Character.LINE_SEPARATOR,
+    Character.PARAGRAPH_SEPARATOR,
+    Character.PRIVATE_USE,
+    Character.SURROGATE,
+    Character.UNASSIGNED)
+
+  /**
+    * Parses a character literal: quoted text, optionally followed by an encoding name.
+    * The text is encoded as soon as the literal is parsed; the produced function ignores
+    * its argument and returns the encoded value, or `None` if an error has been logged.
+    */
+  val charAtom: P[Q] = {
+    // Space and above, excluding the closing quote and the disallowed Unicode categories.
+    def allowedInLiteral(c: Char): Boolean =
+      !(c < ' ' || c == '\'' || invalidCharLiteralTypes(Character.getType(c)))
+
+    // Reports the problem right away and yields an atom that has no value.
+    def invalidEncoding(): Q = {
+      log.error("Invalid encoding for character literal")
+      _ => None
+    }
+
+    ("'" ~/ CharPred(allowedInLiteral).rep.! ~/ "'" ~/ HWS ~ identifier.?).map {
+      case (content, suffix) =>
+        // Accepts the encoder output only when it is exactly one value.
+        def single(encoded: List[Int]): Q = encoded match {
+          case code :: Nil =>
+            _ => Some(code.toLong)
+          case _ =>
+            log.error(s"Character `$content` cannot be encoded as one byte", None)
+            _ => None
+        }
+
+        val lenientByFlag = options.flag(CompilationFlag.LenientTextEncoding)
+        val codepoints = content.codePoints().toArray.toList
+        suffix match {
+          case Some("utf32") =>
+            single(TextCodecRepository.RawUtf32.encode(log, None, codepoints, options, lenientByFlag))
+          case None | Some("default") =>
+            single(options.platform.defaultCodec.encode(log, None, codepoints, options, lenientByFlag))
+          case Some("scr") =>
+            single(options.platform.screenCodec.encode(log, None, codepoints, options, lenientByFlag))
+          case Some("z" | "pz" | "p" | "pdefault" | "defaultz" | "pdefaultz" | "pscr" | "scrz" | "pscrz") =>
+            invalidEncoding()
+          case Some(encodingName) =>
+            val named = options.textCodecRepository.forName(encodingName, None, log)
+            if (named.lengthPrefixed || named.nullTerminated) invalidEncoding()
+            else single(named.codec.encode(log, None, codepoints, options, named.lenient))
+        }
+    }
+  }
+
+  // A literal is one of MfParser's binary, hex, octal, quaternary or decimal atoms, or a character literal.
+  val literalAtom: P[Q] = {
+    val numericAtom =
+      MfParser.binaryAtom | MfParser.hexAtom | MfParser.octalAtom | MfParser.quaternaryAtom | MfParser.decimalAtom
+    numericAtom.map(literal => (_: M) => Some(literal.value)) | charAtom
+  }
 
   val variableAtom: P[Q] = identifier.map(k => _.get(k))
 