Allow character literals in preprocessor

2025-04-04 22:29:32 +00:00 · 2021-03-13 21:40:38 +01:00 · 2021-03-13 21:40:38 +01:00 · 8aac3bc329
commit 8aac3bc329
parent 24eac6708b
2 changed files with 72 additions and 1 deletions
--- a/docs/lang/preprocessor.md
+++ b/docs/lang/preprocessor.md
@ -157,6 +157,28 @@ TODO
 The following Millfork operators and functions are not available in the preprocessor:  
 `+'`, `-'`, `*'`, `<<'`, `>>'`, `:`, `>>>>`, `nonet`, all the assignment operators

+
+### Character literals
+
+The preprocessor supports character literals. By default, they are interpreted in the platform's default encoding,
+but you can suffix them with the name of another encoding.
+
+    // usually prints 97:
+    #infoeval 'a'
+    // prints 97:
+    #infoeval 'a'ascii
+
+As a special case, you can also suffix the character literal with `utf32`.
+This gives the literal the value of the Unicode codepoint of the character:
+
+    // may print 94, 96, 112, 173, 176, 184, 185, 222, 227, 234, 240, something else, or even fail to compile:
+    #infoeval 'π'
+    // prints 960:
+    #infoeval 'π'utf32
+
+Escape sequences are supported, as defined by each encoding. The `utf32` pseudo-encoding supports the same escape sequences as `utf8`.
+
+
 ### `#template`

 Defines the source to be a module template. See [Modules](./modules.md) for more information.
--- a/src/main/scala/millfork/parser/Preprocessor.scala
+++ b/src/main/scala/millfork/parser/Preprocessor.scala
@ -5,6 +5,8 @@ import millfork.{CompilationFlag, CompilationOptions, Platform, SeparatedList}
 import millfork.error.{ConsoleLogger, Logger}
 import millfork.node.Position

+import java.nio.charset.StandardCharsets
+import scala.collection.immutable.BitSet
 import scala.collection.mutable

 /**
@ -230,7 +232,54 @@ class PreprocessorParser(options: CompilationOptions) {
  val alwaysNone: M => Option[Long] = (_: M) => None
  val log: Logger = options.log

-  val literalAtom: P[Q] = (MfParser.binaryAtom | MfParser.hexAtom | MfParser.octalAtom | MfParser.quaternaryAtom | MfParser.decimalAtom).map(l => _ => Some(l.value))
+  val invalidCharLiteralTypes: BitSet = BitSet( // java.lang.Character category codes that charAtom refuses inside a character literal
+    Character.LINE_SEPARATOR,
+    Character.PARAGRAPH_SEPARATOR,
+    Character.CONTROL,
+    Character.PRIVATE_USE,
+    Character.SURROGATE,
+    Character.UNASSIGNED)
+
+  val charAtom: P[Q] = // quoted character literal with an optional encoding-name suffix; content chars must be at least a space, not a quote, and not in invalidCharLiteralTypes
+    ("'" ~/ CharPred(c => c >= ' ' && c != '\'' && !invalidCharLiteralTypes(Character.getType(c))).rep.! ~/ "'" ~/ HWS ~ identifier.?).map {
+      case (content, encodingNameOpt) =>
+        def theOnly(list: List[Int]): Q = { // accepts the encoder output only when it is exactly one value
+          list match {
+            case List(value) =>
+              _ => Some(value.toLong)
+            case _ => // zero or several values: log an error and evaluate to None
+              log.error(s"Character `$content` cannot be encoded as one byte", None)
+              _ => None
+          }
+        }
+        val lenient = options.flag(CompilationFlag.LenientTextEncoding) // passed to the utf32, default and screen codecs below
+        val codepoints = content.codePoints().toArray.toList
+        encodingNameOpt match {
+          case Some("utf32") => // pseudo-encoding handled via RawUtf32, bypassing the repository lookup
+            theOnly(TextCodecRepository.RawUtf32.encode(log, None, codepoints, options, lenient))
+          case _ =>
+            encodingNameOpt.getOrElse("default") match { // a missing suffix is treated as the name default
+              case "default" =>
+                theOnly(options.platform.defaultCodec.encode(log, None, codepoints, options, lenient))
+              case "scr" =>
+                theOnly(options.platform.screenCodec.encode(log, None, codepoints, options, lenient))
+              case "z" | "pz" | "p" | "pdefault" | "defaultz" | "pdefaultz" | "pscr" | "scrz" | "pscrz" => // presumably the length-prefixed and null-terminated variants of default and scr; TODO confirm
+                log.error("Invalid encoding for character literal")
+                _ => None
+              case encodingName => // any other name is looked up in the text codec repository
+                val cwf = options.textCodecRepository.forName(encodingName, None, log)
+                if (cwf.lengthPrefixed || cwf.nullTerminated) { // such codecs are rejected for character literals
+                  log.error("Invalid encoding for character literal")
+                  _ => None
+                } else {
+                  theOnly(cwf.codec.encode(log, None, codepoints, options, cwf.lenient)) // note: uses the lenient setting of the looked-up codec, not the compilation flag
+                }
+            }
+        }
+
+    }
+
+  val literalAtom: P[Q] = (MfParser.binaryAtom | MfParser.hexAtom | MfParser.octalAtom | MfParser.quaternaryAtom | MfParser.decimalAtom).map(l => (_:M) => Some(l.value)) | charAtom // numeric literal (binary, hex, octal, quaternary or decimal) or, failing that, a character literal

  val variableAtom: P[Q] = identifier.map(k => _.get(k))