1
0
mirror of https://github.com/KarolS/millfork.git synced 2024-05-28 13:41:31 +00:00

Allow character literals in preprocessor

This commit is contained in:
Karol Stasiak 2021-03-13 21:40:38 +01:00
parent 24eac6708b
commit 8aac3bc329
2 changed files with 72 additions and 1 deletions

View File

@ -157,6 +157,28 @@ TODO
The following Millfork operators and functions are not available in the preprocessor:
`+'`, `-'`, `*'`, `<<'`, `>>'`, `:`, `>>>>`, `nonet`, all the assignment operators
### Character literals
The preprocessor supports character literals. By default, they are interpreted in the default encoding,
but you can suffix them with the name of another encoding.
// usually prints 97:
#infoeval 'a'
// prints 97:
#infoeval 'a'ascii
As a special case, you can suffix a character literal with `utf32`.
This gives the literal the value of the Unicode codepoint of the character:
// may print 94, 96, 112, 173, 176, 184, 185, 222, 227, 234, 240, something else, or even fail to compile:
#infoeval 'π'
// prints 960:
#infoeval 'π'utf32
Escape sequences are supported, according to the rules of the chosen encoding. The `utf32` pseudo-encoding supports the same escape sequences as `utf8`.
### `#template`
Defines the source to be a module template. See [Modules](./modules.md) for more information.

View File

@ -5,6 +5,8 @@ import millfork.{CompilationFlag, CompilationOptions, Platform, SeparatedList}
import millfork.error.{ConsoleLogger, Logger}
import millfork.node.Position
import java.nio.charset.StandardCharsets
import scala.collection.immutable.BitSet
import scala.collection.mutable
/**
@ -230,7 +232,54 @@ class PreprocessorParser(options: CompilationOptions) {
// Evaluator that ignores its argument and never produces a value.
val alwaysNone: M => Option[Long] = _ => None
// Logger obtained from the compilation options; the atoms below report errors through it.
val log: Logger = options.log
// Numeric literal atom: tries the binary, hex, octal, quaternary and decimal atoms from MfParser
// in that order and turns the parsed literal into an evaluator that always yields its value.
// NOTE(review): another `literalAtom` definition further down also accepts `charAtom`; this line
// looks like the pre-change version of the same member -- confirm which one is meant to be kept.
val literalAtom: P[Q] = (MfParser.binaryAtom | MfParser.hexAtom | MfParser.octalAtom | MfParser.quaternaryAtom | MfParser.decimalAtom).map(l => _ => Some(l.value))
// Character categories, as reported by `Character.getType`, that are not accepted
// between the quotes of a character literal.
val invalidCharLiteralTypes: BitSet = {
  val rejectedCategories = List[Int](
    Character.LINE_SEPARATOR,
    Character.PARAGRAPH_SEPARATOR,
    Character.CONTROL,
    Character.PRIVATE_USE,
    Character.SURROGATE,
    Character.UNASSIGNED)
  BitSet(rejectedCategories: _*)
}
/**
 * Parses a single-quoted character literal, optionally followed by an encoding name,
 * and turns it into an evaluator.
 *
 * The text between the quotes is encoded with the codec selected by the suffix
 * (a missing suffix is treated as `default`). The literal has a value only when the
 * encoding produces exactly one element; otherwise an error is logged and the
 * evaluator yields `None`. Encoding and error reporting happen while the parse
 * result is mapped, not when the returned evaluator is applied.
 */
val charAtom: P[Q] = {
  // A character may appear inside the quotes when it is not below space, is not the
  // closing quote itself, and does not belong to one of the rejected categories.
  val allowedInLiteral: Char => Boolean =
    ch => ch >= ' ' && ch != '\'' && !invalidCharLiteralTypes(Character.getType(ch))

  ("'" ~/ CharPred(allowedInLiteral).rep.! ~/ "'" ~/ HWS ~ identifier.?).map {
    case (content, encodingNameOpt) =>
      // Reports an encoding that is not accepted for character literals.
      def rejectEncoding(): Q = {
        log.error("Invalid encoding for character literal")
        _ => None
      }

      // Accepts the encoded form only if it consists of exactly one element.
      def theOnly(encoded: List[Int]): Q = encoded match {
        case single :: Nil =>
          _ => Some(single.toLong)
        case _ =>
          log.error(s"Character `$content` cannot be encoded as one byte", None)
          _ => None
      }

      val lenient = options.flag(CompilationFlag.LenientTextEncoding)
      val codepoints = content.codePoints().toArray.toList

      encodingNameOpt.getOrElse("default") match {
        case "utf32" =>
          theOnly(TextCodecRepository.RawUtf32.encode(log, None, codepoints, options, lenient))
        case "default" =>
          theOnly(options.platform.defaultCodec.encode(log, None, codepoints, options, lenient))
        case "scr" =>
          theOnly(options.platform.screenCodec.encode(log, None, codepoints, options, lenient))
        case "z" | "pz" | "p" | "pdefault" | "defaultz" | "pdefaultz" | "pscr" | "scrz" | "pscrz" =>
          rejectEncoding()
        case encodingName =>
          // Any other name is looked up in the codec repository; codecs flagged as
          // length-prefixed or null-terminated are rejected as well.
          val cwf = options.textCodecRepository.forName(encodingName, None, log)
          if (!cwf.lengthPrefixed && !cwf.nullTerminated) {
            theOnly(cwf.codec.encode(log, None, codepoints, options, cwf.lenient))
          } else {
            rejectEncoding()
          }
      }
  }
}
// Literal atom: a numeric literal (the binary, hex, octal, quaternary and decimal atoms from
// MfParser, tried in that order) or, failing that, a character literal parsed by `charAtom`.
val literalAtom: P[Q] = {
  val numericAtom =
    MfParser.binaryAtom | MfParser.hexAtom | MfParser.octalAtom | MfParser.quaternaryAtom | MfParser.decimalAtom
  numericAtom.map(literal => (_: M) => Some(literal.value)) | charAtom
}
// Identifier atom: evaluates to whatever `get` returns for that name in the supplied M.
val variableAtom: P[Q] = identifier.map(name => (definitions: M) => definitions.get(name))