From 40c98567a0eb27c64a1bd9188236510df8c02917 Mon Sep 17 00:00:00 2001 From: Karol Stasiak Date: Fri, 6 Jul 2018 01:05:24 +0200 Subject: [PATCH] Make the parser much faster --- src/main/scala/millfork/parser/MfParser.scala | 477 +++++++++--------- .../scala/millfork/parser/MosParser.scala | 18 +- .../scala/millfork/parser/Z80Parser.scala | 42 +- 3 files changed, 289 insertions(+), 248 deletions(-) diff --git a/src/main/scala/millfork/parser/MfParser.scala b/src/main/scala/millfork/parser/MfParser.scala index e5eef95e..d1886bcc 100644 --- a/src/main/scala/millfork/parser/MfParser.scala +++ b/src/main/scala/millfork/parser/MfParser.scala @@ -1,9 +1,9 @@ package millfork.parser import java.nio.file.{Files, Paths} +import java.util import fastparse.all._ -import millfork.assembly.mos.Opcode import millfork.env._ import millfork.error.ErrorReporting import millfork.node._ @@ -14,6 +14,8 @@ import millfork.{CompilationOptions, SeparatedList} */ abstract class MfParser[T](filename: String, input: String, currentDirectory: String, options: CompilationOptions) { + import MfParser._ + var lastPosition = Position(filename, 1, 1, 0) var lastLabel = "" @@ -24,8 +26,13 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St def position(label: String = ""): P[Position] = Index.map(i => indexToPosition(i, label)) def indexToPosition(i: Int, label: String): Position = { - val prefix = lineStarts.takeWhile(_ <= i) - val newPosition = Position(filename, prefix.length, i - prefix.last, i) + var lineNumber = util.Arrays.binarySearch(lineStarts, i) + if (lineNumber < 0) { + lineNumber = - lineNumber - 2 + } + val columnNumber = i - lineStarts(lineNumber) + lineNumber += 1 + val newPosition = Position(filename, lineNumber, columnNumber, i) if (newPosition.cursor > lastPosition.cursor) { lastPosition = newPosition lastLabel = label @@ -33,200 +40,7 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St newPosition } - val comment: 
P[Unit] = P("//" ~/ CharsWhile(c => c != '\n' && c != '\r', min = 0) ~ ("\r\n" | "\r" | "\n")) - - val SWS: P[Unit] = P(CharsWhileIn(" \t", min = 1)).opaque("") - - val HWS: P[Unit] = P(CharsWhileIn(" \t", min = 0)).opaque("") - - val AWS: P[Unit] = P((CharIn(" \t\n\r;") | NoCut(comment)).rep(min = 0)).opaque("") - - val EOL: P[Unit] = P(HWS ~ ("\r\n" | "\r" | "\n" | comment).opaque("") ~ AWS).opaque("") - - val letter: P[String] = P(CharIn("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_").!) - - val letterOrDigit: P[Unit] = P(CharIn("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_.$1234567890")) - - val lettersOrDigits: P[String] = P(CharsWhileIn("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_.$1234567890", min = 0).!) - - val identifier: P[String] = P((letter ~ lettersOrDigits).map { case (a, b) => a + b }).opaque("") - - // def operator: P[String] = P(CharsWhileIn("!-+*/><=~|&^", min=1).!) // TODO: only valid operators - - private val invalidCharLiteralTypes = Set[Int]( - Character.LINE_SEPARATOR, - Character.PARAGRAPH_SEPARATOR, - Character.CONTROL, - Character.PRIVATE_USE, - Character.SURROGATE, - Character.UNASSIGNED) - - def charAtom: P[LiteralExpression] = for { - p <- position() - c <- "'" ~/ CharPred(c => c >= ' ' && !invalidCharLiteralTypes(Character.getType(c))).! 
~/ "'" - co <- HWS ~ codec - } yield { - co.encode(Some(p), c.charAt(0)) match { - case List(value) => - LiteralExpression(value, 1) - case _ => - ErrorReporting.error(s"Character `$c` cannot be encoded as one byte", Some(p)) - LiteralExpression(0, 1) - } - } - - def size(value: Int, wordLiteral: Boolean, farwordLiteral: Boolean, longLiteral: Boolean): Int = { - val w = value > 255 || value < -0x80 || wordLiteral - val f = value > 0xffff || value < -0x8000 || farwordLiteral - val l = value > 0xffffff || value < -0x800000 || longLiteral - if (l) 4 else if (f) 3 else if (w) 2 else 1 - } - - def sign(abs: Int, minus: Boolean): Int = if (minus) -abs else abs - - val decimalAtom: P[LiteralExpression] = - for { - p <- position() - minus <- "-".!.? - s <- CharsWhileIn("1234567890", min = 1).!.opaque("") ~ !("x" | "b") - } yield { - val abs = Integer.parseInt(s, 10) - val value = sign(abs, minus.isDefined) - LiteralExpression(value, size(value, s.length > 3, s.length > 5, s.length > 7)).pos(p) - } - - val binaryAtom: P[LiteralExpression] = - for { - p <- position() - minus <- "-".!.? - _ <- P("0b" | "%") ~/ Pass - s <- CharsWhileIn("01", min = 1).!.opaque("") - } yield { - val abs = Integer.parseInt(s, 2) - val value = sign(abs, minus.isDefined) - LiteralExpression(value, size(value, s.length > 8, s.length > 16, s.length > 24)).pos(p) - } - - val hexAtom: P[LiteralExpression] = - for { - p <- position() - minus <- "-".!.? - _ <- P("0x" | "0X" | "$") ~/ Pass - s <- CharsWhileIn("1234567890abcdefABCDEF", min = 1).!.opaque("") - } yield { - val abs = Integer.parseInt(s, 16) - val value = sign(abs, minus.isDefined) - LiteralExpression(value, size(value, s.length > 2, s.length > 4, s.length > 6)).pos(p) - } - - val octalAtom: P[LiteralExpression] = - for { - p <- position() - minus <- "-".!.? 
- _ <- P("0o" | "0O") ~/ Pass - s <- CharsWhileIn("01234567", min = 1).!.opaque("") - } yield { - val abs = Integer.parseInt(s, 8) - val value = sign(abs, minus.isDefined) - LiteralExpression(value, size(value, s.length > 3, s.length > 6, s.length > 9)).pos(p) - } - - val quaternaryAtom: P[LiteralExpression] = - for { - p <- position() - minus <- "-".!.? - _ <- P("0q" | "0Q") ~/ Pass - s <- CharsWhileIn("0123", min = 1).!.opaque("") - } yield { - val abs = Integer.parseInt(s, 4) - val value = sign(abs, minus.isDefined) - LiteralExpression(value, size(value, s.length > 4, s.length > 8, s.length > 12)).pos(p) - } - - val literalAtom: P[LiteralExpression] = charAtom | binaryAtom | hexAtom | octalAtom | quaternaryAtom | decimalAtom - - val atom: P[Expression] = P(literalAtom | (position() ~ identifier).map { case (p, i) => VariableExpression(i).pos(p) }) - - val mfOperators = List( - List("+=", "-=", "+'=", "-'=", "^=", "&=", "|=", "*=", "*'=", "<<=", ">>=", "<<'=", ">>'="), - List("||", "^^"), - List("&&"), - List("==", "<=", ">=", "!=", "<", ">"), - List(":"), - List("+'", "-'", "<<'", ">>'", ">>>>", "+", "-", "&", "|", "^", "<<", ">>"), - List("*'", "*")) - - val nonStatementLevel = 1 // everything but not `=` - val mathLevel = 4 // the `:` operator - - def flags(allowed: String*): P[Set[String]] = StringIn(allowed: _*).!.rep(min = 0, sep = SWS).map(_.toSet).opaque("") - - def variableDefinition(implicitlyGlobal: Boolean): P[Seq[DeclarationStatement]] = for { - p <- position() - bank <- bankDeclaration - flags <- flags("const", "static", "volatile", "stack", "register") ~ HWS - typ <- identifier ~ SWS - name <- identifier ~/ HWS ~/ Pass - addr <- ("@" ~/ HWS ~/ mfExpression(1)).?.opaque("
") ~ HWS - initialValue <- ("=" ~/ HWS ~/ mfExpression(1)).? ~ HWS - _ <- &(EOL) ~/ "" - } yield { - Seq(VariableDeclarationStatement(name, typ, - bank, - global = implicitlyGlobal || flags("static"), - stack = flags("stack"), - constant = flags("const"), - volatile = flags("volatile"), - register = flags("register"), - initialValue, addr).pos(p)) - } - - val externFunctionBody: P[Option[List[Statement]]] = P("extern" ~/ PassWith(None)) - - val paramDefinition: P[ParameterDeclaration] = for { - p <- position() - typ <- identifier ~/ SWS ~/ Pass - name <- identifier ~/ Pass - } yield { - ParameterDeclaration(typ, ByVariable(name)).pos(p) - } - - def asmExpression: P[Expression] = (position() ~ NoCut( - ("<" ~/ HWS ~ mfExpression(mathLevel)).map(e => HalfWordExpression(e, hiByte = false)) | - (">" ~/ HWS ~ mfExpression(mathLevel)).map(e => HalfWordExpression(e, hiByte = true)) | - mfExpression(mathLevel) - )).map { case (p, e) => e.pos(p) } - - def asmExpressionWithParens: P[(Expression, Boolean)] = (position() ~ NoCut( - ("(" ~ HWS ~ asmExpression ~ HWS ~ ")").map(_ -> true) | - asmExpression.map(_ -> false) - )).map { case (p, e) => e._1.pos(p) -> e._2 } - - def elidable: P[Boolean] = ("?".! ~/ HWS).?.map(_.isDefined) - - val appcComplex: P[ParamPassingConvention] = P((("const" | "ref").! ~/ AWS).? 
~ AWS ~ identifier) map { - case (None, name) => ByVariable(name) - case (Some("const"), name) => ByConstant(name) - case (Some("ref"), name) => ByReference(name) - case x => ErrorReporting.fatal(s"Unknown assembly parameter passing convention: `$x`") - } - - def asmParamDefinition: P[ParameterDeclaration] - - def arrayListElement: P[ArrayContents] = arrayStringContents | arrayLoopContents | arrayFileContents | mfExpression(nonStatementLevel).map(e => LiteralContents(List(e))) - - def arrayProcessedContents: P[ArrayContents] = for { - _ <- "@" ~/ HWS - filter <- identifier - _ <- AWS - contents <- arrayContents - } yield ProcessedContents(filter, contents) - - def arrayListContents: P[ArrayContents] = ("[" ~/ AWS ~/ arrayListElement.rep(sep = AWS ~ "," ~/ AWS) ~ AWS ~ "]" ~/ Pass).map(c => CombinedContents(c.toList)) - - val doubleQuotedString: P[List[Char]] = P("\"" ~/ CharsWhile(c => c != '\"' && c != '\n' && c != '\r').! ~ "\"").map(_.toList) - - def codec: P[TextCodec] = P(position() ~ identifier).map { + val codec: P[TextCodec] = P(position("text codec identifier") ~ identifier).map { case (_, "ascii") => TextCodec.Ascii case (_, "petscii") => TextCodec.Petscii case (_, "pet") => TextCodec.Petscii @@ -248,9 +62,84 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St TextCodec.Ascii } + // def operator: P[String] = P(CharsWhileIn("!-+*/><=~|&^", min=1).!) // TODO: only valid operators + + val charAtom: P[LiteralExpression] = for { + p <- position() + c <- "'" ~/ CharPred(c => c >= ' ' && !invalidCharLiteralTypes(Character.getType(c))).! 
~/ "'" + co <- HWS ~ codec + } yield { + co.encode(Some(p), c.charAt(0)) match { + case List(value) => + LiteralExpression(value, 1) + case _ => + ErrorReporting.error(s"Character `$c` cannot be encoded as one byte", Some(p)) + LiteralExpression(0, 1) + } + } + + val literalAtom: P[LiteralExpression] = charAtom | binaryAtom | hexAtom | octalAtom | quaternaryAtom | decimalAtom + + val atom: P[Expression] = P(position() ~ (literalAtom | variableAtom)).map{case (p,a) => a.pos(p)} + + val globalVariableDefinition: P[Seq[DeclarationStatement]] = variableDefinition(true) + val localVariableDefinition: P[Seq[DeclarationStatement]] = variableDefinition(false) + + def variableDefinition(implicitlyGlobal: Boolean): P[Seq[DeclarationStatement]] = for { + p <- position() + bank <- bankDeclaration + flags <- variableFlags ~ HWS + typ <- identifier ~ SWS + name <- identifier ~/ HWS ~/ Pass + addr <- ("@" ~/ HWS ~/ mfExpression(1)).?.opaque("
") ~ HWS + initialValue <- ("=" ~/ HWS ~/ mfExpression(1)).? ~ HWS + _ <- &(EOL) ~/ "" + } yield { + Seq(VariableDeclarationStatement(name, typ, + bank, + global = implicitlyGlobal || flags("static"), + stack = flags("stack"), + constant = flags("const"), + volatile = flags("volatile"), + register = flags("register"), + initialValue, addr).pos(p)) + } + + val paramDefinition: P[ParameterDeclaration] = for { + p <- position() + typ <- identifier ~/ SWS ~/ Pass + name <- identifier ~/ Pass + } yield { + ParameterDeclaration(typ, ByVariable(name)).pos(p) + } + + def asmExpression: P[Expression] = (position() ~ NoCut( + ("<" ~/ HWS ~ mfExpression(mathLevel)).map(e => HalfWordExpression(e, hiByte = false)) | + (">" ~/ HWS ~ mfExpression(mathLevel)).map(e => HalfWordExpression(e, hiByte = true)) | + mfExpression(mathLevel) + )).map { case (p, e) => e.pos(p) } + + def asmExpressionWithParens: P[(Expression, Boolean)] = (position() ~ NoCut( + ("(" ~ HWS ~ asmExpression ~ HWS ~ ")").map(_ -> true) | + asmExpression.map(_ -> false) + )).map { case (p, e) => e._1.pos(p) -> e._2 } + + def asmParamDefinition: P[ParameterDeclaration] + + def arrayListElement: P[ArrayContents] = arrayStringContents | arrayLoopContents | arrayFileContents | mfExpression(nonStatementLevel).map(e => LiteralContents(List(e))) + + def arrayProcessedContents: P[ArrayContents] = for { + _ <- "@" ~/ HWS + filter <- identifier + _ <- AWS + contents <- arrayContents + } yield ProcessedContents(filter, contents) + + def arrayListContents: P[ArrayContents] = ("[" ~/ AWS ~/ arrayListElement.rep(sep = AWS ~ "," ~/ AWS) ~ AWS ~ "]" ~/ Pass).map(c => CombinedContents(c.toList)) + // TODO: should reserve the `file` identifier here? 
- def arrayFileContents: P[ArrayContents] = for { - p <- "file" ~ HWS ~/ "(" ~/ HWS ~/ position() + val arrayFileContents: P[ArrayContents] = for { + p <- "file" ~ HWS ~/ "(" ~/ HWS ~/ position("file name") filePath <- doubleQuotedString ~/ HWS optSlice <- ("," ~/ HWS ~/ literalAtom ~/ HWS ~/ "," ~/ HWS ~/ literalAtom ~/ HWS ~/ Pass).? _ <- ")" ~/ Pass @@ -269,7 +158,7 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St def arrayLoopContents: P[ArrayContents] = for { identifier <- "for" ~ SWS ~/ identifier ~/ HWS ~ "," ~/ HWS ~ Pass start <- mfExpression(nonStatementLevel) ~ HWS ~ "," ~/ HWS ~/ Pass - pos <- position() + pos <- position("loop direction") direction <- forDirection ~/ HWS ~/ "," ~/ HWS ~/ Pass end <- mfExpression(nonStatementLevel) body <- AWS ~ arrayContents @@ -290,7 +179,7 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St def arrayContentsForAsm: P[RawBytesStatement] = (arrayListContents | arrayStringContents).map(RawBytesStatement) - def arrayDefinition: P[Seq[ArrayDeclarationStatement]] = for { + val arrayDefinition: P[Seq[ArrayDeclarationStatement]] = for { p <- position() bank <- bankDeclaration name <- "array" ~ !letterOrDigit ~/ SWS ~ identifier ~ HWS @@ -304,7 +193,7 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St def tightMfExpressionButNotCall: P[Expression] = P(mfParenExpr | mfIndexedExpression | atom) // TODO def mfExpression(level: Int): P[Expression] = { - val allowedOperators = mfOperators.drop(level).flatten + val allowedOperators = mfOperatorsDropFlatten(level) def inner: P[SeparatedList[Expression, String]] = { for { @@ -391,7 +280,7 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St def asmStatement: P[ExecutableStatement] - def statement: P[Seq[Statement]] = (position() ~ P(keywordStatement | variableDefinition(false) | expressionStatement)).map { case (p, s) => s.map(_.pos(p)) } + def statement: 
P[Seq[Statement]] = (position() ~ P(keywordStatement | localVariableDefinition | expressionStatement)).map { case (p, s) => s.map(_.pos(p)) } def asmStatements: P[List[ExecutableStatement]] = ("{" ~/ AWS ~/ asmStatement.rep(sep = NoCut(EOL) ~ !"}" ~/ Pass) ~/ AWS ~/ "}" ~/ Pass).map(_.toList) @@ -399,7 +288,7 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St def executableStatements: P[Seq[ExecutableStatement]] = ("{" ~/ AWS ~/ executableStatement.rep(sep = NoCut(EOL) ~ !"}" ~/ Pass) ~/ AWS ~ "}").map(_.flatten) - def dispatchLabel: P[ReturnDispatchLabel] = + val dispatchLabel: P[ReturnDispatchLabel] = ("default" ~ !letterOrDigit ~/ AWS ~/ ("(" ~/ position("default branch range") ~ AWS ~/ mfExpression(nonStatementLevel).rep(min = 0, sep = AWS ~ "," ~/ AWS) ~ AWS ~/ ")" ~/ "").?).map{ case None => DefaultReturnDispatchLabel(None, None) case Some((_, Seq())) => DefaultReturnDispatchLabel(None, None) @@ -410,14 +299,14 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St DefaultReturnDispatchLabel(None, None) } | mfExpression(nonStatementLevel).rep(min = 0, sep = AWS ~ "," ~/ AWS).map(exprs => StandardReturnDispatchLabel(exprs.toList)) - def dispatchBranch: P[ReturnDispatchBranch] = for { + val dispatchBranch: P[ReturnDispatchBranch] = for { pos <- position() l <- dispatchLabel ~/ HWS ~/ "@" ~/ HWS f <- tightMfExpressionButNotCall ~/ HWS parameters <- ("(" ~/ position("dispatch actual parameters") ~ AWS ~/ mfExpression(nonStatementLevel).rep(min = 0, sep = AWS ~ "," ~/ AWS) ~ AWS ~/ ")" ~/ "").? 
} yield ReturnDispatchBranch(l, f, parameters.map(_._2.toList).getOrElse(Nil)).pos(pos) - def dispatchStatementBody: P[Seq[ExecutableStatement]] = for { + val dispatchStatementBody: P[Seq[ExecutableStatement]] = for { indexer <- "[" ~/ AWS ~/ mfExpression(nonStatementLevel) ~/ AWS ~/ "]" ~/ AWS _ <- position("dispatch statement body") parameters <- ("(" ~/ position("dispatch parameters") ~ AWS ~/ mfLhsExpression.rep(min = 0, sep = AWS ~ "," ~/ AWS) ~ AWS ~/ ")" ~/ "").? @@ -426,11 +315,7 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St _ <- AWS ~/ "}" } yield Seq(ReturnDispatchStatement(indexer, parameters.map(_._2.toList).getOrElse(Nil), branches.toList)) - def returnOrDispatchStatement: P[Seq[ExecutableStatement]] = "return" ~ !letterOrDigit ~/ HWS ~ (dispatchStatementBody | mfExpression(nonStatementLevel).?.map(ReturnStatement).map(Seq(_))) - - def breakStatement: P[Seq[ExecutableStatement]] = ("break" ~ !letterOrDigit ~/ HWS ~ identifier.?).map(l => Seq(BreakStatement(l.getOrElse("")))) - - def continueStatement: P[Seq[ExecutableStatement]] = ("continue" ~ !letterOrDigit ~/ HWS ~ identifier.?).map(l => Seq(ContinueStatement(l.getOrElse("")))) + val returnOrDispatchStatement: P[Seq[ExecutableStatement]] = "return" ~ !letterOrDigit ~/ HWS ~ (dispatchStatementBody | mfExpression(nonStatementLevel).?.map(ReturnStatement).map(Seq(_))) def ifStatement: P[Seq[ExecutableStatement]] = for { condition <- "if" ~ !letterOrDigit ~/ HWS ~/ mfExpression(nonStatementLevel) @@ -443,13 +328,6 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St body <- AWS ~ executableStatements } yield Seq(WhileStatement(condition, body.toList, Nil)) - def forDirection: P[ForDirection.Value] = - ("parallel" ~ HWS ~ "to").!.map(_ => ForDirection.ParallelTo) | - ("parallel" ~ HWS ~ "until").!.map(_ => ForDirection.ParallelUntil) | - "until".!.map(_ => ForDirection.Until) | - "to".!.map(_ => ForDirection.To) | - ("down" ~/ HWS ~/ 
"to").!.map(_ => ForDirection.DownTo) - def forStatement: P[Seq[ExecutableStatement]] = for { identifier <- "for" ~ SWS ~/ identifier ~/ HWS ~ "," ~/ HWS ~ Pass start <- mfExpression(nonStatementLevel) ~ HWS ~ "," ~/ HWS ~/ Pass @@ -458,10 +336,7 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St body <- AWS ~ executableStatements } yield Seq(ForStatement(identifier, start, end, direction, body.toList)) - def inlineAssembly: P[Seq[ExecutableStatement]] = for { - condition <- "asm" ~ !letterOrDigit ~/ Pass - body <- AWS ~ asmStatements - } yield body + def inlineAssembly: P[Seq[ExecutableStatement]] = "asm" ~ !letterOrDigit ~/ AWS ~ asmStatements //noinspection MutatorLikeMethodIsParameterless def doWhileStatement: P[Seq[ExecutableStatement]] = for { @@ -469,15 +344,10 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St condition <- "while" ~ !letterOrDigit ~/ HWS ~/ mfExpression(nonStatementLevel) } yield Seq(DoWhileStatement(body.toList, Nil, condition)) - - - - def bankDeclaration: P[Option[String]] = ("segment" ~/ AWS ~/ "(" ~/ AWS ~/ identifier ~/ AWS ~/ ")" ~/ AWS).? 
- - def functionDefinition: P[Seq[DeclarationStatement]] = for { + val functionDefinition: P[Seq[DeclarationStatement]] = for { p <- position() bank <- bankDeclaration - flags <- flags("asm", "inline", "interrupt", "macro", "noinline", "reentrant", "kernal_interrupt") ~ HWS + flags <- functionFlags ~ HWS returnType <- identifier ~ SWS name <- identifier ~ HWS params <- "(" ~/ AWS ~/ (if (flags("asm")) asmParamDefinition else paramDefinition).rep(sep = AWS ~ "," ~/ AWS) ~ AWS ~ ")" ~/ AWS @@ -510,13 +380,152 @@ abstract class MfParser[T](filename: String, input: String, currentDirectory: St def validateAsmFunctionBody(p: Position, flags: Set[String], name: String, statements: Option[List[Statement]]) - def importStatement: Parser[Seq[ImportStatement]] = ("import" ~ !letterOrDigit ~/ SWS ~/ identifier).map(x => Seq(ImportStatement(x))) - - def program: Parser[Program] = for { + val program: Parser[Program] = for { _ <- Start ~/ AWS ~/ Pass - definitions <- (importStatement | arrayDefinition | functionDefinition | variableDefinition(true)).rep(sep = EOL) + definitions <- (importStatement | arrayDefinition | functionDefinition | globalVariableDefinition).rep(sep = EOL) _ <- AWS ~ End } yield Program(definitions.flatten.toList) - +} + +object MfParser { + + val comment: P[Unit] = P("//" ~/ CharsWhile(c => c != '\n' && c != '\r', min = 0) ~ ("\r\n" | "\r" | "\n")) + + val SWS: P[Unit] = P(CharsWhileIn(" \t", min = 1)).opaque("") + + val HWS: P[Unit] = P(CharsWhileIn(" \t", min = 0)).opaque("") + + val AWS: P[Unit] = P((CharIn(" \t\n\r;") | NoCut(comment)).rep(min = 0)).opaque("") + + val EOL: P[Unit] = P(HWS ~ ("\r\n" | "\r" | "\n" | comment).opaque("") ~ AWS).opaque("") + + val letter: P[String] = P(CharIn("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_").!) 
+ + val letterOrDigit: P[Unit] = P(CharIn("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_.$1234567890")) + + val lettersOrDigits: P[String] = P(CharsWhileIn("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz_.$1234567890", min = 0).!) + + val identifier: P[String] = P((letter ~ lettersOrDigits).map { case (a, b) => a + b }).opaque("") + + val doubleQuotedString: P[List[Char]] = P("\"" ~/ CharsWhile(c => c != '\"' && c != '\n' && c != '\r').! ~ "\"").map(_.toList) + + def size(value: Int, wordLiteral: Boolean, farwordLiteral: Boolean, longLiteral: Boolean): Int = { + val w = value > 255 || value < -0x80 || wordLiteral + val f = value > 0xffff || value < -0x8000 || farwordLiteral + val l = value > 0xffffff || value < -0x800000 || longLiteral + if (l) 4 else if (f) 3 else if (w) 2 else 1 + } + + def sign(abs: Int, minus: Boolean): Int = if (minus) -abs else abs + + val invalidCharLiteralTypes: Set[Int] = Set[Int]( + Character.LINE_SEPARATOR, + Character.PARAGRAPH_SEPARATOR, + Character.CONTROL, + Character.PRIVATE_USE, + Character.SURROGATE, + Character.UNASSIGNED) + + val decimalAtom: P[LiteralExpression] = + for { + minus <- "-".!.? + s <- CharsWhileIn("1234567890", min = 1).!.opaque("") ~ !("x" | "b") + } yield { + val abs = Integer.parseInt(s, 10) + val value = sign(abs, minus.isDefined) + LiteralExpression(value, size(value, s.length > 3, s.length > 5, s.length > 7)) + } + + val binaryAtom: P[LiteralExpression] = + for { + minus <- "-".!.? + _ <- P("0b" | "%") ~/ Pass + s <- CharsWhileIn("01", min = 1).!.opaque("") + } yield { + val abs = Integer.parseInt(s, 2) + val value = sign(abs, minus.isDefined) + LiteralExpression(value, size(value, s.length > 8, s.length > 16, s.length > 24)) + } + + val hexAtom: P[LiteralExpression] = + for { + minus <- "-".!.? 
+ _ <- P("0x" | "0X" | "$") ~/ Pass + s <- CharsWhileIn("1234567890abcdefABCDEF", min = 1).!.opaque("") + } yield { + val abs = Integer.parseInt(s, 16) + val value = sign(abs, minus.isDefined) + LiteralExpression(value, size(value, s.length > 2, s.length > 4, s.length > 6)) + } + + val octalAtom: P[LiteralExpression] = + for { + minus <- "-".!.? + _ <- P("0o" | "0O") ~/ Pass + s <- CharsWhileIn("01234567", min = 1).!.opaque("") + } yield { + val abs = Integer.parseInt(s, 8) + val value = sign(abs, minus.isDefined) + LiteralExpression(value, size(value, s.length > 3, s.length > 6, s.length > 9)) + } + + val quaternaryAtom: P[LiteralExpression] = + for { + minus <- "-".!.? + _ <- P("0q" | "0Q") ~/ Pass + s <- CharsWhileIn("0123", min = 1).!.opaque("") + } yield { + val abs = Integer.parseInt(s, 4) + val value = sign(abs, minus.isDefined) + LiteralExpression(value, size(value, s.length > 4, s.length > 8, s.length > 12)) + } + + val variableAtom: P[VariableExpression] = identifier.map(VariableExpression) + + val mfOperators = List( + List("+=", "-=", "+'=", "-'=", "^=", "&=", "|=", "*=", "*'=", "<<=", ">>=", "<<'=", ">>'="), + List("||", "^^"), + List("&&"), + List("==", "<=", ">=", "!=", "<", ">"), + List(":"), + List("+'", "-'", "<<'", ">>'", ">>>>", "+", "-", "&", "|", "^", "<<", ">>"), + List("*'", "*")) + + val mfOperatorsDropFlatten: IndexedSeq[List[String]] = (0 until mfOperators.length).map(i => mfOperators.drop(i).flatten) + + val nonStatementLevel = 1 // everything but not `=` + val mathLevel = 4 // the `:` operator + + val elidable: P[Boolean] = ("?".! ~/ HWS).?.map(_.isDefined) + + val appcComplex: P[ParamPassingConvention] = P((("const" | "ref").! ~/ AWS).? 
~ AWS ~ identifier) map { + case (None, name) => ByVariable(name) + case (Some("const"), name) => ByConstant(name) + case (Some("ref"), name) => ByReference(name) + case x => ErrorReporting.fatal(s"Unknown assembly parameter passing convention: `$x`") + } + + val externFunctionBody: P[Option[List[Statement]]] = P("extern" ~/ PassWith(None)) + + val bankDeclaration: P[Option[String]] = ("segment" ~/ AWS ~/ "(" ~/ AWS ~/ identifier ~/ AWS ~/ ")" ~/ AWS).? + + val breakStatement: P[Seq[ExecutableStatement]] = ("break" ~ !letterOrDigit ~/ HWS ~ identifier.?).map(l => Seq(BreakStatement(l.getOrElse("")))) + + val continueStatement: P[Seq[ExecutableStatement]] = ("continue" ~ !letterOrDigit ~/ HWS ~ identifier.?).map(l => Seq(ContinueStatement(l.getOrElse("")))) + + val importStatement: P[Seq[ImportStatement]] = ("import" ~ !letterOrDigit ~/ SWS ~/ identifier).map(x => Seq(ImportStatement(x))) + + val forDirection: P[ForDirection.Value] = + ("parallel" ~ HWS ~ "to").!.map(_ => ForDirection.ParallelTo) | + ("parallel" ~ HWS ~ "until").!.map(_ => ForDirection.ParallelUntil) | + "until".!.map(_ => ForDirection.Until) | + "to".!.map(_ => ForDirection.To) | + ("down" ~/ HWS ~/ "to").!.map(_ => ForDirection.DownTo) + + private def flags_(allowed: String*): P[Set[String]] = StringIn(allowed: _*).!.rep(min = 0, sep = SWS).map(_.toSet).opaque("") + + val variableFlags: P[Set[String]] = flags_("const", "static", "volatile", "stack", "register") + + val functionFlags: P[Set[String]] = flags_("asm", "inline", "interrupt", "macro", "noinline", "reentrant", "kernal_interrupt") } diff --git a/src/main/scala/millfork/parser/MosParser.scala b/src/main/scala/millfork/parser/MosParser.scala index 2440a24e..4b0f567a 100644 --- a/src/main/scala/millfork/parser/MosParser.scala +++ b/src/main/scala/millfork/parser/MosParser.scala @@ -12,12 +12,14 @@ import millfork.CompilationOptions */ case class MosParser(filename: String, input: String, currentDirectory: String, options: CompilationOptions) 
extends MfParser[AssemblyLine](filename, input, currentDirectory, options) { + import MfParser._ + // TODO: label and instruction in one line - def asmLabel: P[ExecutableStatement] = (identifier ~ HWS ~ ":" ~/ HWS).map(l => MosAssemblyStatement(Opcode.LABEL, AddrMode.DoesNotExist, VariableExpression(l), elidable = true)) + val asmLabel: P[ExecutableStatement] = (identifier ~ HWS ~ ":" ~/ HWS).map(l => MosAssemblyStatement(Opcode.LABEL, AddrMode.DoesNotExist, VariableExpression(l), elidable = true)) // def zeropageAddrModeHint: P[Option[Boolean]] = Pass - def asmOpcode: P[Opcode.Value] = (position() ~ letter.rep(exactly = 3).! ~ ("_W" | "_w").?.!).map { case (p, suffix, o) => Opcode.lookup(o + suffix, Some(p)) } + val asmOpcode: P[Opcode.Value] = (position() ~ letter.rep(exactly = 3).! ~ ("_W" | "_w").?.!).map { case (p, suffix, o) => Opcode.lookup(o + suffix, Some(p)) } private val commaX = HWS ~ "," ~ HWS ~ ("X" | "x") ~ HWS private val commaY = HWS ~ "," ~ HWS ~ ("Y" | "y") ~ HWS @@ -26,7 +28,7 @@ case class MosParser(filename: String, input: String, currentDirectory: String, val farKeyword: P[Unit] = P(("f" | "F") ~ ("a" | "A") ~ ("r" | "R")) - def asmParameter: P[(AddrMode.Value, Expression)] = { + val asmParameter: P[(AddrMode.Value, Expression)] = { (SWS ~ ( ("##" ~ asmExpression).map(AddrMode.WordImmediate -> _) | ("#" ~ asmExpression).map(AddrMode.Immediate -> _) | @@ -46,7 +48,7 @@ case class MosParser(filename: String, input: String, currentDirectory: String, )).?.map(_.getOrElse(AddrMode.Implied -> LiteralExpression(0, 1))) } - def asmInstruction: P[ExecutableStatement] = { + val asmInstruction: P[ExecutableStatement] = { val lineParser: P[(Boolean, Opcode.Value, (AddrMode.Value, Expression))] = !"}" ~ elidable ~/ asmOpcode ~/ asmParameter lineParser.map { case (elid, op, param) => (op, param._1) match { @@ -60,9 +62,9 @@ case class MosParser(filename: String, input: String, currentDirectory: String, } } - def asmMacro: P[ExecutableStatement] = ("+" ~/ 
HWS ~/ functionCall).map(ExpressionStatement) + val asmMacro: P[ExecutableStatement] = ("+" ~/ HWS ~/ functionCall).map(ExpressionStatement) - def asmStatement: P[ExecutableStatement] = (position("assembly statement") ~ P(asmLabel | asmMacro | arrayContentsForAsm | asmInstruction)).map { case (p, s) => s.pos(p) } // TODO: macros + val asmStatement: P[ExecutableStatement] = (position("assembly statement") ~ P(asmLabel | asmMacro | arrayContentsForAsm | asmInstruction)).map { case (p, s) => s.pos(p) } // TODO: macros val appcSimple: P[ParamPassingConvention] = P("xy" | "yx" | "ax" | "ay" | "xa" | "ya" | "stack" | "a" | "x" | "y").!.map { @@ -78,7 +80,7 @@ case class MosParser(filename: String, input: String, currentDirectory: String, case x => ErrorReporting.fatal(s"Unknown assembly parameter passing convention: `$x`") } - val asmParamDefinition: P[ParameterDeclaration] = for { + override val asmParamDefinition: P[ParameterDeclaration] = for { p <- position() typ <- identifier ~ SWS appc <- appcSimple | appcComplex @@ -86,7 +88,7 @@ case class MosParser(filename: String, input: String, currentDirectory: String, def validateAsmFunctionBody(p: Position, flags: Set[String], name: String, statements: Option[List[Statement]]): Unit = { statements match { - case Some(Nil) => ErrorReporting.warn("Assembly function `$name` is empty, did you mean RTS or RTI", options, Some(p)) + case Some(Nil) => ErrorReporting.warn(s"Assembly function `$name` is empty, did you mean RTS, RTI or JMP", options, Some(p)) case Some(xs) => if (flags("interrupt")) { if (xs.exists { diff --git a/src/main/scala/millfork/parser/Z80Parser.scala b/src/main/scala/millfork/parser/Z80Parser.scala index 55eec5e7..162a3136 100644 --- a/src/main/scala/millfork/parser/Z80Parser.scala +++ b/src/main/scala/millfork/parser/Z80Parser.scala @@ -15,6 +15,8 @@ import millfork.node._ */ case class Z80Parser(filename: String, input: String, currentDirectory: String, options: CompilationOptions) extends 
MfParser[ZLine](filename, input, currentDirectory, options) { + import MfParser._ + private val zero = LiteralExpression(0, 1) val appcSimple: P[ParamPassingConvention] = P("a" | "b" | "c" | "d" | "e" | "hl" | "bc" | "de").!.map { @@ -29,16 +31,16 @@ case class Z80Parser(filename: String, input: String, currentDirectory: String, case x => ErrorReporting.fatal(s"Unknown assembly parameter passing convention: `$x`") } - override def asmParamDefinition: P[ParameterDeclaration] = for { + override val asmParamDefinition: P[ParameterDeclaration] = for { p <- position() typ <- identifier ~ SWS appc <- appcSimple | appcComplex } yield ParameterDeclaration(typ, appc).pos(p) // TODO: label and instruction in one line - def asmLabel: P[ExecutableStatement] = (identifier ~ HWS ~ ":" ~/ HWS).map(l => Z80AssemblyStatement(ZOpcode.LABEL, NoRegisters, VariableExpression(l), elidable = true)) + val asmLabel: P[ExecutableStatement] = (identifier ~ HWS ~ ":" ~/ HWS).map(l => Z80AssemblyStatement(ZOpcode.LABEL, NoRegisters, VariableExpression(l), elidable = true)) - def asmMacro: P[ExecutableStatement] = ("+" ~/ HWS ~/ functionCall).map(ExpressionStatement) + val asmMacro: P[ExecutableStatement] = ("+" ~/ HWS ~/ functionCall).map(ExpressionStatement) private def normalOp8(op: ZOpcode.Value): P[(ZOpcode.Value, ZRegisters, Expression)] = asmExpressionWithParens.map { case (VariableExpression("A" | "a"), false) => (op, OneRegister(ZRegister.A), zero) @@ -76,7 +78,7 @@ case class Z80Parser(filename: String, input: String, currentDirectory: String, private val jumpConditionWithComma: P[ZRegisters] = (jumpCondition ~ "," ~/ HWS).?.map (_.getOrElse(NoRegisters)) - def asmInstruction: P[ExecutableStatement] = { + val asmInstruction: P[ExecutableStatement] = { import ZOpcode._ for { el <- elidable @@ -109,9 +111,37 @@ case class Z80Parser(filename: String, input: String, currentDirectory: String, (opcode, NoRegisters, zero) } - override def asmStatement: P[ExecutableStatement] = 
(position("assembly statement") ~ P(asmLabel | asmMacro | arrayContentsForAsm | asmInstruction)).map { case (p, s) => s.pos(p) } // TODO: macros + override val asmStatement: P[ExecutableStatement] = (position("assembly statement") ~ P(asmLabel | asmMacro | arrayContentsForAsm | asmInstruction)).map { case (p, s) => s.pos(p) } // TODO: macros override def validateAsmFunctionBody(p: Position, flags: Set[String], name: String, statements: Option[List[Statement]]): Unit = { - // TODO + statements match { // warn about an empty body or a return opcode that does not match the function kind + case Some(Nil) => ErrorReporting.warn(s"Assembly function `$name` is empty, did you mean RET, RETI, RETN or JP", options, Some(p)) + case Some(xs) => + if (flags("interrupt")) { + if (xs.exists { + case Z80AssemblyStatement(ZOpcode.RET, _, _, _) => true + case _ => false + }) ErrorReporting.warn(s"Assembly interrupt function `$name` contains RET, did you mean RETI/RETN?", options, Some(p)) + } else { + if (xs.exists { + case Z80AssemblyStatement(ZOpcode.RETI, _, _, _) => true + case Z80AssemblyStatement(ZOpcode.RETN, _, _, _) => true + case _ => false + }) ErrorReporting.warn(s"Assembly non-interrupt function `$name` contains RETI or RETN, did you mean RET?", options, Some(p)) + } + if (!name.startsWith("__") && !flags("macro")) { + xs.last match { + case Z80AssemblyStatement(ZOpcode.RET, NoRegisters, _, _) => () // OK + case Z80AssemblyStatement(ZOpcode.RETN, NoRegisters, _, _) => () // OK + case Z80AssemblyStatement(ZOpcode.RETI, NoRegisters, _, _) => () // OK + case Z80AssemblyStatement(ZOpcode.JP, NoRegisters, _, _) => () // OK + case Z80AssemblyStatement(ZOpcode.JR, NoRegisters, _, _) => () // OK + case _ => + val validReturn = if (flags("interrupt")) "RETI/RETN" else "RET" + ErrorReporting.warn(s"Non-macro assembly function `$name` should end in " + validReturn, options, Some(p)) + } + } + case None => () + } } }