diff --git a/compilerAst/src/prog8/parser/ModuleParsing.kt b/compilerAst/src/prog8/parser/ModuleParsing.kt
index 4c0b0a8ef..efae7049e 100644
--- a/compilerAst/src/prog8/parser/ModuleParsing.kt
+++ b/compilerAst/src/prog8/parser/ModuleParsing.kt
@@ -44,11 +44,7 @@ class ModuleImporter(private val program: Program,
         if(!Files.isReadable(filePath))
             throw ParsingFailedError("No such file: $filePath")
 
-        var content = filePath.toFile().readText().replace("\r\n", "\n")  // normalize line endings
-        if(content.last()!='\n')
-            content+='\n'  // grammar requires blocks (and thus module files) to end in an EOL
-
-        return importModule(CharStreams.fromString(content), filePath, false)
+        return importModule(CharStreams.fromPath(filePath), filePath, false)
     }
 
     fun importLibraryModule(name: String): Module? {
diff --git a/compilerAst/test/TestAntlrParser.kt b/compilerAst/test/TestAntlrParser.kt
index 45fc55679..6f53a3826 100644
--- a/compilerAst/test/TestAntlrParser.kt
+++ b/compilerAst/test/TestAntlrParser.kt
@@ -1,11 +1,14 @@
 package prog8tests
 
 import org.antlr.v4.runtime.*
+import org.antlr.v4.runtime.misc.ParseCancellationException
 import org.junit.jupiter.api.Test
 import prog8.ast.IStringEncoding
 import prog8.ast.antlr.toAst
 import prog8.ast.statements.Block
-import prog8.parser.*
+import prog8.parser.ParsingFailedError
+import prog8.parser.prog8Lexer
+import prog8.parser.prog8Parser
 import java.nio.file.Path
 import kotlin.test.*
 
@@ -13,10 +16,46 @@ class TestAntlrParser {
 
     class MyErrorListener: ConsoleErrorListener() {
         override fun syntaxError(recognizer: Recognizer<*, *>?, offendingSymbol: Any?, line: Int, charPositionInLine: Int, msg: String, e: RecognitionException?) {
-            throw ParsingFailedError(msg)
+            throw ParsingFailedError("line $line:$charPositionInLine $msg")
         }
     }
 
+    class MyErrorStrategy: BailErrorStrategy() {
+        override fun recover(recognizer: Parser?, e: RecognitionException?) {
+            try {
+                // let it fill in e in all the contexts
+                super.recover(recognizer, e)
+            } catch (pce: ParseCancellationException) {
+                reportError(recognizer, e)
+            }
+        }
+
+        override fun recoverInline(recognizer: Parser?): Token {
+            throw InputMismatchException(recognizer)
+        }
+    }
+
+    private fun parseModule(srcText: String): prog8Parser.ModuleContext {
+        return parseModule(CharStreams.fromString(srcText))
+    }
+
+    private fun parseModule(srcFile: Path): prog8Parser.ModuleContext {
+        return parseModule(CharStreams.fromPath(srcFile))
+    }
+
+    private fun parseModule(srcStream: CharStream): prog8Parser.ModuleContext {
+        val errorListener = MyErrorListener()
+        val lexer = prog8Lexer(srcStream)
+        lexer.removeErrorListeners()
+        lexer.addErrorListener(errorListener)
+        val tokens = CommonTokenStream(lexer)
+        val parser = prog8Parser(tokens)
+        parser.errorHandler = MyErrorStrategy()
+        parser.removeErrorListeners()
+        parser.addErrorListener(errorListener)
+        return parser.module()
+    }
+
     object TestStringEncoding: IStringEncoding {
         override fun encodeString(str: String, altEncoding: Boolean): List {
             TODO("Not yet implemented")
@@ -28,24 +67,64 @@ class TestAntlrParser {
     }
 
     @Test
-    fun testAntlrTree() {
-        // can create charstreams from many other sources as well;
-        val charstream = CharStreams.fromString("""
-main {
-    sub start() {
-        return
+    fun testModuleSourceNeedNotEndWithNewline() {
+        val nl = "\n" // say, Unix-style (different flavours tested elsewhere)
+        val srcText = "foo {" + nl + "}"   // source ends with '}' (= NO newline, issue #40)
+
+        // before the fix, prog8Parser would have reported (thrown) "missing <EOL> at '<EOF>'"
+        val parseTree = parseModule(srcText)
+        assertEquals(1, parseTree.block().size)
     }
-}
-""")
-        val lexer = prog8Lexer(charstream)
-        val tokens = CommonTokenStream(lexer)
-        val parser = prog8Parser(tokens)
-        parser.errorHandler = BailErrorStrategy()
-//        parser.removeErrorListeners()
-//        parser.addErrorListener(MyErrorListener())
-        val nodes = parser.module()
-        val blockName = nodes.block(0).identifier().NAME().text
-        assertEquals(blockName, "main")
+
+    @Test
+    fun testModuleSourceMayEndWithNewline() {
+        val nl = "\n" // say, Unix-style (different flavours tested elsewhere)
+        val srcText = "foo {" + nl + "}" + nl  // source does end with a newline (issue #40)
+        val parseTree = parseModule(srcText)
+        assertEquals(1, parseTree.block().size)
+    }
+
+    @Test
+    fun testAllBlocksButLastMustEndWithNewline() {
+        val nl = "\n" // say, Unix-style (different flavours tested elsewhere)
+
+        // BAD: 2nd block `bar` does NOT start on new line; however, there is a nl at the very end
+        val srcBad = "foo {" + nl + "}" + " bar {" + nl + "}" + nl
+
+        // GOOD: 2nd block `bar` does start on a new line; however, a nl at the very end ain't needed
+        val srcGood = "foo {" + nl + "}" + nl + "bar {" + nl + "}"
+
+        assertFailsWith<ParsingFailedError> { parseModule(srcBad) }
+        val parseTree = parseModule(srcGood)
+        assertEquals(2, parseTree.block().size)
+    }
+
+    @Test
+    fun testWindowsAndMacNewlinesAreAlsoFine() {
+        val nlWin = "\r\n"
+        val nlUnix = "\n"
+        val nlMac = "\r"
+
+        //parseModule(Paths.get("test", "fixtures", "mac_newlines.p8").toAbsolutePath())
+
+        // a good mix of all kinds of newlines (every line but the last must end in `+`, else Kotlin ends the expression early):
+        val srcText =
+            "foo {" +
+            nlMac +
+            nlWin +
+            "}" +
+            nlMac +     // <-- do test a single \r (!) where an EOL is expected
+            "bar {" +
+            nlUnix +
+            "}" +
+            nlUnix + nlMac +     // both should be "eaten up" by just one EOL token
+            "combi {" +
+            nlMac + nlWin + nlUnix +     // all three should be "eaten up" by just one EOL token
+            "}" +
+            nlUnix      // end with newline (see testModuleSourceNeedNotEndWithNewline)
+
+        val parseTree = parseModule(srcText)
+        assertEquals(3, parseTree.block().size)
     }
 
     @Test
diff --git a/parser/antlr/prog8.g4 b/parser/antlr/prog8.g4
index ba017e72b..c5e117cf9 100644
--- a/parser/antlr/prog8.g4
+++ b/parser/antlr/prog8.g4
@@ -5,7 +5,6 @@ NOTES:
 
 - whitespace is ignored. (tabs/spaces)
 - every position can be empty, be a comment, or contain ONE statement.
-- input is assumed to be a text file with UNIX line endings (\n).
 
 */
 
@@ -15,10 +14,11 @@ grammar prog8;
 package prog8.parser;
 }
 
-LINECOMMENT : [\n][ \t]* COMMENT -> channel(HIDDEN);
-COMMENT : ';' ~[\n]* -> channel(HIDDEN) ;
+LINECOMMENT : ('\r'? '\n' | '\r') [ \t]* COMMENT -> channel(HIDDEN);
+COMMENT : ';' ~[\r\n]* -> channel(HIDDEN) ;
+EOL : ('\r'? '\n' | '\r')+ ;
+
 WS : [ \t] -> skip ;
-EOL : [\n]+ ;
 // WS2 : '\\' EOL -> skip;
 VOID: 'void';
 NAME : [a-zA-Z][a-zA-Z0-9_]* ;
@@ -73,9 +73,12 @@ ARRAYSIG :
 cpuregister: 'A' | 'X' | 'Y';
 register: 'A' | 'X' | 'Y' | 'AX' | 'AY' | 'XY' | 'Pc' | 'Pz' | 'Pn' | 'Pv' | 'R0' | 'R1' | 'R2' | 'R3' | 'R4' | 'R5' | 'R6' | 'R7' | 'R8' | 'R9' | 'R10' | 'R11' | 'R12' | 'R13' | 'R14' | 'R15';
 
-module : (directive | block | EOL)* EOF ;
+// A module (file) consists of zero or more directives or blocks, in any order.
+// If there are more than one, then they must be separated by EOL (one or more newlines).
+// However, trailing EOL is NOT required.
+module: EOL? ((directive | block) (EOL (directive | block))*)? EOL? EOF;
 
-block: identifier integerliteral? '{' EOL (block_statement | EOL) * '}' EOL ;
+block: identifier integerliteral? '{' EOL (block_statement | EOL)* '}';
 
 block_statement: