Merge pull request #45 from meisl/issue40(EOF,EOL)

Issue #40, EOF/EOL
This commit is contained in:
Irmen de Jong 2021-06-18 20:22:59 +02:00 committed by GitHub
commit cfe4753913
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 108 additions and 30 deletions

View File

@ -44,11 +44,7 @@ class ModuleImporter(private val program: Program,
if(!Files.isReadable(filePath))
throw ParsingFailedError("No such file: $filePath")
var content = filePath.toFile().readText().replace("\r\n", "\n") // normalize line endings
if(content.last()!='\n')
content+='\n' // grammar requires blocks (and thus module files) to end in an EOL
return importModule(CharStreams.fromString(content), filePath, false)
return importModule(CharStreams.fromPath(filePath), filePath, false)
}
fun importLibraryModule(name: String): Module? {

View File

@ -1,11 +1,14 @@
package prog8tests
import org.antlr.v4.runtime.*
import org.antlr.v4.runtime.misc.ParseCancellationException
import org.junit.jupiter.api.Test
import prog8.ast.IStringEncoding
import prog8.ast.antlr.toAst
import prog8.ast.statements.Block
import prog8.parser.*
import prog8.parser.ParsingFailedError
import prog8.parser.prog8Lexer
import prog8.parser.prog8Parser
import java.nio.file.Path
import kotlin.test.*
@ -13,10 +16,46 @@ class TestAntlrParser {
class MyErrorListener: ConsoleErrorListener() {
override fun syntaxError(recognizer: Recognizer<*, *>?, offendingSymbol: Any?, line: Int, charPositionInLine: Int, msg: String, e: RecognitionException?) {
throw ParsingFailedError(msg)
throw ParsingFailedError("line $line:$charPositionInLine $msg")
}
}
class MyErrorStrategy: BailErrorStrategy() {
override fun recover(recognizer: Parser?, e: RecognitionException?) {
try {
// let it fill in e in all the contexts
super.recover(recognizer, e)
} catch (pce: ParseCancellationException) {
reportError(recognizer, e)
}
}
override fun recoverInline(recognizer: Parser?): Token {
throw InputMismatchException(recognizer)
}
}
private fun parseModule(srcText: String): prog8Parser.ModuleContext {
return parseModule(CharStreams.fromString(srcText))
}
private fun parseModule(srcFile: Path): prog8Parser.ModuleContext {
return parseModule(CharStreams.fromPath(srcFile))
}
private fun parseModule(srcStream: CharStream): prog8Parser.ModuleContext {
val errorListener = MyErrorListener()
val lexer = prog8Lexer(srcStream)
lexer.removeErrorListeners()
lexer.addErrorListener(errorListener)
val tokens = CommonTokenStream(lexer)
val parser = prog8Parser(tokens)
parser.errorHandler = MyErrorStrategy()
parser.removeErrorListeners()
parser.addErrorListener(errorListener)
return parser.module()
}
object TestStringEncoding: IStringEncoding {
override fun encodeString(str: String, altEncoding: Boolean): List<Short> {
TODO("Not yet implemented")
@ -28,24 +67,64 @@ class TestAntlrParser {
}
@Test
fun testAntlrTree() {
// can create charstreams from many other sources as well;
val charstream = CharStreams.fromString("""
main {
sub start() {
return
fun testModuleSourceNeedNotEndWithNewline() {
val nl = "\n" // say, Unix-style (different flavours tested elsewhere)
val srcText = "foo {" + nl + "}" // source ends with '}' (= NO newline, issue #40)
// before the fix, prog8Parser would have reported (thrown) "missing <EOL> at '<EOF>'"
val parseTree = parseModule(srcText)
assertEquals(parseTree.block().size, 1)
}
}
""")
val lexer = prog8Lexer(charstream)
val tokens = CommonTokenStream(lexer)
val parser = prog8Parser(tokens)
parser.errorHandler = BailErrorStrategy()
// parser.removeErrorListeners()
// parser.addErrorListener(MyErrorListener())
val nodes = parser.module()
val blockName = nodes.block(0).identifier().NAME().text
assertEquals(blockName, "main")
@Test
fun testModuleSourceMayEndWithNewline() {
val nl = "\n" // say, Unix-style (different flavours tested elsewhere)
val srcText = "foo {" + nl + "}" + nl // source does end with a newline (issue #40)
val parseTree = parseModule(srcText)
assertEquals(parseTree.block().size, 1)
}
@Test
fun testAllBlocksButLastMustEndWithNewline() {
val nl = "\n" // say, Unix-style (different flavours tested elsewhere)
// BAD: 2nd block `bar` does NOT start on new line; however, there's is a nl at the very end
val srcBad = "foo {" + nl + "}" + " bar {" + nl + "}" + nl
// GOOD: 2nd block `bar` does start on a new line; however, a nl at the very end ain't needed
val srcGood = "foo {" + nl + "}" + nl + "bar {" + nl + "}"
assertFailsWith<ParsingFailedError> { parseModule(srcBad) }
val parseTree = parseModule(srcGood)
assertEquals(parseTree.block().size, 2)
}
@Test
fun testWindowsAndMacNewlinesAreAlsoFine() {
val nlWin = "\r\n"
val nlUnix = "\n"
val nlMac = "\r"
//parseModule(Paths.get("test", "fixtures", "mac_newlines.p8").toAbsolutePath())
// a good mix of all kinds of newlines:
val srcText =
"foo {" +
nlMac +
nlWin +
"}" +
nlMac + // <-- do test a single \r (!) where an EOL is expected
"bar {" +
nlUnix +
"}" +
nlUnix + nlMac // both should be "eaten up" by just one EOL token
"combi {" +
nlMac + nlWin + nlUnix // all three should be "eaten up" by just one EOL token
"}" +
nlUnix // end with newline (see testModuleSourceNeedNotEndWithNewline)
val parseTree = parseModule(srcText)
assertEquals(parseTree.block().size, 2)
}
@Test

View File

@ -5,7 +5,6 @@ NOTES:
- whitespace is ignored. (tabs/spaces)
- every position can be empty, be a comment, or contain ONE statement.
- input is assumed to be a text file with UNIX line endings (\n).
*/
@ -15,10 +14,11 @@ grammar prog8;
package prog8.parser;
}
LINECOMMENT : [\n][ \t]* COMMENT -> channel(HIDDEN);
COMMENT : ';' ~[\n]* -> channel(HIDDEN) ;
LINECOMMENT : ('\r'? '\n' | '\r') [ \t]* COMMENT -> channel(HIDDEN);
COMMENT : ';' ~[\r\n]* -> channel(HIDDEN) ;
EOL : ('\r'? '\n' | '\r')+ ;
WS : [ \t] -> skip ;
EOL : [\n]+ ;
// WS2 : '\\' EOL -> skip;
VOID: 'void';
NAME : [a-zA-Z][a-zA-Z0-9_]* ;
@ -73,9 +73,12 @@ ARRAYSIG :
cpuregister: 'A' | 'X' | 'Y';
register: 'A' | 'X' | 'Y' | 'AX' | 'AY' | 'XY' | 'Pc' | 'Pz' | 'Pn' | 'Pv' | 'R0' | 'R1' | 'R2' | 'R3' | 'R4' | 'R5' | 'R6' | 'R7' | 'R8' | 'R9' | 'R10' | 'R11' | 'R12' | 'R13' | 'R14' | 'R15';
module : (directive | block | EOL)* EOF ;
// A module (file) consists of zero or more directives or blocks, in any order.
// If there are more than one, then they must be separated by EOL (one or more newlines).
// However, trailing EOL is NOT required.
module: EOL? ((directive | block) (EOL (directive | block))*)? EOL? EOF;
block: identifier integerliteral? '{' EOL (block_statement | EOL) * '}' EOL ;
block: identifier integerliteral? '{' EOL (block_statement | EOL)* '}';
block_statement: