/*
Prog8 combined lexer and parser grammar

NOTES:

- whitespace is ignored. (tabs/spaces)
- every position can be empty, be a comment, or contain ONE statement.

*/

// -> java classes Prog8ANTLRParser and Prog8ANTLRLexer,
//    both NOT to be used from Kotlin code, but ONLY through Kotlin class Prog8Parser

grammar Prog8ANTLR;

@header {
package prog8.parser;
}


// ---------------------------------------------------------------------------
// Lexer rules.
// The ORDER of these rules is significant: when two rules match the same
// text, the one defined first wins (e.g. VOID before NAME, DEC_INTEGER
// before FNUMDOTNUMBER). Do not reorder without checking for such overlaps.
// ---------------------------------------------------------------------------

// A run of one or more line breaks (LF, CRLF or a lone CR) is a single EOL token.
EOL
    : ( '\r'? '\n' | '\r' )+
    ;

// A ';' comment that sits on a line of its own: the preceding line break(s)
// and the indentation are part of the token and go to the hidden channel with it.
LINECOMMENT
    : EOL [ \t]* COMMENT -> channel(HIDDEN)
    ;

// ';' up to (not including) the end of the line.
COMMENT
    : ';' ~[\r\n]* -> channel(HIDDEN)
    ;

// '/* ... */', may be nested (the rule refers to itself); dropped entirely.
BLOCK_COMMENT
    : '/*' ( BLOCK_COMMENT | . )*? '*/' -> skip
    ;

// Spaces and tabs are dropped, one character at a time.
WS
    : [ \t] -> skip
    ;

// WS2 : '\\' EOL -> skip;

// Defined before NAME so that the text 'void' becomes a VOID token.
VOID
    : 'void'
    ;

NAME
    : [\p{Letter}][\p{Letter}\p{Mark}\p{Digit}_]*       // match unicode properties
    ;

UNDERSCORENAME
    : '_' NAME                                          // match unicode properties
    ;

// Integer literals: the first character must be a digit of the radix,
// after that '_' may be mixed in with the digits.
DEC_INTEGER
    : DEC_DIGIT ( DEC_DIGIT | '_' )*
    ;

HEX_INTEGER
    : '$' HEX_DIGIT ( HEX_DIGIT | '_' )*
    ;

BIN_INTEGER
    : '%' BIN_DIGIT ( BIN_DIGIT | '_' )*
    ;

ADDRESS_OF
    : '&'
    ;

// '&&' gets a token of its own instead of being lexed as two ADDRESS_OF tokens.
// NOTE(review): no parser rule in this grammar uses it - presumably it exists
// so that '&&' ends up as a syntax error; confirm.
INVALID_AND_COMPOSITE
    : '&&'
    ;

fragment HEX_DIGIT
    : [a-fA-F0-9]
    ;

fragment BIN_DIGIT
    : [01]
    ;

fragment DEC_DIGIT
    : [0-9]
    ;

// Number with an optional exponent part.
FLOAT_NUMBER
    : FNUMBER ( [eE] ('+' | '-')? DEC_INTEGER )?        // sign comes later from unary expression
    ;

// NOTE(review): FNUMBER, FDOTNUMBER, FNUMDOTNUMBER and STRING_ESCAPE_SEQ are
// not declared 'fragment', so each of them is also a token type in its own right.
FNUMBER
    : FDOTNUMBER
    | FNUMDOTNUMBER
    ;

FDOTNUMBER
    : '.' ( DEC_DIGIT | '_' )+
    ;

FNUMDOTNUMBER
    : DEC_DIGIT ( DEC_DIGIT | '_' )* FDOTNUMBER?
    ;

// Backslash escapes: '\' + any char, '\x' + two chars, '\u' + four chars.
STRING_ESCAPE_SEQ
    : '\\' .
    | '\\x' . .
    | '\\u' . . . .
    ;

// Double-quoted, single-line string.
STRING
    : '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"] )* '"'
    ;

// '{{' ... '}}' with at least one character in between, shortest match.
INLINEASMBLOCK
    : '{{' .+? '}}'
    ;

// Single-quoted literal holding exactly one character or one escape sequence.
SINGLECHAR
    : '\'' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f'] ) '\''
    ;

// Declaration option tags (used by the 'decloptions' parser rule).
ZEROPAGE
    : '@zp'
    ;

ZEROPAGEREQUIRE
    : '@requirezp'
    ;

SHARED
    : '@shared'
    ;

SPLIT
    : '@split'
    ;

ARRAYSIG
    : '[]'
    ;


// A module (file) consists of zero or more directives or blocks, in any order.
// If there is more than one, then they must be separated by EOL (one or more newlines).
// However, trailing EOL is NOT required.
// Note: the parser may see *several* consecutive EOLs - this happens when EOL and comments are interleaved (see #47)
// Top-level rule: optional leading/trailing EOLs around an EOL-separated
// sequence of directives and blocks, then end of input.
module
    : EOL* ( (directive | block) ( EOL+ (directive | block) )* )? EOL* EOF
    ;

// Named block: name, optional integer literal, then a brace-enclosed body.
// NOTE(review): the integer literal is presumably the block's address - confirm.
block
    : identifier integerliteral? EOL? '{' EOL? ( block_statement | EOL )* '}'
    ;

// What may appear directly inside a block (a subset of 'statement').
block_statement
    : directive
    | variabledeclaration
    | subroutinedeclaration
    | inlineasm
    | inlineir
    | labeldef
    ;

// What may appear inside a statement_block (subroutine and control-flow bodies).
statement
    : directive
    | variabledeclaration
    | assignment
    | augassignment
    | unconditionaljump
    | postincrdecr
    | functioncall_stmt
    | if_stmt
    | branch_stmt
    | subroutinedeclaration
    | inlineasm
    | inlineir
    | returnstmt
    | forloop
    | whileloop
    | untilloop
    | repeatloop
    | unrollloop
    | whenstmt
    | breakstmt
    | continuestmt
    | labeldef
    ;

variabledeclaration
    : varinitializer
    | vardecl
    | constdecl
    | memoryvardecl
    ;

subroutinedeclaration
    : subroutine
    | asmsubroutine
    | romsubroutine
    ;

labeldef
    : identifier ':'
    ;

unconditionaljump
    : 'goto' ( integerliteral | scoped_identifier )
    ;

// A '%name' directive followed by zero or more comma-separated arguments.
// The argument list is a single optional group: the earlier form
// "(directivearg? | directivearg (',' directivearg)*)" matched a lone argument
// through both alternatives, which made the rule ambiguous. The accepted
// input and the resulting context (a list of directivearg) are unchanged.
directive
    : directivename=( '%output' | '%launcher' | '%zeropage' | '%zpreserved' | '%zpallowed' | '%address' | '%import'
                    | '%breakpoint' | '%asminclude' | '%asmbinary' | '%option' | '%encoding' )
      ( directivearg (',' directivearg)* )?
    ;

directivearg
    : stringliteral
    | identifier
    | integerliteral
    ;

// Type, optional array size ('[expr]' or '[]'), option tags, then one or more names.
vardecl
    : datatype ( arrayindex | ARRAYSIG )? decloptions identifier (',' identifier)*
    ;

decloptions
    : ( SHARED | ZEROPAGE | ZEROPAGEREQUIRE | SPLIT )*
    ;

varinitializer
    : vardecl '=' expression
    ;

constdecl
    : 'const' varinitializer
    ;

memoryvardecl
    : ADDRESS_OF varinitializer
    ;

datatype
    : 'ubyte' | 'byte' | 'uword' | 'word' | 'float' | 'str' | 'bool'
    ;

arrayindex
    : '[' expression ']'
    ;

// Plain assignment, chained assignment (a = b = expr), or several
// comma-separated targets receiving one expression.
assignment
    : (assign_target '=' expression)
    | (assign_target '=' assignment)
    | (multi_assign_target '=' expression)
    ;

augassignment
    : assign_target operator=( '+=' | '-=' | '/=' | '*=' | '&=' | '|=' | '^=' | '%=' | '<<=' | '>>=' ) expression
    ;

assign_target
    : scoped_identifier             #IdentifierTarget
    | arrayindexed                  #ArrayindexedTarget
    | directmemory                  #MemoryTarget
    ;

multi_assign_target
    : assign_target (',' assign_target)+
    ;

postincrdecr
    : assign_target operator = ('++' | '--')
    ;

// Left-recursive expression rule. The ORDER of the alternatives defines
// operator precedence (earlier = binds tighter) - do not reorder.
// A binary operator may have a line break before and/or after it.
expression
    : '(' expression ')'
    | functioncall
    | prefix = ('+'|'-'|'~') expression
    | left = expression EOL? bop = ('*' | '/' | '%' ) EOL? right = expression
    | left = expression EOL? bop = ('+' | '-' ) EOL? right = expression
    | left = expression EOL? bop = ('<<' | '>>' ) EOL? right = expression
    | left = expression EOL? bop = '&' EOL? right = expression
    | left = expression EOL? bop = '^' EOL? right = expression
    | left = expression EOL? bop = '|' EOL? right = expression
    | left = expression EOL? bop = ('<' | '>' | '<=' | '>=') EOL? right = expression
    | left = expression EOL? bop = ('==' | '!=') EOL? right = expression
    | rangefrom = expression rto = ('to'|'downto') rangeto = expression ('step' rangestep = expression)?    // can't create separate rule due to mutual left-recursion
    | left = expression EOL? bop = 'in' EOL? right = expression
    // 'not in' is one token that includes the whitespace character following it
    | left = expression EOL? bop = ('not in ' | 'not in\t' | 'not in\n' | 'not in\r') EOL? right = expression
    | prefix = 'not' expression
    | left = expression EOL? bop = 'and' EOL? right = expression
    | left = expression EOL? bop = 'or' EOL? right = expression
    | left = expression EOL? bop = 'xor' EOL? right = expression
    | literalvalue
    | scoped_identifier
    | arrayindexed
    | directmemory
    | addressof
    | expression typecast
    ;

arrayindexed
    : scoped_identifier arrayindex
    ;

typecast
    : 'as' datatype
    ;

directmemory
    : '@' '(' expression ')'
    ;

addressof
    : ADDRESS_OF ( scoped_identifier | arrayindexed )
    ;

functioncall
    : scoped_identifier '(' expression_list? ')'
    ;

// Function call used as a statement, optionally prefixed with 'void'.
functioncall_stmt
    : VOID? scoped_identifier '(' expression_list? ')'
    ;

expression_list
    : expression (',' EOL? expression)*             // you can split the expression list over several lines
    ;

returnstmt
    : 'return' expression?
    ;

breakstmt
    : 'break'
    ;

continuestmt
    : 'continue'
    ;

// VOID is listed because the lexer turns the text 'void' into a VOID token, never a NAME.
identifier
    : NAME
    | UNDERSCORENAME
    | VOID
    ;

scoped_identifier
    : identifier ('.' identifier)*
    ;

integerliteral
    : intpart=( DEC_INTEGER | HEX_INTEGER | BIN_INTEGER )
    ;

booleanliteral
    : 'true'
    | 'false'
    ;

arrayliteral
    : '[' EOL? expression (',' EOL? expression)* EOL? ']'      // you can split the values over several lines
    ;

// String with an optional 'encodingname:' prefix.
stringliteral
    : (encoding=NAME ':')? STRING
    ;

// Character with an optional 'encodingname:' prefix.
charliteral
    : (encoding=NAME ':')? SINGLECHAR
    ;

floatliteral
    : FLOAT_NUMBER
    ;

literalvalue
    : integerliteral
    | booleanliteral
    | arrayliteral
    | stringliteral
    | charliteral
    | floatliteral
    ;

inlineasm
    : '%asm' EOL? INLINEASMBLOCK
    ;

inlineir
    : '%ir' EOL? INLINEASMBLOCK
    ;

inline
    : 'inline'
    ;

// Note: the closing '}' of the body must be followed by an EOL.
subroutine
    : 'sub' identifier '(' sub_params? ')' sub_return_part? EOL? (statement_block EOL)
    ;

sub_return_part
    : '->' datatype
    ;

statement_block
    : '{' EOL? ( statement | EOL )* '}'
    ;

sub_params
    : vardecl (',' EOL? vardecl)*
    ;

// Note: the closing '}' of the body must be followed by an EOL.
asmsubroutine
    : inline? 'asmsub' asmsub_decl EOL? (statement_block EOL)
    ;

// Declaration only ('romsub <integer> = <signature>'), no body.
romsubroutine
    : 'romsub' integerliteral '=' asmsub_decl
    ;

asmsub_decl
    : identifier '(' asmsub_params? ')' asmsub_clobbers? asmsub_returns?
    ;

asmsub_params
    : asmsub_param (',' EOL? asmsub_param)*
    ;

asmsub_param
    : vardecl '@' register=NAME        // A,X,Y,AX,AY,XY,Pc,Pz,Pn,Pv allowed.
    ;

asmsub_clobbers
    : 'clobbers' '(' clobber? ')'
    ;

clobber
    : NAME (',' NAME)*                  // A,X,Y allowed
    ;

asmsub_returns
    : '->' asmsub_return (',' EOL? asmsub_return)*
    ;

asmsub_return
    : datatype '@' register=NAME       // A,X,Y,AX,AY,XY,Pc,Pz,Pn,Pv allowed
    ;

if_stmt
    : 'if' expression EOL? (statement | statement_block) EOL? else_part?       // statement is constrained later
    ;

else_part
    : 'else' EOL? (statement | statement_block)         // statement is constrained later
    ;

// Note: unlike if_stmt, this rule ends with a mandatory EOL.
branch_stmt
    : branchcondition EOL? (statement | statement_block) EOL? else_part? EOL
    ;

branchcondition
    : 'if_cs' | 'if_cc' | 'if_eq' | 'if_z' | 'if_ne' | 'if_nz' | 'if_pl' | 'if_pos' | 'if_mi' | 'if_neg' | 'if_vs' | 'if_vc'
    ;

forloop
    : 'for' scoped_identifier 'in' expression EOL? (statement | statement_block)
    ;

whileloop
    : 'while' expression EOL? (statement | statement_block)
    ;

// An optional EOL is allowed between 'do' and the body, exactly like the
// other loop rules allow one before their body; previously 'do' followed by a
// line break was rejected. Neither 'statement' nor 'statement_block' can start
// with EOL, so this adds no ambiguity.
untilloop
    : 'do' EOL? (statement | statement_block) EOL? 'until' expression
    ;

repeatloop
    : 'repeat' expression? EOL? (statement | statement_block)
    ;

unrollloop
    : 'unroll' expression EOL? (statement | statement_block)       // note: expression must evaluate to a constant
    ;

whenstmt
    : 'when' expression EOL? '{' EOL? ( when_choice | EOL )* '}' EOL?
    ;

// One arm of a 'when': a list of values, or 'else', then '->' and the body.
when_choice
    : ( expression_list | 'else' ) '->' ( statement | statement_block )
    ;