prog8/parser/antlr/Prog8ANTLR.g4

313 lines
8.2 KiB
Plaintext
Raw Normal View History

2018-08-09 01:54:43 +02:00
/*
2018-09-15 16:21:05 +02:00
Prog8 combined lexer and parser grammar
2018-08-10 23:56:30 +02:00
NOTES:
- whitespace is ignored. (tabs/spaces)
- every position can be empty, be a comment, or contain ONE statement.
2018-08-10 23:56:30 +02:00
2018-08-09 01:54:43 +02:00
*/
// This grammar generates the Java classes Prog8ANTLRParser and Prog8ANTLRLexer.
// Do NOT use those classes directly from Kotlin code; access them ONLY through the Kotlin class Prog8Parser.
grammar Prog8ANTLR;
2018-08-09 01:54:43 +02:00
2019-01-29 12:06:33 +01:00
@header {
package prog8.parser;
}
2018-08-11 14:06:43 +02:00
// Line terminators: CRLF, a lone CR or a lone LF. A run of consecutive
// line breaks is collapsed into a single EOL token.
EOL
    : ( '\r'? '\n' | '\r' )+
    ;

// A comment that follows a line break (with optional blanks/tabs in between).
// The line break, the indentation and the comment together form one token
// that is sent to the hidden channel.
LINECOMMENT
    : EOL [ \t]* COMMENT -> channel(HIDDEN)
    ;

// ';' starts a comment that extends up to, but not including, the line end.
COMMENT
    : ';' ~[\r\n]* -> channel(HIDDEN)
    ;
2023-07-04 00:18:58 +02:00
// '/* ... */' comments. The rule refers to itself, so comments may be nested.
// They are removed from the token stream altogether.
BLOCK_COMMENT
    : '/*' ( BLOCK_COMMENT | . )*? '*/' -> skip
    ;

// Blanks and tabs are discarded, one character per match.
WS
    : [ \t] -> skip
    ;

// Disabled rule that would skip a backslash followed by a line break:
// WS2 : '\\' EOL -> skip;

VOID
    : 'void'
    ;
2023-12-05 17:38:23 +01:00
// Identifiers: a Unicode letter followed by letters, marks, digits or '_'
// (character classes are matched via Unicode properties).
NAME
    : [\p{Letter}] [\p{Letter}\p{Mark}\p{Digit}_]*
    ;

// Integer literals. After the first digit, '_' may be mixed in with the digits.
DEC_INTEGER
    : DEC_DIGIT ( DEC_DIGIT | '_' )*
    ;

// Hexadecimal literal, introduced by '$'.
HEX_INTEGER
    : '$' HEX_DIGIT ( HEX_DIGIT | '_' )*
    ;

// Binary literal, introduced by '%'.
BIN_INTEGER
    : '%' BIN_DIGIT ( BIN_DIGIT | '_' )*
    ;

ADDRESS_OF
    : '&'
    ;

// '&&' gets a token of its own, so it is never lexed as two ADDRESS_OF tokens.
// No parser rule in this grammar refers to it.
INVALID_AND_COMPOSITE
    : '&&'
    ;
2018-08-09 01:54:43 +02:00
// Single-digit building blocks for the numeric literal tokens.
fragment HEX_DIGIT : [a-fA-F0-9] ;
fragment BIN_DIGIT : [01] ;
fragment DEC_DIGIT : [0-9] ;
// Floating point literal: a mantissa (FNUMBER) with an optional exponent part.
// Inputs that consist of digits/underscores only are also matched by DEC_INTEGER,
// which is declared earlier and therefore wins for those.
FLOAT_NUMBER
    : FNUMBER ( ('E' | 'e') ('+' | '-')? DEC_INTEGER )?   // sign comes later from unary expression
    ;

// Helper rules for the mantissa. They are fragments because they only serve as
// building blocks of FLOAT_NUMBER: anything they match is matched at least as long
// by the earlier DEC_INTEGER / FLOAT_NUMBER rules, so as stand-alone tokens they
// could never be emitted and only cluttered the token vocabulary.
fragment FNUMBER       : FDOTNUMBER | FNUMDOTNUMBER ;
fragment FDOTNUMBER    : '.' ( DEC_DIGIT | '_' )+ ;
fragment FNUMDOTNUMBER : DEC_DIGIT ( DEC_DIGIT | '_' )* FDOTNUMBER? ;
2018-08-09 01:54:43 +02:00
2023-07-04 00:18:58 +02:00
// A backslash escape as used inside string and character literals:
// backslash + any character, '\x' + two characters, or '\u' + four characters.
STRING_ESCAPE_SEQ
    : '\\' .
    | '\\x' . .
    | '\\u' . . . .
    ;

// Double-quoted string. Unescaped backslash, CR, LF, form feed and '"' are not
// allowed between the quotes.
STRING
    : '"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"] )* '"'
    ;
2018-08-10 23:56:30 +02:00
// Everything between '{{' and the first following '}}' (non-greedy, at least one character).
INLINEASMBLOCK : '{{' .+? '}}' ;
2018-08-09 01:54:43 +02:00
// Single-quoted character literal: exactly one escape sequence or one ordinary character.
SINGLECHAR : '\'' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f'] ) '\'' ;
// '@...' tags that can be attached to a variable declaration (see decloptions).
ZEROPAGE
    : '@zp'
    ;

ZEROPAGEREQUIRE
    : '@requirezp'
    ;

SHARED
    : '@shared'
    ;

SPLIT
    : '@split'
    ;

// Empty array brackets, lexed as one token.
ARRAYSIG : '[]' ;
// A module (one source file) holds zero or more directives and blocks, in any order.
// Consecutive items must be separated by EOL (one or more newlines);
// a trailing EOL after the last item is NOT required.
// Note: the parser may see *several* consecutive EOL tokens - this happens when
// EOL and comments are interleaved (see #47).
module
    : EOL*
      ( (directive | block) ( EOL+ (directive | block) )* )?
      EOL*
      EOF
    ;
2018-08-10 23:56:30 +02:00
2023-06-27 01:59:22 +02:00
// A named block: identifier, optional integer literal, then a '{ ... }' body
// that holds block-level statements and/or empty lines.
block
    : identifier integerliteral? EOL?
      '{' EOL?
          ( block_statement | EOL )*
      '}'
    ;
// The statements that are allowed directly inside a block body.
block_statement
    : directive
    | variabledeclaration
    | subroutinedeclaration
    | inlineasm
    | inlineir
    | labeldef
    ;
2018-08-10 00:26:41 +02:00
// Every kind of statement that may appear inside a statement_block
// or as the single-statement body of a control structure.
statement
    : directive
    | variabledeclaration
    | assignment
    | augassignment
    | unconditionaljump
    | postincrdecr
    | functioncall_stmt
    | if_stmt
    | branch_stmt
    | subroutinedeclaration
    | inlineasm
    | inlineir
    | returnstmt
    | forloop
    | whileloop
    | untilloop
    | repeatloop
    | unrollloop
    | whenstmt
    | breakstmt
    | continuestmt
    | labeldef
    ;
2018-09-16 03:00:32 +02:00
2020-03-14 13:23:13 +01:00
// Any of the four declaration forms: with initializer, without, constant, or '&'-prefixed.
variabledeclaration
    : varinitializer
    | vardecl
    | constdecl
    | memoryvardecl
    ;
// The three kinds of subroutine definitions.
subroutinedeclaration
    : subroutine
    | asmsubroutine
    | romsubroutine
    ;
2018-08-12 17:16:36 +02:00
// A label: an identifier followed by a colon.
labeldef
    : identifier ':'
    ;
2018-08-10 23:56:30 +02:00
2019-01-02 23:32:41 +01:00
// 'goto' with either an integer literal or a (possibly dotted) name as its target.
unconditionaljump
    : 'goto' ( integerliteral | scoped_identifier )
    ;
2018-08-09 01:54:43 +02:00
2018-08-10 23:56:30 +02:00
// A compiler directive: one of the '%...' keywords, followed by an optional
// comma-separated list of arguments.
// The argument list is written as a single optional group. The previous form,
// (directivearg? | directivearg (',' directivearg)*), matched an empty list and a
// one-element list through both alternatives, which made the rule ambiguous.
// The accepted input and the generated context accessors are unchanged.
directive
    : directivename = ( '%output' | '%launcher' | '%zeropage' | '%zpreserved' | '%zpallowed' | '%address' | '%import' |
                        '%breakpoint' | '%asminclude' | '%asmbinary' | '%option' | '%encoding' )
      ( directivearg (',' directivearg)* )?
    ;
2018-08-09 01:54:43 +02:00
2018-08-10 23:56:30 +02:00
// One directive argument: a string, a name or an integer.
directivearg
    : stringliteral
    | identifier
    | integerliteral
    ;
2018-08-09 01:54:43 +02:00
2023-12-08 22:08:17 +01:00
// Variable declaration: datatype, optional array part ('[expr]' or '[]'),
// optional tags, then one or more comma-separated names.
vardecl
    : datatype ( arrayindex | ARRAYSIG )? decloptions identifier ( ',' identifier )*
    ;

// Zero or more declaration tags, in any order and with repeats allowed by the grammar.
decloptions
    : ( SHARED | ZEROPAGE | ZEROPAGEREQUIRE | SPLIT )*
    ;

// A declaration that is followed by '=' and an initial value expression.
varinitializer
    : vardecl '=' expression
    ;
2018-08-09 01:54:43 +02:00
2018-08-10 00:26:41 +02:00
// 'const' in front of an initialized declaration.
constdecl
    : 'const' varinitializer
    ;

// '&' in front of an initialized declaration.
memoryvardecl
    : ADDRESS_OF varinitializer
    ;
2018-08-10 00:26:41 +02:00
2022-07-04 23:42:49 +02:00
// The built-in type keywords.
datatype
    : 'ubyte'
    | 'byte'
    | 'uword'
    | 'word'
    | 'float'
    | 'str'
    | 'bool'
    ;
2018-08-10 00:26:41 +02:00
// An expression enclosed in square brackets.
arrayindex
    : '[' expression ']'
    ;
2018-08-10 00:26:41 +02:00
2023-12-08 00:57:39 +01:00
// Plain assignment. The right-hand side is either an expression or another
// assignment, so chains of the form  a = b = expr  are accepted.
assignment
    : assign_target '=' ( expression | assignment )
    ;
2018-08-09 01:54:43 +02:00
// Augmented (in-place) assignment: target, compound operator, expression.
augassignment
    : assign_target
      operator = ( '+=' | '-=' | '/=' | '*=' | '&=' | '|=' | '^=' | '%=' | '<<=' | '>>=' )
      expression
    ;
2018-08-10 00:26:41 +02:00
// What may appear on the left of an assignment. Each alternative carries a label
// and therefore gets its own context class in the generated parser.
assign_target
    : scoped_identifier     #IdentifierTarget
    | arrayindexed          #ArrayindexedTarget
    | directmemory          #MemoryTarget
    ;
2018-08-09 01:54:43 +02:00
2018-08-10 23:56:30 +02:00
// Postfix increment / decrement of an assignable target.
postincrdecr
    : assign_target operator = ( '++' | '--' )
    ;
2018-08-09 01:54:43 +02:00
// Expressions.
// The ORDER of the alternatives is significant: in a left-recursive rule ANTLR
// derives operator precedence from it (alternatives listed earlier bind tighter),
// so do not reorder them.
// The operators labelled 'bop' accept an optional line break on either side.
expression
    : '(' expression ')'
    | functioncall
    | <assoc=right> prefix = ('+' | '-' | '~') expression
    | left = expression  EOL?  bop = ('*' | '/' | '%')            EOL?  right = expression
    | left = expression  EOL?  bop = ('+' | '-')                  EOL?  right = expression
    | left = expression  EOL?  bop = ('<<' | '>>')                EOL?  right = expression
    | left = expression  EOL?  bop = '&'                          EOL?  right = expression
    | left = expression  EOL?  bop = '^'                          EOL?  right = expression
    | left = expression  EOL?  bop = '|'                          EOL?  right = expression
    | left = expression  EOL?  bop = ('<' | '>' | '<=' | '>=')    EOL?  right = expression
    | left = expression  EOL?  bop = ('==' | '!=')                EOL?  right = expression
    | rangefrom = expression  rto = ('to' | 'downto')  rangeto = expression  ('step' rangestep = expression)?    // can't create separate rule due to mutual left-recursion
    | left = expression  EOL?  bop = 'in'                         EOL?  right = expression
    | left = expression  EOL?  bop = 'not in'                     EOL?  right = expression
    | prefix = 'not' expression
    | left = expression  EOL?  bop = 'and'                        EOL?  right = expression
    | left = expression  EOL?  bop = 'or'                         EOL?  right = expression
    | left = expression  EOL?  bop = 'xor'                        EOL?  right = expression
    | literalvalue
    | scoped_identifier
    | arrayindexed
    | directmemory
    | addressof
    | expression typecast
    ;
// A (possibly dotted) name followed by a bracketed index expression.
arrayindexed : scoped_identifier arrayindex ;

// Suffix of a cast expression: 'as' plus the target type.
typecast
    : 'as' datatype
    ;

// '@( expression )'.
directmemory
    : '@' '(' expression ')'
    ;

// '&' applied to a name or to an indexed array element.
addressof
    : <assoc=right> ADDRESS_OF ( scoped_identifier | arrayindexed )
    ;
2019-04-04 21:02:24 +02:00
2018-10-01 22:23:16 +02:00
// A call used as an expression.
functioncall
    : scoped_identifier '(' expression_list? ')'
    ;

// A call used as a statement; it may be prefixed with 'void'.
functioncall_stmt
    : VOID? scoped_identifier '(' expression_list? ')'
    ;
2018-08-14 02:22:59 +02:00
2018-08-12 17:16:36 +02:00
// Comma-separated expressions. A line break is allowed after each comma,
// so the list can be split over several lines.
expression_list : expression ( ',' EOL? expression )* ;
// 'return' with an optional value.
returnstmt
    : 'return' expression?
    ;

breakstmt
    : 'break'
    ;

continuestmt
    : 'continue'
    ;
2018-08-10 02:58:41 +02:00
// A single, unqualified name.
identifier
    : NAME
    ;

// One or more names joined by dots.
scoped_identifier
    : NAME ( '.' NAME )*
    ;
2018-08-09 01:54:43 +02:00
// Decimal, hexadecimal or binary integer; the matched token is available as 'intpart'.
integerliteral
    : intpart = ( DEC_INTEGER | HEX_INTEGER | BIN_INTEGER )
    ;

booleanliteral
    : 'true'
    | 'false'
    ;
2018-08-09 01:54:43 +02:00
2019-07-16 00:08:28 +02:00
// Bracketed, comma-separated values (at least one). Line breaks are allowed after '[',
// after each comma and before ']', so the values can be split over several lines.
arrayliteral
    : '[' EOL? expression ( ',' EOL? expression )* EOL? ']'
    ;

// A string, optionally preceded by  name ':'  (labelled 'encoding').
stringliteral
    : ( encoding = NAME ':' )? STRING
    ;

// A character, optionally preceded by  name ':'  (labelled 'encoding').
charliteral
    : ( encoding = NAME ':' )? SINGLECHAR
    ;

floatliteral
    : FLOAT_NUMBER
    ;
2018-08-09 01:54:43 +02:00
2019-07-16 00:08:28 +02:00
2018-08-10 00:26:41 +02:00
// Any literal value.
literalvalue
    : integerliteral
    | booleanliteral
    | arrayliteral
    | stringliteral
    | charliteral
    | floatliteral
    ;
2018-08-10 23:56:30 +02:00
// '%asm' followed by a '{{ ... }}' block, which may start on the next line.
inlineasm
    : '%asm' EOL? INLINEASMBLOCK
    ;

// '%ir' followed by a '{{ ... }}' block, which may start on the next line.
inlineir
    : '%ir' EOL? INLINEASMBLOCK
    ;

// The 'inline' keyword as a rule of its own (used by asmsubroutine).
inline
    : 'inline'
    ;
2018-08-13 01:30:33 +02:00
// Subroutine definition. The body may start on the next line;
// the EOL after the body's closing '}' is mandatory.
subroutine : 'sub' identifier '(' sub_params? ')' sub_return_part? EOL? ( statement_block EOL ) ;

// Return type annotation: '->' followed by a datatype.
sub_return_part
    : '->' datatype
    ;
2018-08-14 14:33:36 +02:00
// Statements and/or empty lines enclosed in curly braces.
statement_block : '{' EOL? ( statement | EOL )* '}' ;
2018-08-13 04:12:42 +02:00
// Parameter list: declarations separated by commas, with an optional line break after each comma.
sub_params
    : vardecl ( ',' EOL? vardecl )*
    ;
// 'asmsub' definition, optionally marked 'inline'. As with 'sub', the EOL after
// the body's closing '}' is mandatory.
asmsubroutine : inline? 'asmsub' asmsub_decl EOL? ( statement_block EOL ) ;

// 'romsub': an integer literal, '=', and an asmsub-style declaration; it has no body.
// (The integer is presumably the routine's address - confirm against the language docs.)
romsubroutine : 'romsub' integerliteral '=' asmsub_decl ;
// Signature shared by 'asmsub' and 'romsub': name, parameters,
// optional clobbers clause, optional return values.
asmsub_decl
    : identifier '(' asmsub_params? ')' asmsub_clobbers? asmsub_returns?
    ;

asmsub_params
    : asmsub_param ( ',' EOL? asmsub_param )*
    ;

// A declaration tied to a register via '@'.
asmsub_param
    : vardecl '@' register = NAME       // A,X,Y,AX,AY,XY,Pc,Pz,Pn,Pv allowed.
    ;

// 'clobbers ( ... )' - the list between the parentheses may be empty.
asmsub_clobbers
    : 'clobbers' '(' clobber? ')'
    ;

clobber
    : NAME ( ',' NAME )*                // A,X,Y allowed
    ;

// '->' followed by one or more return values; a line break is allowed after each comma.
asmsub_returns
    : '->' asmsub_return ( ',' EOL? asmsub_return )*
    ;

// A datatype tied to a register via '@'.
asmsub_return
    : datatype '@' register = NAME      // A,X,Y,AX,AY,XY,Pc,Pz,Pn,Pv allowed
    ;
2018-08-14 14:33:36 +02:00
2019-01-01 18:45:21 +01:00
// 'if' with a single statement or a block as its body, plus an optional 'else' part.
// Line breaks are allowed before the body and before 'else'.
if_stmt
    : 'if' expression EOL? ( statement | statement_block ) EOL? else_part?      // statement is constrained later
    ;

else_part
    : 'else' EOL? ( statement | statement_block )       // statement is constrained later
    ;
2018-09-02 18:32:48 +02:00
// Conditional statement introduced by one of the 'if_xx' keywords of branchcondition.
// It has the same shape as if_stmt: line breaks are allowed before the body and before
// 'else', and nothing is required after the statement itself. The rule used to demand a
// trailing EOL, which if_stmt does not; because of that a branch statement directly
// followed by '}' or nested as the single-statement body of another branch statement
// could not be parsed. A line break that follows the statement is still accepted: it is
// matched by the optional EOL in front of else_part or by the enclosing rule.
branch_stmt
    : branchcondition EOL? ( statement | statement_block ) EOL? else_part?
    ;
2018-09-22 00:33:25 +02:00
// The keywords that can start a branch_stmt.
branchcondition
    : 'if_cs' | 'if_cc'
    | 'if_eq' | 'if_z'
    | 'if_ne' | 'if_nz'
    | 'if_pl' | 'if_pos'
    | 'if_mi' | 'if_neg'
    | 'if_vs' | 'if_vc'
    ;
2018-09-16 03:00:32 +02:00
// Loop statements. Each body is a single statement or a block.
// 'for', 'while', 'repeat' and 'unroll' allow a line break before the body.

forloop
    : 'for' scoped_identifier 'in' expression EOL? ( statement | statement_block )
    ;

whileloop
    : 'while' expression EOL? ( statement | statement_block )
    ;

// Unlike the other loops, no line break is accepted between 'do' and the body;
// one is allowed between the body and 'until'.
untilloop
    : 'do' ( statement | statement_block ) EOL? 'until' expression
    ;

// The iteration count expression is optional.
repeatloop
    : 'repeat' expression? EOL? ( statement | statement_block )
    ;

unrollloop
    : 'unroll' expression EOL? ( statement | statement_block )      // note: expression must evaluate to a constant
    ;
2023-03-14 23:37:49 +01:00
2023-06-27 01:59:22 +02:00
// 'when' statement: an expression followed by a brace-enclosed list of choices
// and/or empty lines. A line break may follow the closing '}'.
whenstmt
    : 'when' expression EOL?
      '{' EOL?
          ( when_choice | EOL )*
      '}' EOL?
    ;

// One choice: a list of values or 'else', then '->' and a statement or block.
when_choice
    : ( expression_list | 'else' ) '->' ( statement | statement_block )
    ;