/*
Prog8 combined lexer and parser grammar
(source file: prog8/compiler/antlr/prog8.g4)

NOTES:

- whitespace is ignored. (tabs/spaces)
- every position can be empty, be a comment, or contain ONE statement.

*/

grammar prog8;

// Exactly one line-break character, optional indentation, then a COMMENT.
// The whole match (line break included) is sent to the hidden channel.
LINECOMMENT : [\r\n][ \t]* COMMENT -> channel(HIDDEN);

// A ';' and everything after it up to, but not including, the end of the line.
// Sent to the hidden channel rather than dropped.
COMMENT :  ';' ~[\r\n]* -> channel(HIDDEN) ;

// A single space or tab; discarded.
WS :  [ \t] -> skip ;

// A run of one or more line-break characters is ONE token; parser rules use it as a separator.
EOL :  [\r\n]+ ;

// Identifier token: a letter or underscore, followed by any number of letters, digits or underscores.
NAME :  [a-zA-Z_][a-zA-Z0-9_]* ;

// Decimal integer: either a single digit, or two or more digits not starting with '0'.
DEC_INTEGER :  [0-9] | [1-9] [0-9]+ ;

// Hexadecimal integer: '$' followed by one or more hex digits (either case).
HEX_INTEGER :  '$' [a-fA-F0-9]+ ;

// Binary integer: '%' followed by one or more '0'/'1' digits.
BIN_INTEGER :  '%' [01]+ ;

// Unsigned floating point literal: an FNUMBER with an optional exponent part.
// The exponent may have its own '+'/'-' sign. Note that the exponent is itself an FNUMBER,
// so this rule also matches an exponent that contains a fractional part.
FLOAT_NUMBER :  FNUMBER (('E'|'e') ('+' | '-')? FNUMBER)? ;	// sign comes later from unary expression

// One or more digits, optionally followed by '.' and one or more digits.
fragment FNUMBER :  ('0' .. '9') + ('.' ('0' .. '9') +)? ;

// A backslash followed by any single character, or a backslash followed by a line break run.
fragment STRING_ESCAPE_SEQ :  '\\' . | '\\' EOL;

// Double-quoted string on a single line. Inside the quotes: escape sequences, or any character
// other than backslash, CR, LF, form feed and the double quote itself.
STRING :
	'"' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f"] )* '"'
	{
		// get rid of the enclosing quotes
		String s = getText();
		setText(s.substring(1, s.length() - 1));
	}
	;

// Raw text enclosed in '{{' ... '}}'. The match is non-greedy, so it ends at the first '}}',
// and '.+?' requires at least one character between the delimiters.
INLINEASMBLOCK :
	'{{' .+? '}}'
	{
		// get rid of the enclosing double braces
		String s = getText();
		setText(s.substring(2, s.length() - 2));
	}
	;

// Single-quoted character literal: exactly one escape sequence or one ordinary character.
// The ordinary-character set excludes the single quote (the delimiter of this literal), so that
// '"' is a valid character literal and a literal single quote has to be written escaped.
SINGLECHAR :
	'\'' ( STRING_ESCAPE_SEQ | ~[\\\r\n\f'] ) '\''
	{
		// get rid of the enclosing quotes
		String s = getText();
		setText(s.substring(1, s.length() - 1));
	}
	;

// Start rule: any mix of module statements and line breaks, up to the end of the input.
module :  (modulestatement | EOL)* EOF ;

// Only directives and blocks may appear directly at module level.
modulestatement :  directive | block ;

// '~', a name, an optional integer literal and a braced statement block; a line break must follow.
// NOTE(review): the integer literal is presumably the block's address -- confirm in the compiler.
block :  '~' identifier integerliteral? statement_block EOL ;

// Any single statement. The alternatives are tried in the order listed; varinitializer comes
// before vardecl because both start with the same 'datatype arrayspec? identifier' prefix.
statement :
	directive
	| varinitializer
	| vardecl
	| constdecl
	| memoryvardecl
	| assignment
	| augassignment
	| unconditionaljump
	| postincrdecr
	| functioncall_stmt
	| if_stmt
	| branch_stmt
	| subroutine
	| asmsubroutine
	| inlineasm
	| returnstmt
	| forloop
	| whileloop
	| repeatloop
	| breakstmt
	| continuestmt
	| labeldef
	;

// A label definition: a name directly followed by ':'.
labeldef :  identifier ':'  ;

// 'goto' to an integer literal, a plain name, or a dotted (scoped) name.
unconditionaljump :  'goto'  (integerliteral | identifier | scoped_identifier) ;

// A '%'-directive followed by zero or more comma-separated arguments.
// The argument list is written as one optional group so that the "no argument" and
// "single argument" cases are each matched by exactly one path through the rule.
directive :
	directivename=('%output' | '%launcher' | '%zeropage' | '%zpreserved' | '%address' | '%import' |
                       '%breakpoint' | '%asminclude' | '%asmbinary' | '%option')
        (directivearg (',' directivearg)*)?
        ;

// A directive argument is a string literal, a name, or an integer literal.
directivearg : stringliteral | identifier | integerliteral ;

// Variable declaration without a value: type, optional array size, name.
vardecl:  datatype arrayspec? identifier ;

// Variable declaration with an initial value.
varinitializer :  datatype arrayspec? identifier '=' expression ;

// 'const' and 'memory' declarations are a keyword in front of a declaration-with-value.
constdecl:  'const' varinitializer ;

memoryvardecl:  'memory' varinitializer;

// The built-in type keywords.
datatype:  'ubyte' | 'byte' | 'uword' | 'word' | 'float' | 'str' | 'str_p' | 'str_s' | 'str_ps' ;

// A bracketed expression; used both in declarations (vardecl) and for indexing (arrayindexed).
arrayspec:  '[' expression ']' ;

// Plain assignment; the left-hand side may list several comma-separated targets.
assignment :  assign_targets '=' expression ;

assign_targets : assign_target (',' assign_target)* ;

// Augmented assignment: exactly one target, one of the listed operators, then the value.
augassignment :
	assign_target operator=('+=' | '-=' | '/=' | '//=' | '*=' | '**=' | '&=' | '|=' | '^=' | '%=' ) expression
	;

// Anything that can be assigned to: a register, a plain or scoped name, or an indexed array element.
assign_target:
	register
	| identifier
	| scoped_identifier
	| arrayindexed
	;

// Postfix increment / decrement of a single target.
postincrdecr :  assign_target  operator = ('++' | '--') ;

// Expressions, written as a single left-recursive rule.
// The ORDER of the alternatives matters: in an ANTLR 4 left-recursive rule, an alternative listed
// earlier takes precedence over one listed later, so do not reorder these lines.
// The labels (prefix, left, bop, right, rangefrom, rangeto, rangestep) become fields of the
// generated parse-tree context and must keep their names.
expression :
	'(' expression ')'
	| functioncall
	| prefix = ('+'|'-'|'~') expression
	| left = expression bop = '**' right = expression
	| left = expression bop = ('*' | '/' | '//' | '%' ) right = expression
	| left = expression bop = ('+' | '-' ) right = expression
	| left = expression bop = ('<' | '>' | '<=' | '>=') right = expression
	| left = expression bop = ('==' | '!=') right = expression
	| left = expression bop = '&' right = expression
	| left = expression bop = '^' right = expression
	| left = expression bop = '|' right = expression
	| rangefrom = expression 'to' rangeto = expression ('step' rangestep = expression)?	// can't create separate rule due to mutual left-recursion
	| left = expression bop = 'and' right = expression
	| left = expression bop = 'or' right = expression
	| left = expression bop = 'xor' right = expression
	| prefix = 'not' expression
	| literalvalue
	| register
	| identifier
	| scoped_identifier
	| arrayindexed
	| expression typecast
	;

// Suffix of the 'expression typecast' alternative: the keyword 'as' followed by a type.
typecast : 'as' datatype;

// A plain or scoped name followed by a bracketed index expression.
arrayindexed :
	(identifier | scoped_identifier ) arrayspec
	;

// A call used as (part of) an expression.
functioncall :
	(identifier | scoped_identifier) '(' expression_list? ')'
	;

// A call used as a statement. Same syntax as functioncall, but a separate rule so that the
// parse tree distinguishes the two uses.
functioncall_stmt :
	(identifier | scoped_identifier) '(' expression_list? ')'
	;

// Comma-separated expressions; a line break is allowed after each comma.
expression_list :
	expression (',' EOL? expression)*
	;

// 'return' with zero or more comma-separated values.
returnstmt : 'return' expression_list? ;

// Bare keywords; see whileloop / repeatloop / forloop for the loop constructs.
breakstmt : 'break';

continuestmt: 'continue';

// A single, undotted name.
identifier :  NAME ;

// Two or more names joined by '.'.
scoped_identifier :  NAME ('.' NAME)+ ;

// Single register names.
register :  'A' | 'X' | 'Y' ;

// Single registers plus the two-letter pair names.
registerorpair :  'A' | 'X' | 'Y' | 'AX' | 'AY' | 'XY' ;   // only used in subroutine params and returnvalues

// NOTE(review): presumably the carry / zero / negative / overflow status flags -- confirm.
statusregister :  'Pc' | 'Pz' | 'Pn' | 'Pv' ;

// Decimal, hex or binary integer token, optionally followed by the '.w' suffix.
integerliteral :  intpart=(DEC_INTEGER | HEX_INTEGER | BIN_INTEGER) wordsuffix? ;

// NOTE(review): presumably marks the literal as word-sized -- confirm in the compiler.
wordsuffix : '.w' ;

booleanliteral :  'true' | 'false' ;

// Bracketed, comma-separated, non-empty list of expressions. Line breaks are allowed after '[',
// after each ',' and before ']'.
arrayliteral :  '[' EOL? expression (',' EOL? expression)* EOL? ']' ;

// Thin parser-rule wrappers around the corresponding lexer tokens.
stringliteral :  STRING ;

charliteral :  SINGLECHAR ;

floatliteral :  FLOAT_NUMBER ;

// Any literal value that can appear in an expression.
literalvalue :
	integerliteral
	| booleanliteral
	| arrayliteral
	| stringliteral
	| charliteral
	| floatliteral
	;

// '%asm' followed by a '{{ ... }}' block token.
inlineasm :  '%asm' INLINEASMBLOCK;

// 'sub' name '(' params ')' with an optional '-> types' part, then a braced body.
// A line break is required after the closing brace.
subroutine :
	'sub' identifier '(' sub_params? ')' sub_return_part? (statement_block EOL)
	;

sub_return_part : '->' sub_returns  ;

// Braced body: a line break is required directly after '{'; inside, each position is either a
// statement or a line break.
statement_block :
	'{' EOL
		(statement | EOL) *
	'}'
	;

// Comma-separated parameter declarations / return types; a line break is allowed after each comma.
sub_params :  vardecl (',' EOL? vardecl)* ;

sub_returns :  datatype (',' EOL? datatype)*  ;

// 'asmsub' name '(' params ')' '->' 'clobbers' '(' registers ')' '->' '(' returns ')',
// followed by EITHER '= <integer>' OR a braced body. Unlike 'subroutine', this rule itself
// does not require a trailing line break.
asmsubroutine :
	'asmsub' identifier '(' asmsub_params? ')'
	'->' 'clobbers' '(' clobber? ')' '->' '(' asmsub_returns? ')'  (asmsub_address | statement_block )
	;

asmsub_address :  '=' address=integerliteral ;

// Comma-separated parameters; a line break is allowed after each comma.
asmsub_params :  asmsub_param (',' EOL? asmsub_param)* ;

// A parameter declaration followed by '@' and the register, register pair or status flag name.
asmsub_param :  vardecl '@' (registerorpair | statusregister);

// Comma-separated single registers (no line breaks allowed inside this list).
clobber :  register (',' register)* ;

// Comma-separated return specifications; a line break is allowed after each comma.
asmsub_returns :  asmsub_return (',' EOL? asmsub_return)* ;

// A return type followed by '@' and the register, register pair or status flag name.
asmsub_return :  datatype '@' (registerorpair | statusregister) ;

// 'if' condition, then a single statement or a braced block, then an optional else part.
// Line breaks are optional after the condition and before 'else'; one is required at the end.
if_stmt :  'if' expression EOL? (statement | statement_block) EOL? else_part? EOL ; // statement is constrained later

else_part :  'else' EOL? (statement | statement_block) ;   // statement is constrained later

// Same shape as if_stmt, but introduced by one of the branchcondition keywords instead of
// 'if' + expression.
branch_stmt : branchcondition EOL? (statement | statement_block) EOL? else_part? EOL ;

branchcondition: 'if_cs' | 'if_cc' | 'if_eq' | 'if_z' | 'if_ne' | 'if_nz' | 'if_pl' | 'if_pos' | 'if_mi' | 'if_neg' | 'if_vs' | 'if_vc' ;

// 'for' [type] (register | name) 'in' expression, then a braced body (a single statement is
// not allowed here, unlike in the while and repeat loops).
forloop :  'for' datatype? (register | identifier) 'in' expression EOL? statement_block ;

// 'while' condition, then a single statement or a braced block.
whileloop:  'while' expression EOL? (statement | statement_block) ;

// 'repeat' body 'until' condition; a line break is allowed before 'until'.
repeatloop:  'repeat' (statement | statement_block) EOL? 'until' expression ;