""" Programming Language for 6502/6510 microprocessors This is the lexer of the IL65 code, that generates a stream of tokens for the parser. Written by Irmen de Jong (irmen@razorvine.net) License: GNU GPL 3.0, see LICENSE """ import sys import ply.lex import attr @attr.s(slots=True, frozen=True) class SourceRef: file = attr.ib(type=str) line = attr.ib(type=int) column = attr.ib(type=int, default=0) def __str__(self) -> str: if self.column: return "{:s}:{:d}:{:d}".format(self.file, self.line, self.column) if self.line: return "{:s}:{:d}".format(self.file, self.line) return self.file # token names tokens = ( "INTEGER", "FLOATINGPOINT", "DOTTEDNAME", "NAME", "IS", "CLOBBEREDREGISTER", "REGISTER", "COMMENT", "DIRECTIVE", "AUGASSIGN", "EQUALS", "NOTEQUALS", "RARROW", "RETURN", "VARTYPE", "SUB", "DATATYPE", "CHARACTER", "STRING", "BOOLEAN", "GOTO", "INCR", "DECR", "LT", "GT", "LE", "GE", "BITAND", "BITOR", "BITXOR", "BITINVERT", "LOGICAND", "LOGICOR", "LOGICNOT", "POWER", "LABEL", "IF", "PRESERVEREGS", "INLINEASM", "ENDL" ) literals = ['+', '-', '*', '/', '(', ')', '[', ']', '{', '}', '.', ',', '!', '?', ':'] # regex rules for simple tokens t_BITAND = r"&" t_BITOR = r"\|" t_BITXOR = r"\^" t_BITINVERT = r"~" t_IS = r"=" t_AUGASSIGN = r"\+=|-=|/=|\*=|<<=|>>=|&=|\|=|\^=" t_DECR = r"--" t_INCR = r"\+\+" t_EQUALS = r"==" t_NOTEQUALS = r"!=" t_LT = r"<" t_GT = r">" t_LE = r"<=" t_GE = r">=" t_IF = "if(_[a-z]+)?" t_RARROW = r"->" t_POWER = r"\*\*" # ignore inline whitespace t_ignore = " \t" t_inlineasm_ignore = " \t\r\n" # states for allowing %asm inclusion of raw assembly states = ( ('inlineasm', 'exclusive'), ) # reserved words reserved = { "sub": "SUB", "var": "VARTYPE", "memory": "VARTYPE", "const": "VARTYPE", "goto": "GOTO", "return": "RETURN", "true": "BOOLEAN", "false": "BOOLEAN", "not": "LOGICNOT", "and": "LOGICAND", "or": "LOGICOR", "AX": "REGISTER", "AY": "REGISTER", "XY": "REGISTER", "SC": "REGISTER", "SI": "REGISTER", "SZ": "REGISTER", "A": "REGISTER", "X": "REGISTER", "Y": "REGISTER", "if": "IF", "if_true": "IF", "if_not": "IF", "if_zero": "IF", "if_ne": "IF", "if_eq": "IF", "if_cc": "IF", "if_cs": "IF", "if_vc": "IF", "if_vs": "IF", "if_ge": "IF", "if_le": "IF", "if_gt": "IF", "if_lt": "IF", "if_pos": "IF", "if_get": "IF", } # rules for tokens with some actions def t_inlineasm(t): r"%asm\s*\{[^\S\n]*" t.lexer.code_start = t.lexer.lexpos # Record start position t.lexer.level = 1 # initial brace level t.lexer.begin("inlineasm") # enter state 'inlineasm' def t_inlineasm_lbrace(t): r"\{" t.lexer.level += 1 def t_inlineasm_rbrace(t): r"\}" t.lexer.level -= 1 # if closing brace, return code fragment if t.lexer.level == 0: t.value = t.lexer.lexdata[t.lexer.code_start:t.lexer.lexpos-1] t.type = "INLINEASM" t.lexer.lineno += t.value.count("\n") t.lexer.begin("INITIAL") # back to normal lexing rules return t def t_inlineasm_comment(t): r";[^\n]*" pass def t_inlineasm_string(t): r"""(?x) # verbose mode (?") sref = SourceRef(filename, line, col) if hasattr(t.lexer, "error_function"): t.lexer.error_function(sref, "illegal character '{:s}'", t.value[0]) else: print("{}: illegal character '{:s}'".format(sref, t.value[0]), file=sys.stderr) t.lexer.skip(1) def find_tok_column(token): """ Find the column of the token in its line.""" last_cr = lexer.lexdata.rfind('\n', 0, token.lexpos) return token.lexpos - last_cr def print_warning(text: str, sourceref: SourceRef = None) -> None: if sourceref: print_bold("warning: {}: {:s}".format(sourceref, text)) else: print_bold("warning: " + text) def print_bold(text: str) -> None: if sys.stdout.isatty(): print("\x1b[1m" + text + "\x1b[0m", flush=True) else: print(text) lexer = ply.lex.lex() if __name__ == "__main__": ply.lex.runmain() # lexer = ply.lex.Lexer() # ply.lex.runmain(lexer=lexer)