prog8/il65/plylex.py

"""
Programming Language for 6502/6510 microprocessors, codename 'Sick'
This is the lexer of the IL65 code, that generates a stream of tokens for the parser.

Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
"""

import ast
import sys
import ply.lex
import attr


@attr.s(slots=True, frozen=True)
class SourceRef:
    """Immutable reference to a position in a source file."""
    # name of the source file
    file = attr.ib(type=str)
    # line number; a falsy value means "no line known"
    line = attr.ib(type=int)
    # column number; the default 0 means "no column known"
    column = attr.ib(type=int, default=0)

    def __str__(self) -> str:
        """Render as ``file``, ``file:line`` or ``file:line:column``.

        The most specific form for which information is present is used.
        """
        if not self.column:
            if not self.line:
                return self.file
            return "{:s}:{:d}".format(self.file, self.line)
        return "{:s}:{:d}:{:d}".format(self.file, self.line, self.column)


# Token names.
# This tuple lists every token type the lexer can hand to the parser;
# ply.lex reads the module-level name `tokens` when the lexer is built.
# Several of these names have no t_<NAME> rule of their own in this module
# (e.g. REGISTER, RETURN, SUB, GOTO, VARTYPE): they are assigned by t_NAME
# through the `reserved` table. CHARACTER is assigned by t_STRING and
# INLINEASM by t_inlineasm_rbrace.

tokens = (
    "INTEGER",
    "FLOATINGPOINT",
    "DOTTEDNAME",
    "NAME",
    "IS",
    "CLOBBEREDREGISTER",
    "REGISTER",
    "COMMENT",
    "DIRECTIVE",
    "AUGASSIGN",
    "EQUALS",
    "NOTEQUALS",
    "RARROW",
    "RETURN",
    "VARTYPE",
    "SUB",
    "DATATYPE",
    "CHARACTER",
    "STRING",
    "BOOLEAN",
    "GOTO",
    "INCR",
    "DECR",
    "LT",
    "GT",
    "LE",
    "GE",
    "BITAND",
    "BITOR",
    "BITXOR",
    "BITINVERT",
    "SHIFTLEFT",
    "SHIFTRIGHT",
    "LOGICAND",
    "LOGICOR",
    "LOGICXOR",
    "LOGICNOT",
    "INTEGERDIVIDE",
    "MODULO",
    "POWER",
    "LABEL",
    "IF",
    "PRESERVEREGS",
    "INLINEASM",
    "ENDL"
)

# Single-character literals: ply.lex returns each of these as a token whose
# type and value are the character itself, when no other rule matches first.
literals = ['+', '-', '*', '/', '(', ')', '[', ']', '{', '}', '.', ',', '!', '?', ':']

# Regex rules for simple tokens (plain pattern strings without action code).
# ply.lex adds string rules after all function rules and sorts them by
# decreasing regex length, so the textual order below does not decide which
# of two overlapping operators (e.g. "<<" and "<") is tried first.

t_SHIFTLEFT = r"<<"
t_SHIFTRIGHT = r">>"
t_INTEGERDIVIDE = r"//"
t_BITAND = r"&"
t_BITOR = r"\|"
t_BITXOR = r"\^"
t_BITINVERT = r"~"
t_IS = r"="
# all augmented assignment operators share one token type
t_AUGASSIGN = r"\+=|-=|/=|//=|\*=|\*\*=|<<=|>>=|&=|\|=|\^="
t_DECR = r"--"
t_INCR = r"\+\+"
t_EQUALS = r"=="
t_NOTEQUALS = r"!="
t_LT = r"<"
t_GT = r">"
t_LE = r"<="
t_GE = r">="
# NOTE(review): the function rule t_NAME also matches these words and maps the
# known ones to IF via `reserved`; presumably it wins over this string rule - confirm.
t_IF = "if(_[a-z]+)?"
t_RARROW = r"->"
t_POWER = r"\*\*"


# Characters skipped between tokens.
# In the default state only spaces and tabs are ignored (newlines are tokens,
# see t_ENDL); inside the 'inlineasm' state CR and LF are ignored as well.
t_ignore = " \t"
t_inlineasm_ignore = " \t\r\n"


# Extra lexer states, used to allow %asm inclusion of raw assembly.
# 'inlineasm' is exclusive: while it is active only the t_inlineasm_* rules
# apply. It is entered by t_inlineasm and left again by t_inlineasm_rbrace.
states = (
    ('inlineasm', 'exclusive'),
)

# Reserved words: maps the exact source text of a keyword or register name to
# its token type. t_NAME looks every identifier up here, and t_DOTTEDNAME
# rejects dotted names that contain one of these words.
reserved = {
    "sub": "SUB",
    "var": "VARTYPE",
    "memory": "VARTYPE",
    "const": "VARTYPE",
    "goto": "GOTO",
    "return": "RETURN",
    "true": "BOOLEAN",
    "false": "BOOLEAN",
    "not": "LOGICNOT",
    "and": "LOGICAND",
    "or": "LOGICOR",
    "xor": "LOGICXOR",
    "mod": "MODULO",
    "AX": "REGISTER",
    "AY": "REGISTER",
    "XY": "REGISTER",
    "SC": "REGISTER",
    "SI": "REGISTER",
    "SZ": "REGISTER",
    "A": "REGISTER",
    "X": "REGISTER",
    "Y": "REGISTER",
    "if": "IF",
    "if_true": "IF",
    "if_not": "IF",
    "if_zero": "IF",
    "if_ne": "IF",
    "if_eq": "IF",
    "if_cc": "IF",
    "if_cs": "IF",
    "if_vc": "IF",
    "if_vs": "IF",
    "if_ge": "IF",
    "if_le": "IF",
    "if_gt": "IF",
    "if_lt": "IF",
    "if_pos": "IF",
    # NOTE(review): "if_get" looks unusual next to if_ge / if_gt - confirm it is intended
    "if_get": "IF",
}


# rules for tokens with some actions

def t_inlineasm(t):
    r"""%asm\s*\{[^\S\n]*"""
    # Start of an inline assembly block ("%asm {").
    # Remember where the raw assembly text begins, start counting braces at
    # one, and switch to the 'inlineasm' state. No token is returned here;
    # the INLINEASM token is produced by the matching closing brace.
    asm_lexer = t.lexer
    asm_lexer.level = 1
    asm_lexer.code_start = asm_lexer.lexpos
    asm_lexer.begin("inlineasm")


def t_inlineasm_lbrace(t):
    r"""\{"""
    # An opening brace inside the assembly text: one nesting level deeper.
    asm_lexer = t.lexer
    asm_lexer.level = asm_lexer.level + 1


def t_inlineasm_rbrace(t):
    r"""\}"""
    # A closing brace inside the assembly text.
    # Only the brace that brings the nesting level back to zero ends the
    # block; inner braces are counted and produce no token.
    asm_lexer = t.lexer
    asm_lexer.level -= 1
    if asm_lexer.level != 0:
        return None
    # Everything from just after "%asm {" up to (not including) this brace
    # becomes the value of a single INLINEASM token.
    code = asm_lexer.lexdata[asm_lexer.code_start:asm_lexer.lexpos - 1]
    t.value = code
    t.type = "INLINEASM"
    # keep the line counter in step with the newlines inside the block
    asm_lexer.lineno += code.count("\n")
    asm_lexer.begin("INITIAL")    # resume the normal lexing rules
    return t


def t_inlineasm_comment(t):
    r""";[^\n]*"""
    # Assembly comment (";" up to the end of the line): consumed without
    # producing a token, so braces inside comments are not counted.
    return None


def t_inlineasm_string(t):
    r"""(?x)   # verbose mode
    (?<!\\)    # not preceded by a backslash
    "          # a literal double-quote
    .*?        # 1-or-more characters
    (?<!\\)    # not preceded by a backslash
    "          # a literal double-quote
    |
    (?<!\\)    # not preceded by a backslash
    '          # a literal single quote
    .*?        # 1-or-more characters
    (?<!\\)    # not preceded by a backslash
    '          # a literal double-quote
    """
    # Quoted string inside the assembly text: consumed without producing a
    # token, so braces inside string literals are not counted.
    return None


def t_inlineasm_nonspace(t):
    r"""[^\s\{\}\'\"]+"""
    # Any run of characters that is not whitespace, a brace or a quote:
    # plain assembly text, consumed without producing a token.
    return None


def t_inlineasm_error(t):
    # Error handler for the 'inlineasm' state: a character that no
    # inlineasm rule matches is simply stepped over, one at a time.
    asm_lexer = t.lexer
    asm_lexer.skip(1)


def t_CLOBBEREDREGISTER(t):
    r"""(AX|AY|XY|A|X|Y)\?"""
    # A register name directly followed by '?'.
    # The token value is the register name alone, without the marker.
    register_name = t.value[:-1]
    t.value = register_name
    return t


def t_DATATYPE(t):
    r"""\.byte|\.wordarray|\.float|\.array|\.word|\.text|\.stext|\.ptext|\.pstext|\.matrix"""
    # A datatype keyword written with a leading dot (".byte", ".word", ...).
    # The token value is the type name without that dot.
    type_name = t.value[1:]
    t.value = type_name
    return t


def t_LABEL(t):
    r"""[a-zA-Z_]\w*\s*:"""
    # An identifier followed by a colon (whitespace in between is allowed).
    # The token value is the bare label name: the colon is cut off and
    # surrounding whitespace is removed.
    without_colon = t.value[:-1]
    t.value = without_colon.strip()
    return t


def t_BOOLEAN(t):
    r"""(?:true|false)\b"""
    # Boolean literal; the token value becomes the Python bool True or False.
    # This rule is defined before t_NAME and therefore tried first. The
    # trailing \b makes sure only the complete words match: without it an
    # identifier that merely starts with a keyword (e.g. "trueness" or
    # "false_flag") would be split into a BOOLEAN followed by a NAME.
    t.value = t.value == "true"
    return t


def t_DOTTEDNAME(t):
    r"""[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)+"""
    # Two or more identifiers joined by dots.
    # The pattern accepts any number of segments, so every segment is checked
    # against the reserved words (unpacking the split into exactly two names
    # would raise ValueError for input such as "a.b.c").
    # When a reserved word is found the error is reported and None is
    # returned, which makes the lexer discard the token.
    if any(part in reserved for part in t.value.split(".")):
        custom_error(t, "reserved word as part of dotted name")
        return None
    return t


def t_NAME(t):
    r"""[a-zA-Z_]\w*"""
    # Identifier or reserved word.
    # Keywords and register names have the same shape as identifiers, so the
    # matched text is looked up in `reserved`; anything not listed there is
    # an ordinary NAME.
    try:
        t.type = reserved[t.value]
    except KeyError:
        t.type = "NAME"
    return t


def t_DIRECTIVE(t):
    r"""%[a-z]+\b"""
    # A '%' followed by lowercase letters.
    # The token value is the directive name without the percent sign.
    directive_name = t.value[1:]
    t.value = directive_name
    return t


def t_STRING(t):
    r"""(?x)   # verbose mode
    (?<!\\)    # not preceded by a backslash
    "          # a literal double-quote
    .*?        # 1-or-more characters
    (?<!\\)    # not preceded by a backslash
    "          # a literal double-quote
    |
    (?<!\\)    # not preceded by a backslash
    '          # a literal single quote
    .*?        # 1-or-more characters
    (?<!\\)    # not preceded by a backslash
    '          # a literal double-quote
    """
    # Single- or double-quoted string literal.
    # The quoted source text is evaluated as a Python literal, so the token
    # value is the string content with escape sequences already processed.
    text = ast.literal_eval(t.value)
    t.value = text
    # A one-character result, or a two-character result that starts with a
    # backslash, is reported as CHARACTER instead of STRING.
    is_single_char = len(text) == 1
    is_backslash_pair = len(text) == 2 and text[0] == '\\'
    if is_single_char or is_backslash_pair:
        t.type = "CHARACTER"
    return t


def t_FLOATINGPOINT(t):
    r"""((?: (?: \d* \. \d+ ) | (?: \d+ \.? ) )(?: [Ee] [+-]? \d+ ) ?)(?![a-z])"""
    # Decimal number, optionally with a fraction and/or exponent.
    # The pattern also matches plain digit sequences; when the text parses as
    # an int the token is re-typed to INTEGER, otherwise it stays a
    # FLOATINGPOINT token holding a float.
    text = t.value
    try:
        whole = int(text)
    except ValueError:
        t.value = float(text)
    else:
        t.value = whole
        t.type = "INTEGER"
    return t


def t_INTEGER(t):
    r"""\$?[a-fA-F\d]+ | [\$%]?\d+ | %?[01]+"""
    # Integer literal: '$' prefix means hexadecimal, '%' prefix means binary,
    # no prefix means decimal. The token value becomes a Python int.
    # NOTE(review): the pattern can match text that int() rejects (hex digits
    # without '$', e.g. "1f", or non-binary digits after '%'); that raises
    # ValueError here - confirm whether such input should be reported instead.
    text = t.value
    negative = False
    if text[0] in "+-":
        # an explicit sign is split off and applied after the conversion
        negative = text[0] == "-"
        text = text[1:]
        t.value = text
    radix = {'$': 16, '%': 2}.get(text[0])
    if radix is None:
        magnitude = int(text)
    else:
        magnitude = int(text[1:], radix)
    t.value = -magnitude if negative else magnitude
    return t


def t_COMMENT(t):
    r"""[ \t]*;[^\n]*"""
    # Comment: ';' up to, but not including, the end of the line, so the
    # newline itself is still seen by t_ENDL. Nothing is returned, which
    # means comments are dropped and never reach the parser.
    return


def t_PRESERVEREGS(t):
    r"""!\s*[AXY]{0,3}\s*(?!=)"""
    # A '!' optionally followed by up to three of the letters A, X, Y
    # (but not "!=", which is the NOTEQUALS operator).
    # The token value is the register letters only. Just the leading '!' is
    # cut off and strip() removes any surrounding whitespace; slicing off the
    # last character as well would lose a register letter whenever the match
    # does not end in whitespace (e.g. "!AXY" directly followed by ')').
    t.value = t.value[1:].strip()
    return t


def t_ENDL(t):
    r"""\n+"""
    # One or more consecutive newlines.
    # The lexer's line counter advances by the number of newlines matched,
    # but the run is collapsed into a single ENDL token with value "\n".
    # The token is returned because line ends are significant to the parser.
    newline_count = len(t.value)
    t.value = "\n"
    t.lexer.lineno += newline_count
    return t


def t_error(t):
    # Error handler for the default state: called for input that no rule
    # matches. The offending character is reported together with its source
    # position and then skipped so that lexing can continue.
    # The report goes to the lexer's `error_function` attribute when one has
    # been set on the lexer, and to stdout otherwise.
    line = t.lineno
    col = find_tok_column(t)
    sref = SourceRef(getattr(t.lexer, "source_filename", "<unknown-file>"), line, col)
    if not hasattr(t.lexer, "error_function"):
        print("{}: illegal character '{:s}'".format(sref, t.value[0]))
    else:
        t.lexer.error_function(sref, "illegal character '{:s}'", t.value[0])
    t.lexer.skip(1)


def custom_error(t, message):
    """Report an error for the already matched token *t*.

    The message is passed, together with the token's source position, to the
    lexer's ``error_function`` attribute when the lexer has one; otherwise it
    is printed to stdout. The file name is taken from the lexer's
    ``source_filename`` attribute, falling back to "<unknown-file>".

    Unlike t_error, this function does not skip any input. It is called from
    inside a token rule, where the lexer position already points past the
    matched token, so skipping one more character would silently drop the
    character that follows the token (for instance the newline ending the
    line).
    """
    line, col = t.lineno, find_tok_column(t)
    filename = getattr(t.lexer, "source_filename", "<unknown-file>")
    sref = SourceRef(filename, line, col)
    if hasattr(t.lexer, "error_function"):
        t.lexer.error_function(sref, message)
    else:
        print(sref, message)


def find_tok_column(token):
    """Find the column of the token in its line.

    Returns the 1-based column of ``token.lexpos``, with tabs in front of the
    token expanded to tab stops (str.expandtabs defaults).

    The input text is taken from the lexer attached to the token when there
    is one, and from the module-level ``lexer`` otherwise.
    """
    source_lexer = getattr(token, "lexer", None)
    if source_lexer is None:
        source_lexer = lexer
    data = source_lexer.lexdata
    # rfind returns -1 when the token is on the first line; adding 1 then
    # yields 0, the start of the input, so the first line is measured the
    # same way as every later line (start just after the previous newline).
    line_start = data.rfind('\n', 0, token.lexpos) + 1
    return len(data[line_start:token.lexpos].expandtabs()) + 1


def print_warning(text: str, sourceref: SourceRef = None) -> None:
    """Print *text* as a warning via print_bold.

    The line starts with "warning: "; when a source reference is given it is
    inserted between that prefix and the text.
    """
    if not sourceref:
        message = "warning: " + text
    else:
        message = "warning: {}: {:s}".format(sourceref, text)
    print_bold(message)


def print_bold(text: str) -> None:
    """Print *text* to stdout, in bold when stdout is a terminal.

    On a terminal the text is wrapped in the ANSI bold / reset escape
    sequences and the output is flushed; otherwise it is printed unchanged.
    """
    if not sys.stdout.isatty():
        print(text)
        return
    print("\x1b[1m" + text + "\x1b[0m", flush=True)


# Build the module-level lexer from the token rules defined in this module.
# find_tok_column reads its input text (lexdata) from this object.
lexer = ply.lex.lex()


if __name__ == "__main__":
    # Executed as a script: hand over to PLY's built-in lexer driver.
    ply.lex.runmain()
plyparsing more or less done 2018-01-07 01:36:27 +00:00			`"""`
optimize, tests, refactor 2018-01-08 02:31:23 +00:00			`Programming Language for 6502/6510 microprocessors, codename 'Sick'`
plyparsing more or less done 2018-01-07 01:36:27 +00:00			`This is the lexer of the IL65 code, that generates a stream of tokens for the parser.`

optimize, tests, refactor 2018-01-08 02:31:23 +00:00			`Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0`
plyparsing more or less done 2018-01-07 01:36:27 +00:00			`"""`

tweaks 2018-01-11 23:55:47 +00:00			`import ast`
ply 2018-01-03 20:43:19 +00:00			`import sys`
			`import ply.lex`
renames 2018-01-07 22:45:42 +00:00			`import attr`


			`@attr.s(slots=True, frozen=True)`
			`class SourceRef:`
			`file = attr.ib(type=str)`
			`line = attr.ib(type=int)`
			`column = attr.ib(type=int, default=0)`

			`def __str__(self) -> str:`
			`if self.column:`
			`return "{:s}:{:d}:{:d}".format(self.file, self.line, self.column)`
			`if self.line:`
			`return "{:s}:{:d}".format(self.file, self.line)`
			`return self.file`
comp 2018-01-07 18:14:21 +00:00
ply 2018-01-03 20:43:19 +00:00
			`# token names`

			`tokens = (`
			`"INTEGER",`
			`"FLOATINGPOINT",`
			`"DOTTEDNAME",`
			`"NAME",`
			`"IS",`
			`"CLOBBEREDREGISTER",`
			`"REGISTER",`
			`"COMMENT",`
			`"DIRECTIVE",`
			`"AUGASSIGN",`
endl parsing 2018-01-05 21:52:23 +00:00			`"EQUALS",`
			`"NOTEQUALS",`
ply 2018-01-03 20:43:19 +00:00			`"RARROW",`
			`"RETURN",`
			`"VARTYPE",`
			`"SUB",`
			`"DATATYPE",`
			`"CHARACTER",`
			`"STRING",`
			`"BOOLEAN",`
			`"GOTO",`
			`"INCR",`
			`"DECR",`
			`"LT",`
			`"GT",`
			`"LE",`
			`"GE",`
endl parsing 2018-01-05 21:52:23 +00:00			`"BITAND",`
			`"BITOR",`
			`"BITXOR",`
			`"BITINVERT",`
expressions 2018-01-18 22:33:02 +00:00			`"SHIFTLEFT",`
			`"SHIFTRIGHT",`
endl parsing 2018-01-05 21:52:23 +00:00			`"LOGICAND",`
			`"LOGICOR",`
much improved constant folding by actually evaluating const expressions 2018-02-07 01:10:52 +00:00			`"LOGICXOR",`
endl parsing 2018-01-05 21:52:23 +00:00			`"LOGICNOT",`
expression 2018-01-09 23:44:11 +00:00			`"INTEGERDIVIDE",`
expressions 2018-01-18 22:33:02 +00:00			`"MODULO",`
endl parsing 2018-01-05 21:52:23 +00:00			`"POWER",`
ply 2018-01-03 20:43:19 +00:00			`"LABEL",`
			`"IF",`
			`"PRESERVEREGS",`
			`"INLINEASM",`
endl parsing 2018-01-05 21:52:23 +00:00			`"ENDL"`
ply 2018-01-03 20:43:19 +00:00			`)`

			`literals = ['+', '-', '*', '/', '(', ')', '[', ']', '{', '}', '.', ',', '!', '?', ':']`

			`# regex rules for simple tokens`

expressions 2018-01-18 22:33:02 +00:00			`t_SHIFTLEFT = r"<<"`
			`t_SHIFTRIGHT = r">>"`
expression 2018-01-09 23:44:11 +00:00			`t_INTEGERDIVIDE = r"//"`
endl parsing 2018-01-05 21:52:23 +00:00			`t_BITAND = r"&"`
			`t_BITOR = r"\\|"`
			`t_BITXOR = r"\^"`
			`t_BITINVERT = r"~"`
ply 2018-01-03 20:43:19 +00:00			`t_IS = r"="`
zero division checks and more optimizations 2018-01-23 23:41:50 +00:00			`t_AUGASSIGN = r"\+=\|-=\|/=\|//=\|\=\|\\*=\|<<=\|>>=\|&=\|\\|=\|\^="`
ply 2018-01-03 20:43:19 +00:00			`t_DECR = r"--"`
			`t_INCR = r"\+\+"`
endl parsing 2018-01-05 21:52:23 +00:00			`t_EQUALS = r"=="`
			`t_NOTEQUALS = r"!="`
ply 2018-01-03 20:43:19 +00:00			`t_LT = r"<"`
			`t_GT = r">"`
			`t_LE = r"<="`
			`t_GE = r">="`
			`t_IF = "if(_[a-z]+)?"`
			`t_RARROW = r"->"`
endl parsing 2018-01-05 21:52:23 +00:00			`t_POWER = r"\\"`

ply 2018-01-03 20:43:19 +00:00
			`# ignore inline whitespace`
			`t_ignore = " \t"`
			`t_inlineasm_ignore = " \t\r\n"`


			`# states for allowing %asm inclusion of raw assembly`
			`states = (`
			`('inlineasm', 'exclusive'),`
			`)`

			`# reserved words`
			`reserved = {`
			`"sub": "SUB",`
			`"var": "VARTYPE",`
			`"memory": "VARTYPE",`
			`"const": "VARTYPE",`
			`"goto": "GOTO",`
			`"return": "RETURN",`
			`"true": "BOOLEAN",`
			`"false": "BOOLEAN",`
endl parsing 2018-01-05 21:52:23 +00:00			`"not": "LOGICNOT",`
			`"and": "LOGICAND",`
			`"or": "LOGICOR",`
much improved constant folding by actually evaluating const expressions 2018-02-07 01:10:52 +00:00			`"xor": "LOGICXOR",`
expressions 2018-01-18 22:33:02 +00:00			`"mod": "MODULO",`
ply 2018-01-03 20:43:19 +00:00			`"AX": "REGISTER",`
			`"AY": "REGISTER",`
			`"XY": "REGISTER",`
			`"SC": "REGISTER",`
			`"SI": "REGISTER",`
			`"SZ": "REGISTER",`
			`"A": "REGISTER",`
			`"X": "REGISTER",`
			`"Y": "REGISTER",`
			`"if": "IF",`
			`"if_true": "IF",`
			`"if_not": "IF",`
endl parsing 2018-01-05 21:52:23 +00:00			`"if_zero": "IF",`
ply 2018-01-03 20:43:19 +00:00			`"if_ne": "IF",`
			`"if_eq": "IF",`
			`"if_cc": "IF",`
			`"if_cs": "IF",`
			`"if_vc": "IF",`
			`"if_vs": "IF",`
endl parsing 2018-01-05 21:52:23 +00:00			`"if_ge": "IF",`
			`"if_le": "IF",`
ply 2018-01-03 20:43:19 +00:00			`"if_gt": "IF",`
			`"if_lt": "IF",`
			`"if_pos": "IF",`
			`"if_get": "IF",`
			`}`


			`# rules for tokens with some actions`

			`def t_inlineasm(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""%asm\s\{[^\S\n]"""`
ply 2018-01-03 20:43:19 +00:00			`t.lexer.code_start = t.lexer.lexpos # Record start position`
			`t.lexer.level = 1 # initial brace level`
			`t.lexer.begin("inlineasm") # enter state 'inlineasm'`


			`def t_inlineasm_lbrace(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""\{"""`
ply 2018-01-03 20:43:19 +00:00			`t.lexer.level += 1`


			`def t_inlineasm_rbrace(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""\}"""`
ply 2018-01-03 20:43:19 +00:00			`t.lexer.level -= 1`
print parse tree 2018-01-05 01:41:38 +00:00			`# if closing brace, return code fragment`
ply 2018-01-03 20:43:19 +00:00			`if t.lexer.level == 0:`
			`t.value = t.lexer.lexdata[t.lexer.code_start:t.lexer.lexpos-1]`
			`t.type = "INLINEASM"`
			`t.lexer.lineno += t.value.count("\n")`
			`t.lexer.begin("INITIAL") # back to normal lexing rules`
			`return t`


			`def t_inlineasm_comment(t):`
restructure 2018-02-08 20:10:52 +00:00			`r""";[^\n]*"""`
ply 2018-01-03 20:43:19 +00:00			`pass`


			`def t_inlineasm_string(t):`
			`r"""(?x) # verbose mode`
			`(?<!\\) # not preceded by a backslash`
			`" # a literal double-quote`
			`.*? # 1-or-more characters`
			`(?<!\\) # not preceded by a backslash`
			`" # a literal double-quote`
			`\|`
			`(?<!\\) # not preceded by a backslash`
			`' # a literal single quote`
			`.*? # 1-or-more characters`
			`(?<!\\) # not preceded by a backslash`
			`' # a literal double-quote`
			`"""`
			`pass`


			`def t_inlineasm_nonspace(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""[^\s\{\}\'\"]+"""`
ply 2018-01-03 20:43:19 +00:00			`pass`


			`def t_inlineasm_error(t):`
			`# For bad characters, we just skip over it`
			`t.lexer.skip(1)`


			`def t_CLOBBEREDREGISTER(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""(AX\|AY\|XY\|A\|X\|Y)\?"""`
ply 2018-01-03 20:43:19 +00:00			`t.value = t.value[:-1]`
			`return t`


			`def t_DATATYPE(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""\.byte\|\.wordarray\|\.float\|\.array\|\.word\|\.text\|\.stext\|\.ptext\|\.pstext\|\.matrix"""`
ply 2018-01-03 20:43:19 +00:00			`t.value = t.value[1:]`
			`return t`


			`def t_LABEL(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""[a-zA-Z_]\w\s:"""`
ply 2018-01-03 20:43:19 +00:00			`t.value = t.value[:-1].strip()`
			`return t`


expression 2018-01-09 23:44:11 +00:00			`def t_BOOLEAN(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""true\|false"""`
expression 2018-01-09 23:44:11 +00:00			`t.value = t.value == "true"`
			`return t`


ply 2018-01-03 20:43:19 +00:00			`def t_DOTTEDNAME(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""[a-zA-Z_]\w(\.[a-zA-Z_]\w)+"""`
improved sourceref column calculation when dealing with tabs, added more error checks 2018-01-14 17:02:39 +00:00			`first, second = t.value.split(".")`
			`if first in reserved or second in reserved:`
			`custom_error(t, "reserved word as part of dotted name")`
			`return None`
ply 2018-01-03 20:43:19 +00:00			`return t`


			`def t_NAME(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""[a-zA-Z_]\w*"""`
ply 2018-01-03 20:43:19 +00:00			`t.type = reserved.get(t.value, "NAME") # check for reserved words`
			`return t`


endl parsing 2018-01-05 21:52:23 +00:00			`def t_DIRECTIVE(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""%[a-z]+\b"""`
endl parsing 2018-01-05 21:52:23 +00:00			`t.value = t.value[1:]`
			`return t`


ply 2018-01-03 20:43:19 +00:00			`def t_STRING(t):`
			`r"""(?x) # verbose mode`
			`(?<!\\) # not preceded by a backslash`
			`" # a literal double-quote`
			`.*? # 1-or-more characters`
			`(?<!\\) # not preceded by a backslash`
			`" # a literal double-quote`
			`\|`
			`(?<!\\) # not preceded by a backslash`
			`' # a literal single quote`
			`.*? # 1-or-more characters`
			`(?<!\\) # not preceded by a backslash`
			`' # a literal double-quote`
			`"""`
tweaks 2018-01-11 23:55:47 +00:00			`t.value = ast.literal_eval(t.value)`
ply 2018-01-03 20:43:19 +00:00			`if len(t.value) == 1:`
			`t.type = "CHARACTER"`
			`if len(t.value) == 2 and t.value[0] == '\\':`
			`t.type = "CHARACTER"`
			`return t`


			`def t_FLOATINGPOINT(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""((?: (?: \d* \. \d+ ) \| (?: \d+ \.? ) )(?: [Ee] [+-]? \d+ ) ?)(?![a-z])"""`
ply 2018-01-03 20:43:19 +00:00			`try:`
			`t.value = int(t.value)`
			`t.type = "INTEGER"`
			`except ValueError:`
			`t.value = float(t.value)`
			`return t`


			`def t_INTEGER(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""\$?[a-fA-F\d]+ \| [\$%]?\d+ \| %?[01]+"""`
ply 2018-01-03 20:43:19 +00:00			`sign = 1`
			`if t.value[0] in "+-":`
			`sign = -1 if t.value[0] == "-" else 1`
			`t.value = t.value[1:]`
			`if t.value[0] == '$':`
			`t.value = int(t.value[1:], 16) * sign`
			`elif t.value[0] == '%':`
			`t.value = int(t.value[1:], 2) * sign`
			`else:`
			`t.value = int(t.value) * sign`
			`return t`


			`def t_COMMENT(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""[ \t];[^\n]""" # dont eat newline`
endl parsing 2018-01-05 21:52:23 +00:00			`return None # don't process comments`
ply 2018-01-03 20:43:19 +00:00

			`def t_PRESERVEREGS(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""!\s[AXY]{0,3}\s(?!=)"""`
ply 2018-01-03 20:43:19 +00:00			`t.value = t.value[1:-1].strip()`
			`return t`


endl parsing 2018-01-05 21:52:23 +00:00			`def t_ENDL(t):`
restructure 2018-02-08 20:10:52 +00:00			`r"""\n+"""`
ply 2018-01-03 20:43:19 +00:00			`t.lexer.lineno += len(t.value)`
comp 2018-01-07 18:14:21 +00:00			`t.value = "\n"`
endl parsing 2018-01-05 21:52:23 +00:00			`return t # end of lines are significant to the parser`
ply 2018-01-03 20:43:19 +00:00

			`def t_error(t):`
			`line, col = t.lineno, find_tok_column(t)`
comp 2018-01-07 18:14:21 +00:00			`filename = getattr(t.lexer, "source_filename", "<unknown-file>")`
			`sref = SourceRef(filename, line, col)`
renames 2018-01-07 22:45:42 +00:00			`if hasattr(t.lexer, "error_function"):`
			`t.lexer.error_function(sref, "illegal character '{:s}'", t.value[0])`
			`else:`
fixed a bunch of issues 2018-01-14 23:20:36 +00:00			`print("{}: illegal character '{:s}'".format(sref, t.value[0]))`
ply 2018-01-03 20:43:19 +00:00			`t.lexer.skip(1)`


improved sourceref column calculation when dealing with tabs, added more error checks 2018-01-14 17:02:39 +00:00			`def custom_error(t, message):`
			`line, col = t.lineno, find_tok_column(t)`
			`filename = getattr(t.lexer, "source_filename", "<unknown-file>")`
			`sref = SourceRef(filename, line, col)`
			`if hasattr(t.lexer, "error_function"):`
			`t.lexer.error_function(sref, message)`
			`else:`
fixed a bunch of issues 2018-01-14 23:20:36 +00:00			`print(sref, message)`
improved sourceref column calculation when dealing with tabs, added more error checks 2018-01-14 17:02:39 +00:00			`t.lexer.skip(1)`


ply 2018-01-03 20:43:19 +00:00			`def find_tok_column(token):`
restructure 2018-02-08 20:10:52 +00:00			`"""Find the column of the token in its line."""`
ply 2018-01-03 20:43:19 +00:00			`last_cr = lexer.lexdata.rfind('\n', 0, token.lexpos)`
improved sourceref column calculation when dealing with tabs, added more error checks 2018-01-14 17:02:39 +00:00			`chunk = lexer.lexdata[last_cr:token.lexpos]`
			`return len(chunk.expandtabs())`
ply 2018-01-03 20:43:19 +00:00

optimize 2018-01-08 00:51:36 +00:00			`def print_warning(text: str, sourceref: SourceRef = None) -> None:`
			`if sourceref:`
			`print_bold("warning: {}: {:s}".format(sourceref, text))`
			`else:`
			`print_bold("warning: " + text)`


			`def print_bold(text: str) -> None:`
			`if sys.stdout.isatty():`
			`print("\x1b[1m" + text + "\x1b[0m", flush=True)`
			`else:`
			`print(text)`


ply 2018-01-03 20:43:19 +00:00			`lexer = ply.lex.lex()`


			`if __name__ == "__main__":`
			`ply.lex.runmain()`