From a30b2894cd25d372e048000a8eb4a269c94f38da Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Thu, 8 Feb 2018 21:10:52 +0100 Subject: [PATCH] restructure --- il65/compile.py | 8 ++-- il65/{expressions.py => constantfold.py} | 21 +++++---- il65/optimize.py | 10 ++--- il65/plylex.py | 36 ++++++++-------- il65/plyparse.py | 8 ++++ todo2.ill | 54 ++++++++++++------------ 6 files changed, 72 insertions(+), 65 deletions(-) rename il65/{expressions.py => constantfold.py} (94%) diff --git a/il65/compile.py b/il65/compile.py index a10ebe90f..b6c11074d 100644 --- a/il65/compile.py +++ b/il65/compile.py @@ -9,11 +9,11 @@ import re import os import sys import linecache -from typing import Optional, Tuple, Set, Dict, List, Any, no_type_check +from typing import no_type_check, Set, List, Dict, Tuple, Optional, Any import attr from .datatypes import DataType, VarType from .plylex import SourceRef, print_bold -from .expressions import ExpressionOptimizer +from .constantfold import ConstantFold from .plyparse import * @@ -43,8 +43,8 @@ class PlyParser: self.check_all_symbolnames(module) self.determine_subroutine_usage(module) self.all_parents_connected(module) - eo = ExpressionOptimizer(module) - eo.optimize() # do some constant-folding + cf = ConstantFold(module) + cf.fold_constants() # do some constant-folding self.semantic_check(module) self.coerce_values(module) self.check_floats_enabled(module) diff --git a/il65/expressions.py b/il65/constantfold.py similarity index 94% rename from il65/expressions.py rename to il65/constantfold.py index 8b30983bf..ab9f9c7e7 100644 --- a/il65/expressions.py +++ b/il65/constantfold.py @@ -1,6 +1,7 @@ """ Programming Language for 6502/6510 microprocessors, codename 'Sick' -This is the part of the compiler/optimizer that simplifies/evaluates expressions. +This is the part of the compiler/optimizer that simplifies expressions by doing +'constant folding' - replacing expressions with constant, compile-time precomputed values. 
Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ @@ -8,9 +9,7 @@ Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 import sys from .plylex import SourceRef from .datatypes import VarType -from .plyparse import Module, Expression, LiteralValue, SymbolName, ParseError, VarDef, Dereference, Register,\ - SubCall, AddressOf, AstNode, ExpressionWithOperator, ExpressionEvaluationError, \ - math_functions, builtin_functions, check_symbol_definition +from .plyparse import * def handle_internal_error(exc: Exception, msg: str = "") -> None: @@ -25,29 +24,29 @@ def handle_internal_error(exc: Exception, msg: str = "") -> None: raise exc -class ExpressionOptimizer: +class ConstantFold: def __init__(self, mod: Module) -> None: self.num_warnings = 0 self.module = mod self.optimizations_performed = False - def optimize(self, once: bool=False) -> None: + def fold_constants(self, once: bool=False) -> None: self.num_warnings = 0 if once: - self.constant_folding() + self._constant_folding() else: self.optimizations_performed = True # keep optimizing as long as there were changes made while self.optimizations_performed: self.optimizations_performed = False - self.constant_folding() + self._constant_folding() - def constant_folding(self) -> None: + def _constant_folding(self) -> None: for expression in self.module.all_nodes(Expression): if isinstance(expression, LiteralValue): continue try: - evaluated = self.process_expression(expression) # type: ignore + evaluated = self._process_expression(expression) # type: ignore if evaluated is not expression: # replace the node with the newly evaluated result parent = expression.parent @@ -58,7 +57,7 @@ class ExpressionOptimizer: except Exception as x: handle_internal_error(x, "process_expressions of node {}".format(expression)) - def process_expression(self, expr: Expression) -> Expression: + def _process_expression(self, expr: Expression) -> Expression: # process/simplify all expressions (constant 
folding etc) result = None # type: Expression if expr.is_compile_constant() or isinstance(expr, ExpressionWithOperator) and expr.must_be_constant: diff --git a/il65/optimize.py b/il65/optimize.py index fe56f131f..3d068a8f4 100644 --- a/il65/optimize.py +++ b/il65/optimize.py @@ -8,11 +8,9 @@ Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 from typing import List, no_type_check, Union from .datatypes import DataType -from .plyparse import Module, Block, Scope, IncrDecr, AstNode, Register, TargetRegisters, Assignment, AugAssignment, \ - AssignmentTargets, SymbolName, VarDef, Dereference, LiteralValue, ExpressionWithOperator, Subroutine, \ - Goto, Expression, Directive, coerce_constant_value, datatype_of +from .plyparse import * from .plylex import print_warning, print_bold -from .expressions import ExpressionOptimizer +from .constantfold import ConstantFold class Optimizer: @@ -20,7 +18,7 @@ class Optimizer: self.num_warnings = 0 self.module = mod self.optimizations_performed = False - self.simple_expression_optimizer = ExpressionOptimizer(self.module) + self.constant_folder = ConstantFold(self.module) def optimize(self) -> None: self.num_warnings = 0 @@ -34,7 +32,7 @@ class Optimizer: self.remove_empty_blocks() def _optimize(self) -> None: - self.simple_expression_optimizer.optimize(True) # perform constant folding and simple expression optimization + self.constant_folder.fold_constants(True) # perform constant folding and simple expression optimization # @todo expression optimization: reduce expression nesting / flattening of parenthesis # @todo expression optimization: simplify logical expression when a term makes it always true or false # @todo expression optimization: optimize some simple multiplications into shifts (A*=8 -> A<<3) diff --git a/il65/plylex.py b/il65/plylex.py index 9dc4f8128..9da7990cc 100644 --- a/il65/plylex.py +++ b/il65/plylex.py @@ -157,19 +157,19 @@ reserved = { # rules for tokens with some actions def t_inlineasm(t): 
- r"%asm\s*\{[^\S\n]*" + r"""%asm\s*\{[^\S\n]*""" t.lexer.code_start = t.lexer.lexpos # Record start position t.lexer.level = 1 # initial brace level t.lexer.begin("inlineasm") # enter state 'inlineasm' def t_inlineasm_lbrace(t): - r"\{" + r"""\{""" t.lexer.level += 1 def t_inlineasm_rbrace(t): - r"\}" + r"""\}""" t.lexer.level -= 1 # if closing brace, return code fragment if t.lexer.level == 0: @@ -181,7 +181,7 @@ def t_inlineasm_rbrace(t): def t_inlineasm_comment(t): - r";[^\n]*" + r""";[^\n]*""" pass @@ -203,7 +203,7 @@ def t_inlineasm_string(t): def t_inlineasm_nonspace(t): - r'[^\s\{\}\'\"]+' + r"""[^\s\{\}\'\"]+""" pass @@ -213,31 +213,31 @@ def t_inlineasm_error(t): def t_CLOBBEREDREGISTER(t): - r"(AX|AY|XY|A|X|Y)\?" + r"""(AX|AY|XY|A|X|Y)\?""" t.value = t.value[:-1] return t def t_DATATYPE(t): - r"\.byte|\.wordarray|\.float|\.array|\.word|\.text|\.stext|\.ptext|\.pstext|\.matrix" + r"""\.byte|\.wordarray|\.float|\.array|\.word|\.text|\.stext|\.ptext|\.pstext|\.matrix""" t.value = t.value[1:] return t def t_LABEL(t): - r"[a-zA-Z_]\w*\s*:" + r"""[a-zA-Z_]\w*\s*:""" t.value = t.value[:-1].strip() return t def t_BOOLEAN(t): - r"true|false" + r"""true|false""" t.value = t.value == "true" return t def t_DOTTEDNAME(t): - r"[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)+" + r"""[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)+""" first, second = t.value.split(".") if first in reserved or second in reserved: custom_error(t, "reserved word as part of dotted name") @@ -246,13 +246,13 @@ def t_DOTTEDNAME(t): def t_NAME(t): - r"[a-zA-Z_]\w*" + r"""[a-zA-Z_]\w*""" t.type = reserved.get(t.value, "NAME") # check for reserved words return t def t_DIRECTIVE(t): - r"%[a-z]+\b" + r"""%[a-z]+\b""" t.value = t.value[1:] return t @@ -280,7 +280,7 @@ def t_STRING(t): def t_FLOATINGPOINT(t): - r"((?: (?: \d* \. \d+ ) | (?: \d+ \.? ) )(?: [Ee] [+-]? \d+ ) ?)(?![a-z])" + r"""((?: (?: \d* \. \d+ ) | (?: \d+ \.? ) )(?: [Ee] [+-]? 
\d+ ) ?)(?![a-z])""" try: t.value = int(t.value) t.type = "INTEGER" @@ -290,7 +290,7 @@ def t_FLOATINGPOINT(t): def t_INTEGER(t): - r"\$?[a-fA-F\d]+ | [\$%]?\d+ | %?[01]+" + r"""\$?[a-fA-F\d]+ | [\$%]?\d+ | %?[01]+""" sign = 1 if t.value[0] in "+-": sign = -1 if t.value[0] == "-" else 1 @@ -305,18 +305,18 @@ def t_INTEGER(t): def t_COMMENT(t): - r"[ \t]*;[^\n]*" # dont eat newline + r"""[ \t]*;[^\n]*""" # don't eat newline return None # don't process comments def t_PRESERVEREGS(t): - r"!\s*[AXY]{0,3}\s*(?!=)" + r"""!\s*[AXY]{0,3}\s*(?!=)""" t.value = t.value[1:-1].strip() return t def t_ENDL(t): - r"\n+" + r"""\n+""" t.lexer.lineno += len(t.value) t.value = "\n" return t # end of lines are significant to the parser @@ -345,7 +345,7 @@ def custom_error(t, message): def find_tok_column(token): - """ Find the column of the token in its line.""" + """Find the column of the token in its line.""" last_cr = lexer.lexdata.rfind('\n', 0, token.lexpos) chunk = lexer.lexdata[last_cr:token.lexpos] return len(chunk.expandtabs()) diff --git a/il65/plyparse.py b/il65/plyparse.py index f4f858510..7a89d53ea 100644 --- a/il65/plyparse.py +++ b/il65/plyparse.py @@ -18,6 +18,14 @@ from .datatypes import DataType, VarType, REGISTER_SYMBOLS, REGISTER_BYTES, REGI char_to_bytevalue, FLOAT_MAX_NEGATIVE, FLOAT_MAX_POSITIVE +__all__ = ["ProgramFormat", "ZpOptions", "math_functions", "builtin_functions", "ParseError", "ExpressionEvaluationError", + "UndefinedSymbolError", "AstNode", "Directive", "Scope", "Block", "Module", "Label", "Expression", + "Register", "Subroutine", "LiteralValue", "AddressOf", "SymbolName", "Dereference", "IncrDecr", + "ExpressionWithOperator", "Goto", "SubCall", "VarDef", "Return", "Assignment", "AugAssignment", + "InlineAssembly", "TargetRegisters", "AssignmentTargets", + "parse_file", "coerce_constant_value", "datatype_of", "check_symbol_definition"] + + class ProgramFormat(enum.Enum): RAW = "raw" PRG = "prg" diff --git a/todo2.ill b/todo2.ill index
57ac1ef36..07c4035f0 100644 --- a/todo2.ill +++ b/todo2.ill @@ -37,8 +37,10 @@ start: %breakpoint abc,def - ;X += border - ;XY += border + X += border + XY += border ; @todo .word augassign register + XY -= 1234+333 ; @todo .word augassign register + A += [c2f] AY += [c2f] AY += [XY] @@ -59,11 +61,11 @@ start: v3t=2.23424 ; @todo store as constant float with generated name, replace value node v3t=2.23411 ; @todo store as constant float with generated name, replace value node v3t=1.23411 + 1; @todo store as constant float with generated name, replace value node -; v3t+=2.23424 ; @todo store as constant float with generated name, replace value node -; v3t+=2.23424 ; @todo store as constant float with generated name, replace value node -; v3t+=2.23411 ; @todo store as constant float with generated name, replace value node -; v3t+=2.23411 ; @todo store as constant float with generated name, replace value node - ;v3t=2.23424 * v3t ; @todo store as constant float with generated name, replace value node + v3t+=2.23424 ; @todo store as constant float with generated name, replace value node + v3t+=2.23424 ; @todo store as constant float with generated name, replace value node + v3t+=2.23411 ; @todo store as constant float with generated name, replace value node + v3t+=2.23411 ; @todo store as constant float with generated name, replace value node + v3t=2.23424 * v3t ; @todo store as constant float with generated name, replace value node XY*=2 XY*=3 X=3 ; @todo optimize consecutive assignments @@ -84,25 +86,25 @@ start: XY=XY/0 ; @todo zerodiv (during expression to code generation) XY=XY//0 ; @todo zerodiv (during expression to code generation) XY*=2.23424 ; @todo store as constant float with generated name, replace value node - ;XY*=2.23424 * v3t ; @todo store as constant float with generated name, replace value node - ;v3t*=2.23424 * v3t ; @todo store as constant float with generated name, replace value node -; A++ -; X-- -; A+=1 -; X-=2 -; [AX]++ -; [AX .byte]++ -; [AX 
.word]++ -; [AX .float]++ -; [$ccc0]++ -; [$ccc0 .byte]++ -; [$ccc0 .word]++ -; [$ccc0 .float]++ -; A+=2 -; A+=3 -; XY+=6 -; XY+=222 -; XY+=666 + XY*=2.23424 * v3t ; @todo store as constant float with generated name, replace value node + v3t*=2.23424 * v3t ; @todo store as constant float with generated name, replace value node + A++ + X-- + A+=1 + X-=2 + [AX]++ + [AX .byte]++ + [AX .word]++ + [AX .float]++ + [$ccc0]++ + [$ccc0 .byte]++ + [$ccc0 .word]++ + [$ccc0 .float]++ + A+=2 + A+=3 + XY+=6 + XY+=222 + XY+=666 return 44 }