From 007c81acad5fed97bca673bcd2a351e44bbf97e5 Mon Sep 17 00:00:00 2001
From: Chris Pressey
Date: Thu, 16 Jun 2016 11:04:03 -0500
Subject: [PATCH] Split scanner off from parser module, into own module.

---
Review notes (free text after the '---' separator; not part of the commit
message):

* Pure move: the Scanner class removed from parser.py and the one added in
  scanner.py are line-for-line identical. parser.py drops its own
  "import re" and gains "from sixtypical.scanner import Scanner".
* Scanner.scan() converts "$"-prefixed hex literals with
  eval('0x' + token). The pattern only admits [0-9a-fA-F]+, so
  str(int(token, 16)) is an eval-free equivalent; left unchanged here so
  that this patch stays a pure move.
* Scanner takes the full program text, scans the first token in __init__,
  and exposes the current token via .token / .type. expect() and
  check_type() raise SyntaxError; on(), on_type() and consume() only test.

 src/sixtypical/parser.py  | 74 +--------------------------------------
 src/sixtypical/scanner.py | 74 +++++++++++++++++++++++++++++++++++++++
 2 files changed, 75 insertions(+), 73 deletions(-)
 create mode 100644 src/sixtypical/scanner.py

diff --git a/src/sixtypical/parser.py b/src/sixtypical/parser.py
index 403f9aa..b2e775a 100644
--- a/src/sixtypical/parser.py
+++ b/src/sixtypical/parser.py
@@ -1,84 +1,12 @@
 # encoding: UTF-8
 
-import re
-
 from sixtypical.ast import Program, Defn, Routine, Block, Instr
 from sixtypical.model import (
     TYPE_BIT, TYPE_BYTE, TYPE_BYTE_TABLE,
     RoutineType, VectorType, ExecutableType,
     LocationRef, ConstantRef
 )
-
-
-class Scanner(object):
-    def __init__(self, text):
-        self.text = text
-        self.token = None
-        self.type = None
-        self.scan()
-
-    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
-        pattern = r'^(' + pattern + r')(.*?)$'
-        match = re.match(pattern, self.text, re.DOTALL)
-        if not match:
-            return False
-        else:
-            self.type = type
-            self.token = match.group(token_group)
-            self.text = match.group(rest_group)
-            return True
-
-    def scan(self):
-        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
-        while self.scan_pattern(r'\/\/.*?[\n\r]', 'comment'):
-            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
-        if not self.text:
-            self.token = None
-            self.type = 'EOF'
-            return
-        if self.scan_pattern(r'\,|\@|\+|\:|\{|\}', 'operator'):
-            return
-        if self.scan_pattern(r'\d+', 'integer literal'):
-            return
-        if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
-                             token_group=2, rest_group=3):
-            # ecch
-            self.token = str(eval('0x' + self.token))
-            return
-        if self.scan_pattern(r'\"(.*?)\"', 'string literal',
-                             token_group=2, rest_group=3):
-            return
-        if self.scan_pattern(r'\w+', 'identifier'):
-            return
-        if self.scan_pattern(r'.', 'unknown character'):
-            return
-        else:
-            raise AssertionError("this should never happen, self.text=(%s)" % self.text)
-
-    def expect(self, token):
-        if self.token == token:
-            self.scan()
-        else:
-            raise SyntaxError("Expected '%s', but found '%s'" %
-                              (token, self.token))
-
-    def on(self, token):
-        return self.token == token
-
-    def on_type(self, type):
-        return self.type == type
-
-    def check_type(self, type):
-        if not self.type == type:
-            raise SyntaxError("Expected %s, but found %s ('%s')" %
-                              (type, self.type, self.token))
-
-    def consume(self, token):
-        if self.token == token:
-            self.scan()
-            return True
-        else:
-            return False
+from sixtypical.scanner import Scanner
 
 
 class SymEntry(object):
diff --git a/src/sixtypical/scanner.py b/src/sixtypical/scanner.py
new file mode 100644
index 0000000..bd189c8
--- /dev/null
+++ b/src/sixtypical/scanner.py
@@ -0,0 +1,74 @@
+# encoding: UTF-8
+
+import re
+
+
+class Scanner(object):
+    def __init__(self, text):
+        self.text = text
+        self.token = None
+        self.type = None
+        self.scan()
+
+    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
+        pattern = r'^(' + pattern + r')(.*?)$'
+        match = re.match(pattern, self.text, re.DOTALL)
+        if not match:
+            return False
+        else:
+            self.type = type
+            self.token = match.group(token_group)
+            self.text = match.group(rest_group)
+            return True
+
+    def scan(self):
+        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
+        while self.scan_pattern(r'\/\/.*?[\n\r]', 'comment'):
+            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
+        if not self.text:
+            self.token = None
+            self.type = 'EOF'
+            return
+        if self.scan_pattern(r'\,|\@|\+|\:|\{|\}', 'operator'):
+            return
+        if self.scan_pattern(r'\d+', 'integer literal'):
+            return
+        if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
+                             token_group=2, rest_group=3):
+            # ecch
+            self.token = str(eval('0x' + self.token))
+            return
+        if self.scan_pattern(r'\"(.*?)\"', 'string literal',
+                             token_group=2, rest_group=3):
+            return
+        if self.scan_pattern(r'\w+', 'identifier'):
+            return
+        if self.scan_pattern(r'.', 'unknown character'):
+            return
+        else:
+            raise AssertionError("this should never happen, self.text=(%s)" % self.text)
+
+    def expect(self, token):
+        if self.token == token:
+            self.scan()
+        else:
+            raise SyntaxError("Expected '%s', but found '%s'" %
+                              (token, self.token))
+
+    def on(self, token):
+        return self.token == token
+
+    def on_type(self, type):
+        return self.type == type
+
+    def check_type(self, type):
+        if not self.type == type:
+            raise SyntaxError("Expected %s, but found %s ('%s')" %
+                              (type, self.type, self.token))
+
+    def consume(self, token):
+        if self.token == token:
+            self.scan()
+            return True
+        else:
+            return False