1
0
mirror of https://github.com/catseye/SixtyPical.git synced 2025-01-10 17:31:18 +00:00

Split scanner off from parser module, into own module.

This commit is contained in:
Chris Pressey 2016-06-16 11:04:03 -05:00
parent 7b1ee60a73
commit 007c81acad
2 changed files with 75 additions and 73 deletions

View File

@ -1,84 +1,12 @@
# encoding: UTF-8
import re
from sixtypical.ast import Program, Defn, Routine, Block, Instr
from sixtypical.model import (
TYPE_BIT, TYPE_BYTE, TYPE_BYTE_TABLE,
RoutineType, VectorType, ExecutableType,
LocationRef, ConstantRef
)
class Scanner(object):
    """Regex-driven tokenizer that keeps one token of lookahead.

    After construction, and after every call to ``scan``, ``self.token``
    holds the text of the current token and ``self.type`` names its kind:
    'operator', 'integer literal', 'string literal', 'identifier',
    'unknown character' or 'EOF' (in which case ``token`` is None).
    ``self.text`` always holds the input that has not been consumed yet.
    """

    def __init__(self, text):
        """Start scanning `text` and load the first token."""
        self.text = text
        self.token = None
        self.type = None
        self.scan()

    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
        """Try to match `pattern` at the start of the remaining input.

        The pattern is wrapped as group 1 and the rest of the input is
        captured as the group that follows it, so a pattern that contains
        one capturing group of its own must pass ``token_group=2,
        rest_group=3``.

        On success the current type/token are updated, the matched text is
        consumed, and True is returned.  On failure nothing changes and
        False is returned.
        """
        # The remainder is anchored with \Z rather than $: without
        # re.MULTILINE, $ also matches just before a final newline, which
        # would silently drop the last newline of the input.
        pattern = r'^(' + pattern + r')(.*)\Z'
        match = re.match(pattern, self.text, re.DOTALL)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        self.text = match.group(rest_group)
        return True

    def scan(self):
        """Skip whitespace and // comments, then load the next token."""
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        # A comment runs to the end of its line; the line terminator (if
        # any) is eaten by the whitespace scan that follows, so a comment
        # on the last line needs no trailing newline.
        while self.scan_pattern(r'\/\/[^\n\r]*', 'comment'):
            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        if not self.text:
            self.token = None
            self.type = 'EOF'
            return
        if self.scan_pattern(r'\,|\@|\+|\:|\{|\}', 'operator'):
            return
        if self.scan_pattern(r'\d+', 'integer literal'):
            return
        if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
                             token_group=2, rest_group=3):
            # Hex literals are normalised to a decimal string so that all
            # integer literal tokens have the same form.
            self.token = str(int(self.token, 16))
            return
        if self.scan_pattern(r'\"(.*?)\"', 'string literal',
                             token_group=2, rest_group=3):
            return
        if self.scan_pattern(r'\w+', 'identifier'):
            return
        if self.scan_pattern(r'.', 'unknown character'):
            return
        # Defensive: the text is non-empty here and '.' (with DOTALL)
        # matches any character, so this point should be unreachable.
        raise AssertionError("this should never happen, self.text=(%s)" % self.text)

    def expect(self, token):
        """Consume the current token if it equals `token`.

        Raises SyntaxError if the current token is anything else.
        """
        if self.token == token:
            self.scan()
        else:
            raise SyntaxError("Expected '%s', but found '%s'" %
                              (token, self.token))

    def on(self, token):
        """Return True if the current token equals `token`."""
        return self.token == token

    def on_type(self, type):
        """Return True if the current token is of kind `type`."""
        return self.type == type

    def check_type(self, type):
        """Raise SyntaxError unless the current token is of kind `type`."""
        if not self.type == type:
            raise SyntaxError("Expected %s, but found %s ('%s')" %
                              (type, self.type, self.token))

    def consume(self, token):
        """Consume the current token if it equals `token`.

        Returns True if the token was consumed, False (leaving the scanner
        untouched) otherwise.
        """
        if self.token == token:
            self.scan()
            return True
        else:
            return False
from sixtypical.scanner import Scanner
class SymEntry(object):

74
src/sixtypical/scanner.py Normal file
View File

@ -0,0 +1,74 @@
# encoding: UTF-8
import re
class Scanner(object):
    """Regex-driven tokenizer that keeps one token of lookahead.

    After construction, and after every call to ``scan``, ``self.token``
    holds the text of the current token and ``self.type`` names its kind:
    'operator', 'integer literal', 'string literal', 'identifier',
    'unknown character' or 'EOF' (in which case ``token`` is None).
    ``self.text`` always holds the input that has not been consumed yet.
    """

    def __init__(self, text):
        """Start scanning `text` and load the first token."""
        self.text = text
        self.token = None
        self.type = None
        self.scan()

    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
        """Try to match `pattern` at the start of the remaining input.

        The pattern is wrapped as group 1 and the rest of the input is
        captured as the group that follows it, so a pattern that contains
        one capturing group of its own must pass ``token_group=2,
        rest_group=3``.

        On success the current type/token are updated, the matched text is
        consumed, and True is returned.  On failure nothing changes and
        False is returned.
        """
        # The remainder is anchored with \Z rather than $: without
        # re.MULTILINE, $ also matches just before a final newline, which
        # would silently drop the last newline of the input.
        pattern = r'^(' + pattern + r')(.*)\Z'
        match = re.match(pattern, self.text, re.DOTALL)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        self.text = match.group(rest_group)
        return True

    def scan(self):
        """Skip whitespace and // comments, then load the next token."""
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        # A comment runs to the end of its line; the line terminator (if
        # any) is eaten by the whitespace scan that follows, so a comment
        # on the last line needs no trailing newline.
        while self.scan_pattern(r'\/\/[^\n\r]*', 'comment'):
            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        if not self.text:
            self.token = None
            self.type = 'EOF'
            return
        if self.scan_pattern(r'\,|\@|\+|\:|\{|\}', 'operator'):
            return
        if self.scan_pattern(r'\d+', 'integer literal'):
            return
        if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
                             token_group=2, rest_group=3):
            # Hex literals are normalised to a decimal string so that all
            # integer literal tokens have the same form.
            self.token = str(int(self.token, 16))
            return
        if self.scan_pattern(r'\"(.*?)\"', 'string literal',
                             token_group=2, rest_group=3):
            return
        if self.scan_pattern(r'\w+', 'identifier'):
            return
        if self.scan_pattern(r'.', 'unknown character'):
            return
        # Defensive: the text is non-empty here and '.' (with DOTALL)
        # matches any character, so this point should be unreachable.
        raise AssertionError("this should never happen, self.text=(%s)" % self.text)

    def expect(self, token):
        """Consume the current token if it equals `token`.

        Raises SyntaxError if the current token is anything else.
        """
        if self.token == token:
            self.scan()
        else:
            raise SyntaxError("Expected '%s', but found '%s'" %
                              (token, self.token))

    def on(self, token):
        """Return True if the current token equals `token`."""
        return self.token == token

    def on_type(self, type):
        """Return True if the current token is of kind `type`."""
        return self.type == type

    def check_type(self, type):
        """Raise SyntaxError unless the current token is of kind `type`."""
        if not self.type == type:
            raise SyntaxError("Expected %s, but found %s ('%s')" %
                              (type, self.type, self.token))

    def consume(self, token):
        """Consume the current token if it equals `token`.

        Returns True if the token was consumed, False (leaving the scanner
        untouched) otherwise.
        """
        if self.token == token:
            self.scan()
            return True
        else:
            return False