mirror of
https://github.com/catseye/SixtyPical.git
synced 2025-01-10 17:31:18 +00:00
Split scanner off from parser module, into own module.
This commit is contained in:
parent
7b1ee60a73
commit
007c81acad
@ -1,84 +1,12 @@
|
||||
# encoding: UTF-8
|
||||
|
||||
import re
|
||||
|
||||
from sixtypical.ast import Program, Defn, Routine, Block, Instr
|
||||
from sixtypical.model import (
|
||||
TYPE_BIT, TYPE_BYTE, TYPE_BYTE_TABLE,
|
||||
RoutineType, VectorType, ExecutableType,
|
||||
LocationRef, ConstantRef
|
||||
)
|
||||
|
||||
|
||||
class Scanner(object):
    """Regex-driven tokenizer exposing one token of lookahead.

    After construction, and after every call to `scan()`, `self.token` holds
    the text of the current token and `self.type` names its category:
    'operator', 'integer literal', 'string literal', 'identifier',
    'unknown character' or 'EOF' (for which `self.token` is None).
    `self.text` holds the input that has not been consumed yet.
    """

    def __init__(self, text):
        """Begin scanning `text` and load its first token."""
        self.text = text
        self.token = None
        self.type = None
        self.scan()

    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
        """Try to match `pattern` at the very start of the remaining text.

        `pattern` is wrapped in a capturing group (group 1) and followed by
        a group that captures the rest of the input.  Capturing groups
        inside `pattern` are therefore numbered from 2 upwards and push the
        rest-of-input group further along; callers whose pattern contains
        its own group pass `token_group` / `rest_group` to say which groups
        to use.

        On success, sets `self.type` to `type`, `self.token` to the text of
        `token_group`, and `self.text` to the text of `rest_group`, then
        returns True.  On failure, returns False and changes nothing.
        """
        # The rest-of-input group is greedy on purpose.  `$` also matches
        # just before a final newline, so a lazy `.*?` would stop there and
        # silently drop that newline from the remaining text.
        wrapped = r'^(' + pattern + r')(.*)$'
        match = re.match(wrapped, self.text, re.DOTALL)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        self.text = match.group(rest_group)
        return True

    def scan(self):
        """Skip whitespace and `//` comments, then load the next token.

        Sets `self.type` to 'EOF' and `self.token` to None once the input
        is exhausted.  Hexadecimal literals written as `$hh` are converted,
        so that every 'integer literal' token is a decimal digit string.
        """
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        # A comment runs to the end of its line, or to the end of the input
        # when the last line has no terminating newline.
        while self.scan_pattern(r'\/\/.*?(?:[\n\r]|\Z)', 'comment'):
            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        if not self.text:
            self.token = None
            self.type = 'EOF'
            return
        if self.scan_pattern(r'\,|\@|\+|\:|\{|\}', 'operator'):
            return
        if self.scan_pattern(r'\d+', 'integer literal'):
            return
        if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
                             token_group=2, rest_group=3):
            # The token is only the hex digits (without the `$`).
            self.token = str(int(self.token, 16))
            return
        if self.scan_pattern(r'\"(.*?)\"', 'string literal',
                             token_group=2, rest_group=3):
            return
        if self.scan_pattern(r'\w+', 'identifier'):
            return
        if self.scan_pattern(r'.', 'unknown character'):
            return
        # The text is non-empty here and `.` (with DOTALL) matches any
        # character, so this line is not expected to be reachable.
        raise AssertionError("this should never happen, self.text=(%s)" % self.text)

    def expect(self, token):
        """Advance past `token`; raise SyntaxError if it is not current."""
        if self.token != token:
            raise SyntaxError("Expected '%s', but found '%s'" %
                              (token, self.token))
        self.scan()

    def on(self, token):
        """Return True if the current token's text equals `token`."""
        return self.token == token

    def on_type(self, type):
        """Return True if the current token's type equals `type`."""
        return self.type == type

    def check_type(self, type):
        """Raise SyntaxError unless the current token's type is `type`."""
        if self.type != type:
            raise SyntaxError("Expected %s, but found %s ('%s')" %
                              (type, self.type, self.token))

    def consume(self, token):
        """Advance and return True if `token` is current, else return False."""
        if self.token != token:
            return False
        self.scan()
        return True
|
||||
from sixtypical.scanner import Scanner
|
||||
|
||||
|
||||
class SymEntry(object):
|
||||
|
74
src/sixtypical/scanner.py
Normal file
74
src/sixtypical/scanner.py
Normal file
@ -0,0 +1,74 @@
|
||||
# encoding: UTF-8
|
||||
|
||||
import re
|
||||
|
||||
|
||||
class Scanner(object):
    """Regex-driven tokenizer exposing one token of lookahead.

    After construction, and after every call to `scan()`, `self.token` holds
    the text of the current token and `self.type` names its category:
    'operator', 'integer literal', 'string literal', 'identifier',
    'unknown character' or 'EOF' (for which `self.token` is None).
    `self.text` holds the input that has not been consumed yet.
    """

    def __init__(self, text):
        """Begin scanning `text` and load its first token."""
        self.text = text
        self.token = None
        self.type = None
        self.scan()

    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
        """Try to match `pattern` at the very start of the remaining text.

        `pattern` is wrapped in a capturing group (group 1) and followed by
        a group that captures the rest of the input.  Capturing groups
        inside `pattern` are therefore numbered from 2 upwards and push the
        rest-of-input group further along; callers whose pattern contains
        its own group pass `token_group` / `rest_group` to say which groups
        to use.

        On success, sets `self.type` to `type`, `self.token` to the text of
        `token_group`, and `self.text` to the text of `rest_group`, then
        returns True.  On failure, returns False and changes nothing.
        """
        # The rest-of-input group is greedy on purpose.  `$` also matches
        # just before a final newline, so a lazy `.*?` would stop there and
        # silently drop that newline from the remaining text.
        wrapped = r'^(' + pattern + r')(.*)$'
        match = re.match(wrapped, self.text, re.DOTALL)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        self.text = match.group(rest_group)
        return True

    def scan(self):
        """Skip whitespace and `//` comments, then load the next token.

        Sets `self.type` to 'EOF' and `self.token` to None once the input
        is exhausted.  Hexadecimal literals written as `$hh` are converted,
        so that every 'integer literal' token is a decimal digit string.
        """
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        # A comment runs to the end of its line, or to the end of the input
        # when the last line has no terminating newline.
        while self.scan_pattern(r'\/\/.*?(?:[\n\r]|\Z)', 'comment'):
            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        if not self.text:
            self.token = None
            self.type = 'EOF'
            return
        if self.scan_pattern(r'\,|\@|\+|\:|\{|\}', 'operator'):
            return
        if self.scan_pattern(r'\d+', 'integer literal'):
            return
        if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
                             token_group=2, rest_group=3):
            # The token is only the hex digits (without the `$`).
            self.token = str(int(self.token, 16))
            return
        if self.scan_pattern(r'\"(.*?)\"', 'string literal',
                             token_group=2, rest_group=3):
            return
        if self.scan_pattern(r'\w+', 'identifier'):
            return
        if self.scan_pattern(r'.', 'unknown character'):
            return
        # The text is non-empty here and `.` (with DOTALL) matches any
        # character, so this line is not expected to be reachable.
        raise AssertionError("this should never happen, self.text=(%s)" % self.text)

    def expect(self, token):
        """Advance past `token`; raise SyntaxError if it is not current."""
        if self.token != token:
            raise SyntaxError("Expected '%s', but found '%s'" %
                              (token, self.token))
        self.scan()

    def on(self, token):
        """Return True if the current token's text equals `token`."""
        return self.token == token

    def on_type(self, type):
        """Return True if the current token's type equals `type`."""
        return self.type == type

    def check_type(self, type):
        """Raise SyntaxError unless the current token's type is `type`."""
        if self.type != type:
            raise SyntaxError("Expected %s, but found %s ('%s')" %
                              (type, self.type, self.token))

    def consume(self, token):
        """Advance and return True if `token` is current, else return False."""
        if self.token != token:
            return False
        self.scan()
        return True
|
Loading…
x
Reference in New Issue
Block a user