1
0
mirror of https://github.com/catseye/SixtyPical.git synced 2024-09-27 11:54:45 +00:00

Merge branch 'develop-0.18' into goto-at-end-of-block

This commit is contained in:
Chris Pressey 2018-11-27 13:55:36 +00:00
commit d90ac92a33
3 changed files with 17 additions and 11 deletions

View File

@ -1,6 +1,12 @@
History of SixtyPical History of SixtyPical
===================== =====================
0.18
----
* Fixed pathological memory use in the lexical scanner - should
be much more efficient now when parsing large source files.
0.17 0.17
---- ----

View File

@ -1,7 +1,7 @@
SixtyPical SixtyPical
========== ==========
_Version 0.17. Work-in-progress, everything is subject to change._ _Version 0.18. Work-in-progress, everything is subject to change._
**SixtyPical** is a low-level programming language with advanced **SixtyPical** is a low-level programming language with advanced
static analysis. Many of its primitive instructions resemble static analysis. Many of its primitive instructions resemble

View File

@ -17,18 +17,20 @@ class Scanner(object):
self.filename = filename self.filename = filename
self.token = None self.token = None
self.type = None self.type = None
self.pos = 0
self.line_number = 1 self.line_number = 1
self.scan() self.scan()
def scan_pattern(self, pattern, type, token_group=1, rest_group=2): def scan_pattern(self, pattern, type, token_group=1):
pattern = r'^(' + pattern + r')(.*?)$' pattern = r'(' + pattern + r')'
match = re.match(pattern, self.text, re.DOTALL) regexp = re.compile(pattern, flags=re.DOTALL)
match = regexp.match(self.text, pos=self.pos)
if not match: if not match:
return False return False
else: else:
self.type = type self.type = type
self.token = match.group(token_group) self.token = match.group(token_group)
self.text = match.group(rest_group) self.pos += len(match.group(0))
self.line_number += self.token.count('\n') self.line_number += self.token.count('\n')
return True return True
@ -36,7 +38,7 @@ class Scanner(object):
self.scan_pattern(r'[ \t\n\r]*', 'whitespace') self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
while self.scan_pattern(r'\/\/.*?[\n\r]', 'comment'): while self.scan_pattern(r'\/\/.*?[\n\r]', 'comment'):
self.scan_pattern(r'[ \t\n\r]*', 'whitespace') self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
if not self.text: if self.pos >= len(self.text):
self.token = None self.token = None
self.type = 'EOF' self.type = 'EOF'
return return
@ -44,20 +46,18 @@ class Scanner(object):
return return
if self.scan_pattern(r'\d+', 'integer literal'): if self.scan_pattern(r'\d+', 'integer literal'):
return return
if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal', if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal', token_group=2):
token_group=2, rest_group=3):
# ecch # ecch
self.token = str(eval('0x' + self.token)) self.token = str(eval('0x' + self.token))
return return
if self.scan_pattern(r'\"(.*?)\"', 'string literal', if self.scan_pattern(r'\"(.*?)\"', 'string literal', token_group=2):
token_group=2, rest_group=3):
return return
if self.scan_pattern(r'\w+', 'identifier'): if self.scan_pattern(r'\w+', 'identifier'):
return return
if self.scan_pattern(r'.', 'unknown character'): if self.scan_pattern(r'.', 'unknown character'):
return return
else: else:
raise AssertionError("this should never happen, self.text=({})".format(self.text)) raise AssertionError("this should never happen, self.text=({}), self.pos=({})".format(self.text, self.pos))
def expect(self, token): def expect(self, token):
if self.token == token: if self.token == token: