Mirror of https://github.com/catseye/SixtyPical.git
synced 2024-11-26 14:49:15 +00:00
Make lexer much less inefficient on large source files.
This commit is contained in:
parent
3cd28bdb3e
commit
0429e4bd90
@ -17,18 +17,20 @@ class Scanner(object):
|
|||||||
self.filename = filename
|
self.filename = filename
|
||||||
self.token = None
|
self.token = None
|
||||||
self.type = None
|
self.type = None
|
||||||
|
self.pos = 0
|
||||||
self.line_number = 1
|
self.line_number = 1
|
||||||
self.scan()
|
self.scan()
|
||||||
|
|
||||||
def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
|
def scan_pattern(self, pattern, type, token_group=1):
|
||||||
pattern = r'^(' + pattern + r')(.*?)$'
|
pattern = r'(' + pattern + r')'
|
||||||
match = re.match(pattern, self.text, re.DOTALL)
|
regexp = re.compile(pattern, flags=re.DOTALL)
|
||||||
|
match = regexp.match(self.text, pos=self.pos)
|
||||||
if not match:
|
if not match:
|
||||||
return False
|
return False
|
||||||
else:
|
else:
|
||||||
self.type = type
|
self.type = type
|
||||||
self.token = match.group(token_group)
|
self.token = match.group(token_group)
|
||||||
self.text = match.group(rest_group)
|
self.pos += len(match.group(0))
|
||||||
self.line_number += self.token.count('\n')
|
self.line_number += self.token.count('\n')
|
||||||
return True
|
return True
|
||||||
|
|
||||||
@ -36,7 +38,7 @@ class Scanner(object):
|
|||||||
self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
|
self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
|
||||||
while self.scan_pattern(r'\/\/.*?[\n\r]', 'comment'):
|
while self.scan_pattern(r'\/\/.*?[\n\r]', 'comment'):
|
||||||
self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
|
self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
|
||||||
if not self.text:
|
if self.pos >= len(self.text):
|
||||||
self.token = None
|
self.token = None
|
||||||
self.type = 'EOF'
|
self.type = 'EOF'
|
||||||
return
|
return
|
||||||
@ -44,20 +46,18 @@ class Scanner(object):
|
|||||||
return
|
return
|
||||||
if self.scan_pattern(r'\d+', 'integer literal'):
|
if self.scan_pattern(r'\d+', 'integer literal'):
|
||||||
return
|
return
|
||||||
if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
|
if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal', token_group=2):
|
||||||
token_group=2, rest_group=3):
|
|
||||||
# ecch
|
# ecch
|
||||||
self.token = str(eval('0x' + self.token))
|
self.token = str(eval('0x' + self.token))
|
||||||
return
|
return
|
||||||
if self.scan_pattern(r'\"(.*?)\"', 'string literal',
|
if self.scan_pattern(r'\"(.*?)\"', 'string literal', token_group=2):
|
||||||
token_group=2, rest_group=3):
|
|
||||||
return
|
return
|
||||||
if self.scan_pattern(r'\w+', 'identifier'):
|
if self.scan_pattern(r'\w+', 'identifier'):
|
||||||
return
|
return
|
||||||
if self.scan_pattern(r'.', 'unknown character'):
|
if self.scan_pattern(r'.', 'unknown character'):
|
||||||
return
|
return
|
||||||
else:
|
else:
|
||||||
raise AssertionError("this should never happen, self.text=({})".format(self.text))
|
raise AssertionError("this should never happen, self.text=({}), self.pos=({})".format(self.text, self.pos))
|
||||||
|
|
||||||
def expect(self, token):
|
def expect(self, token):
|
||||||
if self.token == token:
|
if self.token == token:
|
||||||
|
Loading…
Reference in New Issue
Block a user