llvm-6502/utils/lit/lit/ShUtil.py
Chandler Carruth 1f7ebddd5f Revert r159528 which taught lit's builtin shell test runner about the
'|&' bash syntax. We have lots of users with a bash on their system
which doesn't support this syntax, and as bash is still significantly
faster, we should support them.

The test suite has already been updated to cope with this.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@159580 91177308-0d34-0410-b5e6-96231b3b80d8
2012-07-02 20:43:21 +00:00

354 lines
12 KiB
Python

import itertools
import Util
from ShCommands import Command, Pipeline, Seq
class ShLexer:
def __init__(self, data, win32Escapes = False):
self.data = data
self.pos = 0
self.end = len(data)
self.win32Escapes = win32Escapes
def eat(self):
c = self.data[self.pos]
self.pos += 1
return c
def look(self):
return self.data[self.pos]
def maybe_eat(self, c):
"""
maybe_eat(c) - Consume the character c if it is the next character,
returning True if a character was consumed. """
if self.data[self.pos] == c:
self.pos += 1
return True
return False
def lex_arg_fast(self, c):
# Get the leading whitespace free section.
chunk = self.data[self.pos - 1:].split(None, 1)[0]
# If it has special characters, the fast path failed.
if ('|' in chunk or '&' in chunk or
'<' in chunk or '>' in chunk or
"'" in chunk or '"' in chunk or
'\\' in chunk):
return None
self.pos = self.pos - 1 + len(chunk)
return chunk
def lex_arg_slow(self, c):
if c in "'\"":
str = self.lex_arg_quoted(c)
else:
str = c
while self.pos != self.end:
c = self.look()
if c.isspace() or c in "|&":
break
elif c in '><':
# This is an annoying case; we treat '2>' as a single token so
# we don't have to track whitespace tokens.
# If the parse string isn't an integer, do the usual thing.
if not str.isdigit():
break
# Otherwise, lex the operator and convert to a redirection
# token.
num = int(str)
tok = self.lex_one_token()
assert isinstance(tok, tuple) and len(tok) == 1
return (tok[0], num)
elif c == '"':
self.eat()
str += self.lex_arg_quoted('"')
elif c == "'":
self.eat()
str += self.lex_arg_quoted("'")
elif not self.win32Escapes and c == '\\':
# Outside of a string, '\\' escapes everything.
self.eat()
if self.pos == self.end:
Util.warning("escape at end of quoted argument in: %r" %
self.data)
return str
str += self.eat()
else:
str += self.eat()
return str
def lex_arg_quoted(self, delim):
str = ''
while self.pos != self.end:
c = self.eat()
if c == delim:
return str
elif c == '\\' and delim == '"':
# Inside a '"' quoted string, '\\' only escapes the quote
# character and backslash, otherwise it is preserved.
if self.pos == self.end:
Util.warning("escape at end of quoted argument in: %r" %
self.data)
return str
c = self.eat()
if c == '"': #
str += '"'
elif c == '\\':
str += '\\'
else:
str += '\\' + c
else:
str += c
Util.warning("missing quote character in %r" % self.data)
return str
def lex_arg_checked(self, c):
pos = self.pos
res = self.lex_arg_fast(c)
end = self.pos
self.pos = pos
reference = self.lex_arg_slow(c)
if res is not None:
if res != reference:
raise ValueError,"Fast path failure: %r != %r" % (res, reference)
if self.pos != end:
raise ValueError,"Fast path failure: %r != %r" % (self.pos, end)
return reference
def lex_arg(self, c):
return self.lex_arg_fast(c) or self.lex_arg_slow(c)
def lex_one_token(self):
"""
lex_one_token - Lex a single 'sh' token. """
c = self.eat()
if c in ';!':
return (c,)
if c == '|':
if self.maybe_eat('|'):
return ('||',)
return (c,)
if c == '&':
if self.maybe_eat('&'):
return ('&&',)
if self.maybe_eat('>'):
return ('&>',)
return (c,)
if c == '>':
if self.maybe_eat('&'):
return ('>&',)
if self.maybe_eat('>'):
return ('>>',)
return (c,)
if c == '<':
if self.maybe_eat('&'):
return ('<&',)
if self.maybe_eat('>'):
return ('<<',)
return (c,)
return self.lex_arg(c)
def lex(self):
while self.pos != self.end:
if self.look().isspace():
self.eat()
else:
yield self.lex_one_token()
###
class ShParser:
def __init__(self, data, win32Escapes = False):
self.data = data
self.tokens = ShLexer(data, win32Escapes = win32Escapes).lex()
def lex(self):
try:
return self.tokens.next()
except StopIteration:
return None
def look(self):
next = self.lex()
if next is not None:
self.tokens = itertools.chain([next], self.tokens)
return next
def parse_command(self):
tok = self.lex()
if not tok:
raise ValueError,"empty command!"
if isinstance(tok, tuple):
raise ValueError,"syntax error near unexpected token %r" % tok[0]
args = [tok]
redirects = []
while 1:
tok = self.look()
# EOF?
if tok is None:
break
# If this is an argument, just add it to the current command.
if isinstance(tok, str):
args.append(self.lex())
continue
# Otherwise see if it is a terminator.
assert isinstance(tok, tuple)
if tok[0] in ('|',';','&','||','&&'):
break
# Otherwise it must be a redirection.
op = self.lex()
arg = self.lex()
if not arg:
raise ValueError,"syntax error near token %r" % op[0]
redirects.append((op, arg))
return Command(args, redirects)
def parse_pipeline(self):
negate = False
if self.look() == ('!',):
self.lex()
negate = True
commands = [self.parse_command()]
while self.look() == ('|',):
self.lex()
commands.append(self.parse_command())
return Pipeline(commands, negate)
def parse(self):
lhs = self.parse_pipeline()
while self.look():
operator = self.lex()
assert isinstance(operator, tuple) and len(operator) == 1
if not self.look():
raise ValueError, "missing argument to operator %r" % operator[0]
# FIXME: Operator precedence!!
lhs = Seq(lhs, operator[0], self.parse_pipeline())
return lhs
###
import unittest
class TestShLexer(unittest.TestCase):
def lex(self, str, *args, **kwargs):
return list(ShLexer(str, *args, **kwargs).lex())
def test_basic(self):
self.assertEqual(self.lex('a|b>c&d<e'),
['a', ('|',), 'b', ('>',), 'c', ('&',), 'd',
('<',), 'e'])
def test_redirection_tokens(self):
self.assertEqual(self.lex('a2>c'),
['a2', ('>',), 'c'])
self.assertEqual(self.lex('a 2>c'),
['a', ('>',2), 'c'])
def test_quoting(self):
self.assertEqual(self.lex(""" 'a' """),
['a'])
self.assertEqual(self.lex(""" "hello\\"world" """),
['hello"world'])
self.assertEqual(self.lex(""" "hello\\'world" """),
["hello\\'world"])
self.assertEqual(self.lex(""" "hello\\\\world" """),
["hello\\world"])
self.assertEqual(self.lex(""" he"llo wo"rld """),
["hello world"])
self.assertEqual(self.lex(""" a\\ b a\\\\b """),
["a b", "a\\b"])
self.assertEqual(self.lex(""" "" "" """),
["", ""])
self.assertEqual(self.lex(""" a\\ b """, win32Escapes = True),
['a\\', 'b'])
class TestShParse(unittest.TestCase):
def parse(self, str):
return ShParser(str).parse()
def test_basic(self):
self.assertEqual(self.parse('echo hello'),
Pipeline([Command(['echo', 'hello'], [])], False))
self.assertEqual(self.parse('echo ""'),
Pipeline([Command(['echo', ''], [])], False))
self.assertEqual(self.parse("""echo -DFOO='a'"""),
Pipeline([Command(['echo', '-DFOO=a'], [])], False))
self.assertEqual(self.parse('echo -DFOO="a"'),
Pipeline([Command(['echo', '-DFOO=a'], [])], False))
def test_redirection(self):
self.assertEqual(self.parse('echo hello > c'),
Pipeline([Command(['echo', 'hello'],
[((('>'),), 'c')])], False))
self.assertEqual(self.parse('echo hello > c >> d'),
Pipeline([Command(['echo', 'hello'], [(('>',), 'c'),
(('>>',), 'd')])], False))
self.assertEqual(self.parse('a 2>&1'),
Pipeline([Command(['a'], [(('>&',2), '1')])], False))
def test_pipeline(self):
self.assertEqual(self.parse('a | b'),
Pipeline([Command(['a'], []),
Command(['b'], [])],
False))
self.assertEqual(self.parse('a | b | c'),
Pipeline([Command(['a'], []),
Command(['b'], []),
Command(['c'], [])],
False))
self.assertEqual(self.parse('! a'),
Pipeline([Command(['a'], [])],
True))
def test_list(self):
self.assertEqual(self.parse('a ; b'),
Seq(Pipeline([Command(['a'], [])], False),
';',
Pipeline([Command(['b'], [])], False)))
self.assertEqual(self.parse('a & b'),
Seq(Pipeline([Command(['a'], [])], False),
'&',
Pipeline([Command(['b'], [])], False)))
self.assertEqual(self.parse('a && b'),
Seq(Pipeline([Command(['a'], [])], False),
'&&',
Pipeline([Command(['b'], [])], False)))
self.assertEqual(self.parse('a || b'),
Seq(Pipeline([Command(['a'], [])], False),
'||',
Pipeline([Command(['b'], [])], False)))
self.assertEqual(self.parse('a && b || c'),
Seq(Seq(Pipeline([Command(['a'], [])], False),
'&&',
Pipeline([Command(['b'], [])], False)),
'||',
Pipeline([Command(['c'], [])], False)))
if __name__ == '__main__':
unittest.main()