mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2024-12-26 21:32:10 +00:00
[lit] Parse all strings as UTF-8 rather than ASCII.
As far as I can tell, UTF-8 has been supported since the beginning of Python's codec support, and it's the de facto standard for text these days, at least for primarily-English text. This allows us to put Unicode into lit RUN lines.

rdar://problem/18311663

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@217688 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
81d53b7290
commit
c919e57cbf
3
test/Other/lit-unicode.txt
Normal file
3
test/Other/lit-unicode.txt
Normal file
@@ -0,0 +1,3 @@
|
||||
REQUIRES: shell
|
||||
RUN: echo "ようこそ" | FileCheck %s
|
||||
CHECK: {{^}}ようこそ{{$}}
|
@@ -6,8 +6,8 @@
|
||||
import sys, re, time
|
||||
|
||||
def to_bytes(str):
|
||||
# Encode to Latin1 to get binary data.
|
||||
return str.encode('ISO-8859-1')
|
||||
# Encode to UTF-8 to get binary data.
|
||||
return str.encode('utf-8')
|
||||
|
||||
class TerminalController:
|
||||
"""
|
||||
@@ -136,7 +136,7 @@ class TerminalController:
|
||||
|
||||
def _tparm(self, arg, index):
|
||||
import curses
|
||||
return curses.tparm(to_bytes(arg), index).decode('ascii') or ''
|
||||
return curses.tparm(to_bytes(arg), index).decode('utf-8') or ''
|
||||
|
||||
def _tigetstr(self, cap_name):
|
||||
# String capabilities can include "delays" of the form "$<2>".
|
||||
@@ -147,7 +147,7 @@ class TerminalController:
|
||||
if cap is None:
|
||||
cap = ''
|
||||
else:
|
||||
cap = cap.decode('ascii')
|
||||
cap = cap.decode('utf-8')
|
||||
return re.sub(r'\$<\d+>[/*]?', '', cap)
|
||||
|
||||
def render(self, template):
|
||||
|
@@ -192,6 +192,11 @@ def executeShCmd(cmd, cfg, cwd, results):
|
||||
f.seek(0, 0)
|
||||
procData[i] = (procData[i][0], f.read())
|
||||
|
||||
def to_string(bytes):
|
||||
if isinstance(bytes, str):
|
||||
return bytes
|
||||
return bytes.encode('utf-8')
|
||||
|
||||
exitCode = None
|
||||
for i,(out,err) in enumerate(procData):
|
||||
res = procs[i].wait()
|
||||
@@ -201,11 +206,11 @@ def executeShCmd(cmd, cfg, cwd, results):
|
||||
|
||||
# Ensure the resulting output is always of string type.
|
||||
try:
|
||||
out = str(out.decode('ascii'))
|
||||
out = to_string(out.decode('utf-8'))
|
||||
except:
|
||||
out = str(out)
|
||||
try:
|
||||
err = str(err.decode('ascii'))
|
||||
err = to_string(err.decode('utf-8'))
|
||||
except:
|
||||
err = str(err)
|
||||
|
||||
@@ -314,13 +319,18 @@ def parseIntegratedTestScriptCommands(source_path):
|
||||
# Python2 and bytes in Python3.
|
||||
#
|
||||
# Once we find a match, we do require each script line to be decodable to
|
||||
# ascii, so we convert the outputs to ascii before returning. This way the
|
||||
# UTF-8, so we convert the outputs to UTF-8 before returning. This way the
|
||||
# remaining code can work with "strings" agnostic of the executing Python
|
||||
# version.
|
||||
|
||||
def to_bytes(str):
|
||||
# Encode to Latin1 to get binary data.
|
||||
return str.encode('ISO-8859-1')
|
||||
# Encode to UTF-8 to get binary data.
|
||||
return str.encode('utf-8')
|
||||
def to_string(bytes):
|
||||
if isinstance(bytes, str):
|
||||
return bytes
|
||||
return to_bytes(bytes)
|
||||
|
||||
keywords = ('RUN:', 'XFAIL:', 'REQUIRES:', 'END.')
|
||||
keywords_re = re.compile(
|
||||
to_bytes("(%s)(.*)\n" % ("|".join(k for k in keywords),)))
|
||||
@@ -341,13 +351,13 @@ def parseIntegratedTestScriptCommands(source_path):
|
||||
match_position)
|
||||
last_match_position = match_position
|
||||
|
||||
# Convert the keyword and line to ascii strings and yield the
|
||||
# Convert the keyword and line to UTF-8 strings and yield the
|
||||
# command. Note that we take care to return regular strings in
|
||||
# Python 2, to avoid other code having to differentiate between the
|
||||
# str and unicode types.
|
||||
keyword,ln = match.groups()
|
||||
yield (line_number, str(keyword[:-1].decode('ascii')),
|
||||
str(ln.decode('ascii')))
|
||||
yield (line_number, to_string(keyword[:-1].decode('utf-8')),
|
||||
to_string(ln.decode('utf-8')))
|
||||
finally:
|
||||
f.close()
|
||||
|
||||
|
@@ -31,7 +31,7 @@ class GoogleTest(TestFormat):
|
||||
try:
|
||||
lines = lit.util.capture([path, '--gtest_list_tests'],
|
||||
env=localConfig.environment)
|
||||
lines = lines.decode('ascii')
|
||||
lines = lines.decode('utf-8')
|
||||
if kIsWindows:
|
||||
lines = lines.replace('\r', '')
|
||||
lines = lines.split('\n')
|
||||
|
@@ -156,13 +156,18 @@ def executeCommand(command, cwd=None, env=None):
|
||||
if exitCode == -signal.SIGINT:
|
||||
raise KeyboardInterrupt
|
||||
|
||||
def to_string(bytes):
|
||||
if isinstance(bytes, str):
|
||||
return bytes
|
||||
return bytes.encode('utf-8')
|
||||
|
||||
# Ensure the resulting output is always of string type.
|
||||
try:
|
||||
out = str(out.decode('ascii'))
|
||||
out = to_string(out.decode('utf-8'))
|
||||
except:
|
||||
out = str(out)
|
||||
try:
|
||||
err = str(err.decode('ascii'))
|
||||
err = to_string(err.decode('utf-8'))
|
||||
except:
|
||||
err = str(err)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user