Revert r188376, "[lit] Support parsing scripts with inconsistent or invalid encodings.", because it doesn't work yet for bots using the internal shell.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@188379 91177308-0d34-0410-b5e6-96231b3b80d8
2025-03-20 11:32:33 +00:00 · 2013-08-14 15:55:25 +00:00 · 2013-08-14 15:55:25 +00:00 · 14a5c695a9
commit 14a5c695a9
parent 003f93f134
2 changed files with 12 additions and 45 deletions
--- a/utils/lit/lit/TestRunner.py
+++ b/utils/lit/lit/TestRunner.py
@ -305,54 +305,24 @@ def isExpectedFail(test, xfails):

    return False

-def parseIntegratedTestScriptCommands(source_path):
+def parseIntegratedTestScriptCommands(sourcepath):
    """
    parseIntegratedTestScriptCommands(source_path) -> commands

    Parse the commands in an integrated test script file into a list of
    (line_number, command_type, line).
    """
-
-    # This code is carefully written to be dual compatible with Python 2.5+ and
-    # Python 3 without requiring input files to always have valid codings. The
-    # trick we use is to open the file in binary mode and use the regular
-    # expression library to find the commands, with it scanning strings in
-    # Python2 and bytes in Python3.
-    #
-    # Once we find a match, we do require each script line to be decodable to
-    # ascii, so we convert the outputs to ascii before returning. This way the
-    # remaining code can work with "strings" agnostic of the executing Python
-    # version.
-    
-    def to_bytes(str):
-        # Encode to Latin1 to get binary data.
-        return str.encode('ISO-8859-1')
-    keywords = ('RUN:', 'XFAIL:', 'REQUIRES:', 'END.')
-    keywords_re = re.compile(
-        to_bytes("(%s)(.*)\n" % ("|".join(k for k in keywords),)))
-
-    f = open(source_path, 'rb')
-    try:
-        # Read the entire file contents.
-        data = f.read()
-
-        # Iterate over the matches.
-        line_number = 1
-        last_match_position = 0
-        for match in keywords_re.finditer(data):
-            # Compute the updated line number by counting the intervening
-            # newlines.
-            match_position = match.start()
-            line_number += data.count(to_bytes('\n'), last_match_position,
-                                      match_position)
-            last_match_position = match_position
-
-            # Convert the keyword and line to ascii and yield the command.
-            keyword,ln = match.groups()
-            yield (line_number, keyword[:-1].decode('ascii'),
-                   ln.decode('ascii'))
-    finally:
-        f.close()
+    line_number = 0
+    for ln in open(sourcepath):
+        line_number += 1
+        if 'RUN:' in ln:
+            yield (line_number, 'RUN', ln[ln.index('RUN:')+4:])
+        elif 'XFAIL:' in ln:
+            yield (line_number, 'XFAIL', ln[ln.index('XFAIL:') + 6:])
+        elif 'REQUIRES:' in ln:
+            yield (line_number, 'REQUIRES', ln[ln.index('REQUIRES:') + 9:])
+        elif 'END.' in ln:
+            yield (line_number, 'END', ln[ln.index('END.') + 4:])

 def parseIntegratedTestScript(test, normalize_slashes=False,
                              extra_substitutions=[]):
--- a/utils/lit/tests/shtest-encoding.py
+++ b/utils/lit/tests/shtest-encoding.py
@ -1,3 +0,0 @@
-# RUN: true
-
-# Here is a string that cannot be decoded in line mode: Â.