llvm-mc: Support escaped characters in string literals (for .ascii and .asciz)

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79010 91177308-0d34-0410-b5e6-96231b3b80d8
2025-06-28 22:24:28 +00:00 · 2009-08-14 18:19:52 +00:00
parent 2247276c6f
commit 1ab7594946
3 changed files with 93 additions and 7 deletions
--- a/test/MC/AsmParser/directive_ascii.s
+++ b/test/MC/AsmParser/directive_ascii.s
@ -1,5 +1,6 @@
 # RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
        .data
 # CHECK: TEST0:
 TEST0:  
        .ascii
@ -20,5 +21,28 @@ TEST2:
 # CHECK: .byte 0
 TEST3:  
        .asciz "B", "C"
-
+        
-       
+# CHECK: TEST4:
 # CHECK: .byte 1
 # CHECK: .byte 1
 # CHECK: .byte 7
 # CHECK: .byte 0
 # CHECK: .byte 56
 # CHECK: .byte 1
 # CHECK: .byte 0
 # CHECK: .byte 49
 # CHECK: .byte 8
 # CHECK: .byte 0
 TEST4:  
        .ascii "\1\01\07\08\001\0001\b\0"
 # CHECK: TEST5:
 # CHECK: .byte 8
 # CHECK: .byte 12
 # CHECK: .byte 10
 # CHECK: .byte 13
 # CHECK: .byte 9
 # CHECK: .byte 92
 # CHECK: .byte 34
 TEST5:
        .ascii "\b\f\n\r\t\\\""
--- a/tools/llvm-mc/AsmParser.cpp
+++ b/tools/llvm-mc/AsmParser.cpp
@ -765,6 +765,64 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment,
  return false;
 }
 bool AsmParser::ParseEscapedString(std::string &Data) {
  assert(Lexer.is(AsmToken::String) && "Unexpected current token!");
  Data = "";
  StringRef Str = Lexer.getTok().getStringContents();
  for (unsigned i = 0, e = Str.size(); i != e; ++i) {
    if (Str[i] != '\\') {
      Data += Str[i];
      continue;
    }
    // Recognize escaped characters. Note that this escape semantics currently
    // loosely follows Darwin 'as'. Notably, it doesn't support hex escapes.
    ++i;
    if (i == e)
      return TokError("unexpected backslash at end of string");
    // Recognize octal sequences.
    if ((unsigned) (Str[i] - '0') <= 7) {
      // Consume up to three octal characters.
      unsigned Value = Str[i] - '0';
      if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
        ++i;
        Value = Value * 8 + (Str[i] - '0');
        if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
          ++i;
          Value = Value * 8 + (Str[i] - '0');
        }
      }
      if (Value > 255)
        return TokError("invalid octal escape sequence (out of range)");
      Data += (unsigned char) Value;
      continue;
    }
    // Otherwise recognize individual escapes.
    switch (Str[i]) {
    default:
      // Just reject invalid escape sequences for now.
      return TokError("invalid escape sequence (unrecognized character)");
    case 'b': Data += '\b'; break;
    case 'f': Data += '\f'; break;
    case 'n': Data += '\n'; break;
    case 'r': Data += '\r'; break;
    case 't': Data += '\t'; break;
    case '"': Data += '"'; break;
    case '\\': Data += '\\'; break;
    }
  }
  return false;
 }
 /// ParseDirectiveAscii:
 ///   ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
 bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
@ -773,11 +831,11 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
      if (Lexer.isNot(AsmToken::String))
        return TokError("expected string in '.ascii' or '.asciz' directive");
-      // FIXME: This shouldn't use a const char* + strlen, the string could have
+      std::string Data;
-      // embedded nulls.
+      if (ParseEscapedString(Data))
-      // FIXME: Should have accessor for getting string contents.
+        return true;
-      StringRef Str = Lexer.getTok().getString();
+      
-      Out.EmitBytes(Str.substr(1, Str.size() - 2));
+      Out.EmitBytes(Data);
      if (ZeroTerminated)
        Out.EmitBytes(StringRef("\0", 1));
--- a/tools/llvm-mc/AsmParser.h
+++ b/tools/llvm-mc/AsmParser.h
@ -135,6 +135,10 @@ private:
  bool ParseDirectiveFile(SMLoc DirectiveLoc); // ".file"
  bool ParseDirectiveLine(SMLoc DirectiveLoc); // ".line"
  bool ParseDirectiveLoc(SMLoc DirectiveLoc); // ".loc"
  /// ParseEscapedString - Parse the current token as a string which may include
  /// escaped characters and return the string contents.
  bool ParseEscapedString(std::string &Data);
 };
 } // end namespace llvm