mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-28 22:24:28 +00:00
llvm-mc: Support escaped characters in string literals (for .ascii and .asciz)
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@79010 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -1,5 +1,6 @@
|
|||||||
# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
|
# RUN: llvm-mc -triple i386-unknown-unknown %s | FileCheck %s
|
||||||
|
|
||||||
|
.data
|
||||||
# CHECK: TEST0:
|
# CHECK: TEST0:
|
||||||
TEST0:
|
TEST0:
|
||||||
.ascii
|
.ascii
|
||||||
@ -20,5 +21,28 @@ TEST2:
|
|||||||
# CHECK: .byte 0
|
# CHECK: .byte 0
|
||||||
TEST3:
|
TEST3:
|
||||||
.asciz "B", "C"
|
.asciz "B", "C"
|
||||||
|
|
||||||
|
# CHECK: TEST4:
|
||||||
|
# CHECK: .byte 1
|
||||||
|
# CHECK: .byte 1
|
||||||
|
# CHECK: .byte 7
|
||||||
|
# CHECK: .byte 0
|
||||||
|
# CHECK: .byte 56
|
||||||
|
# CHECK: .byte 1
|
||||||
|
# CHECK: .byte 0
|
||||||
|
# CHECK: .byte 49
|
||||||
|
# CHECK: .byte 0
|
||||||
|
TEST4:
|
||||||
|
.ascii "\1\01\07\08\001\0001\b\0"
|
||||||
|
|
||||||
|
# CHECK: TEST5:
|
||||||
|
# CHECK: .byte 8
|
||||||
|
# CHECK: .byte 12
|
||||||
|
# CHECK: .byte 10
|
||||||
|
# CHECK: .byte 13
|
||||||
|
# CHECK: .byte 9
|
||||||
|
# CHECK: .byte 92
|
||||||
|
# CHECK: .byte 34
|
||||||
|
TEST5:
|
||||||
|
.ascii "\b\f\n\r\t\\\""
|
||||||
|
|
||||||
|
@ -765,6 +765,64 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
bool AsmParser::ParseEscapedString(std::string &Data) {
|
||||||
|
assert(Lexer.is(AsmToken::String) && "Unexpected current token!");
|
||||||
|
|
||||||
|
Data = "";
|
||||||
|
StringRef Str = Lexer.getTok().getStringContents();
|
||||||
|
for (unsigned i = 0, e = Str.size(); i != e; ++i) {
|
||||||
|
if (Str[i] != '\\') {
|
||||||
|
Data += Str[i];
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Recognize escaped characters. Note that this escape semantics currently
|
||||||
|
// loosely follows Darwin 'as'. Notably, it doesn't support hex escapes.
|
||||||
|
++i;
|
||||||
|
if (i == e)
|
||||||
|
return TokError("unexpected backslash at end of string");
|
||||||
|
|
||||||
|
// Recognize octal sequences.
|
||||||
|
if ((unsigned) (Str[i] - '0') <= 7) {
|
||||||
|
// Consume up to three octal characters.
|
||||||
|
unsigned Value = Str[i] - '0';
|
||||||
|
|
||||||
|
if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
|
||||||
|
++i;
|
||||||
|
Value = Value * 8 + (Str[i] - '0');
|
||||||
|
|
||||||
|
if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) {
|
||||||
|
++i;
|
||||||
|
Value = Value * 8 + (Str[i] - '0');
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (Value > 255)
|
||||||
|
return TokError("invalid octal escape sequence (out of range)");
|
||||||
|
|
||||||
|
Data += (unsigned char) Value;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Otherwise recognize individual escapes.
|
||||||
|
switch (Str[i]) {
|
||||||
|
default:
|
||||||
|
// Just reject invalid escape sequences for now.
|
||||||
|
return TokError("invalid escape sequence (unrecognized character)");
|
||||||
|
|
||||||
|
case 'b': Data += '\b'; break;
|
||||||
|
case 'f': Data += '\f'; break;
|
||||||
|
case 'n': Data += '\n'; break;
|
||||||
|
case 'r': Data += '\r'; break;
|
||||||
|
case 't': Data += '\t'; break;
|
||||||
|
case '"': Data += '"'; break;
|
||||||
|
case '\\': Data += '\\'; break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
/// ParseDirectiveAscii:
|
/// ParseDirectiveAscii:
|
||||||
/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
|
/// ::= ( .ascii | .asciz ) [ "string" ( , "string" )* ]
|
||||||
bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
|
bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
|
||||||
@ -773,11 +831,11 @@ bool AsmParser::ParseDirectiveAscii(bool ZeroTerminated) {
|
|||||||
if (Lexer.isNot(AsmToken::String))
|
if (Lexer.isNot(AsmToken::String))
|
||||||
return TokError("expected string in '.ascii' or '.asciz' directive");
|
return TokError("expected string in '.ascii' or '.asciz' directive");
|
||||||
|
|
||||||
// FIXME: This shouldn't use a const char* + strlen, the string could have
|
std::string Data;
|
||||||
// embedded nulls.
|
if (ParseEscapedString(Data))
|
||||||
// FIXME: Should have accessor for getting string contents.
|
return true;
|
||||||
StringRef Str = Lexer.getTok().getString();
|
|
||||||
Out.EmitBytes(Str.substr(1, Str.size() - 2));
|
Out.EmitBytes(Data);
|
||||||
if (ZeroTerminated)
|
if (ZeroTerminated)
|
||||||
Out.EmitBytes(StringRef("\0", 1));
|
Out.EmitBytes(StringRef("\0", 1));
|
||||||
|
|
||||||
|
@ -135,6 +135,10 @@ private:
|
|||||||
bool ParseDirectiveFile(SMLoc DirectiveLoc); // ".file"
|
bool ParseDirectiveFile(SMLoc DirectiveLoc); // ".file"
|
||||||
bool ParseDirectiveLine(SMLoc DirectiveLoc); // ".line"
|
bool ParseDirectiveLine(SMLoc DirectiveLoc); // ".line"
|
||||||
bool ParseDirectiveLoc(SMLoc DirectiveLoc); // ".loc"
|
bool ParseDirectiveLoc(SMLoc DirectiveLoc); // ".loc"
|
||||||
|
|
||||||
|
/// ParseEscapedString - Parse the current token as a string which may include
|
||||||
|
/// escaped characters and return the string contents.
|
||||||
|
bool ParseEscapedString(std::string &Data);
|
||||||
};
|
};
|
||||||
|
|
||||||
} // end namespace llvm
|
} // end namespace llvm
|
||||||
|
Reference in New Issue
Block a user