Re-commit r192758 - MC: quote tricky symbol names in asm output

The reason this got reverted was that the @feat.00 symbol which was emitted
for every TU became quoted, and on cygwin/mingw we use the gas assembler which
couldn't handle the quotes.

This commit fixes the problem by only emitting @feat.00 for win32, where we use
clang -cc1as to assemble. gas would just drop this symbol anyway, so there is no
loss there.

With @feat.00 gone, there shouldn't be quoted symbols showing up on cygwin since
it uses the Itanium ABI, which doesn't put these funny characters in symbols.

> Because of win32 mangling, we produce symbol and section names with
> funny characters in them, most notably @ characters.
>
> MC would choke on trying to parse its own assembly output. This patch addresses
> that by:
>
> - Making @ trigger quoting of symbol names
> - Also quote section names in the same way
> - Just parse section names like other identifiers (to allow for quotes)
> - Don't assume @ signifies a symbol variant if it is in a string.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@192859 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hans Wennborg 2013-10-17 01:13:02 +00:00
parent 88a68cbbb5
commit b74b88edac
9 changed files with 68 additions and 26 deletions

View File

@ -792,19 +792,25 @@ bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
EndLoc = SMLoc::getFromPointer(Identifier.end());
// This is a symbol reference.
std::pair<StringRef, StringRef> Split = Identifier.split('@');
MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first);
// Lookup the symbol variant if used.
StringRef SymbolName = Identifier;
MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
if (Split.first.size() != Identifier.size()) {
Variant = MCSymbolRefExpr::getVariantKindForName(Split.second);
std::pair<StringRef, StringRef> Split = Identifier.split('@');
if (Split.first.size() != Identifier.size() &&
FirstTokenKind != AsmToken::String) {
SymbolName = Split.first;
StringRef VariantName = Split.second;
// Lookup the symbol variant.
Variant = MCSymbolRefExpr::getVariantKindForName(VariantName);
if (Variant == MCSymbolRefExpr::VK_Invalid) {
Variant = MCSymbolRefExpr::VK_None;
return TokError("invalid variant '" + Split.second + "'");
return TokError("invalid variant '" + VariantName + "'");
}
}
MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName);
// If this is an absolute variable reference, substitute it now to preserve
// semantics in the face of reassignment.
if (Sym->isVariable() && isa<MCConstantExpr>(Sym->getVariableValue())) {

View File

@ -295,12 +295,7 @@ bool COFFAsmParser::ParseSectionSwitch(StringRef Section,
}
bool COFFAsmParser::ParseSectionName(StringRef &SectionName) {
if (!getLexer().is(AsmToken::Identifier))
return true;
SectionName = getTok().getIdentifier();
Lex();
return false;
return getParser().parseIdentifier(SectionName);
}
// .section name [, "flags"]

View File

@ -39,6 +39,22 @@ void MCSectionCOFF::setSelection(int Selection,
Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
}
/// isAcceptableSectionNameChar - Return true if \p C may appear in a section
/// name that is printed without surrounding quotes: an ASCII letter, an ASCII
/// digit, or one of '_', '$' and '.'.
static bool isAcceptableSectionNameChar(char C) {
  // Punctuation that is allowed in an unquoted name.
  if (C == '_' || C == '$' || C == '.')
    return true;

  const bool IsLower = 'a' <= C && C <= 'z';
  const bool IsUpper = 'A' <= C && C <= 'Z';
  const bool IsDigit = '0' <= C && C <= '9';
  return IsLower || IsUpper || IsDigit;
}
/// sectionNameNeedsQuoting - Return true if the section name \p Name contains
/// at least one character that isAcceptableSectionNameChar rejects, i.e. the
/// name must be wrapped in quotes to be syntactically correct. An empty name
/// yields false.
static bool sectionNameNeedsQuoting(StringRef Name) {
  for (const char *Cur = Name.begin(), *End = Name.end(); Cur != End; ++Cur) {
    if (isAcceptableSectionNameChar(*Cur))
      continue;
    // Found a character that cannot appear in an unquoted name.
    return true;
  }
  return false;
}
void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
raw_ostream &OS,
const MCExpr *Subsection) const {
@ -49,7 +65,10 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI,
return;
}
OS << "\t.section\t" << getSectionName() << ",\"";
if (sectionNameNeedsQuoting(getSectionName()))
OS << "\t.section\t" << '"' << getSectionName() << '"' << ",\"";
else
OS << "\t.section\t" << getSectionName() << ",\"";
if (getKind().isText())
OS << 'x';
if (getKind().isWriteable())

View File

@ -18,12 +18,10 @@ const MCSection *MCSymbol::AbsolutePseudoSection =
reinterpret_cast<const MCSection *>(1);
static bool isAcceptableChar(char C) {
if ((C < 'a' || C > 'z') &&
(C < 'A' || C > 'Z') &&
(C < '0' || C > '9') &&
C != '_' && C != '$' && C != '.' && C != '@')
return false;
return true;
return (C >= 'a' && C <= 'z') ||
(C >= 'A' && C <= 'Z') ||
(C >= '0' && C <= '9') ||
C == '_' || C == '$' || C == '.';
}
/// NameNeedsQuoting - Return true if the identifier \p Str needs quotes to be

View File

@ -519,9 +519,11 @@ void X86AsmPrinter::EmitStartOfAsmFile(Module &M) {
if (Subtarget->isTargetEnvMacho())
OutStreamer.SwitchSection(getObjFileLowering().getTextSection());
if (Subtarget->isTargetCOFF()) {
if (Subtarget->isTargetCOFF() && Subtarget->isTargetWindows()) {
// Emit an absolute @feat.00 symbol. This appears to be some kind of
// compiler features bitfield read by link.exe.
// We only do this on win32, since on cygwin etc. we use the GNU assembler,
// which doesn't handle this symbol.
if (!Subtarget->is64Bit()) {
MCSymbol *S = MMI->getContext().GetOrCreateSymbol(StringRef("@feat.00"));
OutStreamer.BeginCOFFSymbolDef(S);

View File

@ -1,7 +1,9 @@
; RUN: llc -O0 -mtriple=i386-pc-win32 -filetype=asm -o - %s | FileCheck %s
; RUN: llc -O0 -mtriple=i386-pc-win32 -filetype=asm -o - %s | FileCheck %s --check-prefix=WIN32
; RUN: llc -O0 -mtriple=i386-pc-cygwin -filetype=asm -o - %s | FileCheck %s --check-prefix=CYGWIN
define i32 @foo() {
ret i32 0
}
; CHECK: @feat.00 = 1
; WIN32: "@feat.00" = 1
; CYGWIN-NOT: "@feat.00" = 1

View File

@ -3,7 +3,7 @@
; Check that a fastcall function gets correct mangling
define x86_fastcallcc void @func(i64 %X, i8 %Y, i8 %G, i16 %Z) {
; CHECK: @func@20:
; CHECK: "@func@20":
ret void
}

View File

@ -5,7 +5,7 @@
define internal x86_stdcallcc void @MyFunc() nounwind {
entry:
; CHECK: MyFunc@0:
; CHECK: "_MyFunc@0":
; CHECK: ret
ret void
}
@ -20,5 +20,5 @@ entry:
@B = global %0 { void (...)* bitcast (void ()* @MyFunc to void (...)*) }, align 4
; CHECK: _B:
; CHECK: .long _MyFunc@0
; CHECK: .long "_MyFunc@0"

View File

@ -0,0 +1,20 @@
; Check that certain symbol and section names are quoted in the asm output.
; RUN: llc -mtriple=i686-pc-win32 %s -o - | FileCheck %s
; Check that the symbol and section names can round-trip through the assembler.
; RUN: llc -mtriple=i686-pc-win32 %s -o - | llvm-mc -triple i686-pc-win32 -filetype=obj | llvm-readobj -s -section-symbols | FileCheck %s --check-prefix=READOBJ
@"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ" = global i32 0
define weak i32 @"\01??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"() section ".text" {
%res = load i32* @"\01??__E_Generic_object@?$_Error_objects@H@std@@YAXXZ"
ret i32 %res
}
; CHECK: .section ".text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51","xr"
; CHECK: .globl "??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"
; CHECK: "??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51"
; READOBJ: Symbol
; READOBJ: Name: ??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51
; READOBJ: Section: .text$??_B?$num_put@_WV?$back_insert_iterator@V?$basic_string@_WU?$char_traits@_W@std@@V?$allocator@_W@2@@std@@@std@@@std@@51