Output UTF-8-encoded characters as identifier characters into assembly by default.

This is a behaviour configurable in the MCAsmInfo. I've decided to turn
it on by default in (possibly optimistic) hopes that most assemblers are
reasonably sane. If this proves a problem, switching the default to off seems
reasonable.

I'm not sure if this is the most appropriate place for the test, but it seemed
good to make sure it was tested somewhere.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@154235 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sean Hunt 2012-04-07 00:37:53 +00:00
parent d4f020a3af
commit 3420e7f360
4 changed files with 19 additions and 4 deletions

View File

@ -143,6 +143,10 @@ namespace llvm {
/// symbol names. This defaults to true. /// symbol names. This defaults to true.
bool AllowPeriodsInName; bool AllowPeriodsInName;
/// AllowUTF8 - This is true if the assembler accepts UTF-8 input.
// FIXME: Make this a more general encoding setting?
bool AllowUTF8;
//===--- Data Emission Directives -------------------------------------===// //===--- Data Emission Directives -------------------------------------===//
/// ZeroDirective - this should be set to the directive used to get some /// ZeroDirective - this should be set to the directive used to get some
@ -485,6 +489,9 @@ namespace llvm {
bool doesAllowPeriodsInName() const { bool doesAllowPeriodsInName() const {
return AllowPeriodsInName; return AllowPeriodsInName;
} }
bool doesAllowUTF8() const {
return AllowUTF8;
}
const char *getZeroDirective() const { const char *getZeroDirective() const {
return ZeroDirective; return ZeroDirective;
} }

View File

@ -49,6 +49,7 @@ MCAsmInfo::MCAsmInfo() {
AllowQuotesInName = false; AllowQuotesInName = false;
AllowNameToStartWithDigit = false; AllowNameToStartWithDigit = false;
AllowPeriodsInName = true; AllowPeriodsInName = true;
AllowUTF8 = true;
ZeroDirective = "\t.zero\t"; ZeroDirective = "\t.zero\t";
AsciiDirective = "\t.ascii\t"; AsciiDirective = "\t.ascii\t";
AscizDirective = "\t.asciz\t"; AscizDirective = "\t.asciz\t";

View File

@ -22,12 +22,13 @@
#include "llvm/ADT/Twine.h" #include "llvm/ADT/Twine.h"
using namespace llvm; using namespace llvm;
static bool isAcceptableChar(char C, bool AllowPeriod) { static bool isAcceptableChar(char C, bool AllowPeriod, bool AllowUTF8) {
if ((C < 'a' || C > 'z') && if ((C < 'a' || C > 'z') &&
(C < 'A' || C > 'Z') && (C < 'A' || C > 'Z') &&
(C < '0' || C > '9') && (C < '0' || C > '9') &&
C != '_' && C != '$' && C != '@' && C != '_' && C != '$' && C != '@' &&
!(AllowPeriod && C == '.')) !(AllowPeriod && C == '.') &&
!(AllowUTF8 && (C & 0x80)))
return false; return false;
return true; return true;
} }
@ -56,8 +57,9 @@ static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
// If any of the characters in the string is an unacceptable character, force // If any of the characters in the string is an unacceptable character, force
// quotes. // quotes.
bool AllowPeriod = MAI.doesAllowPeriodsInName(); bool AllowPeriod = MAI.doesAllowPeriodsInName();
bool AllowUTF8 = MAI.doesAllowUTF8();
for (unsigned i = 0, e = Str.size(); i != e; ++i) for (unsigned i = 0, e = Str.size(); i != e; ++i)
if (!isAcceptableChar(Str[i], AllowPeriod)) if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
return true; return true;
return false; return false;
} }
@ -74,8 +76,9 @@ static void appendMangledName(SmallVectorImpl<char> &OutName, StringRef Str,
} }
bool AllowPeriod = MAI.doesAllowPeriodsInName(); bool AllowPeriod = MAI.doesAllowPeriodsInName();
bool AllowUTF8 = MAI.doesAllowUTF8();
for (unsigned i = 0, e = Str.size(); i != e; ++i) { for (unsigned i = 0, e = Str.size(); i != e; ++i) {
if (!isAcceptableChar(Str[i], AllowPeriod)) if (!isAcceptableChar(Str[i], AllowPeriod, AllowUTF8))
MangleLetter(OutName, Str[i]); MangleLetter(OutName, Str[i]);
else else
OutName.push_back(Str[i]); OutName.push_back(Str[i]);

4
test/CodeGen/X86/utf8.ll Normal file
View File

@ -0,0 +1,4 @@
; RUN: llc < %s -march=x86 | FileCheck %s
; CHECK: "iΔ",4,4
@"i\CE\94" = common global i32 0, align 4