Implement the local-dynamic TLS model for x86 (PR3985)

This implements codegen support for accesses to thread-local variables
using the local-dynamic model, and adds a clean-up pass so that the base
address for the TLS block can be re-used between local-dynamic access on
an execution path.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@157818 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hans Wennborg 2012-06-01 16:27:21 +00:00
parent 6bb5c0074d
commit f0234fcbc9
13 changed files with 353 additions and 17 deletions

View File

@ -100,6 +100,26 @@ namespace X86II {
/// SYMBOL_LABEL @TLSGD
MO_TLSGD,
/// MO_TLSLD - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the TLS index for the module that
/// contains the symbol. When this index is passed to a call to to
/// __tls_get_addr, the function will return the base address of the TLS
/// block for the symbol.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSLD
MO_TLSLD,
/// MO_TLSLDM - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the TLS index for the module that
/// contains the symbol. When this index is passed to a call to to
/// ___tls_get_addr, the function will return the base address of the TLS
/// block for the symbol.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @TLSLDM
MO_TLSLDM,
/// MO_GOTTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///
@ -121,6 +141,13 @@ namespace X86II {
/// SYMBOL_LABEL @TPOFF
MO_TPOFF,
/// MO_DTPOFF - On a symbol operand this indicates that the immediate is
/// the offset of the GOT entry with the TLS offset of the symbol.
///
/// See 'ELF Handling for Thread-Local Storage' for more details.
/// SYMBOL_LABEL @DTPOFF
MO_DTPOFF,
/// MO_NTPOFF - On a symbol operand this indicates that the immediate is
/// some TLS offset.
///

View File

@ -36,6 +36,11 @@ FunctionPass *createX86ISelDag(X86TargetMachine &TM,
/// register for PIC on x86-32.
FunctionPass* createGlobalBaseRegPass();
/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses
/// to local-dynamic TLS variables so that the TLS base address for the module
/// is only fetched once per execution path through the function.
FunctionPass *createCleanupLocalDynamicTLSPass();
/// createX86FloatingPointStackifierPass - This function returns a pass which
/// converts floating point register references and pseudo instructions into
/// floating point stack references and physical instructions.

View File

@ -186,9 +186,12 @@ void X86AsmPrinter::printSymbolOperand(const MachineOperand &MO,
O << '-' << *MF->getPICBaseSymbol();
break;
case X86II::MO_TLSGD: O << "@TLSGD"; break;
case X86II::MO_TLSLD: O << "@TLSLD"; break;
case X86II::MO_TLSLDM: O << "@TLSLDM"; break;
case X86II::MO_GOTTPOFF: O << "@GOTTPOFF"; break;
case X86II::MO_INDNTPOFF: O << "@INDNTPOFF"; break;
case X86II::MO_TPOFF: O << "@TPOFF"; break;
case X86II::MO_DTPOFF: O << "@DTPOFF"; break;
case X86II::MO_NTPOFF: O << "@NTPOFF"; break;
case X86II::MO_GOTNTPOFF: O << "@GOTNTPOFF"; break;
case X86II::MO_GOTPCREL: O << "@GOTPCREL"; break;

View File

@ -7263,7 +7263,7 @@ X86TargetLowering::LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const {
static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) {
unsigned char OperandFlags, bool LocalDynamic = false) {
MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
DebugLoc dl = GA->getDebugLoc();
@ -7271,12 +7271,16 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
GA->getValueType(0),
GA->getOffset(),
OperandFlags);
X86ISD::NodeType CallType = LocalDynamic ? X86ISD::TLSBASEADDR
: X86ISD::TLSADDR;
if (InFlag) {
SDValue Ops[] = { Chain, TGA, *InFlag };
Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 3);
Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 3);
} else {
SDValue Ops[] = { Chain, TGA };
Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
Chain = DAG.getNode(CallType, dl, NodeTys, Ops, 2);
}
// TLSADDR will be codegen'ed as call. Inform MFI that function has calls.
@ -7308,6 +7312,45 @@ LowerToTLSGeneralDynamicModel64(GlobalAddressSDNode *GA, SelectionDAG &DAG,
X86::RAX, X86II::MO_TLSGD);
}
static SDValue LowerToTLSLocalDynamicModel(GlobalAddressSDNode *GA,
SelectionDAG &DAG,
const EVT PtrVT,
bool is64Bit) {
DebugLoc dl = GA->getDebugLoc();
// Get the start address of the TLS block for this module.
X86MachineFunctionInfo* MFI = DAG.getMachineFunction()
.getInfo<X86MachineFunctionInfo>();
MFI->incNumLocalDynamicTLSAccesses();
SDValue Base;
if (is64Bit) {
Base = GetTLSADDR(DAG, DAG.getEntryNode(), GA, NULL, PtrVT, X86::RAX,
X86II::MO_TLSLD, /*LocalDynamic=*/true);
} else {
SDValue InFlag;
SDValue Chain = DAG.getCopyToReg(DAG.getEntryNode(), dl, X86::EBX,
DAG.getNode(X86ISD::GlobalBaseReg, DebugLoc(), PtrVT), InFlag);
InFlag = Chain.getValue(1);
Base = GetTLSADDR(DAG, Chain, GA, &InFlag, PtrVT, X86::EAX,
X86II::MO_TLSLDM, /*LocalDynamic=*/true);
}
// Note: the CleanupLocalDynamicTLSPass will remove redundant computations
// of Base.
// Build x@dtpoff.
unsigned char OperandFlags = X86II::MO_DTPOFF;
unsigned WrapperKind = X86ISD::Wrapper;
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(), dl,
GA->getValueType(0),
GA->getOffset(), OperandFlags);
SDValue Offset = DAG.getNode(WrapperKind, dl, PtrVT, TGA);
// Add x@dtpoff with the base.
return DAG.getNode(ISD::ADD, dl, PtrVT, Offset, Base);
}
// Lower ISD::GlobalTLSAddress using the "initial exec" or "local exec" model.
static SDValue LowerToTLSExecModel(GlobalAddressSDNode *GA, SelectionDAG &DAG,
const EVT PtrVT, TLSModel::Model model,
@ -7372,8 +7415,6 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
const GlobalValue *GV = GA->getGlobal();
if (Subtarget->isTargetELF()) {
// TODO: implement the "local dynamic" model
// If GV is an alias then use the aliasee for determining
// thread-localness.
if (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV))
@ -7383,11 +7424,12 @@ X86TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const {
switch (model) {
case TLSModel::GeneralDynamic:
case TLSModel::LocalDynamic: // not implemented
if (Subtarget->is64Bit())
return LowerToTLSGeneralDynamicModel64(GA, DAG, getPointerTy());
return LowerToTLSGeneralDynamicModel32(GA, DAG, getPointerTy());
case TLSModel::LocalDynamic:
return LowerToTLSLocalDynamicModel(GA, DAG, getPointerTy(),
Subtarget->is64Bit());
case TLSModel::InitialExec:
case TLSModel::LocalExec:
return LowerToTLSExecModel(GA, DAG, getPointerTy(), model,
@ -11257,6 +11299,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::FRSQRT: return "X86ISD::FRSQRT";
case X86ISD::FRCP: return "X86ISD::FRCP";
case X86ISD::TLSADDR: return "X86ISD::TLSADDR";
case X86ISD::TLSBASEADDR: return "X86ISD::TLSBASEADDR";
case X86ISD::TLSCALL: return "X86ISD::TLSCALL";
case X86ISD::EH_RETURN: return "X86ISD::EH_RETURN";
case X86ISD::TC_RETURN: return "X86ISD::TC_RETURN";

View File

@ -207,6 +207,10 @@ namespace llvm {
// TLSADDR - Thread Local Storage.
TLSADDR,
// TLSBASEADDR - Thread Local Storage. A call to get the start address
// of the TLS block for the current module.
TLSBASEADDR,
// TLSCALL - Thread Local Storage. When calling to an OS provided
// thunk at the address from an earlier relocation.
TLSCALL,

View File

@ -375,11 +375,16 @@ let Defs = [EAX, ECX, EDX, FP0, FP1, FP2, FP3, FP4, FP5, FP6, ST0,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [ESP] in
Uses = [ESP] in {
def TLS_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_addr32",
[(X86tlsaddr tls32addr:$sym)]>,
Requires<[In32BitMode]>;
def TLS_base_addr32 : I<0, Pseudo, (outs), (ins i32mem:$sym),
"# TLS_base_addr32",
[(X86tlsbaseaddr tls32baseaddr:$sym)]>,
Requires<[In32BitMode]>;
}
// All calls clobber the non-callee saved registers. RSP is marked as
// a use to prevent stack-pointer assignments that appear immediately
@ -389,11 +394,16 @@ let Defs = [RAX, RCX, RDX, RSI, RDI, R8, R9, R10, R11,
MM0, MM1, MM2, MM3, MM4, MM5, MM6, MM7,
XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, EFLAGS],
Uses = [RSP] in
Uses = [RSP] in {
def TLS_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_addr64",
[(X86tlsaddr tls64addr:$sym)]>,
Requires<[In64BitMode]>;
def TLS_base_addr64 : I<0, Pseudo, (outs), (ins i64mem:$sym),
"# TLS_base_addr64",
[(X86tlsbaseaddr tls64baseaddr:$sym)]>,
Requires<[In64BitMode]>;
}
// Darwin TLS Support
// For i386, the address of the thunk is passed on the stack, on return the

View File

@ -21,6 +21,7 @@
#include "llvm/LLVMContext.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@ -3990,9 +3991,126 @@ namespace {
AU.setPreservesCFG();
MachineFunctionPass::getAnalysisUsage(AU);
}
private:
unsigned BaseReg;
};
}
char CGBR::ID = 0;
FunctionPass*
llvm::createGlobalBaseRegPass() { return new CGBR(); }
namespace {
struct LDTLSCleanup : public MachineFunctionPass {
static char ID;
LDTLSCleanup() : MachineFunctionPass(ID) {}
virtual bool runOnMachineFunction(MachineFunction &MF) {
X86MachineFunctionInfo* MFI = MF.getInfo<X86MachineFunctionInfo>();
if (MFI->getNumLocalDynamicTLSAccesses() < 2) {
// No point folding accesses if there isn't at least two.
return false;
}
MachineDominatorTree *DT = &getAnalysis<MachineDominatorTree>();
return VisitNode(DT->getRootNode(), 0);
}
// Visit the dominator subtree rooted at Node in pre-order.
// If TLSBaseAddrReg is non-null, then use that to replace any
// TLS_base_addr instructions. Otherwise, create the register
// when the first such instruction is seen, and then use it
// as we encounter more instructions.
bool VisitNode(MachineDomTreeNode *Node, unsigned TLSBaseAddrReg) {
MachineBasicBlock *BB = Node->getBlock();
bool Changed = false;
// Traverse the current block.
for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;
++I) {
switch (I->getOpcode()) {
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
if (TLSBaseAddrReg)
I = ReplaceTLSBaseAddrCall(I, TLSBaseAddrReg);
else
I = SetRegister(I, &TLSBaseAddrReg);
Changed = true;
break;
default:
break;
}
}
// Visit the children of this block in the dominator tree.
for (MachineDomTreeNode::iterator I = Node->begin(), E = Node->end();
I != E; ++I) {
Changed |= VisitNode(*I, TLSBaseAddrReg);
}
return Changed;
}
// Replace the TLS_base_addr instruction I with a copy from
// TLSBaseAddrReg, returning the new instruction.
MachineInstr *ReplaceTLSBaseAddrCall(MachineInstr *I,
unsigned TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF->getTarget());
const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
const X86InstrInfo *TII = TM->getInstrInfo();
// Insert a Copy from TLSBaseAddrReg to RAX/EAX.
MachineInstr *Copy = BuildMI(*I->getParent(), I, I->getDebugLoc(),
TII->get(TargetOpcode::COPY),
is64Bit ? X86::RAX : X86::EAX)
.addReg(TLSBaseAddrReg);
// Erase the TLS_base_addr instruction.
I->eraseFromParent();
return Copy;
}
// Create a virtal register in *TLSBaseAddrReg, and populate it by
// inserting a copy instruction after I. Returns the new instruction.
MachineInstr *SetRegister(MachineInstr *I, unsigned *TLSBaseAddrReg) {
MachineFunction *MF = I->getParent()->getParent();
const X86TargetMachine *TM =
static_cast<const X86TargetMachine *>(&MF->getTarget());
const bool is64Bit = TM->getSubtarget<X86Subtarget>().is64Bit();
const X86InstrInfo *TII = TM->getInstrInfo();
// Create a virtual register for the TLS base address.
MachineRegisterInfo &RegInfo = MF->getRegInfo();
*TLSBaseAddrReg = RegInfo.createVirtualRegister(is64Bit
? &X86::GR64RegClass
: &X86::GR32RegClass);
// Insert a copy from RAX/EAX to TLSBaseAddrReg.
MachineInstr *Next = I->getNextNode();
MachineInstr *Copy = BuildMI(*I->getParent(), Next, I->getDebugLoc(),
TII->get(TargetOpcode::COPY),
*TLSBaseAddrReg)
.addReg(is64Bit ? X86::RAX : X86::EAX);
return Copy;
}
virtual const char *getPassName() const {
return "Local Dynamic TLS Access Clean-up";
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
AU.addRequired<MachineDominatorTree>();
MachineFunctionPass::getAnalysisUsage(AU);
}
};
}
char LDTLSCleanup::ID = 0;
FunctionPass*
llvm::createCleanupLocalDynamicTLSPass() { return new LDTLSCleanup(); }

View File

@ -97,6 +97,8 @@ def SDTX86Wrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
def SDT_X86TLSADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TLSBASEADDR : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86TLSCALL : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def SDT_X86SEG_ALLOCA : SDTypeProfile<1, 1, [SDTCisVT<0, iPTR>, SDTCisVT<1, iPTR>]>;
@ -203,6 +205,9 @@ def X86WrapperRIP : SDNode<"X86ISD::WrapperRIP", SDTX86Wrapper>;
def X86tlsaddr : SDNode<"X86ISD::TLSADDR", SDT_X86TLSADDR,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def X86tlsbaseaddr : SDNode<"X86ISD::TLSBASEADDR", SDT_X86TLSBASEADDR,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def X86ehret : SDNode<"X86ISD::EH_RETURN", SDT_X86EHRET,
[SDNPHasChain]>;
@ -492,6 +497,9 @@ def lea32addr : ComplexPattern<i32, 5, "SelectLEAAddr",
def tls32addr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
def tls32baseaddr : ComplexPattern<i32, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
[add, sub, mul, X86mul_imm, shl, or, frameindex,
X86WrapperRIP], []>;
@ -499,6 +507,9 @@ def lea64addr : ComplexPattern<i64, 5, "SelectLEAAddr",
def tls64addr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
def tls64baseaddr : ComplexPattern<i64, 5, "SelectTLSADDRAddr",
[tglobaltlsaddr], []>;
//===----------------------------------------------------------------------===//
// X86 Instruction Predicate Definitions.
def HasCMov : Predicate<"Subtarget->hasCMov()">;

View File

@ -156,9 +156,12 @@ MCOperand X86MCInstLower::LowerSymbolOperand(const MachineOperand &MO,
break;
case X86II::MO_SECREL: RefKind = MCSymbolRefExpr::VK_SECREL; break;
case X86II::MO_TLSGD: RefKind = MCSymbolRefExpr::VK_TLSGD; break;
case X86II::MO_TLSLD: RefKind = MCSymbolRefExpr::VK_TLSLD; break;
case X86II::MO_TLSLDM: RefKind = MCSymbolRefExpr::VK_TLSLDM; break;
case X86II::MO_GOTTPOFF: RefKind = MCSymbolRefExpr::VK_GOTTPOFF; break;
case X86II::MO_INDNTPOFF: RefKind = MCSymbolRefExpr::VK_INDNTPOFF; break;
case X86II::MO_TPOFF: RefKind = MCSymbolRefExpr::VK_TPOFF; break;
case X86II::MO_DTPOFF: RefKind = MCSymbolRefExpr::VK_DTPOFF; break;
case X86II::MO_NTPOFF: RefKind = MCSymbolRefExpr::VK_NTPOFF; break;
case X86II::MO_GOTNTPOFF: RefKind = MCSymbolRefExpr::VK_GOTNTPOFF; break;
case X86II::MO_GOTPCREL: RefKind = MCSymbolRefExpr::VK_GOTPCREL; break;
@ -551,17 +554,38 @@ ReSimplify:
static void LowerTlsAddr(MCStreamer &OutStreamer,
X86MCInstLower &MCInstLowering,
const MachineInstr &MI) {
bool is64Bits = MI.getOpcode() == X86::TLS_addr64;
bool is64Bits = MI.getOpcode() == X86::TLS_addr64 ||
MI.getOpcode() == X86::TLS_base_addr64;
bool needsPadding = MI.getOpcode() == X86::TLS_addr64;
MCContext &context = OutStreamer.getContext();
if (is64Bits) {
if (needsPadding) {
MCInst prefix;
prefix.setOpcode(X86::DATA16_PREFIX);
OutStreamer.EmitInstruction(prefix);
}
MCSymbolRefExpr::VariantKind SRVK;
switch (MI.getOpcode()) {
case X86::TLS_addr32:
case X86::TLS_addr64:
SRVK = MCSymbolRefExpr::VK_TLSGD;
break;
case X86::TLS_base_addr32:
SRVK = MCSymbolRefExpr::VK_TLSLDM;
break;
case X86::TLS_base_addr64:
SRVK = MCSymbolRefExpr::VK_TLSLD;
break;
default:
llvm_unreachable("unexpected opcode");
}
MCSymbol *sym = MCInstLowering.GetSymbolFromOperand(MI.getOperand(3));
const MCSymbolRefExpr *symRef =
MCSymbolRefExpr::Create(sym, MCSymbolRefExpr::VK_TLSGD, context);
const MCSymbolRefExpr *symRef = MCSymbolRefExpr::Create(sym, SRVK, context);
MCInst LEA;
if (is64Bits) {
@ -583,7 +607,7 @@ static void LowerTlsAddr(MCStreamer &OutStreamer,
}
OutStreamer.EmitInstruction(LEA);
if (is64Bits) {
if (needsPadding) {
MCInst prefix;
prefix.setOpcode(X86::DATA16_PREFIX);
OutStreamer.EmitInstruction(prefix);
@ -645,6 +669,8 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
case X86::TLS_addr32:
case X86::TLS_addr64:
case X86::TLS_base_addr32:
case X86::TLS_base_addr64:
return LowerTlsAddr(OutStreamer, MCInstLowering, *MI);
case X86::MOVPC32r: {
@ -714,4 +740,3 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) {
MCInstLowering.Lower(MI, TmpInst);
OutStreamer.EmitInstruction(TmpInst);
}

View File

@ -66,6 +66,8 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
/// ArgumentStackSize - The number of bytes on stack consumed by the arguments
/// being passed on the stack.
unsigned ArgumentStackSize;
/// NumLocalDynamics - Number of local-dynamic TLS accesses.
unsigned NumLocalDynamics;
public:
X86MachineFunctionInfo() : ForceFramePointer(false),
@ -79,7 +81,8 @@ public:
RegSaveFrameIndex(0),
VarArgsGPOffset(0),
VarArgsFPOffset(0),
ArgumentStackSize(0) {}
ArgumentStackSize(0),
NumLocalDynamics(0) {}
explicit X86MachineFunctionInfo(MachineFunction &MF)
: ForceFramePointer(false),
@ -93,7 +96,8 @@ public:
RegSaveFrameIndex(0),
VarArgsGPOffset(0),
VarArgsFPOffset(0),
ArgumentStackSize(0) {}
ArgumentStackSize(0),
NumLocalDynamics(0) {}
bool getForceFramePointer() const { return ForceFramePointer;}
void setForceFramePointer(bool forceFP) { ForceFramePointer = forceFP; }
@ -130,6 +134,9 @@ public:
unsigned getArgumentStackSize() const { return ArgumentStackSize; }
void setArgumentStackSize(unsigned size) { ArgumentStackSize = size; }
unsigned getNumLocalDynamicTLSAccesses() const { return NumLocalDynamics; }
void incNumLocalDynamicTLSAccesses() { ++NumLocalDynamics; }
};
} // End llvm namespace

View File

@ -147,6 +147,10 @@ bool X86PassConfig::addInstSelector() {
// Install an instruction selector.
PM->add(createX86ISelDag(getX86TargetMachine(), getOptLevel()));
// For ELF, cleanup any local-dynamic TLS accesses.
if (getX86Subtarget().isTargetELF() && getOptLevel() != CodeGenOpt::None)
PM->add(createCleanupLocalDynamicTLSPass());
// For 32-bit, prepend instructions to set the "global base reg" for PIC.
if (!getX86Subtarget().is64Bit())
PM->add(createGlobalBaseRegPass());

View File

@ -0,0 +1,59 @@
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck %s
@x = internal thread_local global i32 0, align 4
@y = internal thread_local global i32 0, align 4
; get_x and get_y are here to prevent x and y to be optimized away as 0
define i32* @get_x() {
entry:
ret i32* @x
; FIXME: This function uses a single thread-local variable,
; so we might want to fall back to general-dynamic here.
; CHECK: get_x:
; CHECK: leaq x@TLSLD(%rip), %rdi
; CHECK-NEXT: callq __tls_get_addr@PLT
; CHECK: x@DTPOFF
}
define i32* @get_y() {
entry:
ret i32* @y
}
define i32 @f(i32 %i) {
entry:
%cmp = icmp eq i32 %i, 1
br i1 %cmp, label %return, label %if.else
; This bb does not access TLS, so should not call __tls_get_addr.
; CHECK: f:
; CHECK-NOT: __tls_get_addr
; CHECK: je
if.else:
%0 = load i32* @x, align 4
%cmp1 = icmp eq i32 %i, 2
br i1 %cmp1, label %if.then2, label %return
; Now we call __tls_get_addr.
; CHECK: # %if.else
; CHECK: leaq x@TLSLD(%rip), %rdi
; CHECK-NEXT: callq __tls_get_addr@PLT
; CHECK: x@DTPOFF
if.then2:
%1 = load i32* @y, align 4
%add = add nsw i32 %1, %0
br label %return
; This accesses TLS, but is dominated by the previous block,
; so should not have to call __tls_get_addr again.
; CHECK: # %if.then2
; CHECK-NOT: __tls_get_addr
; CHECK: y@DTPOFF
return:
%retval.0 = phi i32 [ %add, %if.then2 ], [ 5, %entry ], [ %0, %if.else ]
ret i32 %retval.0
}

View File

@ -2,6 +2,8 @@
; RUN: llc < %s -march=x86-64 -mtriple=x86_64-linux-gnu -relocation-model=pic | FileCheck -check-prefix=X64 %s
@i = thread_local global i32 15
@j = internal thread_local global i32 42
@k = internal thread_local global i32 42
define i32 @f1() {
entry:
@ -64,4 +66,22 @@ entry:
; X64: callq __tls_get_addr@PLT
define i32 @f5() nounwind {
entry:
%0 = load i32* @j, align 4
%1 = load i32* @k, align 4
%add = add nsw i32 %0, %1
ret i32 %add
}
; X32: f5:
; X32: leal {{[jk]}}@TLSLDM
; X32-NEXT: calll ___tls_get_addr@PLT
; X32-NEXT: movl {{[jk]}}@DTPOFF(%eax)
; X32-NEXT: addl {{[jk]}}@DTPOFF(%eax)
; X64: f5:
; X64: leaq {{[jk]}}@TLSLD(%rip), %rdi
; X64-NEXT: callq __tls_get_addr@PLT
; X64-NEXT: movl {{[jk]}}@DTPOFF(%rax)
; X64-NEXT: addl {{[jk]}}@DTPOFF(%rax)