This patch implements the general dynamic TLS model for 64-bit PowerPC.

Given a thread-local symbol x with global-dynamic access, the generated
code to obtain x's address is:

     Instruction                            Relocation            Symbol
  addis ra,r2,x@got@tlsgd@ha           R_PPC64_GOT_TLSGD16_HA       x
  addi  r3,ra,x@got@tlsgd@l            R_PPC64_GOT_TLSGD16_L        x
  bl __tls_get_addr(x@tlsgd)           R_PPC64_TLSGD                x
                                       R_PPC64_REL24           __tls_get_addr
  nop
  <use address in r3>

The implementation borrows from the medium code model work for introducing
special forms of ADDIS and ADDI into the DAG representation.  This is made
slightly more complicated by having to introduce a call to the external
function __tls_get_addr.  Using the full call machinery is overkill and,
more importantly, makes it difficult to add a special relocation.  So I've
introduced another opcode GET_TLS_ADDR to represent the function call, and
surrounded it with register copies to set up the parameter and return value.

Most of the code is pretty straightforward.  I ran into one peculiarity
when I introduced a new PPC opcode BL8_NOP_ELF_TLSGD, which is just like
BL8_NOP_ELF except that it takes a second operand to represent the symbol
("x" above) that requires a relocation on the call.  Something in the
TableGen machinery causes BL8_NOP_ELF and BL8_NOP_ELF_TLSGD to be treated
identically during the emit phase, so this second operand was never
visited to generate relocations.  This is the reason for the slightly
messy workaround in PPCMCCodeEmitter.cpp:getDirectBrEncoding().

Two new tests are included: one checks the assembly text emitted for an
external assembler, and the other checks the relocations generated by the
integrated assembler.

Comments welcome!

Thanks,
Bill


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169910 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bill Schmidt 2012-12-11 20:30:11 +00:00
parent 0b944ee36f
commit 57ac1f458a
15 changed files with 245 additions and 14 deletions

View File

@ -181,6 +181,9 @@ public:
VK_PPC_TOC16_LO, // symbol@toc@l
VK_PPC_GOT_TPREL16_DS, // symbol@got@tprel
VK_PPC_TLS, // symbol@tls
VK_PPC_GOT_TLSGD16_HA, // symbol@got@tlsgd@ha
VK_PPC_GOT_TLSGD16_LO, // symbol@got@tlsgd@l
VK_PPC_TLSGD, // symbol@tlsgd
VK_Mips_GPREL,
VK_Mips_GOT_CALL,

View File

@ -478,7 +478,10 @@ enum {
R_PPC64_TOC16_DS = 63,
R_PPC64_TOC16_LO_DS = 64,
R_PPC64_TLS = 67,
R_PPC64_GOT_TPREL16_DS = 87
R_PPC64_GOT_TLSGD16_LO = 80,
R_PPC64_GOT_TLSGD16_HA = 82,
R_PPC64_GOT_TPREL16_DS = 87,
R_PPC64_TLSGD = 107
};
// ARM Specific e_flags

View File

@ -213,6 +213,9 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_PPC_TOC16_LO: return "toc@l";
case VK_PPC_GOT_TPREL16_DS: return "got@tprel";
case VK_PPC_TLS: return "tls";
case VK_PPC_GOT_TLSGD16_HA: return "got@tlsgd@ha";
case VK_PPC_GOT_TLSGD16_LO: return "got@tlsgd@l";
case VK_PPC_TLSGD: return "tlsgd";
case VK_Mips_GPREL: return "GPREL";
case VK_Mips_GOT_CALL: return "GOT_CALL";
case VK_Mips_GOT16: return "GOT16";

View File

@ -32,6 +32,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_8:
case PPC::fixup_ppc_toc:
case PPC::fixup_ppc_tlsreg:
case PPC::fixup_ppc_tlsgd:
return Value;
case PPC::fixup_ppc_lo14:
case PPC::fixup_ppc_toc16_ds:
@ -85,7 +86,8 @@ public:
{ "fixup_ppc_toc", 0, 64, 0 },
{ "fixup_ppc_toc16", 16, 16, 0 },
{ "fixup_ppc_toc16_ds", 16, 14, 0 },
{ "fixup_ppc_tlsreg", 0, 0, 0 }
{ "fixup_ppc_tlsreg", 0, 0, 0 },
{ "fixup_ppc_tlsgd", 0, 0, 0 }
};
if (Kind < FirstTargetFixupKind)

View File

@ -85,6 +85,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TOC16_HA:
Type = ELF::R_PPC64_TOC16_HA;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA:
Type = ELF::R_PPC64_GOT_TLSGD16_HA;
break;
}
break;
case PPC::fixup_ppc_lo16:
@ -99,6 +102,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TOC16_LO:
Type = ELF::R_PPC64_TOC16_LO;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO:
Type = ELF::R_PPC64_GOT_TLSGD16_LO;
break;
}
break;
case PPC::fixup_ppc_lo14:
@ -127,6 +133,9 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case PPC::fixup_ppc_tlsreg:
Type = ELF::R_PPC64_TLS;
break;
case PPC::fixup_ppc_tlsgd:
Type = ELF::R_PPC64_TLSGD;
break;
case FK_Data_8:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");

View File

@ -47,6 +47,10 @@ enum Fixups {
/// fixup_ppc_tlsreg - Insert thread-pointer register number.
fixup_ppc_tlsreg,
/// fixup_ppc_tlsgd - Not a true fixup, but ties a symbol to a call
/// to __tls_get_addr for the TLS global dynamic model.
fixup_ppc_tlsgd,
// Marker
LastTargetFixupKind,

View File

@ -17,6 +17,7 @@
#include "MCTargetDesc/PPCFixupKinds.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/MC/MCCodeEmitter.h"
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCInstrInfo.h"
#include "llvm/MC/MCSubtargetInfo.h"
@ -119,6 +120,16 @@ getDirectBrEncoding(const MCInst &MI, unsigned OpNo,
// Add a fixup for the branch target.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_br24));
// For special TLS calls, add another fixup for the symbol. Apparently
// BL8_NOP_ELF and BL8_NOP_ELF_TLSGD are sufficiently similar that TblGen
// will not generate a separate case for the latter, so this is the only
// way to get the extra fixup generated.
if (MI.getOpcode() == PPC::BL8_NOP_ELF_TLSGD) {
const MCOperand &MO2 = MI.getOperand(OpNo+1);
Fixups.push_back(MCFixup::Create(0, MO2.getExpr(),
(MCFixupKind)PPC::fixup_ppc_tlsgd));
}
return 0;
}
@ -223,7 +234,6 @@ unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
return getPPCRegisterNumbering(PPC::X13);
}
unsigned PPCMCCodeEmitter::
get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {

View File

@ -530,6 +530,58 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(TmpInst);
return;
}
case PPC::ADDIStlsgdHA: {
// Transform: %Xd = ADDIStlsgdHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tlsgd@ha
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDIS8)
.addReg(MI->getOperand(0).getReg())
.addReg(PPC::X2)
.addExpr(SymGotTlsGD));
return;
}
case PPC::ADDItlsgdL: {
// Transform: %Xd = ADDItlsgdL %Xs, <ga:@sym>
// Into: %Xd = ADDI8L %Xs, sym@got@tlsgd@l
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
const MCExpr *SymGotTlsGD =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::ADDI8L)
.addReg(MI->getOperand(0).getReg())
.addReg(MI->getOperand(1).getReg())
.addExpr(SymGotTlsGD));
return;
}
case PPC::GETtlsADDR: {
// Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
// Into: BL8_NOP_ELF_TLSGD __tls_get_addr(sym@tlsgd)
assert(Subtarget.isPPC64() && "Not supported for 32-bit PowerPC");
StringRef Name = "__tls_get_addr";
MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
const MCSymbolRefExpr *TlsRef =
MCSymbolRefExpr::Create(TlsGetAddr, MCSymbolRefExpr::VK_None, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
const MCExpr *SymVar =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_TLSGD,
OutContext);
OutStreamer.EmitInstruction(MCInstBuilder(PPC::BL8_NOP_ELF_TLSGD)
.addExpr(TlsRef)
.addExpr(SymVar));
return;
}
case PPC::MFCRpseud:
case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7

View File

@ -1316,6 +1316,22 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::LDgotTPREL, dl, MVT::i64,
N->getOperand(0), N->getOperand(1));
}
// FIXME: Try without these. Doesn't seem necessary.
case PPCISD::ADDIS_TLSGD_HA: {
assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
return CurDAG->getMachineNode(PPC::ADDIStlsgdHA, dl, MVT::i64,
N->getOperand(0), N->getOperand(1));
}
case PPCISD::ADDI_TLSGD_L: {
assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
return CurDAG->getMachineNode(PPC::ADDItlsgdL, dl, MVT::i64,
N->getOperand(0), N->getOperand(1));
}
case PPCISD::GET_TLS_ADDR: {
assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
return CurDAG->getMachineNode(PPC::GETtlsADDR, dl, MVT::i64,
N->getOperand(0), N->getOperand(1));
}
}
return SelectCode(N);

View File

@ -580,6 +580,9 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
case PPCISD::LD_GOT_TPREL: return "PPCISD::LD_GOT_TPREL";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
}
}
@ -1342,18 +1345,42 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
if (!is64bit)
llvm_unreachable("only local-exec is currently supported for ppc32");
if (Model != TLSModel::InitialExec)
llvm_unreachable("only local-exec and initial-exec TLS modes supported");
if (Model == TLSModel::InitialExec) {
SDValue GOTOffset = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_GOT_TPREL16_DS);
SDValue TPReg = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TLS);
SDValue GOTReg = DAG.getRegister(is64bit ? PPC::X2 : PPC::R2,
is64bit ? MVT::i64 : MVT::i32);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL, dl, PtrVT,
GOTOffset, GOTReg);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TPReg);
}
SDValue GOTOffset = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_GOT_TPREL16_DS);
SDValue TPReg = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TLS);
SDValue GOTReg = DAG.getRegister(is64bit ? PPC::X2 : PPC::R2,
is64bit ? MVT::i64 : MVT::i32);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL, dl, PtrVT,
GOTOffset, GOTReg);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TPReg);
if (Model == TLSModel::GeneralDynamic) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTReg = DAG.getRegister(PPC::X2, MVT::i64);
SDValue GOTEntryHi = DAG.getNode(PPCISD::ADDIS_TLSGD_HA, dl, PtrVT,
GOTReg, TGA);
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
GOTEntryHi, TGA);
// We need a chain node, and don't have one handy. The underlying
// call has no side effects, so using the function entry node
// suffices.
SDValue Chain = DAG.getEntryNode();
Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, GOTEntry);
SDValue ParmReg = DAG.getRegister(PPC::X3, MVT::i64);
SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLS_ADDR, dl,
PtrVT, ParmReg, TGA);
// The call to GET_TLS_ADDR really is in X3 already, but
// some hacks are needed here to tie everything together.
// The extra copies dissolve during subsequent transforms.
Chain = DAG.getCopyToReg(Chain, dl, PPC::X3, TLSAddr);
return DAG.getCopyFromReg(Chain, dl, PPC::X3, PtrVT);
}
llvm_unreachable("local-dynamic TLS mode is not yet supported");
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,

View File

@ -192,6 +192,20 @@ namespace llvm {
/// TLS sequence.
ADD_TLS,
/// G8RC = ADDIS_TLSGD_HA %X2, Symbol - For the general-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
/// register to sym@got@tlsgd@ha.
ADDIS_TLSGD_HA,
/// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
/// sym@got@tlsgd@l.
ADDI_TLSGD_L,
/// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
/// model, produces a call to __tls_get_addr(sym@tlsgd).
GET_TLS_ADDR,
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,

View File

@ -43,6 +43,7 @@ def tlsaddr : Operand<i64> {
def tlsreg : Operand<i64> {
let EncoderMethod = "getTLSRegEncoding";
}
def tlsgd : Operand<i64> {}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
@ -110,6 +111,11 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
(outs), (ins calltarget:$func),
"bl $func\n\tnop", BrB, []>;
let isCodeGenOnly = 1 in
def BL8_NOP_ELF_TLSGD : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func, tlsgd:$sym),
"bl $func($sym)\n\tnop", BrB, []>;
def BLA8_ELF : IForm<18, 1, 1,
(outs), (ins aaddr:$func),
"bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
@ -716,6 +722,21 @@ def LDgotTPREL: Pseudo<(outs G8RC:$rD), (ins tlsaddr:$disp, G8RC:$reg),
isPPC64;
def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g),
(ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>;
def ADDIStlsgdHA: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolHi64:$disp),
"#ADDIStlsgdHA",
[(set G8RC:$rD,
(PPCaddisTlsgdHA G8RC:$reg, tglobaladdr:$disp))]>,
isPPC64;
def ADDItlsgdL : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, symbolLo64:$disp),
"#ADDItlsgdL",
[(set G8RC:$rD,
(PPCaddiTlsgdL G8RC:$reg, tglobaladdr:$disp))]>,
isPPC64;
def GETtlsADDR : Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tlsgd:$sym),
"#GETtlsADDR",
[(set G8RC:$rD,
(PPCgetTlsAddr G8RC:$reg, tglobaladdr:$sym))]>,
isPPC64;
let PPC970_Unit = 2 in {
// Truncating stores.

View File

@ -93,6 +93,9 @@ def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
def PPCldGotTprel : SDNode<"PPCISD::LD_GOT_TPREL", SDTIntBinOp, [SDNPMayLoad]>;
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;

View File

@ -0,0 +1,41 @@
; RUN: llc -mcpu=pwr7 -O0 -filetype=obj -relocation-model=pic %s -o - | \
; RUN: elf-dump --dump-section-data | FileCheck %s
; Test correct relocation generation for thread-local storage using
; the general dynamic model and integrated assembly.
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@a = thread_local global i32 0, align 4
define signext i32 @main() nounwind {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
%0 = load i32* @a, align 4
ret i32 %0
}
; Verify generation of R_PPC64_GOT_TLSGD16_HA, R_PPC64_GOT_TLSGD16_LO,
; and R_PPC64_TLSGD for accessing external variable a, and R_PPC64_REL24
; for the call to __tls_get_addr.
;
; CHECK: '.rela.text'
; CHECK: Relocation 0
; CHECK-NEXT: 'r_offset'
; CHECK-NEXT: 'r_sym', 0x[[SYM1:[0-9a-f]+]]
; CHECK-NEXT: 'r_type', 0x00000052
; CHECK: Relocation 1
; CHECK-NEXT: 'r_offset'
; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
; CHECK-NEXT: 'r_type', 0x00000050
; CHECK: Relocation 2
; CHECK-NEXT: 'r_offset'
; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
; CHECK-NEXT: 'r_type', 0x0000006b
; CHECK: Relocation 3
; CHECK-NEXT: 'r_offset'
; CHECK-NEXT: 'r_sym', 0x{{[0-9a-f]+}}
; CHECK-NEXT: 'r_type', 0x0000000a

View File

@ -0,0 +1,23 @@
; RUN: llc -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck %s
; Test correct assembly code generation for thread-local storage using
; the general dynamic model.
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@a = thread_local global i32 0, align 4
define signext i32 @main() nounwind {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
%0 = load i32* @a, align 4
ret i32 %0
}
; CHECK: addis [[REG:[0-9]+]], 2, a@got@tlsgd@ha
; CHECK-NEXT: addi 3, [[REG]], a@got@tlsgd@l
; CHECK-NEXT: bl __tls_get_addr(a@tlsgd)
; CHECK-NEXT: nop