This patch introduces initial-exec model support for thread-local storage on 64-bit PowerPC ELF.

The patch includes code to handle external assembly and MC output with the
integrated assembler.  It intentionally does not support the "old" JIT.

For the initial-exec TLS model, the ABI requires the following to calculate
the address of external thread-local variable x:

 Code sequence            Relocation                  Symbol
  ld 9,x@got@tprel(2)      R_PPC64_GOT_TPREL16_DS      x
  add 9,9,x@tls            R_PPC64_TLS                 x

The choice of register 9 is arbitrary here.  The linker will replace
x@got@tprel with the offset of the generated GOT entry for symbol x; that
entry holds the offset of x relative to the thread pointer.  It will
replace x@tls with the thread-pointer register (13).

The two test cases verify correct assembly output and relocation output
as just described.

PowerPC-specific selection node variants are added for the two
instructions above:  LD_GOT_TPREL and ADD_TLS.  These are inserted
when an initial-exec global variable is encountered by
PPCTargetLowering::LowerGlobalTLSAddress(), and later lowered to
machine instructions LDgotTPREL and ADD8TLS.  LDgotTPREL is a pseudo
that uses the same LDrs support added for medium code model's LDtocL,
with a different relocation type.

The rest of the processing is straightforward.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@169281 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bill Schmidt 2012-12-04 16:18:08 +00:00
parent 315f09f422
commit d7802bf0dd
18 changed files with 216 additions and 16 deletions

View File

@ -179,6 +179,8 @@ public:
VK_PPC_TPREL16_LO, // symbol@tprel@l
VK_PPC_TOC16_HA, // symbol@toc@ha
VK_PPC_TOC16_LO, // symbol@toc@l
VK_PPC_GOT_TPREL16_DS, // symbol@got@tprel
VK_PPC_TLS, // symbol@tls
VK_Mips_GPREL,
VK_Mips_GOT_CALL,

View File

@ -476,7 +476,9 @@ enum {
R_PPC64_TOC16_HA = 50,
R_PPC64_TOC = 51,
R_PPC64_TOC16_DS = 63,
R_PPC64_TOC16_LO_DS = 64
R_PPC64_TOC16_LO_DS = 64,
R_PPC64_TLS = 67,
R_PPC64_GOT_TPREL16_DS = 87
};
// ARM Specific e_flags

View File

@ -211,6 +211,8 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) {
case VK_PPC_TPREL16_LO: return "tprel@l";
case VK_PPC_TOC16_HA: return "toc@ha";
case VK_PPC_TOC16_LO: return "toc@l";
case VK_PPC_GOT_TPREL16_DS: return "got@tprel";
case VK_PPC_TLS: return "tls";
case VK_Mips_GPREL: return "GPREL";
case VK_Mips_GOT_CALL: return "GOT_CALL";
case VK_Mips_GOT16: return "GOT16";

View File

@ -31,6 +31,7 @@ static unsigned adjustFixupValue(unsigned Kind, uint64_t Value) {
case FK_Data_4:
case FK_Data_8:
case PPC::fixup_ppc_toc:
case PPC::fixup_ppc_tlsreg:
return Value;
case PPC::fixup_ppc_lo14:
case PPC::fixup_ppc_toc16_ds:
@ -83,7 +84,8 @@ public:
{ "fixup_ppc_lo14", 16, 14, 0 },
{ "fixup_ppc_toc", 0, 64, 0 },
{ "fixup_ppc_toc16", 16, 16, 0 },
{ "fixup_ppc_toc16_ds", 16, 14, 0 }
{ "fixup_ppc_toc16_ds", 16, 14, 0 },
{ "fixup_ppc_tlsreg", 0, 0, 0 }
};
if (Kind < FirstTargetFixupKind)

View File

@ -119,8 +119,14 @@ unsigned PPCELFObjectWriter::getRelocTypeInner(const MCValue &Target,
case MCSymbolRefExpr::VK_PPC_TOC16_LO:
Type = ELF::R_PPC64_TOC16_LO_DS;
break;
case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS:
Type = ELF::R_PPC64_GOT_TPREL16_DS;
break;
}
break;
case PPC::fixup_ppc_tlsreg:
Type = ELF::R_PPC64_TLS;
break;
case FK_Data_8:
switch (Modifier) {
default: llvm_unreachable("Unsupported Modifier");

View File

@ -44,6 +44,9 @@ enum Fixups {
/// fixup_ppc_toc16_ds - A 14-bit signed fixup relative to the TOC base with
/// implied 2 zero bits
fixup_ppc_toc16_ds,
/// fixup_ppc_tlsreg - Insert thread-pointer register number.
fixup_ppc_tlsreg,
// Marker
LastTargetFixupKind,

View File

@ -62,6 +62,10 @@ public:
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
unsigned get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const;
@ -195,6 +199,31 @@ unsigned PPCMCCodeEmitter::getMemRIXEncoding(const MCInst &MI, unsigned OpNo,
}
unsigned PPCMCCodeEmitter::getTLSOffsetEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
// Add a fixup for the GOT displacement to the TLS block offset.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_toc16_ds));
return 0;
}
unsigned PPCMCCodeEmitter::getTLSRegEncoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {
const MCOperand &MO = MI.getOperand(OpNo);
if (MO.isReg()) return getMachineOpValue(MI, MO, Fixups);
// Add a fixup for the TLS register, which simply provides a relocation
// hint to the linker that this statement is part of a relocation sequence.
// Return the thread-pointer register's encoding.
Fixups.push_back(MCFixup::Create(0, MO.getExpr(),
(MCFixupKind)PPC::fixup_ppc_tlsreg));
return getPPCRegisterNumbering(PPC::X13);
}
unsigned PPCMCCodeEmitter::
get_crbitm_encoding(const MCInst &MI, unsigned OpNo,
SmallVectorImpl<MCFixup> &Fixups) const {

View File

@ -65,13 +65,16 @@ namespace llvm {
MO_NLP_HIDDEN_FLAG = 16,
/// The next are not flags but distinct values.
MO_ACCESS_MASK = 224,
MO_ACCESS_MASK = 0xe0,
/// MO_LO16, MO_HA16 - lo16(symbol) and ha16(symbol)
MO_LO16 = 32, MO_HA16 = 64,
MO_LO16 = 1 << 5,
MO_HA16 = 2 << 5,
MO_TPREL16_HA = 96,
MO_TPREL16_LO = 128
MO_TPREL16_HA = 3 << 5,
MO_TPREL16_LO = 4 << 5,
MO_GOT_TPREL16_DS = 5 << 5,
MO_TLS = 6 << 5
};
} // end namespace PPCII

View File

@ -513,6 +513,23 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
OutStreamer.EmitInstruction(TmpInst);
return;
}
case PPC::LDgotTPREL: {
// Transform %Xd = LDgotTPREL <ga:@sym>, %Xs
LowerPPCMachineInstrToMCInst(MI, TmpInst, *this, Subtarget.isDarwin());
// Change the opcode to LDrs, which is a form of LD with the offset
// specified by a SymbolLo.
TmpInst.setOpcode(PPC::LDrs);
const MachineOperand &MO = MI->getOperand(1);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = Mang->getSymbol(GValue);
const MCExpr *Exp =
MCSymbolRefExpr::Create(MOSymbol, MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS,
OutContext);
TmpInst.getOperand(1) = MCOperand::CreateExpr(Exp);
OutStreamer.EmitInstruction(TmpInst);
return;
}
case PPC::MFCRpseud:
case PPC::MFCR8pseud:
// Transform: %R3 = MFCRpseud %CR7

View File

@ -68,6 +68,8 @@ namespace {
unsigned getLO16Encoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getMemRIXEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getTLSOffsetEncoding(const MachineInstr &MI, unsigned OpNo) const;
unsigned getTLSRegEncoding(const MachineInstr &MI, unsigned OpNo) const;
const char *getPassName() const { return "PowerPC Machine Code Emitter"; }
@ -243,6 +245,20 @@ unsigned PPCCodeEmitter::getMemRIXEncoding(const MachineInstr &MI,
}
// Encoder hook for TLS offset operands in the old JIT code emitter.
// TLS is not supported on this path, so reaching this function is reported
// through llvm_unreachable; MI and OpNo are not inspected.
unsigned PPCCodeEmitter::getTLSOffsetEncoding(const MachineInstr &MI,
unsigned OpNo) const {
llvm_unreachable("TLS not supported on the old JIT.");
// Fallback return value following the unreachable marker.
return 0;
}
// Encoder hook for TLS register operands in the old JIT code emitter.
// TLS is not supported on this path, so reaching this function is reported
// through llvm_unreachable; MI and OpNo are not inspected.
unsigned PPCCodeEmitter::getTLSRegEncoding(const MachineInstr &MI,
unsigned OpNo) const {
llvm_unreachable("TLS not supported on the old JIT.");
// Fallback return value following the unreachable marker.
return 0;
}
unsigned PPCCodeEmitter::getMachineOpValue(const MachineInstr &MI,
const MachineOperand &MO) const {

View File

@ -1311,6 +1311,11 @@ SDNode *PPCDAGToDAGISel::Select(SDNode *N) {
return CurDAG->getMachineNode(PPC::ADDItocL, dl, MVT::i64,
SDValue(Tmp, 0), GA);
}
case PPCISD::LD_GOT_TPREL: {
assert (PPCSubTarget.isPPC64() && "Only supported for 64-bit ABI");
return CurDAG->getMachineNode(PPC::LDgotTPREL, dl, MVT::i64,
N->getOperand(0), N->getOperand(1));
}
}
return SelectCode(N);

View File

@ -578,6 +578,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADDIS_TOC_HA: return "PPCISD::ADDIS_TOC_HA";
case PPCISD::LD_TOC_L: return "PPCISD::LD_TOC_L";
case PPCISD::ADDI_TOC_L: return "PPCISD::ADDI_TOC_L";
case PPCISD::LD_GOT_TPREL: return "PPCISD::LD_GOT_TPREL";
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
}
}
@ -1324,19 +1326,34 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
EVT PtrVT = getPointerTy();
bool is64bit = PPCSubTarget.isPPC64();
TLSModel::Model model = getTargetMachine().getTLSModel(GV);
TLSModel::Model Model = getTargetMachine().getTLSModel(GV);
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL16_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL16_LO);
if (Model == TLSModel::LocalExec) {
SDValue TGAHi = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL16_HA);
SDValue TGALo = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TPREL16_LO);
SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
is64bit ? MVT::i64 : MVT::i32);
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
}
if (model != TLSModel::LocalExec)
llvm_unreachable("only local-exec TLS mode supported");
SDValue TLSReg = DAG.getRegister(is64bit ? PPC::X13 : PPC::R2,
if (!is64bit)
llvm_unreachable("only local-exec is currently supported for ppc32");
if (Model != TLSModel::InitialExec)
llvm_unreachable("only local-exec and initial-exec TLS modes supported");
SDValue GOTOffset = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_GOT_TPREL16_DS);
SDValue TPReg = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TLS);
SDValue GOTReg = DAG.getRegister(is64bit ? PPC::X2 : PPC::R2,
is64bit ? MVT::i64 : MVT::i32);
SDValue Hi = DAG.getNode(PPCISD::Hi, dl, PtrVT, TGAHi, TLSReg);
return DAG.getNode(PPCISD::Lo, dl, PtrVT, TGALo, Hi);
SDValue TPOffset = DAG.getNode(PPCISD::LD_GOT_TPREL, dl, PtrVT,
GOTOffset, GOTReg);
return DAG.getNode(PPCISD::ADD_TLS, dl, PtrVT, TPOffset, TPReg);
}
SDValue PPCTargetLowering::LowerGlobalAddress(SDValue Op,

View File

@ -178,6 +178,20 @@ namespace llvm {
CR6SET,
CR6UNSET,
/// G8RC = LD_GOT_TPREL Symbol, G8RReg - Used by the initial-exec
/// TLS model, produces a LD instruction with base register G8RReg
/// and offset sym@got@tprel. The latter identifies the GOT entry
/// containing the offset of "sym" relative to the thread pointer.
LD_GOT_TPREL,
/// G8RC = ADD_TLS G8RReg, Symbol - Used by the initial-exec TLS
/// model, produces an ADD instruction that adds the contents of
/// G8RReg to the thread pointer. Symbol contains a relocation
/// sym@tls which is to be replaced by the thread pointer and
/// identifies to the linker that the instruction is part of a
/// TLS sequence.
ADD_TLS,
/// STD_32 - This is the STD instruction for use with "32-bit" registers.
STD_32 = ISD::FIRST_TARGET_MEMORY_OPCODE,

View File

@ -37,6 +37,12 @@ def memrs : Operand<iPTR> { // memri where the immediate is a symbolLo64
let EncoderMethod = "getMemRIXEncoding";
let MIOperandInfo = (ops symbolLo64:$off, ptr_rc:$reg);
}
// tlsaddr - 64-bit operand encoded through getTLSOffsetEncoding.  Used as the
// symbolic displacement ($disp) of the LDgotTPREL pseudo.
def tlsaddr : Operand<i64> {
let EncoderMethod = "getTLSOffsetEncoding";
}
// tlsreg - 64-bit operand encoded through getTLSRegEncoding.  Used as the
// second source operand ($rB) of ADD8TLS, where it holds a sym@tls reference.
def tlsreg : Operand<i64> {
let EncoderMethod = "getTLSRegEncoding";
}
//===----------------------------------------------------------------------===//
// 64-bit transformation functions.
@ -364,6 +370,11 @@ def XORIS8 : DForm_4<27, (outs G8RC:$dst), (ins G8RC:$src1, u16imm:$src2),
def ADD8 : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
"add $rT, $rA, $rB", IntSimple,
[(set G8RC:$rT, (add G8RC:$rA, G8RC:$rB))]>;
// ADD8 has a special form: reg = ADD8(reg, sym@tls) for use by the
// initial-exec thread-local storage model.  ADD8TLS uses the same
// XOForm_1<31, 266, 0> encoding and assembly string as ADD8; only the $rB
// operand differs, being a tlsreg (a TLS global address) rather than a G8RC
// register.
def ADD8TLS : XOForm_1<31, 266, 0, (outs G8RC:$rT), (ins G8RC:$rA, tlsreg:$rB),
"add $rT, $rA, $rB", IntSimple,
[(set G8RC:$rT, (add G8RC:$rA, tglobaltlsaddr:$rB))]>;
let Defs = [CARRY] in {
def ADDC8 : XOForm_1<31, 10, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
@ -697,6 +708,15 @@ def ADDItocL: Pseudo<(outs G8RC:$rD), (ins G8RC:$reg, tocentry:$disp),
[(set G8RC:$rD,
(PPCaddiTocL G8RC:$reg, tglobaladdr:$disp))]>, isPPC64;
// Support for thread-local storage.
// LDgotTPREL - 64-bit-only pseudo for the GOT load of the initial-exec TLS
// sequence.  It takes a TLS symbol displacement and a base register and
// defines a G8RC result.  The "#LDgotTPREL" string is only a placeholder;
// PPCAsmPrinter::EmitInstruction rewrites the opcode to LDrs before emission.
// NOTE(review): the selection pattern lists $reg before $disp, while the ins
// list has $disp first — confirm this ordering is intentional.
def LDgotTPREL: Pseudo<(outs G8RC:$rD), (ins tlsaddr:$disp, G8RC:$reg),
"#LDgotTPREL",
[(set G8RC:$rD,
(PPCldGotTprel G8RC:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
def : Pat<(PPCaddTls G8RC:$in, tglobaltlsaddr:$g),
(ADD8TLS G8RC:$in, tglobaltlsaddr:$g)>;
let PPC970_Unit = 2 in {
// Truncating stores.
def STB8 : DForm_1<38, (outs), (ins G8RC:$rS, memri:$src),

View File

@ -91,6 +91,9 @@ def PPCtoc_entry: SDNode<"PPCISD::TOC_ENTRY", SDTIntBinOp, [SDNPMayLoad]>;
def PPCvmaddfp : SDNode<"PPCISD::VMADDFP", SDTFPTernaryOp, []>;
def PPCvnmsubfp : SDNode<"PPCISD::VNMSUBFP", SDTFPTernaryOp, []>;
def PPCldGotTprel : SDNode<"PPCISD::LD_GOT_TPREL", SDTIntBinOp, [SDNPMayLoad]>;
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
// These nodes represent the 32-bit PPC shifts that operate on 6-bit shift

View File

@ -114,6 +114,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
break;
case PPCII::MO_TPREL16_LO: RefKind = MCSymbolRefExpr::VK_PPC_TPREL16_LO;
break;
case PPCII::MO_GOT_TPREL16_DS:
RefKind = MCSymbolRefExpr::VK_PPC_GOT_TPREL16_DS;
break;
case PPCII::MO_TLS:
RefKind = MCSymbolRefExpr::VK_PPC_TLS;
break;
}
// FIXME: This isn't right, but we don't have a good way to express this in

View File

@ -0,0 +1,32 @@
; RUN: llc -mcpu=pwr7 -O0 -filetype=obj %s -o - | \
; RUN: elf-dump --dump-section-data | FileCheck %s
; Test correct relocation generation for thread-local storage
; using the initial-exec model and integrated assembly.
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@a = external thread_local global i32
define signext i32 @main() nounwind {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
%0 = load i32* @a, align 4
ret i32 %0
}
; Verify generation of R_PPC64_GOT_TPREL16_DS and R_PPC64_TLS for
; accessing external variable a.
;
; CHECK: '.rela.text'
; CHECK: Relocation 0
; CHECK-NEXT: 'r_offset'
; CHECK-NEXT: 'r_sym', 0x[[SYM1:[0-9a-f]+]]
; CHECK-NEXT: 'r_type', 0x00000057
; CHECK: Relocation 1
; CHECK-NEXT: 'r_offset'
; CHECK-NEXT: 'r_sym', 0x[[SYM1]]
; CHECK-NEXT: 'r_type', 0x00000043

View File

@ -0,0 +1,21 @@
; RUN: llc -mcpu=pwr7 -O0 <%s | FileCheck %s
; Test correct assembly code generation for thread-local storage
; using the initial-exec model.
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
@a = external thread_local global i32
define signext i32 @main() nounwind {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval
%0 = load i32* @a, align 4
ret i32 %0
}
; CHECK: ld [[REG:[0-9]+]], a@got@tprel(2)
; CHECK: add {{[0-9]+}}, [[REG]], a@tls