Revert "r227976 - [PowerPC] Yet another approach to __tls_get_addr" and related fixups

Unfortunately, even with the workaround of disabling the linker TLS
optimizations in Clang restored (which has already been done), this still
breaks self-hosting on my P7 machine (-O3 -DNDEBUG -mcpu=native).

Bill is currently working on an alternate implementation to address the TLS
issue in a way that also fully elides the linker bug (which, unfortunately,
this approach did not fully), so I'm reverting this now.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228460 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2015-02-06 23:07:40 +00:00
parent ead6da39e9
commit 9168f717c9
15 changed files with 117 additions and 292 deletions

View File

@ -32,7 +32,6 @@ add_llvm_target(PowerPCCodeGen
PPCTargetObjectFile.cpp
PPCTargetTransformInfo.cpp
PPCSelectionDAGInfo.cpp
PPCTLSDynamicCall.cpp
PPCVSXCopy.cpp
PPCVSXFMAMutate.cpp
)

View File

@ -40,7 +40,6 @@ namespace llvm {
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCTLSDynamicCallPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
@ -91,7 +90,12 @@ namespace llvm {
MO_TOC_LO = 7 << 4,
// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
MO_TLS = 8 << 4
MO_TLS = 8 << 4,
// Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
// call sequences.
MO_TLSLD = 9 << 4,
MO_TLSGD = 10 << 4
};
} // end namespace PPCII

View File

@ -101,7 +101,6 @@ namespace {
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
@ -405,39 +404,6 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
}
/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
/// call to __tls_get_addr to the current output stream.
void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
MCSymbolRefExpr::VariantKind VK) {
StringRef Name = "__tls_get_addr";
MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
assert(MI->getOperand(0).isReg() &&
((Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::X3) ||
(!Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::R3)) &&
"GETtls[ld]ADDR[32] must define GPR3");
assert(MI->getOperand(1).isReg() &&
((Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::X3) ||
(!Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) &&
"GETtls[ld]ADDR[32] must read GPR3");
if (!Subtarget.isPPC64() && !Subtarget.isDarwin() &&
TM.getRelocationModel() == Reloc::PIC_)
Kind = MCSymbolRefExpr::VK_PLT;
const MCSymbolRefExpr *TlsRef =
MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, VK, OutContext);
EmitToStreamer(OutStreamer,
MCInstBuilder(Subtarget.isPPC64() ?
PPC::BL8_NOP_TLS : PPC::BL_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
}
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
@ -841,15 +807,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(SymGotTlsGD));
return;
}
case PPC::GETtlsADDR:
// Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
// Into: BL8_NOP_TLS __tls_get_addr(sym@tlsgd)
case PPC::GETtlsADDR32: {
// Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym>
// Into: BL_TLS __tls_get_addr(sym@tlsgd)@PLT
EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD);
return;
}
case PPC::ADDIStlsldHA: {
// Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
@ -887,15 +844,6 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(SymGotTlsLD));
return;
}
case PPC::GETtlsldADDR:
// Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
// Into: BL8_NOP_TLS __tls_get_addr(sym@tlsld)
case PPC::GETtlsldADDR32: {
// Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym>
// Into: BL_TLS __tls_get_addr(sym@tlsld)@PLT
EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD);
return;
}
case PPC::ADDISdtprelHA:
// Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
// Into: %Xd = ADDIS8 %X3, sym@dtprel@ha

View File

@ -355,20 +355,6 @@ static bool hasNonRISpills(const MachineFunction &MF) {
return FuncInfo->hasNonRISpills();
}
/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
// We need a save/restore of LR if there is any def of LR (which is
// defined by calls, including the PIC setup sequence), or if there is
// some use of the LR stack slot (e.g. for builtin_return_address).
// (LR comes in 32 and 64 bit versions.)
MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
@ -395,7 +381,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// stackless code if all local vars are reg-allocated.
bool DisableRedZone = MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
unsigned LR = RegInfo->getRARegister();
if (!DisableRedZone &&
(Subtarget.isPPC64() || // 32-bit SVR4, no stack-
!Subtarget.isSVR4ABI() || // allocated locals.
@ -403,7 +388,6 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!MustSaveLR(MF, LR) &&
!RegInfo->hasBasePointer(MF)) { // No special alignment.
// No need for frame
if (UpdateMF)
@ -1124,6 +1108,20 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
// We need a save/restore of LR if there is any def of LR (which is
// defined by calls, including the PIC setup sequence), or if there is
// some use of the LR stack slot (e.g. for builtin_return_address).
// (LR comes in 32 and 64 bit versions.)
MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *) const {

View File

@ -806,6 +806,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS";
case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
@ -839,10 +841,8 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
@ -1701,6 +1701,27 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
// Generate a call to __tls_get_addr for the given GOT entry Op.
std::pair<SDValue,SDValue>
PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
SelectionDAG &DAG) const {
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Op;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(CallingConv::C, IntPtrTy,
DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
std::move(Args), 0);
return LowerCallTo(CLI);
}
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
@ -1747,7 +1768,8 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::GeneralDynamic) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TLSGD);
SDValue GOTPtr;
if (is64bit) {
setUsesTOCBasePtr(DAG);
@ -1760,13 +1782,15 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl,
PtrVT, GOTPtr, TGA);
return DAG.getNode(PPCISD::GET_TLS_ADDR, dl, PtrVT, GOTEntry, TGA);
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
GOTPtr, TGA);
std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
return CallResult.first;
}
if (Model == TLSModel::LocalDynamic) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TLSLD);
SDValue GOTPtr;
if (is64bit) {
setUsesTOCBasePtr(DAG);
@ -1781,10 +1805,11 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
GOTPtr, TGA);
SDValue TLSAddr = DAG.getNode(PPCISD::GET_TLSLD_ADDR, dl,
PtrVT, GOTEntry, TGA);
SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
PtrVT, TLSAddr, TGA);
std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
SDValue TLSAddr = CallResult.first;
SDValue Chain = CallResult.second;
SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
Chain, TLSAddr, TGA);
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
}
@ -3808,6 +3833,23 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (Callee.getNode()) {
Ops.push_back(Chain);
Ops.push_back(Callee);
// If this is a call to __tls_get_addr, find the symbol whose address
// is to be taken and add it to the list. This will be used to
// generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
// We find the symbol by walking the chain to the CopyFromReg, walking
// back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
// pulling the symbol from that node.
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
SDValue TGTAddr = AddI->getOperand(1);
assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
"Didn't find target global TLS address where we expected one");
Ops.push_back(TGTAddr);
CallOpc = PPCISD::CALL_TLS;
}
}
// If this is a tail call add stack pointer delta.
if (isTailCall)
@ -3970,9 +4012,12 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
Ops.insert(std::next(Ops.begin()), AddTOC);
} else if ((CallOpc == PPCISD::CALL) &&
(!isLocalCall(Callee) ||
DAG.getTarget().getRelocationModel() == Reloc::PIC_))
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
} else if (CallOpc == PPCISD::CALL_TLS)
// For 64-bit SVR4, TLS calls are always non-local.
CallOpc = PPCISD::CALL_NOP_TLS;
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);

View File

@ -101,6 +101,10 @@ namespace llvm {
/// SVR4 calls.
CALL, CALL_NOP,
/// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
/// to access TLS variables.
CALL_TLS, CALL_NOP_TLS,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
@ -224,10 +228,6 @@ namespace llvm {
/// sym\@got\@tlsgd\@l.
ADDI_TLSGD_L,
/// G8RC = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
/// model, produces a call to __tls_get_addr(sym\@tlsgd).
GET_TLS_ADDR,
/// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
/// register to sym\@got\@tlsld\@ha.
@ -238,13 +238,11 @@ namespace llvm {
/// sym\@got\@tlsld\@l.
ADDI_TLSLD_L,
/// G8RC = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
/// model, produces a call to __tls_get_addr(sym\@tlsld).
GET_TLSLD_ADDR,
/// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds X3 to
/// sym\@dtprel\@ha.
/// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
/// local-dynamic TLS model, produces an ADDIS8 instruction
/// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
/// to tie this in place following a copy to %X3 from the result
/// of a GET_TLSLD_ADDR.
ADDIS_DTPREL_HA,
/// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
@ -637,6 +635,8 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;

View File

@ -202,6 +202,9 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
(BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
@ -901,12 +904,6 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR8] in
def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsADDR",
[(set i64:$rD,
(PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
@ -917,12 +914,6 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR8] in
def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsldADDR",
[(set i64:$rD,
(PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,

View File

@ -110,11 +110,10 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
[SDNPHasChain]>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
@ -137,9 +136,15 @@ def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
@ -2454,6 +2459,9 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
(BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@ -2508,21 +2516,10 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsgdL32",
[(set i32:$rD,
(PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR] in
def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsADDR32",
[(set i32:$rD,
(PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsldL32",
[(set i32:$rD,
(PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1, Defs = [LR] in
def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsldADDR32",
[(set i32:$rD,
(PPCgetTlsldAddr i32:$reg,
tglobaltlsaddr:$sym))]>;
def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDIdtprelL32",
[(set i32:$rD,

View File

@ -137,6 +137,12 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
case PPCII::MO_TLS:
RefKind = MCSymbolRefExpr::VK_PPC_TLS;
break;
case PPCII::MO_TLSGD:
RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
break;
case PPCII::MO_TLSLD:
RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
break;
}
if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)

View File

@ -1,113 +0,0 @@
//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass fixes up GETtls[ld]ADDR[32] machine instructions so that
// they read and write GPR3. These are really call instructions, so
// must use the calling convention registers. This is done in a late
// pass so that TLS variable accesses can be fully commoned.
//
//===----------------------------------------------------------------------===//
#include "PPCInstrInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "ppc-tls-dynamic-call"
namespace llvm {
void initializePPCTLSDynamicCallPass(PassRegistry&);
}
namespace {
// PPCTLSDynamicCall pass - Add copies to and from GPR3 around
// GETtls[ld]ADDR[32] machine instructions. These instructions
// are actually call instructions, so the register choice is
// constrained. We delay introducing these copies as late as
// possible so that TLS variable accesses can be fully commoned.
struct PPCTLSDynamicCall : public MachineFunctionPass {
static char ID;
PPCTLSDynamicCall() : MachineFunctionPass(ID) {
initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry());
}
const PPCTargetMachine *TM;
const PPCInstrInfo *TII;
protected:
bool processBlock(MachineBasicBlock &MBB) {
bool Changed = false;
bool Is64Bit = TM->getSubtargetImpl()->isPPC64();
for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
I != IE; ++I) {
MachineInstr *MI = I;
if (MI->getOpcode() != PPC::GETtlsADDR &&
MI->getOpcode() != PPC::GETtlsldADDR &&
MI->getOpcode() != PPC::GETtlsADDR32 &&
MI->getOpcode() != PPC::GETtlsldADDR32)
continue;
DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;);
unsigned OutReg = MI->getOperand(0).getReg();
unsigned InReg = MI->getOperand(1).getReg();
DebugLoc DL = MI->getDebugLoc();
unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), GPR3)
.addReg(InReg);
MI->getOperand(0).setReg(GPR3);
MI->getOperand(1).setReg(GPR3);
BuildMI(MBB, ++I, DL, TII->get(TargetOpcode::COPY), OutReg)
.addReg(GPR3);
Changed = true;
}
return Changed;
}
public:
bool runOnMachineFunction(MachineFunction &MF) override {
TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
TII = TM->getSubtargetImpl()->getInstrInfo();
bool Changed = false;
for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
MachineBasicBlock &B = *I++;
if (processBlock(B))
Changed = true;
}
return Changed;
}
void getAnalysisUsage(AnalysisUsage &AU) const override {
MachineFunctionPass::getAnalysisUsage(AU);
}
};
}
INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
"PowerPC TLS Dynamic Call Fixup", false, false)
INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE,
"PowerPC TLS Dynamic Call Fixup", false, false)
char PPCTLSDynamicCall::ID = 0;
FunctionPass*
llvm::createPPCTLSDynamicCallPass() { return new PPCTLSDynamicCall(); }

View File

@ -266,7 +266,6 @@ void PPCPassConfig::addPreRegAlloc() {
initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
&PPCVSXFMAMutateID);
addPass(createPPCTLSDynamicCallPass());
}
void PPCPassConfig::addPreSched2() {

View File

@ -12,7 +12,8 @@ entry:
; CHECK-LABEL: @test1
; CHECK: mflr 0
; CHECK: std 0, 16(1)
; CHECK-DAG: ld 3, 64(1)
; FIXME: These next two lines don't both need to load the same value.
; CHECK-DAG: ld 3, 16(1)
; CHECK-DAG: ld 0, 16(1)
; CHECK: mtlr 0
; CHECK: blr

View File

@ -1,52 +0,0 @@
; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s
; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | grep "__tls_get_addr" | count 1
; This test was derived from LLVM's own
; PrettyStackTraceEntry::~PrettyStackTraceEntry(). It demonstrates an
; opportunity for CSE of calls to __tls_get_addr().
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
%"class.llvm::PrettyStackTraceEntry" = type { i32 (...)**, %"class.llvm::PrettyStackTraceEntry"* }
@_ZTVN4llvm21PrettyStackTraceEntryE = unnamed_addr constant [5 x i8*] [i8* null, i8* null, i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD2Ev to i8*), i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD0Ev to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)], align 8
@_ZL20PrettyStackTraceHead = internal thread_local unnamed_addr global %"class.llvm::PrettyStackTraceEntry"* null, align 8
@.str = private unnamed_addr constant [87 x i8] c"PrettyStackTraceHead == this && \22Pretty stack trace entry destruction is out of order\22\00", align 1
@.str1 = private unnamed_addr constant [64 x i8] c"/home/wschmidt/llvm/llvm-test2/lib/Support/PrettyStackTrace.cpp\00", align 1
@__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev = private unnamed_addr constant [62 x i8] c"virtual llvm::PrettyStackTraceEntry::~PrettyStackTraceEntry()\00", align 1
declare void @_ZN4llvm21PrettyStackTraceEntryD2Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr
declare void @__cxa_pure_virtual()
declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*)
declare void @_ZdlPv(i8*)
define void @_ZN4llvm21PrettyStackTraceEntryD0Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr align 2 {
entry:
%0 = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 0
store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @_ZTVN4llvm21PrettyStackTraceEntryE, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
%1 = load %"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead, align 8
%cmp.i = icmp eq %"class.llvm::PrettyStackTraceEntry"* %1, %this
br i1 %cmp.i, label %_ZN4llvm21PrettyStackTraceEntryD2Ev.exit, label %cond.false.i
cond.false.i: ; preds = %entry
tail call void @__assert_fail(i8* getelementptr inbounds ([87 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([64 x i8]* @.str1, i64 0, i64 0), i32 zeroext 119, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev, i64 0, i64 0))
unreachable
_ZN4llvm21PrettyStackTraceEntryD2Ev.exit: ; preds = %entry
%NextEntry.i.i = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 1
%2 = bitcast %"class.llvm::PrettyStackTraceEntry"** %NextEntry.i.i to i64*
%3 = load i64* %2, align 8
store i64 %3, i64* bitcast (%"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead to i64*), align 8
%4 = bitcast %"class.llvm::PrettyStackTraceEntry"* %this to i8*
tail call void @_ZdlPv(i8* %4)
ret void
}
; CHECK-LABEL: _ZN4llvm21PrettyStackTraceEntryD0Ev:
; CHECK: addis [[REG1:[0-9]+]], 2, _ZL20PrettyStackTraceHead@got@tlsld@ha
; CHECK: addi 3, [[REG1]], _ZL20PrettyStackTraceHead@got@tlsld@l
; CHECK: bl __tls_get_addr(_ZL20PrettyStackTraceHead@tlsld)
; CHECK: addis 3, 3, _ZL20PrettyStackTraceHead@dtprel@ha
; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)
; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)

View File

@ -1,7 +1,7 @@
; RUN: llc -march=ppc64 -mcpu=pwr7 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0 %s
; RUN: llc -march=ppc64 -mcpu=pwr7 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1 %s
; R;U;N: llc -march=ppc32 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0-32 %s
; R;U;N: llc -march=ppc32 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1-32 %s
; RUN: llc -march=ppc32 -O0 -relocation-model=pic < %s | FileCheck -check-prefix=OPT0-32 %s
; RUN: llc -march=ppc32 -O1 -relocation-model=pic < %s | FileCheck -check-prefix=OPT1-32 %s
target triple = "powerpc64-unknown-linux-gnu"
; Test correct assembly code generation for thread-local storage using
@ -65,5 +65,5 @@ entry:
; OPT1: bl __tls_get_addr(a2@tlsgd)
; OPT1-NEXT: nop
; OPT1-32-LABEL: main2
; OPT1-32: addi {{[0-9]+}}, {{[0-9]+}}, a2@got@tlsgd
; OPT1-32: addi 3, {{[0-9]+}}, a2@got@tlsgd
; OPT1-32: bl __tls_get_addr(a2@tlsgd)@PLT

View File

@ -19,11 +19,13 @@ entry:
}
; CHECK-LABEL: call_once:
; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l
; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha
; CHECK: addi 3, 3, __once_callable@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_callable@tlsgd)
; CHECK-NEXT: nop
; CHECK: std {{[0-9]+}}, 0(3)
; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l
; CHECK: addis 3, 2, __once_call@got@tlsgd@ha
; CHECK: addi 3, 3, __once_call@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_call@tlsgd)
; CHECK-NEXT: nop
; CHECK: std {{[0-9]+}}, 0(3)