[PowerPC] Fix reverted patch r227976 to avoid register assignment issues

See full discussion in http://reviews.llvm.org/D7491.

We now hide the add-immediate and call instructions together in a
separate pseudo-op, which is tagged to define GPR3 and clobber the
call-killed registers.  The PPCTLSDynamicCall pass prior to RA now
expands this op into the two separate addi and call ops, with explicit
definitions of GPR3 on both instructions, and explicit clobbers on the
call instruction.  The pass is now marked as requiring and preserving
the LiveIntervals and SlotIndexes analyses, and fixes these up after
the replacement sequences are introduced.

Self-hosting has been verified on LE P8 and BE P7 with various
optimization levels, etc.  It has also been verified with the
--no-tls-optimize flag workaround removed.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@228725 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Bill Schmidt 2015-02-10 19:09:05 +00:00
parent 3163865f01
commit 49b3971b70
14 changed files with 434 additions and 120 deletions

View File

@ -32,6 +32,7 @@ add_llvm_target(PowerPCCodeGen
PPCTargetObjectFile.cpp
PPCTargetTransformInfo.cpp
PPCSelectionDAGInfo.cpp
PPCTLSDynamicCall.cpp
PPCVSXCopy.cpp
PPCVSXFMAMutate.cpp
)

View File

@ -40,6 +40,7 @@ namespace llvm {
FunctionPass *createPPCVSXFMAMutatePass();
FunctionPass *createPPCBranchSelectionPass();
FunctionPass *createPPCISelDag(PPCTargetMachine &TM);
FunctionPass *createPPCTLSDynamicCallPass();
void LowerPPCMachineInstrToMCInst(const MachineInstr *MI, MCInst &OutMI,
AsmPrinter &AP, bool isDarwin);
@ -90,12 +91,7 @@ namespace llvm {
MO_TOC_LO = 7 << 4,
// Symbol for VK_PPC_TLS fixup attached to an ADD instruction
MO_TLS = 8 << 4,
// Symbols for VK_PPC_TLSGD and VK_PPC_TLSLD in __tls_get_addr
// call sequences.
MO_TLSLD = 9 << 4,
MO_TLSGD = 10 << 4
MO_TLS = 8 << 4
};
} // end namespace PPCII

View File

@ -101,6 +101,7 @@ namespace {
const MachineInstr &MI);
void LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
const MachineInstr &MI);
void EmitTlsCall(const MachineInstr *MI, MCSymbolRefExpr::VariantKind VK);
};
/// PPCLinuxAsmPrinter - PowerPC assembly printer, customized for Linux
@ -406,6 +407,39 @@ void PPCAsmPrinter::LowerPATCHPOINT(MCStreamer &OutStreamer, StackMaps &SM,
EmitToStreamer(OutStreamer, MCInstBuilder(PPC::NOP));
}
/// EmitTlsCall -- Given a GETtls[ld]ADDR[32] instruction, print a
/// call to __tls_get_addr to the current output stream.
void PPCAsmPrinter::EmitTlsCall(const MachineInstr *MI,
MCSymbolRefExpr::VariantKind VK) {
StringRef Name = "__tls_get_addr";
MCSymbol *TlsGetAddr = OutContext.GetOrCreateSymbol(Name);
MCSymbolRefExpr::VariantKind Kind = MCSymbolRefExpr::VK_None;
assert(MI->getOperand(0).isReg() &&
((Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::X3) ||
(!Subtarget.isPPC64() && MI->getOperand(0).getReg() == PPC::R3)) &&
"GETtls[ld]ADDR[32] must define GPR3");
assert(MI->getOperand(1).isReg() &&
((Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::X3) ||
(!Subtarget.isPPC64() && MI->getOperand(1).getReg() == PPC::R3)) &&
"GETtls[ld]ADDR[32] must read GPR3");
if (!Subtarget.isPPC64() && !Subtarget.isDarwin() &&
TM.getRelocationModel() == Reloc::PIC_)
Kind = MCSymbolRefExpr::VK_PLT;
const MCSymbolRefExpr *TlsRef =
MCSymbolRefExpr::Create(TlsGetAddr, Kind, OutContext);
const MachineOperand &MO = MI->getOperand(2);
const GlobalValue *GValue = MO.getGlobal();
MCSymbol *MOSymbol = getSymbol(GValue);
const MCExpr *SymVar = MCSymbolRefExpr::Create(MOSymbol, VK, OutContext);
EmitToStreamer(OutStreamer,
MCInstBuilder(Subtarget.isPPC64() ?
PPC::BL8_NOP_TLS : PPC::BL_TLS)
.addExpr(TlsRef)
.addExpr(SymVar));
}
/// EmitInstruction -- Print out a single PowerPC MI in Darwin syntax to
/// the current output stream.
///
@ -808,6 +842,15 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(SymGotTlsGD));
return;
}
case PPC::GETtlsADDR:
// Transform: %X3 = GETtlsADDR %X3, <ga:@sym>
// Into: BL8_NOP_TLS __tls_get_addr(sym at tlsgd)
case PPC::GETtlsADDR32: {
// Transform: %R3 = GETtlsADDR32 %R3, <ga:@sym>
// Into: BL_TLS __tls_get_addr(sym at tlsgd)@PLT
EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSGD);
return;
}
case PPC::ADDIStlsldHA: {
// Transform: %Xd = ADDIStlsldHA %X2, <ga:@sym>
// Into: %Xd = ADDIS8 %X2, sym@got@tlsld@ha
@ -844,6 +887,15 @@ void PPCAsmPrinter::EmitInstruction(const MachineInstr *MI) {
.addExpr(SymGotTlsLD));
return;
}
case PPC::GETtlsldADDR:
// Transform: %X3 = GETtlsldADDR %X3, <ga:@sym>
// Into: BL8_NOP_TLS __tls_get_addr(sym at tlsld)
case PPC::GETtlsldADDR32: {
// Transform: %R3 = GETtlsldADDR32 %R3, <ga:@sym>
// Into: BL_TLS __tls_get_addr(sym at tlsld)@PLT
EmitTlsCall(MI, MCSymbolRefExpr::VK_PPC_TLSLD);
return;
}
case PPC::ADDISdtprelHA:
// Transform: %Xd = ADDISdtprelHA %X3, <ga:@sym>
// Into: %Xd = ADDIS8 %X3, sym@dtprel@ha

View File

@ -355,6 +355,20 @@ static bool hasNonRISpills(const MachineFunction &MF) {
return FuncInfo->hasNonRISpills();
}
/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
// We need a save/restore of LR if there is any def of LR (which is
// defined by calls, including the PIC setup sequence), or if there is
// some use of the LR stack slot (e.g. for builtin_return_address).
// (LR comes in 32 and 64 bit versions.)
MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}
/// determineFrameLayout - Determine the size of the frame and maximum call
/// frame size.
unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
@ -381,6 +395,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
// stackless code if all local vars are reg-allocated.
bool DisableRedZone = MF.getFunction()->getAttributes().
hasAttribute(AttributeSet::FunctionIndex, Attribute::NoRedZone);
unsigned LR = RegInfo->getRARegister();
if (!DisableRedZone &&
(Subtarget.isPPC64() || // 32-bit SVR4, no stack-
!Subtarget.isSVR4ABI() || // allocated locals.
@ -388,6 +403,7 @@ unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF,
FrameSize <= 224 && // Fits in red zone.
!MFI->hasVarSizedObjects() && // No dynamic alloca.
!MFI->adjustsStack() && // No calls.
!MustSaveLR(MF, LR) &&
!RegInfo->hasBasePointer(MF)) { // No special alignment.
// No need for frame
if (UpdateMF)
@ -1108,20 +1124,6 @@ void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
}
}
/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
// We need a save/restore of LR if there is any def of LR (which is
// defined by calls, including the PIC setup sequence), or if there is
// some use of the LR stack slot (e.g. for builtin_return_address).
// (LR comes in 32 and 64 bit versions.)
MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}
void
PPCFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *) const {

View File

@ -806,8 +806,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::SHL: return "PPCISD::SHL";
case PPCISD::CALL: return "PPCISD::CALL";
case PPCISD::CALL_NOP: return "PPCISD::CALL_NOP";
case PPCISD::CALL_TLS: return "PPCISD::CALL_TLS";
case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
case PPCISD::MTCTR: return "PPCISD::MTCTR";
case PPCISD::BCTRL: return "PPCISD::BCTRL";
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
@ -841,8 +839,12 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::ADD_TLS: return "PPCISD::ADD_TLS";
case PPCISD::ADDIS_TLSGD_HA: return "PPCISD::ADDIS_TLSGD_HA";
case PPCISD::ADDI_TLSGD_L: return "PPCISD::ADDI_TLSGD_L";
case PPCISD::GET_TLS_ADDR: return "PPCISD::GET_TLS_ADDR";
case PPCISD::ADDI_TLSGD_L_ADDR: return "PPCISD::ADDI_TLSGD_L_ADDR";
case PPCISD::ADDIS_TLSLD_HA: return "PPCISD::ADDIS_TLSLD_HA";
case PPCISD::ADDI_TLSLD_L: return "PPCISD::ADDI_TLSLD_L";
case PPCISD::GET_TLSLD_ADDR: return "PPCISD::GET_TLSLD_ADDR";
case PPCISD::ADDI_TLSLD_L_ADDR: return "PPCISD::ADDI_TLSLD_L_ADDR";
case PPCISD::ADDIS_DTPREL_HA: return "PPCISD::ADDIS_DTPREL_HA";
case PPCISD::ADDI_DTPREL_L: return "PPCISD::ADDI_DTPREL_L";
case PPCISD::VADD_SPLAT: return "PPCISD::VADD_SPLAT";
@ -1701,27 +1703,6 @@ SDValue PPCTargetLowering::LowerBlockAddress(SDValue Op,
return LowerLabelRef(TgtBAHi, TgtBALo, isPIC, DAG);
}
// Generate a call to __tls_get_addr for the given GOT entry Op.
std::pair<SDValue,SDValue>
PPCTargetLowering::lowerTLSCall(SDValue Op, SDLoc dl,
SelectionDAG &DAG) const {
Type *IntPtrTy = getDataLayout()->getIntPtrType(*DAG.getContext());
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Entry.Node = Op;
Entry.Ty = IntPtrTy;
Args.push_back(Entry);
TargetLowering::CallLoweringInfo CLI(DAG);
CLI.setDebugLoc(dl).setChain(DAG.getEntryNode())
.setCallee(CallingConv::C, IntPtrTy,
DAG.getTargetExternalSymbol("__tls_get_addr", getPointerTy()),
std::move(Args), 0);
return LowerCallTo(CLI);
}
SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
SelectionDAG &DAG) const {
@ -1768,8 +1749,7 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
}
if (Model == TLSModel::GeneralDynamic) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TLSGD);
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
setUsesTOCBasePtr(DAG);
@ -1782,15 +1762,12 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSGD_L, dl, PtrVT,
GOTPtr, TGA);
std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
return CallResult.first;
return DAG.getNode(PPCISD::ADDI_TLSGD_L_ADDR, dl, PtrVT,
GOTPtr, TGA, TGA);
}
if (Model == TLSModel::LocalDynamic) {
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0,
PPCII::MO_TLSLD);
SDValue TGA = DAG.getTargetGlobalAddress(GV, dl, PtrVT, 0, 0);
SDValue GOTPtr;
if (is64bit) {
setUsesTOCBasePtr(DAG);
@ -1803,13 +1780,10 @@ SDValue PPCTargetLowering::LowerGlobalTLSAddress(SDValue Op,
else
GOTPtr = DAG.getNode(PPCISD::PPC32_PICGOT, dl, PtrVT);
}
SDValue GOTEntry = DAG.getNode(PPCISD::ADDI_TLSLD_L, dl, PtrVT,
GOTPtr, TGA);
std::pair<SDValue, SDValue> CallResult = lowerTLSCall(GOTEntry, dl, DAG);
SDValue TLSAddr = CallResult.first;
SDValue Chain = CallResult.second;
SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl, PtrVT,
Chain, TLSAddr, TGA);
SDValue TLSAddr = DAG.getNode(PPCISD::ADDI_TLSLD_L_ADDR, dl,
PtrVT, GOTPtr, TGA, TGA);
SDValue DtvOffsetHi = DAG.getNode(PPCISD::ADDIS_DTPREL_HA, dl,
PtrVT, TLSAddr, TGA);
return DAG.getNode(PPCISD::ADDI_DTPREL_L, dl, PtrVT, DtvOffsetHi, TGA);
}
@ -3833,23 +3807,6 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
if (Callee.getNode()) {
Ops.push_back(Chain);
Ops.push_back(Callee);
// If this is a call to __tls_get_addr, find the symbol whose address
// is to be taken and add it to the list. This will be used to
// generate __tls_get_addr(<sym>@tlsgd) or __tls_get_addr(<sym>@tlsld).
// We find the symbol by walking the chain to the CopyFromReg, walking
// back from the CopyFromReg to the ADDI_TLSGD_L or ADDI_TLSLD_L, and
// pulling the symbol from that node.
if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
if (!strcmp(S->getSymbol(), "__tls_get_addr")) {
assert(!needIndirectCall && "Indirect call to __tls_get_addr???");
SDNode *AddI = Chain.getNode()->getOperand(2).getNode();
SDValue TGTAddr = AddI->getOperand(1);
assert(TGTAddr.getNode()->getOpcode() == ISD::TargetGlobalTLSAddress &&
"Didn't find target global TLS address where we expected one");
Ops.push_back(TGTAddr);
CallOpc = PPCISD::CALL_TLS;
}
}
// If this is a tail call add stack pointer delta.
if (isTailCall)
@ -4012,12 +3969,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
Ops.insert(std::next(Ops.begin()), AddTOC);
} else if ((CallOpc == PPCISD::CALL) &&
(!isLocalCall(Callee) ||
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
DAG.getTarget().getRelocationModel() == Reloc::PIC_))
// Otherwise insert NOP for non-local calls.
CallOpc = PPCISD::CALL_NOP;
} else if (CallOpc == PPCISD::CALL_TLS)
// For 64-bit SVR4, TLS calls are always non-local.
CallOpc = PPCISD::CALL_NOP_TLS;
}
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);

View File

@ -101,10 +101,6 @@ namespace llvm {
/// SVR4 calls.
CALL, CALL_NOP,
/// CALL_TLS and CALL_NOP_TLS - Versions of CALL and CALL_NOP used
/// to access TLS variables.
CALL_TLS, CALL_NOP_TLS,
/// CHAIN,FLAG = MTCTR(VAL, CHAIN[, INFLAG]) - Directly corresponds to a
/// MTCTR instruction.
MTCTR,
@ -223,26 +219,46 @@ namespace llvm {
/// register to sym\@got\@tlsgd\@ha.
ADDIS_TLSGD_HA,
/// G8RC = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
/// %X3 = ADDI_TLSGD_L G8RReg, Symbol - For the general-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
/// sym\@got\@tlsgd\@l.
/// sym\@got\@tlsgd\@l and stores the result in X3. Hidden by
/// ADDI_TLSGD_L_ADDR until after register assignment.
ADDI_TLSGD_L,
/// %X3 = GET_TLS_ADDR %X3, Symbol - For the general-dynamic TLS
/// model, produces a call to __tls_get_addr(sym\@tlsgd). Hidden by
/// ADDI_TLSGD_L_ADDR until after register assignment.
GET_TLS_ADDR,
/// G8RC = ADDI_TLSGD_L_ADDR G8RReg, Symbol, Symbol - Op that
/// combines ADDI_TLSGD_L and GET_TLS_ADDR until expansion following
/// register assignment.
ADDI_TLSGD_L_ADDR,
/// G8RC = ADDIS_TLSLD_HA %X2, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds the GOT base
/// register to sym\@got\@tlsld\@ha.
ADDIS_TLSLD_HA,
/// G8RC = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
/// %X3 = ADDI_TLSLD_L G8RReg, Symbol - For the local-dynamic TLS
/// model, produces an ADDI8 instruction that adds G8RReg to
/// sym\@got\@tlsld\@l.
/// sym\@got\@tlsld\@l and stores the result in X3. Hidden by
/// ADDI_TLSLD_L_ADDR until after register assignment.
ADDI_TLSLD_L,
/// G8RC = ADDIS_DTPREL_HA %X3, Symbol, Chain - For the
/// local-dynamic TLS model, produces an ADDIS8 instruction
/// that adds X3 to sym\@dtprel\@ha. The Chain operand is needed
/// to tie this in place following a copy to %X3 from the result
/// of a GET_TLSLD_ADDR.
/// %X3 = GET_TLSLD_ADDR %X3, Symbol - For the local-dynamic TLS
/// model, produces a call to __tls_get_addr(sym\@tlsld). Hidden by
/// ADDI_TLSLD_L_ADDR until after register assignment.
GET_TLSLD_ADDR,
/// G8RC = ADDI_TLSLD_L_ADDR G8RReg, Symbol, Symbol - Op that
/// combines ADDI_TLSLD_L and GET_TLSLD_ADDR until expansion
/// following register assignment.
ADDI_TLSLD_L_ADDR,
/// G8RC = ADDIS_DTPREL_HA %X3, Symbol - For the local-dynamic TLS
/// model, produces an ADDIS8 instruction that adds X3 to
/// sym\@dtprel\@ha.
ADDIS_DTPREL_HA,
/// G8RC = ADDI_DTPREL_L G8RReg, Symbol - For the local-dynamic TLS
@ -635,8 +651,6 @@ namespace llvm {
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const;
std::pair<SDValue,SDValue> lowerTLSCall(SDValue Op, SDLoc dl,
SelectionDAG &DAG) const;
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) const;
SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const;

View File

@ -202,9 +202,6 @@ def : Pat<(PPCcall (i64 texternalsym:$dst)),
def : Pat<(PPCcall_nop (i64 texternalsym:$dst)),
(BL8_NOP texternalsym:$dst)>;
def : Pat<(PPCcall_nop_tls texternalsym:$func, tglobaltlsaddr:$sym),
(BL8_NOP_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
// Atomic operations
let usesCustomInserter = 1 in {
let Defs = [CR0] in {
@ -904,6 +901,28 @@ def ADDItlsgdL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsgdL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
def GETtlsADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsADDR",
[(set i64:$rD,
(PPCgetTlsAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
// Combined op for ADDItlsgdL and GETtlsADDR, late expanded. X3 and LR8
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
in
def ADDItlsgdLADDR : Pseudo<(outs g8rc:$rD),
(ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
"#ADDItlsgdLADDR",
[(set i64:$rD,
(PPCaddiTlsgdLAddr i64:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>,
isPPC64;
def ADDIStlsldHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDIStlsldHA",
[(set i64:$rD,
@ -914,6 +933,28 @@ def ADDItlsldL : Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
[(set i64:$rD,
(PPCaddiTlsldL i64:$reg, tglobaltlsaddr:$disp))]>,
isPPC64;
// LR8 is a true define, while the rest of the Defs are clobbers. X3 is
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7] in
def GETtlsldADDR : Pseudo<(outs g8rc:$rD), (ins g8rc:$reg, tlsgd:$sym),
"#GETtlsldADDR",
[(set i64:$rD,
(PPCgetTlsldAddr i64:$reg, tglobaltlsaddr:$sym))]>,
isPPC64;
// Combined op for ADDItlsldL and GETtlsldADDR, late expanded. X3 and LR8
// are true defines, while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [X0,X3,X4,X5,X6,X7,X8,X9,X10,X11,X12,LR8,CTR8,CR0,CR1,CR5,CR6,CR7]
in
def ADDItlsldLADDR : Pseudo<(outs g8rc:$rD),
(ins g8rc_nox0:$reg, s16imm64:$disp, tlsgd:$sym),
"#ADDItlsldLADDR",
[(set i64:$rD,
(PPCaddiTlsldLAddr i64:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>,
isPPC64;
def ADDISdtprelHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, s16imm64:$disp),
"#ADDISdtprelHA",
[(set i64:$rD,

View File

@ -110,10 +110,19 @@ def PPCldGotTprelL : SDNode<"PPCISD::LD_GOT_TPREL_L", SDTIntBinOp,
def PPCaddTls : SDNode<"PPCISD::ADD_TLS", SDTIntBinOp, []>;
def PPCaddisTlsgdHA : SDNode<"PPCISD::ADDIS_TLSGD_HA", SDTIntBinOp>;
def PPCaddiTlsgdL : SDNode<"PPCISD::ADDI_TLSGD_L", SDTIntBinOp>;
def PPCgetTlsAddr : SDNode<"PPCISD::GET_TLS_ADDR", SDTIntBinOp>;
def PPCaddiTlsgdLAddr : SDNode<"PPCISD::ADDI_TLSGD_L_ADDR",
SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
def PPCaddisTlsldHA : SDNode<"PPCISD::ADDIS_TLSLD_HA", SDTIntBinOp>;
def PPCaddiTlsldL : SDNode<"PPCISD::ADDI_TLSLD_L", SDTIntBinOp>;
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp,
[SDNPHasChain]>;
def PPCgetTlsldAddr : SDNode<"PPCISD::GET_TLSLD_ADDR", SDTIntBinOp>;
def PPCaddiTlsldLAddr : SDNode<"PPCISD::ADDI_TLSLD_L_ADDR",
SDTypeProfile<1, 3, [
SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,
SDTCisSameAs<0, 3>, SDTCisInt<0> ]>>;
def PPCaddisDtprelHA : SDNode<"PPCISD::ADDIS_DTPREL_HA", SDTIntBinOp>;
def PPCaddiDtprelL : SDNode<"PPCISD::ADDI_DTPREL_L", SDTIntBinOp>;
def PPCvperm : SDNode<"PPCISD::VPERM", SDT_PPCvperm, []>;
@ -136,15 +145,9 @@ def SDT_PPCCall : SDTypeProfile<0, -1, [SDTCisInt<0>]>;
def PPCcall : SDNode<"PPCISD::CALL", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCcall_tls : SDNode<"PPCISD::CALL_TLS", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
@ -2459,9 +2462,6 @@ def : Pat<(PPCcall (i32 tglobaladdr:$dst)),
def : Pat<(PPCcall (i32 texternalsym:$dst)),
(BL texternalsym:$dst)>;
def : Pat<(PPCcall_tls texternalsym:$func, tglobaltlsaddr:$sym),
(BL_TLS texternalsym:$func, tglobaltlsaddr:$sym)>;
def : Pat<(PPCtc_return (i32 tglobaladdr:$dst), imm:$imm),
(TCRETURNdi tglobaladdr:$dst, imm:$imm)>;
@ -2516,10 +2516,49 @@ def ADDItlsgdL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsgdL32",
[(set i32:$rD,
(PPCaddiTlsgdL i32:$reg, tglobaltlsaddr:$disp))]>;
// LR is a true define, while the rest of the Defs are clobbers. R3 is
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
def GETtlsADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsADDR32",
[(set i32:$rD,
(PPCgetTlsAddr i32:$reg, tglobaltlsaddr:$sym))]>;
// Combined op for ADDItlsgdL32 and GETtlsADDR32, late expanded. R3 and LR
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
def ADDItlsgdLADDR32 : Pseudo<(outs gprc:$rD),
(ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
"#ADDItlsgdLADDR32",
[(set i32:$rD,
(PPCaddiTlsgdLAddr i32:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>;
def ADDItlsldL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDItlsldL32",
[(set i32:$rD,
(PPCaddiTlsldL i32:$reg, tglobaltlsaddr:$disp))]>;
// LR is a true define, while the rest of the Defs are clobbers. R3 is
// explicitly defined when this op is created, so not mentioned here.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
def GETtlsldADDR32 : Pseudo<(outs gprc:$rD), (ins gprc:$reg, tlsgd32:$sym),
"GETtlsldADDR32",
[(set i32:$rD,
(PPCgetTlsldAddr i32:$reg,
tglobaltlsaddr:$sym))]>;
// Combined op for ADDItlsldL32 and GETtlsldADDR32, late expanded. R3 and LR
// are true defines while the rest of the Defs are clobbers.
let hasExtraSrcRegAllocReq = 1, hasExtraDefRegAllocReq = 1,
Defs = [R0,R3,R4,R5,R6,R7,R8,R9,R10,R11,R12,LR,CTR,CR0,CR1,CR5,CR6,CR7] in
def ADDItlsldLADDR32 : Pseudo<(outs gprc:$rD),
(ins gprc_nor0:$reg, s16imm:$disp, tlsgd32:$sym),
"#ADDItlsldLADDR32",
[(set i32:$rD,
(PPCaddiTlsldLAddr i32:$reg,
tglobaltlsaddr:$disp,
tglobaltlsaddr:$sym))]>;
def ADDIdtprelL32 : Pseudo<(outs gprc:$rD), (ins gprc_nor0:$reg, s16imm:$disp),
"#ADDIdtprelL32",
[(set i32:$rD,

View File

@ -137,12 +137,6 @@ static MCOperand GetSymbolRef(const MachineOperand &MO, const MCSymbol *Symbol,
case PPCII::MO_TLS:
RefKind = MCSymbolRefExpr::VK_PPC_TLS;
break;
case PPCII::MO_TLSGD:
RefKind = MCSymbolRefExpr::VK_PPC_TLSGD;
break;
case PPCII::MO_TLSLD:
RefKind = MCSymbolRefExpr::VK_PPC_TLSLD;
break;
}
if (MO.getTargetFlags() == PPCII::MO_PLT_OR_STUB && !isDarwin)

View File

@ -0,0 +1,170 @@
//===---------- PPCTLSDynamicCall.cpp - TLS Dynamic Call Fixup ------------===//
//
// The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass expands ADDItls{ld,gd}LADDR[32] machine instructions into
// separate ADDItls{ld,gd}L[32] and GETtls{ld}ADDR[32] instructions, both of
// which define GPR3. A copy is added from GPR3 to the target virtual
// register of the original instruction. The GETtls{ld}ADDR[32] is really
// a call instruction, so its target register is constrained to be GPR3.
// This is not true of ADDItls{ld,gd}L[32], but there is a legacy linker
// optimization bug that requires the target register of the addi of
// a local- or general-dynamic TLS access sequence to be GPR3.
//
// This is done in a late pass so that TLS variable accesses can be
// fully commoned by MachineCSE.
//
//===----------------------------------------------------------------------===//
#include "PPCInstrInfo.h"
#include "PPC.h"
#include "PPCInstrBuilder.h"
#include "PPCTargetMachine.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
#define DEBUG_TYPE "ppc-tls-dynamic-call"
namespace llvm {
void initializePPCTLSDynamicCallPass(PassRegistry&);
}
namespace {
/// Machine-function pass that splits every combined TLS pseudo
/// (ADDItls{gd,ld}LADDR[32]) into an ADDItls{gd,ld}L[32] defining GPR3, a
/// GETtls{ld}ADDR[32] reading and defining GPR3, and a COPY from GPR3 into
/// the pseudo's original result register.  Live intervals are repaired over
/// each inserted sequence.
struct PPCTLSDynamicCall : public MachineFunctionPass {
  static char ID;
  PPCTLSDynamicCall() : MachineFunctionPass(ID) {
    initializePPCTLSDynamicCallPass(*PassRegistry::getPassRegistry());
  }

  // Set at the start of runOnMachineFunction and read by processBlock.
  const PPCTargetMachine *TM;
  const PPCInstrInfo *TII;
  LiveIntervals *LIS;

protected:
  /// Expand every combined TLS pseudo found in \p MBB.
  /// \returns true if at least one instruction was replaced.
  bool processBlock(MachineBasicBlock &MBB) {
    bool Changed = false;
    bool Is64Bit = TM->getSubtargetImpl()->isPPC64();

    // The loop header deliberately has no increment.  When a pseudo is
    // expanded, I is stepped past it before it is unlinked; advancing again
    // in the header would skip the instruction that follows the expansion
    // (possibly another TLS pseudo) and could step past MBB.end().
    for (MachineBasicBlock::iterator I = MBB.begin(), IE = MBB.end();
         I != IE;) {
      MachineInstr *MI = I;

      if (MI->getOpcode() != PPC::ADDItlsgdLADDR &&
          MI->getOpcode() != PPC::ADDItlsldLADDR &&
          MI->getOpcode() != PPC::ADDItlsgdLADDR32 &&
          MI->getOpcode() != PPC::ADDItlsldLADDR32) {
        ++I;
        continue;
      }

      DEBUG(dbgs() << "TLS Dynamic Call Fixup:\n " << *MI;);

      unsigned OutReg = MI->getOperand(0).getReg();
      unsigned InReg = MI->getOperand(1).getReg();
      DebugLoc DL = MI->getDebugLoc();
      unsigned GPR3 = Is64Bit ? PPC::X3 : PPC::R3;
      unsigned Opc1, Opc2;

      // Registers whose live intervals need repairing after the rewrite.
      SmallVector<unsigned, 4> OrigRegs;
      OrigRegs.push_back(OutReg);
      OrigRegs.push_back(InReg);
      OrigRegs.push_back(GPR3);

      // Opc1 is the add-immediate half, Opc2 the call half.
      switch (MI->getOpcode()) {
      default:
        llvm_unreachable("Opcode inconsistency error");
      case PPC::ADDItlsgdLADDR:
        Opc1 = PPC::ADDItlsgdL;
        Opc2 = PPC::GETtlsADDR;
        break;
      case PPC::ADDItlsldLADDR:
        Opc1 = PPC::ADDItlsldL;
        Opc2 = PPC::GETtlsldADDR;
        break;
      case PPC::ADDItlsgdLADDR32:
        Opc1 = PPC::ADDItlsgdL32;
        Opc2 = PPC::GETtlsADDR32;
        break;
      case PPC::ADDItlsldLADDR32:
        Opc1 = PPC::ADDItlsldL32;
        Opc2 = PPC::GETtlsldADDR32;
        break;
      }

      // Expand into two ops built prior to the existing instruction.
      MachineInstr *Addi = BuildMI(MBB, I, DL, TII->get(Opc1), GPR3)
        .addReg(InReg);
      Addi->addOperand(MI->getOperand(2));

      // The ADDItls* instruction is the first instruction in the
      // repair range.
      MachineBasicBlock::iterator First = I;
      --First;

      MachineInstr *Call = (BuildMI(MBB, I, DL, TII->get(Opc2), GPR3)
                            .addReg(GPR3));
      Call->addOperand(MI->getOperand(3));

      BuildMI(MBB, I, DL, TII->get(TargetOpcode::COPY), OutReg)
        .addReg(GPR3);

      // The COPY is the last instruction in the repair range.
      MachineBasicBlock::iterator Last = I;
      --Last;

      // Move past the original instruction and remove it.  This is the only
      // advance of I on this path.
      ++I;
      MI->removeFromParent();

      // Repair the live intervals.
      LIS->repairIntervalsInRange(&MBB, First, Last, OrigRegs);
      Changed = true;
    }

    return Changed;
  }

public:
  /// Cache the target machine, instruction info and LiveIntervals, then
  /// process every basic block of \p MF.
  /// \returns true if any block was modified.
  bool runOnMachineFunction(MachineFunction &MF) override {
    TM = static_cast<const PPCTargetMachine *>(&MF.getTarget());
    TII = TM->getSubtargetImpl()->getInstrInfo();
    LIS = &getAnalysis<LiveIntervals>();

    bool Changed = false;

    for (MachineFunction::iterator I = MF.begin(); I != MF.end();) {
      MachineBasicBlock &B = *I++;
      if (processBlock(B))
        Changed = true;
    }

    return Changed;
  }

  /// The pass requires LiveIntervals and SlotIndexes and declares both
  /// preserved.
  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<LiveIntervals>();
    AU.addPreserved<LiveIntervals>();
    AU.addRequired<SlotIndexes>();
    AU.addPreserved<SlotIndexes>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};
}
// Pass registration for PPCTLSDynamicCall under the DEBUG_TYPE name, listing
// LiveIntervals and SlotIndexes as pass dependencies (the same two analyses
// the pass requests in getAnalysisUsage).
INITIALIZE_PASS_BEGIN(PPCTLSDynamicCall, DEBUG_TYPE,
"PowerPC TLS Dynamic Call Fixup", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_END(PPCTLSDynamicCall, DEBUG_TYPE,
"PowerPC TLS Dynamic Call Fixup", false, false)
// Out-of-class definition of the static ID member declared in the struct.
char PPCTLSDynamicCall::ID = 0;
/// Factory for the TLS dynamic call fixup pass: returns a newly allocated
/// PPCTLSDynamicCall as a FunctionPass pointer.
FunctionPass *llvm::createPPCTLSDynamicCallPass() {
  FunctionPass *Pass = new PPCTLSDynamicCall();
  return Pass;
}

View File

@ -266,6 +266,8 @@ void PPCPassConfig::addPreRegAlloc() {
initializePPCVSXFMAMutatePass(*PassRegistry::getPassRegistry());
insertPass(VSXFMAMutateEarly ? &RegisterCoalescerID : &MachineSchedulerID,
&PPCVSXFMAMutateID);
if (getPPCTargetMachine().getRelocationModel() == Reloc::PIC_)
addPass(createPPCTLSDynamicCallPass());
}
void PPCPassConfig::addPreSched2() {

View File

@ -12,8 +12,7 @@ entry:
; CHECK-LABEL: @test1
; CHECK: mflr 0
; CHECK: std 0, 16(1)
; FIXME: These next two lines don't both need to load the same value.
; CHECK-DAG: ld 3, 16(1)
; CHECK-DAG: ld 3, 64(1)
; CHECK-DAG: ld 0, 16(1)
; CHECK: mtlr 0
; CHECK: blr

View File

@ -0,0 +1,52 @@
; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | FileCheck %s
; RUN: llc -march=ppc64 -mcpu=pwr7 -O2 -relocation-model=pic < %s | grep "__tls_get_addr" | count 1
; This test was derived from LLVM's own
; PrettyStackTraceEntry::~PrettyStackTraceEntry(). It demonstrates an
; opportunity for CSE of calls to __tls_get_addr().
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-unknown-linux-gnu"
%"class.llvm::PrettyStackTraceEntry" = type { i32 (...)**, %"class.llvm::PrettyStackTraceEntry"* }
@_ZTVN4llvm21PrettyStackTraceEntryE = unnamed_addr constant [5 x i8*] [i8* null, i8* null, i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD2Ev to i8*), i8* bitcast (void (%"class.llvm::PrettyStackTraceEntry"*)* @_ZN4llvm21PrettyStackTraceEntryD0Ev to i8*), i8* bitcast (void ()* @__cxa_pure_virtual to i8*)], align 8
@_ZL20PrettyStackTraceHead = internal thread_local unnamed_addr global %"class.llvm::PrettyStackTraceEntry"* null, align 8
@.str = private unnamed_addr constant [87 x i8] c"PrettyStackTraceHead == this && \22Pretty stack trace entry destruction is out of order\22\00", align 1
@.str1 = private unnamed_addr constant [64 x i8] c"/home/wschmidt/llvm/llvm-test2/lib/Support/PrettyStackTrace.cpp\00", align 1
@__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev = private unnamed_addr constant [62 x i8] c"virtual llvm::PrettyStackTraceEntry::~PrettyStackTraceEntry()\00", align 1
declare void @_ZN4llvm21PrettyStackTraceEntryD2Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr
declare void @__cxa_pure_virtual()
declare void @__assert_fail(i8*, i8*, i32 zeroext, i8*)
declare void @_ZdlPv(i8*)
define void @_ZN4llvm21PrettyStackTraceEntryD0Ev(%"class.llvm::PrettyStackTraceEntry"* %this) unnamed_addr align 2 {
entry:
%0 = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 0
store i32 (...)** bitcast (i8** getelementptr inbounds ([5 x i8*]* @_ZTVN4llvm21PrettyStackTraceEntryE, i64 0, i64 2) to i32 (...)**), i32 (...)*** %0, align 8
%1 = load %"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead, align 8
%cmp.i = icmp eq %"class.llvm::PrettyStackTraceEntry"* %1, %this
br i1 %cmp.i, label %_ZN4llvm21PrettyStackTraceEntryD2Ev.exit, label %cond.false.i
cond.false.i: ; preds = %entry
tail call void @__assert_fail(i8* getelementptr inbounds ([87 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([64 x i8]* @.str1, i64 0, i64 0), i32 zeroext 119, i8* getelementptr inbounds ([62 x i8]* @__PRETTY_FUNCTION__._ZN4llvm21PrettyStackTraceEntryD2Ev, i64 0, i64 0))
unreachable
_ZN4llvm21PrettyStackTraceEntryD2Ev.exit: ; preds = %entry
%NextEntry.i.i = getelementptr inbounds %"class.llvm::PrettyStackTraceEntry"* %this, i64 0, i32 1
%2 = bitcast %"class.llvm::PrettyStackTraceEntry"** %NextEntry.i.i to i64*
%3 = load i64* %2, align 8
store i64 %3, i64* bitcast (%"class.llvm::PrettyStackTraceEntry"** @_ZL20PrettyStackTraceHead to i64*), align 8
%4 = bitcast %"class.llvm::PrettyStackTraceEntry"* %this to i8*
tail call void @_ZdlPv(i8* %4)
ret void
}
; CHECK-LABEL: _ZN4llvm21PrettyStackTraceEntryD0Ev:
; CHECK: addis [[REG1:[0-9]+]], 2, _ZL20PrettyStackTraceHead@got@tlsld@ha
; CHECK: addi 3, [[REG1]], _ZL20PrettyStackTraceHead@got@tlsld@l
; CHECK: bl __tls_get_addr(_ZL20PrettyStackTraceHead@tlsld)
; CHECK: addis 3, 3, _ZL20PrettyStackTraceHead@dtprel@ha
; CHECK: ld {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)
; CHECK: std {{[0-9]+}}, _ZL20PrettyStackTraceHead@dtprel@l(3)

View File

@ -19,13 +19,11 @@ entry:
}
; CHECK-LABEL: call_once:
; CHECK: addis 3, 2, __once_callable@got@tlsgd@ha
; CHECK: addi 3, 3, __once_callable@got@tlsgd@l
; CHECK: addi 3, {{[0-9]+}}, __once_callable@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_callable@tlsgd)
; CHECK-NEXT: nop
; CHECK: std {{[0-9]+}}, 0(3)
; CHECK: addis 3, 2, __once_call@got@tlsgd@ha
; CHECK: addi 3, 3, __once_call@got@tlsgd@l
; CHECK: addi 3, {{[0-9]+}}, __once_call@got@tlsgd@l
; CHECK: bl __tls_get_addr(__once_call@tlsgd)
; CHECK-NEXT: nop
; CHECK: std {{[0-9]+}}, 0(3)