mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-02-08 21:32:39 +00:00
[PowerPC] Loosen ELFv1 PPC64 func descriptor loads for indirect calls
Function pointers under PPC64 ELFv1 (which is used on PPC64/Linux on the POWER7, A2 and earlier cores) are really pointers to a function descriptor, a structure with three pointers: the actual pointer to the code to which to jump, the pointer to the TOC needed by the callee, and an environment pointer. We used to chain these loads, and make them opaque to the rest of the optimizer, so that they'd always occur directly before the call. This is not necessary, and in fact, highly suboptimal on embedded cores. Once the function pointer is known, the loads can be performed ahead of time; in fact, they can be hoisted out of loops.

Now these function descriptors are almost always generated by the linker, and thus the contents of the descriptors are invariant. As a result, by default, we'll mark the associated loads as invariant (allowing them to be hoisted out of loops). I've added a target feature to turn this off, however, just in case someone needs that option (constructing an on-stack descriptor, casting it to a function pointer, and then calling it cannot be well-defined C/C++ code, but I can imagine some JIT-compilation system doing so).

Consider this simple test:

  $ cat call.c
  typedef void (*fp)();
  void bar(fp x) {
    for (int i = 0; i < 1600000000; ++i)
      x();
  }

  $ cat main.c
  typedef void (*fp)();
  void bar(fp x);
  void foo() {}
  int main() {
    bar(foo);
  }

On the PPC A2 (the BG/Q supercomputer), marking the function-descriptor loads as invariant brings the execution time down to ~8 seconds from ~32 seconds with the loads in the loop.

The difference on the POWER7 is smaller.
Compiling with:

  gcc -std=c99 -O3 -mcpu=native call.c main.c : ~6 seconds [this is 4.8.2]
  clang -O3 -mcpu=native call.c main.c : ~5.3 seconds
  clang -O3 -mcpu=native call.c main.c -mno-invariant-function-descriptors : ~4 seconds
  (looks like we'd benefit from additional loop unrolling here, as a first guess, because this is faster with the extra loads)

The -mno-invariant-function-descriptors option will be added to Clang shortly.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@226207 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
02b677594c
commit
94dc061e85
@ -113,6 +113,11 @@ def FeatureP8Vector : SubtargetFeature<"power8-vector", "HasP8Vector", "true",
|
||||
"Enable POWER8 vector instructions",
|
||||
[FeatureVSX, FeatureAltivec]>;
|
||||
|
||||
// Subtarget feature "invariant-function-descriptors": when enabled, sets the
// subtarget field HasInvariantFunctionDescriptors to "true". The call lowering
// shown in this change reads that field to decide whether the loads from a
// function descriptor are created as invariant loads.
def FeatureInvariantFunctionDescriptors :
|
||||
SubtargetFeature<"invariant-function-descriptors",
|
||||
"HasInvariantFunctionDescriptors", "true",
|
||||
"Assume function descriptors are invariant">;
|
||||
|
||||
def DeprecatedMFTB : SubtargetFeature<"", "DeprecatedMFTB", "true",
|
||||
"Treat mftb as deprecated">;
|
||||
def DeprecatedDST : SubtargetFeature<"", "DeprecatedDST", "true",
|
||||
|
@ -784,8 +784,6 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case PPCISD::Hi: return "PPCISD::Hi";
|
||||
case PPCISD::Lo: return "PPCISD::Lo";
|
||||
case PPCISD::TOC_ENTRY: return "PPCISD::TOC_ENTRY";
|
||||
case PPCISD::LOAD: return "PPCISD::LOAD";
|
||||
case PPCISD::LOAD_TOC: return "PPCISD::LOAD_TOC";
|
||||
case PPCISD::DYNALLOC: return "PPCISD::DYNALLOC";
|
||||
case PPCISD::GlobalBaseReg: return "PPCISD::GlobalBaseReg";
|
||||
case PPCISD::SRL: return "PPCISD::SRL";
|
||||
@ -3590,11 +3588,11 @@ static bool isFunctionGlobalAddress(SDValue Callee) {
|
||||
|
||||
static
|
||||
unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
|
||||
SDValue &Chain, SDLoc dl, int SPDiff, bool isTailCall,
|
||||
bool IsPatchPoint,
|
||||
SDValue &Chain, SDValue CallSeqStart, SDLoc dl, int SPDiff,
|
||||
bool isTailCall, bool IsPatchPoint,
|
||||
SmallVectorImpl<std::pair<unsigned, SDValue> > &RegsToPass,
|
||||
SmallVectorImpl<SDValue> &Ops, std::vector<EVT> &NodeTys,
|
||||
const PPCSubtarget &Subtarget) {
|
||||
ImmutableCallSite *CS, const PPCSubtarget &Subtarget) {
|
||||
|
||||
bool isPPC64 = Subtarget.isPPC64();
|
||||
bool isSVR4ABI = Subtarget.isSVR4ABI();
|
||||
@ -3695,50 +3693,50 @@ unsigned PrepareCall(SelectionDAG &DAG, SDValue &Callee, SDValue &InFlag,
|
||||
// 6. On return of the callee, the TOC of the caller needs to be
|
||||
// restored (this is done in FinishCall()).
|
||||
//
|
||||
// All those operations are flagged together to ensure that no other
|
||||
// The loads are scheduled at the beginning of the call sequence, and the
|
||||
// register copies are flagged together to ensure that no other
|
||||
// operations can be scheduled in between. E.g. without flagging the
|
||||
// operations together, a TOC access in the caller could be scheduled
|
||||
// between the load of the callee TOC and the branch to the callee, which
|
||||
// copies together, a TOC access in the caller could be scheduled between
|
||||
// the assignment of the callee TOC and the branch to the callee, which
|
||||
// results in the TOC access going through the TOC of the callee instead
|
||||
// of going through the TOC of the caller, which leads to incorrect code.
|
||||
|
||||
// Load the address of the function entry point from the function
|
||||
// descriptor.
|
||||
SDVTList VTs = DAG.getVTList(MVT::i64, MVT::Other, MVT::Glue);
|
||||
SDValue LoadFuncPtr = DAG.getNode(PPCISD::LOAD, dl, VTs,
|
||||
makeArrayRef(MTCTROps, InFlag.getNode() ? 3 : 2));
|
||||
Chain = LoadFuncPtr.getValue(1);
|
||||
InFlag = LoadFuncPtr.getValue(2);
|
||||
SDValue LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-1);
|
||||
if (LDChain.getValueType() == MVT::Glue)
|
||||
LDChain = CallSeqStart.getValue(CallSeqStart->getNumValues()-2);
|
||||
|
||||
bool LoadsInv = Subtarget.hasInvariantFunctionDescriptors();
|
||||
|
||||
MachinePointerInfo MPI(CS ? CS->getCalledValue() : nullptr);
|
||||
SDValue LoadFuncPtr = DAG.getLoad(MVT::i64, dl, LDChain, Callee, MPI,
|
||||
false, false, LoadsInv, 8);
|
||||
|
||||
// Load environment pointer into r11.
|
||||
// Offset of the environment pointer within the function descriptor.
|
||||
SDValue PtrOff = DAG.getIntPtrConstant(16);
|
||||
|
||||
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, PtrOff);
|
||||
SDValue LoadEnvPtr = DAG.getNode(PPCISD::LOAD, dl, VTs, Chain, AddPtr,
|
||||
InFlag);
|
||||
Chain = LoadEnvPtr.getValue(1);
|
||||
InFlag = LoadEnvPtr.getValue(2);
|
||||
SDValue LoadEnvPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddPtr,
|
||||
MPI.getWithOffset(16), false, false,
|
||||
LoadsInv, 8);
|
||||
|
||||
SDValue TOCOff = DAG.getIntPtrConstant(8);
|
||||
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
|
||||
SDValue TOCPtr = DAG.getLoad(MVT::i64, dl, LDChain, AddTOC,
|
||||
MPI.getWithOffset(8), false, false,
|
||||
LoadsInv, 8);
|
||||
|
||||
SDValue TOCVal = DAG.getCopyToReg(Chain, dl, PPC::X2, TOCPtr,
|
||||
InFlag);
|
||||
Chain = TOCVal.getValue(0);
|
||||
InFlag = TOCVal.getValue(1);
|
||||
|
||||
SDValue EnvVal = DAG.getCopyToReg(Chain, dl, PPC::X11, LoadEnvPtr,
|
||||
InFlag);
|
||||
|
||||
Chain = EnvVal.getValue(0);
|
||||
InFlag = EnvVal.getValue(1);
|
||||
|
||||
// Load TOC of the callee into r2. We are using a target-specific load
|
||||
// with r2 hard coded, because the result of a target-independent load
|
||||
// would never go directly into r2, since r2 is a reserved register (which
|
||||
// prevents the register allocator from allocating it), resulting in an
|
||||
// additional register being allocated and an unnecessary move instruction
|
||||
// being generated.
|
||||
VTs = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||
SDValue TOCOff = DAG.getIntPtrConstant(8);
|
||||
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, Callee, TOCOff);
|
||||
SDValue LoadTOCPtr = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain,
|
||||
AddTOC, InFlag);
|
||||
Chain = LoadTOCPtr.getValue(0);
|
||||
InFlag = LoadTOCPtr.getValue(1);
|
||||
|
||||
MTCTROps[0] = Chain;
|
||||
MTCTROps[1] = LoadFuncPtr;
|
||||
MTCTROps[2] = InFlag;
|
||||
@ -3863,17 +3861,18 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
|
||||
SmallVector<std::pair<unsigned, SDValue>, 8>
|
||||
&RegsToPass,
|
||||
SDValue InFlag, SDValue Chain,
|
||||
SDValue &Callee,
|
||||
SDValue CallSeqStart, SDValue &Callee,
|
||||
int SPDiff, unsigned NumBytes,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SmallVectorImpl<SDValue> &InVals) const {
|
||||
SmallVectorImpl<SDValue> &InVals,
|
||||
ImmutableCallSite *CS) const {
|
||||
|
||||
bool isELFv2ABI = Subtarget.isELFv2ABI();
|
||||
std::vector<EVT> NodeTys;
|
||||
SmallVector<SDValue, 8> Ops;
|
||||
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, dl, SPDiff,
|
||||
isTailCall, IsPatchPoint, RegsToPass, Ops,
|
||||
NodeTys, Subtarget);
|
||||
unsigned CallOpc = PrepareCall(DAG, Callee, InFlag, Chain, CallSeqStart, dl,
|
||||
SPDiff, isTailCall, IsPatchPoint, RegsToPass,
|
||||
Ops, NodeTys, CS, Subtarget);
|
||||
|
||||
// Add implicit use of CR bit 6 for 32-bit SVR4 vararg calls
|
||||
if (isVarArg && Subtarget.isSVR4ABI() && !Subtarget.isPPC64())
|
||||
@ -3977,12 +3976,13 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
CallingConv::ID CallConv = CLI.CallConv;
|
||||
bool isVarArg = CLI.IsVarArg;
|
||||
bool IsPatchPoint = CLI.IsPatchPoint;
|
||||
ImmutableCallSite *CS = CLI.CS;
|
||||
|
||||
if (isTailCall)
|
||||
isTailCall = IsEligibleForTailCallOptimization(Callee, CallConv, isVarArg,
|
||||
Ins, DAG);
|
||||
|
||||
if (!isTailCall && CLI.CS && CLI.CS->isMustTailCall())
|
||||
if (!isTailCall && CS && CS->isMustTailCall())
|
||||
report_fatal_error("failed to perform tail call elimination on a call "
|
||||
"site marked musttail");
|
||||
|
||||
@ -3990,16 +3990,16 @@ PPCTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
|
||||
if (Subtarget.isPPC64())
|
||||
return LowerCall_64SVR4(Chain, Callee, CallConv, isVarArg,
|
||||
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
|
||||
dl, DAG, InVals);
|
||||
dl, DAG, InVals, CS);
|
||||
else
|
||||
return LowerCall_32SVR4(Chain, Callee, CallConv, isVarArg,
|
||||
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
|
||||
dl, DAG, InVals);
|
||||
dl, DAG, InVals, CS);
|
||||
}
|
||||
|
||||
return LowerCall_Darwin(Chain, Callee, CallConv, isVarArg,
|
||||
isTailCall, IsPatchPoint, Outs, OutVals, Ins,
|
||||
dl, DAG, InVals);
|
||||
dl, DAG, InVals, CS);
|
||||
}
|
||||
|
||||
SDValue
|
||||
@ -4010,7 +4010,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SDLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const {
|
||||
SmallVectorImpl<SDValue> &InVals,
|
||||
ImmutableCallSite *CS) const {
|
||||
// See PPCTargetLowering::LowerFormalArguments_32SVR4() for a description
|
||||
// of the 32-bit SVR4 ABI stack frame layout.
|
||||
|
||||
@ -4216,8 +4217,8 @@ PPCTargetLowering::LowerCall_32SVR4(SDValue Chain, SDValue Callee,
|
||||
false, TailCallArguments);
|
||||
|
||||
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
|
||||
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
|
||||
Ins, InVals);
|
||||
RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
|
||||
NumBytes, Ins, InVals, CS);
|
||||
}
|
||||
|
||||
// Copy an argument into memory, being careful to do this outside the
|
||||
@ -4248,7 +4249,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SDLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const {
|
||||
SmallVectorImpl<SDValue> &InVals,
|
||||
ImmutableCallSite *CS) const {
|
||||
|
||||
bool isELFv2ABI = Subtarget.isELFv2ABI();
|
||||
bool isLittleEndian = Subtarget.isLittleEndian();
|
||||
@ -4688,7 +4690,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
||||
unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
|
||||
SDValue PtrOff = DAG.getIntPtrConstant(TOCSaveOffset);
|
||||
SDValue AddPtr = DAG.getNode(ISD::ADD, dl, PtrVT, StackPtr, PtrOff);
|
||||
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr, MachinePointerInfo(),
|
||||
Chain = DAG.getStore(Val.getValue(1), dl, Val, AddPtr,
|
||||
MachinePointerInfo::getStack(TOCSaveOffset),
|
||||
false, false, 0);
|
||||
// In the ELFv2 ABI, R12 must contain the address of an indirect callee.
|
||||
// This does not mean the MTCTR instruction must use R12; it's easier
|
||||
@ -4711,8 +4714,8 @@ PPCTargetLowering::LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
||||
FPOp, true, TailCallArguments);
|
||||
|
||||
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
|
||||
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
|
||||
Ins, InVals);
|
||||
RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
|
||||
NumBytes, Ins, InVals, CS);
|
||||
}
|
||||
|
||||
SDValue
|
||||
@ -4723,7 +4726,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SDLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const {
|
||||
SmallVectorImpl<SDValue> &InVals,
|
||||
ImmutableCallSite *CS) const {
|
||||
|
||||
unsigned NumOps = Outs.size();
|
||||
|
||||
@ -5104,8 +5108,8 @@ PPCTargetLowering::LowerCall_Darwin(SDValue Chain, SDValue Callee,
|
||||
FPOp, true, TailCallArguments);
|
||||
|
||||
return FinishCall(CallConv, dl, isTailCall, isVarArg, IsPatchPoint, DAG,
|
||||
RegsToPass, InFlag, Chain, Callee, SPDiff, NumBytes,
|
||||
Ins, InVals);
|
||||
RegsToPass, InFlag, Chain, CallSeqStart, Callee, SPDiff,
|
||||
NumBytes, Ins, InVals, CS);
|
||||
}
|
||||
|
||||
bool
|
||||
|
@ -76,13 +76,6 @@ namespace llvm {
|
||||
/// The following two target-specific nodes are used for calls through
|
||||
/// function pointers in the 64-bit SVR4 ABI.
|
||||
|
||||
/// Like a regular LOAD but additionally taking/producing a flag.
|
||||
LOAD,
|
||||
|
||||
/// Like LOAD (taking/producing a flag), but using r2 as hard-coded
|
||||
/// destination.
|
||||
LOAD_TOC,
|
||||
|
||||
/// OPRC, CHAIN = DYNALLOC(CHAIN, NEGSIZE, FRAME_INDEX)
|
||||
/// This instruction is lowered in PPCRegisterInfo::eliminateFrameIndex to
|
||||
/// compute an allocation on the stack.
|
||||
@ -687,11 +680,12 @@ namespace llvm {
|
||||
SelectionDAG &DAG,
|
||||
SmallVector<std::pair<unsigned, SDValue>, 8>
|
||||
&RegsToPass,
|
||||
SDValue InFlag, SDValue Chain,
|
||||
SDValue InFlag, SDValue Chain, SDValue CallSeqStart,
|
||||
SDValue &Callee,
|
||||
int SPDiff, unsigned NumBytes,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
SmallVectorImpl<SDValue> &InVals,
|
||||
ImmutableCallSite *CS) const;
|
||||
|
||||
SDValue
|
||||
LowerFormalArguments(SDValue Chain,
|
||||
@ -753,7 +747,8 @@ namespace llvm {
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SDLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
SmallVectorImpl<SDValue> &InVals,
|
||||
ImmutableCallSite *CS) const;
|
||||
SDValue
|
||||
LowerCall_64SVR4(SDValue Chain, SDValue Callee,
|
||||
CallingConv::ID CallConv,
|
||||
@ -762,7 +757,8 @@ namespace llvm {
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SDLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
SmallVectorImpl<SDValue> &InVals,
|
||||
ImmutableCallSite *CS) const;
|
||||
SDValue
|
||||
LowerCall_32SVR4(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
|
||||
bool isVarArg, bool isTailCall, bool IsPatchPoint,
|
||||
@ -770,7 +766,8 @@ namespace llvm {
|
||||
const SmallVectorImpl<SDValue> &OutVals,
|
||||
const SmallVectorImpl<ISD::InputArg> &Ins,
|
||||
SDLoc dl, SelectionDAG &DAG,
|
||||
SmallVectorImpl<SDValue> &InVals) const;
|
||||
SmallVectorImpl<SDValue> &InVals,
|
||||
ImmutableCallSite *CS) const;
|
||||
|
||||
SDValue lowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const;
|
||||
SDValue lowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const;
|
||||
|
@ -837,11 +837,6 @@ def LDtocBA: Pseudo<(outs g8rc:$rD), (ins tocentry:$disp, g8rc:$reg),
|
||||
[(set i64:$rD,
|
||||
(PPCtoc_entry tblockaddress:$disp, i64:$reg))]>, isPPC64;
|
||||
|
||||
let hasSideEffects = 1, isCodeGenOnly = 1, RST = 2, Defs = [X2] in
|
||||
def LDinto_toc: DSForm_1<58, 0, (outs), (ins memrix:$src),
|
||||
"ld 2, $src", IIC_LdStLD,
|
||||
[(PPCload_toc ixaddr:$src)]>, isPPC64;
|
||||
|
||||
def LDX : XForm_1<31, 21, (outs g8rc:$rD), (ins memrr:$src),
|
||||
"ldx $rD, $src", IIC_LdStLD,
|
||||
[(set i64:$rD, (load xaddr:$src))]>, isPPC64;
|
||||
@ -870,11 +865,6 @@ def LDUX : XForm_1<31, 53, (outs g8rc:$rD, ptr_rc_nor0:$ea_result),
|
||||
}
|
||||
}
|
||||
|
||||
def : Pat<(PPCload ixaddr:$src),
|
||||
(LD ixaddr:$src)>;
|
||||
def : Pat<(PPCload xaddr:$src),
|
||||
(LDX xaddr:$src)>;
|
||||
|
||||
// Support for medium and large code model.
|
||||
def ADDIStocHA: Pseudo<(outs g8rc:$rD), (ins g8rc_nox0:$reg, tocentry:$disp),
|
||||
"#ADDIStocHA",
|
||||
|
@ -145,11 +145,6 @@ def PPCcall_nop : SDNode<"PPCISD::CALL_NOP", SDT_PPCCall,
|
||||
def PPCcall_nop_tls : SDNode<"PPCISD::CALL_NOP_TLS", SDT_PPCCall,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
def PPCload_toc : SDNode<"PPCISD::LOAD_TOC", SDTypeProfile<0, 1, []>,
|
||||
[SDNPHasChain, SDNPSideEffect,
|
||||
SDNPInGlue, SDNPOutGlue]>;
|
||||
def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
|
||||
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
|
||||
|
@ -123,7 +123,7 @@ void PPCSubtarget::initializeEnvironment() {
|
||||
DeprecatedDST = false;
|
||||
HasLazyResolverStubs = false;
|
||||
HasICBT = false;
|
||||
|
||||
HasInvariantFunctionDescriptors = false;
|
||||
}
|
||||
|
||||
void PPCSubtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) {
|
||||
|
@ -114,6 +114,7 @@ protected:
|
||||
bool HasLazyResolverStubs;
|
||||
bool IsLittleEndian;
|
||||
bool HasICBT;
|
||||
bool HasInvariantFunctionDescriptors;
|
||||
|
||||
enum {
|
||||
PPC_ABI_UNKNOWN,
|
||||
@ -232,6 +233,9 @@ public:
|
||||
bool isDeprecatedMFTB() const { return DeprecatedMFTB; }
|
||||
bool isDeprecatedDST() const { return DeprecatedDST; }
|
||||
bool hasICBT() const { return HasICBT; }
|
||||
/// Returns the HasInvariantFunctionDescriptors subtarget flag, i.e. whether
/// function descriptors are assumed to be invariant.
bool hasInvariantFunctionDescriptors() const {
|
||||
return HasInvariantFunctionDescriptors;
|
||||
}
|
||||
|
||||
const Triple &getTargetTriple() const { return TargetTriple; }
|
||||
|
||||
|
@ -64,6 +64,14 @@ static std::string computeFSAdditions(StringRef FS, CodeGenOpt::Level OL, String
|
||||
else
|
||||
FullFS = "+crbits";
|
||||
}
|
||||
|
||||
if (OL != CodeGenOpt::None) {
|
||||
if (!FullFS.empty())
|
||||
FullFS = "+invariant-function-descriptors," + FullFS;
|
||||
else
|
||||
FullFS = "+invariant-function-descriptors";
|
||||
}
|
||||
|
||||
return FullFS;
|
||||
}
|
||||
|
||||
|
47
test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll
Normal file
47
test/CodeGen/PowerPC/ppc64-func-desc-hoist.ll
Normal file
@ -0,0 +1,47 @@
|
||||
; RUN: llc -mcpu=a2 < %s | FileCheck %s -check-prefix=INVFUNCDESC
|
||||
; RUN: llc -mcpu=a2 -mattr=-invariant-function-descriptors < %s | FileCheck %s -check-prefix=NONINVFUNCDESC
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
; Function Attrs: nounwind
|
||||
define void @bar(void (...)* nocapture %x) #0 {
|
||||
entry:
|
||||
%callee.knr.cast = bitcast void (...)* %x to void ()*
|
||||
br label %for.body
|
||||
|
||||
; INVFUNCDESC-LABEL: @bar
|
||||
; INVFUNCDESC-DAG: ld [[REG1:[0-9]+]], 8(3)
|
||||
; INVFUNCDESC-DAG: ld [[REG2:[0-9]+]], 16(3)
|
||||
; INVFUNCDESC-DAG: ld [[REG3:[0-9]+]], 0(3)
|
||||
|
||||
; INVFUNCDESC: %for.body
|
||||
; INVFUNCDESC-DAG: mtctr [[REG3]]
|
||||
; INVFUNCDESC-DAG: mr 11, [[REG2]]
|
||||
; INVFUNCDESC-DAG: std 2, 40(1)
|
||||
; INVFUNCDESC: mr 2, [[REG1]]
|
||||
; INVFUNCDESC: bctrl
|
||||
; INVFUNCDESC-NEXT: ld 2, 40(1)
|
||||
|
||||
; NONINVFUNCDESC-LABEL: @bar
|
||||
; NONINVFUNCDESC: %for.body
|
||||
; NONINVFUNCDESC: std 2, 40(1)
|
||||
; NONINVFUNCDESC-DAG: ld 3, 0(30)
|
||||
; NONINVFUNCDESC-DAG: ld 11, 16(30)
|
||||
; NONINVFUNCDESC-DAG: ld 2, 8(30)
|
||||
; NONINVFUNCDESC: mtctr 3
|
||||
; NONINVFUNCDESC: bctrl
|
||||
; NONINVFUNCDESC-NEXT: ld 2, 40(1)
|
||||
|
||||
for.body: ; preds = %for.body, %entry
|
||||
%i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
|
||||
tail call void %callee.knr.cast() #0
|
||||
%inc = add nuw nsw i32 %i.02, 1
|
||||
%exitcond = icmp eq i32 %inc, 1600000000
|
||||
br i1 %exitcond, label %for.end, label %for.body
|
||||
|
||||
for.end: ; preds = %for.body
|
||||
ret void
|
||||
}
|
||||
|
||||
attributes #0 = { nounwind }
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -mcpu=ppc64 < %s | FileCheck %s
|
||||
; RUN: llc -mcpu=pwr7 < %s | FileCheck %s
|
||||
target datalayout = "E-m:e-i64:64-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user