mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-14 16:33:28 +00:00
[PowerPC] Ensure that the TOC reload directly follows bctrl on PPC64
On non-Darwin PPC64, the TOC reload needs to come directly after the bctrl instruction (for indirect calls) because the 'bctrl/ld 2, 40(1)' instruction sequence is interpreted by the unwinding code in libgcc. To make sure these occur as a pair, as with other pairings interpreted by the linker, fuse the two instructions into one instruction (for code generation only). In the future, we might wish to do this by emitting CFI directives instead, but this solution is simpler, and mirrors what GCC does. Additional discussion on this point is contained in the PR. Fixes PR22015. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@224788 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
5dbd280542
commit
c9e5247ea7
@ -775,6 +775,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||
case PPCISD::CALL_NOP_TLS: return "PPCISD::CALL_NOP_TLS";
|
||||
case PPCISD::MTCTR: return "PPCISD::MTCTR";
|
||||
case PPCISD::BCTRL: return "PPCISD::BCTRL";
|
||||
case PPCISD::BCTRL_LOAD_TOC: return "PPCISD::BCTRL_LOAD_TOC";
|
||||
case PPCISD::RET_FLAG: return "PPCISD::RET_FLAG";
|
||||
case PPCISD::READ_TIME_BASE: return "PPCISD::READ_TIME_BASE";
|
||||
case PPCISD::EH_SJLJ_SETJMP: return "PPCISD::EH_SJLJ_SETJMP";
|
||||
@ -3864,7 +3865,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
|
||||
// stack frame. If caller and callee belong to the same module (and have the
|
||||
// same TOC), the NOP will remain unchanged.
|
||||
|
||||
bool needsTOCRestore = false;
|
||||
if (!isTailCall && Subtarget.isSVR4ABI()&& Subtarget.isPPC64()) {
|
||||
if (CallOpc == PPCISD::BCTRL) {
|
||||
// This is a call through a function pointer.
|
||||
@ -3876,7 +3876,17 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
|
||||
// since r2 is a reserved register (which prevents the register allocator
|
||||
// from allocating it), resulting in an additional register being
|
||||
// allocated and an unnecessary move instruction being generated.
|
||||
needsTOCRestore = true;
|
||||
CallOpc = PPCISD::BCTRL_LOAD_TOC;
|
||||
|
||||
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
||||
SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
|
||||
unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
|
||||
SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
|
||||
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
|
||||
|
||||
// The address needs to go after the chain input but before the flag (or
|
||||
// any other variadic arguments).
|
||||
Ops.insert(std::next(Ops.begin()), AddTOC);
|
||||
} else if ((CallOpc == PPCISD::CALL) &&
|
||||
(!isLocalCall(Callee) ||
|
||||
DAG.getTarget().getRelocationModel() == Reloc::PIC_)) {
|
||||
@ -3890,17 +3900,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, SDLoc dl,
|
||||
Chain = DAG.getNode(CallOpc, dl, NodeTys, Ops);
|
||||
InFlag = Chain.getValue(1);
|
||||
|
||||
if (needsTOCRestore) {
|
||||
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
|
||||
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
|
||||
SDValue StackPtr = DAG.getRegister(PPC::X1, PtrVT);
|
||||
unsigned TOCSaveOffset = PPCFrameLowering::getTOCSaveOffset(isELFv2ABI);
|
||||
SDValue TOCOff = DAG.getIntPtrConstant(TOCSaveOffset);
|
||||
SDValue AddTOC = DAG.getNode(ISD::ADD, dl, MVT::i64, StackPtr, TOCOff);
|
||||
Chain = DAG.getNode(PPCISD::LOAD_TOC, dl, VTs, Chain, AddTOC, InFlag);
|
||||
InFlag = Chain.getValue(1);
|
||||
}
|
||||
|
||||
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
|
||||
DAG.getIntPtrConstant(BytesCalleePops, true),
|
||||
InFlag, dl);
|
||||
|
@ -117,6 +117,10 @@ namespace llvm {
|
||||
/// BCTRL instruction.
|
||||
BCTRL,
|
||||
|
||||
/// CHAIN,FLAG = BCTRL_LOAD_TOC(CHAIN, ADDR, INFLAG) - The combination of a bctrl
|
||||
/// instruction and the TOC reload required on SVR4 PPC64.
|
||||
BCTRL_LOAD_TOC,
|
||||
|
||||
/// Return with a flag operand, matched by 'blr'
|
||||
RET_FLAG,
|
||||
|
||||
|
@ -167,6 +167,17 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BCTRL8_LDinto_toc: code-generation-only (isCodeGenOnly = 1) call instruction
// whose assembly string prints "bctrl" immediately followed by "ld 2, $src",
// so the indirect call and the load into r2 are emitted as one inseparable
// unit. It is selected from the PPCbctrl_load_toc node with an ixaddr memory
// operand and is only available in 64-bit mode.
//
// The enclosing 'let' marks it as a call that defines LR8 and X2 and uses
// CTR8 and RM, and pins the RST field of the second (load) half to 2.
// The positional template arguments <19, 528, 20, 0, 1, 58, 0> bind, in order,
// to opcode1, xo1, bo, bi, lk, opcode2, xo2 of XLForm_2_ext_and_DSForm_1.
let isCall = 1, PPC970_Unit = 7, isCodeGenOnly = 1,
|
||||
Defs = [LR8, X2], Uses = [CTR8, RM], RST = 2 in {
|
||||
def BCTRL8_LDinto_toc :
|
||||
XLForm_2_ext_and_DSForm_1<19, 528, 20, 0, 1, 58, 0, (outs),
|
||||
(ins memrix:$src),
|
||||
"bctrl\n\tld 2, $src", IIC_BrB,
|
||||
[(PPCbctrl_load_toc ixaddr:$src)]>,
|
||||
Requires<[In64BitMode]>;
|
||||
}
|
||||
|
||||
} // Interpretation64Bit
|
||||
|
||||
// FIXME: Duplicating this for the asm parser should be unnecessary, but the
|
||||
|
@ -945,6 +945,45 @@ class XLForm_3<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
|
||||
let Inst{31} = 0;
|
||||
}
|
||||
|
||||
// XLForm_2_and_DSForm_1: encoding class for two instructions fused into one
// 64-bit 'Inst' value. Bits 6-31 carry the fields of the first instruction
// (BO, BI, three zero bits, BH, xo1, lk); bits 38-63 carry the fields of the
// second instruction (RST, a register number and displacement taken from
// DS_RA, xo2).
// Bits 0-5 and 32-37 are not assigned in this class; presumably the I2 base
// class places opcode1 and opcode2 there -- confirm against I2.
class XLForm_2_and_DSForm_1<bits<6> opcode1, bits<10> xo1, bit lk,
|
||||
bits<6> opcode2, bits<2> xo2,
|
||||
dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
: I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
|
||||
bits<5> BO;
|
||||
bits<5> BI;
|
||||
bits<2> BH;
|
||||
|
||||
bits<5> RST;
|
||||
bits<19> DS_RA;
|
||||
|
||||
let Pattern = pattern;
|
||||
|
||||
// First half (bits 6-31): branch-style fields.
let Inst{6-10} = BO;
|
||||
let Inst{11-15} = BI;
|
||||
let Inst{16-18} = 0;
|
||||
let Inst{19-20} = BH;
|
||||
let Inst{21-30} = xo1;
|
||||
let Inst{31} = lk;
|
||||
|
||||
// Second half (bits 38-63): target register plus the combined
// register/displacement operand split out of DS_RA.
let Inst{38-42} = RST;
|
||||
let Inst{43-47} = DS_RA{18-14}; // Register #
|
||||
let Inst{48-61} = DS_RA{13-0}; // Displacement.
|
||||
let Inst{62-63} = xo2;
|
||||
}
|
||||
|
||||
// XLForm_2_ext_and_DSForm_1: convenience variant of XLForm_2_and_DSForm_1 in
// which the BO and BI fields are supplied as template arguments (bo, bi)
// instead of being left as operands, and BH is fixed to 0. All other
// arguments are forwarded unchanged to the base class.
class XLForm_2_ext_and_DSForm_1<bits<6> opcode1, bits<10> xo1,
|
||||
bits<5> bo, bits<5> bi, bit lk,
|
||||
bits<6> opcode2, bits<2> xo2,
|
||||
dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin, list<dag> pattern>
|
||||
: XLForm_2_and_DSForm_1<opcode1, xo1, lk, opcode2, xo2,
|
||||
OOL, IOL, asmstr, itin, pattern> {
|
||||
let BO = bo;
|
||||
let BI = bi;
|
||||
let BH = 0;
|
||||
}
|
||||
|
||||
// 1.7.8 XFX-Form
|
||||
class XFXForm_1<bits<6> opcode, bits<10> xo, dag OOL, dag IOL, string asmstr,
|
||||
InstrItinClass itin>
|
||||
|
@ -153,6 +153,10 @@ def PPCmtctr : SDNode<"PPCISD::MTCTR", SDT_PPCCall,
|
||||
def PPCbctrl : SDNode<"PPCISD::BCTRL", SDTNone,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
// PPCbctrl_load_toc: TableGen handle for the PPCISD::BCTRL_LOAD_TOC DAG node.
// Its type profile declares 0 results and 1 operand with no type constraints;
// the node carries a chain, optionally consumes glue, produces glue, and is
// variadic. The single declared operand is what the BCTRL8_LDinto_toc pattern
// matches as the ixaddr address.
def PPCbctrl_load_toc : SDNode<"PPCISD::BCTRL_LOAD_TOC",
|
||||
SDTypeProfile<0, 1, []>,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
|
||||
SDNPVariadic]>;
|
||||
|
||||
def retflag : SDNode<"PPCISD::RET_FLAG", SDTNone,
|
||||
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc < %s -march=ppc64 | FileCheck %s
|
||||
; RUN: llc < %s -march=ppc64 -mcpu=pwr7 | FileCheck %s
|
||||
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
|
||||
target triple = "powerpc64-unknown-linux-gnu"
|
||||
|
||||
@ -67,3 +67,20 @@ define double @test_external(double %x) nounwind {
|
||||
; CHECK-NEXT: nop
|
||||
ret double %call
|
||||
}
|
||||
|
||||
; The 'ld 2, 40(1)' really must always come directly after the bctrl to make
|
||||
; the unwinding code in libgcc happy.
|
||||
@g = external global void ()*
|
||||
declare void @h(i64)
|
||||
; Makes an indirect call through the function pointer loaded from @g and then
; a direct call to @h. The directives at the end of the body require that
; 'ld 2, 40(1)' is the very next output line after 'bctrl', and that a 'blr'
; appears later in the function.
define void @test_indir_toc_reload(i64 %x) {
|
||||
%1 = load void ()** @g
|
||||
call void %1()
|
||||
call void @h(i64 %x)
|
||||
ret void
|
||||
|
||||
; CHECK-LABEL: @test_indir_toc_reload
|
||||
; CHECK: bctrl
|
||||
; CHECK-NEXT: ld 2, 40(1)
|
||||
; CHECK: blr
|
||||
}
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user