From 5b00ceaeeabff8c25abb09926343c3fcb06053d8 Mon Sep 17 00:00:00 2001 From: Hal Finkel Date: Sat, 31 Mar 2012 14:45:15 +0000 Subject: [PATCH] Fix dynamic linking on PPC64. Dynamic linking on PPC64 has had problems since we had to move the top-down hazard-detection logic post-ra. For dynamic linking to work there needs to be a nop placed after every call. It turns out that it is really hard to guarantee that nothing will be placed in between the call (bl) and the nop during post-ra scheduling. Previous attempts at fixing this by placing logic inside the hazard detector only partially worked. This is now fixed in a different way: call+nop codegen-only instructions. As far as CodeGen is concerned the pair is now a single instruction and cannot be split. This solution works much better than previous attempts. The scoreboard hazard detector is also renamed to be more generic, there is currently no cpu-specific logic in it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153816 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/PowerPC/PPCHazardRecognizers.cpp | 33 ++++++------ lib/Target/PowerPC/PPCHazardRecognizers.h | 14 ++--- lib/Target/PowerPC/PPCISelLowering.cpp | 23 +++++--- lib/Target/PowerPC/PPCISelLowering.h | 4 +- lib/Target/PowerPC/PPCInstr64Bit.td | 20 ++++++- lib/Target/PowerPC/PPCInstrFormats.td | 59 +++++++++++++++++++++ lib/Target/PowerPC/PPCInstrInfo.cpp | 5 +- lib/Target/PowerPC/PPCInstrInfo.td | 3 ++ 8 files changed, 126 insertions(+), 35 deletions(-) diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp index ae317af4823..6ed1fb9e6a3 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp +++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp @@ -22,17 +22,29 @@ using namespace llvm; //===----------------------------------------------------------------------===// -// PowerPC 440 Hazard Recognizer -void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { +// PowerPC Scoreboard Hazard Recognizer +void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { const MCInstrDesc *MCID = DAG->getInstrDesc(SU); - if (!MCID) { + if (!MCID) // This is a PPC pseudo-instruction. return; - } ScoreboardHazardRecognizer::EmitInstruction(SU); } +ScheduleHazardRecognizer::HazardType +PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { + return ScoreboardHazardRecognizer::getHazardType(SU, Stalls); +} + +void PPCScoreboardHazardRecognizer::AdvanceCycle() { + ScoreboardHazardRecognizer::AdvanceCycle(); +} + +void PPCScoreboardHazardRecognizer::Reset() { + ScoreboardHazardRecognizer::Reset(); +} + //===----------------------------------------------------------------------===// // PowerPC 970 Hazard Recognizer // @@ -61,7 +73,6 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) : TII(tii) { - LastWasBL8_ELF = false; EndDispatchGroup(); } @@ -132,15 +143,6 @@ getHazardType(SUnit *SU, int Stalls) { return NoHazard; unsigned Opcode = MI->getOpcode(); - - // If the last instruction was a BL8_ELF, then the NOP must follow it - // directly (this is strong requirement from the linker due to the ELF ABI). - // We return only Hazard (and not NoopHazard) because if the NOP is necessary - // then it will already be in the instruction stream (it is not always - // necessary; tail calls, for example, do not need it). - if (LastWasBL8_ELF && Opcode != PPC::NOP) - return Hazard; - bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = GetInstrType(Opcode, isFirst, isSingle, isCracked, @@ -199,8 +201,6 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) { return; unsigned Opcode = MI->getOpcode(); - LastWasBL8_ELF = (Opcode == PPC::BL8_ELF); - bool isFirst, isSingle, isCracked, isLoad, isStore; PPCII::PPC970_Unit InstrType = GetInstrType(Opcode, isFirst, isSingle, isCracked, @@ -240,7 +240,6 @@ void PPCHazardRecognizer970::AdvanceCycle() { } void PPCHazardRecognizer970::Reset() { - LastWasBL8_ELF = false; EndDispatchGroup(); } diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.h b/lib/Target/PowerPC/PPCHazardRecognizers.h index d80a3850b01..55b45d01b20 100644 --- a/lib/Target/PowerPC/PPCHazardRecognizers.h +++ b/lib/Target/PowerPC/PPCHazardRecognizers.h @@ -21,16 +21,19 @@ namespace llvm { -/// PPCHazardRecognizer440 - This class implements a scoreboard-based -/// hazard recognizer for the PPC 440 and friends. -class PPCHazardRecognizer440 : public ScoreboardHazardRecognizer { +/// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based +/// hazard recognizer for generic PPC processors. +class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer { const ScheduleDAG *DAG; public: - PPCHazardRecognizer440(const InstrItineraryData *ItinData, + PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData, const ScheduleDAG *DAG_) : ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {} + virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void EmitInstruction(SUnit *SU); + virtual void AdvanceCycle(); + virtual void Reset(); }; /// PPCHazardRecognizer970 - This class defines a finite state automata that @@ -49,9 +52,6 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer { // HasCTRSet - If the CTR register is set in this group, disallow BCTRL. bool HasCTRSet; - // Was the last instruction issued a BL8_ELF - bool LastWasBL8_ELF; - // StoredPtr - Keep track of the address of any store. If we see a load from // the same address (or one that aliases it), disallow the store. We can have // up to four stores in one dispatch group, hence we track up to 4. diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp index bfa4df92488..8357e8bbdb3 100644 --- a/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/lib/Target/PowerPC/PPCISelLowering.cpp @@ -472,6 +472,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const { case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; case PPCISD::STD_32: return "PPCISD::STD_32"; case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; + case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4"; case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; case PPCISD::NOP: return "PPCISD::NOP"; case PPCISD::MTCTR: return "PPCISD::MTCTR"; @@ -2813,9 +2814,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); } - Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); - InFlag = Chain.getValue(1); - // Add a NOP immediately after the branch instruction when using the 64-bit // SVR4 ABI. At link time, if caller and callee are in a different module and // thus have a different TOC, the call will be replaced with a call to a stub @@ -2824,8 +2822,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // which restores the TOC of the caller from the TOC save slot of the current // stack frame. If caller and callee belong to the same module (and have the // same TOC), the NOP will remain unchanged. + + bool needsTOCRestore = false; if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { - SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); if (CallOpc == PPCISD::BCTRL_SVR4) { // This is a call through a function pointer. // Restore the caller TOC from the save area into R2. @@ -2836,14 +2835,22 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl, // since r2 is a reserved register (which prevents the register allocator // from allocating it), resulting in an additional register being // allocated and an unnecessary move instruction being generated. - Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); - InFlag = Chain.getValue(1); - } else { + needsTOCRestore = true; + } else if (CallOpc == PPCISD::CALL_SVR4) { // Otherwise insert NOP. - InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag); + CallOpc = PPCISD::CALL_NOP_SVR4; } } + Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size()); + InFlag = Chain.getValue(1); + + if (needsTOCRestore) { + SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue); + Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); + InFlag = Chain.getValue(1); + } + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(BytesCalleePops, true), InFlag); diff --git a/lib/Target/PowerPC/PPCISelLowering.h b/lib/Target/PowerPC/PPCISelLowering.h index 2e046c4693c..e79229f49b6 100644 --- a/lib/Target/PowerPC/PPCISelLowering.h +++ b/lib/Target/PowerPC/PPCISelLowering.h @@ -95,7 +95,9 @@ namespace llvm { EXTSW_32, /// CALL - A direct function call. - CALL_Darwin, CALL_SVR4, + /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit + /// SVR4 calls. + CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4, /// NOP - Special NOP which follows 64-bit SVR4 calls. NOP, diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td index 78f35961526..e1d8b65b8af 100644 --- a/lib/Target/PowerPC/PPCInstr64Bit.td +++ b/lib/Target/PowerPC/PPCInstr64Bit.td @@ -89,10 +89,22 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in { let Uses = [RM] in { def BL8_ELF : IForm<18, 0, 1, (outs), (ins calltarget:$func, variable_ops), - "bl $func", BrB, []>; // See Pat patterns below. + "bl $func", BrB, []>; // See Pat patterns below. + + let isCodeGenOnly = 1 in + def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24, + (outs), (ins calltarget:$func, variable_ops), + "bl $func\n\tnop", BrB, []>; + def BLA8_ELF : IForm<18, 1, 1, (outs), (ins aaddr:$func, variable_ops), "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; + + let isCodeGenOnly = 1 in + def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24, + (outs), (ins aaddr:$func, variable_ops), + "bla $func\n\tnop", BrB, + [(PPCcall_nop_SVR4 (i64 imm:$func))]>; } let Uses = [X11, CTR8, RM] in { def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, @@ -111,8 +123,14 @@ def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)), def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), (BL8_ELF tglobaladdr:$dst)>; +def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)), + (BL8_NOP_ELF tglobaladdr:$dst)>; + def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), (BL8_ELF texternalsym:$dst)>; +def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)), + (BL8_NOP_ELF texternalsym:$dst)>; + def : Pat<(PPCnop), (NOP)>; diff --git a/lib/Target/PowerPC/PPCInstrFormats.td b/lib/Target/PowerPC/PPCInstrFormats.td index d332e2ac973..d8e4b2bdf34 100644 --- a/lib/Target/PowerPC/PPCInstrFormats.td +++ b/lib/Target/PowerPC/PPCInstrFormats.td @@ -51,6 +51,36 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; } class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } +// Two joined instructions; used to emit two adjacent instructions as one. +// The itinerary from the first instruction is used for scheduling and +// classification. +class I2 opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr, + InstrItinClass itin> + : Instruction { + field bits<64> Inst; + + bit PPC64 = 0; // Default value, override with isPPC64 + + let Namespace = "PPC"; + let Inst{0-5} = opcode1; + let Inst{32-37} = opcode2; + let OutOperandList = OOL; + let InOperandList = IOL; + let AsmString = asmstr; + let Itinerary = itin; + + bits<1> PPC970_First = 0; + bits<1> PPC970_Single = 0; + bits<1> PPC970_Cracked = 0; + bits<3> PPC970_Unit = 0; + + /// These fields correspond to the fields in PPCInstrInfo.h. Any changes to + /// these must be reflected there! See comments there for what these are. + let TSFlags{0} = PPC970_First; + let TSFlags{1} = PPC970_Single; + let TSFlags{2} = PPC970_Cracked; + let TSFlags{5-3} = PPC970_Unit; +} // 1.7.1 I-Form class IForm opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, @@ -164,6 +194,35 @@ class DForm_4_zero opcode, dag OOL, dag IOL, string asmstr, let Addr = 0; } +class IForm_and_DForm_1 opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : I2 { + bits<5> A; + bits<21> Addr; + + let Pattern = pattern; + bits<24> LI; + + let Inst{6-29} = LI; + let Inst{30} = aa; + let Inst{31} = lk; + + let Inst{38-42} = A; + let Inst{43-47} = Addr{20-16}; // Base Reg + let Inst{48-63} = Addr{15-0}; // Displacement +} + +// This is used to emit BL8+NOP. +class IForm_and_DForm_4_zero opcode1, bit aa, bit lk, bits<6> opcode2, + dag OOL, dag IOL, string asmstr, + InstrItinClass itin, list pattern> + : IForm_and_DForm_1 { + let A = 0; + let Addr = 0; +} + class DForm_5 opcode, dag OOL, dag IOL, string asmstr, InstrItinClass itin> : I { diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 7a8ec40ab16..6f74828b21f 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -51,7 +51,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer( unsigned Directive = TM->getSubtarget().getDarwinDirective(); if (Directive == PPC::DIR_440) { const InstrItineraryData *II = TM->getInstrItineraryData(); - return new PPCHazardRecognizer440(II, DAG); + return new PPCScoreboardHazardRecognizer(II, DAG); } return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); @@ -684,6 +684,9 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const { case PPC::GC_LABEL: case PPC::DBG_VALUE: return 0; + case PPC::BL8_NOP_ELF: + case PPC::BLA8_NOP_ELF: + return 8; default: return 4; // PowerPC instructions are all 4 bytes } diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td index 939b71ae40a..1b15df058c4 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.td +++ b/lib/Target/PowerPC/PPCInstrInfo.td @@ -116,6 +116,9 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall, def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;