Fix dynamic linking on PPC64.

Dynamic linking on PPC64 has had problems ever since the top-down
hazard-detection logic had to be moved to run post-RA. For dynamic linking to
work, a nop must be placed immediately after every call. It turns out to be
really hard to guarantee that nothing will be placed between the call (bl) and
the nop during post-RA scheduling. Previous attempts at fixing this by placing
logic inside the hazard recognizer only partially worked.

This is now fixed in a different way: by adding combined call+nop codegen-only
instructions (BL8_NOP_ELF and BLA8_NOP_ELF). As far as CodeGen is concerned, the
pair is now a single instruction and cannot be split. This solution works much
better than the previous attempts.

The scoreboard hazard recognizer is also renamed to be more generic
(PPCHazardRecognizer440 -> PPCScoreboardHazardRecognizer), since there is
currently no CPU-specific logic in it.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@153816 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Hal Finkel 2012-03-31 14:45:15 +00:00
parent f5f256cffd
commit 5b00ceaeea
8 changed files with 126 additions and 35 deletions

View File

@ -22,17 +22,29 @@
using namespace llvm; using namespace llvm;
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// PowerPC 440 Hazard Recognizer // PowerPC Scoreboard Hazard Recognizer
void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) { void PPCScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) {
const MCInstrDesc *MCID = DAG->getInstrDesc(SU); const MCInstrDesc *MCID = DAG->getInstrDesc(SU);
if (!MCID) { if (!MCID)
// This is a PPC pseudo-instruction. // This is a PPC pseudo-instruction.
return; return;
}
ScoreboardHazardRecognizer::EmitInstruction(SU); ScoreboardHazardRecognizer::EmitInstruction(SU);
} }
ScheduleHazardRecognizer::HazardType
PPCScoreboardHazardRecognizer::getHazardType(SUnit *SU, int Stalls) {
return ScoreboardHazardRecognizer::getHazardType(SU, Stalls);
}
void PPCScoreboardHazardRecognizer::AdvanceCycle() {
ScoreboardHazardRecognizer::AdvanceCycle();
}
void PPCScoreboardHazardRecognizer::Reset() {
ScoreboardHazardRecognizer::Reset();
}
//===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===//
// PowerPC 970 Hazard Recognizer // PowerPC 970 Hazard Recognizer
// //
@ -61,7 +73,6 @@ void PPCHazardRecognizer440::EmitInstruction(SUnit *SU) {
PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii) PPCHazardRecognizer970::PPCHazardRecognizer970(const TargetInstrInfo &tii)
: TII(tii) { : TII(tii) {
LastWasBL8_ELF = false;
EndDispatchGroup(); EndDispatchGroup();
} }
@ -132,15 +143,6 @@ getHazardType(SUnit *SU, int Stalls) {
return NoHazard; return NoHazard;
unsigned Opcode = MI->getOpcode(); unsigned Opcode = MI->getOpcode();
// If the last instruction was a BL8_ELF, then the NOP must follow it
// directly (this is strong requirement from the linker due to the ELF ABI).
// We return only Hazard (and not NoopHazard) because if the NOP is necessary
// then it will already be in the instruction stream (it is not always
// necessary; tail calls, for example, do not need it).
if (LastWasBL8_ELF && Opcode != PPC::NOP)
return Hazard;
bool isFirst, isSingle, isCracked, isLoad, isStore; bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType = PPCII::PPC970_Unit InstrType =
GetInstrType(Opcode, isFirst, isSingle, isCracked, GetInstrType(Opcode, isFirst, isSingle, isCracked,
@ -199,8 +201,6 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
return; return;
unsigned Opcode = MI->getOpcode(); unsigned Opcode = MI->getOpcode();
LastWasBL8_ELF = (Opcode == PPC::BL8_ELF);
bool isFirst, isSingle, isCracked, isLoad, isStore; bool isFirst, isSingle, isCracked, isLoad, isStore;
PPCII::PPC970_Unit InstrType = PPCII::PPC970_Unit InstrType =
GetInstrType(Opcode, isFirst, isSingle, isCracked, GetInstrType(Opcode, isFirst, isSingle, isCracked,
@ -240,7 +240,6 @@ void PPCHazardRecognizer970::AdvanceCycle() {
} }
void PPCHazardRecognizer970::Reset() { void PPCHazardRecognizer970::Reset() {
LastWasBL8_ELF = false;
EndDispatchGroup(); EndDispatchGroup();
} }

View File

@ -21,16 +21,19 @@
namespace llvm { namespace llvm {
/// PPCHazardRecognizer440 - This class implements a scoreboard-based /// PPCScoreboardHazardRecognizer - This class implements a scoreboard-based
/// hazard recognizer for the PPC 440 and friends. /// hazard recognizer for generic PPC processors.
class PPCHazardRecognizer440 : public ScoreboardHazardRecognizer { class PPCScoreboardHazardRecognizer : public ScoreboardHazardRecognizer {
const ScheduleDAG *DAG; const ScheduleDAG *DAG;
public: public:
PPCHazardRecognizer440(const InstrItineraryData *ItinData, PPCScoreboardHazardRecognizer(const InstrItineraryData *ItinData,
const ScheduleDAG *DAG_) : const ScheduleDAG *DAG_) :
ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {} ScoreboardHazardRecognizer(ItinData, DAG_), DAG(DAG_) {}
virtual HazardType getHazardType(SUnit *SU, int Stalls);
virtual void EmitInstruction(SUnit *SU); virtual void EmitInstruction(SUnit *SU);
virtual void AdvanceCycle();
virtual void Reset();
}; };
/// PPCHazardRecognizer970 - This class defines a finite state automata that /// PPCHazardRecognizer970 - This class defines a finite state automata that
@ -49,9 +52,6 @@ class PPCHazardRecognizer970 : public ScheduleHazardRecognizer {
// HasCTRSet - If the CTR register is set in this group, disallow BCTRL. // HasCTRSet - If the CTR register is set in this group, disallow BCTRL.
bool HasCTRSet; bool HasCTRSet;
// Was the last instruction issued a BL8_ELF
bool LastWasBL8_ELF;
// StoredPtr - Keep track of the address of any store. If we see a load from // StoredPtr - Keep track of the address of any store. If we see a load from
// the same address (or one that aliases it), disallow the store. We can have // the same address (or one that aliases it), disallow the store. We can have
// up to four stores in one dispatch group, hence we track up to 4. // up to four stores in one dispatch group, hence we track up to 4.

View File

@ -472,6 +472,7 @@ const char *PPCTargetLowering::getTargetNodeName(unsigned Opcode) const {
case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32"; case PPCISD::EXTSW_32: return "PPCISD::EXTSW_32";
case PPCISD::STD_32: return "PPCISD::STD_32"; case PPCISD::STD_32: return "PPCISD::STD_32";
case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4"; case PPCISD::CALL_SVR4: return "PPCISD::CALL_SVR4";
case PPCISD::CALL_NOP_SVR4: return "PPCISD::CALL_NOP_SVR4";
case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin"; case PPCISD::CALL_Darwin: return "PPCISD::CALL_Darwin";
case PPCISD::NOP: return "PPCISD::NOP"; case PPCISD::NOP: return "PPCISD::NOP";
case PPCISD::MTCTR: return "PPCISD::MTCTR"; case PPCISD::MTCTR: return "PPCISD::MTCTR";
@ -2813,9 +2814,6 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size()); return DAG.getNode(PPCISD::TC_RETURN, dl, MVT::Other, &Ops[0], Ops.size());
} }
Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
// Add a NOP immediately after the branch instruction when using the 64-bit // Add a NOP immediately after the branch instruction when using the 64-bit
// SVR4 ABI. At link time, if caller and callee are in a different module and // SVR4 ABI. At link time, if caller and callee are in a different module and
// thus have a different TOC, the call will be replaced with a call to a stub // thus have a different TOC, the call will be replaced with a call to a stub
@ -2824,8 +2822,9 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// which restores the TOC of the caller from the TOC save slot of the current // which restores the TOC of the caller from the TOC save slot of the current
// stack frame. If caller and callee belong to the same module (and have the // stack frame. If caller and callee belong to the same module (and have the
// same TOC), the NOP will remain unchanged. // same TOC), the NOP will remain unchanged.
bool needsTOCRestore = false;
if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) { if (!isTailCall && PPCSubTarget.isSVR4ABI()&& PPCSubTarget.isPPC64()) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
if (CallOpc == PPCISD::BCTRL_SVR4) { if (CallOpc == PPCISD::BCTRL_SVR4) {
// This is a call through a function pointer. // This is a call through a function pointer.
// Restore the caller TOC from the save area into R2. // Restore the caller TOC from the save area into R2.
@ -2836,14 +2835,22 @@ PPCTargetLowering::FinishCall(CallingConv::ID CallConv, DebugLoc dl,
// since r2 is a reserved register (which prevents the register allocator // since r2 is a reserved register (which prevents the register allocator
// from allocating it), resulting in an additional register being // from allocating it), resulting in an additional register being
// allocated and an unnecessary move instruction being generated. // allocated and an unnecessary move instruction being generated.
Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag); needsTOCRestore = true;
InFlag = Chain.getValue(1); } else if (CallOpc == PPCISD::CALL_SVR4) {
} else {
// Otherwise insert NOP. // Otherwise insert NOP.
InFlag = DAG.getNode(PPCISD::NOP, dl, MVT::Glue, InFlag); CallOpc = PPCISD::CALL_NOP_SVR4;
} }
} }
Chain = DAG.getNode(CallOpc, dl, NodeTys, &Ops[0], Ops.size());
InFlag = Chain.getValue(1);
if (needsTOCRestore) {
SDVTList VTs = DAG.getVTList(MVT::Other, MVT::Glue);
Chain = DAG.getNode(PPCISD::TOC_RESTORE, dl, VTs, Chain, InFlag);
InFlag = Chain.getValue(1);
}
Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
DAG.getIntPtrConstant(BytesCalleePops, true), DAG.getIntPtrConstant(BytesCalleePops, true),
InFlag); InFlag);

View File

@ -95,7 +95,9 @@ namespace llvm {
EXTSW_32, EXTSW_32,
/// CALL - A direct function call. /// CALL - A direct function call.
CALL_Darwin, CALL_SVR4, /// CALL_NOP_SVR4 is a call with the special NOP which follows 64-bit
/// SVR4 calls.
CALL_Darwin, CALL_SVR4, CALL_NOP_SVR4,
/// NOP - Special NOP which follows 64-bit SVR4 calls. /// NOP - Special NOP which follows 64-bit SVR4 calls.
NOP, NOP,

View File

@ -89,10 +89,22 @@ let isCall = 1, PPC970_Unit = 7, Defs = [LR8] in {
let Uses = [RM] in { let Uses = [RM] in {
def BL8_ELF : IForm<18, 0, 1, def BL8_ELF : IForm<18, 0, 1,
(outs), (ins calltarget:$func, variable_ops), (outs), (ins calltarget:$func, variable_ops),
"bl $func", BrB, []>; // See Pat patterns below. "bl $func", BrB, []>; // See Pat patterns below.
let isCodeGenOnly = 1 in
def BL8_NOP_ELF : IForm_and_DForm_4_zero<18, 0, 1, 24,
(outs), (ins calltarget:$func, variable_ops),
"bl $func\n\tnop", BrB, []>;
def BLA8_ELF : IForm<18, 1, 1, def BLA8_ELF : IForm<18, 1, 1,
(outs), (ins aaddr:$func, variable_ops), (outs), (ins aaddr:$func, variable_ops),
"bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>; "bla $func", BrB, [(PPCcall_SVR4 (i64 imm:$func))]>;
let isCodeGenOnly = 1 in
def BLA8_NOP_ELF : IForm_and_DForm_4_zero<18, 1, 1, 24,
(outs), (ins aaddr:$func, variable_ops),
"bla $func\n\tnop", BrB,
[(PPCcall_nop_SVR4 (i64 imm:$func))]>;
} }
let Uses = [X11, CTR8, RM] in { let Uses = [X11, CTR8, RM] in {
def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1, def BCTRL8_ELF : XLForm_2_ext<19, 528, 20, 0, 1,
@ -111,8 +123,14 @@ def : Pat<(PPCcall_Darwin (i64 texternalsym:$dst)),
def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)), def : Pat<(PPCcall_SVR4 (i64 tglobaladdr:$dst)),
(BL8_ELF tglobaladdr:$dst)>; (BL8_ELF tglobaladdr:$dst)>;
def : Pat<(PPCcall_nop_SVR4 (i64 tglobaladdr:$dst)),
(BL8_NOP_ELF tglobaladdr:$dst)>;
def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)), def : Pat<(PPCcall_SVR4 (i64 texternalsym:$dst)),
(BL8_ELF texternalsym:$dst)>; (BL8_ELF texternalsym:$dst)>;
def : Pat<(PPCcall_nop_SVR4 (i64 texternalsym:$dst)),
(BL8_NOP_ELF texternalsym:$dst)>;
def : Pat<(PPCnop), def : Pat<(PPCnop),
(NOP)>; (NOP)>;

View File

@ -51,6 +51,36 @@ class PPC970_Unit_VALU { bits<3> PPC970_Unit = 5; }
class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; } class PPC970_Unit_VPERM { bits<3> PPC970_Unit = 6; }
class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; } class PPC970_Unit_BRU { bits<3> PPC970_Unit = 7; }
// Two joined instructions; used to emit two adjacent instructions as one.
// The itinerary from the first instruction is used for scheduling and
// classification.
class I2<bits<6> opcode1, bits<6> opcode2, dag OOL, dag IOL, string asmstr,
InstrItinClass itin>
: Instruction {
field bits<64> Inst;
bit PPC64 = 0; // Default value, override with isPPC64
let Namespace = "PPC";
let Inst{0-5} = opcode1;
let Inst{32-37} = opcode2;
let OutOperandList = OOL;
let InOperandList = IOL;
let AsmString = asmstr;
let Itinerary = itin;
bits<1> PPC970_First = 0;
bits<1> PPC970_Single = 0;
bits<1> PPC970_Cracked = 0;
bits<3> PPC970_Unit = 0;
/// These fields correspond to the fields in PPCInstrInfo.h. Any changes to
/// these must be reflected there! See comments there for what these are.
let TSFlags{0} = PPC970_First;
let TSFlags{1} = PPC970_Single;
let TSFlags{2} = PPC970_Cracked;
let TSFlags{5-3} = PPC970_Unit;
}
// 1.7.1 I-Form // 1.7.1 I-Form
class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr, class IForm<bits<6> opcode, bit aa, bit lk, dag OOL, dag IOL, string asmstr,
@ -164,6 +194,35 @@ class DForm_4_zero<bits<6> opcode, dag OOL, dag IOL, string asmstr,
let Addr = 0; let Addr = 0;
} }
class IForm_and_DForm_1<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: I2<opcode1, opcode2, OOL, IOL, asmstr, itin> {
bits<5> A;
bits<21> Addr;
let Pattern = pattern;
bits<24> LI;
let Inst{6-29} = LI;
let Inst{30} = aa;
let Inst{31} = lk;
let Inst{38-42} = A;
let Inst{43-47} = Addr{20-16}; // Base Reg
let Inst{48-63} = Addr{15-0}; // Displacement
}
// This is used to emit BL8+NOP.
class IForm_and_DForm_4_zero<bits<6> opcode1, bit aa, bit lk, bits<6> opcode2,
dag OOL, dag IOL, string asmstr,
InstrItinClass itin, list<dag> pattern>
: IForm_and_DForm_1<opcode1, aa, lk, opcode2,
OOL, IOL, asmstr, itin, pattern> {
let A = 0;
let Addr = 0;
}
class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr, class DForm_5<bits<6> opcode, dag OOL, dag IOL, string asmstr,
InstrItinClass itin> InstrItinClass itin>
: I<opcode, OOL, IOL, asmstr, itin> { : I<opcode, OOL, IOL, asmstr, itin> {

View File

@ -51,7 +51,7 @@ ScheduleHazardRecognizer *PPCInstrInfo::CreateTargetHazardRecognizer(
unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective(); unsigned Directive = TM->getSubtarget<PPCSubtarget>().getDarwinDirective();
if (Directive == PPC::DIR_440) { if (Directive == PPC::DIR_440) {
const InstrItineraryData *II = TM->getInstrItineraryData(); const InstrItineraryData *II = TM->getInstrItineraryData();
return new PPCHazardRecognizer440(II, DAG); return new PPCScoreboardHazardRecognizer(II, DAG);
} }
return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG); return TargetInstrInfoImpl::CreateTargetHazardRecognizer(TM, DAG);
@ -684,6 +684,9 @@ unsigned PPCInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
case PPC::GC_LABEL: case PPC::GC_LABEL:
case PPC::DBG_VALUE: case PPC::DBG_VALUE:
return 0; return 0;
case PPC::BL8_NOP_ELF:
case PPC::BLA8_NOP_ELF:
return 8;
default: default:
return 4; // PowerPC instructions are all 4 bytes return 4; // PowerPC instructions are all 4 bytes
} }

View File

@ -116,6 +116,9 @@ def PPCcall_Darwin : SDNode<"PPCISD::CALL_Darwin", SDT_PPCCall,
def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall, def PPCcall_SVR4 : SDNode<"PPCISD::CALL_SVR4", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>; SDNPVariadic]>;
def PPCcall_nop_SVR4 : SDNode<"PPCISD::CALL_NOP_SVR4", SDT_PPCCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPVariadic]>;
def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>; def PPCnop : SDNode<"PPCISD::NOP", SDT_PPCnop, [SDNPInGlue, SDNPOutGlue]>;
def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>, def PPCload : SDNode<"PPCISD::LOAD", SDTypeProfile<1, 1, []>,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>; [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;