diff --git a/include/llvm/MC/MCSchedule.h b/include/llvm/MC/MCSchedule.h
index 862a0fd7add..43b8672265c 100644
--- a/include/llvm/MC/MCSchedule.h
+++ b/include/llvm/MC/MCSchedule.h
@@ -186,6 +186,8 @@ public:
   // takes to recover from a branch misprediction.
   unsigned MispredictPenalty;
   static const unsigned DefaultMispredictPenalty = 10;
+
+  bool PostRAScheduler; // default value is false
 
   bool CompleteModel;
 
@@ -210,7 +212,8 @@ public:
                   LoadLatency(DefaultLoadLatency),
                   HighLatency(DefaultHighLatency),
                   MispredictPenalty(DefaultMispredictPenalty),
-                  CompleteModel(true), ProcID(0), ProcResourceTable(nullptr),
+                  PostRAScheduler(false), CompleteModel(true),
+                  ProcID(0), ProcResourceTable(nullptr),
                   SchedClassTable(nullptr), NumProcResourceKinds(0),
                   NumSchedClasses(0), InstrItineraries(nullptr) {
     (void)NumProcResourceKinds;
@@ -219,12 +222,13 @@ public:
 
   // Table-gen driven ctor.
   MCSchedModel(unsigned iw, int mbs, int lmbs, unsigned ll, unsigned hl,
-               unsigned mp, bool cm, unsigned pi, const MCProcResourceDesc *pr,
-               const MCSchedClassDesc *sc, unsigned npr, unsigned nsc,
-               const InstrItinerary *ii):
+               unsigned mp, bool postRASched, bool cm, unsigned pi,
+               const MCProcResourceDesc *pr, const MCSchedClassDesc *sc,
+               unsigned npr, unsigned nsc, const InstrItinerary *ii):
     IssueWidth(iw), MicroOpBufferSize(mbs), LoopMicroOpBufferSize(lmbs),
     LoadLatency(ll), HighLatency(hl),
-    MispredictPenalty(mp), CompleteModel(cm), ProcID(pi),
+    MispredictPenalty(mp), PostRAScheduler(postRASched),
+    CompleteModel(cm), ProcID(pi),
     ProcResourceTable(pr), SchedClassTable(sc), NumProcResourceKinds(npr),
     NumSchedClasses(nsc), InstrItineraries(ii) {}
 
diff --git a/include/llvm/Target/TargetSchedule.td b/include/llvm/Target/TargetSchedule.td
index e6eeb885c0b..89db37ca859 100644
--- a/include/llvm/Target/TargetSchedule.td
+++ b/include/llvm/Target/TargetSchedule.td
@@ -88,6 +88,8 @@ class SchedMachineModel {
   // Per-cycle resources tables.
   ProcessorItineraries Itineraries = NoItineraries;
 
+  bit PostRAScheduler = 0; // Enable Post RegAlloc Scheduler pass.
+
   // Subtargets that define a model for only a subset of instructions
   // that have a scheduling class (itinerary class or SchedRW list)
   // and may actually be generated for that subtarget must clear this
diff --git a/include/llvm/Target/TargetSubtargetInfo.h b/include/llvm/Target/TargetSubtargetInfo.h
index e2aea45f4c1..7808f4de57a 100644
--- a/include/llvm/Target/TargetSubtargetInfo.h
+++ b/include/llvm/Target/TargetSubtargetInfo.h
@@ -90,16 +90,26 @@ public:
   // dependency.
   virtual void adjustSchedDependency(SUnit *def, SUnit *use,
                                      SDep& dep) const { }
-
-  // enablePostRAScheduler - If the target can benefit from post-regalloc
-  // scheduling and the specified optimization level meets the requirement
-  // return true to enable post-register-allocation scheduling. In
-  // CriticalPathRCs return any register classes that should only be broken
-  // if on the critical path.
-  virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                                     AntiDepBreakMode& Mode,
-                                     RegClassVector& CriticalPathRCs) const;
-
+
+  // For use with PostRAScheduling: get the anti-dependence breaking that should
+  // be performed before post-RA scheduling.
+  virtual AntiDepBreakMode getAntiDepBreakMode() const {
+    return ANTIDEP_NONE;
+  }
+
+  // For use with PostRAScheduling: in CriticalPathRCs, return any register
+  // classes that should only be considered for anti-dependence breaking if they
+  // are on the critical path.
+  virtual void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
+    CriticalPathRCs.clear();
+  }
+
+  // For use with PostRAScheduling: get the minimum optimization level needed
+  // to enable post-RA scheduling.
+  virtual CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const {
+    return CodeGenOpt::Default;
+  }
+
   /// \brief True if the subtarget should run the local reassignment
   /// heuristic of the register allocator.
   /// This heuristic may be compile time intensive, \p OptLevel provides
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index db3933e5eed..a1ab3445fa4 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -98,6 +98,11 @@ namespace {
     }
 
     bool runOnMachineFunction(MachineFunction &Fn) override;
+
+    bool enablePostRAScheduler(
+        const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
+        TargetSubtargetInfo::AntiDepBreakMode &Mode,
+        TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
   };
   char PostRAScheduler::ID = 0;
 
@@ -245,6 +250,17 @@ void SchedulePostRATDList::dumpSchedule() const {
 }
 #endif
 
+bool PostRAScheduler::enablePostRAScheduler(
+    const TargetSubtargetInfo &ST,
+    CodeGenOpt::Level OptLevel,
+    TargetSubtargetInfo::AntiDepBreakMode &Mode,
+    TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
+  Mode = ST.getAntiDepBreakMode();
+  ST.getCriticalPathRCs(CriticalPathRCs);
+  return ST.enablePostMachineScheduler() &&
+         OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
+}
+
 bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   if (skipOptnoneFunction(*Fn.getFunction()))
     return false;
@@ -267,9 +283,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   } else {
     // Check that post-RA scheduling is enabled for this target.
     // This may upgrade the AntiDepMode.
-    const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
-    if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode,
-                                  CriticalPathRCs))
+    const TargetSubtargetInfo &ST =
+        Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
+    if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(),
+                               AntiDepMode, CriticalPathRCs))
       return false;
   }
 
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 0c6ff529653..ea9e2b84c19 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -191,7 +191,6 @@ void ARMSubtarget::initializeEnvironment() {
   InThumbMode = false;
   HasThumb2 = false;
   NoARM = false;
-  PostRAScheduler = false;
   IsR9Reserved = ReserveR9;
   UseMovt = false;
   SupportsTailCall = false;
@@ -308,9 +307,6 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
     SupportsTailCall = !isThumb1Only();
   }
 
-  if (!isThumb() || hasThumb2())
-    PostRAScheduler = true;
-
   switch (Align) {
     case DefaultAlign:
       // Assume pre-ARMv6 doesn't support unaligned accesses.
@@ -425,25 +421,15 @@ bool ARMSubtarget::hasSinCos() const {
     !getTargetTriple().isOSVersionLT(7, 0);
 }
 
-// Enable the PostMachineScheduler if the target selects it instead of
-// PostRAScheduler. Currently only available on the command line via
-// -misched-postra.
+// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
 bool ARMSubtarget::enablePostMachineScheduler() const {
-  return PostRAScheduler;
+  return (!isThumb() || hasThumb2());
 }
 
 bool ARMSubtarget::enableAtomicExpandLoadLinked() const {
   return hasAnyDataBarrier() && !isThumb1Only();
 }
 
-bool ARMSubtarget::enablePostRAScheduler(
-    CodeGenOpt::Level OptLevel,
-    TargetSubtargetInfo::AntiDepBreakMode& Mode,
-    RegClassVector& CriticalPathRCs) const {
-  Mode = TargetSubtargetInfo::ANTIDEP_NONE;
-  return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
-}
-
 bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
   // NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
   // immediates as it is inherently position independent, and may be out of
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index 626bb0e7860..ffaff89f04e 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -105,9 +105,6 @@ protected:
   /// NoARM - True if subtarget does not support ARM mode execution.
   bool NoARM;
 
-  /// PostRAScheduler - True if using post-register-allocation scheduler.
-  bool PostRAScheduler;
-
   /// IsR9Reserved - True if R9 is not available as a general purpose register.
   bool IsR9Reserved;
 
@@ -429,12 +426,7 @@ public:
   bool hasSinCos() const;
 
   /// True for some subtargets at > -O0.
-  bool enablePostMachineScheduler() const;
-
-  /// enablePostRAScheduler - True at 'More' optimization.
-  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             TargetSubtargetInfo::AntiDepBreakMode& Mode,
-                             RegClassVector& CriticalPathRCs) const override;
+  bool enablePostMachineScheduler() const override;
 
   // enableAtomicExpandLoadLinked - True if we need to expand our atomics.
   bool enableAtomicExpandLoadLinked() const override;
diff --git a/lib/Target/Mips/MipsSubtarget.cpp b/lib/Target/Mips/MipsSubtarget.cpp
index 902735d7810..d63b712f83d 100644
--- a/lib/Target/Mips/MipsSubtarget.cpp
+++ b/lib/Target/Mips/MipsSubtarget.cpp
@@ -177,15 +177,17 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
   UseSmallSection = !IsLinux && (RM == Reloc::Static);
 }
 
-bool
-MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                                     TargetSubtargetInfo::AntiDepBreakMode &Mode,
-                                     RegClassVector &CriticalPathRCs) const {
-  Mode = TargetSubtargetInfo::ANTIDEP_NONE;
+/// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
+bool MipsSubtarget::enablePostMachineScheduler() const { return true; }
+
+void MipsSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
   CriticalPathRCs.clear();
-  CriticalPathRCs.push_back(isGP64bit() ? &Mips::GPR64RegClass
-                                        : &Mips::GPR32RegClass);
-  return OptLevel >= CodeGenOpt::Aggressive;
+  CriticalPathRCs.push_back(isGP64bit() ?
+                            &Mips::GPR64RegClass : &Mips::GPR32RegClass);
+}
+
+CodeGenOpt::Level MipsSubtarget::getOptLevelToEnablePostRAScheduler() const {
+  return CodeGenOpt::Aggressive;
 }
 
 MipsSubtarget &
diff --git a/lib/Target/Mips/MipsSubtarget.h b/lib/Target/Mips/MipsSubtarget.h
index 6f6e2a67140..944e629898b 100644
--- a/lib/Target/Mips/MipsSubtarget.h
+++ b/lib/Target/Mips/MipsSubtarget.h
@@ -160,9 +160,10 @@ protected:
   std::unique_ptr<const MipsTargetLowering> TLInfoSE;
 
 public:
-  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             AntiDepBreakMode& Mode,
-                             RegClassVector& CriticalPathRCs) const override;
+  /// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+  bool enablePostMachineScheduler() const override;
+  void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
+  CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override;
 
   /// Only O32 and EABI supported right now.
   bool isABI_EABI() const { return MipsABI == EABI; }
diff --git a/lib/Target/PowerPC/PPCSubtarget.cpp b/lib/Target/PowerPC/PPCSubtarget.cpp
index 2e1b74a1317..b51512d335f 100644
--- a/lib/Target/PowerPC/PPCSubtarget.cpp
+++ b/lib/Target/PowerPC/PPCSubtarget.cpp
@@ -222,22 +222,6 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
          GV->hasCommonLinkage() || isDecl;
 }
 
-bool PPCSubtarget::enablePostRAScheduler(
-    CodeGenOpt::Level OptLevel,
-    TargetSubtargetInfo::AntiDepBreakMode& Mode,
-    RegClassVector& CriticalPathRCs) const {
-  Mode = TargetSubtargetInfo::ANTIDEP_ALL;
-
-  CriticalPathRCs.clear();
-
-  if (isPPC64())
-    CriticalPathRCs.push_back(&PPC::G8RCRegClass);
-  else
-    CriticalPathRCs.push_back(&PPC::GPRCRegClass);
-
-  return OptLevel >= CodeGenOpt::Default;
-}
-
 // Embedded cores need aggressive scheduling (and some others also benefit).
 static bool needsAggressiveScheduling(unsigned Directive) {
   switch (Directive) {
@@ -259,6 +243,19 @@ bool PPCSubtarget::enableMachineScheduler() const {
   return needsAggressiveScheduling(DarwinDirective);
 }
 
+// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+bool PPCSubtarget::enablePostMachineScheduler() const { return true; }
+
+PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const {
+  return TargetSubtargetInfo::ANTIDEP_ALL;
+}
+
+void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
+  CriticalPathRCs.clear();
+  CriticalPathRCs.push_back(isPPC64() ?
+                            &PPC::G8RCRegClass : &PPC::GPRCRegClass);
+}
+
 void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
                                        MachineInstr *begin,
                                        MachineInstr *end,
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 2a166995d92..96d3d6209f9 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -225,15 +225,15 @@ public:
   bool isDarwinABI() const { return isDarwin(); }
   bool isSVR4ABI() const { return !isDarwin(); }
 
-  /// enablePostRAScheduler - True at 'More' optimization.
-  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             TargetSubtargetInfo::AntiDepBreakMode& Mode,
-                             RegClassVector& CriticalPathRCs) const override;
-
   bool enableEarlyIfConversion() const override { return hasISEL(); }
 
   // Scheduling customization.
   bool enableMachineScheduler() const override;
+  // This overrides the PostRAScheduler bit in the SchedModel for each CPU.
+  bool enablePostMachineScheduler() const override;
+  AntiDepBreakMode getAntiDepBreakMode() const override;
+  void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
+
   void overrideSchedPolicy(MachineSchedPolicy &Policy,
                            MachineInstr *begin,
                            MachineInstr *end,
diff --git a/lib/Target/TargetSubtargetInfo.cpp b/lib/Target/TargetSubtargetInfo.cpp
index 87b6b661d31..386a813b057 100644
--- a/lib/Target/TargetSubtargetInfo.cpp
+++ b/lib/Target/TargetSubtargetInfo.cpp
@@ -53,16 +53,7 @@ bool TargetSubtargetInfo::enableRALocalReassignment(
 }
 
 bool TargetSubtargetInfo::enablePostMachineScheduler() const {
-  return false;
-}
-
-bool TargetSubtargetInfo::enablePostRAScheduler(
-    CodeGenOpt::Level OptLevel,
-    AntiDepBreakMode& Mode,
-    RegClassVector& CriticalPathRCs) const {
-  Mode = ANTIDEP_NONE;
-  CriticalPathRCs.clear();
-  return false;
+  return getSchedModel()->PostRAScheduler;
 }
 
 bool TargetSubtargetInfo::useAA() const {
diff --git a/lib/Target/X86/X86Schedule.td b/lib/Target/X86/X86Schedule.td
index 25c5a6bfa1f..b76850aa1c8 100644
--- a/lib/Target/X86/X86Schedule.td
+++ b/lib/Target/X86/X86Schedule.td
@@ -633,6 +633,7 @@ def GenericModel : SchedMachineModel {
   let MicroOpBufferSize = 32;
   let LoadLatency = 4;
   let HighLatency = 10;
+  let PostRAScheduler = 0;
 }
 
 include "X86ScheduleAtom.td"
diff --git a/lib/Target/X86/X86ScheduleAtom.td b/lib/Target/X86/X86ScheduleAtom.td
index 3256ee7c6e4..c8820aa2d8d 100644
--- a/lib/Target/X86/X86ScheduleAtom.td
+++ b/lib/Target/X86/X86ScheduleAtom.td
@@ -538,6 +538,7 @@ def AtomModel : SchedMachineModel {
   // On the Atom, the throughput for taken branches is 2 cycles. For small
   // simple loops, expand by a small factor to hide the backedge cost.
   let LoopMicroOpBufferSize = 10;
+  let PostRAScheduler = 1;
 
   let Itineraries = AtomItineraries;
 }
diff --git a/lib/Target/X86/X86ScheduleSLM.td b/lib/Target/X86/X86ScheduleSLM.td
index 823d10140e3..90d85878812 100644
--- a/lib/Target/X86/X86ScheduleSLM.td
+++ b/lib/Target/X86/X86ScheduleSLM.td
@@ -19,6 +19,7 @@ def SLMModel : SchedMachineModel {
   let MicroOpBufferSize = 32; // Based on the reorder buffer.
   let LoadLatency = 3;
   let MispredictPenalty = 10;
+  let PostRAScheduler = 1;
 
   // For small loops, expand by a small factor to hide the backedge cost.
   let LoopMicroOpBufferSize = 10;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 79b7e68c320..d1b71c48a92 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -219,9 +219,6 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
   // Make sure the right MCSchedModel is used.
   InitCPUSchedModel(CPUName);
 
-  if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM)
-    PostRAScheduler = true;
-
   InstrItins = getInstrItineraryForCPU(CPUName);
 
   // It's important to keep the MCSubtargetInfo feature bits in sync with
@@ -286,7 +283,6 @@ void X86Subtarget::initializeEnvironment() {
   HasCmpxchg16b = false;
   UseLeaForSP = false;
   HasSlowDivide = false;
-  PostRAScheduler = false;
   PadShortFunctions = false;
   CallRegIndirect = false;
   LEAUsesAG = false;
@@ -359,16 +355,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
                            is64Bit() ? -8 : -4),
       JITInfo(hasSSE1()) {}
 
-bool
-X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                                    TargetSubtargetInfo::AntiDepBreakMode &Mode,
-                                    RegClassVector &CriticalPathRCs) const {
-  Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
-  CriticalPathRCs.clear();
-  return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
-}
-
-bool
-X86Subtarget::enableEarlyIfConversion() const {
+bool X86Subtarget::enableEarlyIfConversion() const {
   return hasCMov() && X86EarlyIfConv;
 }
+
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 09db0ebc5a9..30c3b08a890 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -170,9 +170,6 @@ protected:
   /// full divides and should be used when possible.
   bool HasSlowDivide;
 
-  /// PostRAScheduler - True if using post-register-allocation scheduler.
-  bool PostRAScheduler;
-
   /// PadShortFunctions - True if the short functions should be padded to prevent
   /// a stall when returning too early.
   bool PadShortFunctions;
@@ -453,18 +450,15 @@ public:
   /// Enable the MachineScheduler pass for all X86 subtargets.
   bool enableMachineScheduler() const override { return true; }
 
-  /// enablePostRAScheduler - run for Atom optimization.
-  bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
-                             TargetSubtargetInfo::AntiDepBreakMode& Mode,
-                             RegClassVector& CriticalPathRCs) const override;
-
-  bool postRAScheduler() const { return PostRAScheduler; }
-
   bool enableEarlyIfConversion() const override;
 
   /// getInstrItins - Return the instruction itineraries based on the
   /// subtarget selection.
   const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
+
+  AntiDepBreakMode getAntiDepBreakMode() const override {
+    return TargetSubtargetInfo::ANTIDEP_CRITICAL;
+  }
 };
 
 } // End llvm namespace
diff --git a/utils/TableGen/SubtargetEmitter.cpp b/utils/TableGen/SubtargetEmitter.cpp
index 06f869436f1..a59eead6d0d 100644
--- a/utils/TableGen/SubtargetEmitter.cpp
+++ b/utils/TableGen/SubtargetEmitter.cpp
@@ -1200,6 +1200,10 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
     EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ',');
     EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ',');
 
+    OS << "  " << (bool)(PI->ModelDef ?
+                         PI->ModelDef->getValueAsBit("PostRAScheduler") : 0)
+       << ", // " << "PostRAScheduler\n";
+
     OS << "  " << (bool)(PI->ModelDef ?
                          PI->ModelDef->getValueAsBit("CompleteModel") : 0)
        << ", // " << "CompleteModel\n";
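
Editor's usage note, not part of the patch: after this change a target opts in to the
legacy post-RA list scheduler either by setting "let PostRAScheduler = 1;" on its
SchedMachineModel (as the Atom and SLM models above do) or by overriding
enablePostMachineScheduler() in its subtarget (as ARM, Mips, and PPC do); the
anti-dependence mode, critical-path register classes, and minimum optimization level
move into the three new hooks. A minimal sketch of the override pattern follows. The
Foo backend is hypothetical and shown only to illustrate the new hook surface; a real
target would derive from its TableGen-generated <Target>GenSubtargetInfo rather than
from TargetSubtargetInfo directly.

    #include "llvm/Support/CodeGen.h"
    #include "llvm/Target/TargetSubtargetInfo.h"

    namespace llvm {

    // Hypothetical subtarget; illustrative only, not part of this patch.
    class FooSubtarget : public TargetSubtargetInfo {
    public:
      // Opt in even when the CPU's SchedMachineModel leaves its
      // PostRAScheduler bit at the default of 0.
      bool enablePostMachineScheduler() const override { return true; }

      // Replaces the old Mode out-parameter of enablePostRAScheduler():
      // break anti-dependences only along the critical path.
      AntiDepBreakMode getAntiDepBreakMode() const override {
        return TargetSubtargetInfo::ANTIDEP_CRITICAL;
      }

      // Replaces the old CriticalPathRCs out-parameter; leaving the vector
      // empty means no critical-path register-class restriction.
      void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override {
        CriticalPathRCs.clear();
      }

      // Replaces the old OptLevel comparison: run the pass at -O2 and above.
      CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override {
        return CodeGenOpt::Default;
      }
    };

    } // End llvm namespace

PostRAScheduler::enablePostRAScheduler() in PostRASchedulerList.cpp above combines
these hooks exactly as the removed per-target overrides used to: the pass runs only
if enablePostMachineScheduler() is true and the session's opt level is at least the
one the subtarget reports.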