Move Post RA Scheduling flag bit into SchedMachineModel

Refactoring; no functional changes intended

    Removed PostRAScheduler bits from subtargets (X86, ARM).
    Added PostRAScheduler bit to MCSchedModel class.
    This bit is set by a CPU's scheduling model (if it exists).
    Removed enablePostRAScheduler() function from TargetSubtargetInfo and subclasses.
    Fixed the existing enablePostMachineScheduler() method to use the MCSchedModel (was just returning false!).
    Added methods to TargetSubtargetInfo to allow overrides for AntiDepBreakMode, CriticalPathRCs, and OptLevel for PostRAScheduling.
    Added enablePostRAScheduler() function to PostRAScheduler class which queries the subtarget for the above values.
    Preserved existing scheduler behavior for ARM, MIPS, PPC, and X86: 
       a. ARM overrides the CPU's postRA settings by enabling postRA for any subtarget that is not in Thumb mode or that supports Thumb2.
       b. MIPS overrides the CPU's postRA settings by enabling postRA for everything. 
       c. PPC overrides the CPU's postRA settings by enabling postRA for everything. 
       d. X86 is the only target that actually has postRA specified via sched model info.

Differential Revision: http://reviews.llvm.org/D4217


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@213101 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Sanjay Patel 2014-07-15 22:39:58 +00:00
parent 110748bbb4
commit f7e042324a
17 changed files with 100 additions and 110 deletions

View File

@ -186,6 +186,8 @@ public:
// takes to recover from a branch misprediction.
unsigned MispredictPenalty;
static const unsigned DefaultMispredictPenalty = 10;
bool PostRAScheduler; // default value is false
bool CompleteModel;
@ -210,7 +212,8 @@ public:
LoadLatency(DefaultLoadLatency),
HighLatency(DefaultHighLatency),
MispredictPenalty(DefaultMispredictPenalty),
CompleteModel(true), ProcID(0), ProcResourceTable(nullptr),
PostRAScheduler(false), CompleteModel(true),
ProcID(0), ProcResourceTable(nullptr),
SchedClassTable(nullptr), NumProcResourceKinds(0),
NumSchedClasses(0), InstrItineraries(nullptr) {
(void)NumProcResourceKinds;
@ -219,12 +222,13 @@ public:
// Table-gen driven ctor.
MCSchedModel(unsigned iw, int mbs, int lmbs, unsigned ll, unsigned hl,
unsigned mp, bool cm, unsigned pi, const MCProcResourceDesc *pr,
const MCSchedClassDesc *sc, unsigned npr, unsigned nsc,
const InstrItinerary *ii):
unsigned mp, bool postRASched, bool cm, unsigned pi,
const MCProcResourceDesc *pr, const MCSchedClassDesc *sc,
unsigned npr, unsigned nsc, const InstrItinerary *ii):
IssueWidth(iw), MicroOpBufferSize(mbs), LoopMicroOpBufferSize(lmbs),
LoadLatency(ll), HighLatency(hl),
MispredictPenalty(mp), CompleteModel(cm), ProcID(pi),
MispredictPenalty(mp), PostRAScheduler(postRASched),
CompleteModel(cm), ProcID(pi),
ProcResourceTable(pr), SchedClassTable(sc), NumProcResourceKinds(npr),
NumSchedClasses(nsc), InstrItineraries(ii) {}

View File

@ -88,6 +88,8 @@ class SchedMachineModel {
// Per-cycle resources tables.
ProcessorItineraries Itineraries = NoItineraries;
bit PostRAScheduler = 0; // Enable Post RegAlloc Scheduler pass.
// Subtargets that define a model for only a subset of instructions
// that have a scheduling class (itinerary class or SchedRW list)
// and may actually be generated for that subtarget must clear this

View File

@ -90,16 +90,26 @@ public:
// dependency.
virtual void adjustSchedDependency(SUnit *def, SUnit *use,
SDep& dep) const { }
// enablePostRAScheduler - If the target can benefit from post-regalloc
// scheduling and the specified optimization level meets the requirement
// return true to enable post-register-allocation scheduling. In
// CriticalPathRCs return any register classes that should only be broken
// if on the critical path.
virtual bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const;
// For use with PostRAScheduling: get the anti-dependence breaking that should
// be performed before post-RA scheduling.
// The default implementation returns ANTIDEP_NONE; the method is virtual so
// a subtarget can return a different mode.
virtual AntiDepBreakMode getAntiDepBreakMode() const {
return ANTIDEP_NONE;
}
// For use with PostRAScheduling: in CriticalPathRCs, return any register
// classes that should only be considered for anti-dependence breaking if they
// are on the critical path.
// The default implementation reports no such classes: it empties
// CriticalPathRCs, discarding whatever the caller passed in.
virtual void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
  CriticalPathRCs.clear();
}
// For use with PostRAScheduling: get the minimum optimization level needed
// to enable post-RA scheduling.
// The default implementation returns CodeGenOpt::Default; the method is
// virtual so a subtarget can require a different level.
virtual CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const {
return CodeGenOpt::Default;
}
/// \brief True if the subtarget should run the local reassignment
/// heuristic of the register allocator.
/// This heuristic may be compile time intensive, \p OptLevel provides

View File

@ -98,6 +98,11 @@ namespace {
}
bool runOnMachineFunction(MachineFunction &Fn) override;
bool enablePostRAScheduler(
const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode &Mode,
TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const;
};
char PostRAScheduler::ID = 0;
@ -245,6 +250,17 @@ void SchedulePostRATDList::dumpSchedule() const {
}
#endif
// Decide whether post-RA scheduling should run for subtarget ST at OptLevel.
// Mode and CriticalPathRCs are out-parameters: both are filled in from the
// subtarget unconditionally, i.e. even when this function returns false.
bool PostRAScheduler::enablePostRAScheduler(
    const TargetSubtargetInfo &ST, CodeGenOpt::Level OptLevel,
    TargetSubtargetInfo::AntiDepBreakMode &Mode,
    TargetSubtargetInfo::RegClassVector &CriticalPathRCs) const {
  // Query the subtarget's anti-dependence settings first.
  Mode = ST.getAntiDepBreakMode();
  ST.getCriticalPathRCs(CriticalPathRCs);

  // The subtarget must opt in before the optimization level is consulted.
  if (!ST.enablePostMachineScheduler())
    return false;

  // Enabled only at or above the subtarget's minimum optimization level.
  return OptLevel >= ST.getOptLevelToEnablePostRAScheduler();
}
bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
if (skipOptnoneFunction(*Fn.getFunction()))
return false;
@ -267,9 +283,10 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
} else {
// Check that post-RA scheduling is enabled for this target.
// This may upgrade the AntiDepMode.
const TargetSubtargetInfo &ST = Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
if (!ST.enablePostRAScheduler(PassConfig->getOptLevel(), AntiDepMode,
CriticalPathRCs))
const TargetSubtargetInfo &ST =
Fn.getTarget().getSubtarget<TargetSubtargetInfo>();
if (!enablePostRAScheduler(ST, PassConfig->getOptLevel(),
AntiDepMode, CriticalPathRCs))
return false;
}

View File

@ -191,7 +191,6 @@ void ARMSubtarget::initializeEnvironment() {
InThumbMode = false;
HasThumb2 = false;
NoARM = false;
PostRAScheduler = false;
IsR9Reserved = ReserveR9;
UseMovt = false;
SupportsTailCall = false;
@ -308,9 +307,6 @@ void ARMSubtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
SupportsTailCall = !isThumb1Only();
}
if (!isThumb() || hasThumb2())
PostRAScheduler = true;
switch (Align) {
case DefaultAlign:
// Assume pre-ARMv6 doesn't support unaligned accesses.
@ -425,25 +421,15 @@ bool ARMSubtarget::hasSinCos() const {
!getTargetTriple().isOSVersionLT(7, 0);
}
// Enable the PostMachineScheduler if the target selects it instead of
// PostRAScheduler. Currently only available on the command line via
// -misched-postra.
// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
bool ARMSubtarget::enablePostMachineScheduler() const {
return PostRAScheduler;
return (!isThumb() || hasThumb2());
}
bool ARMSubtarget::enableAtomicExpandLoadLinked() const {
return hasAnyDataBarrier() && !isThumb1Only();
}
bool ARMSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
Mode = TargetSubtargetInfo::ANTIDEP_NONE;
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
bool ARMSubtarget::useMovt(const MachineFunction &MF) const {
// NOTE Windows on ARM needs to use mov.w/mov.t pairs to materialise 32-bit
// immediates as it is inherently position independent, and may be out of

View File

@ -105,9 +105,6 @@ protected:
/// NoARM - True if subtarget does not support ARM mode execution.
bool NoARM;
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
/// IsR9Reserved - True if R9 is a not available as general purpose register.
bool IsR9Reserved;
@ -429,12 +426,7 @@ public:
bool hasSinCos() const;
/// True for some subtargets at > -O0.
bool enablePostMachineScheduler() const;
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const override;
bool enablePostMachineScheduler() const override;
// enableAtomicExpandLoadLinked - True if we need to expand our atomics.
bool enableAtomicExpandLoadLinked() const override;

View File

@ -177,15 +177,17 @@ MipsSubtarget::MipsSubtarget(const std::string &TT, const std::string &CPU,
UseSmallSection = !IsLinux && (RM == Reloc::Static);
}
bool
MipsSubtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode &Mode,
RegClassVector &CriticalPathRCs) const {
Mode = TargetSubtargetInfo::ANTIDEP_NONE;
/// This overrides the PostRAScheduler bit in the SchedModel for any CPU.
/// Unconditionally returns true.
bool MipsSubtarget::enablePostMachineScheduler() const { return true; }
void MipsSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
CriticalPathRCs.clear();
CriticalPathRCs.push_back(isGP64bit() ? &Mips::GPR64RegClass
: &Mips::GPR32RegClass);
return OptLevel >= CodeGenOpt::Aggressive;
CriticalPathRCs.push_back(isGP64bit() ?
&Mips::GPR64RegClass : &Mips::GPR32RegClass);
}
// Optimization level needed to enable post-RA scheduling on Mips subtargets:
// always CodeGenOpt::Aggressive.
CodeGenOpt::Level MipsSubtarget::getOptLevelToEnablePostRAScheduler() const {
return CodeGenOpt::Aggressive;
}
MipsSubtarget &

View File

@ -160,9 +160,10 @@ protected:
std::unique_ptr<const MipsTargetLowering> TLInfoSE;
public:
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const override;
/// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
bool enablePostMachineScheduler() const override;
void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
CodeGenOpt::Level getOptLevelToEnablePostRAScheduler() const override;
/// Only O32 and EABI supported right now.
bool isABI_EABI() const { return MipsABI == EABI; }

View File

@ -222,22 +222,6 @@ bool PPCSubtarget::hasLazyResolverStub(const GlobalValue *GV,
GV->hasCommonLinkage() || isDecl;
}
bool PPCSubtarget::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
Mode = TargetSubtargetInfo::ANTIDEP_ALL;
CriticalPathRCs.clear();
if (isPPC64())
CriticalPathRCs.push_back(&PPC::G8RCRegClass);
else
CriticalPathRCs.push_back(&PPC::GPRCRegClass);
return OptLevel >= CodeGenOpt::Default;
}
// Embedded cores need aggressive scheduling (and some others also benefit).
static bool needsAggressiveScheduling(unsigned Directive) {
switch (Directive) {
@ -259,6 +243,19 @@ bool PPCSubtarget::enableMachineScheduler() const {
return needsAggressiveScheduling(DarwinDirective);
}
// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
// Unconditionally returns true.
bool PPCSubtarget::enablePostMachineScheduler() const { return true; }
// Anti-dependence breaking mode reported by PPC subtargets: always
// ANTIDEP_ALL.
PPCGenSubtargetInfo::AntiDepBreakMode PPCSubtarget::getAntiDepBreakMode() const {
return TargetSubtargetInfo::ANTIDEP_ALL;
}
// Fill CriticalPathRCs with the single register class PPC reports for
// critical-path anti-dependence breaking: G8RC when isPPC64() is true,
// GPRC otherwise. Any previous contents of the vector are discarded.
void PPCSubtarget::getCriticalPathRCs(RegClassVector &CriticalPathRCs) const {
  CriticalPathRCs.clear();
  if (isPPC64())
    CriticalPathRCs.push_back(&PPC::G8RCRegClass);
  else
    CriticalPathRCs.push_back(&PPC::GPRCRegClass);
}
void PPCSubtarget::overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,

View File

@ -225,15 +225,15 @@ public:
bool isDarwinABI() const { return isDarwin(); }
bool isSVR4ABI() const { return !isDarwin(); }
/// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const override;
bool enableEarlyIfConversion() const override { return hasISEL(); }
// Scheduling customization.
bool enableMachineScheduler() const override;
// This overrides the PostRAScheduler bit in the SchedModel for each CPU.
bool enablePostMachineScheduler() const override;
AntiDepBreakMode getAntiDepBreakMode() const override;
void getCriticalPathRCs(RegClassVector &CriticalPathRCs) const override;
void overrideSchedPolicy(MachineSchedPolicy &Policy,
MachineInstr *begin,
MachineInstr *end,

View File

@ -53,16 +53,7 @@ bool TargetSubtargetInfo::enableRALocalReassignment(
}
bool TargetSubtargetInfo::enablePostMachineScheduler() const {
return false;
}
bool TargetSubtargetInfo::enablePostRAScheduler(
CodeGenOpt::Level OptLevel,
AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const {
Mode = ANTIDEP_NONE;
CriticalPathRCs.clear();
return false;
return getSchedModel()->PostRAScheduler;
}
bool TargetSubtargetInfo::useAA() const {

View File

@ -633,6 +633,7 @@ def GenericModel : SchedMachineModel {
let MicroOpBufferSize = 32;
let LoadLatency = 4;
let HighLatency = 10;
let PostRAScheduler = 0;
}
include "X86ScheduleAtom.td"

View File

@ -538,6 +538,7 @@ def AtomModel : SchedMachineModel {
// On the Atom, the throughput for taken branches is 2 cycles. For small
// simple loops, expand by a small factor to hide the backedge cost.
let LoopMicroOpBufferSize = 10;
let PostRAScheduler = 1;
let Itineraries = AtomItineraries;
}

View File

@ -19,6 +19,7 @@ def SLMModel : SchedMachineModel {
let MicroOpBufferSize = 32; // Based on the reorder buffer.
let LoadLatency = 3;
let MispredictPenalty = 10;
let PostRAScheduler = 1;
// For small loops, expand by a small factor to hide the backedge cost.
let LoopMicroOpBufferSize = 10;

View File

@ -219,9 +219,6 @@ void X86Subtarget::resetSubtargetFeatures(StringRef CPU, StringRef FS) {
// Make sure the right MCSchedModel is used.
InitCPUSchedModel(CPUName);
if (X86ProcFamily == IntelAtom || X86ProcFamily == IntelSLM)
PostRAScheduler = true;
InstrItins = getInstrItineraryForCPU(CPUName);
// It's important to keep the MCSubtargetInfo feature bits in sync with
@ -286,7 +283,6 @@ void X86Subtarget::initializeEnvironment() {
HasCmpxchg16b = false;
UseLeaForSP = false;
HasSlowDivide = false;
PostRAScheduler = false;
PadShortFunctions = false;
CallRegIndirect = false;
LEAUsesAG = false;
@ -359,16 +355,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &CPU,
is64Bit() ? -8 : -4),
JITInfo(hasSSE1()) {}
bool
X86Subtarget::enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode &Mode,
RegClassVector &CriticalPathRCs) const {
Mode = TargetSubtargetInfo::ANTIDEP_CRITICAL;
CriticalPathRCs.clear();
return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
}
bool
X86Subtarget::enableEarlyIfConversion() const {
bool X86Subtarget::enableEarlyIfConversion() const {
return hasCMov() && X86EarlyIfConv;
}

View File

@ -170,9 +170,6 @@ protected:
/// full divides and should be used when possible.
bool HasSlowDivide;
/// PostRAScheduler - True if using post-register-allocation scheduler.
bool PostRAScheduler;
/// PadShortFunctions - True if the short functions should be padded to prevent
/// a stall when returning too early.
bool PadShortFunctions;
@ -453,18 +450,15 @@ public:
/// Enable the MachineScheduler pass for all X86 subtargets.
bool enableMachineScheduler() const override { return true; }
/// enablePostRAScheduler - run for Atom optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
TargetSubtargetInfo::AntiDepBreakMode& Mode,
RegClassVector& CriticalPathRCs) const override;
bool postRAScheduler() const { return PostRAScheduler; }
bool enableEarlyIfConversion() const override;
/// getInstrItins = Return the instruction itineraries based on the
/// subtarget selection.
const InstrItineraryData &getInstrItineraryData() const { return InstrItins; }
/// Anti-dependence breaking mode reported by X86 subtargets: always
/// ANTIDEP_CRITICAL.
AntiDepBreakMode getAntiDepBreakMode() const override {
return TargetSubtargetInfo::ANTIDEP_CRITICAL;
}
};
} // End llvm namespace

View File

@ -1200,6 +1200,10 @@ void SubtargetEmitter::EmitProcessorModels(raw_ostream &OS) {
EmitProcessorProp(OS, PI->ModelDef, "HighLatency", ',');
EmitProcessorProp(OS, PI->ModelDef, "MispredictPenalty", ',');
OS << " " << (bool)(PI->ModelDef ?
PI->ModelDef->getValueAsBit("PostRAScheduler") : 0)
<< ", // " << "PostRAScheduler\n";
OS << " " << (bool)(PI->ModelDef ?
PI->ModelDef->getValueAsBit("CompleteModel") : 0)
<< ", // " << "CompleteModel\n";