mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-04-05 17:39:16 +00:00
Porting Hexagon MI Scheduler to the new API.
Change current Hexagon MI scheduler to use new converging scheduler. Integrates DFA resource model into it. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@163137 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
67514e9066
commit
3e59040810
@ -16,6 +16,7 @@ add_llvm_target(HexagonCodeGen
|
||||
HexagonExpandPredSpillCode.cpp
|
||||
HexagonFrameLowering.cpp
|
||||
HexagonHardwareLoops.cpp
|
||||
HexagonMachineScheduler.cpp
|
||||
HexagonMCInstLower.cpp
|
||||
HexagonInstrInfo.cpp
|
||||
HexagonISelDAGToDAG.cpp
|
||||
|
874
lib/Target/Hexagon/HexagonMachineScheduler.cpp
Normal file
874
lib/Target/Hexagon/HexagonMachineScheduler.cpp
Normal file
@ -0,0 +1,874 @@
|
||||
//===- HexagonMachineScheduler.cpp - MI Scheduler for Hexagon -------------===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// MachineScheduler schedules machine instructions after phi elimination. It
|
||||
// preserves LiveIntervals so it can be invoked before register allocation.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#define DEBUG_TYPE "misched"
|
||||
|
||||
#include "HexagonMachineScheduler.h"
|
||||
|
||||
#include <queue>
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
static cl::opt<bool> ForceTopDown("vliw-misched-topdown", cl::Hidden,
|
||||
cl::desc("Force top-down list scheduling"));
|
||||
static cl::opt<bool> ForceBottomUp("vliw-misched-bottomup", cl::Hidden,
|
||||
cl::desc("Force bottom-up list scheduling"));
|
||||
|
||||
#ifndef NDEBUG
|
||||
static cl::opt<bool> ViewMISchedDAGs("vliw-view-misched-dags", cl::Hidden,
|
||||
cl::desc("Pop up a window to show MISched dags after they are processed"));
|
||||
|
||||
static cl::opt<unsigned> MISchedCutoff("vliw-misched-cutoff", cl::Hidden,
|
||||
cl::desc("Stop scheduling after N instructions"), cl::init(~0U));
|
||||
#else
|
||||
static bool ViewMISchedDAGs = false;
|
||||
#endif // NDEBUG
|
||||
|
||||
/// Decrement this iterator until reaching the top or a non-debug instr.
|
||||
static MachineBasicBlock::iterator
|
||||
priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) {
|
||||
assert(I != Beg && "reached the top of the region, cannot decrement");
|
||||
while (--I != Beg) {
|
||||
if (!I->isDebugValue())
|
||||
break;
|
||||
}
|
||||
return I;
|
||||
}
|
||||
|
||||
/// If this iterator is a debug value, increment until reaching the End or a
|
||||
/// non-debug instruction.
|
||||
static MachineBasicBlock::iterator
|
||||
nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) {
|
||||
for(; I != End; ++I) {
|
||||
if (!I->isDebugValue())
|
||||
break;
|
||||
}
|
||||
return I;
|
||||
}
|
||||
|
||||
/// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When
|
||||
/// NumPredsLeft reaches zero, release the successor node.
|
||||
///
|
||||
/// FIXME: Adjust SuccSU height based on MinLatency.
|
||||
void VLIWMachineScheduler::releaseSucc(SUnit *SU, SDep *SuccEdge) {
|
||||
SUnit *SuccSU = SuccEdge->getSUnit();
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (SuccSU->NumPredsLeft == 0) {
|
||||
dbgs() << "*** Scheduling failed! ***\n";
|
||||
SuccSU->dump(this);
|
||||
dbgs() << " has been released too many times!\n";
|
||||
llvm_unreachable(0);
|
||||
}
|
||||
#endif
|
||||
--SuccSU->NumPredsLeft;
|
||||
if (SuccSU->NumPredsLeft == 0 && SuccSU != &ExitSU)
|
||||
SchedImpl->releaseTopNode(SuccSU);
|
||||
}
|
||||
|
||||
/// releaseSuccessors - Call releaseSucc on each of SU's successors.
|
||||
void VLIWMachineScheduler::releaseSuccessors(SUnit *SU) {
|
||||
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
|
||||
I != E; ++I) {
|
||||
releaseSucc(SU, &*I);
|
||||
}
|
||||
}
|
||||
|
||||
/// ReleasePred - Decrement the NumSuccsLeft count of a predecessor. When
|
||||
/// NumSuccsLeft reaches zero, release the predecessor node.
|
||||
///
|
||||
/// FIXME: Adjust PredSU height based on MinLatency.
|
||||
void VLIWMachineScheduler::releasePred(SUnit *SU, SDep *PredEdge) {
|
||||
SUnit *PredSU = PredEdge->getSUnit();
|
||||
|
||||
#ifndef NDEBUG
|
||||
if (PredSU->NumSuccsLeft == 0) {
|
||||
dbgs() << "*** Scheduling failed! ***\n";
|
||||
PredSU->dump(this);
|
||||
dbgs() << " has been released too many times!\n";
|
||||
llvm_unreachable(0);
|
||||
}
|
||||
#endif
|
||||
--PredSU->NumSuccsLeft;
|
||||
if (PredSU->NumSuccsLeft == 0 && PredSU != &EntrySU)
|
||||
SchedImpl->releaseBottomNode(PredSU);
|
||||
}
|
||||
|
||||
/// releasePredecessors - Call releasePred on each of SU's predecessors.
|
||||
void VLIWMachineScheduler::releasePredecessors(SUnit *SU) {
|
||||
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
|
||||
I != E; ++I) {
|
||||
releasePred(SU, &*I);
|
||||
}
|
||||
}
|
||||
|
||||
void VLIWMachineScheduler::moveInstruction(MachineInstr *MI,
|
||||
MachineBasicBlock::iterator InsertPos) {
|
||||
// Advance RegionBegin if the first instruction moves down.
|
||||
if (&*RegionBegin == MI)
|
||||
++RegionBegin;
|
||||
|
||||
// Update the instruction stream.
|
||||
BB->splice(InsertPos, BB, MI);
|
||||
|
||||
// Update LiveIntervals
|
||||
LIS->handleMove(MI);
|
||||
|
||||
// Recede RegionBegin if an instruction moves above the first.
|
||||
if (RegionBegin == InsertPos)
|
||||
RegionBegin = MI;
|
||||
}
|
||||
|
||||
bool VLIWMachineScheduler::checkSchedLimit() {
|
||||
#ifndef NDEBUG
|
||||
if (NumInstrsScheduled == MISchedCutoff && MISchedCutoff != ~0U) {
|
||||
CurrentTop = CurrentBottom;
|
||||
return false;
|
||||
}
|
||||
++NumInstrsScheduled;
|
||||
#endif
|
||||
return true;
|
||||
}
|
||||
|
||||
/// enterRegion - Called back from MachineScheduler::runOnMachineFunction after
|
||||
/// crossing a scheduling boundary. [begin, end) includes all instructions in
|
||||
/// the region, including the boundary itself and single-instruction regions
|
||||
/// that don't get scheduled.
|
||||
void VLIWMachineScheduler::enterRegion(MachineBasicBlock *bb,
|
||||
MachineBasicBlock::iterator begin,
|
||||
MachineBasicBlock::iterator end,
|
||||
unsigned endcount)
|
||||
{
|
||||
ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount);
|
||||
|
||||
// For convenience remember the end of the liveness region.
|
||||
LiveRegionEnd =
|
||||
(RegionEnd == bb->end()) ? RegionEnd : llvm::next(RegionEnd);
|
||||
}
|
||||
|
||||
// Setup the register pressure trackers for the top scheduled top and bottom
|
||||
// scheduled regions.
|
||||
void VLIWMachineScheduler::initRegPressure() {
|
||||
TopRPTracker.init(&MF, RegClassInfo, LIS, BB, RegionBegin);
|
||||
BotRPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
|
||||
|
||||
// Close the RPTracker to finalize live ins.
|
||||
RPTracker.closeRegion();
|
||||
|
||||
DEBUG(RPTracker.getPressure().dump(TRI));
|
||||
|
||||
// Initialize the live ins and live outs.
|
||||
TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
|
||||
BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
|
||||
|
||||
// Close one end of the tracker so we can call
|
||||
// getMaxUpward/DownwardPressureDelta before advancing across any
|
||||
// instructions. This converts currently live regs into live ins/outs.
|
||||
TopRPTracker.closeTop();
|
||||
BotRPTracker.closeBottom();
|
||||
|
||||
// Account for liveness generated by the region boundary.
|
||||
if (LiveRegionEnd != RegionEnd)
|
||||
BotRPTracker.recede();
|
||||
|
||||
assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom");
|
||||
|
||||
// Cache the list of excess pressure sets in this region. This will also track
|
||||
// the max pressure in the scheduled code for these sets.
|
||||
RegionCriticalPSets.clear();
|
||||
std::vector<unsigned> RegionPressure = RPTracker.getPressure().MaxSetPressure;
|
||||
for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) {
|
||||
unsigned Limit = TRI->getRegPressureSetLimit(i);
|
||||
if (RegionPressure[i] > Limit)
|
||||
RegionCriticalPSets.push_back(PressureElement(i, 0));
|
||||
}
|
||||
DEBUG(dbgs() << "Excess PSets: ";
|
||||
for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i)
|
||||
dbgs() << TRI->getRegPressureSetName(
|
||||
RegionCriticalPSets[i].PSetID) << " ";
|
||||
dbgs() << "\n");
|
||||
|
||||
// Reset resource state.
|
||||
TopResourceModel->resetPacketState();
|
||||
TopResourceModel->resetDFA();
|
||||
BotResourceModel->resetPacketState();
|
||||
BotResourceModel->resetDFA();
|
||||
TotalPackets = 0;
|
||||
}
|
||||
|
||||
// FIXME: When the pressure tracker deals in pressure differences then we won't
|
||||
// iterate over all RegionCriticalPSets[i].
|
||||
void VLIWMachineScheduler::
|
||||
updateScheduledPressure(std::vector<unsigned> NewMaxPressure) {
|
||||
for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) {
|
||||
unsigned ID = RegionCriticalPSets[i].PSetID;
|
||||
int &MaxUnits = RegionCriticalPSets[i].UnitIncrease;
|
||||
if ((int)NewMaxPressure[ID] > MaxUnits)
|
||||
MaxUnits = NewMaxPressure[ID];
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if scheduling of this SU is possible
|
||||
/// in the current packet.
|
||||
/// It is _not_ precise (statefull), it is more like
|
||||
/// another heuristic. Many corner cases are figured
|
||||
/// empirically.
|
||||
bool VLIWResourceModel::isResourceAvailable(SUnit *SU) {
|
||||
if (!SU || !SU->getInstr())
|
||||
return false;
|
||||
|
||||
// First see if the pipeline could receive this instruction
|
||||
// in the current cycle.
|
||||
switch (SU->getInstr()->getOpcode()) {
|
||||
default:
|
||||
if (!ResourcesModel->canReserveResources(SU->getInstr()))
|
||||
return false;
|
||||
case TargetOpcode::EXTRACT_SUBREG:
|
||||
case TargetOpcode::INSERT_SUBREG:
|
||||
case TargetOpcode::SUBREG_TO_REG:
|
||||
case TargetOpcode::REG_SEQUENCE:
|
||||
case TargetOpcode::IMPLICIT_DEF:
|
||||
case TargetOpcode::COPY:
|
||||
case TargetOpcode::INLINEASM:
|
||||
break;
|
||||
}
|
||||
|
||||
// Now see if there are no other dependencies to instructions already
|
||||
// in the packet.
|
||||
for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
|
||||
if (Packet[i]->Succs.size() == 0)
|
||||
continue;
|
||||
for (SUnit::const_succ_iterator I = Packet[i]->Succs.begin(),
|
||||
E = Packet[i]->Succs.end(); I != E; ++I) {
|
||||
// Since we do not add pseudos to packets, might as well
|
||||
// ignore order dependencies.
|
||||
if (I->isCtrl())
|
||||
continue;
|
||||
|
||||
if (I->getSUnit() == SU)
|
||||
return false;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Keep track of available resources.
|
||||
void VLIWResourceModel::reserveResources(SUnit *SU) {
|
||||
// If this SU does not fit in the packet
|
||||
// start a new one.
|
||||
if (!isResourceAvailable(SU)) {
|
||||
ResourcesModel->clearResources();
|
||||
Packet.clear();
|
||||
TotalPackets++;
|
||||
}
|
||||
|
||||
switch (SU->getInstr()->getOpcode()) {
|
||||
default:
|
||||
ResourcesModel->reserveResources(SU->getInstr());
|
||||
break;
|
||||
case TargetOpcode::EXTRACT_SUBREG:
|
||||
case TargetOpcode::INSERT_SUBREG:
|
||||
case TargetOpcode::SUBREG_TO_REG:
|
||||
case TargetOpcode::REG_SEQUENCE:
|
||||
case TargetOpcode::IMPLICIT_DEF:
|
||||
case TargetOpcode::KILL:
|
||||
case TargetOpcode::PROLOG_LABEL:
|
||||
case TargetOpcode::EH_LABEL:
|
||||
case TargetOpcode::COPY:
|
||||
case TargetOpcode::INLINEASM:
|
||||
break;
|
||||
}
|
||||
Packet.push_back(SU);
|
||||
|
||||
#ifndef NDEBUG
|
||||
DEBUG(dbgs() << "Packet[" << TotalPackets << "]:\n");
|
||||
for (unsigned i = 0, e = Packet.size(); i != e; ++i) {
|
||||
DEBUG(dbgs() << "\t[" << i << "] SU(");
|
||||
DEBUG(dbgs() << Packet[i]->NodeNum << ")\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
// If packet is now full, reset the state so in the next cycle
|
||||
// we start fresh.
|
||||
if (Packet.size() >= InstrItins->SchedModel->IssueWidth) {
|
||||
ResourcesModel->clearResources();
|
||||
Packet.clear();
|
||||
TotalPackets++;
|
||||
}
|
||||
}
|
||||
|
||||
// Release all DAG roots for scheduling.
|
||||
void VLIWMachineScheduler::releaseRoots() {
|
||||
SmallVector<SUnit*, 16> BotRoots;
|
||||
|
||||
for (std::vector<SUnit>::iterator
|
||||
I = SUnits.begin(), E = SUnits.end(); I != E; ++I) {
|
||||
// A SUnit is ready to top schedule if it has no predecessors.
|
||||
if (I->Preds.empty())
|
||||
SchedImpl->releaseTopNode(&(*I));
|
||||
// A SUnit is ready to bottom schedule if it has no successors.
|
||||
if (I->Succs.empty())
|
||||
BotRoots.push_back(&(*I));
|
||||
}
|
||||
// Release bottom roots in reverse order so the higher priority nodes appear
|
||||
// first. This is more natural and slightly more efficient.
|
||||
for (SmallVectorImpl<SUnit*>::const_reverse_iterator
|
||||
I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I)
|
||||
SchedImpl->releaseBottomNode(*I);
|
||||
}
|
||||
|
||||
/// schedule - Called back from MachineScheduler::runOnMachineFunction
|
||||
/// after setting up the current scheduling region. [RegionBegin, RegionEnd)
|
||||
/// only includes instructions that have DAG nodes, not scheduling boundaries.
|
||||
void VLIWMachineScheduler::schedule() {
|
||||
DEBUG(dbgs()
|
||||
<< "********** MI Converging Scheduling VLIW BB#" << BB->getNumber()
|
||||
<< " " << BB->getName()
|
||||
<< " in_func " << BB->getParent()->getFunction()->getName()
|
||||
<< " at loop depth " << MLI->getLoopDepth(BB)
|
||||
<< " \n");
|
||||
|
||||
// Initialize the register pressure tracker used by buildSchedGraph.
|
||||
RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd);
|
||||
|
||||
// Account for liveness generate by the region boundary.
|
||||
if (LiveRegionEnd != RegionEnd)
|
||||
RPTracker.recede();
|
||||
|
||||
// Build the DAG, and compute current register pressure.
|
||||
buildSchedGraph(AA, &RPTracker);
|
||||
|
||||
// Initialize top/bottom trackers after computing region pressure.
|
||||
initRegPressure();
|
||||
|
||||
DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su)
|
||||
SUnits[su].dumpAll(this));
|
||||
|
||||
if (ViewMISchedDAGs) viewGraph();
|
||||
|
||||
SchedImpl->initialize(this);
|
||||
|
||||
// Release edges from the special Entry node or to the special Exit node.
|
||||
releaseSuccessors(&EntrySU);
|
||||
releasePredecessors(&ExitSU);
|
||||
|
||||
// Release all DAG roots for scheduling.
|
||||
releaseRoots();
|
||||
|
||||
CurrentTop = nextIfDebug(RegionBegin, RegionEnd);
|
||||
CurrentBottom = RegionEnd;
|
||||
bool IsTopNode = false;
|
||||
while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) {
|
||||
if (!checkSchedLimit())
|
||||
break;
|
||||
|
||||
// Move the instruction to its new location in the instruction stream.
|
||||
MachineInstr *MI = SU->getInstr();
|
||||
|
||||
if (IsTopNode) {
|
||||
assert(SU->isTopReady() && "node still has unscheduled dependencies");
|
||||
if (&*CurrentTop == MI)
|
||||
CurrentTop = nextIfDebug(++CurrentTop, CurrentBottom);
|
||||
else {
|
||||
moveInstruction(MI, CurrentTop);
|
||||
TopRPTracker.setPos(MI);
|
||||
}
|
||||
|
||||
// Update top scheduled pressure.
|
||||
TopRPTracker.advance();
|
||||
assert(TopRPTracker.getPos() == CurrentTop && "out of sync");
|
||||
updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure);
|
||||
|
||||
// Update DFA state.
|
||||
TopResourceModel->reserveResources(SU);
|
||||
|
||||
// Release dependent instructions for scheduling.
|
||||
releaseSuccessors(SU);
|
||||
}
|
||||
else {
|
||||
assert(SU->isBottomReady() && "node still has unscheduled dependencies");
|
||||
MachineBasicBlock::iterator priorII =
|
||||
priorNonDebug(CurrentBottom, CurrentTop);
|
||||
if (&*priorII == MI)
|
||||
CurrentBottom = priorII;
|
||||
else {
|
||||
if (&*CurrentTop == MI) {
|
||||
CurrentTop = nextIfDebug(++CurrentTop, priorII);
|
||||
TopRPTracker.setPos(CurrentTop);
|
||||
}
|
||||
moveInstruction(MI, CurrentBottom);
|
||||
CurrentBottom = MI;
|
||||
}
|
||||
// Update bottom scheduled pressure.
|
||||
BotRPTracker.recede();
|
||||
assert(BotRPTracker.getPos() == CurrentBottom && "out of sync");
|
||||
updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure);
|
||||
|
||||
// Update DFA state.
|
||||
BotResourceModel->reserveResources(SU);
|
||||
|
||||
// Release dependent instructions for scheduling.
|
||||
releasePredecessors(SU);
|
||||
}
|
||||
SU->isScheduled = true;
|
||||
SchedImpl->schedNode(SU, IsTopNode);
|
||||
}
|
||||
assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.");
|
||||
|
||||
DEBUG(dbgs() << "Final schedule has " << TopResourceModel->getTotalPackets() +
|
||||
BotResourceModel->getTotalPackets()<< "packets.\n");
|
||||
|
||||
placeDebugValues();
|
||||
}
|
||||
|
||||
/// Reinsert any remaining debug_values, just like the PostRA scheduler.
|
||||
void VLIWMachineScheduler::placeDebugValues() {
|
||||
// If first instruction was a DBG_VALUE then put it back.
|
||||
if (FirstDbgValue) {
|
||||
BB->splice(RegionBegin, BB, FirstDbgValue);
|
||||
RegionBegin = FirstDbgValue;
|
||||
}
|
||||
|
||||
for (std::vector<std::pair<MachineInstr *, MachineInstr *> >::iterator
|
||||
DI = DbgValues.end(), DE = DbgValues.begin(); DI != DE; --DI) {
|
||||
std::pair<MachineInstr *, MachineInstr *> P = *prior(DI);
|
||||
MachineInstr *DbgValue = P.first;
|
||||
MachineBasicBlock::iterator OrigPrevMI = P.second;
|
||||
BB->splice(++OrigPrevMI, BB, DbgValue);
|
||||
if (OrigPrevMI == llvm::prior(RegionEnd))
|
||||
RegionEnd = DbgValue;
|
||||
}
|
||||
DbgValues.clear();
|
||||
FirstDbgValue = NULL;
|
||||
}
|
||||
|
||||
void ConvergingVLIWScheduler::initialize(VLIWMachineScheduler *dag) {
|
||||
DAG = dag;
|
||||
TRI = DAG->TRI;
|
||||
Top.DAG = dag;
|
||||
Bot.DAG = dag;
|
||||
|
||||
// Initialize the HazardRecognizers.
|
||||
const TargetMachine &TM = DAG->MF.getTarget();
|
||||
const InstrItineraryData *Itin = TM.getInstrItineraryData();
|
||||
Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
|
||||
Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG);
|
||||
|
||||
assert((!ForceTopDown || !ForceBottomUp) &&
|
||||
"-misched-topdown incompatible with -misched-bottomup");
|
||||
}
|
||||
|
||||
void ConvergingVLIWScheduler::releaseTopNode(SUnit *SU) {
|
||||
if (SU->isScheduled)
|
||||
return;
|
||||
|
||||
for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
|
||||
I != E; ++I) {
|
||||
unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
|
||||
unsigned MinLatency = I->getMinLatency();
|
||||
#ifndef NDEBUG
|
||||
Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency);
|
||||
#endif
|
||||
if (SU->TopReadyCycle < PredReadyCycle + MinLatency)
|
||||
SU->TopReadyCycle = PredReadyCycle + MinLatency;
|
||||
}
|
||||
Top.releaseNode(SU, SU->TopReadyCycle);
|
||||
}
|
||||
|
||||
void ConvergingVLIWScheduler::releaseBottomNode(SUnit *SU) {
|
||||
if (SU->isScheduled)
|
||||
return;
|
||||
|
||||
assert(SU->getInstr() && "Scheduled SUnit must have instr");
|
||||
|
||||
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
|
||||
I != E; ++I) {
|
||||
unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
|
||||
unsigned MinLatency = I->getMinLatency();
|
||||
#ifndef NDEBUG
|
||||
Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency);
|
||||
#endif
|
||||
if (SU->BotReadyCycle < SuccReadyCycle + MinLatency)
|
||||
SU->BotReadyCycle = SuccReadyCycle + MinLatency;
|
||||
}
|
||||
Bot.releaseNode(SU, SU->BotReadyCycle);
|
||||
}
|
||||
|
||||
/// Does this SU have a hazard within the current instruction group.
|
||||
///
|
||||
/// The scheduler supports two modes of hazard recognition. The first is the
|
||||
/// ScheduleHazardRecognizer API. It is a fully general hazard recognizer that
|
||||
/// supports highly complicated in-order reservation tables
|
||||
/// (ScoreboardHazardRecognizer) and arbitrary target-specific logic.
|
||||
///
|
||||
/// The second is a streamlined mechanism that checks for hazards based on
|
||||
/// simple counters that the scheduler itself maintains. It explicitly checks
|
||||
/// for instruction dispatch limitations, including the number of micro-ops that
|
||||
/// can dispatch per cycle.
|
||||
///
|
||||
/// TODO: Also check whether the SU must start a new group.
|
||||
bool ConvergingVLIWScheduler::SchedBoundary::checkHazard(SUnit *SU) {
|
||||
if (HazardRec->isEnabled())
|
||||
return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard;
|
||||
|
||||
if (IssueCount + DAG->getNumMicroOps(SU->getInstr()) > DAG->getIssueWidth())
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void ConvergingVLIWScheduler::SchedBoundary::releaseNode(SUnit *SU,
|
||||
unsigned ReadyCycle) {
|
||||
if (ReadyCycle < MinReadyCycle)
|
||||
MinReadyCycle = ReadyCycle;
|
||||
|
||||
// Check for interlocks first. For the purpose of other heuristics, an
|
||||
// instruction that cannot issue appears as if it's not in the ReadyQueue.
|
||||
if (ReadyCycle > CurrCycle || checkHazard(SU))
|
||||
|
||||
Pending.push(SU);
|
||||
else
|
||||
Available.push(SU);
|
||||
}
|
||||
|
||||
/// Move the boundary of scheduled code by one cycle.
|
||||
void ConvergingVLIWScheduler::SchedBoundary::bumpCycle() {
|
||||
unsigned Width = DAG->getIssueWidth();
|
||||
IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width;
|
||||
|
||||
assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized");
|
||||
unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
|
||||
|
||||
if (!HazardRec->isEnabled()) {
|
||||
// Bypass HazardRec virtual calls.
|
||||
CurrCycle = NextCycle;
|
||||
}
|
||||
else {
|
||||
// Bypass getHazardType calls in case of long latency.
|
||||
for (; CurrCycle != NextCycle; ++CurrCycle) {
|
||||
if (isTop())
|
||||
HazardRec->AdvanceCycle();
|
||||
else
|
||||
HazardRec->RecedeCycle();
|
||||
}
|
||||
}
|
||||
CheckPending = true;
|
||||
|
||||
DEBUG(dbgs() << "*** " << Available.getName() << " cycle "
|
||||
<< CurrCycle << '\n');
|
||||
}
|
||||
|
||||
/// Move the boundary of scheduled code by one SUnit.
|
||||
void ConvergingVLIWScheduler::SchedBoundary::bumpNode(SUnit *SU) {
|
||||
|
||||
// Update the reservation table.
|
||||
if (HazardRec->isEnabled()) {
|
||||
if (!isTop() && SU->isCall) {
|
||||
// Calls are scheduled with their preceding instructions. For bottom-up
|
||||
// scheduling, clear the pipeline state before emitting.
|
||||
HazardRec->Reset();
|
||||
}
|
||||
HazardRec->EmitInstruction(SU);
|
||||
}
|
||||
// Check the instruction group dispatch limit.
|
||||
// TODO: Check if this SU must end a dispatch group.
|
||||
IssueCount += DAG->getNumMicroOps(SU->getInstr());
|
||||
if (IssueCount >= DAG->getIssueWidth()) {
|
||||
DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
|
||||
bumpCycle();
|
||||
}
|
||||
}
|
||||
|
||||
/// Release pending ready nodes in to the available queue. This makes them
|
||||
/// visible to heuristics.
|
||||
void ConvergingVLIWScheduler::SchedBoundary::releasePending() {
|
||||
// If the available queue is empty, it is safe to reset MinReadyCycle.
|
||||
if (Available.empty())
|
||||
MinReadyCycle = UINT_MAX;
|
||||
|
||||
// Check to see if any of the pending instructions are ready to issue. If
|
||||
// so, add them to the available queue.
|
||||
for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
|
||||
SUnit *SU = *(Pending.begin()+i);
|
||||
unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
|
||||
|
||||
if (ReadyCycle < MinReadyCycle)
|
||||
MinReadyCycle = ReadyCycle;
|
||||
|
||||
if (ReadyCycle > CurrCycle)
|
||||
continue;
|
||||
|
||||
if (checkHazard(SU))
|
||||
continue;
|
||||
|
||||
Available.push(SU);
|
||||
Pending.remove(Pending.begin()+i);
|
||||
--i; --e;
|
||||
}
|
||||
CheckPending = false;
|
||||
}
|
||||
|
||||
/// Remove SU from the ready set for this boundary.
|
||||
void ConvergingVLIWScheduler::SchedBoundary::removeReady(SUnit *SU) {
|
||||
if (Available.isInQueue(SU))
|
||||
Available.remove(Available.find(SU));
|
||||
else {
|
||||
assert(Pending.isInQueue(SU) && "bad ready count");
|
||||
Pending.remove(Pending.find(SU));
|
||||
}
|
||||
}
|
||||
|
||||
/// If this queue only has one ready candidate, return it. As a side effect,
|
||||
/// advance the cycle until at least one node is ready. If multiple instructions
|
||||
/// are ready, return NULL.
|
||||
SUnit *ConvergingVLIWScheduler::SchedBoundary::pickOnlyChoice() {
|
||||
if (CheckPending)
|
||||
releasePending();
|
||||
|
||||
for (unsigned i = 0; Available.empty(); ++i) {
|
||||
assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
|
||||
"permanent hazard"); (void)i;
|
||||
bumpCycle();
|
||||
releasePending();
|
||||
}
|
||||
if (Available.size() == 1)
|
||||
return *Available.begin();
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#ifndef NDEBUG
|
||||
void ConvergingVLIWScheduler::traceCandidate(const char *Label, const ReadyQueue &Q,
|
||||
SUnit *SU, PressureElement P) {
|
||||
dbgs() << Label << " " << Q.getName() << " ";
|
||||
if (P.isValid())
|
||||
dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease
|
||||
<< " ";
|
||||
else
|
||||
dbgs() << " ";
|
||||
SU->dump(DAG);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Constants used to denote relative importance of
|
||||
// heuristic components for cost computation.
|
||||
static const unsigned PriorityOne = 200;
|
||||
static const unsigned PriorityThree = 50;
|
||||
static const unsigned ScaleTwo = 10;
|
||||
static const unsigned FactorOne = 2;
|
||||
|
||||
/// Single point to compute overall scheduling cost.
|
||||
/// TODO: More heuristics will be used soon.
|
||||
int ConvergingVLIWScheduler::SchedulingCost(ReadyQueue &Q, SUnit *SU,
|
||||
SchedCandidate &Candidate,
|
||||
RegPressureDelta &Delta,
|
||||
bool verbose) {
|
||||
// Initial trivial priority.
|
||||
int ResCount = 1;
|
||||
|
||||
// Do not waste time on a node that is already scheduled.
|
||||
if (!SU || SU->isScheduled)
|
||||
return ResCount;
|
||||
|
||||
// Forced priority is high.
|
||||
if (SU->isScheduleHigh)
|
||||
ResCount += PriorityOne;
|
||||
|
||||
// Critical path first.
|
||||
if (Q.getID() == TopQID)
|
||||
ResCount += (SU->getHeight() * ScaleTwo);
|
||||
else
|
||||
ResCount += (SU->getDepth() * ScaleTwo);
|
||||
|
||||
// If resources are available for it, multiply the
|
||||
// chance of scheduling.
|
||||
if (DAG->getTopResourceModel()->isResourceAvailable(SU))
|
||||
ResCount <<= FactorOne;
|
||||
|
||||
// Factor in reg pressure as a heuristic.
|
||||
ResCount -= (Delta.Excess.UnitIncrease * PriorityThree);
|
||||
ResCount -= (Delta.CriticalMax.UnitIncrease * PriorityThree);
|
||||
|
||||
DEBUG(if (verbose) dbgs() << " Total(" << ResCount << ")");
|
||||
|
||||
return ResCount;
|
||||
}
|
||||
|
||||
/// Pick the best candidate from the top queue.
|
||||
///
|
||||
/// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during
|
||||
/// DAG building. To adjust for the current scheduling location we need to
|
||||
/// maintain the number of vreg uses remaining to be top-scheduled.
|
||||
ConvergingVLIWScheduler::CandResult ConvergingVLIWScheduler::
|
||||
pickNodeFromQueue(ReadyQueue &Q, const RegPressureTracker &RPTracker,
|
||||
SchedCandidate &Candidate) {
|
||||
DEBUG(Q.dump());
|
||||
|
||||
// getMaxPressureDelta temporarily modifies the tracker.
|
||||
RegPressureTracker &TempTracker = const_cast<RegPressureTracker&>(RPTracker);
|
||||
|
||||
// BestSU remains NULL if no top candidates beat the best existing candidate.
|
||||
CandResult FoundCandidate = NoCand;
|
||||
for (ReadyQueue::iterator I = Q.begin(), E = Q.end(); I != E; ++I) {
|
||||
RegPressureDelta RPDelta;
|
||||
TempTracker.getMaxPressureDelta((*I)->getInstr(), RPDelta,
|
||||
DAG->getRegionCriticalPSets(),
|
||||
DAG->getRegPressure().MaxSetPressure);
|
||||
|
||||
int CurrentCost = SchedulingCost(Q, *I, Candidate, RPDelta, false);
|
||||
|
||||
// Initialize the candidate if needed.
|
||||
if (!Candidate.SU) {
|
||||
Candidate.SU = *I;
|
||||
Candidate.RPDelta = RPDelta;
|
||||
Candidate.SCost = CurrentCost;
|
||||
FoundCandidate = NodeOrder;
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
// Best cost.
|
||||
if (CurrentCost > Candidate.SCost) {
|
||||
DEBUG(traceCandidate("CCAND", Q, *I));
|
||||
Candidate.SU = *I;
|
||||
Candidate.RPDelta = RPDelta;
|
||||
Candidate.SCost = CurrentCost;
|
||||
FoundCandidate = BestCost;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Fall through to original instruction order.
|
||||
// Only consider node order if Candidate was chosen from this Q.
|
||||
if (FoundCandidate == NoCand)
|
||||
continue;
|
||||
}
|
||||
return FoundCandidate;
|
||||
}
|
||||
|
||||
/// Pick the best candidate node from either the top or bottom queue.
|
||||
SUnit *ConvergingVLIWScheduler::pickNodeBidrectional(bool &IsTopNode) {
|
||||
// Schedule as far as possible in the direction of no choice. This is most
|
||||
// efficient, but also provides the best heuristics for CriticalPSets.
|
||||
if (SUnit *SU = Bot.pickOnlyChoice()) {
|
||||
IsTopNode = false;
|
||||
return SU;
|
||||
}
|
||||
if (SUnit *SU = Top.pickOnlyChoice()) {
|
||||
IsTopNode = true;
|
||||
return SU;
|
||||
}
|
||||
SchedCandidate BotCand;
|
||||
// Prefer bottom scheduling when heuristics are silent.
|
||||
CandResult BotResult = pickNodeFromQueue(Bot.Available,
|
||||
DAG->getBotRPTracker(), BotCand);
|
||||
assert(BotResult != NoCand && "failed to find the first candidate");
|
||||
|
||||
// If either Q has a single candidate that provides the least increase in
|
||||
// Excess pressure, we can immediately schedule from that Q.
|
||||
//
|
||||
// RegionCriticalPSets summarizes the pressure within the scheduled region and
|
||||
// affects picking from either Q. If scheduling in one direction must
|
||||
// increase pressure for one of the excess PSets, then schedule in that
|
||||
// direction first to provide more freedom in the other direction.
|
||||
if (BotResult == SingleExcess || BotResult == SingleCritical) {
|
||||
IsTopNode = false;
|
||||
return BotCand.SU;
|
||||
}
|
||||
// Check if the top Q has a better candidate.
|
||||
SchedCandidate TopCand;
|
||||
CandResult TopResult = pickNodeFromQueue(Top.Available,
|
||||
DAG->getTopRPTracker(), TopCand);
|
||||
assert(TopResult != NoCand && "failed to find the first candidate");
|
||||
|
||||
if (TopResult == SingleExcess || TopResult == SingleCritical) {
|
||||
IsTopNode = true;
|
||||
return TopCand.SU;
|
||||
}
|
||||
// If either Q has a single candidate that minimizes pressure above the
|
||||
// original region's pressure pick it.
|
||||
if (BotResult == SingleMax) {
|
||||
IsTopNode = false;
|
||||
return BotCand.SU;
|
||||
}
|
||||
if (TopResult == SingleMax) {
|
||||
IsTopNode = true;
|
||||
return TopCand.SU;
|
||||
}
|
||||
if (TopCand.SCost > BotCand.SCost) {
|
||||
IsTopNode = true;
|
||||
return TopCand.SU;
|
||||
}
|
||||
// Otherwise prefer the bottom candidate in node order.
|
||||
IsTopNode = false;
|
||||
return BotCand.SU;
|
||||
}
|
||||
|
||||
/// Pick the best node to balance the schedule. Implements MachineSchedStrategy.
|
||||
SUnit *ConvergingVLIWScheduler::pickNode(bool &IsTopNode) {
|
||||
if (DAG->top() == DAG->bottom()) {
|
||||
assert(Top.Available.empty() && Top.Pending.empty() &&
|
||||
Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage");
|
||||
return NULL;
|
||||
}
|
||||
SUnit *SU;
|
||||
if (ForceTopDown) {
|
||||
SU = Top.pickOnlyChoice();
|
||||
if (!SU) {
|
||||
SchedCandidate TopCand;
|
||||
CandResult TopResult =
|
||||
pickNodeFromQueue(Top.Available, DAG->getTopRPTracker(), TopCand);
|
||||
assert(TopResult != NoCand && "failed to find the first candidate");
|
||||
(void)TopResult;
|
||||
SU = TopCand.SU;
|
||||
}
|
||||
IsTopNode = true;
|
||||
} else if (ForceBottomUp) {
|
||||
SU = Bot.pickOnlyChoice();
|
||||
if (!SU) {
|
||||
SchedCandidate BotCand;
|
||||
CandResult BotResult =
|
||||
pickNodeFromQueue(Bot.Available, DAG->getBotRPTracker(), BotCand);
|
||||
assert(BotResult != NoCand && "failed to find the first candidate");
|
||||
(void)BotResult;
|
||||
SU = BotCand.SU;
|
||||
}
|
||||
IsTopNode = false;
|
||||
} else {
|
||||
SU = pickNodeBidrectional(IsTopNode);
|
||||
}
|
||||
if (SU->isTopReady())
|
||||
Top.removeReady(SU);
|
||||
if (SU->isBottomReady())
|
||||
Bot.removeReady(SU);
|
||||
|
||||
DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom")
|
||||
<< " Scheduling Instruction in cycle "
|
||||
<< (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n';
|
||||
SU->dump(DAG));
|
||||
return SU;
|
||||
}
|
||||
|
||||
/// Update the scheduler's state after scheduling a node. This is the same node
|
||||
/// that was just returned by pickNode(). However, VLIWMachineScheduler needs to update
|
||||
/// it's state based on the current cycle before MachineSchedStrategy does.
|
||||
void ConvergingVLIWScheduler::schedNode(SUnit *SU, bool IsTopNode) {
|
||||
if (IsTopNode) {
|
||||
SU->TopReadyCycle = Top.CurrCycle;
|
||||
Top.bumpNode(SU);
|
||||
}
|
||||
else {
|
||||
SU->BotReadyCycle = Bot.CurrCycle;
|
||||
Bot.bumpNode(SU);
|
||||
}
|
||||
}
|
||||
|
423
lib/Target/Hexagon/HexagonMachineScheduler.h
Normal file
423
lib/Target/Hexagon/HexagonMachineScheduler.h
Normal file
@ -0,0 +1,423 @@
|
||||
//===-- HexagonMachineScheduler.h - Custom Hexagon MI scheduler. ----===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file is distributed under the University of Illinois Open Source
|
||||
// License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// Custom Hexagon MI scheduler.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef HEXAGONASMPRINTER_H
|
||||
#define HEXAGONASMPRINTER_H
|
||||
|
||||
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
|
||||
#include "llvm/CodeGen/MachineScheduler.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/CodeGen/RegisterClassInfo.h"
|
||||
#include "llvm/CodeGen/RegisterPressure.h"
|
||||
#include "llvm/CodeGen/ResourcePriorityQueue.h"
|
||||
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
|
||||
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
|
||||
#include "llvm/Analysis/AliasAnalysis.h"
|
||||
#include "llvm/Target/TargetInstrInfo.h"
|
||||
#include "llvm/Support/CommandLine.h"
|
||||
#include "llvm/Support/Debug.h"
|
||||
#include "llvm/Support/ErrorHandling.h"
|
||||
#include "llvm/Support/raw_ostream.h"
|
||||
#include "llvm/ADT/OwningPtr.h"
|
||||
#include "llvm/ADT/PriorityQueue.h"
|
||||
|
||||
using namespace llvm;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// MachineSchedStrategy - Interface to a machine scheduling algorithm.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
namespace llvm {
|
||||
class VLIWMachineScheduler;
|
||||
|
||||
/// MachineSchedStrategy - Interface used by VLIWMachineScheduler to drive the selected
|
||||
/// scheduling algorithm.
|
||||
///
|
||||
/// If this works well and targets wish to reuse VLIWMachineScheduler, we may expose it
|
||||
/// in ScheduleDAGInstrs.h
|
||||
class MachineSchedStrategy {
|
||||
public:
|
||||
virtual ~MachineSchedStrategy() {}
|
||||
|
||||
/// Initialize the strategy after building the DAG for a new region.
|
||||
virtual void initialize(VLIWMachineScheduler *DAG) = 0;
|
||||
|
||||
/// Pick the next node to schedule, or return NULL. Set IsTopNode to true to
|
||||
/// schedule the node at the top of the unscheduled region. Otherwise it will
|
||||
/// be scheduled at the bottom.
|
||||
virtual SUnit *pickNode(bool &IsTopNode) = 0;
|
||||
|
||||
/// Notify MachineSchedStrategy that VLIWMachineScheduler has scheduled a node.
|
||||
virtual void schedNode(SUnit *SU, bool IsTopNode) = 0;
|
||||
|
||||
/// When all predecessor dependencies have been resolved, free this node for
|
||||
/// top-down scheduling.
|
||||
virtual void releaseTopNode(SUnit *SU) = 0;
|
||||
/// When all successor dependencies have been resolved, free this node for
|
||||
/// bottom-up scheduling.
|
||||
virtual void releaseBottomNode(SUnit *SU) = 0;
|
||||
};
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// ConvergingVLIWScheduler - Implementation of the standard MachineSchedStrategy.
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
/// ReadyQueue encapsulates vector of "ready" SUnits with basic convenience
|
||||
/// methods for pushing and removing nodes. ReadyQueue's are uniquely identified
|
||||
/// by an ID. SUnit::NodeQueueId is a mask of the ReadyQueues the SUnit is in.
|
||||
class ReadyQueue {
|
||||
unsigned ID;
|
||||
std::string Name;
|
||||
std::vector<SUnit*> Queue;
|
||||
|
||||
public:
|
||||
ReadyQueue(unsigned id, const Twine &name): ID(id), Name(name.str()) {}
|
||||
|
||||
unsigned getID() const { return ID; }
|
||||
|
||||
StringRef getName() const { return Name; }
|
||||
|
||||
// SU is in this queue if it's NodeQueueID is a superset of this ID.
|
||||
bool isInQueue(SUnit *SU) const { return (SU->NodeQueueId & ID); }
|
||||
|
||||
bool empty() const { return Queue.empty(); }
|
||||
|
||||
unsigned size() const { return Queue.size(); }
|
||||
|
||||
typedef std::vector<SUnit*>::iterator iterator;
|
||||
|
||||
iterator begin() { return Queue.begin(); }
|
||||
|
||||
iterator end() { return Queue.end(); }
|
||||
|
||||
iterator find(SUnit *SU) {
|
||||
return std::find(Queue.begin(), Queue.end(), SU);
|
||||
}
|
||||
|
||||
void push(SUnit *SU) {
|
||||
Queue.push_back(SU);
|
||||
SU->NodeQueueId |= ID;
|
||||
}
|
||||
|
||||
void remove(iterator I) {
|
||||
(*I)->NodeQueueId &= ~ID;
|
||||
*I = Queue.back();
|
||||
Queue.pop_back();
|
||||
}
|
||||
|
||||
void dump() {
|
||||
dbgs() << Name << ": ";
|
||||
for (unsigned i = 0, e = Queue.size(); i < e; ++i)
|
||||
dbgs() << Queue[i]->NodeNum << " ";
|
||||
dbgs() << "\n";
|
||||
}
|
||||
};
|
||||
|
||||
/// ConvergingVLIWScheduler shrinks the unscheduled zone using heuristics to balance
|
||||
/// the schedule.
|
||||
class ConvergingVLIWScheduler : public MachineSchedStrategy {
|
||||
|
||||
/// Store the state used by ConvergingVLIWScheduler heuristics, required for the
|
||||
/// lifetime of one invocation of pickNode().
|
||||
struct SchedCandidate {
|
||||
// The best SUnit candidate.
|
||||
SUnit *SU;
|
||||
|
||||
// Register pressure values for the best candidate.
|
||||
RegPressureDelta RPDelta;
|
||||
|
||||
// Best scheduling cost.
|
||||
int SCost;
|
||||
|
||||
SchedCandidate(): SU(NULL), SCost(0) {}
|
||||
};
|
||||
/// Represent the type of SchedCandidate found within a single queue.
|
||||
enum CandResult {
|
||||
NoCand, NodeOrder, SingleExcess, SingleCritical, SingleMax, MultiPressure,
|
||||
BestCost};
|
||||
|
||||
/// Each Scheduling boundary is associated with ready queues. It tracks the
|
||||
/// current cycle in whichever direction at has moved, and maintains the state
|
||||
/// of "hazards" and other interlocks at the current cycle.
|
||||
struct SchedBoundary {
|
||||
VLIWMachineScheduler *DAG;
|
||||
|
||||
ReadyQueue Available;
|
||||
ReadyQueue Pending;
|
||||
bool CheckPending;
|
||||
|
||||
ScheduleHazardRecognizer *HazardRec;
|
||||
|
||||
unsigned CurrCycle;
|
||||
unsigned IssueCount;
|
||||
|
||||
/// MinReadyCycle - Cycle of the soonest available instruction.
|
||||
unsigned MinReadyCycle;
|
||||
|
||||
// Remember the greatest min operand latency.
|
||||
unsigned MaxMinLatency;
|
||||
|
||||
/// Pending queues extend the ready queues with the same ID and the
|
||||
/// PendingFlag set.
|
||||
SchedBoundary(unsigned ID, const Twine &Name):
|
||||
DAG(0), Available(ID, Name+".A"),
|
||||
Pending(ID << ConvergingVLIWScheduler::LogMaxQID, Name+".P"),
|
||||
CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
|
||||
MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
|
||||
|
||||
~SchedBoundary() { delete HazardRec; }
|
||||
|
||||
bool isTop() const {
|
||||
return Available.getID() == ConvergingVLIWScheduler::TopQID;
|
||||
}
|
||||
|
||||
bool checkHazard(SUnit *SU);
|
||||
|
||||
void releaseNode(SUnit *SU, unsigned ReadyCycle);
|
||||
|
||||
void bumpCycle();
|
||||
|
||||
void bumpNode(SUnit *SU);
|
||||
|
||||
void releasePending();
|
||||
|
||||
void removeReady(SUnit *SU);
|
||||
|
||||
SUnit *pickOnlyChoice();
|
||||
};
|
||||
|
||||
VLIWMachineScheduler *DAG;
|
||||
const TargetRegisterInfo *TRI;
|
||||
|
||||
// State of the top and bottom scheduled instruction boundaries.
|
||||
SchedBoundary Top;
|
||||
SchedBoundary Bot;
|
||||
|
||||
public:
|
||||
/// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both)
|
||||
enum {
|
||||
TopQID = 1,
|
||||
BotQID = 2,
|
||||
LogMaxQID = 2
|
||||
};
|
||||
|
||||
ConvergingVLIWScheduler():
|
||||
DAG(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {}
|
||||
|
||||
virtual void initialize(VLIWMachineScheduler *dag);
|
||||
|
||||
virtual SUnit *pickNode(bool &IsTopNode);
|
||||
|
||||
virtual void schedNode(SUnit *SU, bool IsTopNode);
|
||||
|
||||
virtual void releaseTopNode(SUnit *SU);
|
||||
|
||||
virtual void releaseBottomNode(SUnit *SU);
|
||||
|
||||
protected:
|
||||
SUnit *pickNodeBidrectional(bool &IsTopNode);
|
||||
|
||||
int SchedulingCost(ReadyQueue &Q,
|
||||
SUnit *SU, SchedCandidate &Candidate,
|
||||
RegPressureDelta &Delta, bool verbose);
|
||||
|
||||
CandResult pickNodeFromQueue(ReadyQueue &Q,
|
||||
const RegPressureTracker &RPTracker,
|
||||
SchedCandidate &Candidate);
|
||||
#ifndef NDEBUG
|
||||
void traceCandidate(const char *Label, const ReadyQueue &Q, SUnit *SU,
|
||||
PressureElement P = PressureElement());
|
||||
#endif
|
||||
};
|
||||
|
||||
class VLIWResourceModel {
|
||||
/// ResourcesModel - Represents VLIW state.
|
||||
/// Not limited to VLIW targets per say, but assumes
|
||||
/// definition of DFA by a target.
|
||||
DFAPacketizer *ResourcesModel;
|
||||
|
||||
const InstrItineraryData *InstrItins;
|
||||
|
||||
/// Local packet/bundle model. Purely
|
||||
/// internal to the MI schedulre at the time.
|
||||
std::vector<SUnit*> Packet;
|
||||
|
||||
/// Total packets created.
|
||||
unsigned TotalPackets;
|
||||
|
||||
public:
|
||||
VLIWResourceModel(MachineSchedContext *C, const InstrItineraryData *IID) :
|
||||
InstrItins(IID), TotalPackets(0) {
|
||||
const TargetMachine &TM = C->MF->getTarget();
|
||||
ResourcesModel = TM.getInstrInfo()->CreateTargetScheduleState(&TM,NULL);
|
||||
|
||||
// This hard requirement could be relaxed, but for now do not let it proceed.
|
||||
assert(ResourcesModel && "Unimplemented CreateTargetScheduleState.");
|
||||
|
||||
Packet.resize(InstrItins->SchedModel->IssueWidth);
|
||||
Packet.clear();
|
||||
ResourcesModel->clearResources();
|
||||
}
|
||||
|
||||
~VLIWResourceModel() {
|
||||
delete ResourcesModel;
|
||||
}
|
||||
|
||||
void resetPacketState() {
|
||||
Packet.clear();
|
||||
}
|
||||
|
||||
void resetDFA() {
|
||||
ResourcesModel->clearResources();
|
||||
}
|
||||
|
||||
bool isResourceAvailable(SUnit *SU);
|
||||
void reserveResources(SUnit *SU);
|
||||
unsigned getTotalPackets() const { return TotalPackets; }
|
||||
};
|
||||
|
||||
class VLIWMachineScheduler : public ScheduleDAGInstrs {
|
||||
/// AA - AliasAnalysis for making memory reference queries.
|
||||
AliasAnalysis *AA;
|
||||
|
||||
RegisterClassInfo *RegClassInfo;
|
||||
MachineSchedStrategy *SchedImpl;
|
||||
|
||||
/// state separatly for top/bottom sectioins.
|
||||
VLIWResourceModel *TopResourceModel;
|
||||
VLIWResourceModel *BotResourceModel;
|
||||
|
||||
MachineBasicBlock::iterator LiveRegionEnd;
|
||||
|
||||
/// Register pressure in this region computed by buildSchedGraph.
|
||||
IntervalPressure RegPressure;
|
||||
RegPressureTracker RPTracker;
|
||||
|
||||
/// List of pressure sets that exceed the target's pressure limit before
|
||||
/// scheduling, listed in increasing set ID order. Each pressure set is paired
|
||||
/// with its max pressure in the currently scheduled regions.
|
||||
std::vector<PressureElement> RegionCriticalPSets;
|
||||
|
||||
/// The top of the unscheduled zone.
|
||||
MachineBasicBlock::iterator CurrentTop;
|
||||
IntervalPressure TopPressure;
|
||||
RegPressureTracker TopRPTracker;
|
||||
|
||||
/// The bottom of the unscheduled zone.
|
||||
MachineBasicBlock::iterator CurrentBottom;
|
||||
IntervalPressure BotPressure;
|
||||
RegPressureTracker BotRPTracker;
|
||||
|
||||
#ifndef NDEBUG
|
||||
/// The number of instructions scheduled so far. Used to cut off the
|
||||
/// scheduler at the point determined by misched-cutoff.
|
||||
unsigned NumInstrsScheduled;
|
||||
#endif
|
||||
|
||||
/// Total packets in the region.
|
||||
unsigned TotalPackets;
|
||||
|
||||
const MachineLoopInfo *MLI;
|
||||
public:
|
||||
VLIWMachineScheduler(MachineSchedContext *C, MachineSchedStrategy *S):
|
||||
ScheduleDAGInstrs(*C->MF, *C->MLI, *C->MDT, /*IsPostRA=*/false, C->LIS),
|
||||
AA(C->AA), RegClassInfo(C->RegClassInfo), SchedImpl(S),
|
||||
RPTracker(RegPressure), CurrentTop(), TopRPTracker(TopPressure),
|
||||
CurrentBottom(), BotRPTracker(BotPressure), MLI(C->MLI) {
|
||||
|
||||
TopResourceModel = new VLIWResourceModel(C, InstrItins);
|
||||
BotResourceModel = new VLIWResourceModel(C, InstrItins);
|
||||
|
||||
#ifndef NDEBUG
|
||||
NumInstrsScheduled = 0;
|
||||
#endif
|
||||
TotalPackets = 0;
|
||||
}
|
||||
|
||||
virtual ~VLIWMachineScheduler() {
|
||||
delete SchedImpl;
|
||||
delete TopResourceModel;
|
||||
delete BotResourceModel;
|
||||
}
|
||||
|
||||
MachineBasicBlock::iterator top() const { return CurrentTop; }
|
||||
MachineBasicBlock::iterator bottom() const { return CurrentBottom; }
|
||||
|
||||
/// Implement the ScheduleDAGInstrs interface for handling the next scheduling
|
||||
/// region. This covers all instructions in a block, while schedule() may only
|
||||
/// cover a subset.
|
||||
void enterRegion(MachineBasicBlock *bb,
|
||||
MachineBasicBlock::iterator begin,
|
||||
MachineBasicBlock::iterator end,
|
||||
unsigned endcount);
|
||||
|
||||
/// Schedule - This is called back from ScheduleDAGInstrs::Run() when it's
|
||||
/// time to do some work.
|
||||
void schedule();
|
||||
|
||||
unsigned CurCycle;
|
||||
|
||||
/// Get current register pressure for the top scheduled instructions.
|
||||
const IntervalPressure &getTopPressure() const { return TopPressure; }
|
||||
const RegPressureTracker &getTopRPTracker() const { return TopRPTracker; }
|
||||
|
||||
/// Get current register pressure for the bottom scheduled instructions.
|
||||
const IntervalPressure &getBotPressure() const { return BotPressure; }
|
||||
const RegPressureTracker &getBotRPTracker() const { return BotRPTracker; }
|
||||
|
||||
/// Get register pressure for the entire scheduling region before scheduling.
|
||||
const IntervalPressure &getRegPressure() const { return RegPressure; }
|
||||
|
||||
const std::vector<PressureElement> &getRegionCriticalPSets() const {
|
||||
return RegionCriticalPSets;
|
||||
}
|
||||
|
||||
VLIWResourceModel *getTopResourceModel() { return TopResourceModel; };
|
||||
VLIWResourceModel *getBotResourceModel() { return BotResourceModel; };
|
||||
|
||||
/// getIssueWidth - Return the max instructions per scheduling group.
|
||||
unsigned getIssueWidth() const {
|
||||
return (InstrItins && InstrItins->SchedModel)
|
||||
? InstrItins->SchedModel->IssueWidth : 1;
|
||||
}
|
||||
|
||||
/// getNumMicroOps - Return the number of issue slots required for this MI.
|
||||
unsigned getNumMicroOps(MachineInstr *MI) const {
|
||||
if (!InstrItins) return 1;
|
||||
int UOps = InstrItins->getNumMicroOps(MI->getDesc().getSchedClass());
|
||||
return (UOps >= 0) ? UOps : TII->getNumMicroOps(InstrItins, MI);
|
||||
}
|
||||
|
||||
private:
|
||||
void scheduleNodeTopDown(SUnit *SU);
|
||||
void listScheduleTopDown();
|
||||
|
||||
void initRegPressure();
|
||||
void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
|
||||
|
||||
void moveInstruction(MachineInstr *MI, MachineBasicBlock::iterator InsertPos);
|
||||
bool checkSchedLimit();
|
||||
|
||||
void releaseRoots();
|
||||
|
||||
void releaseSucc(SUnit *SU, SDep *SuccEdge);
|
||||
void releaseSuccessors(SUnit *SU);
|
||||
void releasePred(SUnit *SU, SDep *PredEdge);
|
||||
void releasePredecessors(SUnit *SU);
|
||||
|
||||
void placeDebugValues();
|
||||
};
|
||||
} // namespace
|
||||
|
||||
|
||||
#endif
|
@ -310,6 +310,58 @@ void HexagonRegisterInfo::getInitialFrameState(std::vector<MachineMove>
|
||||
Moves.push_back(MachineMove(0, Dst, Src));
|
||||
}
|
||||
|
||||
// Get the weight in units of pressure for this register class.
|
||||
const RegClassWeight &
|
||||
HexagonRegisterInfo::getRegClassWeight(const TargetRegisterClass *RC) const {
|
||||
// Each TargetRegisterClass has a per register weight, and weight
|
||||
// limit which must be less than the limits of its pressure sets.
|
||||
static const RegClassWeight RCWeightTable[] = {
|
||||
{1, 32}, // IntRegs
|
||||
{1, 8}, // CRRegs
|
||||
{1, 4}, // PredRegs
|
||||
{2, 16}, // DoubleRegs
|
||||
{0, 0} };
|
||||
return RCWeightTable[RC->getID()];
|
||||
}
|
||||
|
||||
/// Get the number of dimensions of register pressure.
|
||||
unsigned HexagonRegisterInfo::getNumRegPressureSets() const {
|
||||
return 4;
|
||||
}
|
||||
|
||||
/// Get the name of this register unit pressure set.
|
||||
const char *HexagonRegisterInfo::getRegPressureSetName(unsigned Idx) const {
|
||||
static const char *const RegPressureSetName[] = {
|
||||
"IntRegsRegSet",
|
||||
"CRRegsRegSet",
|
||||
"PredRegsRegSet",
|
||||
"DoubleRegsRegSet"
|
||||
};
|
||||
assert((Idx < 4) && "Index out of bounds");
|
||||
return RegPressureSetName[Idx];
|
||||
}
|
||||
|
||||
/// Get the register unit pressure limit for this dimension.
|
||||
/// This limit must be adjusted dynamically for reserved registers.
|
||||
unsigned HexagonRegisterInfo::getRegPressureSetLimit(unsigned Idx) const {
|
||||
static const int RegPressureLimit [] = { 16, 4, 2, 8 };
|
||||
assert((Idx < 4) && "Index out of bounds");
|
||||
return RegPressureLimit[Idx];
|
||||
}
|
||||
|
||||
const int*
|
||||
HexagonRegisterInfo::getRegClassPressureSets(const TargetRegisterClass *RC)
|
||||
const {
|
||||
static const int RCSetsTable[] = {
|
||||
0, -1, // IntRegs
|
||||
1, -1, // CRRegs
|
||||
2, -1, // PredRegs
|
||||
0, -1, // DoubleRegs
|
||||
-1 };
|
||||
static const unsigned RCSetStartTable[] = { 0, 2, 4, 6, 0 };
|
||||
unsigned SetListStart = RCSetStartTable[RC->getID()];
|
||||
return &RCSetsTable[SetListStart];
|
||||
}
|
||||
unsigned HexagonRegisterInfo::getEHExceptionRegister() const {
|
||||
llvm_unreachable("What is the exception register");
|
||||
}
|
||||
|
@ -87,6 +87,11 @@ struct HexagonRegisterInfo : public HexagonGenRegisterInfo {
|
||||
// Exception handling queries.
|
||||
unsigned getEHExceptionRegister() const;
|
||||
unsigned getEHHandlerRegister() const;
|
||||
const RegClassWeight &getRegClassWeight(const TargetRegisterClass *RC) const;
|
||||
unsigned getNumRegPressureSets() const;
|
||||
const char *getRegPressureSetName(unsigned Idx) const;
|
||||
unsigned getRegPressureSetLimit(unsigned Idx) const;
|
||||
const int* getRegClassPressureSets(const TargetRegisterClass *RC) const;
|
||||
};
|
||||
|
||||
} // end namespace llvm
|
||||
|
@ -47,6 +47,7 @@ def HexagonModel : SchedMachineModel {
|
||||
// Max issue per cycle == bundle width.
|
||||
let IssueWidth = 4;
|
||||
let Itineraries = HexagonItineraries;
|
||||
let LoadLatency = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -58,6 +58,7 @@ def HexagonModelV4 : SchedMachineModel {
|
||||
// Max issue per cycle == bundle width.
|
||||
let IssueWidth = 4;
|
||||
let Itineraries = HexagonItinerariesV4;
|
||||
let LoadLatency = 1;
|
||||
}
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
@ -14,6 +14,7 @@
|
||||
#include "HexagonTargetMachine.h"
|
||||
#include "Hexagon.h"
|
||||
#include "HexagonISelLowering.h"
|
||||
#include "HexagonMachineScheduler.h"
|
||||
#include "llvm/Module.h"
|
||||
#include "llvm/CodeGen/Passes.h"
|
||||
#include "llvm/PassManager.h"
|
||||
@ -29,6 +30,11 @@ opt<bool> DisableHardwareLoops(
|
||||
"disable-hexagon-hwloops", cl::Hidden,
|
||||
cl::desc("Disable Hardware Loops for Hexagon target"));
|
||||
|
||||
static cl::
|
||||
opt<bool> DisableHexagonMISched("disable-hexagon-misched",
|
||||
cl::Hidden, cl::ZeroOrMore, cl::init(false),
|
||||
cl::desc("Disable Hexagon MI Scheduling"));
|
||||
|
||||
/// HexagonTargetMachineModule - Note that this is used on hosts that
|
||||
/// cannot link in a library unless there are references into the
|
||||
/// library. In particular, it seems that it is not possible to get
|
||||
@ -42,6 +48,13 @@ extern "C" void LLVMInitializeHexagonTarget() {
|
||||
RegisterTargetMachine<HexagonTargetMachine> X(TheHexagonTarget);
|
||||
}
|
||||
|
||||
static ScheduleDAGInstrs *createVLIWMachineSched(MachineSchedContext *C) {
|
||||
return new VLIWMachineScheduler(C, new ConvergingVLIWScheduler());
|
||||
}
|
||||
|
||||
static MachineSchedRegistry
|
||||
SchedCustomRegistry("hexagon", "Run Hexagon's custom scheduler",
|
||||
createVLIWMachineSched);
|
||||
|
||||
/// HexagonTargetMachine ctor - Create an ILP32 architecture model.
|
||||
///
|
||||
@ -83,7 +96,13 @@ namespace {
|
||||
class HexagonPassConfig : public TargetPassConfig {
|
||||
public:
|
||||
HexagonPassConfig(HexagonTargetMachine *TM, PassManagerBase &PM)
|
||||
: TargetPassConfig(TM, PM) {}
|
||||
: TargetPassConfig(TM, PM) {
|
||||
// Enable MI scheduler.
|
||||
if (!DisableHexagonMISched) {
|
||||
enablePass(&MachineSchedulerID);
|
||||
MachineSchedRegistry::setDefault(createVLIWMachineSched);
|
||||
}
|
||||
}
|
||||
|
||||
HexagonTargetMachine &getHexagonTargetMachine() const {
|
||||
return getTM<HexagonTargetMachine>();
|
||||
|
@ -1,12 +1,12 @@
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s
|
||||
; CHECK: r[[T0:[0-9]+]] = #7
|
||||
; CHECK: memw(r29 + #0) = r[[T0]]
|
||||
; CHECK: r5 = #6
|
||||
; CHECK: r0 = #1
|
||||
; CHECK: r1 = #2
|
||||
; CHECK: r2 = #3
|
||||
; CHECK: r3 = #4
|
||||
; CHECK: r4 = #5
|
||||
; CHECK: r5 = #6
|
||||
|
||||
|
||||
define void @foo() nounwind {
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv4 < %s | FileCheck %s
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-hexagon-misched < %s | FileCheck %s
|
||||
; Check that we generate new value store packet in V4
|
||||
|
||||
@i = global i32 0, align 4
|
||||
|
@ -1,4 +1,4 @@
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched < %s | FileCheck %s
|
||||
; RUN: llc -march=hexagon -mcpu=hexagonv4 -disable-dfa-sched -disable-hexagon-misched < %s | FileCheck %s
|
||||
|
||||
@num = external global i32
|
||||
@acc = external global i32
|
||||
|
Loading…
x
Reference in New Issue
Block a user