mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-06-25 00:24:26 +00:00
misched: API for minimum vs. expected latency.
Minimum latency determines per-cycle scheduling groups. Expected latency determines critical path and cost. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@158021 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
@ -272,6 +272,9 @@ namespace llvm {
|
|||||||
unsigned Depth; // Node depth.
|
unsigned Depth; // Node depth.
|
||||||
unsigned Height; // Node height.
|
unsigned Height; // Node height.
|
||||||
public:
|
public:
|
||||||
|
unsigned TopReadyCycle; // Cycle relative to start when node is ready.
|
||||||
|
unsigned BotReadyCycle; // Cycle relative to end when node is ready.
|
||||||
|
|
||||||
const TargetRegisterClass *CopyDstRC; // Is a special copy node if not null.
|
const TargetRegisterClass *CopyDstRC; // Is a special copy node if not null.
|
||||||
const TargetRegisterClass *CopySrcRC;
|
const TargetRegisterClass *CopySrcRC;
|
||||||
|
|
||||||
@ -287,7 +290,7 @@ namespace llvm {
|
|||||||
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
|
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
|
||||||
SchedulingPref(Sched::None),
|
SchedulingPref(Sched::None),
|
||||||
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
||||||
CopyDstRC(NULL), CopySrcRC(NULL) {}
|
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
|
||||||
|
|
||||||
/// SUnit - Construct an SUnit for post-regalloc scheduling to represent
|
/// SUnit - Construct an SUnit for post-regalloc scheduling to represent
|
||||||
/// a MachineInstr.
|
/// a MachineInstr.
|
||||||
@ -301,7 +304,7 @@ namespace llvm {
|
|||||||
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
|
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
|
||||||
SchedulingPref(Sched::None),
|
SchedulingPref(Sched::None),
|
||||||
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
||||||
CopyDstRC(NULL), CopySrcRC(NULL) {}
|
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
|
||||||
|
|
||||||
/// SUnit - Construct a placeholder SUnit.
|
/// SUnit - Construct a placeholder SUnit.
|
||||||
SUnit()
|
SUnit()
|
||||||
@ -314,7 +317,7 @@ namespace llvm {
|
|||||||
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
|
isScheduleHigh(false), isScheduleLow(false), isCloned(false),
|
||||||
SchedulingPref(Sched::None),
|
SchedulingPref(Sched::None),
|
||||||
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
isDepthCurrent(false), isHeightCurrent(false), Depth(0), Height(0),
|
||||||
CopyDstRC(NULL), CopySrcRC(NULL) {}
|
TopReadyCycle(0), BotReadyCycle(0), CopyDstRC(NULL), CopySrcRC(NULL) {}
|
||||||
|
|
||||||
/// setNode - Assign the representative SDNode for this SUnit.
|
/// setNode - Assign the representative SDNode for this SUnit.
|
||||||
/// This may be used during pre-regalloc scheduling.
|
/// This may be used during pre-regalloc scheduling.
|
||||||
@ -552,12 +555,6 @@ namespace llvm {
|
|||||||
///
|
///
|
||||||
virtual void computeLatency(SUnit *SU) = 0;
|
virtual void computeLatency(SUnit *SU) = 0;
|
||||||
|
|
||||||
/// ComputeOperandLatency - Override dependence edge latency using
|
|
||||||
/// operand use/def information
|
|
||||||
///
|
|
||||||
virtual void computeOperandLatency(SUnit *, SUnit *,
|
|
||||||
SDep&) const { }
|
|
||||||
|
|
||||||
/// ForceUnitLatencies - Return true if all scheduling edges should be given
|
/// ForceUnitLatencies - Return true if all scheduling edges should be given
|
||||||
/// a latency value of one. The default is to return false; schedulers may
|
/// a latency value of one. The default is to return false; schedulers may
|
||||||
/// override this as needed.
|
/// override this as needed.
|
||||||
|
@ -291,11 +291,15 @@ namespace llvm {
|
|||||||
///
|
///
|
||||||
virtual void computeLatency(SUnit *SU);
|
virtual void computeLatency(SUnit *SU);
|
||||||
|
|
||||||
/// computeOperandLatency - Override dependence edge latency using
|
/// computeOperandLatency - Return dependence edge latency using
|
||||||
/// operand use/def information
|
/// operand use/def information
|
||||||
///
|
///
|
||||||
virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
|
/// FindMin may be set to get the minimum vs. expected latency. Minimum
|
||||||
SDep& dep) const;
|
/// latency is used for scheduling groups, while expected latency is for
|
||||||
|
/// instruction cost and critical path.
|
||||||
|
virtual unsigned computeOperandLatency(SUnit *Def, SUnit *Use,
|
||||||
|
const SDep& dep,
|
||||||
|
bool FindMin = false) const;
|
||||||
|
|
||||||
/// schedule - Order nodes according to selected style, filling
|
/// schedule - Order nodes according to selected style, filling
|
||||||
/// in the Sequence member.
|
/// in the Sequence member.
|
||||||
|
@ -214,6 +214,12 @@ public:
|
|||||||
/// class. The latency is the maximum completion time for any stage
|
/// class. The latency is the maximum completion time for any stage
|
||||||
/// in the itinerary.
|
/// in the itinerary.
|
||||||
///
|
///
|
||||||
|
/// InstrStages override the itinerary's MinLatency property. In fact, if the
|
||||||
|
/// stage latencies, which may be zero, are less than MinLatency,
|
||||||
|
/// getStageLatency returns a value less than MinLatency.
|
||||||
|
///
|
||||||
|
/// If no stages exist, MinLatency is used. If MinLatency is invalid (<0),
|
||||||
|
/// then it defaults to one cycle.
|
||||||
unsigned getStageLatency(unsigned ItinClassIndx) const {
|
unsigned getStageLatency(unsigned ItinClassIndx) const {
|
||||||
// If the target doesn't provide itinerary information, use a simple
|
// If the target doesn't provide itinerary information, use a simple
|
||||||
// non-zero default value for all instructions. Some target's provide a
|
// non-zero default value for all instructions. Some target's provide a
|
||||||
@ -222,7 +228,7 @@ public:
|
|||||||
// stage). This is different from beginStage == endStage != 0, which could
|
// stage). This is different from beginStage == endStage != 0, which could
|
||||||
// be used for zero-latency pseudo ops.
|
// be used for zero-latency pseudo ops.
|
||||||
if (isEmpty() || Itineraries[ItinClassIndx].FirstStage == 0)
|
if (isEmpty() || Itineraries[ItinClassIndx].FirstStage == 0)
|
||||||
return 1;
|
return (Props.MinLatency < 0) ? 1 : Props.MinLatency;
|
||||||
|
|
||||||
// Calculate the maximum completion time for any stage.
|
// Calculate the maximum completion time for any stage.
|
||||||
unsigned Latency = 0, StartCycle = 0;
|
unsigned Latency = 0, StartCycle = 0;
|
||||||
|
@ -668,18 +668,36 @@ public:
|
|||||||
return Opcode <= TargetOpcode::COPY;
|
return Opcode <= TargetOpcode::COPY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
virtual int getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
SDNode *DefNode, unsigned DefIdx,
|
||||||
|
SDNode *UseNode, unsigned UseIdx) const = 0;
|
||||||
|
|
||||||
/// getOperandLatency - Compute and return the use operand latency of a given
|
/// getOperandLatency - Compute and return the use operand latency of a given
|
||||||
/// pair of def and use.
|
/// pair of def and use.
|
||||||
/// In most cases, the static scheduling itinerary was enough to determine the
|
/// In most cases, the static scheduling itinerary was enough to determine the
|
||||||
/// operand latency. But it may not be possible for instructions with variable
|
/// operand latency. But it may not be possible for instructions with variable
|
||||||
/// number of defs / uses.
|
/// number of defs / uses.
|
||||||
|
///
|
||||||
|
/// This is a raw interface to the itinerary that may be directly overriden by
|
||||||
|
/// a target. Use computeOperandLatency to get the best estimate of latency.
|
||||||
virtual int getOperandLatency(const InstrItineraryData *ItinData,
|
virtual int getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
const MachineInstr *DefMI, unsigned DefIdx,
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
const MachineInstr *UseMI, unsigned UseIdx) const;
|
const MachineInstr *UseMI,
|
||||||
|
unsigned UseIdx) const;
|
||||||
|
|
||||||
virtual int getOperandLatency(const InstrItineraryData *ItinData,
|
/// computeOperandLatency - Compute and return the latency of the given data
|
||||||
SDNode *DefNode, unsigned DefIdx,
|
/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for
|
||||||
SDNode *UseNode, unsigned UseIdx) const = 0;
|
/// an unknown use. If the subtarget allows, this may or may not need to call
|
||||||
|
/// getOperandLatency().
|
||||||
|
///
|
||||||
|
/// FindMin may be set to get the minimum vs. expected latency. Minimum
|
||||||
|
/// latency is used for scheduling groups, while expected latency is for
|
||||||
|
/// instruction cost and critical path.
|
||||||
|
unsigned computeOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const TargetRegisterInfo *TRI,
|
||||||
|
const MachineInstr *DefMI,
|
||||||
|
const MachineInstr *UseMI,
|
||||||
|
unsigned Reg, bool FindMin) const;
|
||||||
|
|
||||||
/// getOutputLatency - Compute and return the output dependency latency of a
|
/// getOutputLatency - Compute and return the output dependency latency of a
|
||||||
/// a given pair of defs which both target the same register. This is usually
|
/// a given pair of defs which both target the same register. This is usually
|
||||||
@ -693,13 +711,17 @@ public:
|
|||||||
/// getInstrLatency - Compute the instruction latency of a given instruction.
|
/// getInstrLatency - Compute the instruction latency of a given instruction.
|
||||||
/// If the instruction has higher cost when predicated, it's returned via
|
/// If the instruction has higher cost when predicated, it's returned via
|
||||||
/// PredCost.
|
/// PredCost.
|
||||||
virtual int getInstrLatency(const InstrItineraryData *ItinData,
|
virtual unsigned getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
const MachineInstr *MI,
|
const MachineInstr *MI,
|
||||||
unsigned *PredCost = 0) const;
|
unsigned *PredCost = 0) const;
|
||||||
|
|
||||||
virtual int getInstrLatency(const InstrItineraryData *ItinData,
|
virtual int getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
SDNode *Node) const = 0;
|
SDNode *Node) const = 0;
|
||||||
|
|
||||||
|
/// Return the default expected latency for a def based on it's opcode.
|
||||||
|
unsigned defaultDefLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineInstr *DefMI) const;
|
||||||
|
|
||||||
/// isHighLatencyDef - Return true if this opcode has high latency to its
|
/// isHighLatencyDef - Return true if this opcode has high latency to its
|
||||||
/// result.
|
/// result.
|
||||||
virtual bool isHighLatencyDef(int opc) const { return false; }
|
virtual bool isHighLatencyDef(int opc) const { return false; }
|
||||||
|
@ -21,8 +21,9 @@
|
|||||||
#include "llvm/CodeGen/Passes.h"
|
#include "llvm/CodeGen/Passes.h"
|
||||||
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
|
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
|
||||||
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
|
#include "llvm/CodeGen/ScheduleHazardRecognizer.h"
|
||||||
#include "llvm/Analysis/AliasAnalysis.h"
|
|
||||||
#include "llvm/Target/TargetInstrInfo.h"
|
#include "llvm/Target/TargetInstrInfo.h"
|
||||||
|
#include "llvm/MC/MCInstrItineraries.h"
|
||||||
|
#include "llvm/Analysis/AliasAnalysis.h"
|
||||||
#include "llvm/Support/CommandLine.h"
|
#include "llvm/Support/CommandLine.h"
|
||||||
#include "llvm/Support/Debug.h"
|
#include "llvm/Support/Debug.h"
|
||||||
#include "llvm/Support/ErrorHandling.h"
|
#include "llvm/Support/ErrorHandling.h"
|
||||||
@ -394,6 +395,12 @@ public:
|
|||||||
return RegionCriticalPSets;
|
return RegionCriticalPSets;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// getIssueWidth - Return the max instructions per scheduling group.
|
||||||
|
///
|
||||||
|
unsigned getIssueWidth() const {
|
||||||
|
return InstrItins ? InstrItins->Props.IssueWidth : 1;
|
||||||
|
}
|
||||||
|
|
||||||
protected:
|
protected:
|
||||||
void initRegPressure();
|
void initRegPressure();
|
||||||
void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
|
void updateScheduledPressure(std::vector<unsigned> NewMaxPressure);
|
||||||
@ -787,13 +794,16 @@ class ConvergingScheduler : public MachineSchedStrategy {
|
|||||||
/// MinReadyCycle - Cycle of the soonest available instruction.
|
/// MinReadyCycle - Cycle of the soonest available instruction.
|
||||||
unsigned MinReadyCycle;
|
unsigned MinReadyCycle;
|
||||||
|
|
||||||
|
// Remember the greatest min operand latency.
|
||||||
|
unsigned MaxMinLatency;
|
||||||
|
|
||||||
/// Pending queues extend the ready queues with the same ID and the
|
/// Pending queues extend the ready queues with the same ID and the
|
||||||
/// PendingFlag set.
|
/// PendingFlag set.
|
||||||
SchedBoundary(unsigned ID, const Twine &Name):
|
SchedBoundary(unsigned ID, const Twine &Name):
|
||||||
Available(ID, Name+".A"),
|
Available(ID, Name+".A"),
|
||||||
Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
|
Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"),
|
||||||
CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
|
CheckPending(false), HazardRec(0), CurrCycle(0), IssueCount(0),
|
||||||
MinReadyCycle(UINT_MAX) {}
|
MinReadyCycle(UINT_MAX), MaxMinLatency(0) {}
|
||||||
|
|
||||||
~SchedBoundary() { delete HazardRec; }
|
~SchedBoundary() { delete HazardRec; }
|
||||||
|
|
||||||
@ -805,6 +815,8 @@ class ConvergingScheduler : public MachineSchedStrategy {
|
|||||||
|
|
||||||
void bumpCycle();
|
void bumpCycle();
|
||||||
|
|
||||||
|
void bumpNode(SUnit *SU, unsigned IssueWidth);
|
||||||
|
|
||||||
void releasePending();
|
void releasePending();
|
||||||
|
|
||||||
void removeReady(SUnit *SU);
|
void removeReady(SUnit *SU);
|
||||||
@ -868,25 +880,53 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ConvergingScheduler::releaseTopNode(SUnit *SU) {
|
void ConvergingScheduler::releaseTopNode(SUnit *SU) {
|
||||||
Top.releaseNode(SU, SU->getDepth());
|
if (SU->isScheduled)
|
||||||
|
return;
|
||||||
|
|
||||||
|
for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end();
|
||||||
|
I != E; ++I) {
|
||||||
|
unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle;
|
||||||
|
unsigned Latency =
|
||||||
|
DAG->computeOperandLatency(I->getSUnit(), SU, *I, /*FindMin=*/true);
|
||||||
|
#ifndef NDEBUG
|
||||||
|
Top.MaxMinLatency = std::max(Latency, Top.MaxMinLatency);
|
||||||
|
#endif
|
||||||
|
if (SU->TopReadyCycle < PredReadyCycle + Latency)
|
||||||
|
SU->TopReadyCycle = PredReadyCycle + Latency;
|
||||||
|
}
|
||||||
|
Top.releaseNode(SU, SU->TopReadyCycle);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
|
void ConvergingScheduler::releaseBottomNode(SUnit *SU) {
|
||||||
Bot.releaseNode(SU, SU->getHeight());
|
if (SU->isScheduled)
|
||||||
|
return;
|
||||||
|
|
||||||
|
assert(SU->getInstr() && "Scheduled SUnit must have instr");
|
||||||
|
|
||||||
|
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
|
||||||
|
I != E; ++I) {
|
||||||
|
unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle;
|
||||||
|
unsigned Latency =
|
||||||
|
DAG->computeOperandLatency(SU, I->getSUnit(), *I, /*FindMin=*/true);
|
||||||
|
#ifndef NDEBUG
|
||||||
|
Bot.MaxMinLatency = std::max(Latency, Bot.MaxMinLatency);
|
||||||
|
#endif
|
||||||
|
if (SU->BotReadyCycle < SuccReadyCycle + Latency)
|
||||||
|
SU->BotReadyCycle = SuccReadyCycle + Latency;
|
||||||
|
}
|
||||||
|
Bot.releaseNode(SU, SU->BotReadyCycle);
|
||||||
}
|
}
|
||||||
|
|
||||||
void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
|
void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU,
|
||||||
unsigned ReadyCycle) {
|
unsigned ReadyCycle) {
|
||||||
if (SU->isScheduled)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (ReadyCycle < MinReadyCycle)
|
if (ReadyCycle < MinReadyCycle)
|
||||||
MinReadyCycle = ReadyCycle;
|
MinReadyCycle = ReadyCycle;
|
||||||
|
|
||||||
// Check for interlocks first. For the purpose of other heuristics, an
|
// Check for interlocks first. For the purpose of other heuristics, an
|
||||||
// instruction that cannot issue appears as if it's not in the ReadyQueue.
|
// instruction that cannot issue appears as if it's not in the ReadyQueue.
|
||||||
if (HazardRec->isEnabled()
|
if (ReadyCycle > CurrCycle
|
||||||
&& HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard)
|
|| (HazardRec->isEnabled() && (HazardRec->getHazardType(SU)
|
||||||
|
!= ScheduleHazardRecognizer::NoHazard)))
|
||||||
Pending.push(SU);
|
Pending.push(SU);
|
||||||
else
|
else
|
||||||
Available.push(SU);
|
Available.push(SU);
|
||||||
@ -900,10 +940,11 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
|
|||||||
unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
|
unsigned NextCycle = std::max(CurrCycle + 1, MinReadyCycle);
|
||||||
|
|
||||||
if (!HazardRec->isEnabled()) {
|
if (!HazardRec->isEnabled()) {
|
||||||
// Bypass lots of virtual calls in case of long latency.
|
// Bypass HazardRec virtual calls.
|
||||||
CurrCycle = NextCycle;
|
CurrCycle = NextCycle;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
// Bypass getHazardType calls in case of long latency.
|
||||||
for (; CurrCycle != NextCycle; ++CurrCycle) {
|
for (; CurrCycle != NextCycle; ++CurrCycle) {
|
||||||
if (isTop())
|
if (isTop())
|
||||||
HazardRec->AdvanceCycle();
|
HazardRec->AdvanceCycle();
|
||||||
@ -917,6 +958,26 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() {
|
|||||||
<< CurrCycle << '\n');
|
<< CurrCycle << '\n');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Move the boundary of scheduled code by one SUnit.
|
||||||
|
void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU,
|
||||||
|
unsigned IssueWidth) {
|
||||||
|
// Update the reservation table.
|
||||||
|
if (HazardRec->isEnabled()) {
|
||||||
|
if (!isTop() && SU->isCall) {
|
||||||
|
// Calls are scheduled with their preceding instructions. For bottom-up
|
||||||
|
// scheduling, clear the pipeline state before emitting.
|
||||||
|
HazardRec->Reset();
|
||||||
|
}
|
||||||
|
HazardRec->EmitInstruction(SU);
|
||||||
|
}
|
||||||
|
// Check the instruction group size limit.
|
||||||
|
++IssueCount;
|
||||||
|
if (IssueCount == IssueWidth) {
|
||||||
|
DEBUG(dbgs() << "*** Max instrs at cycle " << CurrCycle << '\n');
|
||||||
|
bumpCycle();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Release pending ready nodes in to the available queue. This makes them
|
/// Release pending ready nodes in to the available queue. This makes them
|
||||||
/// visible to heuristics.
|
/// visible to heuristics.
|
||||||
void ConvergingScheduler::SchedBoundary::releasePending() {
|
void ConvergingScheduler::SchedBoundary::releasePending() {
|
||||||
@ -928,7 +989,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
|
|||||||
// so, add them to the available queue.
|
// so, add them to the available queue.
|
||||||
for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
|
for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
|
||||||
SUnit *SU = *(Pending.begin()+i);
|
SUnit *SU = *(Pending.begin()+i);
|
||||||
unsigned ReadyCycle = isTop() ? SU->getHeight() : SU->getDepth();
|
unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
|
||||||
|
|
||||||
if (ReadyCycle < MinReadyCycle)
|
if (ReadyCycle < MinReadyCycle)
|
||||||
MinReadyCycle = ReadyCycle;
|
MinReadyCycle = ReadyCycle;
|
||||||
@ -965,7 +1026,8 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
|
|||||||
releasePending();
|
releasePending();
|
||||||
|
|
||||||
for (unsigned i = 0; Available.empty(); ++i) {
|
for (unsigned i = 0; Available.empty(); ++i) {
|
||||||
assert(i <= HazardRec->getMaxLookAhead() && "permanent hazard"); (void)i;
|
assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
|
||||||
|
"permanent hazard"); (void)i;
|
||||||
bumpCycle();
|
bumpCycle();
|
||||||
releasePending();
|
releasePending();
|
||||||
}
|
}
|
||||||
@ -1205,27 +1267,15 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) {
|
|||||||
|
|
||||||
/// Update the scheduler's state after scheduling a node. This is the same node
|
/// Update the scheduler's state after scheduling a node. This is the same node
|
||||||
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
|
/// that was just returned by pickNode(). However, ScheduleDAGMI needs to update
|
||||||
/// it's state based on the current cycle before MachineSchedStrategy.
|
/// it's state based on the current cycle before MachineSchedStrategy does.
|
||||||
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
|
void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) {
|
||||||
// Update the reservation table.
|
if (IsTopNode) {
|
||||||
if (IsTopNode && Top.HazardRec->isEnabled()) {
|
SU->TopReadyCycle = Top.CurrCycle;
|
||||||
Top.HazardRec->EmitInstruction(SU);
|
Top.bumpNode(SU, DAG->getIssueWidth());
|
||||||
if (Top.HazardRec->atIssueLimit()) {
|
|
||||||
DEBUG(dbgs() << "*** Max instrs at cycle " << Top.CurrCycle << '\n');
|
|
||||||
Top.bumpCycle();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (Bot.HazardRec->isEnabled()) {
|
|
||||||
if (SU->isCall) {
|
|
||||||
// Calls are scheduled with their preceding instructions. For bottom-up
|
|
||||||
// scheduling, clear the pipeline state before emitting.
|
|
||||||
Bot.HazardRec->Reset();
|
|
||||||
}
|
|
||||||
Bot.HazardRec->EmitInstruction(SU);
|
|
||||||
if (Bot.HazardRec->atIssueLimit()) {
|
|
||||||
DEBUG(dbgs() << "*** Max instrs at cycle " << Bot.CurrCycle << '\n');
|
|
||||||
Bot.bumpCycle();
|
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
SU->BotReadyCycle = Bot.CurrCycle;
|
||||||
|
Bot.bumpNode(SU, DAG->getIssueWidth());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -271,10 +271,12 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU,
|
|||||||
// Adjust the dependence latency using operand def/use
|
// Adjust the dependence latency using operand def/use
|
||||||
// information (if any), and then allow the target to
|
// information (if any), and then allow the target to
|
||||||
// perform its own adjustments.
|
// perform its own adjustments.
|
||||||
const SDep& dep = SDep(SU, SDep::Data, LDataLatency, *Alias);
|
SDep dep(SU, SDep::Data, LDataLatency, *Alias);
|
||||||
if (!UnitLatencies) {
|
if (!UnitLatencies) {
|
||||||
computeOperandLatency(SU, UseSU, const_cast<SDep &>(dep));
|
unsigned Latency = computeOperandLatency(SU, UseSU, dep);
|
||||||
ST.adjustSchedDependency(SU, UseSU, const_cast<SDep &>(dep));
|
dep.setLatency(Latency);
|
||||||
|
|
||||||
|
ST.adjustSchedDependency(SU, UseSU, dep);
|
||||||
}
|
}
|
||||||
UseSU->addPred(dep);
|
UseSU->addPred(dep);
|
||||||
}
|
}
|
||||||
@ -461,11 +463,13 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
|
|||||||
// Create a data dependence.
|
// Create a data dependence.
|
||||||
//
|
//
|
||||||
// TODO: Handle "special" address latencies cleanly.
|
// TODO: Handle "special" address latencies cleanly.
|
||||||
const SDep &dep = SDep(DefSU, SDep::Data, DefSU->Latency, Reg);
|
SDep dep(DefSU, SDep::Data, DefSU->Latency, Reg);
|
||||||
if (!UnitLatencies) {
|
if (!UnitLatencies) {
|
||||||
// Adjust the dependence latency using operand def/use information, then
|
// Adjust the dependence latency using operand def/use information, then
|
||||||
// allow the target to perform its own adjustments.
|
// allow the target to perform its own adjustments.
|
||||||
computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
|
unsigned Latency = computeOperandLatency(DefSU, SU, const_cast<SDep &>(dep));
|
||||||
|
dep.setLatency(Latency);
|
||||||
|
|
||||||
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
|
const TargetSubtargetInfo &ST = TM.getSubtarget<TargetSubtargetInfo>();
|
||||||
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
|
ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
|
||||||
}
|
}
|
||||||
@ -970,8 +974,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
|
|||||||
}
|
}
|
||||||
|
|
||||||
void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
|
void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
|
||||||
// Compute the latency for the node.
|
// Compute the latency for the node. We only provide a default for missing
|
||||||
if (!InstrItins || InstrItins->isEmpty()) {
|
// itineraries. Empty itineraries still have latency properties.
|
||||||
|
if (!InstrItins) {
|
||||||
SU->Latency = 1;
|
SU->Latency = 1;
|
||||||
|
|
||||||
// Simplistic target-independent heuristic: assume that loads take
|
// Simplistic target-independent heuristic: assume that loads take
|
||||||
@ -983,63 +988,15 @@ void ScheduleDAGInstrs::computeLatency(SUnit *SU) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
|
unsigned ScheduleDAGInstrs::computeOperandLatency(SUnit *Def, SUnit *Use,
|
||||||
SDep& dep) const {
|
const SDep& dep,
|
||||||
if (!InstrItins || InstrItins->isEmpty())
|
bool FindMin) const {
|
||||||
return;
|
|
||||||
|
|
||||||
// For a data dependency with a known register...
|
// For a data dependency with a known register...
|
||||||
if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
|
if ((dep.getKind() != SDep::Data) || (dep.getReg() == 0))
|
||||||
return;
|
return 1;
|
||||||
|
|
||||||
const unsigned Reg = dep.getReg();
|
return TII->computeOperandLatency(InstrItins, TRI, Def->getInstr(),
|
||||||
|
Use->getInstr(), dep.getReg(), FindMin);
|
||||||
// ... find the definition of the register in the defining
|
|
||||||
// instruction
|
|
||||||
MachineInstr *DefMI = Def->getInstr();
|
|
||||||
int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
|
|
||||||
if (DefIdx != -1) {
|
|
||||||
const MachineOperand &MO = DefMI->getOperand(DefIdx);
|
|
||||||
if (MO.isReg() && MO.isImplicit() &&
|
|
||||||
DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
|
|
||||||
// This is an implicit def, getOperandLatency() won't return the correct
|
|
||||||
// latency. e.g.
|
|
||||||
// %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
|
|
||||||
// %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
|
|
||||||
// What we want is to compute latency between def of %D6/%D7 and use of
|
|
||||||
// %Q3 instead.
|
|
||||||
unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
|
|
||||||
if (DefMI->getOperand(Op2).isReg())
|
|
||||||
DefIdx = Op2;
|
|
||||||
}
|
|
||||||
MachineInstr *UseMI = Use->getInstr();
|
|
||||||
// For all uses of the register, calculate the maxmimum latency
|
|
||||||
int Latency = -1;
|
|
||||||
if (UseMI) {
|
|
||||||
for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
|
|
||||||
const MachineOperand &MO = UseMI->getOperand(i);
|
|
||||||
if (!MO.isReg() || !MO.isUse())
|
|
||||||
continue;
|
|
||||||
unsigned MOReg = MO.getReg();
|
|
||||||
if (MOReg != Reg)
|
|
||||||
continue;
|
|
||||||
|
|
||||||
int UseCycle = TII->getOperandLatency(InstrItins, DefMI, DefIdx,
|
|
||||||
UseMI, i);
|
|
||||||
Latency = std::max(Latency, UseCycle);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
// UseMI is null, then it must be a scheduling barrier.
|
|
||||||
if (!InstrItins || InstrItins->isEmpty())
|
|
||||||
return;
|
|
||||||
unsigned DefClass = DefMI->getDesc().getSchedClass();
|
|
||||||
Latency = InstrItins->getOperandCycle(DefClass, DefIdx);
|
|
||||||
}
|
|
||||||
|
|
||||||
// If we found a latency, then replace the existing dependence latency.
|
|
||||||
if (Latency >= 0)
|
|
||||||
dep.setLatency(Latency);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
|
void ScheduleDAGInstrs::dumpNode(const SUnit *SU) const {
|
||||||
|
@ -98,12 +98,6 @@ namespace llvm {
|
|||||||
///
|
///
|
||||||
virtual void computeLatency(SUnit *SU);
|
virtual void computeLatency(SUnit *SU);
|
||||||
|
|
||||||
/// computeOperandLatency - Override dependence edge latency using
|
|
||||||
/// operand use/def information
|
|
||||||
///
|
|
||||||
virtual void computeOperandLatency(SUnit *Def, SUnit *Use,
|
|
||||||
SDep& dep) const { }
|
|
||||||
|
|
||||||
virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
|
virtual void computeOperandLatency(SDNode *Def, SDNode *Use,
|
||||||
unsigned OpIdx, SDep& dep) const;
|
unsigned OpIdx, SDep& dep) const;
|
||||||
|
|
||||||
|
@ -1046,7 +1046,7 @@ bool TwoAddressInstructionPass::isDefTooClose(unsigned Reg, unsigned Dist,
|
|||||||
return true; // Below MI
|
return true; // Below MI
|
||||||
unsigned DefDist = DDI->second;
|
unsigned DefDist = DDI->second;
|
||||||
assert(Dist > DefDist && "Visited def already?");
|
assert(Dist > DefDist && "Visited def already?");
|
||||||
if (TII->getInstrLatency(InstrItins, DefMI) > (int)(Dist - DefDist))
|
if (TII->getInstrLatency(InstrItins, DefMI) > (Dist - DefDist))
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
@ -2568,11 +2568,12 @@ static const MachineInstr *getBundledUseMI(const TargetRegisterInfo *TRI,
|
|||||||
int
|
int
|
||||||
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
const MachineInstr *DefMI, unsigned DefIdx,
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
const MachineInstr *UseMI, unsigned UseIdx) const {
|
const MachineInstr *UseMI,
|
||||||
|
unsigned UseIdx) const {
|
||||||
if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
|
if (DefMI->isCopyLike() || DefMI->isInsertSubreg() ||
|
||||||
DefMI->isRegSequence() || DefMI->isImplicitDef())
|
DefMI->isRegSequence() || DefMI->isImplicitDef()) {
|
||||||
return 1;
|
return 1;
|
||||||
|
}
|
||||||
if (!ItinData || ItinData->isEmpty())
|
if (!ItinData || ItinData->isEmpty())
|
||||||
return DefMI->mayLoad() ? 3 : 1;
|
return DefMI->mayLoad() ? 3 : 1;
|
||||||
|
|
||||||
@ -2983,14 +2984,16 @@ ARMBaseInstrInfo::getOutputLatency(const InstrItineraryData *ItinData,
|
|||||||
DepMI->getNumOperands());
|
DepMI->getNumOperands());
|
||||||
}
|
}
|
||||||
|
|
||||||
int ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
const MachineInstr *MI,
|
const MachineInstr *MI,
|
||||||
unsigned *PredCost) const {
|
unsigned *PredCost) const {
|
||||||
if (MI->isCopyLike() || MI->isInsertSubreg() ||
|
if (MI->isCopyLike() || MI->isInsertSubreg() ||
|
||||||
MI->isRegSequence() || MI->isImplicitDef())
|
MI->isRegSequence() || MI->isImplicitDef())
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
if (!ItinData || ItinData->isEmpty())
|
// Be sure to call getStageLatency for an empty itinerary in case it has a
|
||||||
|
// valid MinLatency property.
|
||||||
|
if (!ItinData)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
if (MI->isBundle()) {
|
if (MI->isBundle()) {
|
||||||
|
@ -249,8 +249,9 @@ private:
|
|||||||
const MCInstrDesc &UseMCID,
|
const MCInstrDesc &UseMCID,
|
||||||
unsigned UseIdx, unsigned UseAlign) const;
|
unsigned UseIdx, unsigned UseAlign) const;
|
||||||
|
|
||||||
int getInstrLatency(const InstrItineraryData *ItinData,
|
unsigned getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
const MachineInstr *MI, unsigned *PredCost = 0) const;
|
const MachineInstr *MI,
|
||||||
|
unsigned *PredCost = 0) const;
|
||||||
|
|
||||||
int getInstrLatency(const InstrItineraryData *ItinData,
|
int getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
SDNode *Node) const;
|
SDNode *Node) const;
|
||||||
|
@ -61,22 +61,125 @@ TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData,
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the default expected latency for a def based on it's opcode.
|
||||||
|
unsigned TargetInstrInfo::defaultDefLatency(const InstrItineraryData *ItinData,
|
||||||
|
const MachineInstr *DefMI) const {
|
||||||
|
if (DefMI->mayLoad())
|
||||||
|
return ItinData->Props.LoadLatency;
|
||||||
|
if (isHighLatencyDef(DefMI->getOpcode()))
|
||||||
|
return ItinData->Props.HighLatency;
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Both DefMI and UseMI must be valid. By default, call directly to the
|
||||||
|
/// itinerary. This may be overriden by the target.
|
||||||
int
|
int
|
||||||
TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData,
|
||||||
const MachineInstr *DefMI, unsigned DefIdx,
|
const MachineInstr *DefMI, unsigned DefIdx,
|
||||||
const MachineInstr *UseMI, unsigned UseIdx) const {
|
const MachineInstr *UseMI,
|
||||||
if (!ItinData || ItinData->isEmpty())
|
unsigned UseIdx) const {
|
||||||
return -1;
|
|
||||||
|
|
||||||
unsigned DefClass = DefMI->getDesc().getSchedClass();
|
unsigned DefClass = DefMI->getDesc().getSchedClass();
|
||||||
unsigned UseClass = UseMI->getDesc().getSchedClass();
|
unsigned UseClass = UseMI->getDesc().getSchedClass();
|
||||||
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
|
return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx);
|
||||||
}
|
}
|
||||||
|
|
||||||
int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
/// computeOperandLatency - Compute and return the latency of the given data
|
||||||
|
/// dependent def and use. DefMI must be a valid def. UseMI may be NULL for an
|
||||||
|
/// unknown use. Depending on the subtarget's itinerary properties, this may or
|
||||||
|
/// may not need to call getOperandLatency().
|
||||||
|
///
|
||||||
|
/// FindMin may be set to get the minimum vs. expected latency. Minimum
|
||||||
|
/// latency is used for scheduling groups, while expected latency is for
|
||||||
|
/// instruction cost and critical path.
|
||||||
|
///
|
||||||
|
/// For most subtargets, we don't need DefIdx or UseIdx to compute min latency.
|
||||||
|
/// DefMI must be a valid definition, but UseMI may be NULL for an unknown use.
|
||||||
|
unsigned TargetInstrInfo::
|
||||||
|
computeOperandLatency(const InstrItineraryData *ItinData,
|
||||||
|
const TargetRegisterInfo *TRI,
|
||||||
|
const MachineInstr *DefMI, const MachineInstr *UseMI,
|
||||||
|
unsigned Reg, bool FindMin) const {
|
||||||
|
|
||||||
|
// Default to one cycle for missing itinerary. Empty itineraries still have
|
||||||
|
// a properties. We have one hard-coded exception for loads, to preserve
|
||||||
|
// existing behavior.
|
||||||
|
if (!ItinData)
|
||||||
|
return DefMI->mayLoad() ? 2 : 1;
|
||||||
|
|
||||||
|
// Return a latency based on the itinerary properties and defining instruction
|
||||||
|
// if possible. Some common subtargets don't require per-operand latency,
|
||||||
|
// especially for minimum latencies.
|
||||||
|
if (FindMin) {
|
||||||
|
// If MinLatency is valid, call getInstrLatency. This uses Stage latency if
|
||||||
|
// it exists before defaulting to MinLatency.
|
||||||
|
if (ItinData->Props.MinLatency >= 0)
|
||||||
|
return getInstrLatency(ItinData, DefMI);
|
||||||
|
|
||||||
|
// If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
|
||||||
|
// For empty itineraries, short-cirtuit the check and default to one cycle.
|
||||||
|
if (ItinData->isEmpty())
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
else if(ItinData->isEmpty())
|
||||||
|
return defaultDefLatency(ItinData, DefMI);
|
||||||
|
|
||||||
|
// ...operand lookup required
|
||||||
|
|
||||||
|
// Find the definition of the register in the defining instruction.
|
||||||
|
int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
|
||||||
|
if (DefIdx != -1) {
|
||||||
|
const MachineOperand &MO = DefMI->getOperand(DefIdx);
|
||||||
|
if (MO.isReg() && MO.isImplicit() &&
|
||||||
|
DefIdx >= (int)DefMI->getDesc().getNumOperands()) {
|
||||||
|
// This is an implicit def, getOperandLatency() won't return the correct
|
||||||
|
// latency. e.g.
|
||||||
|
// %D6<def>, %D7<def> = VLD1q16 %R2<kill>, 0, ..., %Q3<imp-def>
|
||||||
|
// %Q1<def> = VMULv8i16 %Q1<kill>, %Q3<kill>, ...
|
||||||
|
// What we want is to compute latency between def of %D6/%D7 and use of
|
||||||
|
// %Q3 instead.
|
||||||
|
unsigned Op2 = DefMI->findRegisterDefOperandIdx(Reg, false, true, TRI);
|
||||||
|
if (DefMI->getOperand(Op2).isReg())
|
||||||
|
DefIdx = Op2;
|
||||||
|
}
|
||||||
|
// For all uses of the register, calculate the maxmimum latency
|
||||||
|
int OperLatency = -1;
|
||||||
|
|
||||||
|
// UseMI is null, then it must be a scheduling barrier.
|
||||||
|
if (!UseMI) {
|
||||||
|
unsigned DefClass = DefMI->getDesc().getSchedClass();
|
||||||
|
OperLatency = ItinData->getOperandCycle(DefClass, DefIdx);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
for (unsigned i = 0, e = UseMI->getNumOperands(); i != e; ++i) {
|
||||||
|
const MachineOperand &MO = UseMI->getOperand(i);
|
||||||
|
if (!MO.isReg() || !MO.isUse())
|
||||||
|
continue;
|
||||||
|
unsigned MOReg = MO.getReg();
|
||||||
|
if (MOReg != Reg)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
int UseCycle = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, i);
|
||||||
|
OperLatency = std::max(OperLatency, UseCycle);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
// If we found an operand latency, we're done.
|
||||||
|
if (OperLatency >= 0)
|
||||||
|
return OperLatency;
|
||||||
|
}
|
||||||
|
// No operand latency was found.
|
||||||
|
unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
|
||||||
|
// Expected latency is the max of the stage latency and itinerary props.
|
||||||
|
if (!FindMin)
|
||||||
|
InstrLatency = std::max(InstrLatency, defaultDefLatency(ItinData, DefMI));
|
||||||
|
return InstrLatency;
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData,
|
||||||
const MachineInstr *MI,
|
const MachineInstr *MI,
|
||||||
unsigned *PredCost) const {
|
unsigned *PredCost) const {
|
||||||
if (!ItinData || ItinData->isEmpty())
|
// Default to one cycle for no itinerary. However, an "empty" itinerary may
|
||||||
|
// still have a MinLatency property, which getStageLatency checks.
|
||||||
|
if (!ItinData)
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
return ItinData->getStageLatency(MI->getDesc().getSchedClass());
|
return ItinData->getStageLatency(MI->getDesc().getSchedClass());
|
||||||
|
Reference in New Issue
Block a user