mirror of
https://github.com/c64scene-ar/llvm-6502.git
synced 2025-01-02 07:32:52 +00:00
Revert 129383. It causes some targets to hit a scheduler assert.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@129385 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
parent
e9e7ffa10d
commit
c558bf3972
@ -102,11 +102,11 @@ static cl::opt<unsigned> AvgIPC(
|
|||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
namespace {
|
namespace {
|
||||||
// For sched=list-ilp, Count the number of times each factor comes into play.
|
// For sched=list-ilp, Count the number of times each factor comes into play.
|
||||||
enum { FactPressureDiff, FactRegUses, FactStall, FactHeight, FactDepth,
|
enum { FactPressureDiff, FactRegUses, FactHeight, FactDepth, FactStatic,
|
||||||
FactStatic, FactOther, NumFactors };
|
FactOther, NumFactors };
|
||||||
}
|
}
|
||||||
static const char *FactorName[NumFactors] =
|
static const char *FactorName[NumFactors] =
|
||||||
{"PressureDiff", "RegUses", "Stall", "Height", "Depth","Static", "Other"};
|
{"PressureDiff", "RegUses", "Height", "Depth","Static", "Other"};
|
||||||
static int FactorCount[NumFactors];
|
static int FactorCount[NumFactors];
|
||||||
#endif //!NDEBUG
|
#endif //!NDEBUG
|
||||||
|
|
||||||
@ -463,13 +463,6 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) {
|
|||||||
if (DisableSchedCycles)
|
if (DisableSchedCycles)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
// FIXME: Nodes such as CopyFromReg probably should not advance the current
|
|
||||||
// cycle. Otherwise, we can wrongly mask real stalls. If the non-machine node
|
|
||||||
// has predecessors the cycle will be advanced when they are scheduled.
|
|
||||||
// But given the crude nature of modeling latency through such nodes, we
|
|
||||||
// currently need to treat these nodes like real instructions.
|
|
||||||
// if (!SU->getNode() || !SU->getNode()->isMachineOpcode()) return;
|
|
||||||
|
|
||||||
unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
|
unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth();
|
||||||
|
|
||||||
// Bump CurCycle to account for latency. We assume the latency of other
|
// Bump CurCycle to account for latency. We assume the latency of other
|
||||||
@ -540,19 +533,16 @@ void ScheduleDAGRRList::EmitNode(SUnit *SU) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void resetVRegCycle(SUnit *SU);
|
|
||||||
|
|
||||||
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
|
/// ScheduleNodeBottomUp - Add the node to the schedule. Decrement the pending
|
||||||
/// count of its predecessors. If a predecessor pending count is zero, add it to
|
/// count of its predecessors. If a predecessor pending count is zero, add it to
|
||||||
/// the Available queue.
|
/// the Available queue.
|
||||||
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
|
void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
|
||||||
DEBUG(dbgs() << "*** Scheduling [" << CurCycle << "]: ");
|
DEBUG(dbgs() << "\n*** Scheduling [" << CurCycle << "]: ");
|
||||||
DEBUG(SU->dump(this));
|
DEBUG(SU->dump(this));
|
||||||
|
|
||||||
#ifndef NDEBUG
|
#ifndef NDEBUG
|
||||||
if (CurCycle < SU->getHeight())
|
if (CurCycle < SU->getHeight())
|
||||||
DEBUG(dbgs() << " Height [" << SU->getHeight()
|
DEBUG(dbgs() << " Height [" << SU->getHeight() << "] pipeline stall!\n");
|
||||||
<< "] pipeline stall!\n");
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// FIXME: Do not modify node height. It may interfere with
|
// FIXME: Do not modify node height. It may interfere with
|
||||||
@ -569,7 +559,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
|
|||||||
AvailableQueue->ScheduledNode(SU);
|
AvailableQueue->ScheduledNode(SU);
|
||||||
|
|
||||||
// If HazardRec is disabled, and each inst counts as one cycle, then
|
// If HazardRec is disabled, and each inst counts as one cycle, then
|
||||||
// advance CurCycle before ReleasePredecessors to avoid useless pushes to
|
// advance CurCycle before ReleasePredecessors to avoid useless pushes to
|
||||||
// PendingQueue for schedulers that implement HasReadyFilter.
|
// PendingQueue for schedulers that implement HasReadyFilter.
|
||||||
if (!HazardRec->isEnabled() && AvgIPC < 2)
|
if (!HazardRec->isEnabled() && AvgIPC < 2)
|
||||||
AdvanceToCycle(CurCycle + 1);
|
AdvanceToCycle(CurCycle + 1);
|
||||||
@ -590,25 +580,20 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
resetVRegCycle(SU);
|
|
||||||
|
|
||||||
SU->isScheduled = true;
|
SU->isScheduled = true;
|
||||||
|
|
||||||
// Conditions under which the scheduler should eagerly advance the cycle:
|
// Conditions under which the scheduler should eagerly advance the cycle:
|
||||||
// (1) No available instructions
|
// (1) No available instructions
|
||||||
// (2) All pipelines full, so available instructions must have hazards.
|
// (2) All pipelines full, so available instructions must have hazards.
|
||||||
//
|
//
|
||||||
// If HazardRec is disabled, the cycle was pre-advanced before calling
|
// If HazardRec is disabled, the cycle was advanced earlier.
|
||||||
// ReleasePredecessors. In that case, IssueCount should remain 0.
|
|
||||||
//
|
//
|
||||||
// Check AvailableQueue after ReleasePredecessors in case of zero latency.
|
// Check AvailableQueue after ReleasePredecessors in case of zero latency.
|
||||||
if (HazardRec->isEnabled() || AvgIPC > 1) {
|
++IssueCount;
|
||||||
if (SU->getNode() && SU->getNode()->isMachineOpcode())
|
if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
|
||||||
++IssueCount;
|
|| (!HazardRec->isEnabled() && AvgIPC > 1 && IssueCount == AvgIPC)
|
||||||
if ((HazardRec->isEnabled() && HazardRec->atIssueLimit())
|
|| AvailableQueue->empty())
|
||||||
|| (!HazardRec->isEnabled() && IssueCount == AvgIPC))
|
AdvanceToCycle(CurCycle + 1);
|
||||||
AdvanceToCycle(CurCycle + 1);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
|
/// CapturePred - This does the opposite of ReleasePred. Since SU is being
|
||||||
@ -1235,7 +1220,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() {
|
|||||||
// priority. If it is not ready put it back. Schedule the node.
|
// priority. If it is not ready put it back. Schedule the node.
|
||||||
Sequence.reserve(SUnits.size());
|
Sequence.reserve(SUnits.size());
|
||||||
while (!AvailableQueue->empty()) {
|
while (!AvailableQueue->empty()) {
|
||||||
DEBUG(dbgs() << "Examining Available:\n";
|
DEBUG(dbgs() << "\n*** Examining Available\n";
|
||||||
AvailableQueue->dump(this));
|
AvailableQueue->dump(this));
|
||||||
|
|
||||||
// Pick the best node to schedule taking all constraints into
|
// Pick the best node to schedule taking all constraints into
|
||||||
@ -1676,6 +1661,17 @@ void RegReductionPQBase::CalculateSethiUllmanNumbers() {
|
|||||||
CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
|
CalcNodeSethiUllmanNumber(&(*SUnits)[i], SethiUllmanNumbers);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
|
||||||
|
SUnits = &sunits;
|
||||||
|
// Add pseudo dependency edges for two-address nodes.
|
||||||
|
AddPseudoTwoAddrDeps();
|
||||||
|
// Reroute edges to nodes with multiple uses.
|
||||||
|
if (!TracksRegPressure)
|
||||||
|
PrescheduleNodesWithMultipleUses();
|
||||||
|
// Calculate node priorities.
|
||||||
|
CalculateSethiUllmanNumbers();
|
||||||
|
}
|
||||||
|
|
||||||
void RegReductionPQBase::addNode(const SUnit *SU) {
|
void RegReductionPQBase::addNode(const SUnit *SU) {
|
||||||
unsigned SUSize = SethiUllmanNumbers.size();
|
unsigned SUSize = SethiUllmanNumbers.size();
|
||||||
if (SUnits->size() > SUSize)
|
if (SUnits->size() > SUSize)
|
||||||
@ -2012,29 +2008,7 @@ static unsigned calcMaxScratches(const SUnit *SU) {
|
|||||||
return Scratches;
|
return Scratches;
|
||||||
}
|
}
|
||||||
|
|
||||||
/// hasOnlyLiveInOpers - Return true if SU has only value predecessors that are
|
/// hasOnlyLiveOutUse - Return true if SU has a single value successor that is a
|
||||||
/// CopyFromReg from a virtual register.
|
|
||||||
static bool hasOnlyLiveInOpers(const SUnit *SU) {
|
|
||||||
bool RetVal = false;
|
|
||||||
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
|
|
||||||
I != E; ++I) {
|
|
||||||
if (I->isCtrl()) continue;
|
|
||||||
const SUnit *PredSU = I->getSUnit();
|
|
||||||
if (PredSU->getNode() &&
|
|
||||||
PredSU->getNode()->getOpcode() == ISD::CopyFromReg) {
|
|
||||||
unsigned Reg =
|
|
||||||
cast<RegisterSDNode>(PredSU->getNode()->getOperand(1))->getReg();
|
|
||||||
if (TargetRegisterInfo::isVirtualRegister(Reg)) {
|
|
||||||
RetVal = true;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return RetVal;
|
|
||||||
}
|
|
||||||
|
|
||||||
/// hasOnlyLiveOutUses - Return true if SU has only value successors that are
|
|
||||||
/// CopyToReg to a virtual register. This SU def is probably a liveout and
|
/// CopyToReg to a virtual register. This SU def is probably a liveout and
|
||||||
/// it has no other use. It should be scheduled closer to the terminator.
|
/// it has no other use. It should be scheduled closer to the terminator.
|
||||||
static bool hasOnlyLiveOutUses(const SUnit *SU) {
|
static bool hasOnlyLiveOutUses(const SUnit *SU) {
|
||||||
@ -2056,71 +2030,62 @@ static bool hasOnlyLiveOutUses(const SUnit *SU) {
|
|||||||
return RetVal;
|
return RetVal;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Set isVRegCycle for a node with only live in opers and live out uses. Also
|
/// UnitsSharePred - Return true if the two scheduling units share a common
|
||||||
// set isVRegCycle for its CopyFromReg operands.
|
/// data predecessor.
|
||||||
//
|
static bool UnitsSharePred(const SUnit *left, const SUnit *right) {
|
||||||
// This is only relevant for single-block loops, in which case the VRegCycle
|
SmallSet<const SUnit*, 4> Preds;
|
||||||
// node is likely an induction variable in which the operand and target virtual
|
for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end();
|
||||||
// registers should be coalesced (e.g. pre/post increment values). Setting the
|
|
||||||
// isVRegCycle flag helps the scheduler prioritize other uses of the same
|
|
||||||
// CopyFromReg so that this node becomes the virtual register "kill". This
|
|
||||||
// avoids interference between the values live in and out of the block and
|
|
||||||
// eliminates a copy inside the loop.
|
|
||||||
static void initVRegCycle(SUnit *SU) {
|
|
||||||
if (DisableSchedVRegCycle)
|
|
||||||
return;
|
|
||||||
|
|
||||||
if (!hasOnlyLiveInOpers(SU) || !hasOnlyLiveOutUses(SU))
|
|
||||||
return;
|
|
||||||
|
|
||||||
DEBUG(dbgs() << "VRegCycle: SU(" << SU->NodeNum << ")\n");
|
|
||||||
|
|
||||||
SU->isVRegCycle = true;
|
|
||||||
|
|
||||||
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
|
|
||||||
I != E; ++I) {
|
|
||||||
if (I->isCtrl()) continue;
|
|
||||||
I->getSUnit()->isVRegCycle = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// After scheduling the definition of a VRegCycle, clear the isVRegCycle flag of
|
|
||||||
// CopyFromReg operands. We should no longer penalize other uses of this VReg.
|
|
||||||
static void resetVRegCycle(SUnit *SU) {
|
|
||||||
if (!SU->isVRegCycle)
|
|
||||||
return;
|
|
||||||
|
|
||||||
for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
|
|
||||||
I != E; ++I) {
|
I != E; ++I) {
|
||||||
if (I->isCtrl()) continue; // ignore chain preds
|
if (I->isCtrl()) continue; // ignore chain preds
|
||||||
SUnit *PredSU = I->getSUnit();
|
Preds.insert(I->getSUnit());
|
||||||
if (PredSU->isVRegCycle) {
|
|
||||||
assert(PredSU->getNode()->getOpcode() == ISD::CopyFromReg &&
|
|
||||||
"VRegCycle def must be CopyFromReg");
|
|
||||||
I->getSUnit()->isVRegCycle = 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end();
|
||||||
|
|
||||||
// Return true if this SUnit uses a CopyFromReg node marked as a VRegCycle. This
|
|
||||||
// means a node that defines the VRegCycle has not been scheduled yet.
|
|
||||||
static bool hasVRegCycleUse(const SUnit *SU) {
|
|
||||||
// If this SU also defines the VReg, don't hoist it as a "use".
|
|
||||||
if (SU->isVRegCycle)
|
|
||||||
return false;
|
|
||||||
|
|
||||||
for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
|
|
||||||
I != E; ++I) {
|
I != E; ++I) {
|
||||||
if (I->isCtrl()) continue; // ignore chain preds
|
if (I->isCtrl()) continue; // ignore chain preds
|
||||||
if (I->getSUnit()->isVRegCycle &&
|
if (Preds.count(I->getSUnit()))
|
||||||
I->getSUnit()->getNode()->getOpcode() == ISD::CopyFromReg) {
|
|
||||||
DEBUG(dbgs() << " VReg cycle use: SU (" << SU->NodeNum << ")\n");
|
|
||||||
return true;
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return true if the virtual register defined by VRCycleSU may interfere with
|
||||||
|
// VRUseSU.
|
||||||
|
//
|
||||||
|
// Note: We may consider two SU's that use the same value live into a loop as
|
||||||
|
// interfering even though the value is not an induction variable. This is an
|
||||||
|
// unfortunate consequence of scheduling on the selection DAG.
|
||||||
|
static bool checkVRegCycleInterference(const SUnit *VRCycleSU,
|
||||||
|
const SUnit *VRUseSU) {
|
||||||
|
for (SUnit::const_pred_iterator I = VRCycleSU->Preds.begin(),
|
||||||
|
E = VRCycleSU->Preds.end(); I != E; ++I) {
|
||||||
|
if (I->isCtrl()) continue; // ignore chain preds
|
||||||
|
SDNode *InNode = I->getSUnit()->getNode();
|
||||||
|
if (!InNode || InNode->getOpcode() != ISD::CopyFromReg)
|
||||||
|
continue;
|
||||||
|
for (SUnit::const_pred_iterator II = VRUseSU->Preds.begin(),
|
||||||
|
EE = VRUseSU->Preds.end(); II != EE; ++II) {
|
||||||
|
if (II->getSUnit() == I->getSUnit())
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Compare the VRegCycle properties of the nodes.
|
||||||
|
// Return -1 if left has higher priority, 1 if right has higher priority.
|
||||||
|
// Return 0 if priority is equivalent.
|
||||||
|
static int BUCompareVRegCycle(const SUnit *left, const SUnit *right) {
|
||||||
|
if (left->isVRegCycle && !right->isVRegCycle) {
|
||||||
|
if (checkVRegCycleInterference(left, right))
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
else if (!left->isVRegCycle && right->isVRegCycle) {
|
||||||
|
if (checkVRegCycleInterference(right, left))
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
// Check for either a dependence (latency) or resource (hazard) stall.
|
// Check for either a dependence (latency) or resource (hazard) stall.
|
||||||
//
|
//
|
||||||
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
|
// Note: The ScheduleHazardRecognizer interface requires a non-const SU.
|
||||||
@ -2136,12 +2101,23 @@ static bool BUHasStall(SUnit *SU, int Height, RegReductionPQBase *SPQ) {
|
|||||||
// Return 0 if latency-based priority is equivalent.
|
// Return 0 if latency-based priority is equivalent.
|
||||||
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
|
static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
|
||||||
RegReductionPQBase *SPQ) {
|
RegReductionPQBase *SPQ) {
|
||||||
// Scheduling an instruction that uses a VReg whose postincrement has not yet
|
// If the two nodes share an operand and one of them has a single
|
||||||
// been scheduled will induce a copy. Model this as an extra cycle of latency.
|
// use that is a live out copy, favor the one that is live out. Otherwise
|
||||||
int LPenalty = hasVRegCycleUse(left) ? 1 : 0;
|
// it will be difficult to eliminate the copy if the instruction is a
|
||||||
int RPenalty = hasVRegCycleUse(right) ? 1 : 0;
|
// loop induction variable update. e.g.
|
||||||
int LHeight = (int)left->getHeight() + LPenalty;
|
// BB:
|
||||||
int RHeight = (int)right->getHeight() + RPenalty;
|
// sub r1, r3, #1
|
||||||
|
// str r0, [r2, r3]
|
||||||
|
// mov r3, r1
|
||||||
|
// cmp
|
||||||
|
// bne BB
|
||||||
|
bool SharePred = UnitsSharePred(left, right);
|
||||||
|
// FIXME: Only adjust if BB is a loop back edge.
|
||||||
|
// FIXME: What's the cost of a copy?
|
||||||
|
int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0;
|
||||||
|
int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0;
|
||||||
|
int LHeight = (int)left->getHeight() - LBonus;
|
||||||
|
int RHeight = (int)right->getHeight() - RBonus;
|
||||||
|
|
||||||
bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
|
bool LStall = (!checkPref || left->SchedulingPref == Sched::Latency) &&
|
||||||
BUHasStall(left, LHeight, SPQ);
|
BUHasStall(left, LHeight, SPQ);
|
||||||
@ -2152,47 +2128,36 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref,
|
|||||||
// If scheduling either one of the nodes will cause a pipeline stall, sort
|
// If scheduling either one of the nodes will cause a pipeline stall, sort
|
||||||
// them according to their height.
|
// them according to their height.
|
||||||
if (LStall) {
|
if (LStall) {
|
||||||
if (!RStall) {
|
if (!RStall)
|
||||||
DEBUG(++FactorCount[FactStall]);
|
|
||||||
return 1;
|
return 1;
|
||||||
}
|
if (LHeight != RHeight)
|
||||||
if (LHeight != RHeight) {
|
|
||||||
DEBUG(++FactorCount[FactStall]);
|
|
||||||
return LHeight > RHeight ? 1 : -1;
|
return LHeight > RHeight ? 1 : -1;
|
||||||
}
|
} else if (RStall)
|
||||||
} else if (RStall) {
|
|
||||||
DEBUG(++FactorCount[FactStall]);
|
|
||||||
return -1;
|
return -1;
|
||||||
}
|
|
||||||
|
|
||||||
// If either node is scheduling for latency, sort them by height/depth
|
// If either node is scheduling for latency, sort them by height/depth
|
||||||
// and latency.
|
// and latency.
|
||||||
if (!checkPref || (left->SchedulingPref == Sched::Latency ||
|
if (!checkPref || (left->SchedulingPref == Sched::Latency ||
|
||||||
right->SchedulingPref == Sched::Latency)) {
|
right->SchedulingPref == Sched::Latency)) {
|
||||||
if (DisableSchedCycles) {
|
if (DisableSchedCycles) {
|
||||||
if (LHeight != RHeight) {
|
if (LHeight != RHeight)
|
||||||
DEBUG(++FactorCount[FactHeight]);
|
|
||||||
return LHeight > RHeight ? 1 : -1;
|
return LHeight > RHeight ? 1 : -1;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
// If neither instruction stalls (!LStall && !RStall) then
|
// If neither instruction stalls (!LStall && !RStall) then
|
||||||
// its height is already covered so only its depth matters. We also reach
|
// its height is already covered so only its depth matters. We also reach
|
||||||
// this if both stall but have the same height.
|
// this if both stall but have the same height.
|
||||||
int LDepth = left->getDepth() - LPenalty;
|
unsigned LDepth = left->getDepth();
|
||||||
int RDepth = right->getDepth() - RPenalty;
|
unsigned RDepth = right->getDepth();
|
||||||
if (LDepth != RDepth) {
|
if (LDepth != RDepth) {
|
||||||
DEBUG(++FactorCount[FactDepth]);
|
|
||||||
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
|
DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum
|
||||||
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
|
<< ") depth " << LDepth << " vs SU (" << right->NodeNum
|
||||||
<< ") depth " << RDepth << "\n");
|
<< ") depth " << RDepth << "\n");
|
||||||
return LDepth < RDepth ? 1 : -1;
|
return LDepth < RDepth ? 1 : -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (left->Latency != right->Latency) {
|
if (left->Latency != right->Latency)
|
||||||
DEBUG(++FactorCount[FactOther]);
|
|
||||||
return left->Latency > right->Latency ? 1 : -1;
|
return left->Latency > right->Latency ? 1 : -1;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
@ -2204,19 +2169,7 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
|
|||||||
DEBUG(++FactorCount[FactStatic]);
|
DEBUG(++FactorCount[FactStatic]);
|
||||||
return LPriority > RPriority;
|
return LPriority > RPriority;
|
||||||
}
|
}
|
||||||
else if(LPriority == 0) {
|
DEBUG(++FactorCount[FactOther]);
|
||||||
// Schedule zero-latency TokenFactor below any other special
|
|
||||||
// nodes. The alternative may be to avoid artificially boosting the
|
|
||||||
// TokenFactor's height when it is scheduled, but we currently rely on an
|
|
||||||
// instruction's final height to equal the cycle in which it is scheduled,
|
|
||||||
// so heights are monotonically increasing.
|
|
||||||
unsigned LOpc = left->getNode() ? left->getNode()->getOpcode() : 0;
|
|
||||||
unsigned ROpc = right->getNode() ? right->getNode()->getOpcode() : 0;
|
|
||||||
if (LOpc == ISD::TokenFactor)
|
|
||||||
return false;
|
|
||||||
if (ROpc == ISD::TokenFactor)
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to schedule def + use closer when Sethi-Ullman numbers are the same.
|
// Try to schedule def + use closer when Sethi-Ullman numbers are the same.
|
||||||
// e.g.
|
// e.g.
|
||||||
@ -2237,18 +2190,14 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
|
|||||||
// This creates more short live intervals.
|
// This creates more short live intervals.
|
||||||
unsigned LDist = closestSucc(left);
|
unsigned LDist = closestSucc(left);
|
||||||
unsigned RDist = closestSucc(right);
|
unsigned RDist = closestSucc(right);
|
||||||
if (LDist != RDist) {
|
if (LDist != RDist)
|
||||||
DEBUG(++FactorCount[FactOther]);
|
|
||||||
return LDist < RDist;
|
return LDist < RDist;
|
||||||
}
|
|
||||||
|
|
||||||
// How many registers become live when the node is scheduled.
|
// How many registers become live when the node is scheduled.
|
||||||
unsigned LScratch = calcMaxScratches(left);
|
unsigned LScratch = calcMaxScratches(left);
|
||||||
unsigned RScratch = calcMaxScratches(right);
|
unsigned RScratch = calcMaxScratches(right);
|
||||||
if (LScratch != RScratch) {
|
if (LScratch != RScratch)
|
||||||
DEBUG(++FactorCount[FactOther]);
|
|
||||||
return LScratch > RScratch;
|
return LScratch > RScratch;
|
||||||
}
|
|
||||||
|
|
||||||
if (!DisableSchedCycles) {
|
if (!DisableSchedCycles) {
|
||||||
int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
|
int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ);
|
||||||
@ -2256,20 +2205,15 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) {
|
|||||||
return result > 0;
|
return result > 0;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (left->getHeight() != right->getHeight()) {
|
if (left->getHeight() != right->getHeight())
|
||||||
DEBUG(++FactorCount[FactHeight]);
|
|
||||||
return left->getHeight() > right->getHeight();
|
return left->getHeight() > right->getHeight();
|
||||||
}
|
|
||||||
|
|
||||||
if (left->getDepth() != right->getDepth()) {
|
if (left->getDepth() != right->getDepth())
|
||||||
DEBUG(++FactorCount[FactDepth]);
|
|
||||||
return left->getDepth() < right->getDepth();
|
return left->getDepth() < right->getDepth();
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
assert(left->NodeQueueId && right->NodeQueueId &&
|
assert(left->NodeQueueId && right->NodeQueueId &&
|
||||||
"NodeQueueId cannot be zero");
|
"NodeQueueId cannot be zero");
|
||||||
DEBUG(++FactorCount[FactOther]);
|
|
||||||
return (left->NodeQueueId > right->NodeQueueId);
|
return (left->NodeQueueId > right->NodeQueueId);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2320,22 +2264,24 @@ bool hybrid_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
|||||||
// Avoid causing spills. If register pressure is high, schedule for
|
// Avoid causing spills. If register pressure is high, schedule for
|
||||||
// register pressure reduction.
|
// register pressure reduction.
|
||||||
if (LHigh && !RHigh) {
|
if (LHigh && !RHigh) {
|
||||||
DEBUG(++FactorCount[FactPressureDiff]);
|
|
||||||
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
|
DEBUG(dbgs() << " pressure SU(" << left->NodeNum << ") > SU("
|
||||||
<< right->NodeNum << ")\n");
|
<< right->NodeNum << ")\n");
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
else if (!LHigh && RHigh) {
|
else if (!LHigh && RHigh) {
|
||||||
DEBUG(++FactorCount[FactPressureDiff]);
|
|
||||||
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
|
DEBUG(dbgs() << " pressure SU(" << right->NodeNum << ") > SU("
|
||||||
<< left->NodeNum << ")\n");
|
<< left->NodeNum << ")\n");
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
if (!LHigh && !RHigh) {
|
int result = 0;
|
||||||
int result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
|
if (!DisableSchedVRegCycle) {
|
||||||
if (result != 0)
|
result = BUCompareVRegCycle(left, right);
|
||||||
return result > 0;
|
|
||||||
}
|
}
|
||||||
|
if (result == 0 && !LHigh && !RHigh) {
|
||||||
|
result = BUCompareLatency(left, right, true /*checkPref*/, SPQ);
|
||||||
|
}
|
||||||
|
if (result != 0)
|
||||||
|
return result > 0;
|
||||||
return BURRSort(left, right, SPQ);
|
return BURRSort(left, right, SPQ);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2401,6 +2347,12 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
|||||||
if (RReduce && !LReduce) return true;
|
if (RReduce && !LReduce) return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!DisableSchedVRegCycle) {
|
||||||
|
int result = BUCompareVRegCycle(left, right);
|
||||||
|
if (result != 0)
|
||||||
|
return result > 0;
|
||||||
|
}
|
||||||
|
|
||||||
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
|
if (!DisableSchedLiveUses && (LLiveUses != RLiveUses)) {
|
||||||
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
|
DEBUG(dbgs() << "Live uses SU(" << left->NodeNum << "): " << LLiveUses
|
||||||
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
|
<< " != SU(" << right->NodeNum << "): " << RLiveUses << "\n");
|
||||||
@ -2439,24 +2391,6 @@ bool ilp_ls_rr_sort::operator()(SUnit *left, SUnit *right) const {
|
|||||||
return BURRSort(left, right, SPQ);
|
return BURRSort(left, right, SPQ);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RegReductionPQBase::initNodes(std::vector<SUnit> &sunits) {
|
|
||||||
SUnits = &sunits;
|
|
||||||
// Add pseudo dependency edges for two-address nodes.
|
|
||||||
AddPseudoTwoAddrDeps();
|
|
||||||
// Reroute edges to nodes with multiple uses.
|
|
||||||
if (!TracksRegPressure)
|
|
||||||
PrescheduleNodesWithMultipleUses();
|
|
||||||
// Calculate node priorities.
|
|
||||||
CalculateSethiUllmanNumbers();
|
|
||||||
|
|
||||||
// For single block loops, mark nodes that look like canonical IV increments.
|
|
||||||
if (scheduleDAG->BB->isSuccessor(scheduleDAG->BB)) {
|
|
||||||
for (unsigned i = 0, e = sunits.size(); i != e; ++i) {
|
|
||||||
initVRegCycle(&sunits[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// Preschedule for Register Pressure
|
// Preschedule for Register Pressure
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
@ -342,6 +342,10 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
|
|||||||
assert(N->getNodeId() == -1 && "Node already inserted!");
|
assert(N->getNodeId() == -1 && "Node already inserted!");
|
||||||
N->setNodeId(NodeSUnit->NodeNum);
|
N->setNodeId(NodeSUnit->NodeNum);
|
||||||
|
|
||||||
|
// Set isVRegCycle if the node operands are live into and value is live out
|
||||||
|
// of a single block loop.
|
||||||
|
InitVRegCycleFlag(NodeSUnit);
|
||||||
|
|
||||||
// Compute NumRegDefsLeft. This must be done before AddSchedEdges.
|
// Compute NumRegDefsLeft. This must be done before AddSchedEdges.
|
||||||
InitNumRegDefsLeft(NodeSUnit);
|
InitNumRegDefsLeft(NodeSUnit);
|
||||||
|
|
||||||
@ -412,13 +416,7 @@ void ScheduleDAGSDNodes::AddSchedEdges() {
|
|||||||
PhysReg = 0;
|
PhysReg = 0;
|
||||||
|
|
||||||
// If this is a ctrl dep, latency is 1.
|
// If this is a ctrl dep, latency is 1.
|
||||||
// Special-case TokenFactor chains as zero-latency.
|
unsigned OpLatency = isChain ? 1 : OpSU->Latency;
|
||||||
unsigned OpLatency = 1;
|
|
||||||
if (!isChain && OpSU->Latency > 0)
|
|
||||||
OpLatency = OpSU->Latency;
|
|
||||||
else if(isChain && OpN->getOpcode() == ISD::TokenFactor)
|
|
||||||
OpLatency = 0;
|
|
||||||
|
|
||||||
const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
|
const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data,
|
||||||
OpLatency, PhysReg);
|
OpLatency, PhysReg);
|
||||||
if (!isChain && !UnitLatencies) {
|
if (!isChain && !UnitLatencies) {
|
||||||
@ -514,6 +512,47 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set isVRegCycle if this node's single use is CopyToReg and its only active
|
||||||
|
// data operands are CopyFromReg.
|
||||||
|
//
|
||||||
|
// This is only relevant for single-block loops, in which case the VRegCycle
|
||||||
|
// node is likely an induction variable in which the operand and target virtual
|
||||||
|
// registers should be coalesced (e.g. pre/post increment values). Setting the
|
||||||
|
// isVRegCycle flag helps the scheduler prioritize other uses of the same
|
||||||
|
// CopyFromReg so that this node becomes the virtual register "kill". This
|
||||||
|
// avoids interference between the values live in and out of the block and
|
||||||
|
// eliminates a copy inside the loop.
|
||||||
|
void ScheduleDAGSDNodes::InitVRegCycleFlag(SUnit *SU) {
|
||||||
|
if (!BB->isSuccessor(BB))
|
||||||
|
return;
|
||||||
|
|
||||||
|
SDNode *N = SU->getNode();
|
||||||
|
if (N->getGluedNode())
|
||||||
|
return;
|
||||||
|
|
||||||
|
if (!N->hasOneUse() || N->use_begin()->getOpcode() != ISD::CopyToReg)
|
||||||
|
return;
|
||||||
|
|
||||||
|
bool FoundLiveIn = false;
|
||||||
|
for (SDNode::op_iterator OI = N->op_begin(), E = N->op_end(); OI != E; ++OI) {
|
||||||
|
EVT OpVT = OI->getValueType();
|
||||||
|
assert(OpVT != MVT::Glue && "Glued nodes should be in same sunit!");
|
||||||
|
|
||||||
|
if (OpVT == MVT::Other)
|
||||||
|
continue; // ignore chain operands
|
||||||
|
|
||||||
|
if (isPassiveNode(OI->getNode()))
|
||||||
|
continue; // ignore constants and such
|
||||||
|
|
||||||
|
if (OI->getNode()->getOpcode() != ISD::CopyFromReg)
|
||||||
|
return;
|
||||||
|
|
||||||
|
FoundLiveIn = true;
|
||||||
|
}
|
||||||
|
if (FoundLiveIn)
|
||||||
|
SU->isVRegCycle = true;
|
||||||
|
}
|
||||||
|
|
||||||
void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
|
void ScheduleDAGSDNodes::InitNumRegDefsLeft(SUnit *SU) {
|
||||||
assert(SU->NumRegDefsLeft == 0 && "expect a new node");
|
assert(SU->NumRegDefsLeft == 0 && "expect a new node");
|
||||||
for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
|
for (RegDefIter I(SU, this); I.IsValid(); I.Advance()) {
|
||||||
|
@ -1,8 +1,10 @@
|
|||||||
; RUN: llc < %s -mtriple=thumbv7-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
|
; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=linearscan -disable-post-ra | FileCheck %s
|
||||||
|
; RUN: llc < %s -mtriple=arm-apple-darwin -regalloc=basic -disable-post-ra | FileCheck %s
|
||||||
|
|
||||||
; The ARM magic hinting works best with linear scan.
|
; The ARM magic hinting works best with linear scan.
|
||||||
; CHECK: ldrd
|
; CHECK: ldmia
|
||||||
; CHECK: strd
|
; CHECK: stmia
|
||||||
|
; CHECK: ldrh
|
||||||
; CHECK: ldrb
|
; CHECK: ldrb
|
||||||
|
|
||||||
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
|
%struct.x = type { i8, i8, i8, i8, i8, i8, i8, i8, i8, i8, i8 }
|
||||||
|
@ -40,8 +40,8 @@ define void @test_add(float* %P, double* %D) {
|
|||||||
define void @test_ext_round(float* %P, double* %D) {
|
define void @test_ext_round(float* %P, double* %D) {
|
||||||
;CHECK: test_ext_round:
|
;CHECK: test_ext_round:
|
||||||
%a = load float* %P ; <float> [#uses=1]
|
%a = load float* %P ; <float> [#uses=1]
|
||||||
;CHECK: vcvt.f64.f32
|
|
||||||
;CHECK: vcvt.f32.f64
|
;CHECK: vcvt.f32.f64
|
||||||
|
;CHECK: vcvt.f64.f32
|
||||||
%b = fpext float %a to double ; <double> [#uses=1]
|
%b = fpext float %a to double ; <double> [#uses=1]
|
||||||
%A = load double* %D ; <double> [#uses=1]
|
%A = load double* %D ; <double> [#uses=1]
|
||||||
%B = fptrunc double %A to float ; <float> [#uses=1]
|
%B = fptrunc double %A to float ; <float> [#uses=1]
|
||||||
|
Loading…
Reference in New Issue
Block a user