More register pressure aware scheduling work.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@109064 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Evan Cheng 2010-07-21 23:53:58 +00:00
parent 2b69143083
commit 4a863e2c75
3 changed files with 103 additions and 103 deletions

View File

@ -280,6 +280,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
SU->setHeightToAtLeast(CurCycle); SU->setHeightToAtLeast(CurCycle);
Sequence.push_back(SU); Sequence.push_back(SU);
AvailableQueue->ScheduledNode(SU);
ReleasePredecessors(SU, CurCycle); ReleasePredecessors(SU, CurCycle);
// Release all the implicit physical register defs that are live. // Release all the implicit physical register defs that are live.
@ -298,7 +300,6 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) {
} }
SU->isScheduled = true; SU->isScheduled = true;
AvailableQueue->ScheduledNode(SU);
} }
/// CapturePred - This does the opposite of ReleasePred. Since SU is being /// CapturePred - This does the opposite of ReleasePred. Since SU is being
@ -322,8 +323,6 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: ");
DEBUG(SU->dump(this)); DEBUG(SU->dump(this));
AvailableQueue->UnscheduledNode(SU);
for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) { I != E; ++I) {
CapturePred(&*I); CapturePred(&*I);
@ -353,6 +352,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) {
SU->isScheduled = false; SU->isScheduled = false;
SU->isAvailable = true; SU->isAvailable = true;
AvailableQueue->push(SU); AvailableQueue->push(SU);
AvailableQueue->UnscheduledNode(SU);
} }
/// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in
@ -1053,11 +1053,11 @@ namespace {
/// RegPressure - Tracking current reg pressure per register class. /// RegPressure - Tracking current reg pressure per register class.
/// ///
std::vector<int> RegPressure; std::vector<unsigned> RegPressure;
/// RegLimit - Tracking the number of allocatable registers per register /// RegLimit - Tracking the number of allocatable registers per register
/// class. /// class.
std::vector<int> RegLimit; std::vector<unsigned> RegLimit;
public: public:
RegReductionPriorityQueue(MachineFunction &mf, RegReductionPriorityQueue(MachineFunction &mf,
@ -1170,61 +1170,41 @@ namespace {
SU->NodeQueueId = 0; SU->NodeQueueId = 0;
} }
// EstimateSpills - Given a scheduling unit, estimate the number of spills bool HighRegPressure(const SUnit *SU) const {
// it would cause by scheduling it at the current cycle.
unsigned EstimateSpills(const SUnit *SU) const {
if (!TLI) if (!TLI)
return 0; return false;
unsigned Spills = 0;
for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end();
I != E; ++I) { I != E; ++I) {
if (I->isCtrl()) if (I->isCtrl())
continue; continue;
SUnit *PredSU = I->getSUnit(); SUnit *PredSU = I->getSUnit();
if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1) const SDNode *PN = PredSU->getNode();
if (!PN->isMachineOpcode()) {
if (PN->getOpcode() == ISD::CopyToReg) {
EVT VT = PN->getOperand(1).getValueType();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
unsigned Cost = TLI->getRepRegClassCostFor(VT);
if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
return true;
}
continue; continue;
const SDNode *N = PredSU->getNode(); }
if (!N->isMachineOpcode()) unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
continue;
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) { for (unsigned i = 0; i != NumDefs; ++i) {
EVT VT = N->getValueType(i); EVT VT = PN->getValueType(i);
if (!N->hasAnyUseOfValue(i)) if (!PN->hasAnyUseOfValue(i))
continue; continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
unsigned Cost = TLI->getRepRegClassCostFor(VT); unsigned Cost = TLI->getRepRegClassCostFor(VT);
// Check if this increases register pressure of the specific register // Check if this increases register pressure of the specific register
// class to the point where it would cause spills. // class to the point where it would cause spills.
int Excess = RegPressure[RCId] + Cost - RegLimit[RCId]; if (RegLimit[RCId] < (RegPressure[RCId] + Cost))
if (Excess > 0) return true;
Spills += Excess;
} }
} }
if (!SU->NumSuccs || !Spills) return false;
return Spills;
const SDNode *N = SU->getNode();
if (!N->isMachineOpcode())
return Spills;
unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) {
EVT VT = N->getValueType(i);
if (!N->hasAnyUseOfValue(i))
continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
unsigned Cost = TLI->getRepRegClassCostFor(VT);
if (RegPressure[RCId] > RegLimit[RCId]) {
int Less = RegLimit[RCId] - (RegPressure[RCId] - Cost);
if (Less > 0) {
if (Spills <= (unsigned)Less)
return 0;
Spills -= Less;
}
}
}
return Spills;
} }
void OpenPredLives(SUnit *SU) { void OpenPredLives(SUnit *SU) {
@ -1232,10 +1212,7 @@ namespace {
if (!N->isMachineOpcode()) if (!N->isMachineOpcode())
return; return;
unsigned Opc = N->getMachineOpcode(); unsigned Opc = N->getMachineOpcode();
if (Opc == TargetOpcode::EXTRACT_SUBREG || if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG ||
Opc == TargetOpcode::COPY_TO_REGCLASS ||
Opc == TargetOpcode::REG_SEQUENCE || Opc == TargetOpcode::REG_SEQUENCE ||
Opc == TargetOpcode::IMPLICIT_DEF) Opc == TargetOpcode::IMPLICIT_DEF)
return; return;
@ -1245,10 +1222,19 @@ namespace {
if (I->isCtrl()) if (I->isCtrl())
continue; continue;
SUnit *PredSU = I->getSUnit(); SUnit *PredSU = I->getSUnit();
if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1) if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
continue; continue;
const SDNode *PN = PredSU->getNode(); const SDNode *PN = PredSU->getNode();
if (!PN->isMachineOpcode()) if (!PN->isMachineOpcode()) {
if (PN->getOpcode() == ISD::CopyToReg) {
EVT VT = PN->getOperand(1).getValueType();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
}
continue;
}
unsigned POpc = PN->getMachineOpcode();
if (POpc == TargetOpcode::IMPLICIT_DEF)
continue; continue;
unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) { for (unsigned i = 0; i != NumDefs; ++i) {
@ -1268,10 +1254,11 @@ namespace {
if (!N->hasAnyUseOfValue(i)) if (!N->hasAnyUseOfValue(i))
continue; continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
if (RegPressure[RCId] < 0)
// Register pressure tracking is imprecise. This can happen. // Register pressure tracking is imprecise. This can happen.
RegPressure[RCId] = 0; RegPressure[RCId] = 0;
else
RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
} }
} }
@ -1280,10 +1267,7 @@ namespace {
if (!N->isMachineOpcode()) if (!N->isMachineOpcode())
return; return;
unsigned Opc = N->getMachineOpcode(); unsigned Opc = N->getMachineOpcode();
if (Opc == TargetOpcode::EXTRACT_SUBREG || if (Opc == TargetOpcode::COPY_TO_REGCLASS ||
Opc == TargetOpcode::INSERT_SUBREG ||
Opc == TargetOpcode::SUBREG_TO_REG ||
Opc == TargetOpcode::COPY_TO_REGCLASS ||
Opc == TargetOpcode::REG_SEQUENCE || Opc == TargetOpcode::REG_SEQUENCE ||
Opc == TargetOpcode::IMPLICIT_DEF) Opc == TargetOpcode::IMPLICIT_DEF)
return; return;
@ -1293,10 +1277,19 @@ namespace {
if (I->isCtrl()) if (I->isCtrl())
continue; continue;
SUnit *PredSU = I->getSUnit(); SUnit *PredSU = I->getSUnit();
if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1) if (PredSU->NumSuccsLeft != PredSU->NumSuccs)
continue; continue;
const SDNode *PN = PredSU->getNode(); const SDNode *PN = PredSU->getNode();
if (!PN->isMachineOpcode()) if (!PN->isMachineOpcode()) {
if (PN->getOpcode() == ISD::CopyToReg) {
EVT VT = PN->getOperand(1).getValueType();
unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] += TLI->getRepRegClassCostFor(VT);
}
continue;
}
unsigned POpc = PN->getMachineOpcode();
if (POpc == TargetOpcode::IMPLICIT_DEF)
continue; continue;
unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs();
for (unsigned i = 0; i != NumDefs; ++i) { for (unsigned i = 0; i != NumDefs; ++i) {
@ -1304,10 +1297,11 @@ namespace {
if (!PN->hasAnyUseOfValue(i)) if (!PN->hasAnyUseOfValue(i))
continue; continue;
unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); unsigned RCId = TLI->getRepRegClassFor(VT)->getID();
RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT))
if (RegPressure[RCId] < 0)
// Register pressure tracking is imprecise. This can happen. // Register pressure tracking is imprecise. This can happen.
RegPressure[RCId] = 0; RegPressure[RCId] = 0;
else
RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT);
} }
} }
@ -1472,13 +1466,21 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const {
} }
bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
bool LHigh = SPQ->HighRegPressure(left);
bool RHigh = SPQ->HighRegPressure(right);
if (LHigh && !RHigh)
return true;
else if (!LHigh && RHigh)
return false;
else if (!LHigh && !RHigh) {
// Low register pressure situation, schedule for latency if possible.
bool LStall = left->SchedulingPref == Sched::Latency && bool LStall = left->SchedulingPref == Sched::Latency &&
SPQ->getCurCycle() < left->getHeight(); SPQ->getCurCycle() < left->getHeight();
bool RStall = right->SchedulingPref == Sched::Latency && bool RStall = right->SchedulingPref == Sched::Latency &&
SPQ->getCurCycle() < right->getHeight(); SPQ->getCurCycle() < right->getHeight();
// If scheduling one of the nodes will cause a pipeline stall, delay it. // If scheduling one of the nodes will cause a pipeline stall, delay it.
// If scheduling either one of the nodes will cause a pipeline stall, sort them // If scheduling either one of the nodes will cause a pipeline stall, sort
// according to their height. // them according to their height.
// If neither will cause a pipeline stall, try to reduce register pressure. // If neither will cause a pipeline stall, try to reduce register pressure.
if (LStall) { if (LStall) {
if (!RStall) if (!RStall)
@ -1497,6 +1499,7 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{
if (left->Latency != right->Latency) if (left->Latency != right->Latency)
return left->Latency > right->Latency; return left->Latency > right->Latency;
} }
}
return BURRSort(left, right, SPQ); return BURRSort(left, right, SPQ);
} }

View File

@ -557,28 +557,25 @@ ARMTargetLowering::findRepresentativeClass(EVT VT) const{
switch (VT.getSimpleVT().SimpleTy) { switch (VT.getSimpleVT().SimpleTy) {
default: default:
return TargetLowering::findRepresentativeClass(VT); return TargetLowering::findRepresentativeClass(VT);
// Use SPR as representative register class for all floating point // Use DPR as representative register class for all floating point
// and vector types. // and vector types. Since there are 32 SPR registers and 32 DPR registers,
case MVT::f32: // the cost is 1 for both f32 and f64.
RRC = ARM::SPRRegisterClass; case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16:
break;
case MVT::f64: case MVT::v8i8: case MVT::v4i16:
case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: case MVT::v2i32: case MVT::v1i64: case MVT::v2f32:
RRC = ARM::SPRRegisterClass; RRC = ARM::DPRRegisterClass;
Cost = 2;
break; break;
case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64:
case MVT::v4f32: case MVT::v2f64: case MVT::v4f32: case MVT::v2f64:
RRC = ARM::SPRRegisterClass; RRC = ARM::DPRRegisterClass;
Cost = 4; Cost = 2;
break; break;
case MVT::v4i64: case MVT::v4i64:
RRC = ARM::SPRRegisterClass; RRC = ARM::DPRRegisterClass;
Cost = 8; Cost = 4;
break; break;
case MVT::v8i64: case MVT::v8i64:
RRC = ARM::SPRRegisterClass; RRC = ARM::DPRRegisterClass;
Cost = 16; Cost = 8;
break; break;
} }
return std::make_pair(RRC, Cost); return std::make_pair(RRC, Cost);

View File

@ -4,14 +4,14 @@
; constant offset addressing, so that each of the following stores ; constant offset addressing, so that each of the following stores
; uses the same register. ; uses the same register.
; CHECK: vstr.32 s0, [r9, #-128] ; CHECK: vstr.32 s0, [r{{.*}}, #-128]
; CHECK: vstr.32 s0, [r9, #-96] ; CHECK: vstr.32 s0, [r{{.*}}, #-96]
; CHECK: vstr.32 s0, [r9, #-64] ; CHECK: vstr.32 s0, [r{{.*}}, #-64]
; CHECK: vstr.32 s0, [r9, #-32] ; CHECK: vstr.32 s0, [r{{.*}}, #-32]
; CHECK: vstr.32 s0, [r9] ; CHECK: vstr.32 s0, [r{{.*}}]
; CHECK: vstr.32 s0, [r9, #32] ; CHECK: vstr.32 s0, [r{{.*}}, #32]
; CHECK: vstr.32 s0, [r9, #64] ; CHECK: vstr.32 s0, [r{{.*}}, #64]
; CHECK: vstr.32 s0, [r9, #96] ; CHECK: vstr.32 s0, [r{{.*}}, #96]
target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"