diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 9503b0c08a6..6e822499282 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -280,6 +280,8 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { SU->setHeightToAtLeast(CurCycle); Sequence.push_back(SU); + AvailableQueue->ScheduledNode(SU); + ReleasePredecessors(SU, CurCycle); // Release all the implicit physical register defs that are live. @@ -298,7 +300,6 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU, unsigned CurCycle) { } SU->isScheduled = true; - AvailableQueue->ScheduledNode(SU); } /// CapturePred - This does the opposite of ReleasePred. Since SU is being @@ -322,8 +323,6 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { DEBUG(dbgs() << "*** Unscheduling [" << SU->getHeight() << "]: "); DEBUG(SU->dump(this)); - AvailableQueue->UnscheduledNode(SU); - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { CapturePred(&*I); @@ -353,6 +352,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { SU->isScheduled = false; SU->isAvailable = true; AvailableQueue->push(SU); + AvailableQueue->UnscheduledNode(SU); } /// BacktrackBottomUp - Backtrack scheduling to a previous cycle specified in @@ -1053,11 +1053,11 @@ namespace { /// RegPressure - Tracking current reg pressure per register class. /// - std::vector RegPressure; + std::vector RegPressure; /// RegLimit - Tracking the number of allocatable registers per register /// class. - std::vector RegLimit; + std::vector RegLimit; public: RegReductionPriorityQueue(MachineFunction &mf, @@ -1170,61 +1170,41 @@ namespace { SU->NodeQueueId = 0; } - // EstimateSpills - Given a scheduling unit, estimate the number of spills - // it would cause by scheduling it at the current cycle. - unsigned EstimateSpills(const SUnit *SU) const { + bool HighRegPressure(const SUnit *SU) const { if (!TLI) - return 0; + return false; - unsigned Spills = 0; for (SUnit::const_pred_iterator I = SU->Preds.begin(),E = SU->Preds.end(); I != E; ++I) { if (I->isCtrl()) continue; SUnit *PredSU = I->getSUnit(); - if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1) + const SDNode *PN = PredSU->getNode(); + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyToReg) { + EVT VT = PN->getOperand(1).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + unsigned Cost = TLI->getRepRegClassCostFor(VT); + if (RegLimit[RCId] < (RegPressure[RCId] + Cost)) + return true; + } continue; - const SDNode *N = PredSU->getNode(); - if (!N->isMachineOpcode()) - continue; - unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); + } + unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = N->getValueType(i); - if (!N->hasAnyUseOfValue(i)) + EVT VT = PN->getValueType(i); + if (!PN->hasAnyUseOfValue(i)) continue; unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); unsigned Cost = TLI->getRepRegClassCostFor(VT); // Check if this increases register pressure of the specific register // class to the point where it would cause spills. - int Excess = RegPressure[RCId] + Cost - RegLimit[RCId]; - if (Excess > 0) - Spills += Excess; + if (RegLimit[RCId] < (RegPressure[RCId] + Cost)) + return true; } } - if (!SU->NumSuccs || !Spills) - return Spills; - const SDNode *N = SU->getNode(); - if (!N->isMachineOpcode()) - return Spills; - unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = N->getValueType(i); - if (!N->hasAnyUseOfValue(i)) - continue; - unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - unsigned Cost = TLI->getRepRegClassCostFor(VT); - if (RegPressure[RCId] > RegLimit[RCId]) { - int Less = RegLimit[RCId] - (RegPressure[RCId] - Cost); - if (Less > 0) { - if (Spills <= (unsigned)Less) - return 0; - Spills -= Less; - } - } - } - - return Spills; + return false; } void OpenPredLives(SUnit *SU) { @@ -1232,10 +1212,7 @@ namespace { if (!N->isMachineOpcode()) return; unsigned Opc = N->getMachineOpcode(); - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::INSERT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG || - Opc == TargetOpcode::COPY_TO_REGCLASS || + if (Opc == TargetOpcode::COPY_TO_REGCLASS || Opc == TargetOpcode::REG_SEQUENCE || Opc == TargetOpcode::IMPLICIT_DEF) return; @@ -1245,10 +1222,19 @@ namespace { if (I->isCtrl()) continue; SUnit *PredSU = I->getSUnit(); - if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1) + if (PredSU->NumSuccsLeft != PredSU->NumSuccs) continue; const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyToReg) { + EVT VT = PN->getOperand(1).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) continue; unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { @@ -1268,10 +1254,11 @@ namespace { if (!N->hasAnyUseOfValue(i)) continue; unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); - if (RegPressure[RCId] < 0) + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) // Register pressure tracking is imprecise. This can happen. RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); } } @@ -1280,10 +1267,7 @@ namespace { if (!N->isMachineOpcode()) return; unsigned Opc = N->getMachineOpcode(); - if (Opc == TargetOpcode::EXTRACT_SUBREG || - Opc == TargetOpcode::INSERT_SUBREG || - Opc == TargetOpcode::SUBREG_TO_REG || - Opc == TargetOpcode::COPY_TO_REGCLASS || + if (Opc == TargetOpcode::COPY_TO_REGCLASS || Opc == TargetOpcode::REG_SEQUENCE || Opc == TargetOpcode::IMPLICIT_DEF) return; @@ -1293,10 +1277,19 @@ namespace { if (I->isCtrl()) continue; SUnit *PredSU = I->getSUnit(); - if (PredSU->NumSuccsLeft != PredSU->NumSuccs - 1) + if (PredSU->NumSuccsLeft != PredSU->NumSuccs) continue; const SDNode *PN = PredSU->getNode(); - if (!PN->isMachineOpcode()) + if (!PN->isMachineOpcode()) { + if (PN->getOpcode() == ISD::CopyToReg) { + EVT VT = PN->getOperand(1).getValueType(); + unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); + RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); + } + continue; + } + unsigned POpc = PN->getMachineOpcode(); + if (POpc == TargetOpcode::IMPLICIT_DEF) continue; unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { @@ -1304,10 +1297,11 @@ namespace { if (!PN->hasAnyUseOfValue(i)) continue; unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); - RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); - if (RegPressure[RCId] < 0) + if (RegPressure[RCId] < TLI->getRepRegClassCostFor(VT)) // Register pressure tracking is imprecise. This can happen. RegPressure[RCId] = 0; + else + RegPressure[RCId] -= TLI->getRepRegClassCostFor(VT); } } @@ -1472,30 +1466,39 @@ bool src_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const { } bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ - bool LStall = left->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < left->getHeight(); - bool RStall = right->SchedulingPref == Sched::Latency && - SPQ->getCurCycle() < right->getHeight(); - // If scheduling one of the node will cause a pipeline stall, delay it. - // If scheduling either one of the node will cause a pipeline stall, sort them - // according to their height. - // If neither will cause a pipeline stall, try to reduce register pressure. - if (LStall) { - if (!RStall) - return true; - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - } else if (RStall) + bool LHigh = SPQ->HighRegPressure(left); + bool RHigh = SPQ->HighRegPressure(right); + if (LHigh && !RHigh) + return true; + else if (!LHigh && RHigh) + return false; + else if (!LHigh && !RHigh) { + // Low register pressure situation, schedule for latency if possible. + bool LStall = left->SchedulingPref == Sched::Latency && + SPQ->getCurCycle() < left->getHeight(); + bool RStall = right->SchedulingPref == Sched::Latency && + SPQ->getCurCycle() < right->getHeight(); + // If scheduling one of the node will cause a pipeline stall, delay it. + // If scheduling either one of the node will cause a pipeline stall, sort + // them according to their height. + // If neither will cause a pipeline stall, try to reduce register pressure. + if (LStall) { + if (!RStall) + return true; + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + } else if (RStall) return false; - // If either node is scheduling for latency, sort them by height and latency - // first. - if (left->SchedulingPref == Sched::Latency || - right->SchedulingPref == Sched::Latency) { - if (left->getHeight() != right->getHeight()) - return left->getHeight() > right->getHeight(); - if (left->Latency != right->Latency) - return left->Latency > right->Latency; + // If either node is scheduling for latency, sort them by height and latency + // first. + if (left->SchedulingPref == Sched::Latency || + right->SchedulingPref == Sched::Latency) { + if (left->getHeight() != right->getHeight()) + return left->getHeight() > right->getHeight(); + if (left->Latency != right->Latency) + return left->Latency > right->Latency; + } } return BURRSort(left, right, SPQ); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 9e5dd028962..8d1ed2ad188 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -557,28 +557,25 @@ ARMTargetLowering::findRepresentativeClass(EVT VT) const{ switch (VT.getSimpleVT().SimpleTy) { default: return TargetLowering::findRepresentativeClass(VT); - // Use SPR as representative register class for all floating point - // and vector types. - case MVT::f32: - RRC = ARM::SPRRegisterClass; - break; - case MVT::f64: case MVT::v8i8: case MVT::v4i16: + // Use DPR as representative register class for all floating point + // and vector types. Since there are 32 SPR registers and 32 DPR registers so + // the cost is 1 for both f32 and f64. + case MVT::f32: case MVT::f64: case MVT::v8i8: case MVT::v4i16: case MVT::v2i32: case MVT::v1i64: case MVT::v2f32: - RRC = ARM::SPRRegisterClass; - Cost = 2; + RRC = ARM::DPRRegisterClass; break; case MVT::v16i8: case MVT::v8i16: case MVT::v4i32: case MVT::v2i64: case MVT::v4f32: case MVT::v2f64: - RRC = ARM::SPRRegisterClass; - Cost = 4; + RRC = ARM::DPRRegisterClass; + Cost = 2; break; case MVT::v4i64: - RRC = ARM::SPRRegisterClass; - Cost = 8; + RRC = ARM::DPRRegisterClass; + Cost = 4; break; case MVT::v8i64: - RRC = ARM::SPRRegisterClass; - Cost = 16; + RRC = ARM::DPRRegisterClass; + Cost = 8; break; } return std::make_pair(RRC, Cost); diff --git a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll index 25cf1356d61..fc43ff48778 100644 --- a/test/CodeGen/ARM/lsr-on-unrolled-loops.ll +++ b/test/CodeGen/ARM/lsr-on-unrolled-loops.ll @@ -4,14 +4,14 @@ ; constant offset addressing, so that each of the following stores ; uses the same register. -; CHECK: vstr.32 s0, [r9, #-128] -; CHECK: vstr.32 s0, [r9, #-96] -; CHECK: vstr.32 s0, [r9, #-64] -; CHECK: vstr.32 s0, [r9, #-32] -; CHECK: vstr.32 s0, [r9] -; CHECK: vstr.32 s0, [r9, #32] -; CHECK: vstr.32 s0, [r9, #64] -; CHECK: vstr.32 s0, [r9, #96] +; CHECK: vstr.32 s0, [r{{.*}}, #-128] +; CHECK: vstr.32 s0, [r{{.*}}, #-96] +; CHECK: vstr.32 s0, [r{{.*}}, #-64] +; CHECK: vstr.32 s0, [r{{.*}}, #-32] +; CHECK: vstr.32 s0, [r{{.*}}] +; CHECK: vstr.32 s0, [r{{.*}}, #32] +; CHECK: vstr.32 s0, [r{{.*}}, #64] +; CHECK: vstr.32 s0, [r{{.*}}, #96] target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"