mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	Avoiding overly aggressive latency scheduling. If the two nodes share an
operand and one of them has a single use that is a live out copy, favor the one that is live out. Otherwise it will be difficult to eliminate the copy if the instruction is a loop induction variable update. e.g.

BB:
sub r1, r3, #1
str r0, [r2, r3]
mov r3, r1
cmp
bne BB

=>

BB:
str r0, [r2, r3]
sub r3, r3, #1
cmp
bne BB

This fixed the recent 256.bzip2 regression.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@117675 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -190,7 +190,7 @@ private: | ||||
| void ScheduleDAGRRList::Schedule() { | ||||
|   DEBUG(dbgs() | ||||
|         << "********** List Scheduling BB#" << BB->getNumber() | ||||
|         << " **********\n"); | ||||
|         << " '" << BB->getName() << "' **********\n"); | ||||
|  | ||||
|   NumLiveRegs = 0; | ||||
|   LiveRegDefs.resize(TRI->getNumRegs(), NULL);   | ||||
| @@ -1483,6 +1483,46 @@ static unsigned calcMaxScratches(const SUnit *SU) { | ||||
|   return Scratches; | ||||
| } | ||||
|  | ||||
| /// hasOnlyLiveOutUses - Return true if SU has at least one data successor and | ||||
| /// every data successor is a CopyToReg to a virtual register. Such a def is | ||||
| /// probably a liveout with no other use; schedule it closer to the terminator. | ||||
| static bool hasOnlyLiveOutUses(const SUnit *SU) { | ||||
|   bool RetVal = false; | ||||
|   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); | ||||
|        I != E; ++I) { | ||||
|     if (I->isCtrl()) continue; | ||||
|     const SUnit *SuccSU = I->getSUnit(); | ||||
|     if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) { | ||||
|       unsigned Reg = | ||||
|         cast<RegisterSDNode>(SuccSU->getNode()->getOperand(1))->getReg(); | ||||
|       if (TargetRegisterInfo::isVirtualRegister(Reg)) { | ||||
|         RetVal = true; | ||||
|         continue; | ||||
|       } | ||||
|     } | ||||
|     return false; | ||||
|   } | ||||
|   return RetVal; | ||||
| } | ||||
|  | ||||
| /// UnitsSharePred - Return true if some non-control predecessor of left is | ||||
| /// also a non-control predecessor of right (i.e. they share a data operand). | ||||
| static bool UnitsSharePred(const SUnit *left, const SUnit *right) { | ||||
|   SmallSet<const SUnit*, 4> Preds; | ||||
|   for (SUnit::const_pred_iterator I = left->Preds.begin(),E = left->Preds.end(); | ||||
|        I != E; ++I) { | ||||
|     if (I->isCtrl()) continue;  // skip control (chain) edges of left | ||||
|     Preds.insert(I->getSUnit()); | ||||
|   } | ||||
|   for (SUnit::const_pred_iterator I = right->Preds.begin(),E = right->Preds.end(); | ||||
|        I != E; ++I) { | ||||
|     if (I->isCtrl()) continue;  // skip control (chain) edges of right | ||||
|     if (Preds.count(I->getSUnit())) | ||||
|       return true; | ||||
|   } | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| template <typename RRSort> | ||||
| static bool BURRSort(const SUnit *left, const SUnit *right, | ||||
|                      const RegReductionPriorityQueue<RRSort> *SPQ) { | ||||
| @@ -1558,29 +1598,46 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ | ||||
|   else if (!LHigh && RHigh) | ||||
|     return false; | ||||
|   else if (!LHigh && !RHigh) { | ||||
|     // If the two nodes share an operand and one of them has a single | ||||
|     // use that is a live out copy, favor the one that is live out. Otherwise | ||||
|     // it will be difficult to eliminate the copy if the instruction is a | ||||
|     // loop induction variable update. e.g. | ||||
|     // BB: | ||||
|     // sub r1, r3, #1 | ||||
|     // str r0, [r2, r3] | ||||
|     // mov r3, r1 | ||||
|     // cmp | ||||
|     // bne BB | ||||
|     bool SharePred = UnitsSharePred(left, right); | ||||
|     // FIXME: Only adjust if BB is a loop back edge. | ||||
|     // FIXME: What's the cost of a copy? | ||||
|     int LBonus = (SharePred && hasOnlyLiveOutUses(left)) ? 1 : 0; | ||||
|     int RBonus = (SharePred && hasOnlyLiveOutUses(right)) ? 1 : 0; | ||||
|     int LHeight = (int)left->getHeight() - LBonus; | ||||
|     int RHeight = (int)right->getHeight() - RBonus; | ||||
|  | ||||
|     // Low register pressure situation, schedule for latency if possible. | ||||
|     bool LStall = left->SchedulingPref == Sched::Latency && | ||||
|       SPQ->getCurCycle() < left->getHeight(); | ||||
|       (int)SPQ->getCurCycle() < LHeight; | ||||
|     bool RStall = right->SchedulingPref == Sched::Latency && | ||||
|       SPQ->getCurCycle() < right->getHeight(); | ||||
|       (int)SPQ->getCurCycle() < RHeight; | ||||
|     // If scheduling one of the nodes will cause a pipeline stall, delay it. | ||||
|     // If both nodes will cause a pipeline stall, sort them according to their | ||||
|     // height. | ||||
|     // If neither will cause a pipeline stall, try to reduce register pressure. | ||||
|     if (LStall) { | ||||
|       if (!RStall) | ||||
|         return true; | ||||
|       if (left->getHeight() != right->getHeight()) | ||||
|         return left->getHeight() > right->getHeight(); | ||||
|       if (LHeight != RHeight) | ||||
|         return LHeight > RHeight; | ||||
|     } else if (RStall) | ||||
|       return false; | ||||
|  | ||||
|     // If either node is scheduling for latency, sort them by height and latency | ||||
|     // first. | ||||
|     // If either node is scheduling for latency, sort them by height | ||||
|     // and latency. | ||||
|     if (left->SchedulingPref == Sched::Latency || | ||||
|         right->SchedulingPref == Sched::Latency) { | ||||
|       if (left->getHeight() != right->getHeight()) | ||||
|         return left->getHeight() > right->getHeight(); | ||||
|       if (LHeight != RHeight) | ||||
|         return LHeight > RHeight; | ||||
|       if (left->Latency != right->Latency) | ||||
|         return left->Latency > right->Latency; | ||||
|     } | ||||
| @@ -1631,19 +1688,6 @@ RegReductionPriorityQueue<SF>::canClobber(const SUnit *SU, const SUnit *Op) { | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| /// hasCopyToRegUse - Return true if any non-control successor of SU is a | ||||
| /// CopyToReg node. | ||||
| static bool hasCopyToRegUse(const SUnit *SU) { | ||||
|   for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); | ||||
|        I != E; ++I) { | ||||
|     if (I->isCtrl()) continue; | ||||
|     const SUnit *SuccSU = I->getSUnit(); | ||||
|     if (SuccSU->getNode() && SuccSU->getNode()->getOpcode() == ISD::CopyToReg) | ||||
|       return true; | ||||
|   } | ||||
|   return false; | ||||
| } | ||||
|  | ||||
| /// canClobberPhysRegDefs - True if SU would clobber one of SuccSU's | ||||
| /// physical register defs. | ||||
| static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, | ||||
| @@ -1813,6 +1857,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { | ||||
|     if (!Node || !Node->isMachineOpcode() || SU->getNode()->getFlaggedNode()) | ||||
|       continue; | ||||
|  | ||||
|     bool isLiveOut = hasOnlyLiveOutUses(SU); | ||||
|     unsigned Opc = Node->getMachineOpcode(); | ||||
|     const TargetInstrDesc &TID = TII->get(Opc); | ||||
|     unsigned NumRes = TID.getNumDefs(); | ||||
| @@ -1862,7 +1907,7 @@ void RegReductionPriorityQueue<SF>::AddPseudoTwoAddrDeps() { | ||||
|             SuccOpc == TargetOpcode::SUBREG_TO_REG) | ||||
|           continue; | ||||
|         if ((!canClobber(SuccSU, DUSU) || | ||||
|              (hasCopyToRegUse(SU) && !hasCopyToRegUse(SuccSU)) || | ||||
|              (isLiveOut && !hasOnlyLiveOutUses(SuccSU)) || | ||||
|              (!SU->isCommutable && SuccSU->isCommutable)) && | ||||
|             !scheduleDAG->IsReachable(SuccSU, SU)) { | ||||
|           DEBUG(dbgs() << "    Adding a pseudo-two-addr edge from SU #" | ||||
|   | ||||
| @@ -458,6 +458,15 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, | ||||
|     // Adjust the use operand index by num of defs. | ||||
|     OpIdx += TII->get(Use->getMachineOpcode()).getNumDefs(); | ||||
|   int Latency = TII->getOperandLatency(InstrItins, Def, DefIdx, Use, OpIdx); | ||||
|   if (Latency > 1 && Use->getOpcode() == ISD::CopyToReg && | ||||
|       !BB->succ_empty()) { | ||||
|     unsigned Reg = cast<RegisterSDNode>(Use->getOperand(1))->getReg(); | ||||
|     if (TargetRegisterInfo::isVirtualRegister(Reg)) | ||||
|       // This copy is a liveout value. It is likely coalesced, so reduce the | ||||
|       // latency so as not to penalize the def. | ||||
|       // FIXME: need target specific adjustment here? | ||||
|       Latency = (Latency > 1) ? Latency - 1 : 1; | ||||
|   } | ||||
|   if (Latency >= 0) | ||||
|     dep.setLatency(Latency); | ||||
| } | ||||
|   | ||||
| @@ -1967,8 +1967,13 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, | ||||
|   if (!ItinData || ItinData->isEmpty()) | ||||
|     return DefTID.mayLoad() ? 3 : 1; | ||||
|  | ||||
|   if (!UseNode->isMachineOpcode()) | ||||
|     return ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx); | ||||
|   if (!UseNode->isMachineOpcode()) { | ||||
|     int Latency = ItinData->getOperandCycle(DefTID.getSchedClass(), DefIdx); | ||||
|     if (Subtarget.isCortexA9()) | ||||
|       return Latency <= 2 ? 1 : Latency - 1; | ||||
|     else | ||||
|       return Latency <= 3 ? 1 : Latency - 2; | ||||
|   } | ||||
|  | ||||
|   const TargetInstrDesc &UseTID = get(UseNode->getMachineOpcode()); | ||||
|   const MachineSDNode *DefMN = dyn_cast<MachineSDNode>(DefNode); | ||||
|   | ||||
| @@ -23,7 +23,10 @@ entry: | ||||
|   %4 = insertelement <2 x double> %2, double %V.0.ph, i32 1 ; <<2 x double>> [#uses=2] | ||||
| ; Constant pool load followed by add. | ||||
| ; Then clobber the loaded register, not the sum. | ||||
| ; CHECK: vldr.64 | ||||
| ; CHECK: vadd.f64 | ||||
| ; CHECK: vldr.64 [[LDR:d.*]], | ||||
| ; CHECK: LPC0_0: | ||||
| ; CHECK: vadd.f64 [[ADD:d.*]], [[LDR]], [[LDR]] | ||||
| ; CHECK: vmov.f64 [[LDR]] | ||||
|   %5 = fadd <2 x double> %3, %3                   ; <<2 x double>> [#uses=2] | ||||
|   | ||||
| @@ -15,9 +15,9 @@ bb.nph:                                           ; preds = %bb5 | ||||
|  | ||||
| ; Loop preheader | ||||
| ; CHECK: vmov.f32 | ||||
| ; CHECK: vmul.f32 | ||||
| ; CHECK: vsub.f32 | ||||
| ; CHECK: vadd.f32 | ||||
| ; CHECK: vmul.f32 | ||||
| bb7:                                              ; preds = %bb9, %bb.nph | ||||
|   %s1.02 = phi float [ undef, %bb.nph ], [ %35, %bb9 ] ; <float> [#uses=3] | ||||
|   %tmp79 = add i32 undef, undef                   ; <i32> [#uses=1] | ||||
|   | ||||
		Reference in New Issue
	
	Block a user