mirror of
				https://github.com/c64scene-ar/llvm-6502.git
				synced 2025-10-25 10:27:04 +00:00 
			
		
		
		
	Fix some latency computation bugs: if the use is not a machine opcode do not just return zero.
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@105061 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
		| @@ -320,7 +320,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { | ||||
|   for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); | ||||
|        I != E; ++I) { | ||||
|     CapturePred(&*I); | ||||
|     if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]) { | ||||
|     if (I->isAssignedRegDep() && SU->getHeight() == LiveRegCycles[I->getReg()]){ | ||||
|       assert(NumLiveRegs > 0 && "NumLiveRegs is already zero!"); | ||||
|       assert(LiveRegDefs[I->getReg()] == I->getSUnit() && | ||||
|              "Physical register dependency violated?"); | ||||
| @@ -1275,6 +1275,17 @@ bool hybrid_ls_rr_sort::operator()(const SUnit *left, const SUnit *right) const{ | ||||
|       return left->getHeight() > right->getHeight(); | ||||
|   } else if (RStall) | ||||
|       return false; | ||||
|  | ||||
|   // If either node is scheduling for latency, sort them by height and latency | ||||
|   // first. | ||||
|   if (left->SchedulingPref == Sched::Latency || | ||||
|       right->SchedulingPref == Sched::Latency) { | ||||
|     if (left->getHeight() != right->getHeight()) | ||||
|       return left->getHeight() > right->getHeight(); | ||||
|     if (left->Latency != right->Latency) | ||||
|       return left->Latency > right->Latency; | ||||
|   } | ||||
|  | ||||
|   return BURRSort(left, right, SPQ); | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -59,7 +59,11 @@ SUnit *ScheduleDAGSDNodes::NewSUnit(SDNode *N) { | ||||
|   SUnits.back().OrigNode = &SUnits.back(); | ||||
|   SUnit *SU = &SUnits.back(); | ||||
|   const TargetLowering &TLI = DAG->getTargetLoweringInfo(); | ||||
|   SU->SchedulingPref = TLI.getSchedulingPreference(N); | ||||
|   if (N->isMachineOpcode() && | ||||
|       N->getMachineOpcode() == TargetOpcode::IMPLICIT_DEF) | ||||
|     SU->SchedulingPref = Sched::None; | ||||
|   else | ||||
|     SU->SchedulingPref = TLI.getSchedulingPreference(N); | ||||
|   return SU; | ||||
| } | ||||
|  | ||||
| @@ -364,8 +368,10 @@ void ScheduleDAGSDNodes::AddSchedEdges() { | ||||
|         if (Cost >= 0) | ||||
|           PhysReg = 0; | ||||
|  | ||||
|         const SDep& dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, | ||||
|                                OpSU->Latency, PhysReg); | ||||
|         // If this is a ctrl dep, latency is 1. | ||||
|         unsigned OpLatency = isChain ? 1 : OpSU->Latency; | ||||
|         const SDep &dep = SDep(OpSU, isChain ? SDep::Order : SDep::Data, | ||||
|                                OpLatency, PhysReg); | ||||
|         if (!isChain && !UnitLatencies) { | ||||
|           ComputeOperandLatency(OpN, N, i, const_cast<SDep &>(dep)); | ||||
|           ST.adjustSchedDependency(OpSU, SU, const_cast<SDep &>(dep)); | ||||
| @@ -427,15 +433,18 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, | ||||
|     return; | ||||
|  | ||||
|   unsigned DefIdx = Use->getOperand(OpIdx).getResNo(); | ||||
|   if (Def->isMachineOpcode() && Use->isMachineOpcode()) { | ||||
|   if (Def->isMachineOpcode()) { | ||||
|     const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); | ||||
|     if (DefIdx >= II.getNumDefs()) | ||||
|       return; | ||||
|     int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); | ||||
|     if (DefCycle < 0) | ||||
|       return; | ||||
|     const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); | ||||
|     int UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); | ||||
|     int UseCycle = 1; | ||||
|     if (Use->isMachineOpcode()) { | ||||
|       const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); | ||||
|       UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); | ||||
|     } | ||||
|     if (UseCycle >= 0) { | ||||
|       int Latency = DefCycle - UseCycle + 1; | ||||
|       if (Latency >= 0) | ||||
|   | ||||
| @@ -4,14 +4,14 @@ | ||||
| ; constant offset addressing, so that each of the following stores | ||||
| ; uses the same register. | ||||
|  | ||||
| ; CHECK: vstr.32 s0, [r12, #-128] | ||||
| ; CHECK: vstr.32 s0, [r12, #-96] | ||||
| ; CHECK: vstr.32 s0, [r12, #-64] | ||||
| ; CHECK: vstr.32 s0, [r12, #-32] | ||||
| ; CHECK: vstr.32 s0, [r12] | ||||
| ; CHECK: vstr.32 s0, [r12, #32] | ||||
| ; CHECK: vstr.32 s0, [r12, #64] | ||||
| ; CHECK: vstr.32 s0, [r12, #96] | ||||
| ; CHECK: vstr.32 s0, [r9, #-128] | ||||
| ; CHECK: vstr.32 s0, [r9, #-96] | ||||
| ; CHECK: vstr.32 s0, [r9, #-64] | ||||
| ; CHECK: vstr.32 s0, [r9, #-32] | ||||
| ; CHECK: vstr.32 s0, [r9] | ||||
| ; CHECK: vstr.32 s0, [r9, #32] | ||||
| ; CHECK: vstr.32 s0, [r9, #64] | ||||
| ; CHECK: vstr.32 s0, [r9, #96] | ||||
|  | ||||
| target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32" | ||||
|  | ||||
| @@ -626,8 +626,8 @@ bb24:                                             ; preds = %bb23 | ||||
| ; LSR should use count-down iteration to avoid requiring the trip count | ||||
| ; in a register, and it shouldn't require any reloads here. | ||||
|  | ||||
| ; CHECK:      sub.w   r9, r9, #1 | ||||
| ; CHECK-NEXT: cmp.w   r9, #0 | ||||
| ; CHECK:      subs  r3, #1 | ||||
| ; CHECK-NEXT: cmp   r3, #0 | ||||
| ; CHECK-NEXT: bne.w    | ||||
|  | ||||
|   %92 = icmp eq i32 %tmp81, %indvar78             ; <i1> [#uses=1] | ||||
|   | ||||
| @@ -45,9 +45,9 @@ define arm_apcscc void @t2(i16* %i_ptr, i16* %o_ptr, %struct.int16x8_t* nocaptur | ||||
| entry: | ||||
| ; CHECK:        t2: | ||||
| ; CHECK:        vld1.16 | ||||
| ; CHECK:        vld1.16 | ||||
| ; CHECK-NOT:    vmov | ||||
| ; CHECK:        vmul.i16 | ||||
| ; CHECK-NOT:    vmov | ||||
| ; CHECK:        vld1.16 | ||||
| ; CHECK:        vmul.i16 | ||||
| ; CHECK-NOT:    vmov | ||||
| ; CHECK:        vst1.16 | ||||
| @@ -238,8 +238,9 @@ bb14:                                             ; preds = %bb6 | ||||
| define arm_aapcs_vfpcc float @t9(%0* nocapture, %3* nocapture) nounwind { | ||||
| ; CHECK:        t9: | ||||
| ; CHECK:        vldr.64 | ||||
| ; CHECK-NOT:    vmov d{{.*}}, d0 | ||||
| ; CHECK:        vmov.i8 d1 | ||||
| ; CHECK-NEXT:   vstmia r0, {d2,d3} | ||||
| ; CHECK-NEXT:   vstmia r0, {d0,d1} | ||||
| ; CHECK-NEXT:   vstmia r0, {d0,d1} | ||||
|   %3 = bitcast double 0.000000e+00 to <2 x float> ; <<2 x float>> [#uses=2] | ||||
|   %4 = shufflevector <2 x float> %3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; <<4 x float>> [#uses=1] | ||||
|   | ||||
| @@ -10,8 +10,8 @@ target triple = "powerpc-apple-darwin10.0" | ||||
| define void @foo(i32 %y) nounwind ssp { | ||||
| entry: | ||||
| ; CHECK: foo | ||||
| ; CHECK: add r4 | ||||
| ; CHECK: 0(r4) | ||||
| ; CHECK: add r3 | ||||
| ; CHECK: 0(r3) | ||||
|   %y_addr = alloca i32                            ; <i32*> [#uses=2] | ||||
|   %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0] | ||||
|   store i32 %y, i32* %y_addr | ||||
|   | ||||
		Reference in New Issue
	
	Block a user