From c8bfd1d78ff9a307d1d4cb57cce4549b538e60f4 Mon Sep 17 00:00:00 2001 From: Andrew Trick Date: Fri, 21 Jan 2011 05:51:33 +0000 Subject: [PATCH] Convert -enable-sched-cycles and -enable-sched-hazard to -disable flags. They are still not enabled in this revision. Added TargetInstrInfo::isZeroCost() to fix a fundamental problem with the scheduler's model of operand latency in the selection DAG. Generalized unit tests to work with sched-cycles. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@123969 91177308-0d34-0410-b5e6-96231b3b80d8 --- .../llvm/CodeGen/ScheduleHazardRecognizer.h | 2 + include/llvm/Target/TargetInstrInfo.h | 16 ++++- lib/CodeGen/ScoreboardHazardRecognizer.cpp | 12 ++-- .../SelectionDAG/ScheduleDAGRRList.cpp | 62 ++++++++++--------- lib/CodeGen/TargetInstrInfoImpl.cpp | 13 +++- lib/Target/ARM/ARMBaseInstrInfo.cpp | 14 ++--- test/CodeGen/ARM/fnegs.ll | 20 +++--- test/CodeGen/ARM/fnmscs.ll | 8 +-- test/CodeGen/ARM/fpconsts.ll | 4 +- test/CodeGen/ARM/unaligned_load_store.ll | 2 +- 10 files changed, 89 insertions(+), 64 deletions(-) diff --git a/include/llvm/CodeGen/ScheduleHazardRecognizer.h b/include/llvm/CodeGen/ScheduleHazardRecognizer.h index 4771a35bc78..2f53baa1c7e 100644 --- a/include/llvm/CodeGen/ScheduleHazardRecognizer.h +++ b/include/llvm/CodeGen/ScheduleHazardRecognizer.h @@ -42,6 +42,8 @@ public: unsigned getMaxLookAhead() const { return MaxLookAhead; } + bool isEnabled() const { return MaxLookAhead != 0; } + /// atIssueLimit - Return true if no more instructions may be issued in this /// cycle. 
virtual bool atIssueLimit() const { return false; } diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index cd22765f762..fc7b51ec6c2 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -567,9 +567,9 @@ public: virtual unsigned getInlineAsmLength(const char *Str, const MCAsmInfo &MAI) const; - /// CreateTargetPreRAHazardRecognizer - Allocate and return a hazard - /// recognizer to use for this target when scheduling the machine instructions - /// before register allocation. + /// CreateTargetHazardRecognizer - Allocate and return a hazard recognizer to + /// use for this target when scheduling the machine instructions before + /// register allocation. virtual ScheduleHazardRecognizer* CreateTargetHazardRecognizer(const TargetMachine *TM, const ScheduleDAG *DAG) const = 0; @@ -610,6 +610,14 @@ public: virtual unsigned getNumMicroOps(const InstrItineraryData *ItinData, const MachineInstr *MI) const; + /// isZeroCost - Return true for pseudo instructions that don't consume any + /// machine resources in their current form. These are common cases that the + /// scheduler should consider free, rather than conservatively handling them + /// as instructions with no itinerary. + bool isZeroCost(unsigned Opcode) const { + return Opcode <= TargetOpcode::COPY; + } + /// getOperandLatency - Compute and return the use operand latency of a given /// pair of def and use. 
/// In most cases, the static scheduling itinerary was enough to determine the @@ -686,6 +694,8 @@ public: const MachineBasicBlock *MBB, const MachineFunction &MF) const; + bool usePreRAHazardRecognizer() const; + virtual ScheduleHazardRecognizer * CreateTargetHazardRecognizer(const TargetMachine*, const ScheduleDAG*) const; diff --git a/lib/CodeGen/ScoreboardHazardRecognizer.cpp b/lib/CodeGen/ScoreboardHazardRecognizer.cpp index b00e0cd0998..e6d7ded8a78 100644 --- a/lib/CodeGen/ScoreboardHazardRecognizer.cpp +++ b/lib/CodeGen/ScoreboardHazardRecognizer.cpp @@ -19,6 +19,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetInstrItineraries.h" using namespace llvm; @@ -170,14 +171,17 @@ void ScoreboardHazardRecognizer::EmitInstruction(SUnit *SU) { if (!ItinData || ItinData->isEmpty()) return; - ++IssueCount; - - unsigned cycle = 0; - // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. 
const TargetInstrDesc *TID = DAG->getInstrDesc(SU); assert(TID && "The scheduler must filter non-machineinstrs"); + if (DAG->TII->isZeroCost(TID->Opcode)) + return; + + ++IssueCount; + + unsigned cycle = 0; + unsigned idx = TID->getSchedClass(); for (const InstrStage *IS = ItinData->beginStage(idx), *E = ItinData->endStage(idx); IS != E; ++IS) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 1a564f1505b..e43bcfdac9b 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -66,10 +66,9 @@ static RegisterScheduler "which tries to balance ILP and register pressure", createILPListDAGScheduler); -static cl::opt EnableSchedCycles( - "enable-sched-cycles", - cl::desc("Enable cycle-level precision during preRA scheduling"), - cl::init(false), cl::Hidden); +static cl::opt DisableSchedCycles( + "disable-sched-cycles", cl::Hidden, cl::init(true), + cl::desc("Disable cycle-level precision during preRA scheduling")); namespace { //===----------------------------------------------------------------------===// @@ -124,10 +123,10 @@ public: Topo(SUnits) { const TargetMachine &tm = mf.getTarget(); - if (EnableSchedCycles && OptLevel != CodeGenOpt::None) - HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); - else + if (DisableSchedCycles || !NeedLatency) HazardRec = new ScheduleHazardRecognizer(); + else + HazardRec = tm.getInstrInfo()->CreateTargetHazardRecognizer(&tm, this); } ~ScheduleDAGRRList() { @@ -168,7 +167,7 @@ public: private: bool isReady(SUnit *SU) { - return !EnableSchedCycles || !AvailableQueue->hasReadyFilter() || + return DisableSchedCycles || !AvailableQueue->hasReadyFilter() || AvailableQueue->isReady(SU); } @@ -237,7 +236,7 @@ void ScheduleDAGRRList::Schedule() { << " '" << BB->getName() << "' **********\n"); CurCycle = 0; - MinAvailableCycle = EnableSchedCycles ? 
UINT_MAX : 0; + MinAvailableCycle = DisableSchedCycles ? 0 : UINT_MAX; NumLiveRegs = 0; LiveRegDefs.resize(TRI->getNumRegs(), NULL); LiveRegGens.resize(TRI->getNumRegs(), NULL); @@ -350,7 +349,7 @@ void ScheduleDAGRRList::ReleasePredecessors(SUnit *SU) { /// Check to see if any of the pending instructions are ready to issue. If /// so, add them to the available queue. void ScheduleDAGRRList::ReleasePending() { - if (!EnableSchedCycles) { + if (DisableSchedCycles) { assert(PendingQueue.empty() && "pending instrs not allowed in this mode"); return; } @@ -385,7 +384,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { return; AvailableQueue->setCurCycle(NextCycle); - if (HazardRec->getMaxLookAhead() == 0) { + if (!HazardRec->isEnabled()) { // Bypass lots of virtual calls in case of long latency. CurCycle = NextCycle; } @@ -405,7 +404,7 @@ void ScheduleDAGRRList::AdvanceToCycle(unsigned NextCycle) { /// Move the scheduler state forward until the specified node's dependents are /// ready and can be scheduled with no resource conflicts. void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { - if (!EnableSchedCycles) + if (DisableSchedCycles) return; unsigned ReadyCycle = isBottomUp ? SU->getHeight() : SU->getDepth(); @@ -440,7 +439,7 @@ void ScheduleDAGRRList::AdvancePastStalls(SUnit *SU) { /// Record this SUnit in the HazardRecognizer. /// Does not update CurCycle. void ScheduleDAGRRList::EmitNode(SUnit *SU) { - if (!EnableSchedCycles || HazardRec->getMaxLookAhead() == 0) + if (!HazardRec->isEnabled()) return; // Check for phys reg copy. @@ -525,9 +524,9 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { // (1) No available instructions // (2) All pipelines full, so available instructions must have hazards. // - // If SchedCycles is disabled, count each inst as one cycle. - if (!EnableSchedCycles || - AvailableQueue->empty() || HazardRec->atIssueLimit()) + // If HazardRec is disabled, count each inst as one cycle. 
+ if (!HazardRec->isEnabled() || HazardRec->atIssueLimit() + || AvailableQueue->empty()) AdvanceToCycle(CurCycle + 1); } @@ -585,7 +584,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { SU->setHeightDirty(); SU->isScheduled = false; SU->isAvailable = true; - if (EnableSchedCycles && AvailableQueue->hasReadyFilter()) { + if (!DisableSchedCycles && AvailableQueue->hasReadyFilter()) { // Don't make available until backtracking is complete. SU->isPending = true; PendingQueue.push_back(SU); @@ -2010,24 +2009,27 @@ static int BUCompareLatency(SUnit *left, SUnit *right, bool checkPref, } else if (RStall) return -1; - // If either node is scheduling for latency, sort them by depth + // If either node is scheduling for latency, sort them by height/depth // and latency. if (!checkPref || (left->SchedulingPref == Sched::Latency || right->SchedulingPref == Sched::Latency)) { - int LDepth = (int)left->getDepth(); - int RDepth = (int)right->getDepth(); - - if (EnableSchedCycles) { - if (LDepth != RDepth) - DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum - << ") depth " << LDepth << " vs SU (" << right->NodeNum - << ") depth " << RDepth << ")\n"); - return LDepth < RDepth ? 1 : -1; - } - else { + if (DisableSchedCycles) { if (LHeight != RHeight) return LHeight > RHeight ? 1 : -1; } + else { + // If neither instruction stalls (!LStall && !RStall) then + // its height is already covered so only its depth matters. We also reach + // this if both stall but have the same height. + unsigned LDepth = left->getDepth(); + unsigned RDepth = right->getDepth(); + if (LDepth != RDepth) { + DEBUG(dbgs() << " Comparing latency of SU (" << left->NodeNum + << ") depth " << LDepth << " vs SU (" << right->NodeNum + << ") depth " << RDepth << "\n"); + return LDepth < RDepth ? 1 : -1; + } + } if (left->Latency != right->Latency) return left->Latency > right->Latency ? 
1 : -1; } @@ -2068,7 +2070,7 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { if (LScratch != RScratch) return LScratch > RScratch; - if (EnableSchedCycles) { + if (!DisableSchedCycles) { int result = BUCompareLatency(left, right, false /*checkPref*/, SPQ); if (result != 0) return result > 0; diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 787c2712e75..15340a3f108 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -24,11 +24,16 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt DisableHazardRecognizer( + "disable-sched-hazard", cl::Hidden, cl::init(false), + cl::desc("Disable hazard detection during preRA scheduling")); + /// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything /// after it, replacing it with an unconditional branch to NewDest. void @@ -421,7 +426,13 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, return false; } -// Default implementation of CreateTargetPreRAHazardRecognizer. +// Provide a global flag for disabling the PreRA hazard recognizer that targets +// may choose to honor. +bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const { + return !DisableHazardRecognizer; +} + +// Default implementation of CreateTargetHazardRecognizer. 
ScheduleHazardRecognizer *TargetInstrInfoImpl:: CreateTargetHazardRecognizer(const TargetMachine *TM, const ScheduleDAG *DAG) const { diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index aaf4f0d8a9a..4f44c8ea391 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -41,14 +41,6 @@ static cl::opt EnableARM3Addr("enable-arm-3-addr-conv", cl::Hidden, cl::desc("Enable ARM 2-addr to 3-addr conv")); -// Other targets already have a hazard recognizer enabled by default, so this -// flag currently only affects ARM. It will be generalized when it becomes a -// disabled flag. -static cl::opt EnableHazardRecognizer( - "enable-sched-hazard", cl::Hidden, - cl::desc("Enable hazard detection during preRA scheduling"), - cl::init(false)); - /// ARM_MLxEntry - Record information about MLA / MLS instructions. struct ARM_MLxEntry { unsigned MLxOpc; // MLA / MLS opcode @@ -97,7 +89,7 @@ ARMBaseInstrInfo::ARMBaseInstrInfo(const ARMSubtarget& STI) ScheduleHazardRecognizer *ARMBaseInstrInfo:: CreateTargetHazardRecognizer(const TargetMachine *TM, const ScheduleDAG *DAG) const { - if (EnableHazardRecognizer) { + if (usePreRAHazardRecognizer()) { const InstrItineraryData *II = TM->getInstrItineraryData(); return new ScoreboardHazardRecognizer(II, DAG, "pre-RA-sched"); } @@ -2173,6 +2165,10 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return 1; const TargetInstrDesc &DefTID = get(DefNode->getMachineOpcode()); + + if (isZeroCost(DefTID.Opcode)) + return 0; + if (!ItinData || ItinData->isEmpty()) return DefTID.mayLoad() ? 
3 : 1; diff --git a/test/CodeGen/ARM/fnegs.ll b/test/CodeGen/ARM/fnegs.ll index c15005e6e8a..418b59803d3 100644 --- a/test/CodeGen/ARM/fnegs.ll +++ b/test/CodeGen/ARM/fnegs.ll @@ -13,19 +13,19 @@ entry: ret float %retval } ; VFP2: test1: -; VFP2: vneg.f32 s1, s0 +; VFP2: vneg.f32 s{{.*}}, s{{.*}} ; NFP1: test1: -; NFP1: vneg.f32 d1, d0 +; NFP1: vneg.f32 d{{.*}}, d{{.*}} ; NFP0: test1: -; NFP0: vneg.f32 s1, s0 +; NFP0: vneg.f32 s{{.*}}, s{{.*}} ; CORTEXA8: test1: -; CORTEXA8: vneg.f32 d1, d0 +; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}} ; CORTEXA9: test1: -; CORTEXA9: vneg.f32 s1, s0 +; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}} define float @test2(float* %a) { entry: @@ -37,17 +37,17 @@ entry: ret float %retval } ; VFP2: test2: -; VFP2: vneg.f32 s1, s0 +; VFP2: vneg.f32 s{{.*}}, s{{.*}} ; NFP1: test2: -; NFP1: vneg.f32 d1, d0 +; NFP1: vneg.f32 d{{.*}}, d{{.*}} ; NFP0: test2: -; NFP0: vneg.f32 s1, s0 +; NFP0: vneg.f32 s{{.*}}, s{{.*}} ; CORTEXA8: test2: -; CORTEXA8: vneg.f32 d1, d0 +; CORTEXA8: vneg.f32 d{{.*}}, d{{.*}} ; CORTEXA9: test2: -; CORTEXA9: vneg.f32 s1, s0 +; CORTEXA9: vneg.f32 s{{.*}}, s{{.*}} diff --git a/test/CodeGen/ARM/fnmscs.ll b/test/CodeGen/ARM/fnmscs.ll index 5d832537c0f..76c806761f7 100644 --- a/test/CodeGen/ARM/fnmscs.ll +++ b/test/CodeGen/ARM/fnmscs.ll @@ -11,7 +11,7 @@ entry: ; NEON: vnmla.f32 ; A8: t1: -; A8: vnmul.f32 s0, s1, s0 +; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}} ; A8: vsub.f32 d0, d0, d1 %0 = fmul float %a, %b %1 = fsub float -0.0, %0 @@ -28,7 +28,7 @@ entry: ; NEON: vnmla.f32 ; A8: t2: -; A8: vnmul.f32 s0, s1, s0 +; A8: vnmul.f32 s0, s{{[01]}}, s{{[01]}} ; A8: vsub.f32 d0, d0, d1 %0 = fmul float %a, %b %1 = fmul float -1.0, %0 @@ -45,7 +45,7 @@ entry: ; NEON: vnmla.f64 ; A8: t3: -; A8: vnmul.f64 d16, d16, d17 +; A8: vnmul.f64 d16, d1{{[67]}}, d1{{[67]}} ; A8: vsub.f64 d16, d16, d17 %0 = fmul double %a, %b %1 = fsub double -0.0, %0 @@ -62,7 +62,7 @@ entry: ; NEON: vnmla.f64 ; A8: t4: -; A8: vnmul.f64 d16, d16, d17 +; A8: vnmul.f64 d16, 
d1{{[67]}}, d1{{[67]}} ; A8: vsub.f64 d16, d16, d17 %0 = fmul double %a, %b %1 = fmul double -1.0, %0 diff --git a/test/CodeGen/ARM/fpconsts.ll b/test/CodeGen/ARM/fpconsts.ll index 9e7a8ae28b9..638dde9d8a0 100644 --- a/test/CodeGen/ARM/fpconsts.ll +++ b/test/CodeGen/ARM/fpconsts.ll @@ -3,7 +3,7 @@ define float @t1(float %x) nounwind readnone optsize { entry: ; CHECK: t1: -; CHECK: vmov.f32 s1, #4.000000e+00 +; CHECK: vmov.f32 s{{.*}}, #4.000000e+00 %0 = fadd float %x, 4.000000e+00 ret float %0 } @@ -27,7 +27,7 @@ entry: define float @t4(float %x) nounwind readnone optsize { entry: ; CHECK: t4: -; CHECK: vmov.f32 s1, #-2.400000e+01 +; CHECK: vmov.f32 s{{.*}}, #-2.400000e+01 %0 = fmul float %x, -2.400000e+01 ret float %0 } diff --git a/test/CodeGen/ARM/unaligned_load_store.ll b/test/CodeGen/ARM/unaligned_load_store.ll index 354895e099d..b42e11f2c4a 100644 --- a/test/CodeGen/ARM/unaligned_load_store.ll +++ b/test/CodeGen/ARM/unaligned_load_store.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -march=arm | FileCheck %s -check-prefix=GENERIC +; RUN: llc < %s -march=arm -pre-RA-sched=source | FileCheck %s -check-prefix=GENERIC ; RUN: llc < %s -mtriple=armv6-apple-darwin | FileCheck %s -check-prefix=DARWIN_V6 ; RUN: llc < %s -mtriple=armv6-apple-darwin -arm-strict-align | FileCheck %s -check-prefix=GENERIC ; RUN: llc < %s -mtriple=armv6-linux | FileCheck %s -check-prefix=GENERIC