From 3ef1c8759a20167457eb7fd82ebcaffe7ccaa1d1 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 10 Sep 2010 01:29:16 +0000 Subject: [PATCH] Teach if-converter to be more careful with predicating instructions that would take multiple cycles to decode. For the current if-converter clients (actually only ARM), the instructions that are predicated on false are not nops. They would still take machine cycles to decode. Micro-coded instructions such as LDM / STM can potentially take multiple cycles to decode. If-converter should take treat them as non-micro-coded simple instructions. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113570 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/llvm/CodeGen/PostRAHazardRecognizer.h | 4 +- include/llvm/Target/TargetInstrInfo.h | 6 +-- include/llvm/Target/TargetInstrItineraries.h | 8 ++++ include/llvm/Target/TargetMachine.h | 4 +- lib/CodeGen/IfConversion.cpp | 13 ++++-- lib/CodeGen/PostRAHazardRecognizer.cpp | 22 +++++----- lib/CodeGen/PostRASchedulerList.cpp | 2 +- lib/CodeGen/ScheduleDAGInstrs.cpp | 29 +++++++------- lib/CodeGen/ScheduleDAGInstrs.h | 1 + .../SelectionDAG/ScheduleDAGSDNodes.cpp | 16 ++++---- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 1 + lib/CodeGen/TargetInstrInfoImpl.cpp | 2 +- lib/Target/ARM/ARM.td | 15 +++++-- lib/Target/ARM/ARMBaseInstrInfo.cpp | 40 +++++++++++++------ lib/Target/ARM/ARMBaseInstrInfo.h | 2 +- lib/Target/ARM/ARMISelLowering.cpp | 4 +- lib/Target/ARM/ARMISelLowering.h | 2 + lib/Target/ARM/ARMSubtarget.cpp | 3 +- lib/Target/ARM/ARMSubtarget.h | 10 +++++ lib/Target/ARM/ARMTargetMachine.h | 4 +- lib/Target/ARM/Thumb2HazardRecognizer.h | 2 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 2 +- lib/Target/ARM/Thumb2InstrInfo.h | 2 +- lib/Target/CellSPU/SPUTargetMachine.h | 4 +- lib/Target/PowerPC/PPCTargetMachine.h | 4 +- lib/Target/TargetInstrInfo.cpp | 6 +-- test/CodeGen/ARM/ifcvt10.ll | 30 ++++++++++++++ .../CodeGen/Thumb2/2010-06-21-TailMergeBug.ll | 2 +- 28 files changed, 160 insertions(+), 80 deletions(-) create mode 100644 test/CodeGen/ARM/ifcvt10.ll diff --git a/include/llvm/CodeGen/PostRAHazardRecognizer.h b/include/llvm/CodeGen/PostRAHazardRecognizer.h index 24d73cb7860..4160384f89b 100644 --- a/include/llvm/CodeGen/PostRAHazardRecognizer.h +++ b/include/llvm/CodeGen/PostRAHazardRecognizer.h @@ -75,13 +75,13 @@ class PostRAHazardRecognizer : public ScheduleHazardRecognizer { }; // Itinerary data for the target. - const InstrItineraryData &ItinData; + const InstrItineraryData *ItinData; ScoreBoard ReservedScoreboard; ScoreBoard RequiredScoreboard; public: - PostRAHazardRecognizer(const InstrItineraryData &ItinData); + PostRAHazardRecognizer(const InstrItineraryData *ItinData); virtual HazardType getHazardType(SUnit *SU); virtual void Reset(); diff --git a/include/llvm/Target/TargetInstrInfo.h b/include/llvm/Target/TargetInstrInfo.h index 7ce1f712102..844b96566a3 100644 --- a/include/llvm/Target/TargetInstrInfo.h +++ b/include/llvm/Target/TargetInstrInfo.h @@ -575,7 +575,7 @@ public: /// to use for this target when scheduling the machine instructions after /// register allocation. virtual ScheduleHazardRecognizer* - CreateTargetPostRAHazardRecognizer(const InstrItineraryData&) const = 0; + CreateTargetPostRAHazardRecognizer(const InstrItineraryData*) const = 0; /// AnalyzeCompare - For a comparison instruction, return the source register /// in SrcReg and the value it compares against in CmpValue. Return true if @@ -595,7 +595,7 @@ public: /// getNumMicroOps - Return the number of u-operations the given machine /// instruction will be decoded to on the target cpu. virtual unsigned getNumMicroOps(const MachineInstr *MI, - const InstrItineraryData &ItinData) const; + const InstrItineraryData *ItinData) const; }; /// TargetInstrInfoImpl - This is the default implementation of @@ -631,7 +631,7 @@ public: const MachineFunction &MF) const; virtual ScheduleHazardRecognizer * - CreateTargetPostRAHazardRecognizer(const InstrItineraryData&) const; + CreateTargetPostRAHazardRecognizer(const InstrItineraryData*) const; }; } // End llvm namespace diff --git a/include/llvm/Target/TargetInstrItineraries.h b/include/llvm/Target/TargetInstrItineraries.h index ae3e6210649..ae156ea972e 100644 --- a/include/llvm/Target/TargetInstrItineraries.h +++ b/include/llvm/Target/TargetInstrItineraries.h @@ -181,6 +181,14 @@ public: return (int)OperandCycles[FirstIdx + OperandIdx]; } + + /// isMicroCoded - Return true if the instructions in the given class decode + /// to more than one micro-ops. + bool isMicroCoded(unsigned ItinClassIndx) const { + if (isEmpty()) + return false; + return Itineratries[ItinClassIndx].NumMicroOps != 1; + } }; diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index 42e99e01564..426338ff8d4 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -152,8 +152,8 @@ public: /// getInstrItineraryData - Returns instruction itinerary data for the target /// or specific subtarget. /// - virtual const InstrItineraryData getInstrItineraryData() const { - return InstrItineraryData(); + virtual const InstrItineraryData *getInstrItineraryData() const { + return 0; } /// getELFWriterInfo - If this target supports an ELF writer, return diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 0ea30d7a792..d73cd538f32 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -18,6 +18,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetInstrItineraries.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" @@ -150,6 +151,7 @@ namespace { const TargetLowering *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; + const InstrItineraryData *InstrItins; bool MadeChange; int FnNum; public: @@ -238,6 +240,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TLI = MF.getTarget().getTargetLowering(); TII = MF.getTarget().getInstrInfo(); TRI = MF.getTarget().getRegisterInfo(); + InstrItins = MF.getTarget().getInstrItineraryData(); if (!TII) return false; // Tail merge tend to expose more if-conversion opportunities. @@ -641,9 +644,10 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch(); if (!isCondBr) { - if (!isPredicated) - BBI.NonPredSize++; - else if (!AlreadyPredicated) { + if (!isPredicated) { + unsigned NumOps = TII->getNumMicroOps(&*I, InstrItins); + BBI.NonPredSize += NumOps; + } else if (!AlreadyPredicated) { // FIXME: This instruction is already predicated before the // if-conversion pass. It's probably something like a conditional move. // Mark this block unpredicable for now. @@ -1364,7 +1368,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); - ToBBI.NonPredSize++; + unsigned NumOps = TII->getNumMicroOps(MI, InstrItins); + ToBBI.NonPredSize += NumOps; if (!TII->isPredicated(I) && !MI->isDebugValue()) { if (!TII->PredicateInstruction(MI, Cond)) { diff --git a/lib/CodeGen/PostRAHazardRecognizer.cpp b/lib/CodeGen/PostRAHazardRecognizer.cpp index cbde2b01eea..2e3919632f8 100644 --- a/lib/CodeGen/PostRAHazardRecognizer.cpp +++ b/lib/CodeGen/PostRAHazardRecognizer.cpp @@ -23,19 +23,19 @@ using namespace llvm; PostRAHazardRecognizer:: -PostRAHazardRecognizer(const InstrItineraryData &LItinData) : +PostRAHazardRecognizer(const InstrItineraryData *LItinData) : ScheduleHazardRecognizer(), ItinData(LItinData) { // Determine the maximum depth of any itinerary. This determines the // depth of the scoreboard. We always make the scoreboard at least 1 // cycle deep to avoid dealing with the boundary condition. unsigned ScoreboardDepth = 1; - if (!ItinData.isEmpty()) { + if (ItinData && !ItinData->isEmpty()) { for (unsigned idx = 0; ; ++idx) { - if (ItinData.isEndMarker(idx)) + if (ItinData->isEndMarker(idx)) break; - const InstrStage *IS = ItinData.beginStage(idx); - const InstrStage *E = ItinData.endStage(idx); + const InstrStage *IS = ItinData->beginStage(idx); + const InstrStage *E = ItinData->endStage(idx); unsigned ItinDepth = 0; for (; IS != E; ++IS) ItinDepth += IS->getCycles(); @@ -74,7 +74,7 @@ void PostRAHazardRecognizer::ScoreBoard::dump() const { ScheduleHazardRecognizer::HazardType PostRAHazardRecognizer::getHazardType(SUnit *SU) { - if (ItinData.isEmpty()) + if (!ItinData || ItinData->isEmpty()) return NoHazard; unsigned cycle = 0; @@ -82,8 +82,8 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) { // Use the itinerary for the underlying instruction to check for // free FU's in the scoreboard at the appropriate future cycles. unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), - *E = ItinData.endStage(idx); IS != E; ++IS) { + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { // We must find one of the stage's units free for every cycle the // stage is occupied. FIXME it would be more accurate to find the // same unit free in all the cycles. @@ -121,7 +121,7 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) { } void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) { - if (ItinData.isEmpty()) + if (!ItinData || ItinData->isEmpty()) return; unsigned cycle = 0; @@ -129,8 +129,8 @@ void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) { // Use the itinerary for the underlying instruction to reserve FU's // in the scoreboard at the appropriate future cycles. unsigned idx = SU->getInstr()->getDesc().getSchedClass(); - for (const InstrStage *IS = ItinData.beginStage(idx), - *E = ItinData.endStage(idx); IS != E; ++IS) { + for (const InstrStage *IS = ItinData->beginStage(idx), + *E = ItinData->endStage(idx); IS != E; ++IS) { // We must reserve one of the stage's units for every cycle the // stage is occupied. FIXME it would be more accurate to reserve // the same unit free in all the cycles. diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index f0bd6d1372b..bd5b2b898b2 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -213,7 +213,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { const MachineLoopInfo &MLI = getAnalysis(); const MachineDominatorTree &MDT = getAnalysis(); const TargetMachine &TM = Fn.getTarget(); - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); + const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); ScheduleHazardRecognizer *HR = TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins); AntiDepBreaker *ADB = diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index ea93dd5c666..da0b0562e12 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -32,9 +32,9 @@ using namespace llvm; ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo &mli, const MachineDominatorTree &mdt) - : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()), - Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) { - MFI = mf.getFrameInfo(); + : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()), + InstrItins(mf.getTarget().getInstrItineraryData()), + Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) { DbgValueVec.clear(); } @@ -498,23 +498,22 @@ void ScheduleDAGInstrs::FinishBlock() { } void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) { - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - // Compute the latency for the node. - SU->Latency = - InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass()); + if (!InstrItins || InstrItins->isEmpty()) { + SU->Latency = 1; - // Simplistic target-independent heuristic: assume that loads take - // extra time. - if (InstrItins.isEmpty()) + // Simplistic target-independent heuristic: assume that loads take + // extra time. if (SU->getInstr()->getDesc().mayLoad()) SU->Latency += 2; + } else + SU->Latency = + InstrItins->getStageLatency(SU->getInstr()->getDesc().getSchedClass()); } void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, SDep& dep) const { - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - if (InstrItins.isEmpty()) + if (!InstrItins || InstrItins->isEmpty()) return; // For a data dependency with a known register... @@ -528,8 +527,8 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, MachineInstr *DefMI = Def->getInstr(); int DefIdx = DefMI->findRegisterDefOperandIdx(Reg); if (DefIdx != -1) { - int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(), - DefIdx); + int DefCycle = InstrItins->getOperandCycle(DefMI->getDesc().getSchedClass(), + DefIdx); if (DefCycle >= 0) { MachineInstr *UseMI = Use->getInstr(); const unsigned UseClass = UseMI->getDesc().getSchedClass(); @@ -544,7 +543,7 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use, if (MOReg != Reg) continue; - int UseCycle = InstrItins.getOperandCycle(UseClass, i); + int UseCycle = InstrItins->getOperandCycle(UseClass, i); if (UseCycle >= 0) Latency = std::max(Latency, DefCycle - UseCycle + 1); } diff --git a/lib/CodeGen/ScheduleDAGInstrs.h b/lib/CodeGen/ScheduleDAGInstrs.h index c8f543f7146..bc07d9ea044 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.h +++ b/lib/CodeGen/ScheduleDAGInstrs.h @@ -101,6 +101,7 @@ namespace llvm { const MachineLoopInfo &MLI; const MachineDominatorTree &MDT; const MachineFrameInfo *MFI; + const InstrItineraryData *InstrItins; /// Defs, Uses - Remember where defs and uses of each physical register /// are as we iterate upward through the instructions. This is allocated diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index f1bf82ab145..fbf621d0bb4 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -34,8 +34,8 @@ using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf) - : ScheduleDAG(mf) { -} + : ScheduleDAG(mf), + InstrItins(mf.getTarget().getInstrItineraryData()) {} /// Run - perform scheduling. /// @@ -429,8 +429,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { return; } - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - if (InstrItins.isEmpty()) { + if (!InstrItins || InstrItins->isEmpty()) { SU->Latency = 1; return; } @@ -440,7 +439,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) { SU->Latency = 0; for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode()) if (N->isMachineOpcode()) { - SU->Latency += InstrItins. + SU->Latency += InstrItins-> getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass()); } } @@ -451,8 +450,7 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, if (ForceUnitLatencies()) return; - const InstrItineraryData &InstrItins = TM.getInstrItineraryData(); - if (InstrItins.isEmpty()) + if (!InstrItins || InstrItins->isEmpty()) return; if (dep.getKind() != SDep::Data) @@ -463,13 +461,13 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use, const TargetInstrDesc &II = TII->get(Def->getMachineOpcode()); if (DefIdx >= II.getNumDefs()) return; - int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx); + int DefCycle = InstrItins->getOperandCycle(II.getSchedClass(), DefIdx); if (DefCycle < 0) return; int UseCycle = 1; if (Use->isMachineOpcode()) { const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass(); - UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx); + UseCycle = InstrItins->getOperandCycle(UseClass, OpIdx); } if (UseCycle >= 0) { int Latency = DefCycle - UseCycle + 1; diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 842fc8c7270..8008ef85ebc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -36,6 +36,7 @@ namespace llvm { class ScheduleDAGSDNodes : public ScheduleDAG { public: SelectionDAG *DAG; // DAG of the current basic block + const InstrItineraryData *InstrItins; explicit ScheduleDAGSDNodes(MachineFunction &mf); diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp index 6e4a0d837ec..d106ead9b46 100644 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ b/lib/CodeGen/TargetInstrInfoImpl.cpp @@ -416,6 +416,6 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, // Default implementation of CreateTargetPostRAHazardRecognizer. ScheduleHazardRecognizer *TargetInstrInfoImpl:: -CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const { return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II); } diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index d6a8f19724d..f3693e3abb1 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -91,6 +91,15 @@ def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M", include "ARMSchedule.td" +// ARM processor families. +def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others", + "One of the other ARM processor families">; +def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8", + "Cortex-A8 ARM processors", + [FeatureSlowFPBrcc, FeatureNEONForFP]>; +def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9", + "Cortex-A9 ARM processors">; + class ProcNoItin Features> : Processor; @@ -150,10 +159,10 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>; // V7 Processors. def : Processor<"cortex-a8", CortexA8Itineraries, - [ArchV7A, FeatureHasSlowVMLx, - FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>; + [ArchV7A, ProcA8, + FeatureHasSlowVMLx, FeatureT2XtPk]>; def : Processor<"cortex-a9", CortexA9Itineraries, - [ArchV7A, FeatureT2XtPk]>; + [ArchV7A, ProcA9, FeatureT2XtPk]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [ArchV7M]>; diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index c824b8bce96..e7b35c69288 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -1415,13 +1415,13 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const { unsigned ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI, - const InstrItineraryData &ItinData) const { - if (ItinData.isEmpty()) + const InstrItineraryData *ItinData) const { + if (!ItinData || ItinData->isEmpty()) return 1; const TargetInstrDesc &Desc = MI->getDesc(); unsigned Class = Desc.getSchedClass(); - unsigned UOps = ItinData.Itineratries[Class].NumMicroOps; + unsigned UOps = ItinData->Itineratries[Class].NumMicroOps; if (UOps) return UOps; @@ -1430,16 +1430,19 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI, default: llvm_unreachable("Unexpected multi-uops instruction!"); break; + case ARM::VLDMQ: case ARM::VSTMQ: return 2; // The number of uOps for load / store multiple are determined by the number // registers. - // On Cortex-A8, each odd / even pair of register loads / stores - // (e.g. r5 + r6) can be completed on the same cycle. The minimum is - // 2. For VFP / NEON load / store multiple, the formula is + // On Cortex-A8, each pair of register loads / stores can be scheduled on the + // same cycle. The scheduling for the first load / store must be done + // separately by assuming the the address is not 64-bit aligned. + // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address + // is not 64-bit aligned, then AGU would take an extra cycle. + // For VFP / NEON load / store multiple, the formula is // (#reg / 2) + (#reg % 2) + 1. - // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). case ARM::VLDMD: case ARM::VLDMS: case ARM::VLDMD_UPD: @@ -1467,11 +1470,24 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI, case ARM::t2LDM_UPD: case ARM::t2STM: case ARM::t2STM_UPD: { - // FIXME: Distinquish between Cortex-A8 / Cortex-A9 and other processor - // families. - unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands(); - UOps = (NumRegs / 2) + (NumRegs % 2); - return (UOps > 2) ? UOps : 2; + unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1; + if (Subtarget.isCortexA8()) { + // 4 registers would be issued: 1, 2, 1. + // 5 registers would be issued: 1, 2, 2. + return 1 + (NumRegs / 2); + } else if (Subtarget.isCortexA9()) { + UOps = (NumRegs / 2); + // If there are odd number of registers or if it's not 64-bit aligned, + // then it takes an extra AGU (Address Generation Unit) cycle. + if ((NumRegs % 2) || + !MI->hasOneMemOperand() || + (*MI->memoperands_begin())->getAlignment() < 8) + ++UOps; + return UOps; + } else { + // Assume the worst. + return NumRegs; + } } } } diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h index b3abdeef96e..f471b6772e7 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/lib/Target/ARM/ARMBaseInstrInfo.h @@ -350,7 +350,7 @@ public: MachineInstr *CmpInstr) const; virtual unsigned getNumMicroOps(const MachineInstr *MI, - const InstrItineraryData &ItinData) const; + const InstrItineraryData *ItinData) const; }; static inline diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index d4198a5ea11..637c6e3b099 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -177,6 +177,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) : TargetLowering(TM, createTLOF(TM)) { Subtarget = &TM.getSubtarget(); RegInfo = TM.getRegisterInfo(); + Itins = TM.getInstrItineraryData(); if (Subtarget->isTargetDarwin()) { // Uses VFP for Thumb libfuncs if available. @@ -749,8 +750,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const { if (TID.mayLoad()) return Sched::Latency; - const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData(); - if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2) + if (!Itins->isEmpty() && Itins->getStageLatency(TID.getSchedClass()) > 2) return Sched::Latency; return Sched::RegPressure; } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index ba9ea7f15e7..58b8b9eb44d 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -301,6 +301,8 @@ namespace llvm { const TargetRegisterInfo *RegInfo; + const InstrItineraryData *Itins; + /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created. /// unsigned ARMPCLabelIndex; diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp index cb539f4c01e..8a4052bc0cc 100644 --- a/lib/Target/ARM/ARMSubtarget.cpp +++ b/lib/Target/ARM/ARMSubtarget.cpp @@ -30,6 +30,7 @@ UseMOVT("arm-use-movt", ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, bool isT) : ARMArchVersion(V4) + , ARMProcFamily(Others) , ARMFPUType(None) , UseNEONForSinglePrecisionFP(false) , SlowVMLx(false) @@ -50,7 +51,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS, , CPUString("generic") , TargetType(isELF) // Default to ELF unless otherwise specified. , TargetABI(ARM_ABI_APCS) { - // default to soft float ABI + // Default to soft float ABI if (FloatABIType == FloatABI::Default) FloatABIType = FloatABI::Soft; diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h index 67e58038ee7..34f571fd70f 100644 --- a/lib/Target/ARM/ARMSubtarget.h +++ b/lib/Target/ARM/ARMSubtarget.h @@ -29,6 +29,10 @@ protected: V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M }; + enum ARMProcFamilyEnum { + Others, CortexA8, CortexA9 + }; + enum ARMFPEnum { None, VFPv2, VFPv3, NEON }; @@ -42,6 +46,9 @@ protected: /// V6, V6T2, V7A, V7M. ARMArchEnum ARMArchVersion; + /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others. + ARMProcFamilyEnum ARMProcFamily; + /// ARMFPUType - Floating Point Unit type. ARMFPEnum ARMFPUType; @@ -143,6 +150,9 @@ protected: bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; } bool hasV7Ops() const { return ARMArchVersion >= V7A; } + bool isCortexA8() const { return ARMProcFamily == CortexA8; } + bool isCortexA9() const { return ARMProcFamily == CortexA9; } + bool hasARMOps() const { return !NoARM; } bool hasVFP2() const { return ARMFPUType >= VFPv2; } diff --git a/lib/Target/ARM/ARMTargetMachine.h b/lib/Target/ARM/ARMTargetMachine.h index 17e5425a9d3..9b375d76c77 100644 --- a/lib/Target/ARM/ARMTargetMachine.h +++ b/lib/Target/ARM/ARMTargetMachine.h @@ -45,8 +45,8 @@ public: virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; } virtual ARMJITInfo *getJITInfo() { return &JITInfo; } virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const InstrItineraryData getInstrItineraryData() const { - return InstrItins; + virtual const InstrItineraryData *getInstrItineraryData() const { + return &InstrItins; } // Pass Pipeline Configuration diff --git a/lib/Target/ARM/Thumb2HazardRecognizer.h b/lib/Target/ARM/Thumb2HazardRecognizer.h index 472665862e4..aa4411f186f 100644 --- a/lib/Target/ARM/Thumb2HazardRecognizer.h +++ b/lib/Target/ARM/Thumb2HazardRecognizer.h @@ -26,7 +26,7 @@ class Thumb2HazardRecognizer : public PostRAHazardRecognizer { MachineInstr *ITBlockMIs[4]; public: - Thumb2HazardRecognizer(const InstrItineraryData &ItinData) : + Thumb2HazardRecognizer(const InstrItineraryData *ItinData) : PostRAHazardRecognizer(ItinData) {} virtual HazardType getHazardType(SUnit *SU); diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index 442f41da8a2..962b312ae47 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -194,7 +194,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, } ScheduleHazardRecognizer *Thumb2InstrInfo:: -CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const { +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const { return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II); } diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h index 3a9f8b194d3..b66be8eac48 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.h +++ b/lib/Target/ARM/Thumb2InstrInfo.h @@ -72,7 +72,7 @@ public: const Thumb2RegisterInfo &getRegisterInfo() const { return RI; } ScheduleHazardRecognizer * - CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const; + CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const; }; /// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical diff --git a/lib/Target/CellSPU/SPUTargetMachine.h b/lib/Target/CellSPU/SPUTargetMachine.h index 7e0270159a8..e306883ca39 100644 --- a/lib/Target/CellSPU/SPUTargetMachine.h +++ b/lib/Target/CellSPU/SPUTargetMachine.h @@ -75,8 +75,8 @@ public: return &DataLayout; } - virtual const InstrItineraryData getInstrItineraryData() const { - return InstrItins; + virtual const InstrItineraryData *getInstrItineraryData() const { + return &InstrItins; } // Pass Pipeline Configuration diff --git a/lib/Target/PowerPC/PPCTargetMachine.h b/lib/Target/PowerPC/PPCTargetMachine.h index 626ddbb6a6b..6f0fb158427 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.h +++ b/lib/Target/PowerPC/PPCTargetMachine.h @@ -58,8 +58,8 @@ public: virtual const TargetData *getTargetData() const { return &DataLayout; } virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; } - virtual const InstrItineraryData getInstrItineraryData() const { - return InstrItins; + virtual const InstrItineraryData *getInstrItineraryData() const { + return &InstrItins; } // Pass Pipeline Configuration diff --git a/lib/Target/TargetInstrInfo.cpp b/lib/Target/TargetInstrInfo.cpp index 118afd48c9f..47cd0fbe163 100644 --- a/lib/Target/TargetInstrInfo.cpp +++ b/lib/Target/TargetInstrInfo.cpp @@ -50,12 +50,12 @@ TargetInstrInfo::~TargetInstrInfo() { unsigned TargetInstrInfo::getNumMicroOps(const MachineInstr *MI, - const InstrItineraryData &ItinData) const { - if (ItinData.isEmpty()) + const InstrItineraryData *ItinData) const { + if (!ItinData || ItinData->isEmpty()) return 1; unsigned Class = MI->getDesc().getSchedClass(); - unsigned UOps = ItinData.Itineratries[Class].NumMicroOps; + unsigned UOps = ItinData->Itineratries[Class].NumMicroOps; if (UOps) return UOps; diff --git a/test/CodeGen/ARM/ifcvt10.ll b/test/CodeGen/ARM/ifcvt10.ll new file mode 100644 index 00000000000..3fd6a97c76e --- /dev/null +++ b/test/CodeGen/ARM/ifcvt10.ll @@ -0,0 +1,30 @@ +; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s +; rdar://8402126 +; Make sure if-converter is not predicating vldmia and ldmia. These are +; micro-coded and would have long issue latency even if predicated on +; false predicate. + +%0 = type { float, float, float, float } +%pln = type { %vec, float } +%vec = type { [4 x float] } + +define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) { +; CHECK: aaa: +; CHECK: vldr.32 +; CHECK-NOT: vldrne +; CHECK-NOT: vldmiane +; CHECK-NOT: ldmiane +; CHECK: vldmia sp! +; CHECK: ldmia sp! +entry: + br i1 undef, label %bb81, label %bb48 + +bb48: ; preds = %entry + %0 = call arm_aapcs_vfpcc %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0] + ret float 0.000000e+00 + +bb81: ; preds = %entry + ret float 0.000000e+00 +} + +declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind diff --git a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll index c5fc5098cd4..f91e1c9febe 100644 --- a/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll +++ b/test/CodeGen/Thumb2/2010-06-21-TailMergeBug.ll @@ -1,4 +1,4 @@ -; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s +; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic | FileCheck %s ; rdar://8115404 ; Tail merging must not split an IT block.