Teach the if-converter to be more careful about predicating instructions that take multiple cycles to decode. For the current if-converter clients (in practice only ARM), instructions predicated on false are not nops: they still take machine cycles to decode. Micro-coded instructions such as LDM / STM can take multiple cycles to decode, so the if-converter should not treat them as simple, non-micro-coded instructions.

git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@113570 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in: parent 920a2089d9, commit 3ef1c8759a
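To make the mechanics concrete before the diff: the if-converter used to count one unit of "size" per instruction when deciding whether a block is cheap enough to predicate; after this patch it counts decoded micro-ops, so an LDM/STM weighs in at its real decode cost. A minimal sketch of that accounting follows. The standalone helper is illustrative only (not part of the commit); it assumes the same hooks the patch threads through:

    #include "llvm/CodeGen/MachineBasicBlock.h"
    #include "llvm/Target/TargetInstrInfo.h"
    #include "llvm/Target/TargetInstrItineraries.h"
    using namespace llvm;

    // Illustrative helper: size a block in micro-ops, the way
    // IfConverter::ScanInstructions accumulates BBI.NonPredSize below.
    static unsigned sizeInUOps(const MachineBasicBlock &MBB,
                               const TargetInstrInfo *TII,
                               const InstrItineraryData *Itins) {
      unsigned Size = 0;
      for (MachineBasicBlock::const_iterator I = MBB.begin(), E = MBB.end();
           I != E; ++I)
        Size += TII->getNumMicroOps(&*I, Itins); // 1 for simple instructions
      return Size;
    }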
@@ -75,13 +75,13 @@ class PostRAHazardRecognizer : public ScheduleHazardRecognizer {
   };

   // Itinerary data for the target.
-  const InstrItineraryData &ItinData;
+  const InstrItineraryData *ItinData;

   ScoreBoard ReservedScoreboard;
   ScoreBoard RequiredScoreboard;

 public:
-  PostRAHazardRecognizer(const InstrItineraryData &ItinData);
+  PostRAHazardRecognizer(const InstrItineraryData *ItinData);

   virtual HazardType getHazardType(SUnit *SU);
   virtual void Reset();

@@ -575,7 +575,7 @@ public:
   /// to use for this target when scheduling the machine instructions after
   /// register allocation.
   virtual ScheduleHazardRecognizer*
-  CreateTargetPostRAHazardRecognizer(const InstrItineraryData&) const = 0;
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData*) const = 0;

   /// AnalyzeCompare - For a comparison instruction, return the source register
   /// in SrcReg and the value it compares against in CmpValue. Return true if

@@ -595,7 +595,7 @@ public:
   /// getNumMicroOps - Return the number of u-operations the given machine
   /// instruction will be decoded to on the target cpu.
   virtual unsigned getNumMicroOps(const MachineInstr *MI,
-                                  const InstrItineraryData &ItinData) const;
+                                  const InstrItineraryData *ItinData) const;
 };

 /// TargetInstrInfoImpl - This is the default implementation of

@@ -631,7 +631,7 @@ public:
                              const MachineFunction &MF) const;

   virtual ScheduleHazardRecognizer *
-  CreateTargetPostRAHazardRecognizer(const InstrItineraryData&) const;
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData*) const;
 };

 } // End llvm namespace
@@ -181,6 +181,14 @@ public:

     return (int)OperandCycles[FirstIdx + OperandIdx];
   }

+  /// isMicroCoded - Return true if the instructions in the given class decode
+  /// to more than one micro-op.
+  bool isMicroCoded(unsigned ItinClassIndx) const {
+    if (isEmpty())
+      return false;
+    return Itineratries[ItinClassIndx].NumMicroOps != 1;
+  }
 };
@@ -152,8 +152,8 @@ public:
   /// getInstrItineraryData - Returns instruction itinerary data for the target
   /// or specific subtarget.
   ///
-  virtual const InstrItineraryData getInstrItineraryData() const {
-    return InstrItineraryData();
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return 0;
   }

   /// getELFWriterInfo - If this target supports an ELF writer, return

@@ -18,6 +18,7 @@
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetInstrItineraries.h"
 #include "llvm/Target/TargetLowering.h"
 #include "llvm/Target/TargetMachine.h"
 #include "llvm/Target/TargetRegisterInfo.h"
@@ -150,6 +151,7 @@ namespace {
     const TargetLowering *TLI;
     const TargetInstrInfo *TII;
     const TargetRegisterInfo *TRI;
+    const InstrItineraryData *InstrItins;
     bool MadeChange;
     int FnNum;
   public:

@@ -238,6 +240,7 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) {
   TLI = MF.getTarget().getTargetLowering();
   TII = MF.getTarget().getInstrInfo();
   TRI = MF.getTarget().getRegisterInfo();
+  InstrItins = MF.getTarget().getInstrItineraryData();
   if (!TII) return false;

   // Tail merging tends to expose more if-conversion opportunities.
@@ -641,9 +644,10 @@ void IfConverter::ScanInstructions(BBInfo &BBI) {
     bool isCondBr = BBI.IsBrAnalyzable && TID.isConditionalBranch();

     if (!isCondBr) {
-      if (!isPredicated)
-        BBI.NonPredSize++;
-      else if (!AlreadyPredicated) {
+      if (!isPredicated) {
+        unsigned NumOps = TII->getNumMicroOps(&*I, InstrItins);
+        BBI.NonPredSize += NumOps;
+      } else if (!AlreadyPredicated) {
         // FIXME: This instruction is already predicated before the
         // if-conversion pass. It's probably something like a conditional move.
         // Mark this block unpredicable for now.

@@ -1364,7 +1368,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI,

     MachineInstr *MI = MF.CloneMachineInstr(I);
     ToBBI.BB->insert(ToBBI.BB->end(), MI);
-    ToBBI.NonPredSize++;
+    unsigned NumOps = TII->getNumMicroOps(MI, InstrItins);
+    ToBBI.NonPredSize += NumOps;

     if (!TII->isPredicated(I) && !MI->isDebugValue()) {
       if (!TII->PredicateInstruction(MI, Cond)) {
@@ -23,19 +23,19 @@
 using namespace llvm;

 PostRAHazardRecognizer::
-PostRAHazardRecognizer(const InstrItineraryData &LItinData) :
+PostRAHazardRecognizer(const InstrItineraryData *LItinData) :
   ScheduleHazardRecognizer(), ItinData(LItinData) {
   // Determine the maximum depth of any itinerary. This determines the
   // depth of the scoreboard. We always make the scoreboard at least 1
   // cycle deep to avoid dealing with the boundary condition.
   unsigned ScoreboardDepth = 1;
-  if (!ItinData.isEmpty()) {
+  if (ItinData && !ItinData->isEmpty()) {
     for (unsigned idx = 0; ; ++idx) {
-      if (ItinData.isEndMarker(idx))
+      if (ItinData->isEndMarker(idx))
         break;

-      const InstrStage *IS = ItinData.beginStage(idx);
-      const InstrStage *E = ItinData.endStage(idx);
+      const InstrStage *IS = ItinData->beginStage(idx);
+      const InstrStage *E = ItinData->endStage(idx);
       unsigned ItinDepth = 0;
       for (; IS != E; ++IS)
         ItinDepth += IS->getCycles();

@@ -74,7 +74,7 @@ void PostRAHazardRecognizer::ScoreBoard::dump() const {

 ScheduleHazardRecognizer::HazardType
 PostRAHazardRecognizer::getHazardType(SUnit *SU) {
-  if (ItinData.isEmpty())
+  if (!ItinData || ItinData->isEmpty())
     return NoHazard;

   unsigned cycle = 0;

@@ -82,8 +82,8 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) {
   // Use the itinerary for the underlying instruction to check for
   // free FU's in the scoreboard at the appropriate future cycles.
   unsigned idx = SU->getInstr()->getDesc().getSchedClass();
-  for (const InstrStage *IS = ItinData.beginStage(idx),
-         *E = ItinData.endStage(idx); IS != E; ++IS) {
+  for (const InstrStage *IS = ItinData->beginStage(idx),
+         *E = ItinData->endStage(idx); IS != E; ++IS) {
     // We must find one of the stage's units free for every cycle the
     // stage is occupied. FIXME it would be more accurate to find the
     // same unit free in all the cycles.

@@ -121,7 +121,7 @@ PostRAHazardRecognizer::getHazardType(SUnit *SU) {
 }

 void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) {
-  if (ItinData.isEmpty())
+  if (!ItinData || ItinData->isEmpty())
     return;

   unsigned cycle = 0;

@@ -129,8 +129,8 @@ void PostRAHazardRecognizer::EmitInstruction(SUnit *SU) {
   // Use the itinerary for the underlying instruction to reserve FU's
   // in the scoreboard at the appropriate future cycles.
   unsigned idx = SU->getInstr()->getDesc().getSchedClass();
-  for (const InstrStage *IS = ItinData.beginStage(idx),
-         *E = ItinData.endStage(idx); IS != E; ++IS) {
+  for (const InstrStage *IS = ItinData->beginStage(idx),
+         *E = ItinData->endStage(idx); IS != E; ++IS) {
     // We must reserve one of the stage's units for every cycle the
     // stage is occupied. FIXME it would be more accurate to reserve
     // the same unit free in all the cycles.
@@ -213,7 +213,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
   const MachineLoopInfo &MLI = getAnalysis<MachineLoopInfo>();
   const MachineDominatorTree &MDT = getAnalysis<MachineDominatorTree>();
   const TargetMachine &TM = Fn.getTarget();
-  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
+  const InstrItineraryData *InstrItins = TM.getInstrItineraryData();
   ScheduleHazardRecognizer *HR =
     TM.getInstrInfo()->CreateTargetPostRAHazardRecognizer(InstrItins);
   AntiDepBreaker *ADB =

@@ -32,9 +32,9 @@ using namespace llvm;
 ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
                                      const MachineLoopInfo &mli,
                                      const MachineDominatorTree &mdt)
-  : ScheduleDAG(mf), MLI(mli), MDT(mdt), Defs(TRI->getNumRegs()),
-    Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
-  MFI = mf.getFrameInfo();
+  : ScheduleDAG(mf), MLI(mli), MDT(mdt), MFI(mf.getFrameInfo()),
+    InstrItins(mf.getTarget().getInstrItineraryData()),
+    Defs(TRI->getNumRegs()), Uses(TRI->getNumRegs()), LoopRegs(MLI, MDT) {
   DbgValueVec.clear();
 }
@@ -498,23 +498,22 @@ void ScheduleDAGInstrs::FinishBlock() {
 }

 void ScheduleDAGInstrs::ComputeLatency(SUnit *SU) {
-  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-
   // Compute the latency for the node.
-  SU->Latency =
-    InstrItins.getStageLatency(SU->getInstr()->getDesc().getSchedClass());
+  if (!InstrItins || InstrItins->isEmpty()) {
+    SU->Latency = 1;

-  // Simplistic target-independent heuristic: assume that loads take
-  // extra time.
-  if (InstrItins.isEmpty())
+    // Simplistic target-independent heuristic: assume that loads take
+    // extra time.
     if (SU->getInstr()->getDesc().mayLoad())
       SU->Latency += 2;
+  } else
+    SU->Latency =
+      InstrItins->getStageLatency(SU->getInstr()->getDesc().getSchedClass());
 }

 void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
                                               SDep& dep) const {
-  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-  if (InstrItins.isEmpty())
+  if (!InstrItins || InstrItins->isEmpty())
     return;

   // For a data dependency with a known register...

@@ -528,8 +527,8 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
     MachineInstr *DefMI = Def->getInstr();
     int DefIdx = DefMI->findRegisterDefOperandIdx(Reg);
     if (DefIdx != -1) {
-      int DefCycle = InstrItins.getOperandCycle(DefMI->getDesc().getSchedClass(),
-                                                DefIdx);
+      int DefCycle = InstrItins->getOperandCycle(DefMI->getDesc().getSchedClass(),
+                                                 DefIdx);
       if (DefCycle >= 0) {
         MachineInstr *UseMI = Use->getInstr();
         const unsigned UseClass = UseMI->getDesc().getSchedClass();

@@ -544,7 +543,7 @@ void ScheduleDAGInstrs::ComputeOperandLatency(SUnit *Def, SUnit *Use,
           if (MOReg != Reg)
             continue;

-          int UseCycle = InstrItins.getOperandCycle(UseClass, i);
+          int UseCycle = InstrItins->getOperandCycle(UseClass, i);
           if (UseCycle >= 0)
             Latency = std::max(Latency, DefCycle - UseCycle + 1);
         }
@@ -101,6 +101,7 @@ namespace llvm {
     const MachineLoopInfo &MLI;
     const MachineDominatorTree &MDT;
     const MachineFrameInfo *MFI;
+    const InstrItineraryData *InstrItins;

     /// Defs, Uses - Remember where defs and uses of each physical register
     /// are as we iterate upward through the instructions. This is allocated

@@ -34,8 +34,8 @@ using namespace llvm;
 STATISTIC(LoadsClustered, "Number of loads clustered together");

 ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
-  : ScheduleDAG(mf) {
-}
+  : ScheduleDAG(mf),
+    InstrItins(mf.getTarget().getInstrItineraryData()) {}

 /// Run - perform scheduling.
 ///
@@ -429,8 +429,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
     return;
   }

-  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-  if (InstrItins.isEmpty()) {
+  if (!InstrItins || InstrItins->isEmpty()) {
     SU->Latency = 1;
     return;
   }

@@ -440,7 +439,7 @@ void ScheduleDAGSDNodes::ComputeLatency(SUnit *SU) {
   SU->Latency = 0;
   for (SDNode *N = SU->getNode(); N; N = N->getFlaggedNode())
     if (N->isMachineOpcode()) {
-      SU->Latency += InstrItins.
+      SU->Latency += InstrItins->
         getStageLatency(TII->get(N->getMachineOpcode()).getSchedClass());
     }
 }

@@ -451,8 +450,7 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
   if (ForceUnitLatencies())
     return;

-  const InstrItineraryData &InstrItins = TM.getInstrItineraryData();
-  if (InstrItins.isEmpty())
+  if (!InstrItins || InstrItins->isEmpty())
     return;

   if (dep.getKind() != SDep::Data)

@@ -463,13 +461,13 @@ void ScheduleDAGSDNodes::ComputeOperandLatency(SDNode *Def, SDNode *Use,
   const TargetInstrDesc &II = TII->get(Def->getMachineOpcode());
   if (DefIdx >= II.getNumDefs())
     return;
-  int DefCycle = InstrItins.getOperandCycle(II.getSchedClass(), DefIdx);
+  int DefCycle = InstrItins->getOperandCycle(II.getSchedClass(), DefIdx);
   if (DefCycle < 0)
     return;
   int UseCycle = 1;
   if (Use->isMachineOpcode()) {
     const unsigned UseClass = TII->get(Use->getMachineOpcode()).getSchedClass();
-    UseCycle = InstrItins.getOperandCycle(UseClass, OpIdx);
+    UseCycle = InstrItins->getOperandCycle(UseClass, OpIdx);
   }
   if (UseCycle >= 0) {
     int Latency = DefCycle - UseCycle + 1;
@@ -36,6 +36,7 @@ namespace llvm {
   class ScheduleDAGSDNodes : public ScheduleDAG {
   public:
     SelectionDAG *DAG; // DAG of the current basic block
+    const InstrItineraryData *InstrItins;

     explicit ScheduleDAGSDNodes(MachineFunction &mf);

@@ -416,6 +416,6 @@ bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI,

 // Default implementation of CreateTargetPostRAHazardRecognizer.
 ScheduleHazardRecognizer *TargetInstrInfoImpl::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const {
   return (ScheduleHazardRecognizer *)new PostRAHazardRecognizer(II);
 }
@@ -91,6 +91,15 @@ def ArchV7M : SubtargetFeature<"v7m", "ARMArchVersion", "V7M",

 include "ARMSchedule.td"

+// ARM processor families.
+def ProcOthers : SubtargetFeature<"others", "ARMProcFamily", "Others",
+                                  "One of the other ARM processor families">;
+def ProcA8 : SubtargetFeature<"a8", "ARMProcFamily", "CortexA8",
+                              "Cortex-A8 ARM processors",
+                              [FeatureSlowFPBrcc, FeatureNEONForFP]>;
+def ProcA9 : SubtargetFeature<"a9", "ARMProcFamily", "CortexA9",
+                              "Cortex-A9 ARM processors">;
+
 class ProcNoItin<string Name, list<SubtargetFeature> Features>
   : Processor<Name, GenericItineraries, Features>;

@@ -150,10 +159,10 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [ArchV6T2, FeatureVFP2]>;

 // V7 Processors.
 def : Processor<"cortex-a8", CortexA8Itineraries,
-                [ArchV7A, FeatureHasSlowVMLx,
-                 FeatureSlowFPBrcc, FeatureNEONForFP, FeatureT2XtPk]>;
+                [ArchV7A, ProcA8,
+                 FeatureHasSlowVMLx, FeatureT2XtPk]>;
 def : Processor<"cortex-a9", CortexA9Itineraries,
-                [ArchV7A, FeatureT2XtPk]>;
+                [ArchV7A, ProcA9, FeatureT2XtPk]>;

 // V7M Processors.
 def : ProcNoItin<"cortex-m3", [ArchV7M]>;
@@ -1415,13 +1415,13 @@ ConvertToSetZeroFlag(MachineInstr *MI, MachineInstr *CmpInstr) const {

 unsigned
 ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
-                                 const InstrItineraryData &ItinData) const {
-  if (ItinData.isEmpty())
+                                 const InstrItineraryData *ItinData) const {
+  if (!ItinData || ItinData->isEmpty())
     return 1;

   const TargetInstrDesc &Desc = MI->getDesc();
   unsigned Class = Desc.getSchedClass();
-  unsigned UOps = ItinData.Itineratries[Class].NumMicroOps;
+  unsigned UOps = ItinData->Itineratries[Class].NumMicroOps;
   if (UOps)
     return UOps;
@@ -1430,16 +1430,19 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
   default:
     llvm_unreachable("Unexpected multi-uops instruction!");
     break;
   case ARM::VLDMQ:
   case ARM::VSTMQ:
     return 2;

   // The number of uOps for load / store multiple is determined by the number
   // of registers.
-  // On Cortex-A8, each odd / even pair of register loads / stores
-  // (e.g. r5 + r6) can be completed on the same cycle. The minimum is
-  // 2. For VFP / NEON load / store multiple, the formula is
+  // On Cortex-A8, each pair of register loads / stores can be scheduled on the
+  // same cycle. The scheduling for the first load / store must be done
+  // separately by assuming the address is not 64-bit aligned.
+  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2). If the address
+  // is not 64-bit aligned, then the AGU takes an extra cycle.
+  // For VFP / NEON load / store multiple, the formula is
   // (#reg / 2) + (#reg % 2) + 1.
-  // On Cortex-A9, the formula is simply (#reg / 2) + (#reg % 2).
   case ARM::VLDMD:
   case ARM::VLDMS:
   case ARM::VLDMD_UPD:
@@ -1467,11 +1470,24 @@ ARMBaseInstrInfo::getNumMicroOps(const MachineInstr *MI,
   case ARM::t2LDM_UPD:
   case ARM::t2STM:
   case ARM::t2STM_UPD: {
-    // FIXME: Distinguish between Cortex-A8 / Cortex-A9 and other processor
-    // families.
-    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands();
-    UOps = (NumRegs / 2) + (NumRegs % 2);
-    return (UOps > 2) ? UOps : 2;
+    unsigned NumRegs = MI->getNumOperands() - Desc.getNumOperands() + 1;
+    if (Subtarget.isCortexA8()) {
+      // 4 registers would be issued: 1, 2, 1.
+      // 5 registers would be issued: 1, 2, 2.
+      return 1 + (NumRegs / 2);
+    } else if (Subtarget.isCortexA9()) {
+      UOps = (NumRegs / 2);
+      // If there is an odd number of registers, or if the address is not
+      // 64-bit aligned, it takes an extra AGU (Address Generation Unit) cycle.
+      if ((NumRegs % 2) ||
+          !MI->hasOneMemOperand() ||
+          (*MI->memoperands_begin())->getAlignment() < 8)
+        ++UOps;
+      return UOps;
+    } else {
+      // Assume the worst.
+      return NumRegs;
+    }
   }
   }
 }
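To check the arithmetic of the two formulas above, take a load-multiple of five registers (a worked sketch; the register count would be extracted from the instruction exactly as in the code above):

    unsigned NumRegs = 5;
    // Cortex-A8: issued as 1, 2, 2 -> 1 + (5 / 2) = 3 uops.
    unsigned A8UOps = 1 + (NumRegs / 2);             // 3
    // Cortex-A9: (5 / 2) = 2, plus one extra AGU cycle for the odd
    // register count (an address aligned below 64 bits would add it too).
    unsigned A9UOps = (NumRegs / 2) + (NumRegs % 2); // 3 in the aligned case
    // Any other core: assume the worst.
    unsigned WorstUOps = NumRegs;                    // 5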
@@ -350,7 +350,7 @@ public:
                                     MachineInstr *CmpInstr) const;

   virtual unsigned getNumMicroOps(const MachineInstr *MI,
-                                  const InstrItineraryData &ItinData) const;
+                                  const InstrItineraryData *ItinData) const;
 };

 static inline

@@ -177,6 +177,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
     : TargetLowering(TM, createTLOF(TM)) {
   Subtarget = &TM.getSubtarget<ARMSubtarget>();
   RegInfo = TM.getRegisterInfo();
+  Itins = TM.getInstrItineraryData();

   if (Subtarget->isTargetDarwin()) {
     // Uses VFP for Thumb libfuncs if available.

@@ -749,8 +750,7 @@ Sched::Preference ARMTargetLowering::getSchedulingPreference(SDNode *N) const {
   if (TID.mayLoad())
     return Sched::Latency;

-  const InstrItineraryData &Itins = getTargetMachine().getInstrItineraryData();
-  if (!Itins.isEmpty() && Itins.getStageLatency(TID.getSchedClass()) > 2)
+  if (!Itins->isEmpty() && Itins->getStageLatency(TID.getSchedClass()) > 2)
     return Sched::Latency;
   return Sched::RegPressure;
 }
@@ -301,6 +301,8 @@ namespace llvm {

     const TargetRegisterInfo *RegInfo;

+    const InstrItineraryData *Itins;
+
     /// ARMPCLabelIndex - Keep track of the number of ARM PC labels created.
     ///
     unsigned ARMPCLabelIndex;

@@ -30,6 +30,7 @@ UseMOVT("arm-use-movt",
 ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
                            bool isT)
   : ARMArchVersion(V4)
+  , ARMProcFamily(Others)
   , ARMFPUType(None)
   , UseNEONForSinglePrecisionFP(false)
   , SlowVMLx(false)

@@ -50,7 +51,7 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
   , CPUString("generic")
   , TargetType(isELF) // Default to ELF unless otherwise specified.
   , TargetABI(ARM_ABI_APCS) {
-  // default to soft float ABI
+  // Default to soft float ABI
   if (FloatABIType == FloatABI::Default)
     FloatABIType = FloatABI::Soft;
@@ -29,6 +29,10 @@ protected:
     V4, V4T, V5T, V5TE, V6, V6M, V6T2, V7A, V7M
   };

+  enum ARMProcFamilyEnum {
+    Others, CortexA8, CortexA9
+  };
+
   enum ARMFPEnum {
     None, VFPv2, VFPv3, NEON
   };

@@ -42,6 +46,9 @@ protected:
   /// V6, V6T2, V7A, V7M.
   ARMArchEnum ARMArchVersion;

+  /// ARMProcFamily - ARM processor family: Cortex-A8, Cortex-A9, and others.
+  ARMProcFamilyEnum ARMProcFamily;
+
   /// ARMFPUType - Floating Point Unit type.
   ARMFPEnum ARMFPUType;

@@ -143,6 +150,9 @@ protected:
   bool hasV6T2Ops() const { return ARMArchVersion >= V6T2; }
   bool hasV7Ops() const { return ARMArchVersion >= V7A; }

+  bool isCortexA8() const { return ARMProcFamily == CortexA8; }
+  bool isCortexA9() const { return ARMProcFamily == CortexA9; }
+
   bool hasARMOps() const { return !NoARM; }

   bool hasVFP2() const { return ARMFPUType >= VFPv2; }
@@ -45,8 +45,8 @@ public:
   virtual const ARMFrameInfo *getFrameInfo() const { return &FrameInfo; }
   virtual ARMJITInfo *getJITInfo() { return &JITInfo; }
   virtual const ARMSubtarget *getSubtargetImpl() const { return &Subtarget; }
-  virtual const InstrItineraryData getInstrItineraryData() const {
-    return InstrItins;
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return &InstrItins;
   }

   // Pass Pipeline Configuration

@@ -26,7 +26,7 @@ class Thumb2HazardRecognizer : public PostRAHazardRecognizer {
   MachineInstr *ITBlockMIs[4];

 public:
-  Thumb2HazardRecognizer(const InstrItineraryData &ItinData) :
+  Thumb2HazardRecognizer(const InstrItineraryData *ItinData) :
     PostRAHazardRecognizer(ItinData) {}

   virtual HazardType getHazardType(SUnit *SU);

@@ -194,7 +194,7 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
 }

 ScheduleHazardRecognizer *Thumb2InstrInfo::
-CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const {
+CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const {
   return (ScheduleHazardRecognizer *)new Thumb2HazardRecognizer(II);
 }

@@ -72,7 +72,7 @@ public:
   const Thumb2RegisterInfo &getRegisterInfo() const { return RI; }

   ScheduleHazardRecognizer *
-  CreateTargetPostRAHazardRecognizer(const InstrItineraryData &II) const;
+  CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II) const;
 };

 /// getITInstrPredicate - Valid only in Thumb2 mode. This function is identical
@@ -75,8 +75,8 @@ public:
     return &DataLayout;
   }

-  virtual const InstrItineraryData getInstrItineraryData() const {
-    return InstrItins;
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return &InstrItins;
   }

   // Pass Pipeline Configuration

@@ -58,8 +58,8 @@ public:

   virtual const TargetData *getTargetData() const { return &DataLayout; }
   virtual const PPCSubtarget *getSubtargetImpl() const { return &Subtarget; }
-  virtual const InstrItineraryData getInstrItineraryData() const {
-    return InstrItins;
+  virtual const InstrItineraryData *getInstrItineraryData() const {
+    return &InstrItins;
   }

   // Pass Pipeline Configuration

@@ -50,12 +50,12 @@ TargetInstrInfo::~TargetInstrInfo() {

 unsigned
 TargetInstrInfo::getNumMicroOps(const MachineInstr *MI,
-                                const InstrItineraryData &ItinData) const {
-  if (ItinData.isEmpty())
+                                const InstrItineraryData *ItinData) const {
+  if (!ItinData || ItinData->isEmpty())
     return 1;

   unsigned Class = MI->getDesc().getSchedClass();
-  unsigned UOps = ItinData.Itineratries[Class].NumMicroOps;
+  unsigned UOps = ItinData->Itineratries[Class].NumMicroOps;
   if (UOps)
     return UOps;
test/CodeGen/ARM/ifcvt10.ll (new file, 30 lines)
@@ -0,0 +1,30 @@
+; RUN: llc < %s -march=arm -mcpu=cortex-a8 | FileCheck %s
+; rdar://8402126
+; Make sure the if-converter does not predicate vldmia and ldmia. These are
+; micro-coded and would have a long issue latency even when predicated on
+; a false predicate.
+
+%0 = type { float, float, float, float }
+%pln = type { %vec, float }
+%vec = type { [4 x float] }
+
+define arm_aapcs_vfpcc float @aaa(%vec* nocapture %ustart, %vec* nocapture %udir, %vec* nocapture %vstart, %vec* nocapture %vdir, %vec* %upoint, %vec* %vpoint) {
+; CHECK: aaa:
+; CHECK: vldr.32
+; CHECK-NOT: vldrne
+; CHECK-NOT: vldmiane
+; CHECK-NOT: ldmiane
+; CHECK: vldmia sp!
+; CHECK: ldmia sp!
+entry:
+  br i1 undef, label %bb81, label %bb48
+
+bb48:                                             ; preds = %entry
+  %0 = call arm_aapcs_vfpcc %0 @bbb(%pln* undef, %vec* %vstart, %vec* undef) nounwind ; <%0> [#uses=0]
+  ret float 0.000000e+00
+
+bb81:                                             ; preds = %entry
+  ret float 0.000000e+00
+}
+
+declare arm_aapcs_vfpcc %0 @bbb(%pln* nocapture, %vec* nocapture, %vec* nocapture) nounwind
@@ -1,4 +1,4 @@
-; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic -mcpu=cortex-a8 | FileCheck %s
+; RUN: llc < %s -mtriple=thumbv7-apple-darwin -O3 -relocation-model=pic | FileCheck %s
 ; rdar://8115404
 ; Tail merging must not split an IT block.